Home > OS >  BoxPlot based on a condition on a large dataset
BoxPlot based on a condition on a large dataset

Time:06-22

I wish to draw four box plots based on the last column of my data frame, which contains four specific names. That is, I want one box plot for each distinct name; the catch is that each name is repeated many times, and the dataset is huge.

An example of my dataset is shown below.

I feel the dataset should be melted (reshaped to long format) and then plotted, but I'm not sure how this can be done.

dput(ttcluster_dataset_11[,590:597])

structure(list(ZNF510 = c(5.94623, 5.712807, 5.715769, 6.429768, 
6.363147, 5.202924, 5.166514, 5.096154, 5.554974, 6.541608, 6.074222, 
5.29267, 5.32738, 5.273095, 5.502868, 5.50581, 5.305756, 5.492444, 
5.115218, 5.172554, 6.081505, 5.141239, 5.211224, 5.457632, 5.397438, 
4.896902, 4.991186, 5.43112, 5.344361, 5.648219, 5.562535, 5.051546, 
5.505862, 5.368761, 6.228927, 5.225693, 5.256812, 5.063112, 5.133075, 
5.389905, 5.56511, 5.748641, 5.497383, 4.975891, 5.983906, 5.171345, 
4.658596, 4.911625, 4.951117, 4.557619, 4.511041, 4.753554, 4.84636, 
4.42409, 4.76813, 5.188278, 4.992479, 4.728725, 5.095165, 4.91609, 
4.894247, 4.792943, 4.692972, 4.816823, 4.618915, 4.581387, 4.946765, 
5.042702, 4.560496, 6.003053, 4.885398, 4.860576, 5.075357, 5.150366, 
5.85212, 5.459071, 4.932498, 5.017022, 4.635898, 4.901394, 4.886047, 
5.122404, 5.524959, 5.382294, 5.417478, 5.277377, 4.839226, 4.954079, 
5.049926, 5.150093, 4.824094, 5.374495, 5.068106, 4.486112, 4.744695, 
5.185416, 4.684261, 5.470918, 5.661212, 5.273184, 5.128978, 5.157254, 
4.886681, 4.907041, 4.560776, 4.682315, 5.083687, 5.321728, 4.012521, 
4.992144, 4.952402, 5.425613, 4.921027, 4.352164, 4.82214, 4.880961, 
4.763493, 5.218399, 4.76853, 4.865044, 4.26001, 4.756529, 5.135761, 
4.750705, 4.743659, 5.602717, 4.523316, 4.782607, 4.622841, 4.560736, 
5.027406, 4.509971, 4.491781, 4.815731, 4.803335, 4.386224, 4.446848, 
4.6528, 4.918608, 5.259151, 5.008561, 4.96345, 5.079289, 5.338217, 
4.721531, 5.005306, 5.175675), ZNF606 = c(7.986169, 7.376799, 
8.159966, 7.70599, 7.689716, 8.181988, 6.661424, 6.517191, 7.272025, 
6.078736, 7.453391, 5.956133, 5.998482, 6.712002, 7.04154, 6.833371, 
6.900238, 7.506576, 7.062574, 7.160865, 7.946853, 6.732333, 6.721415, 
6.670972, 8.319127, 6.432638, 6.08956, 7.665114, 7.769902, 7.074339, 
7.493044, 7.021143, 7.102124, 7.907245, 7.13656, 6.721443, 7.418671, 
6.613831, 7.726761, 6.834814, 7.325814, 6.036518, 7.047737, 6.758348, 
7.130228, 7.535751, 8.124756, 7.740062, 6.895697, 5.91543, 6.645065, 
6.769546, 5.952405, 5.757844, 5.755265, 7.37749, 7.401689, 6.694484, 
7.883158, 7.40073, 6.013217, 7.727535, 7.127403, 6.114635, 6.394833, 
7.493751, 6.03707, 6.313664, 5.920508, 7.52394, 7.236138, 7.924246, 
6.406277, 7.688128, 8.270575, 9.714792, 8.495282, 7.230629, 7.843253, 
6.501139, 7.385027, 7.135641, 7.305913, 8.113981, 7.900572, 7.439215, 
6.942999, 7.392146, 6.983993, 7.114682, 7.610552, 7.860252, 7.779078, 
6.516645, 5.891245, 7.102117, 6.206292, 6.80473, 7.398404, 7.742634, 
7.976577, 7.68005, 8.063756, 5.588883, 5.360495, 5.512573, 7.020665, 
6.800375, 5.142791, 6.408366, 7.331572, 7.55185, 5.995496, 5.866154, 
5.601758, 5.995924, 6.700189, 7.276173, 6.48461, 6.126766, 5.881729, 
5.600483, 6.141761, 6.24525, 6.244509, 6.503886, 5.642926, 5.8735, 
6.203544, 6.964948, 5.896745, 5.525771, 6.185229, 6.101975, 5.943739, 
6.77505, 5.768837, 6.845364, 6.406915, 6.791533, 5.378078, 5.944972, 
6.089856, 6.276459, 6.014454, 6.162838, 5.983889), ZNF643 = c(5.516779, 
5.109641, 4.222278, 5.669309, 4.24417, 3.973801, 4.710076, 4.441062, 
4.896763, 4.453119, 4.146139, 4.194073, 4.569539, 4.350931, 5.826574, 
4.060216, 3.887662, 4.596281, 4.494383, 4.214856, 5.221983, 4.573985, 
4.134596, 4.276018, 4.355076, 4.715127, 3.873693, 4.224615, 3.975312, 
5.701436, 4.268369, 4.486031, 4.842966, 4.831121, 4.352275, 4.140755, 
4.863006, 4.787726, 4.518018, 4.469541, 5.370321, 4.531806, 4.720145, 
4.998464, 4.801315, 3.759611, 4.013963, 3.649857, 3.908661, 3.939353, 
4.104779, 3.971606, 4.169034, 4.071793, 4.287082, 4.294498, 3.616463, 
4.011011, 4.200286, 3.808939, 4.198028, 3.921237, 4.002587, 4.021958, 
3.833761, 3.918443, 4.510014, 4.070512, 4.116308, 4.587369, 3.769469, 
3.959945, 3.866864, 4.239372, 3.859389, 3.750507, 3.690652, 3.593089, 
3.792786, 3.900709, 3.811974, 4.282864, 4.123551, 4.062027, 4.341666, 
3.967817, 3.715064, 3.750994, 3.814445, 4.148966, 3.817549, 3.571791, 
3.741759, 4.223668, 4.632651, 4.109867, 4.014606, 4.381486, 3.847104, 
3.732784, 3.648111, 3.79586, 3.735215, 4.1199, 3.844268, 4.060126, 
3.840929, 3.850423, 3.967698, 3.802081, 3.776515, 4.02642, 3.806103, 
3.846848, 3.652238, 4.097868, 4.028811, 4.212841, 3.874445, 3.965345, 
3.591975, 4.357731, 3.91568, 3.813801, 3.976975, 3.87096, 3.864385, 
3.880382, 4.102032, 3.969792, 3.9359, 4.312307, 4.140471, 3.606493, 
3.522976, 3.995031, 4.302133, 4.316521, 4.161633, 3.876039, 3.799649, 
3.730395, 3.832043, 3.593912, 3.792549, 4.181811, 4.369276), 
    ZNF671 = c(5.925655, 5.846523, 5.920034, 5.717773, 6.033784, 
    5.929354, 5.681274, 5.446523, 5.588, 4.901026, 6.018967, 
    4.563166, 5.007654, 5.335216, 5.328249, 5.410705, 5.782957, 
    5.986309, 6.008752, 5.735078, 6.198303, 6.114576, 5.167656, 
    5.558669, 5.96253, 5.44165, 5.12862, 5.717307, 5.900972, 
    5.812882, 5.725497, 5.486513, 6.086546, 6.059247, 5.401379, 
    5.670671, 5.916253, 5.41078, 5.796643, 5.578754, 5.853106, 
    5.358751, 4.88637, 5.16898, 5.48357, 5.678697, 6.112117, 
    6.023871, 6.031749, 5.115226, 5.307511, 5.325953, 5.554291, 
    5.571159, 5.421624, 5.675385, 5.354039, 5.515843, 5.851099, 
    6.226074, 4.954463, 5.542909, 5.346119, 5.633893, 5.879304, 
    5.715662, 5.433942, 5.228324, 5.639323, 5.335542, 5.535654, 
    6.481325, 6.00583, 5.89689, 6.617966, 7.409558, 6.740933, 
    5.739933, 6.166158, 5.643891, 6.030342, 5.857361, 6.09365, 
    6.52582, 6.08245, 5.736298, 5.938821, 5.978616, 6.099404, 
    5.989758, 5.778333, 6.656244, 6.587056, 5.651474, 5.55851, 
    5.791252, 5.130876, 5.893974, 6.254475, 6.352677, 6.285109, 
    6.218828, 6.532462, 5.31754, 5.206557, 5.273588, 6.27646, 
    5.599431, 5.543145, 5.442304, 5.362417, 5.87653, 5.245789, 
    5.464902, 5.650898, 5.358025, 5.330712, 5.942345, 5.311266, 
    5.266184, 4.958812, 5.43093, 5.638973, 4.084797, 5.230342, 
    5.795891, 4.484665, 5.676921, 5.266355, 5.708541, 5.390116, 
    5.717329, 4.966538, 5.364846, 5.450049, 5.421346, 5.064345, 
    5.403535, 5.269067, 5.875816, 5.324136, 5.305508, 5.910451, 
    5.806817, 5.529564, 5.455927, 5.227397), ZNF711 = c(6.135167, 
    6.177082, 6.708938, 5.110068, 6.198867, 4.668883, 4.505945, 
    4.152014, 5.787948, 5.487948, 6.041725, 4.218519, 5.356081, 
    5.573695, 5.278823, 4.905638, 5.150937, 5.633478, 4.754774, 
    5.664713, 7.05277, 4.8987, 4.831791, 4.830966, 5.042365, 
    5.649319, 4.182002, 4.890872, 4.845807, 5.402074, 5.392994, 
    5.480273, 5.429907, 5.90185, 6.474454, 4.807848, 5.371962, 
    4.34629, 4.904435, 5.042043, 4.633989, 5.6408, 5.129399, 
    4.242098, 5.751459, 4.4931, 5.055793, 4.729417, 4.575814, 
    4.102812, 4.359219, 3.923245, 4.342203, 3.978587, 4.531973, 
    4.366895, 4.277852, 4.148857, 4.202296, 3.933186, 5.087968, 
    4.606007, 4.641724, 3.741272, 4.3427, 4.487933, 3.720022, 
    4.782166, 4.232429, 5.992919, 4.172031, 5.684018, 4.261097, 
    4.922277, 5.840793, 4.487535, 4.580727, 4.175028, 4.111755, 
    3.891364, 3.921483, 3.947222, 4.931146, 5.682478, 5.664157, 
    5.21578, 4.331579, 4.510376, 4.499638, 4.421184, 4.422382, 
    5.738777, 5.326779, 4.17621, 4.09401, 4.512306, 4.737281, 
    5.285866, 4.704729, 5.092423, 4.890063, 4.597651, 4.132479, 
    3.471144, 3.865533, 3.924907, 4.993866, 4.871375, 4.039307, 
    4.263758, 4.158168, 5.299139, 3.991539, 3.846281, 3.890258, 
    3.931811, 3.806779, 5.004343, 4.686404, 4.494223, 3.961933, 
    4.013318, 4.780627, 4.007234, 4.153169, 4.386958, 4.01196, 
    4.058655, 3.970959, 3.830703, 4.122821, 3.968203, 4.270955, 
    4.02155, 4.315272, 4.669395, 3.725762, 4.387163, 3.878838, 
    4.148402, 4.696083, 3.902561, 4.292255, 4.306139, 4.240283, 
    4.809729, 4.110889), ZNF8 = c(6.535976, 5.617546, 5.520076, 
    6.173408, 5.636716, 5.600262, 5.220575, 4.743357, 4.901029, 
    4.925283, 5.838014, 4.596382, 4.914675, 5.377216, 5.555028, 
    5.314098, 5.297864, 5.750704, 5.488065, 4.999103, 5.992091, 
    5.642773, 5.07518, 5.314475, 5.900937, 5.559608, 4.522685, 
    5.265154, 5.324593, 5.354034, 5.7289, 5.057386, 5.06977, 
    5.597334, 5.153986, 5.108475, 5.570181, 4.899142, 5.406266, 
    5.113253, 4.960403, 5.435193, 6.867495, 4.856415, 5.111619, 
    5.449276, 5.198133, 5.388347, 5.282403, 4.632593, 4.829716, 
    4.811047, 5.32258, 4.610439, 5.18732, 5.64755, 5.482928, 
    5.119356, 5.444986, 5.477327, 4.885934, 5.349004, 5.66351, 
    4.303683, 4.592721, 5.422557, 5.087755, 4.716865, 4.669027, 
    5.340175, 4.782326, 5.649563, 5.132957, 5.683762, 5.932028, 
    7.316127, 6.146593, 5.72388, 5.966438, 4.792467, 5.720676, 
    5.148644, 5.73848, 6.212466, 6.308748, 5.379114, 5.272329, 
    5.439909, 5.586977, 5.09346, 5.576321, 6.207303, 5.592295, 
    4.593619, 4.889057, 5.293568, 4.586061, 5.428235, 5.8397, 
    5.96753, 5.801161, 5.987631, 6.203965, 4.839773, 4.715386, 
    4.636048, 5.860811, 6.033741, 4.733141, 5.138581, 5.114714, 
    5.466906, 4.908799, 5.060534, 5.255573, 4.830971, 5.263964, 
    5.525848, 5.220203, 4.957148, 4.740335, 4.5796, 4.594491, 
    5.352114, 5.157587, 5.436527, 4.91852, 5.08691, 5.023482, 
    5.534429, 5.051983, 5.253279, 5.040294, 5.216557, 4.901129, 
    5.324232, 4.74494, 5.136233, 5.024926, 5.33796, 4.793476, 
    4.707207, 5.811963, 5.502805, 5.038211, 4.890697, 4.606382
    ), ZNF804A = c(6.51568, 6.439824, 4.177717, 5.722987, 7.93036, 
    8.825149, 6.117847, 5.202625, 6.943736, 6.10238, 5.395202, 
    4.005275, 5.449567, 5.250869, 5.375079, 6.279801, 4.846173, 
    4.312459, 4.968536, 7.3861, 8.261597, 7.374785, 6.239196, 
    5.368542, 5.101474, 5.054613, 5.31017, 4.846913, 5.216192, 
    5.737364, 5.338054, 4.207094, 6.386935, 4.983061, 7.574587, 
    4.413107, 5.352411, 6.507691, 5.199225, 5.900246, 5.338985, 
    4.046878, 8.342389, 4.816628, 5.722269, 4.635663, 4.8078, 
    4.223784, 5.340559, 5.434575, 4.493881, 4.84106, 6.921161, 
    4.690488, 6.626063, 4.473161, 4.489687, 4.323735, 4.360158, 
    4.583291, 6.180486, 4.410217, 6.529253, 4.332168, 5.635954, 
    4.46047, 5.444109, 4.409367, 4.607851, 4.415356, 3.9169, 
    4.028548, 4.286836, 4.498551, 5.981919, 4.136005, 4.24244, 
    4.089674, 4.29059, 3.859174, 4.062029, 4.326703, 4.236188, 
    4.27184, 3.95776, 4.491866, 4.209833, 4.202512, 4.158955, 
    4.174552, 5.370753, 4.933963, 3.982912, 4.162634, 4.101793, 
    4.332177, 4.263031, 5.336449, 4.02567, 4.168412, 4.131943, 
    4.263566, 4.107345, 4.111515, 4.241965, 4.483141, 4.170102, 
    4.146675, 4.479603, 4.473624, 4.667704, 4.019978, 4.190319, 
    4.442643, 4.456675, 5.083862, 4.649441, 4.57102, 4.110476, 
    4.423792, 4.461519, 4.309157, 3.784853, 4.249824, 4.127431, 
    4.136897, 4.779541, 4.261828, 4.150655, 4.605232, 4.489233, 
    4.284094, 5.285193, 4.413362, 4.528692, 4.108301, 5.526067, 
    5.096504, 4.480856, 4.271599, 5.242742, 4.213557, 4.74649, 
    4.027772, 4.640365, 4.344877, 4.077101), ttcluster_dataset_1.Subtype = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("Proneural", 
    "Neural", "Classical", "Mesenchymal"), class = "factor")), class = "data.frame", row.names = c("TCGA.02.0014.01", 
"TCGA.02.0026.01", "TCGA.02.0048.01", "TCGA.02.0069.01", "TCGA.02.0074.01", 
"TCGA.02.0080.01", "TCGA.02.0084.01", "TCGA.02.0087.01", "TCGA.02.0104.01", 
"TCGA.02.0114.01", "TCGA.02.0281.01", "TCGA.02.0321.01", "TCGA.02.0325.01", 
"TCGA.02.0338.01", "TCGA.02.0339.01", "TCGA.02.0432.01", "TCGA.02.0439.01", 
"TCGA.02.0440.01", "TCGA.02.0446.01", "TCGA.06.0128.01", "TCGA.06.0129.01", 
"TCGA.06.0146.01", "TCGA.06.0156.01", "TCGA.06.0166.01", "TCGA.06.0174.01", 
"TCGA.06.0177.01", "TCGA.06.0238.01", "TCGA.06.0241.01", "TCGA.06.0410.01", 
"TCGA.06.0413.01", "TCGA.06.0414.01", "TCGA.06.0646.01", "TCGA.06.0648.01", 
"TCGA.08.0245.01", "TCGA.08.0344.01", "TCGA.08.0347.01", "TCGA.08.0348.01", 
"TCGA.08.0350.01", "TCGA.08.0353.01", "TCGA.08.0359.01", "TCGA.08.0385.01", 
"TCGA.08.0517.01", "TCGA.08.0524.01", "TCGA.12.0616.01", "TCGA.12.0618.01", 
"TCGA.02.0089.01", "TCGA.02.0113.01", "TCGA.02.0115.01", "TCGA.02.0451.01", 
"TCGA.06.0132.01", "TCGA.06.0133.01", "TCGA.06.0138.01", "TCGA.06.0160.01", 
"TCGA.06.0162.01", "TCGA.06.0167.01", "TCGA.06.0171.01", "TCGA.06.0173.01", 
"TCGA.06.0179.01", "TCGA.06.0182.01", "TCGA.06.0185.01", "TCGA.06.0195.01", 
"TCGA.06.0208.01", "TCGA.06.0214.01", "TCGA.06.0219.01", "TCGA.06.0221.01", 
"TCGA.06.0237.01", "TCGA.06.0240.01", "TCGA.08.0349.01", "TCGA.08.0380.01", 
"TCGA.08.0386.01", "TCGA.08.0520.01", "TCGA.02.0016.01", "TCGA.02.0023.01", 
"TCGA.02.0070.01", "TCGA.02.0102.01", "TCGA.02.0260.01", "TCGA.02.0269.01", 
"TCGA.02.0285.01", "TCGA.02.0289.01", "TCGA.02.0290.01", "TCGA.02.0317.01", 
"TCGA.02.0333.01", "TCGA.02.0422.01", "TCGA.02.0430.01", "TCGA.06.0125.01", 
"TCGA.06.0126.01", "TCGA.06.0137.01", "TCGA.06.0145.01", "TCGA.06.0148.01", 
"TCGA.06.0187.01", "TCGA.06.0211.01", "TCGA.06.0402.01", "TCGA.08.0246.01", 
"TCGA.08.0354.01", "TCGA.08.0355.01", "TCGA.08.0357.01", "TCGA.08.0358.01", 
"TCGA.08.0375.01", "TCGA.08.0511.01", "TCGA.08.0514.01", "TCGA.08.0518.01", 
"TCGA.08.0529.01", "TCGA.08.0531.01", "TCGA.02.0004.01", "TCGA.02.0039.01", 
"TCGA.02.0059.01", "TCGA.02.0064.01", "TCGA.02.0075.01", "TCGA.02.0079.01", 
"TCGA.02.0085.01", "TCGA.02.0086.01", "TCGA.02.0099.01", "TCGA.02.0106.01", 
"TCGA.02.0107.01", "TCGA.02.0111.01", "TCGA.02.0326.01", "TCGA.02.0337.01", 
"TCGA.06.0122.01", "TCGA.06.0124.01", "TCGA.06.0143.01", "TCGA.06.0147.01", 
"TCGA.06.0149.01", "TCGA.06.0152.01", "TCGA.06.0154.01", "TCGA.06.0164.01", 
"TCGA.06.0175.01", "TCGA.06.0176.01", "TCGA.06.0184.01", "TCGA.06.0189.01", 
"TCGA.06.0190.01", "TCGA.06.0194.01", "TCGA.06.0197.01", "TCGA.06.0210.01", 
"TCGA.06.0397.01", "TCGA.06.0409.01", "TCGA.06.0412.01", "TCGA.06.0644.01", 
"TCGA.06.0645.01", "TCGA.08.0346.01", "TCGA.08.0352.01", "TCGA.08.0360.01", 
"TCGA.08.0390.01", "TCGA.08.0509.01", "TCGA.08.0510.01", "TCGA.08.0512.01", 
"TCGA.12.0619.01", "TCGA.12.0620.01"))

Any help would be highly appreciated.

CodePudding user response:

Let me know if this is what you are looking for:

# Draw one box plot per subtype from a wide table of numeric columns.
#
# NOTE(review): `df` is not defined in this snippet; it is assumed to be the
# data frame from the question (e.g. `df <- ttcluster_dataset_11[, 590:597]`),
# i.e. numeric columns plus the factor column `ttcluster_dataset_1.Subtype`.
library(tidyverse)

df %>%
  # Reshape to long format: every column except the subtype is stacked into
  # the default `name` / `value` pair, so all measurements share one y column.
  tidyr::pivot_longer(cols = -ttcluster_dataset_1.Subtype) %>%
  ggplot() +
  # ggplot2 layers must be joined with `+`; without it the calls below are
  # evaluated as separate statements and never added to the plot.
  geom_boxplot(aes(
    x = ttcluster_dataset_1.Subtype,
    y = value
  )) +
  labs(
    x = "Subtype",
    y = "Value"
  )

enter image description here

  • Related