I have multiple data frames with exactly the same columns but different rows (and different numbers of rows). I want to find out whether any row names are shared between those data frames. The values in each column of those rows don't matter; the only important thing is the row names.
I can do that for two data frames, like this: intersect(rownames(DF1), rownames(DF2))
But doing this pair by pair will take forever because I have many datasets. Is there a way to find the row names common to all of them at once?
EDIT - here is a sample from one of the data frames:
structure(list(logFC = c(-6.9789425276725, -6.80564834512398,
-5.6476014320244, -6.26235013729251, -5.28269672881498, -3.09147686903668,
-4.96792335935695, -4.19915236394742, -4.83218090923083, -3.98281311064966,
-5.67532487307823, -5.61153529083818, -3.63779239727337, -5.51132458075007,
-5.84723411702433, -3.81087466017047, -6.63518445918401, -3.12684504261733,
-3.67427178247997, -3.55494509554008, -2.93288926187307, -3.34600996743472,
-6.95421793602619, -4.54818507041332, -4.40148611002215, -6.25233850951737,
-4.69560779360338, -3.48176941704349, -6.11632342272272, -4.65245863713412,
-3.93886612606331, -5.80739169944222, -6.91086035105093, -2.82629426029907,
-3.91313349601559, -2.15235576487352, -3.39637748720724, -3.31736956389026,
-3.23641283622935, -4.33878108141206, -4.49762339558376, -2.65451540161907,
-5.41165350920953, -5.94944402709093, -3.80784569069491, -2.83992888237486,
-4.79770162793385, -4.44954188156457, -2.64034862912764, -4.4751984037916
), CI.L = c(-8.67994440382904, -8.75329890267049, -7.33972836815489,
-8.10720823542097, -6.87728491837549, -4.05774266553713, -6.52757600952993,
-5.58307963784586, -6.45170192050377, -5.39488819005967, -7.63936737535009,
-7.58821634428815, -4.95285575867368, -7.54470153062114, -7.95005561275759,
-5.18806492548832, -9.11781751841251, -4.32080214140813, -5.12196476973559,
-4.98320918777515, -4.11606326830917, -4.68673311153421, -9.78640386618229,
-6.36301071764781, -6.20444870302627, -8.80996124532594, -6.60520029900126,
-4.92779850256981, -8.65559956418551, -6.59106405332152, -5.61846701732563,
-8.28675525942228, -9.87474093313019, -4.04871009769845, -5.62314782172317,
-3.10511637109949, -4.90291184721364, -4.79184235598173, -4.70738723812096,
-6.26409071259927, -6.52317179223395, -3.87446905381624, -7.88060413550419,
-8.67817242258957, -5.56793785199272, -4.15957023109633, -6.97818764981877,
-6.50898609325095, -3.89588719023542, -6.57866771201211), CI.R = c(-5.27794065151596,
-4.85799778757747, -3.95547449589392, -4.41749203916404, -3.68810853925447,
-2.12521107253623, -3.40827070918396, -2.81522509004899, -3.21265989795789,
-2.57073803123965, -3.71128237080637, -3.63485423738822, -2.32272903587306,
-3.47794763087899, -3.74441262129107, -2.43368439485263, -4.1525513999555,
-1.93288794382653, -2.22657879522435, -2.12668100330501, -1.74971525543697,
-2.00528682333523, -4.1220320058701, -2.73335942317883, -2.59852351701803,
-3.69471577370881, -2.78601528820549, -2.03574033151717, -3.57704728125992,
-2.71385322094672, -2.259265234801, -3.32802813946217, -3.94697976897167,
-1.6038784228997, -2.20311917030801, -1.19959515864755, -1.88984312720085,
-1.84289677179878, -1.76543843433774, -2.41347145022485, -2.47207499893357,
-1.4345617494219, -2.94270288291487, -3.2207156315923, -2.0477535293971,
-1.52028753365338, -2.61721560604892, -2.3900976698782, -1.38481006801987,
-2.37172909557108), AveExpr = c(2.84380439452138, -0.641350163188155,
1.9374612692823, -0.235638199101607, -0.435518029701078, 3.48510080561988,
1.87095233967784, 1.01628061150791, 1.22174650978923, 2.72177717639537,
-1.20309940325757, -0.857551814218222, 2.20260076015391, 4.80478044066334,
-1.13513003361954, 0.123295030736262, -0.675210875658111, 3.87956282628011,
2.64674998200021, 2.87011242677628, 5.51349257254121, 0.73720258641836,
2.15656931364667, -0.880676268215307, 2.38219307498489, 1.02322266480762,
-0.262985749060007, 2.39888745207399, -2.13290506986535, -1.59949076132351,
0.535926790059711, -1.04571229127454, -1.17629503943425, 0.774433319285671,
0.47146013212957, 3.70538014394418, 2.44644004853682, -0.156538084053944,
3.74986228626275, -1.65408242874073, -1.45872102403627, 3.31161564832369,
-1.17183730367653, -0.890754518007027, 2.41868054745353, 5.45111763272805,
-2.89316057050776, -2.22502982657658, 3.03149820351438, -2.1209576010754
), t = c(-8.67971178415031, -7.39228905582827, -7.06076205121055,
-7.18116415109683, -7.00854088575108, -6.76846004661448, -6.73856582234944,
-6.41902164094181, -6.31214655178395, -5.96695296657183, -6.11308339182853,
-6.00572682477871, -5.85209958519877, -5.73401160854389, -5.88258959067589,
-5.85397948220358, -5.65406688510248, -5.54036162493977, -5.36927047559566,
-5.26556350546633, -5.2440584137816, -5.2796921349758, -5.19453899631105,
-5.30180432734165, -5.16455723198513, -5.17161842960374, -5.20201485889234,
-5.09381834167428, -5.09566636573943, -5.0770749425756, -4.9611870222842,
-4.95520219816026, -4.93278163900794, -4.89123899468251, -4.84111490047663,
-4.77915087210746, -4.76933015577409, -4.75967794339552, -4.65456688053002,
-4.76746627545286, -4.69743671824526, -4.60322701266885, -4.63701021254403,
-4.61250179178841, -4.57682605412049, -4.55273135759758, -4.65479333272545,
-4.57073183768021, -4.44889026750723, -4.50087124759215), P.Value = c(1.55229424251649e-07,
1.30133609714356e-06, 2.33015419714361e-06, 1.88260870380903e-06,
2.55754415226669e-06, 3.94282455388895e-06, 4.16345462038873e-06,
7.50810099243057e-06, 9.17340525927766e-06, 1.77065831075769e-05,
1.33775331440198e-05, 1.64325937561506e-05, 2.2115888159301e-05,
2.7847395364839e-05, 2.08446126717148e-05, 2.20352317313369e-05,
3.25827813373579e-05, 4.07954068339292e-05, 5.73909145200287e-05,
7.07062567807361e-05, 7.38445843217097e-05, 6.87194358047315e-05,
8.16281452619761e-05, 6.57245741306385e-05, 8.67463223172338e-05,
8.5511928670405e-05, 8.04010139024611e-05, 0.000100171705417072,
9.9795109966445e-05, 0.000103650985838079, 0.000131395582252142,
0.000133020277243722, 0.000139292195697043, 0.000151726973454171,
0.000168257771930849, 0.000191272077639828, 0.000195205125031331,
0.000199151346374514, 0.000247795188234922, 0.000195960874195937,
0.000226635597310478, 0.000275812668466433, 0.000257033617226946,
0.000270521748860173, 0.000291459122749773, 0.000306528693840875,
0.000247678269149948, 0.000295197741620148, 0.000381128297979153,
0.000341717532639182), adj.P.Val = c(0.00241102341747661, 0.00794475515460126,
0.00794475515460126, 0.00794475515460126, 0.00794475515460126,
0.00923811102341111, 0.00923811102341111, 0.014576978076804,
0.0158312589430112, 0.0229002649926842, 0.0207779844792916, 0.0229002649926842,
0.0229002649926842, 0.0270328590504175, 0.0229002649926842, 0.0229002649926842,
0.0297691623371673, 0.0352019032746993, 0.0469155623328993, 0.0498675688558607,
0.0498675688558607, 0.0498675688558607, 0.0499016251196769, 0.0498675688558607,
0.0499016251196769, 0.0499016251196769, 0.0499016251196769, 0.0536505837426884,
0.0536505837426884, 0.0536635704012349, 0.0645647170671714, 0.0645647170671714,
0.0655601934414083, 0.0693124515202994, 0.0746679918179983, 0.0793133003048448,
0.0793133003048448, 0.0793133003048448, 0.0916370205634477, 0.0793133003048448,
0.0880026024356585, 0.0951982748137919, 0.0928429335527656, 0.0951982748137919,
0.097553432400939, 0.0991875765153432, 0.0916370205634477, 0.097553432400939,
0.116072249494357, 0.108317484019424), B = c(7.20297424220909,
5.12185903325429, 4.85086822927105, 4.82778189573735, 4.47878578846026,
4.45015062142229, 4.32665141548369, 3.68905526549073, 3.58105745979752,
3.07644941699046, 3.02010053423525, 2.91419323987163, 2.83925429337962,
2.70647282644387, 2.69778050078078, 2.64780216667016, 2.39793196132607,
2.34766742399694, 2.01017932857179, 1.82776636178526, 1.80163027812608,
1.72843647510305, 1.69294586705913, 1.66964379660484, 1.63598134270543,
1.63594094804558, 1.57140154178204, 1.49617298986084, 1.30350655559773,
1.21061838557398, 1.19176011624231, 1.14701956815954, 1.13326015821754,
1.0063098526262, 0.959474301794882, 0.917731867637779, 0.892661776963931,
0.720160756477969, 0.676375764639774, 0.665484835822247, 0.611282810384547,
0.581080152807092, 0.557812063513063, 0.545239611133442, 0.530816871649995,
0.463782969698698, 0.390941796344767, 0.286655122678496, 0.285729527776393,
0.185338863248642)), row.names = c("PLIN4", "LEP", "PLIN1", "SAA1",
"RBP4", "SEMA3G", "GPD1", "FMO3", "STMN2", "COL8A1", "SAA2",
"TUSC5", "PCSK5", "JCHAIN", "PCK1", "AQP7", "EPYC", "AOC3", "FNDC1",
"LYVE1", "FHL1", "AKR1C1", "COMP", "ADAMTS18", "G0S2", "ADIPOQ",
"CIDEC", "ITGBL1", "GABRA2", "C6", "AKR1C2", "ITLN2", "VSTM2B",
"ADCY5", "FHL5", "FAM46C", "NPNT", "EGFL6", "SLIT3", "LGALS12",
"TNMD", "PALMD", "ZIC5", "OLIG2", "FABP4", "LTBP2", "C14orf180",
"KERA", "MRVI1", "LBP"), class = "data.frame")
As you can see in the example, the row names are genes. I basically want to find the genes that are common to all of the data frames.
CodePudding user response:
Use `intersect` with `Reduce`. Put your data frames in a list, `lst <- list(df1, df2, ...)`, then do:
# Extract the row names of every data frame in `lst`, then fold the resulting
# list pairwise with intersect() so that only the names present in all of the
# data frames remain.
Reduce(
  f = intersect,
  x = lapply(X = lst, FUN = rownames)
)
# Output for the example `lst` built in the Data section below:
# [1] "49" "18" "100" "47" "71" "37" "20" "26" "3" "41" "27" "36" "34" "87" "58" "42" "93" "30" "22" "80"
# [21] "84" "68" "88" "63" "52" "61" "55" "10" "29" "12" "35" "16" "51" "78" "17" "99" "97" "62"
Data:
# Reproducible example data: ten random 90-row subsets of a 100-row frame.
# Fix the RNG state first so the sampled rows are the same on every run.
set.seed(42)
# Template data frame filled with zeros (100 rows, 2 columns).
d0 <- data.frame(matrix(0, nrow = 100, ncol = 2))
# Each list element keeps 90 rows of d0 drawn by sample(); the retained rows
# carry their original row names, so the subsets overlap only partially.
lst <- lapply(seq_len(10), function(i) {
  d0[sample(nrow(d0), 90), ]
})