Home > Software engineering >  Rownames intersect of multiple dataframes
Rownames intersect of multiple dataframes

Time:08-31

I have multiple data frames with the exact same columns, but different rows (and different numbers of rows). I want to find out whether any row names are shared between those data frames. The values of those rows in each column don't matter; the only important thing is the row names.

I can do that for two data frames, like this: intersect(rownames(DF1), rownames(DF2))

But doing this pairwise will take forever because I have many datasets. Is there a way to check for common row names across all of them at the same time?

EDIT - here is a sample from one of the data frames:

structure(list(logFC = c(-6.9789425276725, -6.80564834512398, 
-5.6476014320244, -6.26235013729251, -5.28269672881498, -3.09147686903668, 
-4.96792335935695, -4.19915236394742, -4.83218090923083, -3.98281311064966, 
-5.67532487307823, -5.61153529083818, -3.63779239727337, -5.51132458075007, 
-5.84723411702433, -3.81087466017047, -6.63518445918401, -3.12684504261733, 
-3.67427178247997, -3.55494509554008, -2.93288926187307, -3.34600996743472, 
-6.95421793602619, -4.54818507041332, -4.40148611002215, -6.25233850951737, 
-4.69560779360338, -3.48176941704349, -6.11632342272272, -4.65245863713412, 
-3.93886612606331, -5.80739169944222, -6.91086035105093, -2.82629426029907, 
-3.91313349601559, -2.15235576487352, -3.39637748720724, -3.31736956389026, 
-3.23641283622935, -4.33878108141206, -4.49762339558376, -2.65451540161907, 
-5.41165350920953, -5.94944402709093, -3.80784569069491, -2.83992888237486, 
-4.79770162793385, -4.44954188156457, -2.64034862912764, -4.4751984037916
), CI.L = c(-8.67994440382904, -8.75329890267049, -7.33972836815489, 
-8.10720823542097, -6.87728491837549, -4.05774266553713, -6.52757600952993, 
-5.58307963784586, -6.45170192050377, -5.39488819005967, -7.63936737535009, 
-7.58821634428815, -4.95285575867368, -7.54470153062114, -7.95005561275759, 
-5.18806492548832, -9.11781751841251, -4.32080214140813, -5.12196476973559, 
-4.98320918777515, -4.11606326830917, -4.68673311153421, -9.78640386618229, 
-6.36301071764781, -6.20444870302627, -8.80996124532594, -6.60520029900126, 
-4.92779850256981, -8.65559956418551, -6.59106405332152, -5.61846701732563, 
-8.28675525942228, -9.87474093313019, -4.04871009769845, -5.62314782172317, 
-3.10511637109949, -4.90291184721364, -4.79184235598173, -4.70738723812096, 
-6.26409071259927, -6.52317179223395, -3.87446905381624, -7.88060413550419, 
-8.67817242258957, -5.56793785199272, -4.15957023109633, -6.97818764981877, 
-6.50898609325095, -3.89588719023542, -6.57866771201211), CI.R = c(-5.27794065151596, 
-4.85799778757747, -3.95547449589392, -4.41749203916404, -3.68810853925447, 
-2.12521107253623, -3.40827070918396, -2.81522509004899, -3.21265989795789, 
-2.57073803123965, -3.71128237080637, -3.63485423738822, -2.32272903587306, 
-3.47794763087899, -3.74441262129107, -2.43368439485263, -4.1525513999555, 
-1.93288794382653, -2.22657879522435, -2.12668100330501, -1.74971525543697, 
-2.00528682333523, -4.1220320058701, -2.73335942317883, -2.59852351701803, 
-3.69471577370881, -2.78601528820549, -2.03574033151717, -3.57704728125992, 
-2.71385322094672, -2.259265234801, -3.32802813946217, -3.94697976897167, 
-1.6038784228997, -2.20311917030801, -1.19959515864755, -1.88984312720085, 
-1.84289677179878, -1.76543843433774, -2.41347145022485, -2.47207499893357, 
-1.4345617494219, -2.94270288291487, -3.2207156315923, -2.0477535293971, 
-1.52028753365338, -2.61721560604892, -2.3900976698782, -1.38481006801987, 
-2.37172909557108), AveExpr = c(2.84380439452138, -0.641350163188155, 
1.9374612692823, -0.235638199101607, -0.435518029701078, 3.48510080561988, 
1.87095233967784, 1.01628061150791, 1.22174650978923, 2.72177717639537, 
-1.20309940325757, -0.857551814218222, 2.20260076015391, 4.80478044066334, 
-1.13513003361954, 0.123295030736262, -0.675210875658111, 3.87956282628011, 
2.64674998200021, 2.87011242677628, 5.51349257254121, 0.73720258641836, 
2.15656931364667, -0.880676268215307, 2.38219307498489, 1.02322266480762, 
-0.262985749060007, 2.39888745207399, -2.13290506986535, -1.59949076132351, 
0.535926790059711, -1.04571229127454, -1.17629503943425, 0.774433319285671, 
0.47146013212957, 3.70538014394418, 2.44644004853682, -0.156538084053944, 
3.74986228626275, -1.65408242874073, -1.45872102403627, 3.31161564832369, 
-1.17183730367653, -0.890754518007027, 2.41868054745353, 5.45111763272805, 
-2.89316057050776, -2.22502982657658, 3.03149820351438, -2.1209576010754
), t = c(-8.67971178415031, -7.39228905582827, -7.06076205121055, 
-7.18116415109683, -7.00854088575108, -6.76846004661448, -6.73856582234944, 
-6.41902164094181, -6.31214655178395, -5.96695296657183, -6.11308339182853, 
-6.00572682477871, -5.85209958519877, -5.73401160854389, -5.88258959067589, 
-5.85397948220358, -5.65406688510248, -5.54036162493977, -5.36927047559566, 
-5.26556350546633, -5.2440584137816, -5.2796921349758, -5.19453899631105, 
-5.30180432734165, -5.16455723198513, -5.17161842960374, -5.20201485889234, 
-5.09381834167428, -5.09566636573943, -5.0770749425756, -4.9611870222842, 
-4.95520219816026, -4.93278163900794, -4.89123899468251, -4.84111490047663, 
-4.77915087210746, -4.76933015577409, -4.75967794339552, -4.65456688053002, 
-4.76746627545286, -4.69743671824526, -4.60322701266885, -4.63701021254403, 
-4.61250179178841, -4.57682605412049, -4.55273135759758, -4.65479333272545, 
-4.57073183768021, -4.44889026750723, -4.50087124759215), P.Value = c(1.55229424251649e-07, 
1.30133609714356e-06, 2.33015419714361e-06, 1.88260870380903e-06, 
2.55754415226669e-06, 3.94282455388895e-06, 4.16345462038873e-06, 
7.50810099243057e-06, 9.17340525927766e-06, 1.77065831075769e-05, 
1.33775331440198e-05, 1.64325937561506e-05, 2.2115888159301e-05, 
2.7847395364839e-05, 2.08446126717148e-05, 2.20352317313369e-05, 
3.25827813373579e-05, 4.07954068339292e-05, 5.73909145200287e-05, 
7.07062567807361e-05, 7.38445843217097e-05, 6.87194358047315e-05, 
8.16281452619761e-05, 6.57245741306385e-05, 8.67463223172338e-05, 
8.5511928670405e-05, 8.04010139024611e-05, 0.000100171705417072, 
9.9795109966445e-05, 0.000103650985838079, 0.000131395582252142, 
0.000133020277243722, 0.000139292195697043, 0.000151726973454171, 
0.000168257771930849, 0.000191272077639828, 0.000195205125031331, 
0.000199151346374514, 0.000247795188234922, 0.000195960874195937, 
0.000226635597310478, 0.000275812668466433, 0.000257033617226946, 
0.000270521748860173, 0.000291459122749773, 0.000306528693840875, 
0.000247678269149948, 0.000295197741620148, 0.000381128297979153, 
0.000341717532639182), adj.P.Val = c(0.00241102341747661, 0.00794475515460126, 
0.00794475515460126, 0.00794475515460126, 0.00794475515460126, 
0.00923811102341111, 0.00923811102341111, 0.014576978076804, 
0.0158312589430112, 0.0229002649926842, 0.0207779844792916, 0.0229002649926842, 
0.0229002649926842, 0.0270328590504175, 0.0229002649926842, 0.0229002649926842, 
0.0297691623371673, 0.0352019032746993, 0.0469155623328993, 0.0498675688558607, 
0.0498675688558607, 0.0498675688558607, 0.0499016251196769, 0.0498675688558607, 
0.0499016251196769, 0.0499016251196769, 0.0499016251196769, 0.0536505837426884, 
0.0536505837426884, 0.0536635704012349, 0.0645647170671714, 0.0645647170671714, 
0.0655601934414083, 0.0693124515202994, 0.0746679918179983, 0.0793133003048448, 
0.0793133003048448, 0.0793133003048448, 0.0916370205634477, 0.0793133003048448, 
0.0880026024356585, 0.0951982748137919, 0.0928429335527656, 0.0951982748137919, 
0.097553432400939, 0.0991875765153432, 0.0916370205634477, 0.097553432400939, 
0.116072249494357, 0.108317484019424), B = c(7.20297424220909, 
5.12185903325429, 4.85086822927105, 4.82778189573735, 4.47878578846026, 
4.45015062142229, 4.32665141548369, 3.68905526549073, 3.58105745979752, 
3.07644941699046, 3.02010053423525, 2.91419323987163, 2.83925429337962, 
2.70647282644387, 2.69778050078078, 2.64780216667016, 2.39793196132607, 
2.34766742399694, 2.01017932857179, 1.82776636178526, 1.80163027812608, 
1.72843647510305, 1.69294586705913, 1.66964379660484, 1.63598134270543, 
1.63594094804558, 1.57140154178204, 1.49617298986084, 1.30350655559773, 
1.21061838557398, 1.19176011624231, 1.14701956815954, 1.13326015821754, 
1.0063098526262, 0.959474301794882, 0.917731867637779, 0.892661776963931, 
0.720160756477969, 0.676375764639774, 0.665484835822247, 0.611282810384547, 
0.581080152807092, 0.557812063513063, 0.545239611133442, 0.530816871649995, 
0.463782969698698, 0.390941796344767, 0.286655122678496, 0.285729527776393, 
0.185338863248642)), row.names = c("PLIN4", "LEP", "PLIN1", "SAA1", 
"RBP4", "SEMA3G", "GPD1", "FMO3", "STMN2", "COL8A1", "SAA2", 
"TUSC5", "PCSK5", "JCHAIN", "PCK1", "AQP7", "EPYC", "AOC3", "FNDC1", 
"LYVE1", "FHL1", "AKR1C1", "COMP", "ADAMTS18", "G0S2", "ADIPOQ", 
"CIDEC", "ITGBL1", "GABRA2", "C6", "AKR1C2", "ITLN2", "VSTM2B", 
"ADCY5", "FHL5", "FAM46C", "NPNT", "EGFL6", "SLIT3", "LGALS12", 
"TNMD", "PALMD", "ZIC5", "OLIG2", "FABP4", "LTBP2", "C14orf180", 
"KERA", "MRVI1", "LBP"), class = "data.frame")

As you see in the example, the rows are genes. I basically want to find the common genes between all different data frames.

CodePudding user response:

Use intersect together with Reduce. Put your data frames in a list, lst <- list(df1, df2, ...), then do

# Row names common to every data frame in `lst`: extract each frame's row
# names, then fold the resulting character vectors pairwise with intersect().
Reduce(
  function(shared, candidate) intersect(shared, candidate),
  lapply(lst, function(df) rownames(df))
)
# Printed result for the example data:
# [1] "49"  "18"  "100" "47"  "71"  "37"  "20"  "26"  "3"   "41"  "27"  "36"  "34"  "87"  "58"  "42"  "93"  "30"  "22"  "80" 
# [21] "84"  "68"  "88"  "63"  "52"  "61"  "55"  "10"  "29"  "12"  "35"  "16"  "51"  "78"  "17"  "99"  "97"  "62" 

Data:

# Example data: a 100-row, two-column data frame of zeros, from which ten
# random 90-row subsets are drawn so that their row names overlap only partly.
d0 <- data.frame(matrix(0, nrow = 100, ncol = 2))

# Fix the RNG seed so the sampled subsets are reproducible.
set.seed(42)

# Each list element keeps 90 of the 100 rows, drawn without replacement;
# the original row names are retained by the subsetting.
lst <- lapply(seq_len(10), function(i) {
  keep <- sample(nrow(d0), 90)
  d0[keep, ]
})
  • Related