I have two data frames exp.kirp.log2
and g_list
where the rownames of exp.kirp.log2
matches the g_list$hgnc_symbol
, but they are in different orders.
I want to replace the rownames of exp.kirp.log2
with the gene_list$ensembl_gene_id
based on the matching g_list$hgnc_symbol
.
# Obtain g_list
genes <- rownames(exp.kirp.log2)
ensembl = useEnsembl(biomart = "genes", dataset = "hsapiens_gene_ensembl", GRCh = 37, verbose = T)
ensembl <- useDataset(dataset = "hsapiens_gene_ensembl", mart = ensembl)
g_list <- getBM(attributes = c('ensembl_gene_id','hgnc_symbol'),
filters='hgnc_symbol',values=genes,
mart=ensembl)
g_list <- g_list[order(g_list$hgnc_symbol),]
rownames(exp.kirp.log2) <- g_list$ensembl_gene_id %in% g_list[match(rownames(exp.kirp.log2), g_list$hgnc_symbol),2]
Traceback:
Error in `.rowNamesDF<-`(x, value = value) : invalid 'row.names' length
exp.kirp
dput(exp.kirp.log2[1:20,1:20])
structure(list(TCGA.2K.A9WE.01A = c(7.65342121905285, 2.03892776611756,
-0.96100202120249, 6.35598354101006, 14.3511850042327, -Inf,
10.3737643425674, 3.79382306985866, -Inf, 10.0819596419255, 9.44832324553207,
4.20886056913751, -0.96100202120249, -Inf, -Inf, -Inf, 5.36085937172008,
9.78880184184623, 10.3776687573505, 11.16757118884), TCGA.2Z.A9J1.01A = c(5.09389393824392,
5.2160706644244, -Inf, 6.93597002271109, 12.4136523086721, -Inf,
11.1918237390263, 2.98724809259115, -Inf, 10.1912122382252, 9.9623840324273,
4.71517403960983, -Inf, -Inf, -Inf, -Inf, 6.22565668754941, 10.3398477765017,
10.3103072842012, 11.1287210937383), TCGA.2Z.A9J2.01A = c(5.51854458067276,
4.11644793551166, -Inf, 7.5307754013178, 12.2744679621487, -Inf,
9.93114303849412, -Inf, -Inf, 10.3189198720956, 10.2574585613045,
4.11644793551166, -Inf, -Inf, -Inf, 0.309059742501585, 6.16707286132018,
10.2991951943744, 10.5852157015366, 11.5823040757623), TCGA.2Z.A9J3.01A = c(4.70168212029528,
3.34111759260469, 3.57815377007565, 7.54694769203808, 10.1689338100564,
-Inf, 9.96839262629172, 5.28865017271056, -Inf, 9.87305770150294,
9.75535162798677, 3.57815377007565, -Inf, -Inf, -Inf, -Inf, 6.170389794965,
10.238532641469, 9.94050095178643, 11.0690397931313), TCGA.2Z.A9J5.01A = c(7.99645936536463,
5.20408983959317, 1.64952349150802, 6.89258167250936, 13.6832285748428,
-Inf, 10.3714563849361, 2.6495004987031, -Inf, 10.4176870383992,
10.0652444551968, 6.86867071663319, -Inf, -Inf, -Inf, -0.350522494468264,
5.98935248863472, 10.1079093507719, 11.2050505161752, 11.6645692817891
), TCGA.2Z.A9J6.01A = c(5.13719199914349, 6.63590381796106, -1.00057719346275,
6.92859654071157, 12.0367193976262, -Inf, 10.8202555636581, 5.50707469845262,
-Inf, 10.3262700402849, 9.91216810777653, 5.94179415093086, -Inf,
-Inf, -Inf, -Inf, 5.52284042813955, 10.0653680664815, 10.5954686012028,
11.2355920880251), TCGA.2Z.A9J7.01A = c(6.95117512427229, 2.24944534108584,
-Inf, 7.25014205824679, 10.9656928148969, 1.07949758402178, 10.5991523452113,
4.32744306245973, -Inf, 10.4556415168452, 9.46537845450025, 3.66448284036468,
-Inf, -Inf, -0.920570684997085, -Inf, 6.72337288854289, 10.0139441477751,
9.28408724134641, 11.4833270722276), TCGA.2Z.A9J8.01A = c(3.61712213221935,
5.39472334226273, -Inf, 7.92111077839189, 11.9975977242282, -1.58251200148633,
9.91379851626213, 5.10394657615628, -Inf, 9.95999222715916, 9.90779350021794,
5.82683572660569, -Inf, -Inf, -Inf, -Inf, 7.85831302551685, 10.3997246047534,
11.7402171909708, 11.7246448152361), TCGA.2Z.A9J9.01A = c(6.05389548011115,
-1.41888982477445, -Inf, 5.73237232300075, 14.8647326244225,
-1.41888982477445, 10.4697437586612, 4.51174000542664, -0.419082711336792,
9.28576885726015, 9.21399581402162, -1.41888982477445, -Inf,
-Inf, -Inf, -Inf, 6.0887945976893, 9.39798410008432, 9.51616568261168,
11.1846268780251), TCGA.2Z.A9JD.01A = c(3.15639661659767, 4.2623504664045,
-Inf, 8.66937017282105, 10.9506421354115, -Inf, 10.8015070949819,
1.37484474128973, -Inf, 9.51027580369917, 10.0114476969219, 4.30081927275683,
-Inf, -Inf, -Inf, -Inf, 5.9232585434528, 10.1410206692489, 10.9093204661635,
11.1601119970792), TCGA.2Z.A9JE.01A = c(4.55671142771396, 0.976583597876997,
-Inf, 7.39711669725343, 11.338916568945, 0.976583597876997, 11.4922670603536,
2.97656526897918, -Inf, 10.098748119083, 9.84075338026455, 5.28034764124523,
-Inf, -Inf, -Inf, -Inf, 5.7709822912126, 10.6735346943631, 10.5435725361632,
11.1417882886223), TCGA.2Z.A9JG.01A = c(7.27924748225939, 3.74051746156171,
2.8145094080944, 5.79197812282256, 12.0865056775331, -Inf, 10.327056990394,
3.53698537579194, -Inf, 10.5780583590704, 10.8649068477431, 6.57473428966518,
-Inf, -Inf, -Inf, -Inf, 7.25413670394464, 9.83577157545057, 10.6502486545903,
11.1906393649786), TCGA.2Z.A9JI.01A = c(8.20162111992077, 5.71548707501235,
0.393526032228356, 5.73826794012289, 12.7954861179578, 2.61595767716595,
10.0094938620897, 5.20091233325219, -Inf, 9.93400880935778, 9.75330735360058,
5.21798676951157, -Inf, -Inf, -Inf, -Inf, 7.0082672553098, 9.74081003982032,
10.7382235152475, 11.6720072357516), TCGA.2Z.A9JJ.01A = c(6.26475409489153,
-0.415229871442725, 4.46742296017359, 6.39123499553712, 12.8198023802381,
-Inf, 11.7916439373724, 2.04421923663312, -Inf, 9.53606689182685,
10.3591574288036, -1.41503749927884, -Inf, -Inf, -Inf, -Inf,
5.71406413888836, 10.0630462305102, 10.2195932783632, 11.455724780085
), TCGA.2Z.A9JK.01A = c(5.9386386584728, 6.15383572961508, -Inf,
6.96731849612975, 14.0309071832818, -1.46076672913396, 10.1870344226096,
1.99855658313033, -Inf, 9.35930526481309, 9.09945932891836, 4.68887411799513,
1.99855658313033, -Inf, -Inf, -1.46076672913396, 6.2670469517503,
9.44376099865969, 10.3192560726034, 11.2260646035167), TCGA.2Z.A9JL.01A = c(7.02541755196655,
3.6235742674856, 0.623585972399712, 6.53709878587009, 12.9644083075158,
-0.961282892427146, 11.008714051357, 7.39616034553605, -Inf,
10.2035151452418, 9.55233575995403, 4.03861176676445, -Inf, -Inf,
-Inf, -Inf, 6.02729341208669, 10.2444014184424, 10.8539915160092,
10.9360755463691), TCGA.2Z.A9JM.01A = c(5.91786850792426, 5.24907362097786,
1.28557977040279, 6.72907414917957, 13.7490268867095, -Inf, 10.9548399985167,
4.00806552147407, -Inf, 10.1222822552784, 9.55238706244896, 5.47146712618562,
-Inf, -Inf, -Inf, -1.03622996892924, 6.23045858328003, 10.2600151849016,
10.4635183450003, 10.7143794759862), TCGA.2Z.A9JN.01A = c(4.09355172548716,
-Inf, -Inf, 7.28292161137752, 12.2911433534863, -Inf, 10.8165919471646,
-1.22398025935253, -Inf, 8.92443634456794, 9.76180168782627,
-1.22398025935253, -1.22398025935253, -Inf, -Inf, -Inf, 7.12910679128811,
10.6459390192132, 10.9762453349304, 11.3740122319006), TCGA.2Z.A9JO.01A = c(5.31084177712261,
-0.707218250772154, 0.292663973396858, 7.23068817327933, 13.1978705809921,
3.38016145559557, 9.83862580682156, -Inf, -Inf, 9.54417855835282,
10.3479785104067, 5.93655314137344, -Inf, -Inf, -Inf, -Inf, 6.95803180123955,
10.5275135616911, 10.7557094705532, 11.5723066760841), TCGA.2Z.A9JP.01A = c(6.28494714327597,
0.226878404196269, 1.22687840419627, 7.45773299323875, 13.34336295947,
-Inf, 11.208058065601, 1.22687840419627, -Inf, 10.0414301364209,
9.66875435596374, 4.51225217454896, -Inf, -Inf, -Inf, 0.226878404196269,
6.145710058105, 9.94538047931203, 10.6250568350002, 11.0719455710567
)), row.names = c("A1BG", "A1CF", "A2BP1", "A2LD1", "A2M", "A2ML1",
"A4GALT", "A4GNT", "AAA1", "AAAS", "AACS", "AACSL", "AADAC",
"AADACL2", "AADACL3", "AADACL4", "AADAT", "AAGAB", "AAK1", "AAMP"
), class = "data.frame")
g_list
> dput(g_list[1:20,1:2])
structure(list(ensembl_gene_id = c("ENSG00000121410", "ENSG00000148584",
"ENSG00000175899", "ENSG00000166535", "ENSG00000128274", "ENSG00000118017",
"ENSG00000094914", "ENSG00000081760", "ENSG00000114771", "ENSG00000261846",
"ENSG00000197953", "ENSG00000188984", "ENSG00000204518", "ENSG00000109576",
"ENSG00000103591", "ENSG00000115977", "ENSG00000127837", "ENSG00000129673",
"ENSG00000090861", "ENSG00000124608"), hgnc_symbol = c("A1BG",
"A1CF", "A2M", "A2ML1", "A4GALT", "A4GNT", "AAAS", "AACS", "AADAC",
"AADACL2", "AADACL2", "AADACL3", "AADACL4", "AADAT", "AAGAB",
"AAK1", "AAMP", "AANAT", "AARS", "AARS2")), row.names = c(NA,
20L), class = "data.frame")
CodePudding user response:
Row names are discouraged if not actively removed by many functions in
dplyr
and the rest of the tidyverse. You would do well to either migrate them to a "real column" in the data or stop trying to use them (I suspect you need the information, so the former is your best path). In that regard, you can doexp.kirp.log2 %>% tibble::rownames_to_column("hgnc_symbol") %>% right_join(g_list, ., by = "hgnc_symbol") %>% tibble() # this is only "needed" to facilitate visualization here on Stack # # A tibble: 21 x 22 # ensembl_gene_id hgnc_symbol TCGA.2K.A9WE.01A TCGA.2Z.A9J1.01A TCGA.2Z.A9J2.01A TCGA.2Z.A9J3.01A TCGA.2Z.A9J5.01A # <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> # 1 ENSG00000121410 A1BG 7.65 5.09 5.52 4.70 8.00 # 2 ENSG00000148584 A1CF 2.04 5.22 4.12 3.34 5.20 # 3 ENSG00000175899 A2M 14.4 12.4 12.3 10.2 13.7 # 4 ENSG00000166535 A2ML1 -Inf -Inf -Inf -Inf -Inf # 5 ENSG00000128274 A4GALT 10.4 11.2 9.93 9.97 10.4 # 6 ENSG00000118017 A4GNT 3.79 2.99 -Inf 5.29 2.65 # 7 ENSG00000094914 AAAS 10.1 10.2 10.3 9.87 10.4 # 8 ENSG00000081760 AACS 9.45 9.96 10.3 9.76 10.1 # 9 ENSG00000114771 AADAC -0.961 -Inf -Inf -Inf -Inf # 10 ENSG00000261846 AADACL2 -Inf -Inf -Inf -Inf -Inf # # ... with 11 more rows, and 15 more variables: TCGA.2Z.A9J6.01A <dbl>, TCGA.2Z.A9J7.01A <dbl>, TCGA.2Z.A9J8.01A <dbl>, # # TCGA.2Z.A9J9.01A <dbl>, TCGA.2Z.A9JD.01A <dbl>, TCGA.2Z.A9JE.01A <dbl>, TCGA.2Z.A9JG.01A <dbl>, TCGA.2Z.A9JI.01A <dbl>, # # TCGA.2Z.A9JJ.01A <dbl>, TCGA.2Z.A9JK.01A <dbl>, TCGA.2Z.A9JL.01A <dbl>, TCGA.2Z.A9JM.01A <dbl>, TCGA.2Z.A9JN.01A <dbl>, # # TCGA.2Z.A9JO.01A <dbl>, TCGA.2Z.A9JP.01A <dbl>
FYI, my choice to use
right_join
was mostly for demonstration here on Stack, preferring to keep the two columns on the left side of the frame so they appear here in the truncated rendering. Normally, many people prefer the "thinking" of%>% left_join(g_list, by = "hgnc_symbol")
, but the two calls are functionally identical other than the resulting order of columns.If you really need them to be a row name, then you can always use the above, un-
tibble
the frame, then apply row names fromensembl_gene_id
. Unfortunately, with your current data this cannot be demonstrated since you have duplicate symbolsAADACL2
, so row names will either fail or change the row names to force uniqueness, in which case some of your row names are not pristine.
CodePudding user response:
Consider merge
on row.names
and hgnc_symbol
. Then, assign row.names
of ensemble_gene_id
accordingly:
exp.kirp.log2_new <- merge(
exp.kirp.log2, g_list, by.x="row.names", by.y="hgnc_symbol"
)
rownames(exp.kirp.log2_new) <- exp.kirp.log2_new$ensembl_gene_id
colnames(exp.kirp.log2_new)[1] <- "genes"
exp.kirp.log2_new$ensembl_gene_id <- NULL
Output
exp.kirp.log2_new
genes TCGA.2K.A9WE.01A TCGA.2Z.A9J1.01A TCGA.2Z.A9J2.01A TCGA.2Z.A9J3.01A TCGA.2Z.A9J5.01A TCGA.2Z.A9J6.01A TCGA.2Z.A9J7.01A TCGA.2Z.A9J8.01A TCGA.2Z.A9J9.01A TCGA.2Z.A9JD.01A
ENSG00000121410 A1BG 7.653421 5.093894 5.5185446 4.701682 7.9964594 5.137192 6.9511751 3.617122 6.053895 3.156397
ENSG00000148584 A1CF 2.038928 5.216071 4.1164479 3.341118 5.2040898 6.635904 2.2494453 5.394723 -1.418890 4.262350
ENSG00000175899 A2M 14.351185 12.413652 12.2744680 10.168934 13.6832286 12.036719 10.9656928 11.997598 14.864733 10.950642
ENSG00000166535 A2ML1 -Inf -Inf -Inf -Inf -Inf -Inf 1.0794976 -1.582512 -1.418890 -Inf
ENSG00000128274 A4GALT 10.373764 11.191824 9.9311430 9.968393 10.3714564 10.820256 10.5991523 9.913799 10.469744 10.801507
ENSG00000118017 A4GNT 3.793823 2.987248 -Inf 5.288650 2.6495005 5.507075 4.3274431 5.103947 4.511740 1.374845
ENSG00000094914 AAAS 10.081960 10.191212 10.3189199 9.873058 10.4176870 10.326270 10.4556415 9.959992 9.285769 9.510276
ENSG00000081760 AACS 9.448323 9.962384 10.2574586 9.755352 10.0652445 9.912168 9.4653785 9.907794 9.213996 10.011448
ENSG00000114771 AADAC -0.961002 -Inf -Inf -Inf -Inf -Inf -Inf -Inf -Inf -Inf
ENSG00000261846 AADACL2 -Inf -Inf -Inf -Inf -Inf -Inf -Inf -Inf -Inf -Inf
ENSG00000197953 AADACL2 -Inf -Inf -Inf -Inf -Inf -Inf -Inf -Inf -Inf -Inf
ENSG00000188984 AADACL3 -Inf -Inf -Inf -Inf -Inf -Inf -0.9205707 -Inf -Inf -Inf
ENSG00000204518 AADACL4 -Inf -Inf 0.3090597 -Inf -0.3505225 -Inf -Inf -Inf -Inf -Inf
ENSG00000109576 AADAT 5.360859 6.225657 6.1670729 6.170390 5.9893525 5.522840 6.7233729 7.858313 6.088795 5.923259
ENSG00000103591 AAGAB 9.788802 10.339848 10.2991952 10.238533 10.1079094 10.065368 10.0139441 10.399725 9.397984 10.141021
ENSG00000115977 AAK1 10.377669 10.310307 10.5852157 9.940501 11.2050505 10.595469 9.2840872 11.740217 9.516166 10.909320
ENSG00000127837 AAMP 11.167571 11.128721 11.5823041 11.069040 11.6645693 11.235592 11.4833271 11.724645 11.184627 11.160112
TCGA.2Z.A9JE.01A TCGA.2Z.A9JG.01A TCGA.2Z.A9JI.01A TCGA.2Z.A9JJ.01A TCGA.2Z.A9JK.01A TCGA.2Z.A9JL.01A TCGA.2Z.A9JM.01A TCGA.2Z.A9JN.01A TCGA.2Z.A9JO.01A TCGA.2Z.A9JP.01A
ENSG00000121410 4.5567114 7.279247 8.201621 6.2647541 5.938639 7.0254176 5.917869 4.093552 5.3108418 6.2849471
ENSG00000148584 0.9765836 3.740517 5.715487 -0.4152299 6.153836 3.6235743 5.249074 -Inf -0.7072183 0.2268784
ENSG00000175899 11.3389166 12.086506 12.795486 12.8198024 14.030907 12.9644083 13.749027 12.291143 13.1978706 13.3433630
ENSG00000166535 0.9765836 -Inf 2.615958 -Inf -1.460767 -0.9612829 -Inf -Inf 3.3801615 -Inf
ENSG00000128274 11.4922671 10.327057 10.009494 11.7916439 10.187034 11.0087141 10.954840 10.816592 9.8386258 11.2080581
ENSG00000118017 2.9765653 3.536985 5.200912 2.0442192 1.998557 7.3961603 4.008066 -1.223980 -Inf 1.2268784
ENSG00000094914 10.0987481 10.578058 9.934009 9.5360669 9.359305 10.2035151 10.122282 8.924436 9.5441786 10.0414301
ENSG00000081760 9.8407534 10.864907 9.753307 10.3591574 9.099459 9.5523358 9.552387 9.761802 10.3479785 9.6687544
ENSG00000114771 -Inf -Inf -Inf -Inf 1.998557 -Inf -Inf -1.223980 -Inf -Inf
ENSG00000261846 -Inf -Inf -Inf -Inf -Inf -Inf -Inf -Inf -Inf -Inf
ENSG00000197953 -Inf -Inf -Inf -Inf -Inf -Inf -Inf -Inf -Inf -Inf
ENSG00000188984 -Inf -Inf -Inf -Inf -Inf -Inf -Inf -Inf -Inf -Inf
ENSG00000204518 -Inf -Inf -Inf -Inf -1.460767 -Inf -1.036230 -Inf -Inf 0.2268784
ENSG00000109576 5.7709823 7.254137 7.008267 5.7140641 6.267047 6.0272934 6.230459 7.129107 6.9580318 6.1457101
ENSG00000103591 10.6735347 9.835772 9.740810 10.0630462 9.443761 10.2444014 10.260015 10.645939 10.5275136 9.9453805
ENSG00000115977 10.5435725 10.650249 10.738224 10.2195933 10.319256 10.8539915 10.463518 10.976245 10.7557095 10.6250568
ENSG00000127837 11.1417883 11.190639 11.672007 11.4557248 11.226065 10.9360755 10.714379 11.374012 11.5723067 11.0719456