I'm using str_split_fixed() (from the stringr package) to separate taxa IDs, which works fine. However, the resulting data frame has new default row names (1:nrow), but I need to keep the original row names.
My original dataset example:
> dput(tax.example)
structure(list(phylodist = c("Archaea;Candidatus_Diapherotrites;unclassified_Candidatus_Diapherotrites;unclassified_Candidatus_Diapherotrites;unclassified_Candidatus_Diapherotrites;Candidatus_Iainarchaeum;Candidatus_Iainarchaeum_andersonii;Candidatus_Iainarchaeum_andersonii_SCGC_AAA011-E11_(contamination_screened)",
"Archaea;Candidatus_Korarchaeota;unclassified_Candidatus_Korarchaeota;unclassified_Candidatus_Korarchaeota;unclassified_Candidatus_Korarchaeota;Candidatus_Korarchaeum;Candidatus_Korarchaeum_cryptofilum;Candidatus_Korarchaeum_cryptofilum_OPF8",
"Archaea;Candidatus_Micrarchaeota;unclassified_Candidatus_Micrarchaeota;unclassified_Candidatus_Micrarchaeota;unclassified_Candidatus_Micrarchaeota;Candidatus_Mancarchaeum;Candidatus_Mancarchaeum_acidiphilum;Candidatus_Mancarchaeum_acidiphilum_Mia14",
"Archaea;Candidatus_Thermoplasmatota;Candidatus_Poseidoniia;Candidatus_Poseidoniales;unclassified_Candidatus_Poseidoniales;unclassified_Candidatus_Poseidoniales;uncultured_Candidatus_Poseidoniales_archaeon;Candidatus_Poseidoniales_archaeon_AHCG",
"Archaea;Candidatus_Thermoplasmatota;Thermoplasmata;Methanomassiliicoccales;Candidatus_Methanomethylophilaceae;Candidatus_Methanomethylophilus;Candidatus_Methanomethylophilus_sp._1R26;Candidatus_Methanomethylophilus_sp._1R26"
)), row.names = c("phylo1", "phylo2", "phylo3", "phylo4", "phylo5"
), class = "data.frame")
I then apply the following to it:
# Split every semicolon-delimited lineage string into 8 pieces and rebuild
# the data frame from the resulting matrix (one column per piece).
tax.example <- data.frame(
  str_split_fixed(tax.example$phylodist, ";", 8)
)
This separates the taxa IDs. However, the resulting data frame (below) has plain 1:5 row names. Any thoughts on how to keep the original phylo1 ... phylo5 row names? Thanks in advance.
> dput(tax.example)
structure(list(X1 = c("Archaea", "Archaea", "Archaea", "Archaea",
"Archaea"), X2 = c("Candidatus_Diapherotrites", "Candidatus_Korarchaeota",
"Candidatus_Micrarchaeota", "Candidatus_Thermoplasmatota", "Candidatus_Thermoplasmatota"
), X3 = c("unclassified_Candidatus_Diapherotrites", "unclassified_Candidatus_Korarchaeota",
"unclassified_Candidatus_Micrarchaeota", "Candidatus_Poseidoniia",
"Thermoplasmata"), X4 = c("unclassified_Candidatus_Diapherotrites",
"unclassified_Candidatus_Korarchaeota", "unclassified_Candidatus_Micrarchaeota",
"Candidatus_Poseidoniales", "Methanomassiliicoccales"), X5 = c("unclassified_Candidatus_Diapherotrites",
"unclassified_Candidatus_Korarchaeota", "unclassified_Candidatus_Micrarchaeota",
"unclassified_Candidatus_Poseidoniales", "Candidatus_Methanomethylophilaceae"
), X6 = c("Candidatus_Iainarchaeum", "Candidatus_Korarchaeum",
"Candidatus_Mancarchaeum", "unclassified_Candidatus_Poseidoniales",
"Candidatus_Methanomethylophilus"), X7 = c("Candidatus_Iainarchaeum_andersonii",
"Candidatus_Korarchaeum_cryptofilum", "Candidatus_Mancarchaeum_acidiphilum",
"uncultured_Candidatus_Poseidoniales_archaeon", "Candidatus_Methanomethylophilus_sp._1R26"
), X8 = c("Candidatus_Iainarchaeum_andersonii_SCGC_AAA011-E11_(contamination_screened)",
"Candidatus_Korarchaeum_cryptofilum_OPF8", "Candidatus_Mancarchaeum_acidiphilum_Mia14",
"Candidatus_Poseidoniales_archaeon_AHCG", "Candidatus_Methanomethylophilus_sp._1R26"
)), class = "data.frame", row.names = c(NA, -5L))
CodePudding user response:
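When setting up the new data.frame, you can keep the original row names by getting the row name attribute from the original data set (called df below; it is tax.example in the question) and passing it to the row.names argument of data.frame().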
attributes(df)$row.names
[1] "phylo1" "phylo2" "phylo3" "phylo4" "phylo5"
# Build the split data frame and label its rows with the row names taken
# from the original data frame `df`.
data.frame(
  # One column per semicolon-separated piece, 8 pieces per row.
  str_split_fixed(df$phylodist, ";", 8),
  # Reuse the original row names instead of the default 1..n.
  row.names = attributes(df)$row.names
)