I want to subset the `res.gs` dataframe to keep only the samples that are mutated. A sample counts as "mutated" if the first character in the `n_mutated_group1` column is not zero. I then want to create another dataframe, `wt.samp`, which retains the samples that are not mutated.
# NOTE(review): `%>%` binds tighter than `!=`, so the comparison with "0" is
# applied to the whole data frame returned by mutate() (every column, not just
# first_letter). The row index is therefore a logical matrix with one entry
# per cell instead of one TRUE/FALSE per row, which is the likely cause of
# the unexpected row count reported below.
mut.samp <- res.gs[res.gs %>% mutate(first_letter = substr(n_mutated_group1,1,1))!="0",]
# Intended to collect the rows of res.gs that did not end up in mut.samp.
# NOTE(review): row-wise behaviour depends on dplyr's data-frame setdiff()
# being the one in scope -- confirm dplyr is attached.
wt.samp <- setdiff(res.gs, mut.samp)
> dput(res.gs)
structure(list(Hugo_Symbol = c("AKAP9", "AKAP9", "ERCC2", "ERCC2",
"HECTD1", "HECTD1", "HERC1", "HERC1", "KMT2C", "KMT2C", "MACF1",
"MACF1", "MROH2B", "MROH2B"), Missense_Mutation = c(9L, 9L, 9L,
9L, 6L, 6L, 8L, 8L, 19L, 19L, 5L, 5L, 5L, 5L), Nonsense_Mutation = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L), Splice_Site = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L), total = c(9L,
9L, 9L, 9L, 6L, 6L, 8L, 8L, 20L, 20L, 6L, 6L, 6L, 6L), MutatedSamples = c(6L,
6L, 9L, 9L, 6L, 6L, 6L, 6L, 8L, 8L, 6L, 6L, 6L, 6L), AlteredSamples = c(6L,
6L, 9L, 9L, 6L, 6L, 6L, 6L, 8L, 8L, 6L, 6L, 6L, 6L), Group1 = c("Non-Responder",
"Responder", "Non-Responder", "Responder", "Non-Responder", "Responder",
"Non-Responder", "Responder", "Non-Responder", "Responder", "Non-Responder",
"Responder", "Non-Responder", "Responder"), Group2 = c("Rest",
"Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest",
"Rest", "Rest", "Rest", "Rest", "Rest"), n_mutated_group1 = c("0 of 25",
"6 of 25", "0 of 25", "9 of 25", "0 of 25", "6 of 25", "0 of 25",
"6 of 25", "1 of 25", "7 of 25", "0 of 25", "6 of 25", "0 of 25",
"6 of 25"), n_mutated_group2 = c("6 of 25", "0 of 25", "9 of 25",
"0 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25", "7 of 25",
"1 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25"), p_value = c(0.022289766970618,
0.022289766970618, 0.00163083541184905, 0.00163083541184905,
0.022289766970618, 0.022289766970618, 0.022289766970618, 0.022289766970618,
0.0487971536957187, 0.0487971536957187, 0.022289766970618, 0.022289766970618,
0.022289766970618, 0.022289766970618), OR = c(0, Inf, 0, Inf,
0, Inf, 0, Inf, 0.111488645279478, 8.96952328636894, 0, Inf,
0, Inf), OR_low = c(0, 1.33358819424024, 0, 2.56647319276964,
0, 1.33358819424024, 0, 1.33358819424024, 0.00228988507629356,
1.0079479819766, 0, 1.33358819424024, 0, 1.33358819424024), OR_high = c(0.749856668137133,
Inf, 0.38963976043749, Inf, 0.749856668137133, Inf, 0.749856668137133,
Inf, 0.992114690322592, 436.703138665198, 0.749856668137133,
Inf, 0.749856668137133, Inf), fdr = c(0.248902397838568, 0.248902397838568,
0.109265972593886, 0.109265972593886, 0.248902397838568, 0.248902397838568,
0.248902397838568, 0.248902397838568, 0.467058471087594, 0.467058471087594,
0.248902397838568, 0.248902397838568, 0.248902397838568, 0.248902397838568
)), row.names = c(NA, -14L), class = "data.frame")
My code is returning 198 mutants and 0 wt.
CodePudding user response:
As Wimpel showed in a comment, and Andrea M showed in an answer, there are multiple approaches you can take here. However, I think the base function `subset()` may be the simplest.
# Keep the rows of res.gs whose n_mutated_group1 value starts with any
# character other than "0" (the pattern anchors at the start of the string).
subset(
  res.gs,
  subset = grepl(pattern = '^[^0]', x = n_mutated_group1)
)
Indexing also works.
# Same row filter written as bracket indexing: with() exposes the columns of
# res.gs by name, and the logical vector from grepl() selects the rows.
with(res.gs, res.gs[grepl('^[^0]', n_mutated_group1), ])
The key is the `grepl()` code from Wimpel: the pattern `^[^0]` matches values whose first character is anything other than `0`.
CodePudding user response:
One way of doing this is with `filter()` from `dplyr`. You also need to use `substr()` to extract the first character of each value in the column.
library(dplyr)

# Split res.gs by the first character of n_mutated_group1:
#   "0"           -> no mutated samples in group 1 -> wt.samp
#   anything else -> at least one mutated sample   -> mut.samp
# Both subsets are assigned to new objects so that res.gs itself is left
# untouched; overwriting it would make the snippet order-dependent (a second
# run would find no "0" rows left and return an empty wt.samp).
wt.samp <- res.gs %>%
  filter(substr(n_mutated_group1, 1, 1) == "0")
mut.samp <- res.gs %>%
  filter(substr(n_mutated_group1, 1, 1) != "0")