Samples are either wild-type or mutant. I want to find the number of samples that are wild-type wt
versus mutant mut
with respect to the respondents Responder
in the Group1
column of the res.sig
dataframe. In the n_mutated_group1
column, the first letter indicates the number of mutant and the last two digits (after or
) indicates the total number for that group. wt
is the difference between total
and mut
.
# Number of samples in wild-type versus mutant with respect to the most significantly enriched genes
total <- sum(as.numeric(res.sig %>%
filter(Group1=="Responder") %>%
mutate(last_letter = substr(n_mutated_group1,-2,-1)) %>%
pull(last_letter)))
mut <- sum(as.numeric(res.sig %>%
filter(Group1=="Responder") %>%
mutate(first_letter = substr(n_mutated_group1,1,1)) %>%
pull(first_letter)))
wt <- total-mut
res.sig
> dput(res.sig)
structure(list(Hugo_Symbol = c("ERCC2", "ERCC2", "AKAP9", "AKAP9",
"HERC1", "HERC1", "HECTD1", "HECTD1", "MACF1", "MACF1", "MROH2B",
"MROH2B", "KMT2C", "KMT2C"), Group1 = c("Non-Responder", "Responder",
"Non-Responder", "Responder", "Non-Responder", "Responder", "Non-Responder",
"Responder", "Non-Responder", "Responder", "Non-Responder", "Responder",
"Non-Responder", "Responder"), Group2 = c("Rest", "Rest", "Rest",
"Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest", "Rest",
"Rest", "Rest", "Rest"), n_mutated_group1 = c("0 of 25", "9 of 25",
"0 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25", "6 of 25",
"0 of 25", "6 of 25", "0 of 25", "6 of 25", "1 of 25", "7 of 25"
), n_mutated_group2 = c("9 of 25", "0 of 25", "6 of 25", "0 of 25",
"6 of 25", "0 of 25", "6 of 25", "0 of 25", "6 of 25", "0 of 25",
"6 of 25", "0 of 25", "7 of 25", "1 of 25"), p_value = c(0.00163083541184905,
0.00163083541184905, 0.022289766970618, 0.022289766970618, 0.022289766970618,
0.022289766970618, 0.022289766970618, 0.022289766970618, 0.022289766970618,
0.022289766970618, 0.022289766970618, 0.022289766970618, 0.0487971536957187,
0.0487971536957187), OR = c(0, Inf, 0, Inf, 0, Inf, 0, Inf, 0,
Inf, 0, Inf, 0.111488645279478, 8.96952328636894), OR_low = c(0,
2.56647319276964, 0, 1.33358819424024, 0, 1.33358819424024, 0,
1.33358819424024, 0, 1.33358819424024, 0, 1.33358819424024, 0.00228988507629356,
1.0079479819766), OR_high = c(0.38963976043749, Inf, 0.749856668137133,
Inf, 0.749856668137133, Inf, 0.749856668137133, Inf, 0.749856668137133,
Inf, 0.749856668137133, Inf, 0.992114690322592, 436.703138665198
), fdr = c(0.109265972593886, 0.109265972593886, 0.248902397838568,
0.248902397838568, 0.248902397838568, 0.248902397838568, 0.248902397838568,
0.248902397838568, 0.248902397838568, 0.248902397838568, 0.248902397838568,
0.248902397838568, 0.467058471087594, 0.467058471087594)), row.names = c(NA,
-14L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x000002adab171ef0>)
Problem:
Here, I'm getting total
as NA_real_
instead of the total number.
CodePudding user response:
You can extract for total
everything after the of
string in your column and for mut
you can extract everything before of
. You can use this code:
library(tidyverse)
total <- sum(as.numeric(res.sig %>%
filter(Group1=="Responder") %>%
mutate(first_letter = sub('. of(. )', '\\1', n_mutated_group1)) %>%
pull(first_letter)))
mut <- sum(as.numeric(res.sig %>%
filter(Group1 == "Responder") %>%
mutate(last_letter = sub("\\of.*", "", n_mutated_group1)) %>%
pull(last_letter)))
wt <- total-mut
wt
Output wt:
[1] 129