I've read through many of the similar questions and I can't figure out what I'm doing wrong. I think I don't properly understand the apply functions, or the dplyr version.
Any help is appreciated.
Here is my data:
> dput(dat)
structure(list(Sample.ID = c("auto-wn2e-1", "auto-wn2e-2", "auto-wn2e-3",
"auto-wn2e-4", "auto-wn2e-5", "auto-wn2e-6", "auto-wn2e-7", "auto-wn2e-8",
"auto-wn2e-9", "auto-wn2e-10", "auto-wn2e-11", "auto-wn2e-12",
"auto-wn2e-13", "auto-wn2e-14", "auto-wn2e-15", "auto-wn2e-16",
"Saliva-manual-01", "Saliva-manual-02", "Saliva-manual-03", "Saliva-manual-04",
"Saliva-auto2Xetoh-01", "Saliva-auto2Xetoh-02", "Saliva-auto2Xetoh-03",
"Saliva-auto2Xetoh-04", "Saliva-auto2Xetoh-05", "Saliva-auto2Xetoh-06",
"Saliva-auto2Xetoh-07", "Saliva-auto2Xetoh-08", "Saliva-auto2Xetoh-09",
"Saliva-auto2Xetoh-10", "Saliva-auto2Xetoh-11", "Saliva-auto2Xetoh-12",
"Saliva-auto2Xetoh-13", "Saliva-auto2Xetoh-14", "Saliva-auto2Xetoh-15",
"Saliva-auto2Xetoh-16"), Nucleic.Acid.Conc. = c(106.9, 65.3,
63.9, 63, 65, 68.2, 63.4, 75, 77.4, 99, 61.8, 58.9, 56, 102.7,
93, 71.9, 100.9, 140.6, 114.7, 121.5, 104.5, 109, 92.8, 85.3,
101.7, 120.8, 110.9, 112.3, 89.7, 104.2, 104.4, 120.2, 111.8,
96, 67.7, 111.8), X260.280 = c(1.82, 1.99, 1.99, 1.98, 1.95,
1.94, 1.93, 1.86, 1.97, 1.76, 1.97, 1.97, 1.99, 1.77, 1.79, 1.93,
1.58, 1.62, 1.62, 1.64, 1.79, 1.78, 1.84, 1.85, 1.77, 1.71, 1.76,
1.71, 1.86, 1.75, 1.81, 1.73, 1.75, 1.8, 1.77, 1.74), X260.230 = c(1.23,
1.78, 1.61, 1.66, 1.72, 1.54, 1.57, 1.38, 1.64, 1.14, 1.71, 1.59,
1.72, 1.15, 1.2, 1.52, 0.82, 0.82, 0.86, 0.99, 1.08, 1.1, 1.22,
1.26, 1.14, 0.98, 1.05, 0.96, 1.27, 1.05, 1.18, 1.03, 1.05, 1.12,
1.06, 1)), row.names = c(NA, -36L), class = "data.frame")
I have written a function to parse the Sample.ID and determine what 'location' or column on the plate that sample was in. Samples 1-8 are in column 1, 9-16 are in column 2 etc.
#' Map sample IDs to their plate column.
#'
#' The sample number is the run of digits after the last "-" in each ID
#' (leading zeros are allowed, e.g. "Saliva-manual-01"). Samples 1-8 sit in
#' plate column 1, 9-16 in column 2, and so on up to 89-96 in column 12.
#'
#' @param x Character vector of sample IDs such as "auto-wn2e-9".
#' @return Integer vector the same length as `x` holding the plate column
#'   (1-12); `NA` where the ID has no trailing number or the number falls
#'   outside 1-96.
plate_col <- function(x) {
  # Everything after the last hyphen; IDs without a hyphen are left as-is and
  # are rejected by the digits-only check below unless they are bare numbers.
  well_txt <- sub("^.*-", "", as.character(x))

  # Convert only purely numeric suffixes so that as.integer() never warns.
  well <- rep(NA_integer_, length(well_txt))
  is_num <- grepl("^[0-9]+$", well_txt)
  well[is_num] <- as.integer(well_txt[is_num])

  # Shift to 0-based, integer-divide by the 8 wells per column, shift back.
  col <- (well - 1L) %/% 8L + 1L

  # A 96-well plate only has columns 1 to 12.
  col[!is.na(col) & (col < 1L | col > 12L)] <- NA_integer_
  col
}
Here is my dplyr version. I know it's wrong because my function has dat$pl_col<-value
# Add a pl_col column by handing the whole Sample.ID column to plate_col().
mutate(dat, pl_col = plate_col(Sample.ID))
But this doesn't seem to work, or really seem logical either.
# Base-R form: store plate_col()'s result for all sample IDs in dat$pl_col.
dat[["pl_col"]] <- plate_col(dat[["Sample.ID"]])
Any help appreciated, Thanks
CodePudding user response:
You can use the integer-division operator %/%
to get the plate column directly: dividing the sample number by 8 gives a quotient that corresponds to the column number (with a small tweak: - 1
before dividing and + 1
afterwards, so that samples 8, 16, ... stay in their own column).
library(dplyr)
# Strip everything up to the last "-" to get the sample number, then shift to
# 0-based, integer-divide by 8 wells per column, and shift back to 1-based.
dat %>%
  mutate(pl_col = ((as.integer(gsub("^.*-", "", Sample.ID)) - 1) %/% 8) + 1)
Sample.ID Nucleic.Acid.Conc. X260.280 X260.230 pl_col
1 auto-wn2e-1 106.9 1.82 1.23 1
2 auto-wn2e-2 65.3 1.99 1.78 1
3 auto-wn2e-3 63.9 1.99 1.61 1
4 auto-wn2e-4 63.0 1.98 1.66 1
5 auto-wn2e-5 65.0 1.95 1.72 1
6 auto-wn2e-6 68.2 1.94 1.54 1
7 auto-wn2e-7 63.4 1.93 1.57 1
8 auto-wn2e-8 75.0 1.86 1.38 1
9 auto-wn2e-9 77.4 1.97 1.64 2
10 auto-wn2e-10 99.0 1.76 1.14 2
11 auto-wn2e-11 61.8 1.97 1.71 2
12 auto-wn2e-12 58.9 1.97 1.59 2
13 auto-wn2e-13 56.0 1.99 1.72 2
14 auto-wn2e-14 102.7 1.77 1.15 2
15 auto-wn2e-15 93.0 1.79 1.20 2
16 auto-wn2e-16 71.9 1.93 1.52 2
17 Saliva-manual-01 100.9 1.58 0.82 1
18 Saliva-manual-02 140.6 1.62 0.82 1
19 Saliva-manual-03 114.7 1.62 0.86 1
20 Saliva-manual-04 121.5 1.64 0.99 1
21 Saliva-auto2Xetoh-01 104.5 1.79 1.08 1
22 Saliva-auto2Xetoh-02 109.0 1.78 1.10 1
23 Saliva-auto2Xetoh-03 92.8 1.84 1.22 1
24 Saliva-auto2Xetoh-04 85.3 1.85 1.26 1
25 Saliva-auto2Xetoh-05 101.7 1.77 1.14 1
26 Saliva-auto2Xetoh-06 120.8 1.71 0.98 1
27 Saliva-auto2Xetoh-07 110.9 1.76 1.05 1
28 Saliva-auto2Xetoh-08 112.3 1.71 0.96 1
29 Saliva-auto2Xetoh-09 89.7 1.86 1.27 2
30 Saliva-auto2Xetoh-10 104.2 1.75 1.05 2
31 Saliva-auto2Xetoh-11 104.4 1.81 1.18 2
32 Saliva-auto2Xetoh-12 120.2 1.73 1.03 2
33 Saliva-auto2Xetoh-13 111.8 1.75 1.05 2
34 Saliva-auto2Xetoh-14 96.0 1.80 1.12 2
35 Saliva-auto2Xetoh-15 67.7 1.77 1.06 2
36 Saliva-auto2Xetoh-16 111.8 1.74 1.00 2
CodePudding user response:
Perhaps this helps
library(dplyr)
library(tidyr)
# Split Sample.ID at the hyphen that precedes the trailing run of digits:
# "-(?=\\d+$)" matches a "-" only when it is followed by digits to end of string.
# NOTE(review): match() numbers each distinct prefix in order of first
# appearance; it does not bin samples into groups of 8 - confirm this is the
# intended meaning of pl_col.
dat %>%
  separate(Sample.ID, into = c("Sample_Prefix", "ind"),
           sep = "-(?=\\d+$)", remove = FALSE) %>%
  mutate(pl_col = match(Sample_Prefix, unique(Sample_Prefix)))
Or can also be
# Split Sample.ID at the hyphen before the trailing digits, then number the
# rows of each sample prefix in consecutive runs of 8 (1 x 8, 2 x 8, ...).
# Grouping must be by Sample_Prefix: Sample.ID is unique per row, so grouping
# on it would make every group size 1 and pl_col always 1.
# Relies on rows within a prefix being in sample-number order.
dat %>%
  separate(Sample.ID, into = c("Sample_Prefix", "ind"),
           sep = "-(?=\\d+$)", remove = FALSE) %>%
  group_by(Sample_Prefix) %>%
  mutate(pl_col = as.integer(gl(n(), 8, n()))) %>%
  ungroup()
CodePudding user response:
Here is another possibility:
# Read the third "-"-separated field of every sample ID as a number.
vals <- as.numeric(
  str_split_fixed(string = dat$Sample.ID, pattern = "-", n = 3)[, 3]
)
# Bin the numbers into 12 right-closed intervals of width 8:
# (0,8] -> 1, (8,16] -> 2, ..., (88,96] -> 12.
pl_col <- cut(vals, breaks = 8 * (0:12), labels = seq_len(12))
pl_col
# [1] 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2
# Levels: 1 2 3 4 5 6 7 8 9 10 11 12