R using a function to create a new column based on another column-CodePudding

I've read through many of the similar questions and I can't figure out what I'm doing wrong. I think I don't properly understand the apply functions, or the dplyr equivalents. Any help is appreciated. Here is my data:

> dput(dat)
# Example data frame `dat` as printed by dput(): 36 rows and four columns
# (Sample.ID, Nucleic.Acid.Conc., X260.280, X260.230). The number after the
# last "-" in Sample.ID is the sample's position number.
structure(list(Sample.ID = c("auto-wn2e-1", "auto-wn2e-2", "auto-wn2e-3", 
"auto-wn2e-4", "auto-wn2e-5", "auto-wn2e-6", "auto-wn2e-7", "auto-wn2e-8", 
"auto-wn2e-9", "auto-wn2e-10", "auto-wn2e-11", "auto-wn2e-12", 
"auto-wn2e-13", "auto-wn2e-14", "auto-wn2e-15", "auto-wn2e-16", 
"Saliva-manual-01", "Saliva-manual-02", "Saliva-manual-03", "Saliva-manual-04", 
"Saliva-auto2Xetoh-01", "Saliva-auto2Xetoh-02", "Saliva-auto2Xetoh-03", 
"Saliva-auto2Xetoh-04", "Saliva-auto2Xetoh-05", "Saliva-auto2Xetoh-06", 
"Saliva-auto2Xetoh-07", "Saliva-auto2Xetoh-08", "Saliva-auto2Xetoh-09", 
"Saliva-auto2Xetoh-10", "Saliva-auto2Xetoh-11", "Saliva-auto2Xetoh-12", 
"Saliva-auto2Xetoh-13", "Saliva-auto2Xetoh-14", "Saliva-auto2Xetoh-15", 
"Saliva-auto2Xetoh-16"), Nucleic.Acid.Conc. = c(106.9, 65.3, 
63.9, 63, 65, 68.2, 63.4, 75, 77.4, 99, 61.8, 58.9, 56, 102.7, 
93, 71.9, 100.9, 140.6, 114.7, 121.5, 104.5, 109, 92.8, 85.3, 
101.7, 120.8, 110.9, 112.3, 89.7, 104.2, 104.4, 120.2, 111.8, 
96, 67.7, 111.8), X260.280 = c(1.82, 1.99, 1.99, 1.98, 1.95, 
1.94, 1.93, 1.86, 1.97, 1.76, 1.97, 1.97, 1.99, 1.77, 1.79, 1.93, 
1.58, 1.62, 1.62, 1.64, 1.79, 1.78, 1.84, 1.85, 1.77, 1.71, 1.76, 
1.71, 1.86, 1.75, 1.81, 1.73, 1.75, 1.8, 1.77, 1.74), X260.230 = c(1.23, 
1.78, 1.61, 1.66, 1.72, 1.54, 1.57, 1.38, 1.64, 1.14, 1.71, 1.59, 
1.72, 1.15, 1.2, 1.52, 0.82, 0.82, 0.86, 0.99, 1.08, 1.1, 1.22, 
1.26, 1.14, 0.98, 1.05, 0.96, 1.27, 1.05, 1.18, 1.03, 1.05, 1.12, 
1.06, 1)), row.names = c(NA, -36L), class = "data.frame")

I have written a function to parse the Sample.ID and determine what 'location' or column on the plate that sample was in. Samples 1-8 are in column 1, 9-16 are in column 2 etc.

#' Map sample IDs to plate column numbers.
#'
#' The well number is read from the text after the last "-" in each ID
#' (leading zeros are accepted, so "Saliva-manual-01" is well 1). Wells fill
#' the plate in consecutive runs: wells 1-8 are column 1, wells 9-16 are
#' column 2, and so on.
#'
#' The function is vectorized and returns a value instead of assigning into
#' a data frame, so it can be used directly inside `mutate()` or on the
#' right-hand side of `dat$pl_col <- ...`.
#'
#' @param x Character vector (or factor) of sample IDs.
#' @param wells_per_col Number of wells per plate column (default 8).
#' @return Integer vector the same length as `x`. Elements are `NA` where the
#'   text after the last "-" is not made up solely of digits.
plate_col <- function(x, wells_per_col = 8L) {
  stopifnot(
    is.numeric(wells_per_col),
    length(wells_per_col) == 1L,
    !is.na(wells_per_col),
    wells_per_col >= 1
  )
  wells_per_col <- as.integer(wells_per_col)

  # Greedy ".*-" strips everything up to and including the last "-".
  suffix <- sub("^.*-", "", as.character(x))

  # Convert only all-digit suffixes so malformed IDs give NA without warnings.
  well <- rep(NA_integer_, length(suffix))
  is_number <- grepl("^[0-9]+$", suffix)
  well[is_number] <- as.integer(suffix[is_number])

  # Shift to 0-based before integer division so well 8 stays in column 1.
  (well - 1L) %/% wells_per_col + 1L
}

Here is my dplyr version. I know it's wrong because my function has dat$pl_col<-value

# Return dat with a pl_col column computed by plate_col() from Sample.ID.
dat %>% mutate(pl_col = plate_col(Sample.ID))

But this doesn't seem to work, or really seem logical either.

# Base-R form: store plate_col()'s result as the pl_col column of dat.
dat[["pl_col"]] <- plate_col(dat[["Sample.ID"]])

Any help appreciated, Thanks

CodePudding user response：

You can use the %/% operator to get the integer quotient of a division: when the sample number is divided by 8, the quotient gives the plate column (with a little tweak of + 1 and - 1 so that samples 8, 16, ... stay in the right column).

library(dplyr)

# Strip everything up to the last "-" to get the sample number, shift it to
# 0-based, integer-divide by 8 wells per column, then shift back to 1-based.
dat %>%
  mutate(pl_col = ((as.integer(gsub("^.*-", "", Sample.ID)) - 1) %/% 8) + 1)

              Sample.ID Nucleic.Acid.Conc. X260.280 X260.230 pl_col
1           auto-wn2e-1              106.9     1.82     1.23      1
2           auto-wn2e-2               65.3     1.99     1.78      1
3           auto-wn2e-3               63.9     1.99     1.61      1
4           auto-wn2e-4               63.0     1.98     1.66      1
5           auto-wn2e-5               65.0     1.95     1.72      1
6           auto-wn2e-6               68.2     1.94     1.54      1
7           auto-wn2e-7               63.4     1.93     1.57      1
8           auto-wn2e-8               75.0     1.86     1.38      1
9           auto-wn2e-9               77.4     1.97     1.64      2
10         auto-wn2e-10               99.0     1.76     1.14      2
11         auto-wn2e-11               61.8     1.97     1.71      2
12         auto-wn2e-12               58.9     1.97     1.59      2
13         auto-wn2e-13               56.0     1.99     1.72      2
14         auto-wn2e-14              102.7     1.77     1.15      2
15         auto-wn2e-15               93.0     1.79     1.20      2
16         auto-wn2e-16               71.9     1.93     1.52      2
17     Saliva-manual-01              100.9     1.58     0.82      1
18     Saliva-manual-02              140.6     1.62     0.82      1
19     Saliva-manual-03              114.7     1.62     0.86      1
20     Saliva-manual-04              121.5     1.64     0.99      1
21 Saliva-auto2Xetoh-01              104.5     1.79     1.08      1
22 Saliva-auto2Xetoh-02              109.0     1.78     1.10      1
23 Saliva-auto2Xetoh-03               92.8     1.84     1.22      1
24 Saliva-auto2Xetoh-04               85.3     1.85     1.26      1
25 Saliva-auto2Xetoh-05              101.7     1.77     1.14      1
26 Saliva-auto2Xetoh-06              120.8     1.71     0.98      1
27 Saliva-auto2Xetoh-07              110.9     1.76     1.05      1
28 Saliva-auto2Xetoh-08              112.3     1.71     0.96      1
29 Saliva-auto2Xetoh-09               89.7     1.86     1.27      2
30 Saliva-auto2Xetoh-10              104.2     1.75     1.05      2
31 Saliva-auto2Xetoh-11              104.4     1.81     1.18      2
32 Saliva-auto2Xetoh-12              120.2     1.73     1.03      2
33 Saliva-auto2Xetoh-13              111.8     1.75     1.05      2
34 Saliva-auto2Xetoh-14               96.0     1.80     1.12      2
35 Saliva-auto2Xetoh-15               67.7     1.77     1.06      2
36 Saliva-auto2Xetoh-16              111.8     1.74     1.00      2

CodePudding user response：

Perhaps this helps

library(dplyr)
library(tidyr)
# Split Sample.ID at the "-" that precedes its trailing digits: the text
# before it becomes Sample_Prefix and the digits become ind. The lookahead
# needs "\\d+$" (one or more digits up to the end of the string).
# pl_col is then the position of each row's prefix among the distinct
# prefixes, in order of first appearance.
dat %>%
  separate(Sample.ID, into = c("Sample_Prefix", "ind"),
           sep = "-(?=\\d+$)", remove = FALSE) %>%
  mutate(pl_col = match(Sample_Prefix, unique(Sample_Prefix)))

Or can also be

 # Split Sample.ID at the "-" before its trailing digits, then number the rows
 # of each prefix group in runs of 8 (rows 1-8 -> 1, rows 9-16 -> 2, ...).
 # Grouping must be by Sample_Prefix: Sample.ID is unique per row, so grouping
 # by it would make every group size 1 and every pl_col equal to 1.
 dat %>%
   separate(Sample.ID, into = c("Sample_Prefix", "ind"),
            sep = "-(?=\\d+$)", remove = FALSE) %>%
   group_by(Sample_Prefix) %>%
   mutate(pl_col = as.integer(gl(n(), 8, n()))) %>%
   ungroup()

CodePudding user response：

Here is another possibility:

# Sample number = third "-"-delimited field of each Sample.ID, as a number.
vals <- as.numeric(str_split_fixed(dat[["Sample.ID"]], "-", 3)[, 3])
# Bin the numbers into right-closed intervals of width 8:
# (0, 8] -> 1, (8, 16] -> 2, ..., (88, 96] -> 12. The result is a factor.
pl_col <- cut(vals, breaks = 8 * 0:12, labels = seq_len(12))
pl_col
#  [1] 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2
# Levels: 1 2 3 4 5 6 7 8 9 10 11 12