I am trying to work out how sub() works in R. I was trying to break each element of a list into two parts. For instance, I have this working for list_2, where var1 = 0_300 and var2 = minus5.
# Example names of the form "area_<from>_<to>_<offset>".
list_2 <- c(
  "area_0_300_minus5",
  "area_0_300_minus4",
  "area_0_300_minus3",
  "area_0_300_minus2",
  "area_0_300_minus1",
  "area_0_300_0",
  "area_0_300_1",
  "area_0_300_2",
  "area_0_300_3",
  "area_0_300_4",
  "area_0_300_5",
  "area_300_600_minus5",
  "area_300_600_minus4",
  "area_300_600_minus3",
  "area_300_600_minus2",
  "area_300_600_minus1",
  "area_300_600_0",
  "area_300_600_1",
  "area_300_600_2",
  "area_300_600_3",
  "area_300_600_4",
  "area_300_600_5",
  "area_600_800_minus5",
  "area_600_800_minus4",
  "area_600_800_minus3",
  "area_600_800_minus2",
  "area_600_800_minus1",
  "area_600_800_0",
  "area_600_800_1",
  "area_600_800_2",
  "area_600_800_3",
  "area_600_800_4",
  "area_600_800_5"
)

# var1: the "<from>_<to>" pair, e.g. "0_300".
#   ^.*_         anything up to an underscore (here the "area_" prefix)
#   (\\d+_\\d+)  capture group 1: digits, an underscore, digits
#   _.*$         an underscore and the rest of the string (the offset)
# The replacement "\\1" swaps the whole match for capture group 1.
# The "+" quantifiers are required: "\\d" alone matches exactly one digit.
var1_working <- sub("^.*_(\\d+_\\d+)_.*$", "\\1", list_2)

# var2: everything after the last underscore, e.g. "minus5".
# The greedy ".*_" runs up to the final "_", so "(.*)" captures the tail.
var2_working <- sub("^.*_(.*)$", "\\1", list_2)
But for my list_1, I can't seem to extract 0_300 etc. as var1 and 'm5' / 'm4' / 'm3' / 'm2' / 'm1' / '0' / '1' etc. as var2.
# Names of the form
# "as.factor(radius_ring)<from>_<to>:as.factor(year_delta)<offset>:units".
list_1 <- c(
  "as.factor(radius_ring)0_300:as.factor(year_delta)0:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)0:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)0:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)0:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)1:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)1:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)1:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)1:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)2:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)2:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)2:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)2:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)3:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)3:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)3:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)3:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)4:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)4:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)4:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)4:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)5:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)5:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)5:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)5:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)m1:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)m1:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)m1:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)m1:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)m2:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)m2:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)m2:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)m2:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)m3:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)m3:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)m3:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)m3:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)m4:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)m4:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)m4:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)m4:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)m5:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)m5:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)m5:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)m5:units"
)

# Failed attempt 1: this pattern needs "_<digits>_<digits>_", but here the
# number pair is preceded by ")" and followed by ":", so nothing matches
# and sub() returns every string unchanged.
var1_nonworking <- sub("^.*_(\\d+_\\d+)_.*$", "\\1", list_1)

# Failed attempt 2: the last underscore sits inside "year_delta", so the
# capture is "delta)<offset>:units" rather than just the offset.
var2_nonworking <- sub("^.*_(.*)$", "\\1", list_1)
I am actually just a bit unsure how the pattern extraction works in "^.*_(\\d+_\\d+)_.*$", "\\1",
which means too little to me to be able to adapt it to my list_1.
I hope this makes sense.
CodePudding user response:
Two approaches:
strcapture
# strcapture() returns a data frame, one column per capture group.
# Each "\\)([^:]*)" captures the text between a ")" and the next ":".
strcapture(
  ".*\\)([^:]*).*\\)([^:]*):.*",
  list_1,
  proto = list(var1 = "", var2 = "")
)[c(1:3, 42:44), ]
#        var1 var2
# 1     0_300    0
# 2   300_600    0
# 3   600_800    0
# 42  300_600   m5
# 43  600_800   m5
# 44 800_1000   m5
gregexpr
# gregexpr() extracts zero or more matches per line of text.
# The lookbehind "(?<=\\))" and lookahead "(?=:)" require perl = TRUE; they
# anchor each match between a ")" and the following ":" without including
# either character in the result.
gre <- gregexpr("(?<=\\))([^:]*)(?=:)", list_1, perl = TRUE)
regmatches(list_1, gre)[c(1:3, 42:44)]
# [[1]]
# [1] "0_300" "0"
# [[2]]
# [1] "300_600" "0"
# [[3]]
# [1] "600_800" "0"
# [[4]]
# [1] "300_600" "m5"
# [[5]]
# [1] "600_800" "m5"
# [[6]]
# [1] "800_1000" "m5"
CodePudding user response:
Using the input shown in the Note at the end (shortened to avoid excessive length), replace the last underscore with a space and then read the result using read.table. With this approach only relatively simple regular expressions are needed.
# The greedy "(.*)_" runs up to the last underscore, which is swapped for a
# space; read.table() then splits each string on whitespace into two columns.
read.table(
  text = sub(pattern = "(.*)_", replacement = "\\1 ", x = list_2)
)
##           V1     V2
## 1 area_0_300 minus5
## 2 area_0_300 minus4
## 3 area_0_300 minus3
## 4 area_0_300 minus2
## 5 area_0_300 minus1
## 6 area_0_300      0
Similarly for list_1 replace all occurrences of : and ) with a space, read it in and extract columns 2 and 4.
# Turn every ":" and ")" into a space so read.table() splits each string into
# whitespace-separated fields; fields 2 and 4 hold the two wanted values.
read.table(
  text = gsub(pattern = "[:)]", replacement = " ", x = list_1)
)[c(2, 4)]
##         V2 V4
## 1    0_300  0
## 2  300_600  0
## 3  600_800  0
## 4 800_1000  0
## 5    0_300  1
## 6  300_600  1
Note
# Shortened sample input: six names of the form "area_<from>_<to>_<offset>".
list_2 <- c(
  "area_0_300_minus5",
  "area_0_300_minus4",
  "area_0_300_minus3",
  "area_0_300_minus2",
  "area_0_300_minus1",
  "area_0_300_0"
)
# Shortened sample input: six names of the form
# "as.factor(radius_ring)<from>_<to>:as.factor(year_delta)<offset>:units".
list_1 <- c(
  "as.factor(radius_ring)0_300:as.factor(year_delta)0:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)0:units",
  "as.factor(radius_ring)600_800:as.factor(year_delta)0:units",
  "as.factor(radius_ring)800_1000:as.factor(year_delta)0:units",
  "as.factor(radius_ring)0_300:as.factor(year_delta)1:units",
  "as.factor(radius_ring)300_600:as.factor(year_delta)1:units"
)