I have a long format data frame called ind_long_sub
. I want to select only characters such sum
or mean
from the characters in indices
column, so that I can create another column contain only characters such as mean
and sum
.
I have tried substr()
and strsplit()
functions, but could not achieved anything, because the characters in the column have different dimensions.
Any thoughts or ideas please?
# reshape data to long format
> ind_long <- ind_csv %>% pivot_longer(3:12, names_to = "indices")
> view(ind_long)
> #create subset
> ind_long_sub <- subset(ind_long, year >= 2001 & year <= 2003)
> ind_long_sub
# A tibble: 360 x 4
year month indices value
<int> <int> <chr> <dbl>
1 2001 1 gridded_idw2_sum 156.
2 2001 1 Mada4_sum 87
3 2001 1 Mada22_sum 185
4 2001 1 Mada38_sum 85
5 2001 1 Mada53_sum 180
6 2001 1 GDD.AS_sum 546.
7 2001 1 GDD.Ch_sum 552.
8 2001 1 hum.AS.mean 0.82
9 2001 1 hum.Ch.mean 0.83
10 2001 1 ndvi.mean 0.72
# ... with 350 more rows
> dput(ind_long_sub)
structure(list(year = c(2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2001L,
2001L, 2001L, 2001L, 2001L, 2001L, 2001L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L,
2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L, 2002L,
2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L,
2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L,
2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L,
2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L,
2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L,
2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L,
2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L,
2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L,
2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L,
2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L,
2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L,
2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L,
2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L, 2003L,
2003L, 2003L, 2003L), month = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L), indices = c("gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean", "gridded_idw2_sum",
"Mada4_sum", "Mada22_sum", "Mada38_sum", "Mada53_sum", "GDD.AS_sum",
"GDD.Ch_sum", "hum.AS.mean", "hum.Ch.mean", "ndvi.mean"), value = c(156.37,
87, 185, 85, 180, 546.2, 552.1, 0.82, 0.83, 0.72, 30.53, 12,
34, 38, 46, 517.8, 533.2, 0.74, 0.73, 0.42, 207.65, 189, 249,
174, 286, 577.8, 579.8, 0.8, 0.81, 0.44, 235.14, 210, 289, 239,
206, 571.7, 579.1, 0.85, 0.83, 0.57, 146.38, 141, 156, 132, 121,
587.8, 592.9, 0.85, 0.84, 0.62, 167.42, 175, 113, 110, 297, 543.8,
548.9, 0.85, 0.84, 0.75, 108.58, 100, 123, 137, 66, 566.3, 575,
0.85, 0.83, 0.62, 154.86, 212, 134, 165, 230, 561, 569.2, 0.85,
0.84, 0.52, 229.65, 251, 198, 202, 241, 537.9, 533.5, 0.85, 0.86,
0.39, 343.23, 386, 250, 411, 397, 552.5, 545.6, 0.87, 0.88, 0.48,
147.07, 174, 147, 94, 233, 540.5, 527.7, 0.83, 0.84, 0.65, 89.54,
81, 82, 120, 116, 551.2, 538.5, 0.8, 0.82, 0.7, 0.36, 3, 0, 0,
0, 552.1, 552.3, 0.68, 0.74, 0.53, 1.45, 0, 0, 0, 0, 520.8, 542.8,
0.64, 0.68, 0.33, 36.97, 6, 61, 25, 79, 615.2, 619.5, 0.67, 0.7,
0.28, 268.37, 196, 317, 243, 316, 583.3, 585.9, 0.79, 0.81, 0.43,
122, 95, 93, 187, 162, 597.2, 593.7, 0.8, 0.83, 0.7, 145.84,
186, 297, 94, 92, 557.1, 554.9, 0.83, 0.85, 0.8, 222.34, 267,
213, 231, 263, 559.1, 556.5, 0.84, 0.87, 0.75, 200.16, 249, 181,
257, 245, 553.4, 552, 0.84, 0.87, 0.57, 364.37, 251, 273, 458,
575, 530, 524.3, 0.85, 0.88, 0.6, 207.03, 183, 187, 168, 245,
557.3, 555.8, 0.84, 0.87, 0.66, 227.27, 252, 263, 248, 190, 533.6,
534.7, 0.84, 0.86, 0.79, 145.6, 96, 142, 206, 83, 573.1, 571.5,
0.8, 0.84, 0.71, 9.89, 2, 9, 2, 49, 563.1, 565.5, 0.71, 0.76,
0.54, 21.65, 54, 5, 29, 76, 531.2, 539.8, 0.67, 0.72, 0.34, 204.21,
212, 209, 185, 191, 607.2, 606, 0.73, 0.78, 0.4, 131.67, 70,
172, 78, 109, 583.6, 586.7, 0.8, 0.81, 0.46, 126.64, 219, 125,
102, 159, 585.8, 588.5, 0.83, 0.84, 0.76, 173.15, 110, 200, 249,
123, 552, 547.8, 0.84, 0.85, 0.76, 261.72, 207, 232, 205, 404,
552.8, 552.4, 0.85, 0.86, 0.58, 291.09, 288, 320, 345, 351, 561,
554.6, 0.85, 0.87, 0.41, 328.42, 328, 295, 366, 355, 528.3, 523.9,
0.85, 0.88, 0.56, 524.7, 339, 382, 612, 807, 528.7, 527, 0.88,
0.9, 0.62, 174.36, 142, 167, 93, 296, 545.9, 538.9, 0.84, 0.85,
0.68, 42.39, 26, 46, 33, 45, 552.1, 541.8, 0.75, 0.8, 0.79)), row.names = c(NA,
-360L), class = c("tbl_df", "tbl", "data.frame"))
CodePudding user response:
You could use str_extract
:
stringr::str_extract(ind_long_sub$indices,'sum|mean')
[1] "sum" "sum" "sum" "sum" "sum" "sum" "sum" "mean" "mean" "mean" "sum"
[12] "sum" "sum" "sum" "sum" "sum" "sum" "mean" "mean" "mean" "sum" "sum"
...
CodePudding user response:
A base R option using sub
-
We can drop everything till a dot (.
) or an underscore (_
).
sub('.*(\\.|_)', '', ind_long_sub$indices)
#[1] "sum" "sum" "sum" "sum" "sum" "sum" "sum" "mean" "mean" "mean" "sum" "sum"
#[13] "sum" "sum" "sum" "sum" "sum" "mean" "mean" "mean" "sum" "sum" "sum" "sum"
#[25] "sum" "sum" "sum" "mean" "mean" "mean" "sum" "sum" "sum" "sum" "sum" "sum"
#...
#...
CodePudding user response:
In base R
, we can use trimws
trimws(ind_long_sub$indices, whitespace = ".*[._]")
-output
[1] "sum" "sum" "sum" "sum" "sum" "sum" "sum" "mean" "mean" "mean" "sum" "sum" "sum" "sum" "sum" "sum" "sum"
[18] "mean" "mean" "mean" "sum" "sum" "sum" "sum" "sum" "sum" "sum" "mean" "mean" "mean" "sum" "sum" "sum" "sum"
[35] "sum" "sum" "sum" "mean" "mean" "mean" "sum" "sum" "sum" "sum" "sum" "sum" "sum" "mean" "mean" "mean" "sum"
[52] "sum" "sum" "sum" "sum" "sum" "sum" ...