I have a sample dataset like this:
Show in New Window
[1] 84
Show in New Window
# Sample data as a structure() literal (looks like dput() output).
# It is a grouped tibble ("grouped_df") with 20 rows and four columns:
#   variable2  - character, the grouping column (5 distinct values)
#   values     - character, a mix of words and digit-only strings
#   mappedTerm - character, same contents as `values` in this sample
#   valueOrder - integer, all NA in this sample
# The `groups` attribute records the row indices that belong to each variable2 group.
structure(list(variable2 = c("ea_level_dataset::constituency",
"ea_level_dataset::constituency", "ea_level_dataset::constituency",
"ea_level_dataset::ea_positive_2016", "ea_level_dataset::ea_positive_2016",
"ea_level_dataset::ea_positive_2016", "ea_level_dataset::ea_positive_2016",
"ea_level_dataset::ea_positive_2016", "ea_level_dataset::ea_type",
"ea_level_dataset::ea_type", "ea_level_dataset::households_sprayed_2016",
"ea_level_dataset::households_sprayed_2016", "ea_level_dataset::households_sprayed_2016",
"ea_level_dataset::households_sprayed_2016", "ea_level_dataset::households_sprayed_2016",
"ea_level_dataset::households_sprayed_2016", "ea_level_dataset::households_sprayed_2016",
"ea_level_dataset::households_sprayed_2016", "ea_level_dataset::households_sprayed_2016",
"ea_level_dataset::region"), values = c("Kongola", "Linyanti",
"Sibbinda", "0", "1", "2", "3", "4", "Rural", "Urban", "0", "4",
"5", "6", "7", "8", "9", "11", "27", "Caprivi"), mappedTerm = c("Kongola",
"Linyanti", "Sibbinda", "0", "1", "2", "3", "4", "Rural", "Urban",
"0", "4", "5", "6", "7", "8", "9", "11", "27", "Caprivi"), valueOrder = c(NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L), groups = structure(list(
variable2 = c("ea_level_dataset::constituency", "ea_level_dataset::ea_positive_2016",
"ea_level_dataset::ea_type", "ea_level_dataset::households_sprayed_2016",
"ea_level_dataset::region"), .rows = structure(list(1:3,
4:8, 9:10, 11:19, 20L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -5L), .drop = TRUE))
Now what I want to do is keep the numeric values (like 0, 1, 2, 3, 4) within each group of variable2 (those numbers are stored as character), and remove the rows whose value is a word (like "Rural") in the mappedTerm column.
Could someone show me how to do that with regex in R? Thanks!
CodePudding user response:
We could use if_all to return only the rows where both columns consist entirely of digits, by detecting one or more digits (\\d+) from the start (^) to the end ($) of the string:
library(dplyr)
library(stringr)
# Keep only the rows whose `values` and `mappedTerm` are made up entirely of
# digits, then convert the columns to their natural types.
df1 %>%
  # Remove the grouping by variable2 before filtering and converting.
  ungroup() %>%
  # "^\\d+$" matches one or more digits spanning the whole string. The `+`
  # quantifier is essential: without it the pattern cannot match values such
  # as "0" or "27", and every row would be dropped.
  filter(if_all(values:mappedTerm, ~ str_detect(.x, "^\\d+$"))) %>%
  # as.is = TRUE keeps any remaining character columns as character rather
  # than converting them to factors.
  type.convert(as.is = TRUE)
-output
# A tibble: 14 × 4
variable2 values mappedTerm valueOrder
<chr> <int> <int> <lgl>
1 ea_level_dataset::ea_positive_2016 0 0 NA
2 ea_level_dataset::ea_positive_2016 1 1 NA
3 ea_level_dataset::ea_positive_2016 2 2 NA
4 ea_level_dataset::ea_positive_2016 3 3 NA
5 ea_level_dataset::ea_positive_2016 4 4 NA
6 ea_level_dataset::households_sprayed_2016 0 0 NA
7 ea_level_dataset::households_sprayed_2016 4 4 NA
8 ea_level_dataset::households_sprayed_2016 5 5 NA
9 ea_level_dataset::households_sprayed_2016 6 6 NA
10 ea_level_dataset::households_sprayed_2016 7 7 NA
11 ea_level_dataset::households_sprayed_2016 8 8 NA
12 ea_level_dataset::households_sprayed_2016 9 9 NA
13 ea_level_dataset::households_sprayed_2016 11 11 NA
14 ea_level_dataset::households_sprayed_2016 27 27 NA
Or another option is to force the columns to numeric with as.numeric and then remove the resulting NA elements with complete.cases (as.numeric will emit a warning that NAs were introduced by coercion):
# Alternative: coerce both columns to numeric (non-numeric strings become NA,
# with a coercion warning) and keep only the rows where neither column is NA.
df1 %>%
  ungroup() %>%
  mutate(across(c(values, mappedTerm), as.numeric)) %>%
  filter(if_all(c(values, mappedTerm), ~ !is.na(.x)))