I have a script written with an explicit for loop,
and I would like to rewrite it using the map
family of functions or the apply family. My goal is to reduce the time the script takes to run. Any leads?
library(dplyr)
library(stringr)
# Example survey data: `a_1`/`a_2` are numeric columns, while `b` and `c`
# hold numeric codes that are described in `dictionary`.
mydata <- tibble(
  a_1 = c(20, 22, 13, 14, 44),
  a_2 = c(42, 13, 32, 31, 14),
  b   = c(1, 2, 1, 1, 2),
  c   = c(1, 2, 1, 3, 1)
)
# Data dictionary: one row per variable stem. For "select" variables,
# `values` and `valuelabel` are comma-separated strings giving the codes
# and their labels in matching order; they are missing for "mselect".
dictionary <- tibble(
  variable   = c("a", "b", "c"),
  label      = c("Age", "Gender", "Education"),
  type       = c("mselect", "select", "select"),
  values     = c(NA_character_, "1, 2", "1, 2,3"),
  valuelabel = c(NA_character_, "Male, Female", "Primary, Secondary, Tertiary")
)
# Convert every single-select variable in `mydata` into a labelled factor,
# using the codes (`values`) and labels (`valuelabel`) from `dictionary`.
factor_vars <- dictionary %>%
  filter(type == "select") %>%
  pull(variable)

for (var in factor_vars) {
  # Look the variable up once instead of filtering the dictionary twice.
  var_spec <- dictionary %>%
    filter(variable == var)

  # Codes and labels are stored as comma-separated strings. Trim BOTH after
  # splitting: uneven spacing such as "1, 2,3" must not break code matching,
  # and labels must not keep a leading space (" Female" instead of "Female").
  fct_levels <- str_trim(
    unlist(strsplit(pull(var_spec, values), split = ","))
  )
  fct_labels <- str_trim(
    unlist(strsplit(pull(var_spec, valuelabel), split = ","))
  )

  # factor() matches on as.character(x), so numeric codes 1, 2, ... match
  # the character levels "1", "2", ...
  mydata[[var]] <- factor(
    mydata[[var]],
    levels = fct_levels,
    labels = fct_labels
  )
}
CodePudding user response:
You could do something like this. It is better to provide a clean dictionary (i.e. consistently one space after each comma) than to rely on `trimws`, `str_trim`, or the like.
# Variables to recode: every "select" entry of the dictionary
# (c("b", "c") for the example data).
v <- dictionary$variable[dictionary$type %in% "select"]

# Build one factor per variable from its comma-separated codes and labels,
# then collect the factors into a data frame named after the variables.
u <- lapply(v, function(x) {
  # Named list with elements `values` and `valuelabel`. Splitting on a comma
  # with optional surrounding whitespace tolerates entries such as "1, 2,3",
  # which a literal ", " separator would split into the wrong number of
  # pieces.
  d <- dictionary[dictionary$variable == x, c("values", "valuelabel")] |>
    unlist() |>
    trimws() |>
    strsplit("\\s*,\\s*")

  # Fail loudly on a malformed dictionary row; factor() would otherwise
  # silently accept a single label for several levels.
  stopifnot(length(d[["values"]]) == length(d[["valuelabel"]]))

  factor(mydata[[x]], levels = d[["values"]], labels = d[["valuelabel"]])
}) |>
  setNames(v) |>
  as.data.frame()
Result:
# Keep the columns that were not recoded and append the new factor columns.
res <- cbind(mydata[setdiff(names(mydata), v)], u)
# a_1 a_2 b c
# 1 20 42 Male Primary
# 2 22 13 Female Secondary
# 3 13 32 Male Primary
# 4 14 31 Male Tertiary
# 5 44 14 Female Primary
Where:
# Inspect the structure of `res`; the output recorded below shows that
# `b` and `c` are factors while `a_1` and `a_2` remain numeric.
str(res)
# 'data.frame': 5 obs. of 4 variables:
# $ a_1: num 20 22 13 14 44
# $ a_2: num 42 13 32 31 14
# $ b : Factor w/ 2 levels "Male","Female": 1 2 1 1 2
# $ c : Factor w/ 3 levels "Primary","Secondary",..: 1 2 1 3 1
Note:
> R.version.string
[1] "R version 4.1.2 (2021-11-01)"