I will use this dataset as an example:
# Example data: BEZ and Bez1 hold four "A" followed by three "B";
# the remaining four columns are "B" in every row.
BEZ <- rep(c("A", "B"), times = c(4, 3))
var <- rep("B", 7)
bar <- var
Bez1 <- BEZ
var1 <- var
bar1 <- var
dat <- data.frame(BEZ, var, bar, Bez1, var1, bar1)
The tricky thing I would like to do is use some method (loops, map(), apply(), dplyr functions, and so on) to create, beside each existing column, a new column in which the respective row value is converted into a number.
Expected result:
BEZ BEZ_num var var_num bar bar_num Bez1 Bez1_num var1 var1_num bar1 bar1_num
A 0 B 1 B 1 A 0 B 1 B 1
A 0 B 1 B 1 A 0 B 1 B 1
A 0 B 1 C 2 A 0 B 1 A 0
A 0 B 1 B 1 A 0 C 2 B 1
B 1 B 1 B 1 B 1 C 2 C 2
B 1 B 1 B 1 A 0 B 1 B 1
B 1 B 1 B 1 A 0 B 1 B 1
This is more or less the result I would like to achieve. Any suggestions? Thanks
CodePudding user response:
See the comments: the provided data frame and the expected output do not match. But I think we could use mutate(across(...)) with the .names argument combined with case_when:
library(dplyr)
# Add a numeric "<column>_num" companion for every column of `dat`.
# The right-hand sides are numeric literals (not the strings "0", "1", "2"),
# so the new columns hold numbers, as the question asks. A value other than
# "A", "B" or "C" matches no condition and becomes NA.
dat %>%
  mutate(across(
    everything(),
    ~ case_when(
      .x == "A" ~ 0,
      .x == "B" ~ 1,
      .x == "C" ~ 2
    ),
    .names = "{.col}_num"
  ))
BEZ var bar Bez1 var1 bar1 BEZ_num var_num bar_num Bez1_num var1_num bar1_num
1 A B B A B B 0 1 1 0 1 1
2 A B B A B B 0 1 1 0 1 1
3 A B B A B B 0 1 1 0 1 1
4 A B B A B B 0 1 1 0 1 1
5 B B B B B B 1 1 1 1 1 1
6 B B B B B B 1 1 1 1 1 1
7 B B B B B B 1 1 1 1 1 1
CodePudding user response:
Using a for loop in base R:
# Pair every column of `dat` with a numeric code for its letter
# (position in LETTERS minus one, so "A" -> 0, "B" -> 1, ...).
codes <- lapply(dat, function(values) match(values, LETTERS) - 1)
names(codes) <- paste0(names(dat), "_num")
# rbind() stacks the two name vectors; reading that matrix column-wise gives
# the interleaved order: col, col_num, next col, next col_num, ...
dat2 <- data.frame(dat, codes, check.names = FALSE)[
  as.vector(rbind(names(dat), names(codes)))
]
dat2
# BEZ BEZ_num var var_num bar bar_num Bez1 Bez1_num var1 var1_num bar1 bar1_num
# 1 A 0 B 1 B 1 A 0 B 1 B 1
# 2 A 0 B 1 B 1 A 0 B 1 B 1
# 3 A 0 B 1 B 1 A 0 B 1 B 1
# 4 A 0 B 1 B 1 A 0 B 1 B 1
# 5 B 1 B 1 B 1 B 1 B 1 B 1
# 6 B 1 B 1 B 1 B 1 B 1 B 1
# 7 B 1 B 1 B 1 B 1 B 1 B 1
Or a (slightly convoluted) approach using dplyr::across():
library(dplyr)
# For each column, create a pass-through copy ("_TMP") and a numeric code
# ("_num") side by side, drop the original columns that were consumed
# (.keep = "unused"), then strip the "_TMP" suffix from the copies.
dat %>%
  mutate(
    across(
      BEZ:bar1,
      list(TMP = identity, num = function(v) match(v, LETTERS) - 1)
    ),
    .keep = "unused"
  ) %>%
  rename_with(function(nm) sub("_TMP$", "", nm))
# same output as above
Or finally, if you don't care about the order of the output columns, you could also use dplyr::across() with the .names argument:
# Append the numeric codes after the original columns; .names sets the name
# of each new column to "<column>_num".
dat %>%
  mutate(
    across(BEZ:bar1, function(v) match(v, LETTERS) - 1, .names = "{.col}_num")
  )
# BEZ var bar Bez1 var1 bar1 BEZ_num var_num bar_num Bez1_num var1_num bar1_num
# 1 A B B A B B 0 1 1 0 1 1
# 2 A B B A B B 0 1 1 0 1 1
# 3 A B B A B B 0 1 1 0 1 1
# 4 A B B A B B 0 1 1 0 1 1
# 5 B B B B B B 1 1 1 1 1 1
# 6 B B B B B B 1 1 1 1 1 1
# 7 B B B B B B 1 1 1 1 1 1
CodePudding user response:
Using factor
library(dplyr)
# Encode each column against one shared set of levels (LETTERS) so that a
# letter gets the same code in every column: "A" -> 0, "B" -> 1, "C" -> 2, ...
# With a bare factor(.x) the levels are derived per column, so a column that
# contains only "B" would code "B" as 0 instead of 1.
# Values that are not in LETTERS become NA.
dat %>%
  mutate(across(
    everything(),
    ~ as.integer(factor(.x, levels = LETTERS)) - 1,
    .names = "{.col}_num"
  ))
# output
# BEZ var bar Bez1 var1 bar1 BEZ_num var_num bar_num Bez1_num var1_num bar1_num
# 1 A B B A B B 0 1 1 0 1 1
# 2 A B B A B B 0 1 1 0 1 1
# 3 A B B A B B 0 1 1 0 1 1
# 4 A B B A B B 0 1 1 0 1 1
# 5 B B B B B B 1 1 1 1 1 1
# 6 B B B B B B 1 1 1 1 1 1
# 7 B B B B B B 1 1 1 1 1 1