I have a dataset like this:
structure(list(`Frequency
Percent` = c("car", "window", "ball",
"ups"), AI = c("2\n0.00", "3\n0.00", "1\n0.00", "2\n0.00"), BLK = c("0\n0.00",
"218\n0.29", "48\n0.06", "0\n0.00"), HIANIC = c("1\n0.00", "8\n0.01",
"4\n0.01", "0\n0.00"), NATRICAN = c("9\n0.01", "7\n0.01", "8\n0.01",
"0\n0.00"), UNK = c("15\n0.02", "83\n0.11", "36\n0.05", "0\n0.00"
), yy = c("111\n0.15", "897\n1.20", "756\n1.02", "1\n0.00")), class = "data.frame", row.names = c(NA,
-4L))
How can I split each row by "" and remove n to make two new columns. For instance, car and AI cell (2\n0.00), I will have 2 and 0.00 in two different columns.
CodePudding user response:
Using tidyr::separate_rows
and tidyr::pivot_wider
you could do:
library(tidyr)
library(dplyr)
dat |>
mutate(unit = c("n\npct")) |>
separate_rows(-1, sep = "\n") |>
pivot_wider(names_from = "unit", values_from = -1)
#> # A tibble: 4 × 15
#> Frequency\n…¹ AI_n AI_pct BLK_n BLK_pct HIANI…² HIANI…³ NATRI…⁴ NATRI…⁵ UNK_n
#> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 car 2 0.00 0 0.00 1 0.00 9 0.01 15
#> 2 window 3 0.00 218 0.29 8 0.01 7 0.01 83
#> 3 ball 1 0.00 48 0.06 4 0.01 8 0.01 36
#> 4 ups 2 0.00 0 0.00 0 0.00 0 0.00 0
#> # … with 5 more variables: UNK_pct <chr>, yy_n <chr>, yy_pct <chr>,
#> # unit_n <chr>, unit_pct <chr>, and abbreviated variable names
#> # ¹`Frequency\n Percent`, ²HIANIC_n, ³HIANIC_pct, ⁴NATRICAN_n,
#> # ⁵NATRICAN_pct
CodePudding user response:
One way is to use tidyr::separate
in a for
loop:
for(i in names(df[,-1])){
df <- tidyr::separate(df, i, sep = "\n", into = c(i, paste0(i,"_val")))
}
Output:
# Frequency\n Percent AI AI_val BLK BLK_val HIANIC HIANIC_val NATRICAN NATRICAN_val UNK UNK_val yy yy_val
# 1 car 2 0.00 0 0.00 1 0.00 9 0.01 15 0.02 111 0.15
# 2 window 3 0.00 218 0.29 8 0.01 7 0.01 83 0.11 897 1.20
# 3 ball 1 0.00 48 0.06 4 0.01 8 0.01 36 0.05 756 1.02
# 4 ups 2 0.00 0 0.00 0 0.00 0 0.00 0 0.00 1 0.00
CodePudding user response:
There is also a base R solution:
dat = structure(list(`Frequency
Percent` = c("car", "window", "ball",
"ups"), AI = c("2\n0.00", "3\n0.00", "1\n0.00", "2\n0.00"), BLK = c("0\n0.00",
"218\n0.29", "48\n0.06", "0\n0.00"), HIANIC = c("1\n0.00", "8\n0.01",
"4\n0.01", "0\n0.00"), NATRICAN = c("9\n0.01", "7\n0.01", "8\n0.01",
"0\n0.00"), UNK = c("15\n0.02", "83\n0.11", "36\n0.05", "0\n0.00"
), yy = c("111\n0.15", "897\n1.20", "756\n1.02", "1\n0.00")), class = "data.frame", row.names = c(NA,
-4L))
transformed = data.frame(Freq_pc = dat[,1])
for(col in seq(2, ncol(dat))){
transformed = cbind(transformed, t(matrix(unlist(strsplit(dat[,col], "\n")), nrow=2)))
names(transformed)[c(2*(col-1), 2*(col-1) 1)] = c(paste0(names(dat)[col], "_n"), paste0(names(dat)[col], "_pc"))
}
That results in:
Freq_pc AI_n AI_pc BLK_n BLK_pc HIANIC_n HIANIC_pc NATRICAN_n NATRICAN_pc UNK_n UNK_pc yy_n yy_pc
1 car 2 0.00 0 0.00 1 0.00 9 0.01 15 0.02 111 0.15
2 window 3 0.00 218 0.29 8 0.01 7 0.01 83 0.11 897 1.20
3 ball 1 0.00 48 0.06 4 0.01 8 0.01 36 0.05 756 1.02
4 ups 2 0.00 0 0.00 0 0.00 0 0.00 0 0.00 1 0.00