here's a sample of my data :
> a
sample(dt$r, 10)
1 5
2 2,5,6
3 5
4 <NA>
5 <NA>
6 5
7 2,6
8 5
9 <NA>
10 <NA>
I want to seperate these values that convert them into columns. The dataframeshould look like this:
2 5 6
1 FALSE TRUE FALSE
2 TRUE TRUE FALSE
3 FALSE FALSE FALSE
4 FALSE FALSE FALSE
5 FALSE FALSE FALSE
6 FALSE TRUE FALSE
7 TRUE FALSE TRUE
8 FALSE TRUE FALSE
9 FALSE FALSE FALSE
10 FALSE FALSE FALSE
I'm trying to use tidyr::spread or strsplit but can't seem to find how
CodePudding user response:
If you have to generalize, it's not the best answer.
Data
data <- data.frame(aux = c("5","2,5,6","5",NA,NA,"2,6"))
Code
library(dplyr)
library(stringr)
data %>%
mutate(
`2` = if_else(str_detect(aux,"2"),TRUE,FALSE),
`5` = if_else(str_detect(aux,"5"),TRUE,FALSE),
`6` = if_else(str_detect(aux,"6"),TRUE,FALSE)
) %>%
replace_na(list(`2` = FALSE,`5` = FALSE,`6` = FALSE))
Output
aux 2 5 6
1 5 FALSE TRUE FALSE
2 2,5,6 TRUE TRUE TRUE
3 5 FALSE TRUE FALSE
4 <NA> FALSE FALSE FALSE
5 <NA> FALSE FALSE FALSE
6 2,6 TRUE FALSE TRUE
CodePudding user response:
Using strsplit
this is one approach.
First get the data frame as a strsplit
list, then get the values without <NA>
, finally look for matches with sapply
.
df_split <- strsplit(df$r, ",")
df_split_val <- sort(unique(unlist(df_split)))[2:4]
setNames(data.frame(t(sapply(df_split, function(x)
df_split_val %in% x))), df_split_val)
2 5 6
1 FALSE TRUE FALSE
2 TRUE TRUE TRUE
3 FALSE TRUE FALSE
4 FALSE FALSE FALSE
5 FALSE FALSE FALSE
6 FALSE TRUE FALSE
7 TRUE FALSE TRUE
8 FALSE TRUE FALSE
9 FALSE FALSE FALSE
10 FALSE FALSE FALSE
Data
df <- structure(list(r = c("5", "2,5,6", "5", "<NA>", "<NA>", "5",
"2,6", "5", "<NA>", "<NA>")), class = "data.frame", row.names = c(NA,
-10L))
CodePudding user response:
A more general solution:
- Identify all unique items present in your column
dt <- data.frame(r = c("5","2,5,6","5",NA,NA,"2,6"))
items <- sort(unique(unlist(strsplit(unique(dt$r), ","))))
items
[1] "2" "5" "6"
- Create columns for each unique item by finding in-string matches using
grepl
dt2 <- as.data.frame(sapply(items, function(item) grepl(item, dt$r, fixed=TRUE)))
dt2
2 5 6
1 FALSE TRUE FALSE
2 TRUE TRUE TRUE
3 FALSE TRUE FALSE
4 FALSE FALSE FALSE
5 FALSE FALSE FALSE
6 TRUE FALSE TRUE