I have a large data frame exported from a survey platform. Each column holds one question (as its title), and each cell holds all of the answers the respondent selected. I'm trying to split each of these answer columns into a new set of columns, one per possible answer (with the answer as the name of the new column).
After that, I would like each new column to indicate with a "1" whether that answer appears in the original column, so that the data is easier to process.
# Sample survey data: one row per respondent, one column per question.
# The question title is kept on a single line so the string does not embed a
# newline; data.frame() (check.names = TRUE) turns it into the syntactic
# column name `what.do.you.like.to.eat`.
df <- data.frame(
  "name" = c("John", "mark", "bell", "elsa"),
  "what do you like to eat" = c("apple", "fries apple", "peach", "bread")
)
original df
name | What.do.you.like.to.eat |
---|---|
John | apple |
Mark | fries apple |
bell | peach |
elsa | bread |
I'm using this code, which works, but I'm sure there must be a more efficient/easier way of doing it, as I have over 50 columns like this.
# Add one indicator column per answer to a survey data frame.
#
# data:    data frame holding the survey responses.
# column:  name (string) of the column that contains the answers as text.
# answers: character vector of answers; each becomes a new column named after
#          the answer. Each answer is used as a case-insensitive regular
#          expression, so escape regex metacharacters if an answer has any.
#
# Returns `data` with the new columns appended, holding "1" where the answer
# occurs in `column` and "" where it does not.
add_answer_flags <- function(data, column, answers) {
  stopifnot(is.data.frame(data), column %in% names(data))
  for (answer in answers) {
    found <- grepl(answer, data[[column]], ignore.case = TRUE)
    # Plain "1" / "" (no padding spaces) so the values can be compared directly.
    data[[answer]] <- ifelse(found, "1", "")
  }
  data
}

# No separate() step is needed: the columns are created by the helper itself,
# so the same call can be repeated for every question column.
df <- add_answer_flags(
  df,
  column = "what.do.you.like.to.eat",
  answers = c("apple", "fries", "peach", "bread")
)
Desired output:
name | What.do.you.like.to.eat | apple | fries | peach | bread |
---|---|---|---|---|---|
John | apple | 1 | | | |
Mark | fries apple | 1 | 1 | | |
bell | peach | | | 1 | |
elsa | bread | | | | 1 |
CodePudding user response:
You could use one of the purrr `map` functions
to iterate over your vector of answers and, for each one, check whether it is present in the string.
library(tidyverse)

# Sample survey data; data.frame() converts the quoted title into the
# syntactic column name `what.do.you.like.to.eat`.
df <- data.frame(
  name = c("John", "mark", "bell", "elsa"),
  "what do you like to eat" = c("apple", "fries apple", "peach", "bread")
)

# Every answer that should get its own logical column.
ans <- c("apple", "fries", "peach", "bread")

# Build a named list with one logical vector per answer (TRUE where the answer
# is detected in the response), turn it into columns, and append them to df.
ans %>%
  set_names() %>%
  map(function(answer) str_detect(df$what.do.you.like.to.eat, answer)) %>%
  as_tibble() %>%
  bind_cols(df, .)
#> name what.do.you.like.to.eat apple fries peach bread
#> 1 John apple TRUE FALSE FALSE FALSE
#> 2 mark fries apple TRUE TRUE FALSE FALSE
#> 3 bell peach FALSE FALSE TRUE FALSE
#> 4 elsa bread FALSE FALSE FALSE TRUE
CodePudding user response:
OK, I've done this; tell me if it works for you:
# Sample survey data; data.frame() converts the quoted title into the
# syntactic column name `what.do.you.like.to.eat`.
my_df <- data.frame(
  "name" = c("John", "mark", "bell", "elsa"),
  "what do you like to eat" = c("apple", "fries apple", "peach", "bread"),
  stringsAsFactors = FALSE
)

# Part 1: collect the distinct answers. Every space-separated word found in
# the response column is a candidate; sort() also drops NA, and empty strings
# (produced by repeated spaces) are removed.
my_var <- sort(unique(unlist(
  strsplit(my_df$what.do.you.like.to.eat, " ", fixed = TRUE)
)))
my_var <- my_var[nzchar(my_var)]

# Part 2: add one column per answer, holding 1 where the response contains
# that answer and NA where it does not. Answers are compared against whole
# words, so they are never interpreted as regular expressions and "apple"
# does not match "pineapple". Names already present in my_df are skipped so
# existing columns are never overwritten.
my_words <- strsplit(my_df$what.do.you.like.to.eat, " ", fixed = TRUE)
for (my_answer in setdiff(my_var, colnames(my_df))) {
  my_hit <- vapply(
    my_words,
    function(words) my_answer %in% words,
    logical(1)
  )
  my_df[[my_answer]] <- ifelse(my_hit, 1, NA)
}
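If the answers are embedded in longer sentences, you can replace the first part (the data and the construction of `my_var`) with this, which restricts `my_var` to a known list of foods: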
# Sample survey data where each answer is embedded in a longer sentence;
# data.frame() converts the quoted title into the syntactic column name
# `what.do.you.like.to.eat`.
my_df <- data.frame(
  "name" = c("John", "mark", "bell", "elsa"),
  "what do you like to eat" = c(
    "i like apple",
    "i love fries apple",
    "i'm kind of peach",
    "bread all the way"
  ),
  stringsAsFactors = FALSE
)

# Only these words count as answers; everything else in a response is ignored.
my_food <- c("apple", "fries", "bread", "peach")

# Distinct words found in the responses, sorted, then restricted to the known
# foods. intersect() keeps the sorted order of its first argument, and since
# "" is not a food no separate empty-string clean-up is needed.
my_words <- unlist(strsplit(my_df$what.do.you.like.to.eat, " ", fixed = TRUE))
my_var <- intersect(sort(unique(my_words)), my_food)