I am trying to change the format of my current data set to one that has 1 user per row, and which splits all the unique values (dynamic number of values) in the Color and Food columns into their own columns with Yes and No. Each user has a unique ID.
Current format:
ID | Name | Color | Food
1 | John | Blue | Pizza
1 | John | Red | Pizza
1 | John | Yellow | Pizza
1 | John | Blue | Ice Cream
1 | John | Red | Ice Cream
1 | John | Yellow | Ice Cream
2 | Kelly | Blue | Pizza
2 | Kelly | Red | Pizza
Desired format:
ID | Name | Color_Blue | Color_Red | Color_Yellow | Food_Pizza | Food_Ice Cream |
1 | John | Yes | Yes | Yes | Yes | Yes |
2 | Kelly | Yes | Yes | No | Yes | No |
CodePudding user response:
library(dplyr); library(tidyr)
df %>%
pivot_longer(-c(ID:Name)) %>%
unite("col", c(name, value)) %>%
distinct(ID, Name, col) %>%
mutate(val = "Yes") %>%
pivot_wider(names_from = col, values_from = "val", values_fill = "No")
# A tibble: 2 x 7
ID Name Color_Blue Food_Pizza Color_Red Color_Yellow `Food_Ice Cream`
<chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 1 John Yes Yes Yes Yes Yes
2 2 Kelly Yes Yes Yes No No
sample data
df <- tribble(~ID , ~Name , ~Color , ~Food,
"1" , "John", "Blue", "Pizza",
"1" , "John" , "Red", "Pizza",
"1" , "John", "Yellow", "Pizza",
"1" , "John" , "Blue", "Ice Cream",
"1" , "John", "Red", "Ice Cream",
"1" , "John" , "Yellow", "Ice Cream",
"2" , "Kelly", "Blue", "Pizza",
"2" , "Kelly", "Red", "Pizza")
CodePudding user response:
Convoluted base R solution:
spread_vecs <- c("Color", "Food")
keep_vecs <- names(df)[!(
names(df) %in% spread_vecs)
]
aggregate(
as.formula(
paste(
".",
paste0(
keep_vecs,
collapse = " "
),
sep = "~"
)
),
cbind(
df[,keep_vecs],
data.frame(
do.call(
cbind,
lapply(
spread_vecs,
function(x){
y <- unique(
df[,x]
)
setNames(
data.frame(
outer(
df[,x],
y,
`==`
)
),
paste(
x,
y,
sep = "_"
)
)
}
)
),
row.names = NULL
)
),
FUN = function(x){
ifelse(
any(x),
"Yes",
"No"
)
}
)
Data:
df <- structure(list(ID = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L), Name = c("John",
"John", "John", "John", "John", "John", "Kelly", "Kelly"), Color = c("Blue",
"Red", "Yellow", "Blue", "Red", "Yellow", "Blue", "Red"), Food = c("Pizza",
"Pizza", "Pizza", "Ice Cream", "Ice Cream", "Ice Cream", "Pizza",
"Pizza")), class = "data.frame", row.names = c(NA, -8L))