Home > OS >  Add column for each unique value in given row
Add column for each unique value in given row

Time:10-12

I am trying to change the format of my current data set to one that has 1 user per row, and which splits all the unique values (dynamic number of values) in the Color and Food columns into their own columns with Yes and No. Each user has a unique ID.

Current format: 
ID | Name  | Color  | Food 
1  | John  | Blue   | Pizza
1  | John  | Red    | Pizza
1  | John  | Yellow | Pizza
1  | John  | Blue   | Ice Cream
1  | John  | Red    | Ice Cream
1  | John  | Yellow | Ice Cream
2  | Kelly | Blue   | Pizza
2  | Kelly | Red    | Pizza


Desired format: 
ID | Name  | Color_Blue | Color_Red | Color_Yellow | Food_Pizza | Food_Ice Cream |
1  | John  | Yes        | Yes       | Yes          | Yes        | Yes            |
2  | Kelly | Yes        | Yes       | No           | Yes        | No             |

CodePudding user response:

library(dplyr); library(tidyr)
df %>% 
  pivot_longer(-c(ID:Name)) %>%
  unite("col", c(name, value)) %>%
  distinct(ID, Name, col) %>%
  mutate(val = "Yes") %>%
  pivot_wider(names_from = col, values_from = "val", values_fill = "No")

# A tibble: 2 x 7
  ID    Name  Color_Blue Food_Pizza Color_Red Color_Yellow `Food_Ice Cream`
  <chr> <chr> <chr>      <chr>      <chr>     <chr>        <chr>           
1 1     John  Yes        Yes        Yes       Yes          Yes             
2 2     Kelly Yes        Yes        Yes       No           No   

sample data

df <- tribble(~ID , ~Name  , ~Color  , ~Food,
"1"  , "John",  "Blue",    "Pizza",
"1"  , "John" , "Red",    "Pizza",
"1"  , "John",  "Yellow",  "Pizza",
"1"  , "John" , "Blue",   "Ice Cream",
"1"  , "John",  "Red",    "Ice Cream",
"1"  , "John" , "Yellow", "Ice Cream",
"2"  , "Kelly", "Blue",    "Pizza",
"2"  , "Kelly", "Red",    "Pizza")
  

CodePudding user response:

Convoluted base R solution:

spread_vecs <- c("Color", "Food")

keep_vecs <- names(df)[!(
  names(df) %in% spread_vecs)
]

aggregate(
  as.formula(
    paste(
      ".", 
      paste0(
        keep_vecs,
        collapse = " "
      ),
      sep = "~"
    )
  ),
  cbind(
    df[,keep_vecs],
    data.frame(
      do.call(
        cbind, 
        lapply(
          spread_vecs,
          function(x){
            y <- unique(
              df[,x]
            )
            setNames(
              data.frame(
                outer(
                  df[,x], 
                  y,
                  `==`
                )
              ),
              paste(
                x,
                y,
                sep = "_"
              )
            )
          }
        )
      ),
      row.names = NULL
    )
  ),
  FUN = function(x){
    ifelse(
      any(x),
      "Yes",
      "No"
    )
  }
)

Data:

df <- structure(list(ID = c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L), Name = c("John", 
"John", "John", "John", "John", "John", "Kelly", "Kelly"), Color = c("Blue", 
"Red", "Yellow", "Blue", "Red", "Yellow", "Blue", "Red"), Food = c("Pizza", 
"Pizza", "Pizza", "Ice Cream", "Ice Cream", "Ice Cream", "Pizza", 
"Pizza")), class = "data.frame", row.names = c(NA, -8L))
  • Related