Home > Software design >  Turning a long-format dataframe into a one-row wide-format dataframe
Turning a long-format dataframe into a one-row wide-format dataframe

Time:12-15

I was wondering is there is a way to wide-format my long_data to achieve my desired_output below?

I tried pivot_wider(long_data, id_cols= -c(class, sub, order),names_from= DV, values_from= score) without success.

library(tidyverse)
m="
class sub  order time DV score task_type
1     1    s-c   1    ac 1     simple
1     1    s-c   1    bc 2     simple
1     1    s-c   2    ac 3     complex
1     1    s-c   2    bc 4     complex
"

long_data = read.table(text = m, header = TRUE)
desired_output =
"
class sub  order DV_ac_T1 DV_ac_T2 DV_bc_T1 DV_bc_T2 task_type_T1  task_type_T2
1     1    s-c   1        3        2        4        simple        complex
"

CodePudding user response:

We may use pivot_wider with names_glue to adjust the column names format

library(tidyr)
pivot_wider(long_data, names_from = c(DV, time), 
     values_from = c(score, task_type), names_glue = "{.value}_{DV}_T{time}")

-output

# A tibble: 1 × 11
  class   sub order score_ac_T1 score_bc_T1 score_ac_T2 score_bc_T2 task_type_ac_T1 task_type_bc_T1 task_type_ac_T2 task_type_bc_T2
  <int> <int> <chr>       <int>       <int>       <int>       <int> <chr>           <chr>           <chr>           <chr>          
1     1     1 s-c             1           2           3           4 simple          simple          complex         complex      

If we want to remove the 'score'

library(stringr)
pivot_wider(long_data, names_from = c(DV, time), 
     values_from = c(score, task_type), 
    names_glue = "{str_remove(str_c(.value, '_', DV), 'score_')}_T{time}")

-output

# A tibble: 1 × 11
  class   sub order ac_T1 bc_T1 ac_T2 bc_T2 task_type_ac_T1 task_type_bc_T1 task_type_ac_T2 task_type_bc_T2
  <int> <int> <chr> <int> <int> <int> <int> <chr>           <chr>           <chr>           <chr>          
1     1     1 s-c       1     2     3     4 simple          simple          complex         complex        

CodePudding user response:

Here is another option that gives the desired output (not elegant as @akrun) :-)

library(dplyr)
library(tidyr)

long_data %>% 
  mutate(x = paste0("DV_", DV, "_T", time)) %>% 
  mutate(y = paste0("task_type", "_T", time)) %>% 
  select(-c(DV, time)) %>% 
  pivot_wider(names_from = x, 
              values_from  = score) %>% 
  pivot_wider(names_from = y,
              values_from = task_type) %>% 
  summarise(across(everything(), ~ max(., na.rm = T)))
  class   sub order DV_ac_T1 DV_bc_T1 DV_ac_T2 DV_bc_T2 task_type_T1 task_type_T2
  <int> <int> <chr>    <int>    <int>    <int>    <int> <chr>        <chr>       
1     1     1 s-c          1        2        3        4 simple       complex  
  • Related