I need to tidy the following data set in a compact manner using pivot_longer.
My aim is to have a trial column that shows the numbers in the column names from will_f1:will_f16, dface1:dface16, dfood1:dfood16, dout1:dout16. I would like to also retrieve multiple columns for the variables as will, face, food, out.
Currently I am using a code like the following for each variable (will, face, food, out) separately then I merge the separate data frames based on id and trial:
will_long<-data.will %>%
select(ID:will_f16) %>%
pivot_longer(starts_with("will_"),
names_to ="trial",
names_prefix = "will_f",
values_to = "will")
Data sample:
ID,will_f1,will_f2,will_f3,will_f4,will_f5,will_f6,will_f7,will_f8,will_f9,will_f10,will_f11,will_f12,will_f13,will_f14,will_f15,will_f16,will_ef1,will_ef2,will_ef3,will_ef4,will_ef5,will_ef6,will_ef7,will_ef8,dface1,dface2,dface3,dface4,dface5,dface6,dface7,dface8,dface9,dface10,dface11,dface12,dface13,dface14,dface15,dface16,dfood1,dfood2,dfood3,dfood4,dfood5,dfood6,dfood7,dfood8,dfood9,dfood10,dfood11,dfood12,dfood13,dfood14,dfood15,dfood16,dout1,dout2,dout3,dout4,dout5,dout6,dout7,dout8,dout9,dout10,dout11,dout12,dout13,dout14,dout15,dout16,dexfood1,dexfood2,dexfood3,dexfood4,dexfood5,dexfood6,dexfood7,dexfood8
1,6,1,1,5,6,2,1,1,1,1,5,3,2,5,3,1,1,3,2,1,5,1,4,1,QID117,QID118,QID123,QID114,QID113,QID111,QID110,QID124,QID122,QID119,QID109,QID112,QID115,QID121,QID120,QID108,QID70,QID56,QID61,QID49,QID80,QID3,QID53,QID57,QID29,QID69,QID73,QID81,QID63,QID76,QID47,QID85,QID92,QID103,QID95,QID97,QID106,QID90,QID93,QID105,QID100,QID107,QID102,QID99,QID101,QID94,QID91,QID104,QID45,QID35,QID67,QID51,QID86,QID58,QID74,QID82
CodePudding user response:
Perhaps this helps - rename
the columns that doesn't have the _
by inserting the _
between the letter and the digit using a regex lookaround, then use pivot_longer
to split at the _
by capturing the characters before and after the _
with names_pattern
library(dplyr)
library(stringr)
library(tidyr)
data.will %>%
rename_with(~ str_replace(., "(?<=[A-Za-z])(?=\\d)", "_"), -contains("_")) %>%
pivot_longer(col = -ID, names_to = c(".value", "trial"),
names_pattern = "^([^_] )_f?(.*)")
-output
# A tibble: 24 × 7
ID trial will dface dfood dout dexfood
<int> <chr> <int> <chr> <chr> <chr> <chr>
1 1 1 6 QID117 QID70 QID92 QID45
2 1 2 1 QID118 QID56 QID103 QID35
3 1 3 1 QID123 QID61 QID95 QID67
4 1 4 5 QID114 QID49 QID97 QID51
5 1 5 6 QID113 QID80 QID106 QID86
6 1 6 2 QID111 QID3 QID90 QID58
7 1 7 1 QID110 QID53 QID93 QID74
8 1 8 1 QID124 QID57 QID105 QID82
9 1 9 1 QID122 QID29 QID100 <NA>
10 1 10 1 QID119 QID69 QID107 <NA>
# … with 14 more rows
data
data.will <- structure(list(ID = 1L, will_f1 = 6L, will_f2 = 1L, will_f3 = 1L,
will_f4 = 5L, will_f5 = 6L, will_f6 = 2L, will_f7 = 1L, will_f8 = 1L,
will_f9 = 1L, will_f10 = 1L, will_f11 = 5L, will_f12 = 3L,
will_f13 = 2L, will_f14 = 5L, will_f15 = 3L, will_f16 = 1L,
will_ef1 = 1L, will_ef2 = 3L, will_ef3 = 2L, will_ef4 = 1L,
will_ef5 = 5L, will_ef6 = 1L, will_ef7 = 4L, will_ef8 = 1L,
dface1 = "QID117", dface2 = "QID118", dface3 = "QID123",
dface4 = "QID114", dface5 = "QID113", dface6 = "QID111",
dface7 = "QID110", dface8 = "QID124", dface9 = "QID122",
dface10 = "QID119", dface11 = "QID109", dface12 = "QID112",
dface13 = "QID115", dface14 = "QID121", dface15 = "QID120",
dface16 = "QID108", dfood1 = "QID70", dfood2 = "QID56", dfood3 = "QID61",
dfood4 = "QID49", dfood5 = "QID80", dfood6 = "QID3", dfood7 = "QID53",
dfood8 = "QID57", dfood9 = "QID29", dfood10 = "QID69", dfood11 = "QID73",
dfood12 = "QID81", dfood13 = "QID63", dfood14 = "QID76",
dfood15 = "QID47", dfood16 = "QID85", dout1 = "QID92", dout2 = "QID103",
dout3 = "QID95", dout4 = "QID97", dout5 = "QID106", dout6 = "QID90",
dout7 = "QID93", dout8 = "QID105", dout9 = "QID100", dout10 = "QID107",
dout11 = "QID102", dout12 = "QID99", dout13 = "QID101", dout14 = "QID94",
dout15 = "QID91", dout16 = "QID104", dexfood1 = "QID45",
dexfood2 = "QID35", dexfood3 = "QID67", dexfood4 = "QID51",
dexfood5 = "QID86", dexfood6 = "QID58", dexfood7 = "QID74",
dexfood8 = "QID82"), class = "data.frame", row.names = c(NA,
-1L))