Home > Back-end >  Create a long data format and a week variable in R
Create a long data format and a week variable in R

Time:10-11

I want to transform the following data frame from wide to long with the dplyr library and consequently add a new column which gives the value of 1 to 7 ( those are the week numbers).

Basically, ID1 should get the values of AI_1, AI_2, AI_3, AI_4, AI_5, AI_6, AI_7 in a new column with a new name AI. and so forth for ID2, ID3, and ID4 and activity1...., activity7. And finally a new variable WEEK that contains the values 1 to 7 per IDn

I tried and use the following code but it does not work.

df_long <-
  df4 %>%
  pivot_longer(!c(ID, x1.time), names_to = "Parameters", values_to = "AI") %>%
  mutate(Time = parse_number(Parameters),
         MeanNames = case_when(grepl("AI", Parameters) ~ "AI",
                               grepl("Activity", Parameters) ~ "Activity" %>%
  select(ID,Time,AI,activity) %>%
  pivot_wider(names_from = MeanNames,values_from =AI_mean)
    structure(list(ID = c("ID1", "ID10", "ID11", "ID12", "ID13", 
"ID14", "ID15", "ID16", "ID17", "ID18", "ID19", "ID2", "ID20", 
"ID21", "ID22", "ID23", "ID24", "ID25", "ID26", "ID27"), x1.time = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), activity1 = c(0, 
0.363888888888889, 0.286111111111111, 0.531944444444444, 0.290972222222222, 
0.382638888888889, 0.334722222222222, 0.557638888888889, 0.238888888888889, 
0.154166666666667, 0, 0, 0.448611111111111, 0.124305555555556, 
0.290972222222222, 0.131944444444444, 0.255555555555556, 0.252777777777778, 
0.215277777777778, 0.303472222222222), activity2 = c(0.303472222222222, 
0.2875, 0.294444444444444, 0.280555555555556, 0.304861111111111, 
0.359722222222222, 0.347916666666667, 0.264583333333333, 0.19375, 
0.171527777777778, 0.291666666666667, 0.297222222222222, 0.213888888888889, 
0.222222222222222, 0.304861111111111, 0.113888888888889, 0.208333333333333, 
0.324305555555556, 0.234027777777778, 0.364583333333333), activity3 = c(0.232638888888889, 
0.323611111111111, 0.2, 0.222222222222222, 0.323611111111111, 
0.390972222222222, 0.381944444444444, 0.306944444444444, 0.19375, 
0.263888888888889, 0.263194444444444, 0.228472222222222, 0.248611111111111, 
0.313194444444444, 0.323611111111111, 0.0840277777777778, 0.189583333333333, 
0.394444444444444, 0.302777777777778, 0.442361111111111), activity4 = c(0.228472222222222, 
0.359722222222222, 0.261111111111111, 0.275694444444444, 0.252777777777778, 
0.331944444444444, 0.309027777777778, 0.274305555555556, 0.232638888888889, 
0.277777777777778, 0.270138888888889, 0.207638888888889, 0.288888888888889, 
0.210416666666667, 0.252777777777778, NA, NA, NA, NA, NA), activity5 = c(0.348611111111111, 
0.33125, 0.261805555555556, 0.409722222222222, 0.342361111111111, 
0.372916666666667, 0.193055555555556, 0.208333333333333, 0.286111111111111, 
0.290972222222222, 0.691666666666667, 0.275, 0.25625, 0.294444444444444, 
0.342361111111111, NA, NA, NA, NA, NA), activity6 = c(0.215972222222222, 
0.159027777777778, 0.10625, 0.155555555555556, NA, 0.263888888888889, 
0.227777777777778, 0.125, 0.234027777777778, 0.232638888888889, 
NA, 0.198611111111111, NA, NA, NA, NA, NA, NA, NA, NA), activity7 = c(0.123611111111111, 
0.129861111111111, 0.167361111111111, 0.224305555555556, NA, 
0.347916666666667, 0.203472222222222, 0.190972222222222, NA, 
NA, NA, 0.190277777777778, NA, NA, NA, NA, NA, NA, NA, NA), AI_1 = c(0.393672183448241, 
0.438868800326202, 0.301835910942944, 0.40417558245206, 0.642022052126492, 
0.142408584001277, 0.563028495670381, 0.118116816566331, 0.436212462794191, 
0.514300047314275, 0.257247546067607, 0.335604284591974, 0.291193390934394, 
0.341934289475186, 0.642022052126492, 0.324860973193389, 0.557537537102492, 
0.247198790391852, 0.280429188867851, 0.327332083531813), AI_2 = c(0.44750235206392, 
0.46972327227022, 0.173959743093479, 0.421306848407757, 0.218793365833983, 
0.168790497373066, 0.445596632079763, 0.364687582621664, 0.336361741323463, 
0.319688614645366, 0.168382532506642, 0.254056078037093, 0.21746835769531, 
0.160789612606635, 0.218793365833983, 0.266311313005776, 0.176325908027991, 
0.312408009841627, 0.169341786618747, 0.356053484162169), AI_3 = c(0.390957508679111, 
0.307322261599723, 0.145575504943204, 0.387689075911702, 0.427994003058763, 
0.151832437379957, 0.387053429665751, 0.119132014224298, 0.34516992971253, 
0.47310811763004, 0.313667364304496, 0.295658016477924, 0.397331683205576, 
0.441376185979552, 0.427994003058763, 0.177419794042628, 0.232604123120372, 
0.35835034138553, 0.166844732038584, 0.262686661252918), AI_4 = c(0.292133451860516, 
0.627664748633739, 0.168042970470727, 0.191594272379608, 0.246615732293793, 
0.13833361951739, 0.717029959712378, 0.494249194948433, 0.410376411198318, 
0.352738104642845, 0.179616804877742, 0.15798984548373, 0.19307993691809, 
0.191789936049133, 0.246615732293793, NA, NA, NA, NA, NA), AI_5 = c(0.137422020311649, 
0.47671458715848, 0.318789374750512, 0.0991406883545277, 0.572774291410116, 
0.36272185628419, 0.163807707465756, 0.276116364601869, 0.424367165540981, 
0.0812096382549585, 0.158958073184742, 0.246078855017164, 0.134373938223757, 
0.221318159585512, 0.572774291410116, NA, NA, NA, NA, NA), AI_6 = c(0.237710368521645, 
0.249932172314976, 0.129535724382782, 0.228112502799076, NA, 
0.161907840368906, 0.267396839935385, 0.239625058217238, 0.269110377528862, 
0.0783380834367801, NA, 0.152294273122736, NA, NA, NA, NA, NA, 
NA, NA, NA), AI_7 = c(0.122793946391041, 0.340170989947673, 0.409484578569937, 
0.345311080770631, NA, 0.131960673054408, 0.427318121458564, 
0.245755226369851, NA, NA, NA, 0.140419853417226, NA, NA, NA, 
NA, NA, NA, NA, NA)), row.names = c(NA, 20L), class = "data.frame")

It should look similar to the following dataframe: enter image description here

CodePudding user response:

You should use names_to with .value and define the pattern in names_pattern (here, the first group is either "activity" or "AI", and the second group is the digit, \\d). The name of the column is given by the name of the group in the same position as .value (here, first one).

library(tidyr)
df %>% 
  pivot_longer(-c(ID, x1.time), 
               names_to = c(".value", "Time"),
               names_pattern = "(activity|AI)_*(\\d)")

output

# A tibble: 140 × 5
   ID    x1.time Time  activity    AI
   <chr>   <dbl> <chr>    <dbl> <dbl>
 1 ID1        NA 1        0     0.394
 2 ID1        NA 2        0.303 0.448
 3 ID1        NA 3        0.233 0.391
 4 ID1        NA 4        0.228 0.292
 5 ID1        NA 5        0.349 0.137
 6 ID1        NA 6        0.216 0.238
 7 ID1        NA 7        0.124 0.123
 8 ID10       NA 1        0.364 0.439
 9 ID10       NA 2        0.288 0.470
10 ID10       NA 3        0.324 0.307
# … with 130 more rows
  • Related