I'm having trouble pivoting my data. I can use a workaround to produce my desired output (shown below) but I suspect that it is possible in a single pivot_longer()
call, I just can't figure out the command. Any advice/help would be appreciated.
library(dplyr)
library(tidyr)
## Data
df <- structure(list(id_name = c("ID_108", "ID_109", "ID_112",
"ID_113", "ID_115", "ID_116", "ID_119", "ID_12", "ID_132",
"ID_134", "ID_142", "ID_145", "ID_15", "ID_151", "ID_152",
"ID_158", "ID_163", "ID_170", "ID_173", "ID_179", "ID_191",
"ID_199", "ID_202", "ID_203", "ID_204", "ID_205", "ID_208",
"ID_214", "ID_215", "ID_216", "ID_225", "ID_235", "ID_236",
"ID_241", "ID_242", "ID_26", "ID_28", "ID_37", "ID_44",
"ID_45", "ID_49", "ID_51", "ID_54", "ID_55", "ID_57",
"ID_60", "ID_64", "ID_69", "ID_71", "ID_72", "ID_75",
"ID_88", "ID_9", "ID_98"),
thing_time_1.tab = c(NA, NA, 128, 91, 81, 112, NA, 142, NA,
NA, 371, NA, 0, 127, 207, 92, NA, NA,
554, 435, 830, 89, 112, 291, 133, NA,
197, 130, 484, NA, NA, NA, 126, NA, NA,
94, 98, NA, NA, 168, NA, 48, NA, 146, 54,
NA, 173, 30, 425, NA, 58, 201, NA, NA),
thing_1.tab = c(NA, NA, "(AB)", "(AB)", "(BB)", "(BB)", NA, "(AB)", NA, NA, "(BB)",
NA, "(AB)", "(AB)", "(AB)", "(BA)", NA, NA, "(BB)", "(BB)", "(BB)",
"(AB)", "(AB)", "(BB)", "(BB)", NA, "(AB)", "(AB)", "(BB)", NA,
NA, NA, "(BA)", NA, NA, "(AB)", "(BB)", NA, NA, "(BA)", NA, "(AB)",
NA, "(BA)", "(BA)", NA, "(AB)", "(BB)", "(BB)", NA, "(AB)", "(AB)",
NA, NA),
thing_time_2.tab = c(NA, 212, NA, 183, NA, 118, NA, NA, 5519, 157, NA, NA, 437, NA,
NA, 359, NA, 342, NA, NA, NA, 179, NA, 386, 176, 47, NA, 177,
561, NA, NA, NA, NA, NA, 70, NA, NA, 68, NA, 238, NA, 194, NA,
301, NA, NA, 263, 243, 750, 904, 126, 399, NA, 124),
thing_2.tab = c(NA, "(BA)", NA, "(BB)", NA, "(BA)", NA, NA, "(BB)", "(AA)", NA, NA,
"(BA)", NA, NA, "(BB)", NA, "(BB)", NA, NA, NA, "(AB)", NA, "(BB)",
"(BB)", "(BB)", NA, "(BB)", "(BB)", NA, NA, NA, NA, NA, "(BB)", NA,
NA, "(BA)", NA, "(BB)", NA, "(AB)", NA, "(AB)", NA, NA, "(BB)", "(BA)",
"(BB)", "(BA)", "(AB)", "(BB)", NA, "(BA)"),
thing_time_3.tab = c(167, 251, NA, 259, NA, NA, NA, NA, 5580, NA, NA, NA, NA, NA, NA, NA,
NA, 370, NA, NA, NA, NA, NA, 411, NA, 174, NA, NA, 656, NA, NA,
NA, NA, NA, NA, NA, NA, 120, NA, NA, NA, NA, 334, NA, NA, NA,
NA, NA, NA, 941, NA, NA, NA, 192),
thing_3.tab = c("(BB)", "(AB)", NA, "(BB)", NA, NA, NA, NA, "(AB)", NA, NA, NA, NA, NA, NA, NA,
NA, "(BB)", NA, NA, NA, NA, NA, "(BB)", NA, "(BB)", NA, NA, "(BA)",
NA, NA, NA, NA, NA, NA, NA, NA, "(AB)", NA, NA, NA, NA, "(AB)",
NA, NA, NA, NA, NA, NA, "(BB)", NA, NA, NA, "(AB)"),
thing_time_4.tab = c(NA, 575, NA, NA, NA, NA, NA, NA, 5615, NA, NA, NA, NA, NA, NA, NA,
NA, 401, NA, NA, NA, NA, NA, NA, 232, NA, NA, NA, NA, 319, NA,
NA, NA, NA, NA, NA, NA, 215, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 223),
thing_4.tab = c(NA, "(BB)", NA, NA, NA, NA, NA, NA, "(AB)", NA, NA, NA, NA, NA, NA, NA, NA,
"(BB)", NA, NA, NA, NA, NA, NA, "(BB)", NA, NA, NA, NA, "(BB)",
NA, NA, NA, NA, NA, NA, NA, "(BB)", NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, "(BB)"),
thing_time_5.tab = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
421, NA, NA, NA, NA, NA, NA, 278, NA, NA, NA, NA, 370, NA, NA,
NA, 451, NA, NA, NA, 280, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, 507),
thing_5.tab = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "(BB)", NA,
NA, NA, NA, NA, NA, "(BA)", NA, NA, NA, NA, "(BB)", NA, NA, NA,
"(BB)", NA, NA, NA, "(BB)", NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, "(BB)"),
thing_time_6.tab = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, 445, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA),
thing_6.tab = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, "(BA)", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA),
thing_time_7.tab = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, 576, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
thing_7.tab = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "(BB)",
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA),
thing_time_8.tab = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 719, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA),
thing_8.tab = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, "(BB)", NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)),
row.names = c(NA, -54L), class = "data.frame")
Outcome I want ("df_2"):
## Outcome I want ("df_2")
part1 <- df %>%
pivot_longer(-id_name,
names_to = c("item", ".value"),
names_sep = "[_.]",
values_transform = list(.value = as.character)) %>%
select(id_name, time) %>%
na.omit()
#> Warning: Expected 2 pieces. Additional pieces discarded in 16 rows [1, 2, 3, 4,
#> 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16].
part2 <- df %>%
pivot_longer(-id_name,
names_to = c("item", ".value"),
names_sep = "[_.]",
values_transform = list(.value = as.character)) %>%
select(-c(item, time)) %>%
pivot_longer(-id_name) %>%
select(-name) %>%
na.omit()
#> Warning: Expected 2 pieces. Additional pieces discarded in 16 rows [1, 2, 3, 4,
#> 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16].
df_2 <- part2 %>%
bind_cols(part1) %>%
select(-`id_name...3`) %>%
rename("ID" = "id_name...1",
"Thing" = "value",
"Time" = "time")
#> New names:
#> • `id_name` -> `id_name...1`
#> • `id_name` -> `id_name...3`
df_2
#> # A tibble: 86 × 3
#> ID Thing Time
#> <chr> <chr> <dbl>
#> 1 ID_108 (BB) 167
#> 2 ID_109 (BA) 212
#> 3 ID_109 (AB) 251
#> 4 ID_109 (BB) 575
#> 5 ID_112 (AB) 128
#> 6 ID_113 (AB) 91
#> 7 ID_113 (BB) 183
#> 8 ID_113 (BB) 259
#> 9 ID_115 (BB) 81
#> 10 ID_116 (BB) 112
#> # … with 76 more rows
My attempts:
## My failed attempts:
df %>%
pivot_longer(-id_name,
names_to = c("item", ".value"),
names_pattern = "(. _. )\\_(\\d?)")
#> # A tibble: 486 × 10
#> id_name item `1` `2` `3` `4` `5` `6` `7` `8`
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 ID_108 thing_time NA NA 167 NA NA NA NA NA
#> 2 ID_108 <NA> NA NA NA NA NA NA NA NA
#> 3 ID_108 <NA> NA NA NA NA NA NA NA NA
#> 4 ID_108 <NA> NA NA NA NA NA NA NA NA
#> 5 ID_108 <NA> NA NA NA NA NA NA NA NA
#> 6 ID_108 <NA> NA NA NA NA NA NA NA NA
#> 7 ID_108 <NA> NA NA NA NA NA NA NA NA
#> 8 ID_108 <NA> NA NA NA NA NA NA NA NA
#> 9 ID_108 <NA> NA NA NA NA NA NA NA NA
#> 10 ID_109 thing_time NA 212 251 575 NA NA NA NA
#> # … with 476 more rows
## pivot-then-coalesce? Also no good...
df %>%
pivot_longer(-id_name,
names_to = c("item", ".value"),
names_sep = "[_.]",
values_transform = list(.value = as.character)) %>%
group_by(id_name) %>%
summarise(across(-item, ~max(., na.rm = TRUE)))
#> Warning: Expected 2 pieces. Additional pieces discarded in 16 rows [1, 2, 3, 4,
#> 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16].
#> # A tibble: 54 × 10
#> id_name time `1` `2` `3` `4` `5` `6` `7` `8`
#> <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 ID_108 167 <NA> <NA> (BB) <NA> <NA> <NA> <NA> <NA>
#> 2 ID_109 575 <NA> (BA) (AB) (BB) <NA> <NA> <NA> <NA>
#> 3 ID_112 128 (AB) <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 4 ID_113 259 (AB) (BB) (BB) <NA> <NA> <NA> <NA> <NA>
#> 5 ID_115 81 (BB) <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 6 ID_116 118 (BB) (BA) <NA> <NA> <NA> <NA> <NA> <NA>
#> 7 ID_119 -Inf <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 8 ID_12 142 (AB) <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 9 ID_132 5615 <NA> (BB) (AB) (AB) <NA> <NA> <NA> <NA>
#> 10 ID_134 157 <NA> (AA) <NA> <NA> <NA> <NA> <NA> <NA>
#> # … with 44 more rows
Created on 2022-05-24 by the reprex package (v2.0.1)
CodePudding user response:
Does this work for you?
df %>%
pivot_longer(-id_name,
names_to = c(".value", "number"),
names_pattern = "([_a-z] )_(\\d)",
values_transform = list(.value = as.character)) %>%
# select(-number) %>%
na.omit()
# A tibble: 86 × 4
id_name number thing_time thing
<chr> <chr> <dbl> <chr>
1 ID_108 3 167 (BB)
2 ID_109 2 212 (BA)
3 ID_109 3 251 (AB)
4 ID_109 4 575 (BB)
5 ID_112 1 128 (AB)
6 ID_113 1 91 (AB)
7 ID_113 2 183 (BB)
8 ID_113 3 259 (BB)
9 ID_115 1 81 (BB)
10 ID_116 1 112 (BB)
It does involve some clean-up of "thing-time" column title.