I have a dataset where I just want each ID to have one row. Some of them have 2 of a different ID, and I want to make the dataset wide like in the sample output below.
ID. other_ID. height color
44 57 56 blue
32 99 28 green
66 23 19 yellow
66 56 5 purple
80 43 7 green
ID other_ID other_ID2 height1 height2 color1 color2
44 57 NA 56 NA blue NA
32 99 NA 28 NA green NA
66 23 56 19 5 yellow purple
80 43 NA 7 NA green NA
CodePudding user response:
df <- data.frame(
"ID." = c(44, 32, 66, 66, 80),
"other_ID." = c(57, 99, 23, 56, 43),
"height" = c(56, 28, 19, 5, 7),
"color" = c("blue", "green", "yellow", "purple", "green")
)
Here’s an idea using tidyverse
. The idea is to use the non-unique ID column to create a count of the number of observations within that ID, then use that count to convert the data to wide format.
library(tidyverse)
df_wide <-
df %>%
group_by(ID.) %>%
mutate(obs = row_number()) %>%
pivot_wider(id_cols = ID., values_from = c(other_ID., height, color), names_from = obs)
df_wide
#> # A tibble: 4 x 7
#> # Groups: ID. [4]
#> ID. other_ID._1 other_ID._2 height_1 height_2 color_1 color_2
#> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
#> 1 44 57 NA 56 NA blue <NA>
#> 2 32 99 NA 28 NA green <NA>
#> 3 66 23 56 19 5 yellow purple
#> 4 80 43 NA 7 NA green <NA>
CodePudding user response:
You can use a simple pivot_wider:
df %>%
group_by(ID) %>%
mutate(count_id = 1:n()) %>%
ungroup() %>%
pivot_wider(values_from = c(other_ID, height, color), names_from = count_id)
which gives:
# A tibble: 4 x 7
ID other_ID_1 other_ID_2 height_1 height_2 color_1 color_2
<int> <int> <int> <int> <int> <chr> <chr>
1 44 57 NA 56 NA blue <NA>
2 32 99 NA 28 NA green <NA>
3 66 23 56 19 5 yellow purple
4 80 43 NA 7 NA green <NA>
CodePudding user response:
A solution based on data.table
:
library(data.table)
df <- data.frame(
stringsAsFactors = FALSE,
ID = c(44L, 32L, 66L, 66L, 80L),
other_ID = c(57L, 99L, 23L, 56L, 43L),
height = c(56L, 28L, 19L, 5L, 7L),
color = c("blue", "green", "yellow", "purple", "green")
)
dcast(setDT(df), ID ~ rowid(ID), value.var = names(df)[-1])
#> ID other_ID_1 other_ID_2 height_1 height_2 color_1 color_2
#> 1: 32 99 NA 28 NA green <NA>
#> 2: 44 57 NA 56 NA blue <NA>
#> 3: 66 23 56 19 5 yellow purple
#> 4: 80 43 NA 7 NA green <NA>
A solution based on nest
and unnest_wider
:
library(tidyverse)
df %>%
nest(data = c(other_ID, height, color)) %>%
unnest_wider(data) %>% unnest_wider(other_ID, names_sep = "") %>%
unnest_wider(height, names_sep = "") %>% unnest_wider(color, names_sep = "")
#> # A tibble: 4 × 7
#> ID other_ID1 other_ID2 height1 height2 color1 color2
#> <int> <int> <int> <int> <int> <chr> <chr>
#> 1 44 57 NA 56 NA blue <NA>
#> 2 32 99 NA 28 NA green <NA>
#> 3 66 23 56 19 5 yellow purple
#> 4 80 43 NA 7 NA green <NA>
Another possible solution:
library(tidyverse)
df %>%
group_by(ID) %>%
summarise(across(everything(), ~ str_c(., collapse = ",")), .groups ="drop") %>%
reduce(names(.)[-1], function(x,y)
separate(x,y, into=str_c(y,1:2), sep=",", fill="right", convert=T), .init=.)
#> # A tibble: 4 × 7
#> ID other_ID1 other_ID2 height1 height2 color1 color2
#> <int> <int> <int> <int> <int> <chr> <chr>
#> 1 32 99 NA 28 NA green <NA>
#> 2 44 57 NA 56 NA blue <NA>
#> 3 66 23 56 19 5 yellow purple
#> 4 80 43 NA 7 NA green <NA>
CodePudding user response:
in Base R:
reshape(transform(df, time = ave(ID, ID, FUN = seq_along)), dir = 'wide', idvar = 'ID', sep="")
ID other_ID1 height1 color1 other_ID2 height2 color2
1 44 57 56 blue NA NA <NA>
2 32 99 28 green NA NA <NA>
3 66 23 19 yellow 56 5 purple
5 80 43 7 green NA NA <NA>
in Data.table:
library(data.table)
dcast(setDT(df), ID~rowid(ID), value.var = names(df)[-1], sep = "")
ID other_ID1 other_ID2 height1 height2 color1 color2
1: 32 99 NA 28 NA green <NA>
2: 44 57 NA 56 NA blue <NA>
3: 66 23 56 19 5 yellow purple
4: 80 43 NA 7 NA green <NA>