Home > Blockchain >  Grouping and stacking data
Grouping and stacking data

Time:01-13

A sample of my data is :

dat <- read.table(text = " ID   BC1 DC1 DE1 MN2 DC2 PO2 SA3 BC3 KL3 AA4 AP4 BC4 PO4
        1   2   1   2   3   1   3   1   1   3   2   2   2   2
        2   3   1   1   2   3   1   1   2   3   1   1   3   2
        3   2   3   2   3   2   3   2   1   1   3   1   1   1
        4   3   3   1   1   1   1   1   2   2   1   2   1   2", header = TRUE)

I want to get the following table and missing data are blank

ID Group1  Group2  Group3  Group4
 1      2       1       2      
 2      3       1       1      
 3      2       3       2      
 4      3       3       1      
 1      3       1       3      
 2      2       3       1      
 3      3       2       3      
 4      1       1       1      
 1      1       1       3      
 2      1       2       3      
 3      2       1       1      
 4      1       2       2      
 1      2       2       2      2
 2      1       1       3      2
 3      3       1       1      1
 4      1       2       1      2

The number in front of each column is where the columns are separated from each other. For example BC1, DC1 and DE1. They form the first four rows with their Ids and MN2, DC2 and PO2 form the second rows with their IDs and so on.

CodePudding user response:

What about:

library(tidyr)
library(dplyr)

dat |>
    pivot_longer(-ID, names_sep = "(?=\\d)", names_to = c(NA, "id")) |>
    group_by(ID, id) |>
    mutate(name = row_number()) |> 
    pivot_wider(id, values_fn = list) |>
    unnest_longer(everything()) |>
    ungroup()

Output:

# A tibble: 16 × 5
   id      `1`   `2`   `3`   `4`
   <chr> <int> <int> <int> <int>
 1 1         2     1     2    NA
 2 1         3     1     1    NA
 3 1         2     3     2    NA
 4 1         3     3     1    NA
 5 2         3     1     3    NA
 6 2         2     3     1    NA
 7 2         3     2     3    NA
 8 2         1     1     1    NA
 9 3         1     1     3    NA
10 3         1     2     3    NA
11 3         2     1     1    NA
12 3         1     2     2    NA
13 4         2     2     2     2
14 4         1     1     3     2
15 4         3     1     1     1
16 4         1     2     1     2

CodePudding user response:

Would be interesting to see if there is an easier approach:

library(tidyverse)

dat |> 
  pivot_longer(-ID) |> 
  mutate(id = str_extract(name, "\\d$")) |> 
  group_by(ID, id) |> 
  mutate(name = paste0("Group", row_number())) |>
  ungroup() |> 
  pivot_wider(names_from = name, values_from = value) |> 
  arrange(id, ID) |> 
  select(-id)
#> # A tibble: 16 × 5
#>       ID Group1 Group2 Group3 Group4
#>    <int>  <int>  <int>  <int>  <int>
#>  1     1      2      1      2     NA
#>  2     2      3      1      1     NA
#>  3     3      2      3      2     NA
#>  4     4      3      3      1     NA
#>  5     1      3      1      3     NA
#>  6     2      2      3      1     NA
#>  7     3      3      2      3     NA
#>  8     4      1      1      1     NA
#>  9     1      1      1      3     NA
#> 10     2      1      2      3     NA
#> 11     3      2      1      1     NA
#> 12     4      1      2      2     NA
#> 13     1      2      2      2      2
#> 14     2      1      1      3      2
#> 15     3      3      1      1      1
#> 16     4      1      2      1      2

CodePudding user response:

You can rename the data with a specified pattern ("index1_index2"), i.e.

#   ID 1_1 1_2 1_3 2_1 2_2 2_3 3_1 3_2 3_3 4_1 4_2 4_3 4_4
# 1  1   2   1   2   3   1   3   1   1   3   2   2   2   2
# 2  2   3   1   1   2   3   1   1   2   3   1   1   3   2
# 3  3   2   3   2   3   2   3   2   1   1   3   1   1   1
# 4  4   3   3   1   1   1   1   1   2   2   1   2   1   2

so that you can add the special element ".value" to names_to when using pivot_longer() to stack multiple columns that are grouped by that pattern.

Code
library(dplyr)
library(tidyr)

dat %>%
  rename_with(~ sub('\\D ', '', .x) %>%
                paste(., ave(., ., FUN = seq), sep = '_'), -ID) %>%
  pivot_longer(-ID, names_to = c("set", ".value"), names_sep = '_') %>%
  arrange(set) %>%
  select(-set)
Output
# A tibble: 16 × 5
      ID   `1`   `2`   `3`   `4`
   <int> <int> <int> <int> <int>
 1     1     2     1     2    NA
 2     2     3     1     1    NA
 3     3     2     3     2    NA
 4     4     3     3     1    NA
 5     1     3     1     3    NA
 6     2     2     3     1    NA
 7     3     3     2     3    NA
 8     4     1     1     1    NA
 9     1     1     1     3    NA
10     2     1     2     3    NA
11     3     2     1     1    NA
12     4     1     2     2    NA
13     1     2     2     2     2
14     2     1     1     3     2
15     3     3     1     1     1
16     4     1     2     1     2
  • Related