Home > front end >  Count combinations grouped by another variable
Count combinations grouped by another variable

Time:12-14

I have a dataframe of the following format

col_1 col_2
X A
X B
X C
Y B
Y C
Z A
Z C
Z D

For each element of col_1, I want to construct a column containing all the combinations of a given size taken from col_2. For example, for combinations of size 2, my result would look something like this:

col_1 col_2
X (A, B)
X (A, C)
X (B, C)
Y (B, C)
Y (B, C)
Z (A, C)
Z (A, D)
Z (C, D)

How can I do this?

Thanks in advance!

CodePudding user response:

library(tidyverse)

# Example input: one row per (col_1, col_2) observation, built column-wise.
data <- tibble(
  col_1 = rep(c("X", "Y", "Z"), times = c(3, 2, 3)),
  col_2 = c("A", "B", "C", "B", "C", "A", "C", "D")
)

# Build every size-2 combination of col_2 within each col_1 group.
# The result has one row per (col_1, pair); col_2 is a list-column whose
# elements are length-2 character vectors such as c("A", "B").
data2 <-
  data %>%
  # Name the nested column explicitly: the bare `nest(-col_1)` form is
  # deprecated and warns "All elements of `...` must be named".
  nest(data = -col_1) %>%
  mutate(
    data = map(data, function(group) {
      values <- group$col_2
      # combn() stops with "n < m" when a group has fewer than two values;
      # such a group simply has no pairs, so contribute zero rows instead.
      if (length(values) < 2) {
        return(tibble(col_2 = list()))
      }
      # simplify = FALSE returns the pairs as a list directly, so no unnamed
      # matrix has to go through as_tibble() (which warned about missing
      # column names).
      tibble(col_2 = combn(values, 2, simplify = FALSE))
    })
  ) %>%
  unnest(data)

# Repeat the pairs once per original row: each row of `data` (reduced to its
# col_1 key) is matched with every pair stored in `data2` for that key.
# The join columns are left implicit, so dplyr reports the key it picked.
as.data.frame(
  left_join(select(data, col_1), data2)
)
#> Joining, by = "col_1"
#>    col_1 col_2
#> 1      X  A, B
#> 2      X  A, C
#> 3      X  B, C
#> 4      X  A, B
#> 5      X  A, C
#> 6      X  B, C
#> 7      X  A, B
#> 8      X  A, C
#> 9      X  B, C
#> 10     Y  B, C
#> 11     Y  B, C
#> 12     Z  A, C
#> 13     Z  A, D
#> 14     Z  C, D
#> 15     Z  A, C
#> 16     Z  A, D
#> 17     Z  C, D
#> 18     Z  A, C
#> 19     Z  A, D
#> 20     Z  C, D

Created on 2021-12-13 by the reprex package (v2.0.1)

CodePudding user response:

library(tidyverse)

# Example input: one row per (col_1, col_2) observation, built column-wise.
df <- tibble(
  col_1 = rep(c("X", "Y", "Z"), times = c(3, 2, 3)),
  col_2 = c("A", "B", "C", "B", "C", "A", "C", "D")
)

# For each col_1 group, collapse every size-2 combination of col_2 into a
# comma-separated string, then expand to one row per combination.
df %>%
  group_nest(col_1) %>%
  mutate(
    col_2 = map(data, function(group) {
      # FUN = toString turns each pair into a single "A, B"-style string.
      combn(group$col_2, 2, FUN = toString)
    })
  ) %>%
  select(col_1, col_2) %>%
  unnest(col_2)
#> # A tibble: 7 x 2
#>   col_1 col_2
#>   <chr> <chr>
#> 1 X     A, B 
#> 2 X     A, C 
#> 3 X     B, C 
#> 4 Y     B, C 
#> 5 Z     A, C 
#> 6 Z     A, D 
#> 7 Z     C, D

Created on 2021-12-13 by the reprex package (v2.0.1)

CodePudding user response:

Maybe this data.table option could help

> setDT(df)[, .(V1 = combn(col_2, m = 2, FUN = toString)), by = col_1]
   col_1   V1
1:     X A, B
2:     X A, C
3:     X B, C
4:     Y B, C
5:     Z A, C
6:     Z A, D
7:     Z C, D
  • Related