Home > Software engineering >  R: expand grid of all possible combinations within groups and apply functions across all the pairs
R: expand grid of all possible combinations within groups and apply functions across all the pairs

Time:10-10

# Example input: two observations per time point for firms a, b and c.
data <- tibble(
  time = rep(c(1, 2), each = 2),
  a = c(1, 2, 3, 4),
  b = c(4, 3, 2, 1),
  c = rep(1, 4)
)

The result will look like this

# Expected output: one row per (time, firm1, firm2) combination.
result <- tibble(
  t = rep(c(1, 2), each = 9),
  firm1 = rep(rep(c("a", "b", "c"), each = 3), times = 2),
  firm2 = rep(c("a", "b", "c"), times = 6),
  value = c(
    6, 10, 5, 10, 14, 9, 5, 9, 4,
    14, 10, 9, 10, 6, 5, 9, 5, 4
  )
)
result

The function could be

# Pair function: the total of all values in both inputs.
function(x, y) {
  sum(x, y)
}

Basically, I am looking for a tidy solution that applies expand.grid to the data at each point in time and then applies a function across every pair of columns. Can anyone help? I tried this, but I could not get the time column in front of the pairs.

# Attempt from the question: builds every ordered pair of column names of
# `data` (all columns except the first) and applies `fun1` to the matching
# columns of the whole `data`. Nothing here groups by time, so the result
# carries no per-time breakdown.
# NOTE(review): `fun1` is not defined in the visible snippet; presumably it is
# the pair function shown above. Confirm before running.
expected_result<-expand.grid(names(data[-1]), names(data[-1])) %>%
  mutate(value = map2(Var1, Var2, ~ fun1(data[.x], data[.y])))
expected_result

CodePudding user response:

We may use

library(dplyr)
library(tidyr)
library(purrr)
 # Total each firm column within every time point, reshape to long form
 # (one row per time/firm pair), then split into one tibble per time point.
 data1 <- data %>%
   group_by(time) %>%
   summarise(across(everything(), ~ sum(.x, na.rm = TRUE)), .groups = "drop") %>%
   pivot_longer(cols = -time) %>%
   group_split(time)

 # For each time point, list every ordered firm pair and add the two totals.
 # `+` is symmetric, so the column-major values returned by outer() line up
 # with the rows of crossing(). This relies on the firm names already being in
 # sorted order, because crossing() sorts its inputs.
 map_dfr(data1, ~ {
   dat <- .x
   crossing(firm1 = dat$name, firm2 = dat$name) %>%
     mutate(value = c(outer(dat$value, dat$value, FUN = `+`))) %>%
     mutate(time = first(dat$time), .before = 1)
 })

Output:

# A tibble: 18 × 4
    time firm1 firm2 value
   <dbl> <chr> <chr> <dbl>
 1     1 a     a         6
 2     1 a     b        10
 3     1 a     c         5
 4     1 b     a        10
 5     1 b     b        14
 6     1 b     c         9
 7     1 c     a         5
 8     1 c     b         9
 9     1 c     c         4
10     2 a     a        14
11     2 a     b        10
12     2 a     c         9
13     2 b     a        10
14     2 b     b         6
15     2 b     c         5
16     2 c     a         9
17     2 c     b         5
18     2 c     c         4

CodePudding user response:

Use expand.grid to get all possible combinations of columns, split the data by time, and apply fun to each row of tmp.

library(dplyr)
library(purrr)

# Every ordered pair of firm columns (all columns of `data` except the first).
tmp <- expand.grid(
  firm1 = names(data[-1]),
  firm2 = names(data[-1])
)

# Pair function: the total of all values in both columns.
fun <- function(x, y) {
  sum(x, y)
}

# Split by time point, score every firm pair inside each slice, and stack the
# per-slice data frames into one result.
result <- data %>%
  group_split(time) %>%
  map_df(function(slice) {
    pair_values <- apply(tmp, 1, function(pair) {
      fun(slice[[pair[1]]], slice[[pair[2]]])
    })
    cbind(time = slice$time[1], tmp, value = pair_values)
  })

result

#   time firm1 firm2 value
#1     1     a     a     6
#2     1     b     a    10
#3     1     c     a     5
#4     1     a     b    10
#5     1     b     b    14
#6     1     c     b     9
#7     1     a     c     5
#8     1     b     c     9
#9     1     c     c     4
#10    2     a     a    14
#11    2     b     a    10
#12    2     c     a     9
#13    2     a     b    10
#14    2     b     b     6
#15    2     c     b     5
#16    2     a     c     9
#17    2     b     c     5
#18    2     c     c     4

You may also do this in base R -

# Base R variant: by() hands each time slice to the closure, which scores every
# firm pair in `tmp`; the per-slice data frames are then row-bound together.
result <- do.call(
  rbind,
  by(data, data$time, function(slice) {
    pair_values <- apply(tmp, 1, function(pair) {
      fun(slice[[pair[1]]], slice[[pair[2]]])
    })
    cbind(time = slice$time[1], tmp, value = pair_values)
  })
)
  • Related