Copy all pairwise combinations of dataframe and their combined name in a new dataframe-CodePudding

I was wondering how to generate a new dataframe containing all pairwise combinations of the value and the combined row and column name of another dataframe. To explain as an example I have the following dataframe:

# dataframe with col names a1:a5
df <- data.frame(a1 = c(4, 2, 6, 9, 13),
                 a2 = c(56, 1, 47, 2, 3),
                 a3 = c(4, 6, 9, 11, 85),
                 a4 = c(6, 15, 4, 12, 3),
                 a5 = c(54, 94, 3, 2, 75))

# and with rownames a1:a5
rownames(df) <- c("a1","a2","a3","a4","a5")

df now looks like this:

	a1	a2	a3	a4	a5
a1	4	56	4	6	54
a2	2	1	6	15	94
a3	6	47	9	4	3
a4	9	2	11	12	2
a5	13	3	85	3	75

I need a new dataframe of all possible combinations (so 25x2) looking like this:

	Step	Value
1	a1a1	4
2	a1a2	56
3	a1a3	4
4	a1a4	6
...	...	...
25	a5a5	75

Thank you.

CodePudding user response：

You could convert the data to a table and back to a data.frame.

df2 <- as.data.frame(as.table(as.matrix(df)))
df2[order(df2$Var1), ]

#    Var1 Var2 Freq
# 1    a1   a1    4
# 6    a1   a2   56
# 11   a1   a3    4
# 16   a1   a4    6
# 21   a1   a5   54
# 2    a2   a1    2
# 7    a2   a2    1
# 12   a2   a3    6
# 17   a2   a4   15
# 22   a2   a5   94
# ...

CodePudding user response：

You can put it in a long format:

library(tidyr)
library(dplyr)
 
 df %>%
  # add as column row names
  mutate(col1 = rownames(.)) %>%
  # from wide to long format
  pivot_longer( -col1, values_to = "Value", names_to = "col2") %>%
  # create the combination in the format you need
  mutate(step = paste0(col1,col2)) %>%
  # select useful columns
  select(step, Value) %>%
  # sort by step
  arrange(step)

# A tibble: 25 x 2
   step  Value
   <chr> <dbl>
 1 a1a1      4
 2 a1a2     56
 3 a1a3      4
 4 a1a4      6
 5 a1a5     54
 6 a2a1      2
 7 a2a2      1
 8 a2a3      6
 9 a2a4     15
10 a2a5     94
# ... with 15 more rows

CodePudding user response：

We may use stack.

stack(dat)
#    values ind
# 1       4  a1
# 2       2  a1
# 3       6  a1
# 4       9  a1
# 5      13  a1
# 6      56  a2
# 7     ...

Or, to be precise:

cbind(stack(as.data.frame(t(dat))), r=rownames(dat)) |>
  transform(step=paste0(ind, r)) |> subset(select=c(4, 1))
#    step values
# 1  a1a1      4
# 2  a1a2     56
# 3  a1a3      4
# 4  a1a4      6
# 5  a1a5     54
# 6  a2a1      2
# 7   ...

Data:

dat <- structure(list(a1 = c(4L, 2L, 6L, 9L, 13L), a2 = c(56L, 1L, 47L, 
2L, 3L), a3 = c(4L, 6L, 9L, 11L, 85L), a4 = c(6L, 15L, 4L, 12L, 
3L), a5 = c(54L, 94L, 3L, 2L, 75L)), class = "data.frame", row.names = c("a1", 
"a2", "a3", "a4", "a5"))