I have a dataframe such as
GroupsA GroupsB
G1 G2
G1 G3
G1 G4
G1 G5
G1 G6
G2 G3
G2 G4
G2 G5
G2 G6
G3 G4
G3 G5
G3 G6
G4 G5
G4 G6
G5 G6
And I have 3 lists such as :
list1<-c("G1","G3")
list2<-c("G2")
list3<-c("G4","G5","G6")
and I would like to add a new column to the dataframe by adding the name of the list from which the GroupA and B come from such as
GroupsA GroupsB Lists
G1 G2 list1&list2
G1 G3 list1&list1
G1 G4 list1&list3
G1 G5 list1&list3
G1 G6 list1&list3
G2 G3 list2&list1
G2 G4 list2&list3
G2 G5 list2&list3
G2 G6 list2&list3
G3 G4 list1&list3
G3 G5 list1&list3
G3 G6 list1&list3
G4 G5 list3&list3
G4 G6 list3&list3
G5 G6 list3&list3
Here is the dput format of the df if it can helps :
structure(list(GroupsA = c("G1", "G1", "G1", "G1", "G1", "G2",
"G2", "G2", "G2", "G3", "G3", "G3", "G4", "G4", "G5"), GroupsB = c("G2",
"G3", "G4", "G5", "G6", "G3", "G4", "G5", "G6", "G4", "G5", "G6",
"G5", "G6", "G6")), class = "data.frame", row.names = c(NA, -15L
))
CodePudding user response:
A base R solution can be to obtain the lists from your global environment, stack them in a data frame and match with your original data frame, i.e.
d1 <- stack(mget(grep('list', ls(), value = TRUE)))
df$Lists <- do.call(paste,
c(data.frame(sapply(df, function(i)d1$ind[match(i, d1$values)])),
sep = '&'))
GroupsA GroupsB Lists
1 G1 G2 list1&list2
2 G1 G3 list1&list1
3 G1 G4 list1&list3
4 G1 G5 list1&list3
5 G1 G6 list1&list3
6 G2 G3 list2&list1
7 G2 G4 list2&list3
8 G2 G5 list2&list3
9 G2 G6 list2&list3
10 G3 G4 list1&list3
11 G3 G5 list1&list3
12 G3 G6 list1&list3
13 G4 G5 list3&list3
14 G4 G6 list3&list3
15 G5 G6 list3&list3
DATA
df <- structure(list(GroupsA = c("G1", "G1", "G1", "G1", "G1", "G2",
"G2", "G2", "G2", "G3", "G3", "G3", "G4", "G4", "G5"), GroupsB = c("G2",
"G3", "G4", "G5", "G6", "G3", "G4", "G5", "G6", "G4", "G5", "G6",
"G5", "G6", "G6")), class = "data.frame", row.names = c(NA, -15L
))
CodePudding user response:
In tidyverse
:
library(tidyverse)
lists <- stack(lst(list1, list2, list3))
bind_cols(df,
df %>%
map_dfc(~ lists$ind[match(.x, lists$values)]) %>%
unite(col = "Lists", sep = "&"))
GroupsA GroupsB Lists
1 G1 G2 list1&list2
2 G1 G3 list1&list1
3 G1 G4 list1&list3
4 G1 G5 list1&list3
5 G1 G6 list1&list3
6 G2 G3 list2&list1
7 G2 G4 list2&list3
8 G2 G5 list2&list3
9 G2 G6 list2&list3
10 G3 G4 list1&list3
11 G3 G5 list1&list3
12 G3 G6 list1&list3
13 G4 G5 list3&list3
14 G4 G6 list3&list3
15 G5 G6 list3&list3
CodePudding user response:
With data.table
sqldf
library(sqldf)
library(data.table)
X = list(list1 = list1, list2 = list2, list3 = list3)
dt = rbindlist(lapply(X, as.data.table), idcol = 'list_from')
sqldf("SELECT x.V1 AS GroupsA,
y.V1 AS GroupsB,
(x.list_from ||'&'|| y.list_from) AS Lists
FROM dt AS x CROSS JOIN dt AS y
WHERE x.V1 < y.V1")
# GroupsA GroupsB Lists
# 1 G1 G3 list1&list1
# 2 G1 G2 list1&list2
# 3 G1 G4 list1&list3
# 4 G1 G5 list1&list3
# 5 G1 G6 list1&list3
# 6 G3 G4 list1&list3
# 7 G3 G5 list1&list3
# 8 G3 G6 list1&list3
# 9 G2 G3 list2&list1
# 10 G2 G4 list2&list3
# 11 G2 G5 list2&list3
# 12 G2 G6 list2&list3
# 13 G4 G5 list3&list3
# 14 G4 G6 list3&list3
# 15 G5 G6 list3&list3
Or just data.table
(won't scale well)
out = dt[, CJ(GA = V1, GB = V1)][GA < GB]
out[, froma := dt[.SD, on = .(V1 = GA), list_from]]
out[, fromb := dt[.SD, on = .(V1 = GB), list_from]]
out[, c('froma', 'fromb', 'lists') := .(NULL, NULL, paste(froma, fromb, sep = "&"))]
# GA GB lists
# <char> <char> <char>
# 1: G1 G2 list1&list2
# 2: G1 G3 list1&list1
# 3: G1 G4 list1&list3
# 4: G1 G5 list1&list3
# 5: G1 G6 list1&list3
# 6: G2 G3 list2&list1
# 7: G2 G4 list2&list3
# 8: G2 G5 list2&list3
# 9: G2 G6 list2&list3
# 10: G3 G4 list1&list3
# 11: G3 G5 list1&list3
# 12: G3 G6 list1&list3
# 13: G4 G5 list3&list3
# 14: G4 G6 list3&list3
# 15: G5 G6 list3&list3