I want to collect members of one column in my data frame where it has all the values in another column.
The sample dataset looks like this:
df <- structure(list(ID=c('a','a','a','a','b','b','c','c','d','d','d','d','e','e','f','f','f','g','h','h','h','h','i','i','i','i'),
A1=c('g1','g1','g1','g1','g1','g1','g1','g1','g2','g2','g2','g2','g2','g2','g2', 'g2','g2','g3', 'g3','g3','g3','g3','g3','g3', 'g3','g3'),
number=c(1, 2,3,4,1,2,1,3,1,2,3,4,2,3,2,3,4,1,1,2,3,4,1,2,3,4)),
class = "data.frame", row.names = c(NA,-26L))
As a result I want to know which groups have exactly all of the given values in the column number.
df.result<- structure(list(ID=c('a','b','c','d','e','f','g','h','i'),
A1=c('g1','g1','g1','g2','g2','g2','g3','g3','g3'),
all.numbers=c('yes','no','no','yes','no','no','no','yes','yes')),
class = "data.frame", row.names = c(NA,-9L))
Thanks
CodePudding user response:
How about this? It produces your expected output, let me know if there are any more test cases.
df %>%
group_by(ID, A1) %>%
summarise(all_numbers = all(number == row_number()) & n() == 4, .groups = "drop")
CodePudding user response:
Using left_join
df %>%
left_join(., df.result, by = c("ID", "A1"))
edit
considering @dcsuka's comment
another way to get your df.result
is using a condition when n
rows are >= 4
.
df %>%
group_by(ID, A1) %>%
tally() %>%
mutate(n = case_when(n >= 4 ~ "yes",
T ~ "no")) %>%
rename(all.numbers = n)
## A tibble: 9 x 3
## Groups: ID [9]
#ID A1 all.numbers
#<chr> <chr> <chr>
# 1 a g1 yes
#2 b g1 no
#3 c g1 no
#4 d g2 yes
#5 e g2 no
#6 f g2 no
#7 g g3 no
#8 h g3 yes
#9 i g3 yes
CodePudding user response:
Here is a base-R solution using with()
:
with(df, {
numbers = unique(number)
IDs = unique(ID)
df_byID = split(df, ~ ID)
# next line only works cause there is one unique 'A1' value for each 'ID'
A1s = sapply(df_byID, function (x) unique(x[['A1']]))
all.numbers = sapply(df_byID, function (x) all(numbers %in% x[['number']]))
data.frame(ID = IDs, A1 = A1s, all.numbers = ifelse(all.numbers, 'yes', 'no'))
})
# ID A1 all.numbers
#a a g1 yes
#b b g1 no
#c c g1 no
#d d g2 yes
#e e g2 no
#f f g2 no
#g g g3 no
#h h g3 yes
#i i g3 yes