How can I find a matching value between two columns in R?-CodePudding

I have a dataframe grouped that contains rows assigned by a reference number like so:

ref origin delivery
   1   x      lux
   1   lux    y
   1   x      lux
   2   z      c
   2   q      w 
   3   p      lux
   3   lux    t
   4   k      lux
   4   k      lux

I have been trying to find a way to extract all the rows that have the same ref and have lux in both columns at least once:

  ref origin delivery
   1   x      lux
   1   lux    y
   1   x      lux
   3   p      lux
   3   lux    t

Tried doing this:

is_crossdock <- c()
 for(i in 1: nrow(grouped)){
    cmrf <- grouped$ref[i]
    for(j in 1:nrow(grouped[which(grouped$ref== cmrf),])){
      if(any(grouped$origin=='lux' && any(grouped$delivery=='lux'))){
        is_crossdock = c(is_crossdock,i)
      }
    }
  }

but result is incorrect

CodePudding user response：

With dplyr

library(dplyr)
df %>%
  group_by(ref) %>%
  filter("lux" %in% origin & "lux" %in% delivery) 
# # A tibble: 5 × 3
# # Groups:   ref [2]
#     ref origin delivery
#   <int> <chr>  <chr>   
# 1     1 x      lux     
# 2     1 lux    y       
# 3     1 x      lux     
# 4     3 p      lux     
# 5     3 lux    t

Or in base R:

lux_refs = intersect(
  df[df$origin == "lux", "ref"],
  df[df$delivery == "lux", "ref"]
)
df[df$ref %in% lux_refs, ]
#   ref origin delivery
# 1   1      x      lux
# 2   1    lux        y
# 3   1      x      lux
# 6   3      p      lux
# 7   3    lux        t

Using this sample data:

df = read.table(text = 'ref origin delivery
   1   x      lux
   1   lux    y
   1   x      lux
   2   z      c
   2   q      w 
   3   p      lux
   3   lux    t
   4   k      lux
   4   k      lux', header = T)

CodePudding user response：

In tidyverse, we can use if_all with filter

library(dplyr)
df1 %>% 
   group_by(ref) %>%
   filter(if_all(everything(), ~ "lux" %in% .x)) %>%
   ungroup

-output

# A tibble: 5 × 3
    ref origin delivery
  <int> <chr>  <chr>   
1     1 x      lux     
2     1 lux    y       
3     1 x      lux     
4     3 p      lux     
5     3 lux    t

data

df1 <- structure(list(ref = c(1L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L), 
origin = c("x", 
"lux", "x", "z", "q", "p", "lux", "k", "k"), delivery = c("lux", 
"y", "lux", "c", "w", "lux", "t", "lux", "lux")), 
class = "data.frame", row.names = c(NA, 
-9L))