Home > Software engineering >  Find if certain values have certain other parameters associated with it in a R data frame
Find if certain values have certain other parameters associated with it in a R data frame

Time:12-28

I am looking to find out if certain values are associated with each category. I have a vector x <- c("Jay Ambiga","Ameen Agent"). For each value in x I want to know if the they have Coffee Shop and Restaurant associated with it. I was using grepl on description to find out if it the word Coffee or Restaurant but I am not sure how to get a final result which looks like this,

enter image description here

df<-structure(list(Date = c("Ledger:", "44718", "44737", "44768", 
                                    "44768", "Ledger:", "44564", "44564", "44567", "44567", "44662", 
                                    "44687", "44743", "44743", "44758", "44758"), Particulars = c("Ameen Agent", 
                                                                                                  "To", "To", "To", "By", "Jay Ambiga", "To", "By", "To", "By", 
                                                                                                  "To", "To", "By", "By", "To", "By"), Description = c("1-Jan-22 to 12-Oct-22", 
                                                                                                                                                       "UOB SGD A/C - Restaurant", "UOB SGD A/C - Restaurant", 
                                                                                                                                                       "UOB SGD A/C - Restaurant", "Agent Fee", "1-Jan-22 to 12-Oct-22", 
                                                                                                                                                       "UOB SGD A/C - Coffee Shop", "Purchase of Materials - Others", 
                                                                                                                                                       "UOB SGD A/C - Coffee Shop", "Purchase of Materials - Liquor & Cigarette", 
                                                                                                                                                       "UOB SGD A/C - Restaurant", "UOB SGD A/C - Restaurant", 
                                                                                                                                                       "Purchase of Materials - Others", "Purchase of Materials - Others", 
                                                                                                                                                       "UOB SGD A/C - Restaurant", "Purchase of Materials - Groceries"
                                                                                                  ), Vch_Type = c(NA, "Payment", "Payment", "Payment", "Journal", 
                                                                                                                  NA, "Payment", "Purchase", "Payment", "Purchase", "Payment", 
                                                                                                                  "Payment", "Purchase", "Purchase", "Payment", "Purchase"), Vch_No = c(NA, 
                                                                                                                                                                                        "1150", "1255", "1415", "39", NA, "13", "4671", "62", "9", "879", 
                                                                                                                                                                                        "992", "11042022", "06052022", "1361", "16072022"), Debit = c(NA, 
                                                                                                                                                                                                                                                      "50", "300", "1000", NA, NA, "3447.5", NA, "258", NA, "293.39999999999998", 
                                                                                                                                                                                                                                                      "880", NA, NA, "20", NA), Credit = c(NA, NA, NA, NA, "1000", 
                                                                                                                                                                                                                                                                                           NA, NA, "3447.5", NA, "258", NA, NA, "293.39999999999998", "880", 
                                                                                                                                                                                                                                                                                           NA, "20"), Category = c("Ameen Agent", "Ameen Agent", "Ameen Agent", 
                                                                                                                                                                                                                                                                                                                   "Ameen Agent", "Ameen Agent", "Jay Ambiga", "Jay Ambiga", "Jay Ambiga", 
                                                                                                                                                                                                                                                                                                                   "Jay Ambiga", "Jay Ambiga", "Jay Ambiga", "Jay Ambiga", "Jay Ambiga", 
                                                                                                                                                                                                                                                                                                                   "Jay Ambiga", "Jay Ambiga", "Jay Ambiga")), row.names = c(NA, 
                                                                                                                                                                                                                                                                                                                                                                             -16L), class = c("tbl_df", "tbl", "data.frame"))

CodePudding user response:

library(data.table)
setDT(df)
ans <- df[, .(temp = paste0(Description, collapse = ";")), by = .(Category)]
ans[, has_both := as.logical(
        sapply(temp, function(x) grepl("restaurant", x, ignore.case = TRUE)) * 
        sapply(temp, function(x) grepl("coffee shop", x, ignore.case = TRUE)))][, temp := NULL][]
#       Category has_both
# 1: Ameen Agent    FALSE
# 2:  Jay Ambiga     TRUE



       
  • Related