How to map values and add in R?-CodePudding

I have a table for cost of all items.

cost_table<- data.frame(FRUIT,COST)
                                      FRUIT   COST
1                                     APPLE   15
2                                    ORANGE   14
3                                      KIWI   13
5                                    BANANA   11
6                                   AVOCADO   10

and customer table where the corresponding cost of of the items have to be added.

   customer_name     items                           
1  sam               APPLE,AVOCADO                   
2  anna              ORANGE,KIWI,BANANA                    
3  elle              ORANGE                      
5  john              AVOCADO,APPLE                 
6  greg              KIWI

The items is of type list. how do I map the items to cost table and add to get the total cost?

I want the output to be like this.

   customer_name     items                   total_cost        
1  sam               APPLE,AVOCADO           25       
2  anna              ORANGE,KIWI,BANANA      38              
3  elle              ORANGE                  14   
5  john              AVOCADO,APPLE           25     
6  greg              KIWI                    13

CodePudding user response：

Here is one potential solution:

library(tidyverse)

cost_table <- read.table(text = "                                      FRUIT   COST
1                                     APPLE   15
2                                    ORANGE   14
3                                      KIWI   13
5                                    BANANA   11
6                                   AVOCADO   10", header = TRUE)

customer_table <- read.table(text = "   customer_name     items                           
1  sam               APPLE,AVOCADO                   
2  anna              ORANGE,KIWI,BANANA                    
3  elle              ORANGE                      
5  john              AVOCADO,APPLE                 
6  greg              KIWI                  ", header = TRUE)

# Calculate the max number of items in the customer_table
nmax <- max(stringr::str_count(customer_table$items, "\\,"))   1

# Split the "items" into separate columns, reformat the data, and summarise COST
customer_table %>%
  separate(items, sep = ",", into = paste0("item_", seq_len(nmax)), fill = "right") %>%
  pivot_longer(-customer_name, values_to = "FRUIT") %>%
  left_join(cost_table, by = "FRUIT") %>%
  group_by(customer_name) %>%
  summarise(COST = sum(COST, na.rm = TRUE))
#> # A tibble: 5 × 2
#>   customer_name  COST
#>   <chr>         <int>
#> 1 anna             38
#> 2 elle             14
#> 3 greg             13
#> 4 john             25
#> 5 sam              25

^{Created on 2022-02-21 by the reprex package (v2.0.1)}

If you need the output ordered the same as the input you can convert "customer_name" to an ordered factor, e.g.

# Split the "items" into separate columns, reformat the data, and summarise COST
customer_table %>%
  mutate(customer_name = factor(customer_name,
                                levels = customer_table$customer_name,
                                ordered = TRUE)) %>%
  separate(items, sep = ",", into = paste0("item_", seq_len(nmax)), fill = "right") %>%
  pivot_longer(-customer_name, values_to = "FRUIT") %>%
  left_join(cost_table, by = "FRUIT") %>%
  group_by(customer_name) %>%
  summarise(COST = sum(COST, na.rm = TRUE))
#> # A tibble: 5 × 2
#>   customer_name  COST
#>   <ord>         <int>
#> 1 sam              25
#> 2 anna             38
#> 3 elle             14
#> 4 john             25
#> 5 greg             13

CodePudding user response：

You could strsplit the items by customer at the comma, subset the cost table and sum the values. The trimws accounts for possible whitespace before or after the comma.

transform(customers, total_cost=unclass(
  by(items, customer_name, \(x)
     sum(with(cost_table, COST[FRUIT %in% trimws(unlist(strsplit(x, ',')))])))))
#   customer_name              items total_cost
# 1           sam      APPLE,AVOCADO         38
# 2          anna ORANGE,KIWI,BANANA         14
# 3          elle             ORANGE         13
# 5          john      AVOCADO,APPLE         25
# 6          greg               KIWI         25

Data:

customers <- structure(list(customer_name = c("sam", "anna", "elle", "john", 
"greg"), items = c("APPLE,AVOCADO", "ORANGE,KIWI,BANANA", "ORANGE", 
"AVOCADO,APPLE", "KIWI")), class = "data.frame", row.names = c("1", 
"2", "3", "5", "6"))

cost_table <- structure(list(FRUIT = c("APPLE", "ORANGE", "KIWI", "BANANA", 
"AVOCADO"), COST = c(15L, 14L, 13L, 11L, 10L)), class = "data.frame", row.names = c("1", 
"2", "3", "5", "6"))