I have a table for cost of all items.
cost_table<- data.frame(FRUIT,COST)
FRUIT COST
1 APPLE 15
2 ORANGE 14
3 KIWI 13
5 BANANA 11
6 AVOCADO 10
and customer table where the corresponding cost of of the items have to be added.
customer_name items
1 sam APPLE,AVOCADO
2 anna ORANGE,KIWI,BANANA
3 elle ORANGE
5 john AVOCADO,APPLE
6 greg KIWI
The items is of type list. how do I map the items to cost table and add to get the total cost?
I want the output to be like this.
customer_name items total_cost
1 sam APPLE,AVOCADO 25
2 anna ORANGE,KIWI,BANANA 38
3 elle ORANGE 14
5 john AVOCADO,APPLE 25
6 greg KIWI 13
CodePudding user response:
Here is one potential solution:
library(tidyverse)
cost_table <- read.table(text = " FRUIT COST
1 APPLE 15
2 ORANGE 14
3 KIWI 13
5 BANANA 11
6 AVOCADO 10", header = TRUE)
customer_table <- read.table(text = " customer_name items
1 sam APPLE,AVOCADO
2 anna ORANGE,KIWI,BANANA
3 elle ORANGE
5 john AVOCADO,APPLE
6 greg KIWI ", header = TRUE)
# Calculate the max number of items in the customer_table
nmax <- max(stringr::str_count(customer_table$items, "\\,")) 1
# Split the "items" into separate columns, reformat the data, and summarise COST
customer_table %>%
separate(items, sep = ",", into = paste0("item_", seq_len(nmax)), fill = "right") %>%
pivot_longer(-customer_name, values_to = "FRUIT") %>%
left_join(cost_table, by = "FRUIT") %>%
group_by(customer_name) %>%
summarise(COST = sum(COST, na.rm = TRUE))
#> # A tibble: 5 × 2
#> customer_name COST
#> <chr> <int>
#> 1 anna 38
#> 2 elle 14
#> 3 greg 13
#> 4 john 25
#> 5 sam 25
Created on 2022-02-21 by the reprex package (v2.0.1)
If you need the output ordered the same as the input you can convert "customer_name" to an ordered factor, e.g.
# Split the "items" into separate columns, reformat the data, and summarise COST
customer_table %>%
mutate(customer_name = factor(customer_name,
levels = customer_table$customer_name,
ordered = TRUE)) %>%
separate(items, sep = ",", into = paste0("item_", seq_len(nmax)), fill = "right") %>%
pivot_longer(-customer_name, values_to = "FRUIT") %>%
left_join(cost_table, by = "FRUIT") %>%
group_by(customer_name) %>%
summarise(COST = sum(COST, na.rm = TRUE))
#> # A tibble: 5 × 2
#> customer_name COST
#> <ord> <int>
#> 1 sam 25
#> 2 anna 38
#> 3 elle 14
#> 4 john 25
#> 5 greg 13
CodePudding user response:
You could strsplit
the items by
customer at the comma, subset the cost table and sum
the values. The trimws
accounts for possible whitespace before or after the comma.
transform(customers, total_cost=unclass(
by(items, customer_name, \(x)
sum(with(cost_table, COST[FRUIT %in% trimws(unlist(strsplit(x, ',')))])))))
# customer_name items total_cost
# 1 sam APPLE,AVOCADO 38
# 2 anna ORANGE,KIWI,BANANA 14
# 3 elle ORANGE 13
# 5 john AVOCADO,APPLE 25
# 6 greg KIWI 25
Data:
customers <- structure(list(customer_name = c("sam", "anna", "elle", "john",
"greg"), items = c("APPLE,AVOCADO", "ORANGE,KIWI,BANANA", "ORANGE",
"AVOCADO,APPLE", "KIWI")), class = "data.frame", row.names = c("1",
"2", "3", "5", "6"))
cost_table <- structure(list(FRUIT = c("APPLE", "ORANGE", "KIWI", "BANANA",
"AVOCADO"), COST = c(15L, 14L, 13L, 11L, 10L)), class = "data.frame", row.names = c("1",
"2", "3", "5", "6"))