I have a dataframe and I would like find a way of creating a number of smaller dataframes based on the combination of unique values in my original data frame.
In the example data below, I would like to find all the unique combinations of "SalesType" and "LeadSale" (in this example it is "Retail and Jessica", "Retail and Mark", "Market and Jessica" and "Market and Mark") and then create new dataframes with only that subsetted information (expected output below).
Input:
structure(list(BusinessDate = structure(c(1672318800, 1672318800,
1672318800, 1672318800, 1672318800, 1672318800, 1672318800, 1672318800,
1672318800, 1672318800, 1672318800, 1672318800, 1672318800, 1672318800,
1672318800, 1672318800, 1672318800, 1672318800, 1672318800, 1672318800
), class = c("POSIXct", "POSIXt"), tzone = ""), ReportType = c("Sales",
"Sales", "Sales", "Sales", "Sales", "Sales", "Sales", "Sales",
"Sales", "Sales", "Sales", "Sales", "Sales", "Sales", "Sales",
"Sales", "Sales", "Sales", "Sales", "Sales"), SalesType = c("Retail",
"Retail", "Online", "Retail", "Online", "Online",
"Market", "Market", "Retail", "Online", "Market",
"Online", "Retail", "Retail", "Retail", "Retail",
"Market", "Market", "Online", "Online"), Currency = c("USD", "USD", "USD", "USD", "USD", "USD",
"USD", "USD", "USD", "USD", "USD", "USD", "USD", "USD", "USD",
"USD", "USD", "USD", "USD", "USD"), LeadSale = c("Mark", "Mark",
"Mark", "Mark", "Jessica", "Jessica", "Jessica", "Mark", "Jessica",
"Jessica", "Jessica", "Jessica", "Mark", "Mark", "Mark", "Mark",
"Mark", "Mark", "Mark", "Mark"), Value = c(19, 189, 0,
0, 236, 36, 81, 19,
34, 12, 12, 12,
45.5, 45.5, 45.5, 45.5,
45.5, 45.5, 45.5, 0)), row.names = c(NA,
20L), class = "data.frame")
Expected Output:
output_market_jessica <-
structure(list(BusinessDate = structure(c(1672318800, 1672318800,
1672318800, 1672318800, 1672318800, 1672318800), tzone = "", class = c("POSIXct",
"POSIXt")), ReportType = c("Sales", "Sales", "Sales", "Sales",
"Sales", "Sales"), SalesType = c("Market", "Market", "Market",
"Market", "Market", "Market"), Currency = c("USD", "USD", "USD",
"USD", "USD", "USD"), LeadSale = c("Jessica", "Jessica", "Jessica",
"Jessica", "Jessica", "Jessica"), Value = c(236, 36, 81, 12,
12, 12)), row.names = c(NA, -6L), class = "data.frame")
output_market_mark <-
structure(list(BusinessDate = structure(c(1672318800, 1672318800,
1672318800, 1672318800, 1672318800, 1672318800), tzone = "", class = c("POSIXct",
"POSIXt")), ReportType = c("Sales", "Sales", "Sales", "Sales",
"Sales", "Sales"), SalesType = c("Market", "Market", "Market",
"Market", "Market", "Market"), Currency = c("USD", "USD", "USD",
"USD", "USD", "USD"), LeadSale = c("Mark", "Mark", "Mark", "Mark",
"Mark", "Mark"), Value = c(0, 19, 45.5, 45.5, 45.5, 0)), row.names = c(NA,
-6L), class = "data.frame")
output_retail_jessica <-
structure(list(BusinessDate = structure(1672318800, tzone = "", class = c("POSIXct",
"POSIXt")), ReportType = "Sales", SalesType = "Retail", Currency = "USD",
LeadSale = "Jessica", Value = 34), row.names = c(NA, -1L), class = "data.frame")
output_retail_mark <-
structure(list(BusinessDate = structure(c(1672318800, 1672318800,
1672318800, 1672318800, 1672318800, 1672318800, 1672318800), tzone = "", class = c("POSIXct",
"POSIXt")), ReportType = c("Sales", "Sales", "Sales", "Sales",
"Sales", "Sales", "Sales"), SalesType = c("Retail", "Retail",
"Retail", "Retail", "Retail", "Retail", "Retail"), Currency = c("USD",
"USD", "USD", "USD", "USD", "USD", "USD"), LeadSale = c("Mark",
"Mark", "Mark", "Mark", "Mark", "Mark", "Mark"), Value = c(19,
189, 0, 45.5, 45.5, 45.5, 45.5)), row.names = c(NA, -7L), class = "data.frame")
CodePudding user response:
One approach. This is a generalized function (except subset_name) for any datframe and by-groups.
Note: The new dataframe are assigned to global environment.
get_subset_df <- function(df,by){
keys <- df%>%
group_by_at(by)%>%
group_keys
for(i in 1:nrow(keys)){
subset_key <- keys[i,]
subset_name <- paste(c("Output",keys[i,]),collapse="_")
assign(subset_name,df%>%right_join(subset_key),envir=.GlobalEnv)
}
}
get_subset_df(df,c("SalesType","LeadSale"))
ls()
[1] "df" "get_subset_df" "Output_Market_Jessica"
[4] "Output_Market_Mark" "Output_Online_Jessica" "Output_Online_Mark"
[7] "Output_Retail_Jessica" "Output_Retail_Mark"