Only the first ifelse statement is working as expected. Otherwise, most of the Transaction Days incorrectly fall into the '1-7 Days' category. The Status can either be "SOLD" or "AVAILABLE". Transaction_Days is a character variable which is >=0.
pl_df$Event_Time_Category <- ifelse(pl_df$Status=="SOLD",
ifelse(pl_df$Transaction_Days<1, '0-1 Day of Game',
ifelse(pl_df$Transaction_Days>0 & pl_df$Transaction_Days<8 , '1-7 Days',
ifelse(pl_df$Transaction_Days>7 & pl_df$Transaction_Days<32, '7-31 Days',
ifelse(pl_df$Transaction_Days>31 & pl_df$Transaction_Days<91, '31-90 Days', '90 Days Out')))), '')
Actual Output:
Transaction_Days Event_Time_Category
72 1-7 Days
3 1-7 Days
10 1-7 Days
37 1-7 Days
61 1-7 Days
35 1-7 Days
126 1-7 Days
92 90 Days Out
53 1-7 Days
11 1-7 Days
48 1-7 Days
19 1-7 Days
21 1-7 Days
66 1-7 Days
20 1-7 Days
49 1-7 Days
21 1-7 Days
43 1-7 Days
31 1-7 Days
0 0-1 Day of Game
Expected Output:
Transaction_Days Event_Time_Category
72 31-90 Days
3 1-7 Days
10 7-31 Days
37 31-90 Days
61 31-90 Days
35 31-90 Days
126 90 Days Out
92 90 Days Out
53 31-90 Days
11 7-31 Days
48 31-90 Days
19 7-31 Days
21 7-31 Days
66 31-90 Days
20 7-31 Days
49 31-90 Days
21 7-31 Days
43 31-90 Days
31 7-31 Days
0 0-1 Day of Game
Below are more details about the dataframe:
structure(list(Promoter = c("ABC", "ABC", "ABC", "ABC", "ABC",
"ABC"), Event.Date = c("2022-07-27 13:10:00", "2022-07-27 13:10:00",
"2022-07-27 13:10:00", "2022-07-27 13:10:00", "2022-07-27 13:10:00",
"2022-07-27 13:10:00"), Description = c("Twins @ Brewers", "Twins @ Brewers",
"Twins @ Brewers", "Twins @ Brewers", "Twins @ Brewers", "Twins @ Brewers"
), Performer = c("Milwaukee Brewers", "Milwaukee Brewers", "Milwaukee Brewers",
"Milwaukee Brewers", "Milwaukee Brewers", "Milwaukee Brewers"
), Category = c("MLB", "MLB", "MLB", "MLB", "MLB", "MLB"), Transaction.Date = c("2022-05-16 17:42:00",
"2022-07-24 21:12:00", "2022-07-17 14:16:00", "2022-06-20 13:24:00",
"2022-05-27 17:24:00", "2022-06-22 19:25:00"), Zone = c("Field Diamond Box",
"Field Diamond Box", "Field Diamond Box", "Field Infield Box",
"Field Infield Box", "Field Infield Box"), Section = c(111L,
111L, 111L, 110L, 110L, 110L), Row = c("3", "4", "4", "6", "6",
"6"), Seat = c("9, 10", "5, 6", "7, 8", "10, 11, 12", "5, 6, 7",
"8, 9"), Quantity = c(2L, 2L, 2L, 3L, 3L, 2L), Status = c("SOLD",
"SOLD", "SOLD", "SOLD", "SOLD", "SOLD"), Price = c(85.47, 72.05,
72.86, 45.36, 44.73, 43.75), Cost = c(164, 164, 164, 174, 174,
116), Revenue = c(170.94, 144.1, 145.72, 136.08, 134.19, 87.5
), Profit = c(6.94, -19.9, -18.28, -37.92, -39.81, -28.5), Event_Date = structure(c(19200, 19200, 19200, 19200,
19200, 19200), class = "Date"), Transaction_Date = structure(c(19128,
19197, 19190, 19163, 19139, 19165), class = "Date"), Alias = c(" Twins @ Brewers",
" Twins @ Brewers", " Twins @ Brewers", " Twins @ Brewers", " Twins @ Brewers",
" Twins @ Brewers"), Cost_Per_Ticket = c(82, 82, 82, 58, 58,
58), Revenue_Per_Ticket = c("85.47", "72.05", "72.86", "45.36",
"44.73", "43.75"), Pack_Size = c("2", "2", "2", "3", "3", "2"
), Transaction_Days = c("72", "3", "10", "37", "61", "35"), Event_Time_Category = c("1-7 Days",
"1-7 Days", "1-7 Days", "1-7 Days", "1-7 Days", "1-7 Days"),
Event_Time = c("01:10 PM", "01:10 PM", "01:10 PM", "01:10 PM",
"01:10 PM", "01:10 PM")), row.names = c(NA, 6L), class = "data.frame")
CodePudding user response:
make use of cut
:
lbs <- c(paste0(c(0, 1,7,31),'-', c(1, 7, 31, 90), ' Days'), '90 Days')
transform(df, Event_Time_cat = cut(Transaction_Days, c(-Inf, 1, 7, 31, 90, Inf), lbs))
Transaction_Days Event_Time_cat
1 72 31-90 Days
2 3 1-7 Days
3 10 7-31 Days
4 37 31-90 Days
5 61 31-90 Days
6 35 31-90 Days
7 126 90 Days
8 92 90 Days
9 53 31-90 Days
10 11 7-31 Days
11 48 31-90 Days
12 19 7-31 Days
13 21 7-31 Days
14 66 31-90 Days
15 20 7-31 Days
16 49 31-90 Days
17 21 7-31 Days
18 43 31-90 Days
19 31 7-31 Days
20 0 0-1 Days
CodePudding user response:
Let's change the text to numbers first, then we can do comparisons that work numerically instead of alphabetically. (8 < 10
but "8" > "10"
)
Then with your ifelse chain, you could remove redundant tests. For instance, if we know the number is not <1
, then we don't need to test if it's >0
.
pl_df$Transaction_Days = as.numeric(pl_df$Transaction_Days)
pl_df$Event_Time_Category <-
ifelse(pl_df$Status=="SOLD",
ifelse(pl_df$Transaction_Days<1, '0-1 Day of Game',
ifelse(pl_df$Transaction_Days<8 , '1-7 Days',
ifelse(pl_df$Transaction_Days<32, '7-31 Days',
ifelse(pl_df$Transaction_Days<91, '31-90 Days', '90 Days Out')))), '')
This is a pretty good use case for dplyr::case_when
, which I think is easier to read and maintain. It works where the output is the one corresponding to the first TRUE test it encounters.
library(dplyr)
pl_df %>%
mutate(Event_Time_Category = case_when(
Status != "SOLD" ~ '', # Blank if not SOLD
Transaction_Days < 1 ~ '0-1 Day of Game',
Transaction_Days < 8 ~ '1-7 Days',
Transaction_Days < 32 ~ '7-31 Days',
Transaction_Days < 91 ~ '31-90 Days',
TRUE ~ '90 Days Out'))