Home > Enterprise >  My nested ifelse statements are not functioning as expected
My nested ifelse statements are not functioning as expected

Time:07-28

Only the first ifelse statement is working as expected. Otherwise, most of the Transaction Days incorrectly fall into the '1-7 Days' category. The Status can either be "SOLD" or "AVAILABLE". Transaction_Days is a character variable which is >=0.

pl_df$Event_Time_Category <- ifelse(pl_df$Status=="SOLD",
                                 ifelse(pl_df$Transaction_Days<1, '0-1 Day of Game',
                                  ifelse(pl_df$Transaction_Days>0 & pl_df$Transaction_Days<8 , '1-7 Days',
                                    ifelse(pl_df$Transaction_Days>7 & pl_df$Transaction_Days<32, '7-31 Days',
                                       ifelse(pl_df$Transaction_Days>31 & pl_df$Transaction_Days<91, '31-90 Days', '90  Days Out')))), '')

Actual Output:

Transaction_Days Event_Time_Category 
   72            1-7 Days   
    3            1-7 Days   
   10            1-7 Days   
   37            1-7 Days   
   61            1-7 Days   
   35            1-7 Days   
  126            1-7 Days   
   92            90  Days Out   
   53            1-7 Days   
   11            1-7 Days   
   48            1-7 Days   
   19            1-7 Days   
   21            1-7 Days   
   66            1-7 Days   
   20            1-7 Days   
   49            1-7 Days   
   21            1-7 Days   
   43            1-7 Days   
   31            1-7 Days   
    0            0-1 Day of Game   

Expected Output:

Transaction_Days Event_Time_Category 
   72            31-90 Days   
    3            1-7 Days   
   10            7-31 Days   
   37            31-90 Days   
   61            31-90 Days   
   35            31-90 Days   
  126            90  Days Out   
   92            90  Days Out   
   53            31-90 Days   
   11            7-31 Days   
   48            31-90 Days   
   19            7-31 Days   
   21            7-31 Days   
   66            31-90 Days   
   20            7-31 Days   
   49            31-90 Days   
   21            7-31 Days   
   43            31-90 Days   
   31            7-31 Days   
    0            0-1 Day of Game   

Below are more details about the dataframe:

structure(list(Promoter = c("ABC", "ABC", "ABC", "ABC", "ABC", 
"ABC"), Event.Date = c("2022-07-27 13:10:00", "2022-07-27 13:10:00", 
"2022-07-27 13:10:00", "2022-07-27 13:10:00", "2022-07-27 13:10:00", 
"2022-07-27 13:10:00"), Description = c("Twins @ Brewers", "Twins @ Brewers", 
"Twins @ Brewers", "Twins @ Brewers", "Twins @ Brewers", "Twins @ Brewers"
), Performer = c("Milwaukee Brewers", "Milwaukee Brewers", "Milwaukee Brewers", 
"Milwaukee Brewers", "Milwaukee Brewers", "Milwaukee Brewers"
), Category = c("MLB", "MLB", "MLB", "MLB", "MLB", "MLB"), Transaction.Date = c("2022-05-16 17:42:00", 
"2022-07-24 21:12:00", "2022-07-17 14:16:00", "2022-06-20 13:24:00", 
"2022-05-27 17:24:00", "2022-06-22 19:25:00"), Zone = c("Field Diamond Box", 
"Field Diamond Box", "Field Diamond Box", "Field Infield Box", 
"Field Infield Box", "Field Infield Box"), Section = c(111L, 
111L, 111L, 110L, 110L, 110L), Row = c("3", "4", "4", "6", "6", 
"6"), Seat = c("9, 10", "5, 6", "7, 8", "10, 11, 12", "5, 6, 7", 
"8, 9"), Quantity = c(2L, 2L, 2L, 3L, 3L, 2L), Status = c("SOLD", 
"SOLD", "SOLD", "SOLD", "SOLD", "SOLD"), Price = c(85.47, 72.05, 
72.86, 45.36, 44.73, 43.75), Cost = c(164, 164, 164, 174, 174, 
116), Revenue = c(170.94, 144.1, 145.72, 136.08, 134.19, 87.5
), Profit = c(6.94, -19.9, -18.28, -37.92, -39.81, -28.5),  Event_Date = structure(c(19200, 19200, 19200, 19200, 
19200, 19200), class = "Date"), Transaction_Date = structure(c(19128, 
19197, 19190, 19163, 19139, 19165), class = "Date"), Alias = c(" Twins @ Brewers", 
" Twins @ Brewers", " Twins @ Brewers", " Twins @ Brewers", " Twins @ Brewers", 
" Twins @ Brewers"), Cost_Per_Ticket = c(82, 82, 82, 58, 58, 
58), Revenue_Per_Ticket = c("85.47", "72.05", "72.86", "45.36", 
"44.73", "43.75"), Pack_Size = c("2", "2", "2", "3", "3", "2"
), Transaction_Days = c("72", "3", "10", "37", "61", "35"), Event_Time_Category = c("1-7 Days", 
"1-7 Days", "1-7 Days", "1-7 Days", "1-7 Days", "1-7 Days"), 
    Event_Time = c("01:10 PM", "01:10 PM", "01:10 PM", "01:10 PM", 
    "01:10 PM", "01:10 PM")), row.names = c(NA, 6L), class = "data.frame")

CodePudding user response:

make use of cut:

lbs <- c(paste0(c(0, 1,7,31),'-', c(1, 7, 31, 90), ' Days'), '90  Days')

transform(df, Event_Time_cat = cut(Transaction_Days, c(-Inf, 1, 7, 31, 90, Inf), lbs))

   Transaction_Days Event_Time_cat
1                72     31-90 Days
2                 3       1-7 Days
3                10      7-31 Days
4                37     31-90 Days
5                61     31-90 Days
6                35     31-90 Days
7               126       90  Days
8                92       90  Days
9                53     31-90 Days
10               11      7-31 Days
11               48     31-90 Days
12               19      7-31 Days
13               21      7-31 Days
14               66     31-90 Days
15               20      7-31 Days
16               49     31-90 Days
17               21      7-31 Days
18               43     31-90 Days
19               31      7-31 Days
20                0       0-1 Days

CodePudding user response:

Let's change the text to numbers first, then we can do comparisons that work numerically instead of alphabetically. (8 < 10 but "8" > "10")

Then with your ifelse chain, you could remove redundant tests. For instance, if we know the number is not <1, then we don't need to test if it's >0.

pl_df$Transaction_Days = as.numeric(pl_df$Transaction_Days)

pl_df$Event_Time_Category <- 
  ifelse(pl_df$Status=="SOLD",
         ifelse(pl_df$Transaction_Days<1, '0-1 Day of Game',
                ifelse(pl_df$Transaction_Days<8 , '1-7 Days',
                       ifelse(pl_df$Transaction_Days<32, '7-31 Days',
                              ifelse(pl_df$Transaction_Days<91, '31-90 Days', '90  Days Out')))), '')

This is a pretty good use case for dplyr::case_when, which I think is easier to read and maintain. It works where the output is the one corresponding to the first TRUE test it encounters.

library(dplyr)
pl_df %>%
  mutate(Event_Time_Category = case_when(
    Status != "SOLD"      ~ '',  # Blank if not SOLD
    Transaction_Days < 1  ~ '0-1 Day of Game',
    Transaction_Days < 8  ~ '1-7 Days',
    Transaction_Days < 32 ~ '7-31 Days',
    Transaction_Days < 91 ~ '31-90 Days', 
    TRUE ~ '90  Days Out'))
  • Related