Home > database >  put the row that belong in r
put the row that belong in r

Time:02-05

I need to put the row that belongs to the conditional. In this example the maximum value between days_op and days_tc is 90 but the belonging row is PN. (Note: days_op and xvar_op are together the same with days_tc and xvar_tc).

test = structure(list(identificacionSujeto = c("dave", 
                                        "dave", "dave", 
                                        "dave", "dave"
), date= structure(c(18992, 18992, 18992, 18992, 18992), class = c("IDate", "Date")), 
categ = c("T", "T", "T", "T", "T"), 
days_op = c(24, 50, 3, 11, 70), 
xvar_op = c("CO", "ON", "CO", "ON", "CO"), 
categ_op = c("T", "T", "T", "T", "T"), 
days_tc = c(54, 15, 90, 10, 54), 
xvar_op = c("PN", "NM", "PN", "PN", "PN"), 
categ_tc = c("Y", "V", "Y", "Y", "Y")), 
class = c("grouped_df", "tbl_df", "tbl", "data.frame"), 
row.names = c(NA, -5L), 
groups = structure(list(identificacionSujeto = "dave", 
                        .rows = structure(list(1:5), ptype = integer(0), 
                                          class = c("vctrs_list_of", "vctrs_vctr", "list"))), 
                   class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -1L), .drop = TRUE))


  identif…¹ date       categ days_op xvar_op categ…² days_tc xvar_op expected_output
  <chr>     <IDate>    <chr>   <dbl> <chr>   <chr>     <dbl> <chr>  
1 dave      2021-12-31 T          24 CO      T            54 PN     PN
2 dave      2021-12-31 T          50 ON      T            15 NM     PN
3 dave      2021-12-31 T           3 CO      T            90 PN     PN
4 dave      2021-12-31 T          11 ON      T            10 PN     PN
5 dave      2021-12-31 T          70 CO      T            54 PN     PN

CodePudding user response:

As it is already grouped, we may get the max index of the columns as well as the max value to subset the xvar_op columns (the columns were named the same so use make.unique to modify duplicate column names before applying the function)

library(dplyr)
test %>% 
  setNames(make.unique(names(.))) %>% 
  mutate(ind = max.col(cbind(days_op, days_tc)), 
  val = pmax(days_op, days_tc),
   out = cbind(xvar_op, xvar_op.1)[which.max(val),][ind[which.max(val)]], 
   ind = NULL, val = NULL) %>%
  ungroup

-output

# A tibble: 5 × 10
  identificacionSujeto date       categ days_op xvar_op categ_op days_tc xvar_op.1 categ_tc out  
  <chr>                <IDate>    <chr>   <dbl> <chr>   <chr>      <dbl> <chr>     <chr>    <chr>
1 dave                 2021-12-31 T          24 CO      T             54 PN        Y        PN   
2 dave                 2021-12-31 T          50 ON      T             15 NM        V        PN   
3 dave                 2021-12-31 T           3 CO      T             90 PN        Y        PN   
4 dave                 2021-12-31 T          11 ON      T             10 PN        Y        PN   
5 dave                 2021-12-31 T          70 CO      T             54 PN        Y        PN   
  • Related