decide 1 or 2 or both grouped by 2 variables-CodePudding

DF<-data.frame(id=c(1,1,1,2,2,2),rank=c("1","2","3","1","2","3"),code=c("A","B","B","B","B","A"))

DF
  id rank code
1  1    1    A
2  1    2    B
3  1    3    B
4  2    1    B
5  2    2    B
6  2    3    A

Desired output:

  id rank code type1 type2 type3
1  1    1    A     A   MIX   MIX
2  1    2    B    NA   MIX   MIX
3  1    3    B    NA    NA   MIX
4  2    1    B     B     B   MIX
5  2    2    B    NA     B   MIX
6  2    3    A    NA    NA   MIX

All is grouped by id

type1 gets code where rank = 1.

type2 gets code where rank = 1-2. If code is different in rank 1 and 2, then MIX

type3 gets code where rank = 1-3. etc. etc.

Anyone? :)

CodePudding user response：

Here's a dplyr solution using ifelse and a temporary column to reduce boilerplate:

library(dplyr)

DF %>% 
  group_by(id) %>% 
  mutate(a = code[rank == 1],
         type1 = ifelse(rank > 1, NA, 
                        ifelse(all(code[!(rank > 1)] == a[1]), a[1], "MIX")),
         type2 = ifelse(rank > 2, NA, 
                        ifelse(all(code[!(rank > 2)] == a[1]), a[1], "MIX")),
         type3 = ifelse(rank > 3, NA, 
                        ifelse(all(code[!(rank > 3)] == a[1]), a[1], "MIX"))) %>%
  select(-a)

#> # A tibble: 6 x 6
#> # Groups:   id [2]
#>      id rank  code  type1 type2 type3
#>   <dbl> <chr> <chr> <chr> <chr> <chr>
#> 1     1 1     A     A     MIX   MIX  
#> 2     1 2     B     NA    MIX   MIX  
#> 3     1 3     B     NA    NA    MIX  
#> 4     2 1     B     B     B     MIX  
#> 5     2 2     B     NA    B     MIX  
#> 6     2 3     A     NA    NA    MIX

CodePudding user response：

Using dplyr with case_when statements:

DF %>%
  group_by(id) %>%
  mutate(type2_grp = if_else(rank <= 2, 1, 0),
         type3_grp = if_else(rank <= 3, 1, 0)) %>%
  mutate(type1 = case_when(rank == 1 ~ code)) %>%
  group_by(id, type2_grp) %>%
  mutate(type2 = case_when(type2_grp == 1 & length(unique(code)) > 1 ~ "MIX",
                           type2_grp == 1 & code == "A" ~ "A",
                           type2_grp == 1 & code == "B" ~ "B")) %>%
  group_by(id, type3_grp) %>%
  mutate(type3 = case_when(type3_grp == 1 & length(unique(code)) > 1 ~ "MIX",
                           type3_grp == 1 & code == "A" ~ "A",
                           type3_grp == 1 & code == "B" ~ "B")) %>%
  ungroup() %>%
  select(-type2_grp, -type3_grp)

Which creates:

# A tibble: 6 x 6
     id rank  code  type1 type2 type3
  <dbl> <chr> <chr> <chr> <chr> <chr>
1     1 1     A     A     MIX   MIX  
2     1 2     B     NA    MIX   MIX  
3     1 3     B     NA    NA    MIX  
4     2 1     B     B     B     MIX  
5     2 2     B     NA    B     MIX  
6     2 3     A     NA    NA    MIX

CodePudding user response：

A base R solution for an arbitrary number of "type" columns

maxtype=3

do.call(rbind,
  by(DF,list(DF$id),function(x){
    y=list()
    for (i in 1:maxtype) {
      tmp=rep(NA,nrow(x))
      idx=as.numeric(x$rank)<=i
      if (length(unique(x$code[idx]))==1) {
        tmp[idx]=x$code[1]
      } else {
        tmp[idx]="MIX"
      }
      y[[paste0("type",i)]]=tmp
    }
    cbind(x,y)
  })
)

    id rank code type1 type2 type3
1.1  1    1    A     A   MIX   MIX
1.2  1    2    B  <NA>   MIX   MIX
1.3  1    3    B  <NA>  <NA>   MIX
2.4  2    1    B     B     B   MIX
2.5  2    2    B  <NA>     B   MIX
2.6  2    3    A  <NA>  <NA>   MIX