Decide if variables are equal by rank and id-CodePudding

# Example input: two ids, each with three ranked codes.
DF <- data.frame(
  id = rep(c(1, 2), each = 3),
  rank = rep(c("1", "2", "3"), times = 2),
  code = c("A", "B", "B", "B", "B", "A")
)

DF
  id rank code
1  A1    1    A
2  A1    2    B
3  A1    3    B
4  B2    1    B
5  B2    2    B
6  B2    3    A

Desired output:

  id rank code type1 type2 type3
1  A1    1    A    aa   MIX   MIX
2  A1    2    B    NA   MIX   MIX
3  A1    3    B    NA    NA   MIX
4  B2    1    B    bb    bb   MIX
5  B2    2    B    NA    bb   MIX
6  B2    3    A    NA    NA   MIX

All is grouped by id

type1 gets code where rank = 1.

type2 gets the code shared by ranks 1 and 2. If the code differs between rank 1 and rank 2, it is MIX instead.

type3 gets the code shared by ranks 1 to 3, and so on for higher ranks.

Anyone? :)

CodePudding user response：

If the column 'code' is a factor, convert it to character with as.character, or use type.convert to convert the columns automatically. Then, grouped by 'id', use case_when to build the conditions that create the columns 'type1', 'type2' and 'type3'.

library(dplyr)
# Label the rows of one id group for the rank window 1..upto.
# Rows inside the window get the doubled lower-case code when every rank in
# the window is present and those rows share one code, "MIX" when they hold
# more than one code, and NA in every other case.
window_label <- function(rank, code, upto) {
  window <- seq_len(upto)
  in_window <- rank %in% window
  has_all_ranks <- all(window %in% rank)
  n_codes <- n_distinct(code[in_window])
  case_when(
    in_window & has_all_ranks & n_codes == 1 ~ strrep(tolower(code), 2),
    in_window & has_all_ranks & n_codes > 1 ~ "MIX"
  )
}

DF %>%
  # Turn the character rank (and any factor columns) into natural types.
  type.convert(as.is = TRUE) %>%
  group_by(id) %>%
  mutate(
    # type1 only looks at the rank-1 row itself.
    type1 = case_when(rank == 1 ~ strrep(tolower(code), 2)),
    type2 = window_label(rank, code, 2),
    type3 = window_label(rank, code, 3)
  ) %>%
  ungroup()

-output

# A tibble: 7 × 6
     id  rank code  type1 type2 type3
  <int> <int> <chr> <chr> <chr> <chr>
1     1     1 A     aa    MIX   MIX  
2     1     2 B     <NA>  MIX   MIX  
3     1     3 B     <NA>  <NA>  MIX  
4     2     1 B     bb    bb    MIX  
5     2     2 B     <NA>  bb    MIX  
6     2     3 A     <NA>  <NA>  MIX  
7     3     1 A     aa    <NA>  <NA>

data

# Sample data: ids 1 and 2 carry ranks 1-3, id 3 carries rank 1 only.
DF <- data.frame(
  id = rep(c(1, 2, 3), times = c(3, 3, 1)),
  rank = as.character(c(1:3, 1:3, 1L)),
  code = c("A", "B", "B", "B", "B", "A", "A")
)

CodePudding user response：

With a slight modification to my answer from your previous question

# Highest rank window to build; the result gains columns type1 .. type<maxtype>.
maxtype <- 3

# For every id group, add one column per rank window 1..i (i = 1..maxtype).
# Rows whose rank is <= i receive the doubled lower-case code when all such
# rows share a single code, otherwise "MIX". Rows outside the window stay NA.
# The per-group results are stacked back into one data frame with rbind.
do.call(
  rbind,
  by(DF, list(DF$id), function(x) {
    y <- list()
    # seq_len() yields no iterations for maxtype = 0, unlike 1:maxtype.
    for (i in seq_len(maxtype)) {
      # Character NA keeps each type column character even when it is all NA.
      tmp <- rep(NA_character_, nrow(x))
      idx <- as.numeric(x$rank) <= i
      codes <- unique(x$code[idx])
      if (length(codes) == 1) {
        # Use the code found inside the window rather than the group's first
        # row, which can lie outside the window when rows are not rank-sorted.
        tmp[idx] <- strrep(tolower(codes), 2)
      } else {
        tmp[idx] <- "MIX"
      }
      y[[paste0("type", i)]] <- tmp
    }
    cbind(x, y)
  })
)

    id rank code type1 type2 type3
1.1  1    1    A    aa   MIX   MIX
1.2  1    2    B  <NA>   MIX   MIX
1.3  1    3    B  <NA>  <NA>   MIX
2.4  2    1    B    bb    bb   MIX
2.5  2    2    B  <NA>    bb   MIX
2.6  2    3    A  <NA>  <NA>   MIX

Also note that your id column is different in DF and your output.