DF<-data.frame(id=c(1,1,1,2,2,2),rank=c("1","2","3","1","2","3"),code=c("A","B","B","B","B","A"))
DF
id rank code
1 1 1 A
2 1 2 B
3 1 3 B
4 2 1 B
5 2 2 B
6 2 3 A
Desired output:
id rank code type1 type2 type3
1 1 1 A A MIX MIX
2 1 2 B NA MIX MIX
3 1 3 B NA NA MIX
4 2 1 B B B MIX
5 2 2 B NA B MIX
6 2 3 A NA NA MIX
All is grouped by id
type1
gets code
where rank
= 1.
type2
gets code
where rank
= 1-2. If code
is different in rank
1 and 2, then MIX
type3
gets code
where rank
= 1-3. etc. etc.
Anyone? :)
CodePudding user response:
Here's a dplyr
solution using ifelse
and a temporary column to reduce boilerplate:
library(dplyr)
DF %>%
group_by(id) %>%
mutate(a = code[rank == 1],
type1 = ifelse(rank > 1, NA,
ifelse(all(code[!(rank > 1)] == a[1]), a[1], "MIX")),
type2 = ifelse(rank > 2, NA,
ifelse(all(code[!(rank > 2)] == a[1]), a[1], "MIX")),
type3 = ifelse(rank > 3, NA,
ifelse(all(code[!(rank > 3)] == a[1]), a[1], "MIX"))) %>%
select(-a)
#> # A tibble: 6 x 6
#> # Groups: id [2]
#> id rank code type1 type2 type3
#> <dbl> <chr> <chr> <chr> <chr> <chr>
#> 1 1 1 A A MIX MIX
#> 2 1 2 B NA MIX MIX
#> 3 1 3 B NA NA MIX
#> 4 2 1 B B B MIX
#> 5 2 2 B NA B MIX
#> 6 2 3 A NA NA MIX
CodePudding user response:
Using dplyr
with case_when
statements:
DF %>%
group_by(id) %>%
mutate(type2_grp = if_else(rank <= 2, 1, 0),
type3_grp = if_else(rank <= 3, 1, 0)) %>%
mutate(type1 = case_when(rank == 1 ~ code)) %>%
group_by(id, type2_grp) %>%
mutate(type2 = case_when(type2_grp == 1 & length(unique(code)) > 1 ~ "MIX",
type2_grp == 1 & code == "A" ~ "A",
type2_grp == 1 & code == "B" ~ "B")) %>%
group_by(id, type3_grp) %>%
mutate(type3 = case_when(type3_grp == 1 & length(unique(code)) > 1 ~ "MIX",
type3_grp == 1 & code == "A" ~ "A",
type3_grp == 1 & code == "B" ~ "B")) %>%
ungroup() %>%
select(-type2_grp, -type3_grp)
Which creates:
# A tibble: 6 x 6
id rank code type1 type2 type3
<dbl> <chr> <chr> <chr> <chr> <chr>
1 1 1 A A MIX MIX
2 1 2 B NA MIX MIX
3 1 3 B NA NA MIX
4 2 1 B B B MIX
5 2 2 B NA B MIX
6 2 3 A NA NA MIX
CodePudding user response:
A base R solution for an arbitrary number of "type" columns
maxtype=3
do.call(rbind,
by(DF,list(DF$id),function(x){
y=list()
for (i in 1:maxtype) {
tmp=rep(NA,nrow(x))
idx=as.numeric(x$rank)<=i
if (length(unique(x$code[idx]))==1) {
tmp[idx]=x$code[1]
} else {
tmp[idx]="MIX"
}
y[[paste0("type",i)]]=tmp
}
cbind(x,y)
})
)
id rank code type1 type2 type3
1.1 1 1 A A MIX MIX
1.2 1 2 B <NA> MIX MIX
1.3 1 3 B <NA> <NA> MIX
2.4 2 1 B B B MIX
2.5 2 2 B <NA> B MIX
2.6 2 3 A <NA> <NA> MIX