# Example input: two ids, each with ranks "1" to "3" and one code per rank.
DF <- data.frame(
  id = rep(c(1, 2), each = 3),
  rank = rep(c("1", "2", "3"), times = 2),
  code = c("A", "B", "B", "B", "B", "A")
)
DF
id rank code
1 A1 1 A
2 A1 2 B
3 A1 3 B
4 B2 1 B
5 B2 2 B
6 B2 3 A
Desired output:
id rank code type1 type2 type3
1 A1 1 A aa MIX MIX
2 A1 2 B NA MIX MIX
3 A1 3 B NA NA MIX
4 B2 1 B bb bb MIX
5 B2 2 B NA bb MIX
6 B2 3 A NA NA MIX
Everything is grouped by id.
type1 gets code where rank = 1.
type2 gets code where rank = 1-2. If code is different in rank 1 and 2, then MIX.
type3 gets code where rank = 1-3, etc.
Anyone? :)
CodePudding user response:
If the column 'code' is a factor, convert it to character with as.character or use type.convert (which does it automatically). Then, grouped by 'id', build the conditions with case_when to create the columns 'type1', 'type2' and 'type3':
library(dplyr)
# Label the rows of one id group for the rank window 1..k.
# Rows whose rank lies in 1..k get the doubled lower-case code when the
# window holds a single distinct code, or "MIX" when it holds several.
# Rows outside the window, and every row of a group that lacks one of the
# ranks 1..k, stay NA because no case_when() condition matches them.
label_ranks_upto <- function(rank, code, k) {
  window <- seq_len(k)
  in_window <- rank %in% window
  has_all_ranks <- all(window %in% rank)
  n_codes <- n_distinct(code[in_window])
  case_when(
    in_window & has_all_ranks & n_codes == 1 ~ strrep(tolower(code), 2),
    in_window & has_all_ranks & n_codes > 1 ~ "MIX"
  )
}

DF %>%
  # Turn the character rank column into a number (and factors into strings).
  type.convert(as.is = TRUE) %>%
  group_by(id) %>%
  mutate(
    # type1 only looks at the rank-1 row itself.
    type1 = case_when(rank == 1 ~ strrep(tolower(code), 2)),
    type2 = label_ranks_upto(rank, code, 2),
    type3 = label_ranks_upto(rank, code, 3)
  ) %>%
  ungroup()
-output
# A tibble: 7 × 6
id rank code type1 type2 type3
<int> <int> <chr> <chr> <chr> <chr>
1 1 1 A aa MIX MIX
2 1 2 B <NA> MIX MIX
3 1 3 B <NA> <NA> MIX
4 2 1 B bb bb MIX
5 2 2 B <NA> bb MIX
6 2 3 A <NA> <NA> MIX
7 3 1 A aa <NA> <NA>
data
# Sample data for the answer: ids 1 and 2 have ranks "1" to "3", while id 3
# only has rank "1".
DF <- data.frame(
  id = rep(c(1, 2, 3), times = c(3, 3, 1)),
  rank = rep(c("1", "2", "3"), length.out = 7),
  code = c("A", "B", "B", "B", "B", "A", "A")
)
CodePudding user response:
With a slight modification to my answer from your previous question
# Number of cumulative "type" columns to build (type1 .. type<maxtype>).
maxtype <- 3

# Split DF by id, add the type columns to each group, and stack the groups
# back into one data frame. For type<i>, rows with rank <= i get the doubled
# lower-case code when those rows share a single code, or "MIX" when they
# hold several codes; rows with a higher rank stay NA.
do.call(
  rbind,
  by(DF, list(DF$id), function(x) {
    rank_num <- as.numeric(x$rank)
    y <- list()
    # seq_len() keeps the loop empty for maxtype = 0 (1:0 would run twice).
    for (i in seq_len(maxtype)) {
      tmp <- rep(NA_character_, nrow(x))
      idx <- rank_num <= i
      codes <- unique(x$code[idx])
      if (length(codes) == 1) {
        # Use the code found among the selected rows rather than the group's
        # first row, so the label is correct even when the rows of a group
        # are not sorted by rank.
        tmp[idx] <- strrep(tolower(codes), 2)
      } else {
        tmp[idx] <- "MIX"
      }
      y[[paste0("type", i)]] <- tmp
    }
    cbind(x, y)
  })
)
id rank code type1 type2 type3
1.1 1 1 A aa MIX MIX
1.2 1 2 B <NA> MIX MIX
1.3 1 3 B <NA> <NA> MIX
2.4 2 1 B bb bb MIX
2.5 2 2 B <NA> bb MIX
2.6 2 3 A <NA> <NA> MIX
Also note that your id column is different in DF and your output.