Home > OS >  how to pipe all these commands dplyr aggregate groupby mixedorder in R
how to pipe all these commands dplyr aggregate groupby mixedorder in R

Time:05-03

assume this is my dataset

library(gtools)
 library(dplyr)
 df <- data.frame(grp=c(0.5,0.6,1,2,2,2,4.5,10,22,"kids","Parents","Teachers"),
                  f1= c(1,0,3,2,4,0,3,0,1,6,8,4),
                  f2= c(1,0,3,1,4,0,1,0,1,5,8,4),
                  f3= c(1,0,3,2,4,6,1,2,1,6,8,4))
 df
        grp f1 f2 f3
1       0.5  1  1  1
2       0.6  0  0  0
3         1  3  3  3
4         2  2  1  2
5         2  4  4  4
6         2  0  0  6
7       4.5  3  1  1
8        10  0  0  2
9        22  1  1  1
10     kids  6  5  6
11  Parents  8  8  8
12 Teachers  4  4  4

and this is my desired output

 df_final
       grp f1 f2 f3
1      <=1  4  4  4
2      2-9  9  6 13
3    10-19  0  0  2
4      >20  1  1  1
5     kids  6  5  6
6  Parents  8  8  8
7 Teachers  4  4  4

This is what I did commenting my questions:

############ how NOT to splot set into two subsets of data
df_1 <- df %>%
   filter(grepl('kids|Parents|Teachers', grp)) 

 df_1
       grp f1 f2 f3
1     kids  6  5  6
2  Parents  8  8  8
3 Teachers  4  4  4
 
 df_2 <- df %>%
   filter(!grepl('kids|Parents|Teachers', grp)) %>%
   mutate(across(.cols = grp, .fns = as.numeric)) %>%
   mutate(grp= cut(grp, breaks=c(-999,2,10,21,999) , labels=c("<=1", "2-9","10-19",">20"), right=F)) 

 df_2
    grp f1 f2 f3
1   <=1  1  1  1
2   <=1  0  0  0
3   <=1  3  3  3
4   2-9  2  1  2
5   2-9  4  4  4
6   2-9  0  0  6
7   2-9  3  1  1
8 10-19  0  0  2
9   >20  1  1  1
 
 ### how to pipe both aggregate and mixedorder/sort instead of separate lined of codes
 df_2 <- aggregate(.~grp, data = df_2, FUN=sum)
 df2[mixedorder(df2$grp, decreasing = T),]

 df_2
    grp f1 f2 f3
1   <=1  4  4  4
2   2-9  9  6 13
3 10-19  0  0  2
4   >20  1  1  1

### how to make sure 10-19 does not come before 2-9 in case of actual dataset
    grp  a  b  d
1   <=1 53 48 53
2 10-15 65 63 65
3   2-9 30 40 30
 
df_final <- rbind(df_2, df_1)
df_final
       grp f1 f2 f3
1      <=1  4  4  4
2      2-9  9  6 13
3    10-19  0  0  2
4      >20  1  1  1
5     kids  6  5  6
6  Parents  8  8  8
7 Teachers  4  4  4

Is there any neat way to get from original df to df_final all in dplyr by just piping commands?
how NOT to splot set into two subsets of data?
how to pipe both aggregate and mixedorder/sort instead of separate lined of codes?
how to make sure 10-19 does not come before 2-9 in case of actual dataset?

CodePudding user response:

Here is one option - create a second column ('grp2') with the cut values on the numeric elements only, then coalesce with the original column, while appending the levels, and then do a group_by summarise with across. In this way, we don't have to use mixedsort, as the cut already had the grouping sorted

library(dplyr)
library(stringr)
df %>% 
  mutate(grp2 = case_when(str_detect(grp, '^[0-9.] $') 
   ~    cut(as.numeric(grp), breaks=c(-999,2,10,21,999) , 
    labels=c("<=1", "2-9","10-19",">20"), right=FALSE))) %>% 
   mutate(grp =factor(coalesce(grp2, grp),
    levels = c(levels(grp2), unique(grp[is.na(grp2)]))), .keep = "unused") %>% 
   group_by(grp) %>% 
   summarise(across(everything(), sum, na.rm = TRUE), .groups = 'drop')

-output

# A tibble: 7 × 4
  grp         f1    f2    f3
  <fct>    <dbl> <dbl> <dbl>
1 <=1          4     4     4
2 2-9          9     6    13
3 10-19        0     0     2
4 >20          1     1     1
5 kids         6     5     6
6 Parents      8     8     8
7 Teachers     4     4     4
  • Related