What is another way to compute tertiles using tidyverse (or any other package) in R?-CodePudding

I have a data frame from the 6th wave of the WVS. I computed an outgroup trust index (outgroup_index), and I want to divide this vector into 3 groups according to its tertiles.

I use base R functions to do that:

# Recode the index into three groups based on its tertiles.
# Break points: the 0, 1/3, 2/3 and 1 quantiles of the index.

tertiles <- quantile(filtered_df$outgroup_index, probs = (0:3) / 3)

# Cut the target variable at the tertile break points.
# include.lowest = TRUE keeps observations equal to the lowest break
# (the minimum) inside the first group instead of turning them into NA.

filtered_df$index_recoded <- with(
  filtered_df,
  cut(outgroup_index,
      breaks = tertiles,
      include.lowest = TRUE)
)

But I am wondering whether there are other, neater ways to do it (preferably using dplyr/tidyverse or any other packages)?

Data:

structure(list(V2 = structure(c(643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643), label = "Country/region", format.spss = "F4.0", labels = c(`Not asked in survey` = -4, 
Algeria = 12, Azerbaijan = 31, Argentina = 32, Australia = 36, 
Armenia = 51, Brazil = 76, Belarus = 112, Chile = 152, China = 156, 
`Taiwan ROC` = 158, Colombia = 170, Cyprus = 196, Ecuador = 218, 
Estonia = 233, Georgia = 268, Palestine = 275, Germany = 276, 
Ghana = 288, Haiti = 332, `Hong Kong SAR` = 344, India = 356, 
Iraq = 368, Japan = 392, Kazakhstan = 398, Jordan = 400, `South Korea` = 410, 
Kuwait = 414, Kyrgyzstan = 417, Lebanon = 422, Libya = 434, Malaysia = 458, 
Mexico = 484, Morocco = 504, Netherlands = 528, `New Zealand` = 554, 
Nigeria = 566, Pakistan = 586, Peru = 604, Philippines = 608, 
Poland = 616, Qatar = 634, Romania = 642, Russia = 643, Rwanda = 646, 
Singapore = 702, Slovenia = 705, `South Africa` = 710, Zimbabwe = 716, 
Spain = 724, Sweden = 752, Thailand = 764, `Trinidad and Tobago` = 780, 
Tunisia = 788, Turkey = 792, Ukraine = 804, Egypt = 818, `United States` = 840, 
Uruguay = 858, Uzbekistan = 860, Yemen = 887), class = c("haven_labelled", 
"vctrs_vctr", "double")), V105 = structure(c(4, 3, 3, 4, 3, 4, 
4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 2, 2, 2, 1, 1, 
2, 4, 2, 2, 2, 1, 2, 1, 4, 2, 1, 4, 2, 3, 3, 2, 3, 2, 3, 2, 3, 
2, 2, 3, 3, 3, 3, 3, 3, NA, 3, 3, 4, 2, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 2, 2, 3, 3, 3, 2, 3, NA), label = "Trust: People you meet for the first time (B)", format.spss = "F3.0", labels = c(`SE:Inapplicable ; RU:Inappropriate response; HT: Dropped out` = -5, 
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U 00B4>t know` = -1, 
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3, 
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr", 
"double")), V106 = structure(c(3, 2, NA, 4, 2, 4, 4, 3, 3, 4, 
3, 3, 4, 4, 4, 4, NA, NA, NA, NA, 3, 2, 2, 2, 2, 2, 2, 3, 3, 
3, 3, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 
2, 2, 1, 1, 2, 1, 4, 2, 1, 4, 2, 3, 3, 2, 2, 2, 3, 2, 3, 2, 2, 
NA, 3, NA, 3, 3, 3, 2, 3, 3, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 2, 2, 2, 3, 2, 2, 2, 3), label = "Trust: People of another religion (B)", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; HT: Dropped` = -5, 
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U 00B4>t know` = -1, 
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3, 
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr", 
"double")), V107 = structure(c(3, 4, NA, 4, 2, 4, 4, 3, 3, 4, 
3, 3, 4, 4, 4, 4, 3, 2, NA, NA, 3, 2, 2, 2, 2, 2, 2, 3, 3, 3, 
3, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2, 
2, 1, 1, 2, 1, 4, 2, 1, 3, 2, 3, 2, 2, 2, 2, 3, 2, 3, 2, 2, NA, 
3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
2, 3, 2, 3, 2, 2, 2, 3), label = "Trust: People of another nationality (B)", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; HT: Dropped` = -5, 
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U 00B4>t know` = -1, 
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3, 
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr", 
"double")), V248 = structure(c(9, 8, 5, 8, 8, 8, 8, 9, 7, 9, 
9, 5, 5, 6, 5, 5, 5, 5, 5, 4, 9, 9, 4, 9, 9, 3, 6, 9, 8, 9, 9, 
9, NA, 9, 5, 9, 5, 7, 9, 5, 5, 9, 9, 8, 9, 9, 5, 5, 5, 9, 9, 
8, 5, 8, 9, 9, 5, 8, 9, 9, 9, 7, 7, 5, 4, 6, 9, 6, 6, 9, 9, 5, 
6, 7, 5, 4, 7, 7, 5, 5, 5, 5, 8, 9, 8, 9, 9, 9, 9, 9, 9, 9, 5, 
9, 9, 5, 9, 8, 9, 5, 5), label = "Highest educational level attained", format.spss = "F3.0", labels = c(`AU: Inapplicable (No-school education) DE,SE:Inapplicable ;` = -5, 
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U 00B4>t know` = -1, 
`No formal education` = 1, `Incomplete primary school` = 2, `Complete primary school` = 3, 
`Incomplete secondary school: technical/ vocational type` = 4, 
`Complete secondary school: technical/ vocational type` = 5, 
`Incomplete secondary school: university-preparatory type` = 6, 
`Complete secondary school: university-preparatory type` = 7, 
`Some university-level education, without degree` = 8, `University - level education, with degree` = 9
), class = c("haven_labelled", "vctrs_vctr", "double")), V59 = structure(c(9, 
5, 6, 8, 6, 7, NA, 8, 5, 3, 4, 7, 2, 1, 1, 6, 8, 6, NA, NA, 1, 
5, NA, 6, 1, 2, 9, 5, 6, NA, NA, 3, 6, 6, 4, NA, 6, 6, NA, NA, 
3, 9, 8, 10, 9, 6, 10, 9, 8, 9, 9, 10, 6, 4, 4, 6, 4, 10, 3, 
3, 4, 3, 5, 4, 7, 3, 3, 4, 3, 7, 4, 6, 4, 1, 1, 6, 1, 1, 6, 1, 
1, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 7, 3, 1, 5, 6, 7, 2, 4, 5
), label = "Satisfaction with financial situation of household", format.spss = "F3.0", labels = c(`HT: Dropped out survey;DE,SE:Inapplicable ; RU:Inappropriate` = -5, 
`Not asked` = -4, `No answer` = -2, `Don<U 00B4>t know` = -1, 
Dissatisfied = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, `6` = 6, 
`7` = 7, `8` = 8, `9` = 9, Satisfied = 10), class = c("haven_labelled", 
"vctrs_vctr", "double")), V237 = structure(c(3, 2, 2, 2, NA, 
1, 2, 2, 1, 2, 2, 2, 2, 3, 2, 1, 1, 3, 2, 2, NA, 2, 2, 3, 4, 
2, 2, 1, NA, 1, 1, 1, NA, NA, NA, 1, NA, 1, 1, NA, 2, 1, 2, 1, 
1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 
2, 3, 2, 1, 2, 3, 2, 2, 2, NA, 2, 2, 4, 2, 2, 2, 1, 1, 2, 1, 
2, 3, 2, 2, 1, 2, 2, 2, 3, 3, 2, 3, 2, 2, NA, 3), label = "Family savings during past year", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; BH: Missing;` = -5, 
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U 00B4>t know` = -1, 
`Save money` = 1, `Just get by` = 2, `Spent some savings and borrowed money` = 3, 
`Spent savings and borrowed money` = 4), class = c("haven_labelled", 
"vctrs_vctr", "double")), V105_rec = c(1, 2, 2, 1, 2, 1, 1, 1, 
1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 3, 3, 3, 4, 4, 3, 1, 
3, 3, 3, 4, 3, 4, 1, 3, 4, 1, 3, 2, 2, 3, 2, 3, 2, 3, 2, 3, 3, 
2, 2, 2, 2, 2, 2, NA, 2, 2, 1, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
4, 4, 3, 3, 2, 2, 2, 3, 2, NA), V106_rec = c(2, 3, NA, 1, 3, 
1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, NA, NA, NA, NA, 2, 3, 3, 3, 
3, 3, 3, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 3, 
3, 4, 4, 3, 3, 3, 3, 4, 4, 3, 4, 1, 3, 4, 1, 3, 2, 2, 3, 3, 3, 
2, 3, 2, 3, 3, NA, 2, NA, 2, 2, 2, 3, 2, 2, 1, 3, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 3, 3, 3, 2), V107_rec = c(2, 
1, NA, 1, 3, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 3, NA, NA, 2, 
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 4, 
3, 3, 3, 3, 4, 4, 3, 4, 3, 3, 4, 4, 3, 4, 1, 3, 4, 2, 3, 2, 3, 
3, 3, 3, 2, 3, 2, 3, 3, NA, 2, 3, 2, 2, 2, 3, 2, 2, 2, 3, 4, 
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 3, 2, 3, 3, 3, 2), outgroup_index = c(1.66666666666667, 
2, 2, 1, 2.66666666666667, 1, 1, 1.66666666666667, 1.66666666666667, 
1, 1.66666666666667, 2, 1, 1, 1, 1, 1.5, 2.5, 2, 2, 2, 3, 3, 
3, 3, 3, 2.66666666666667, 2, 2, 2, 2, 1.33333333333333, 1.33333333333333, 
2, 2, 2, 2, 2, 2, 2, 2, 2.66666666666667, 2, 3, 3, 3, 4, 4, 3, 
2.66666666666667, 3, 3, 3.66666666666667, 4, 3, 4, 1, 3, 4, 1.33333333333333, 
3, 2, 2.33333333333333, 3, 2.66666666666667, 3, 2, 3, 2, 3, 3, 
2, 2, 2.5, 2, 2, 2, 3, 2, 2, 1.33333333333333, 3, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 3, 2.66666666666667, 2.66666666666667, 2, 
2.66666666666667, 3, 2.66666666666667, 2), V59_rec = structure(c(5, 
3, 3, 4, 3, 4, NA, 4, 3, 2, 2, 4, 1, 1, 1, 3, 4, 3, NA, NA, 1, 
3, NA, 3, 1, 1, 5, 3, 3, NA, NA, 2, 3, 3, 2, NA, 3, 3, NA, NA, 
2, 5, 4, 5, 5, 3, 5, 5, 4, 5, 5, 5, 3, 2, 2, 3, 2, 5, 2, 2, 2, 
2, 3, 2, 4, 2, 2, 2, 2, 4, 2, 3, 2, 1, 1, 3, 1, 1, 3, 1, 1, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 4, 2, 1, 3, 3, 4, 1, 2, 3), labels = c(`Not satisfied at all` = 1, 
`Rather not satisfied` = 2, `Neither satisfied, nor not satisfied` = 3, 
`Rather satisfied` = 4, Satisfied = 5), class = c("haven_labelled", 
"vctrs_vctr", "double")), V248_dummy = structure(c(1, 1, 0, 1, 
1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 
0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 
0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0), labels = c(`A university education and higher` = 1, 
`No university education` = 0), class = c("haven_labelled", "vctrs_vctr", 
"double")), V237_rec = structure(c(3, 2, 2, 2, NA, 1, 2, 2, 1, 
2, 2, 2, 2, 3, 2, 1, 1, 3, 2, 2, NA, 2, 2, 3, 3, 2, 2, 1, NA, 
1, 1, 1, NA, NA, NA, 1, NA, 1, 1, NA, 2, 1, 2, 1, 1, 1, 1, 1, 
1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 2, 3, 2, 1, 
2, 3, 2, 2, 2, NA, 2, 2, 3, 2, 2, 2, 1, 1, 2, 1, 2, 3, 2, 2, 
1, 2, 2, 2, 3, 3, 2, 3, 2, 2, NA, 3), labels = c(`Save money` = 1, 
`Just get by` = 2, `Spent savings and borrowed money` = 3), class = c("haven_labelled", 
"vctrs_vctr", "double"))), row.names = c(NA, -101L), class = c("tbl_df", 
"tbl", "data.frame"), label = "filelabel")

CodePudding user response：

A bit unintuitive, but ggplot2 has the functionality you are looking for.

# cut_number() bins by quantiles (roughly equal counts per group), which is
# what tertiles are; cut_interval() would create equal-width ranges instead.
filtered_df %>%
    mutate(index_recoded = ggplot2::cut_number(outgroup_index, 3))

And to verify the levels are the same:

# smaller dput would be nice
# NOTE(review): `Data` is presumably the dput() structure from the question,
# assigned to a variable -- confirm before running.
start <- Data

# Compare the base R recoding (first braces) with the ggplot2 recoding
# (second braces) element by element; all() collapses that to one TRUE/FALSE.
all(
  {
    # Base R: cut at the empirical tertiles.
    filtered_df  <- start
    tertiles <- quantile(filtered_df$outgroup_index, c(0:3) / 3)
    filtered_df$index_recoded <- with(
      filtered_df,
      cut(outgroup_index,
          tertiles,
          include.lowest = TRUE)
    )
    filtered_df$index_recoded
  } == {
    # ggplot2: cut_number() uses quantile-based breaks, i.e. tertiles for
    # n = 3 (cut_interval() would use equal-width breaks instead).
    tv_df <- start
    tv_df %>%
      mutate(index_recoded = ggplot2::cut_number(outgroup_index, 3)) %>%
      pull(index_recoded)
  }
)

[1] TRUE

CodePudding user response：

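cut has a simpler syntax if you want to divide the data into a fixed number of equal-width intervals (note that these are generally not tertiles, which contain roughly equal numbers of observations).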

# Split the range of the index into three equal-width bins (factor output).
filtered_df[["index_recoded"]] <- cut(
  filtered_df[["outgroup_index"]],
  breaks = 3
)

You can also use it with labels = FALSE to get 1, 2 and 3 as output.

# Same three equal-width bins, returned as integer codes instead of a factor.
filtered_df[["index_recoded"]] <- cut(
  filtered_df[["outgroup_index"]],
  breaks = 3,
  labels = FALSE
)