Home > Enterprise >  What is another way to count tertiles using tidyverse (or any other packages) in R?
What is another way to count tertiles using tidyverse (or any other packages) in R?

Time:10-17

I have a data frame from the 6th wave of the WVS. I computed the outgroup trust index (outgroup_index), and I want to divide this vector into 3 groups according to its tertiles.

I use base R functions to do that:

# Recode the index into three groups based on its tertiles.
# The break points are the 0, 1/3, 2/3 and 1 quantiles of the index.
# na.rm = TRUE keeps quantile() from erroring when the index contains NA.

tertiles <- quantile(filtered_df$outgroup_index, (0:3) / 3, na.rm = TRUE)

# Cut the target variable at the tertile break points.
# include.lowest = TRUE closes the first interval on the left, so the
# minimum value falls into the first group instead of becoming NA.

filtered_df$index_recoded <- with(
  filtered_df,
  cut(outgroup_index,
      breaks = tertiles,
      include.lowest = TRUE)
)

But I am wondering whether there are other, neater ways to do it (preferably using dplyr/tidyverse or any other packages).

Data:

structure(list(V2 = structure(c(643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 
643, 643, 643, 643), label = "Country/region", format.spss = "F4.0", labels = c(`Not asked in survey` = -4, 
Algeria = 12, Azerbaijan = 31, Argentina = 32, Australia = 36, 
Armenia = 51, Brazil = 76, Belarus = 112, Chile = 152, China = 156, 
`Taiwan ROC` = 158, Colombia = 170, Cyprus = 196, Ecuador = 218, 
Estonia = 233, Georgia = 268, Palestine = 275, Germany = 276, 
Ghana = 288, Haiti = 332, `Hong Kong SAR` = 344, India = 356, 
Iraq = 368, Japan = 392, Kazakhstan = 398, Jordan = 400, `South Korea` = 410, 
Kuwait = 414, Kyrgyzstan = 417, Lebanon = 422, Libya = 434, Malaysia = 458, 
Mexico = 484, Morocco = 504, Netherlands = 528, `New Zealand` = 554, 
Nigeria = 566, Pakistan = 586, Peru = 604, Philippines = 608, 
Poland = 616, Qatar = 634, Romania = 642, Russia = 643, Rwanda = 646, 
Singapore = 702, Slovenia = 705, `South Africa` = 710, Zimbabwe = 716, 
Spain = 724, Sweden = 752, Thailand = 764, `Trinidad and Tobago` = 780, 
Tunisia = 788, Turkey = 792, Ukraine = 804, Egypt = 818, `United States` = 840, 
Uruguay = 858, Uzbekistan = 860, Yemen = 887), class = c("haven_labelled", 
"vctrs_vctr", "double")), V105 = structure(c(4, 3, 3, 4, 3, 4, 
4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 3, 
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 2, 2, 2, 1, 1, 
2, 4, 2, 2, 2, 1, 2, 1, 4, 2, 1, 4, 2, 3, 3, 2, 3, 2, 3, 2, 3, 
2, 2, 3, 3, 3, 3, 3, 3, NA, 3, 3, 4, 2, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 2, 2, 3, 3, 3, 2, 3, NA), label = "Trust: People you meet for the first time (B)", format.spss = "F3.0", labels = c(`SE:Inapplicable ; RU:Inappropriate response; HT: Dropped out` = -5, 
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U 00B4>t know` = -1, 
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3, 
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr", 
"double")), V106 = structure(c(3, 2, NA, 4, 2, 4, 4, 3, 3, 4, 
3, 3, 4, 4, 4, 4, NA, NA, NA, NA, 3, 2, 2, 2, 2, 2, 2, 3, 3, 
3, 3, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2, 
2, 2, 1, 1, 2, 1, 4, 2, 1, 4, 2, 3, 3, 2, 2, 2, 3, 2, 3, 2, 2, 
NA, 3, NA, 3, 3, 3, 2, 3, 3, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 2, 2, 2, 3, 2, 2, 2, 3), label = "Trust: People of another religion (B)", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; HT: Dropped` = -5, 
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U 00B4>t know` = -1, 
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3, 
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr", 
"double")), V107 = structure(c(3, 4, NA, 4, 2, 4, 4, 3, 3, 4, 
3, 3, 4, 4, 4, 4, 3, 2, NA, NA, 3, 2, 2, 2, 2, 2, 2, 3, 3, 3, 
3, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2, 
2, 1, 1, 2, 1, 4, 2, 1, 3, 2, 3, 2, 2, 2, 2, 3, 2, 3, 2, 2, NA, 
3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
2, 3, 2, 3, 2, 2, 2, 3), label = "Trust: People of another nationality (B)", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; HT: Dropped` = -5, 
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U 00B4>t know` = -1, 
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3, 
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr", 
"double")), V248 = structure(c(9, 8, 5, 8, 8, 8, 8, 9, 7, 9, 
9, 5, 5, 6, 5, 5, 5, 5, 5, 4, 9, 9, 4, 9, 9, 3, 6, 9, 8, 9, 9, 
9, NA, 9, 5, 9, 5, 7, 9, 5, 5, 9, 9, 8, 9, 9, 5, 5, 5, 9, 9, 
8, 5, 8, 9, 9, 5, 8, 9, 9, 9, 7, 7, 5, 4, 6, 9, 6, 6, 9, 9, 5, 
6, 7, 5, 4, 7, 7, 5, 5, 5, 5, 8, 9, 8, 9, 9, 9, 9, 9, 9, 9, 5, 
9, 9, 5, 9, 8, 9, 5, 5), label = "Highest educational level attained", format.spss = "F3.0", labels = c(`AU: Inapplicable (No-school education) DE,SE:Inapplicable ;` = -5, 
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U 00B4>t know` = -1, 
`No formal education` = 1, `Incomplete primary school` = 2, `Complete primary school` = 3, 
`Incomplete secondary school: technical/ vocational type` = 4, 
`Complete secondary school: technical/ vocational type` = 5, 
`Incomplete secondary school: university-preparatory type` = 6, 
`Complete secondary school: university-preparatory type` = 7, 
`Some university-level education, without degree` = 8, `University - level education, with degree` = 9
), class = c("haven_labelled", "vctrs_vctr", "double")), V59 = structure(c(9, 
5, 6, 8, 6, 7, NA, 8, 5, 3, 4, 7, 2, 1, 1, 6, 8, 6, NA, NA, 1, 
5, NA, 6, 1, 2, 9, 5, 6, NA, NA, 3, 6, 6, 4, NA, 6, 6, NA, NA, 
3, 9, 8, 10, 9, 6, 10, 9, 8, 9, 9, 10, 6, 4, 4, 6, 4, 10, 3, 
3, 4, 3, 5, 4, 7, 3, 3, 4, 3, 7, 4, 6, 4, 1, 1, 6, 1, 1, 6, 1, 
1, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 7, 3, 1, 5, 6, 7, 2, 4, 5
), label = "Satisfaction with financial situation of household", format.spss = "F3.0", labels = c(`HT: Dropped out survey;DE,SE:Inapplicable ; RU:Inappropriate` = -5, 
`Not asked` = -4, `No answer` = -2, `Don<U 00B4>t know` = -1, 
Dissatisfied = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, `6` = 6, 
`7` = 7, `8` = 8, `9` = 9, Satisfied = 10), class = c("haven_labelled", 
"vctrs_vctr", "double")), V237 = structure(c(3, 2, 2, 2, NA, 
1, 2, 2, 1, 2, 2, 2, 2, 3, 2, 1, 1, 3, 2, 2, NA, 2, 2, 3, 4, 
2, 2, 1, NA, 1, 1, 1, NA, NA, NA, 1, NA, 1, 1, NA, 2, 1, 2, 1, 
1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 
2, 3, 2, 1, 2, 3, 2, 2, 2, NA, 2, 2, 4, 2, 2, 2, 1, 1, 2, 1, 
2, 3, 2, 2, 1, 2, 2, 2, 3, 3, 2, 3, 2, 2, NA, 3), label = "Family savings during past year", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; BH: Missing;` = -5, 
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U 00B4>t know` = -1, 
`Save money` = 1, `Just get by` = 2, `Spent some savings and borrowed money` = 3, 
`Spent savings and borrowed money` = 4), class = c("haven_labelled", 
"vctrs_vctr", "double")), V105_rec = c(1, 2, 2, 1, 2, 1, 1, 1, 
1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 2, 2, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 3, 3, 3, 4, 4, 3, 1, 
3, 3, 3, 4, 3, 4, 1, 3, 4, 1, 3, 2, 2, 3, 2, 3, 2, 3, 2, 3, 3, 
2, 2, 2, 2, 2, 2, NA, 2, 2, 1, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
4, 4, 3, 3, 2, 2, 2, 3, 2, NA), V106_rec = c(2, 3, NA, 1, 3, 
1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, NA, NA, NA, NA, 2, 3, 3, 3, 
3, 3, 3, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 3, 
3, 4, 4, 3, 3, 3, 3, 4, 4, 3, 4, 1, 3, 4, 1, 3, 2, 2, 3, 3, 3, 
2, 3, 2, 3, 3, NA, 2, NA, 2, 2, 2, 3, 2, 2, 1, 3, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 3, 3, 3, 2), V107_rec = c(2, 
1, NA, 1, 3, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 3, NA, NA, 2, 
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 4, 
3, 3, 3, 3, 4, 4, 3, 4, 3, 3, 4, 4, 3, 4, 1, 3, 4, 2, 3, 2, 3, 
3, 3, 3, 2, 3, 2, 3, 3, NA, 2, 3, 2, 2, 2, 3, 2, 2, 2, 3, 4, 
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 3, 2, 3, 3, 3, 2), outgroup_index = c(1.66666666666667, 
2, 2, 1, 2.66666666666667, 1, 1, 1.66666666666667, 1.66666666666667, 
1, 1.66666666666667, 2, 1, 1, 1, 1, 1.5, 2.5, 2, 2, 2, 3, 3, 
3, 3, 3, 2.66666666666667, 2, 2, 2, 2, 1.33333333333333, 1.33333333333333, 
2, 2, 2, 2, 2, 2, 2, 2, 2.66666666666667, 2, 3, 3, 3, 4, 4, 3, 
2.66666666666667, 3, 3, 3.66666666666667, 4, 3, 4, 1, 3, 4, 1.33333333333333, 
3, 2, 2.33333333333333, 3, 2.66666666666667, 3, 2, 3, 2, 3, 3, 
2, 2, 2.5, 2, 2, 2, 3, 2, 2, 1.33333333333333, 3, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 3, 2.66666666666667, 2.66666666666667, 2, 
2.66666666666667, 3, 2.66666666666667, 2), V59_rec = structure(c(5, 
3, 3, 4, 3, 4, NA, 4, 3, 2, 2, 4, 1, 1, 1, 3, 4, 3, NA, NA, 1, 
3, NA, 3, 1, 1, 5, 3, 3, NA, NA, 2, 3, 3, 2, NA, 3, 3, NA, NA, 
2, 5, 4, 5, 5, 3, 5, 5, 4, 5, 5, 5, 3, 2, 2, 3, 2, 5, 2, 2, 2, 
2, 3, 2, 4, 2, 2, 2, 2, 4, 2, 3, 2, 1, 1, 3, 1, 1, 3, 1, 1, 2, 
2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 4, 2, 1, 3, 3, 4, 1, 2, 3), labels = c(`Not satisfied at all` = 1, 
`Rather not satisfied` = 2, `Neither satisfied, nor not satisfied` = 3, 
`Rather satisfied` = 4, Satisfied = 5), class = c("haven_labelled", 
"vctrs_vctr", "double")), V248_dummy = structure(c(1, 1, 0, 1, 
1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 
0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 
0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0), labels = c(`A university education and higher` = 1, 
`No university education` = 0), class = c("haven_labelled", "vctrs_vctr", 
"double")), V237_rec = structure(c(3, 2, 2, 2, NA, 1, 2, 2, 1, 
2, 2, 2, 2, 3, 2, 1, 1, 3, 2, 2, NA, 2, 2, 3, 3, 2, 2, 1, NA, 
1, 1, 1, NA, NA, NA, 1, NA, 1, 1, NA, 2, 1, 2, 1, 1, 1, 1, 1, 
1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 2, 3, 2, 1, 
2, 3, 2, 2, 2, NA, 2, 2, 3, 2, 2, 2, 1, 1, 2, 1, 2, 3, 2, 2, 
1, 2, 2, 2, 3, 3, 2, 3, 2, 2, NA, 3), labels = c(`Save money` = 1, 
`Just get by` = 2, `Spent savings and borrowed money` = 3), class = c("haven_labelled", 
"vctrs_vctr", "double"))), row.names = c(NA, -101L), class = c("tbl_df", 
"tbl", "data.frame"), label = "filelabel")

CodePudding user response:

A bit unintuitive, but ggplot2 has the functionality you are looking for.

# cut_number() bins by quantiles (groups of roughly equal size), which for
# n = 3 gives tertiles. cut_interval() would create equal-width ranges instead.
filtered_df %>%
  mutate(index_recoded = ggplot2::cut_number(outgroup_index, n = 3))

And to verify the levels are the same:

# smaller dput would be nice
# NOTE(review): `Data` presumably stands for the dput() structure posted in
# the question -- assign that structure to `Data` before running this.
start <- Data

# Base R reference: cut the index at its empirical tertiles.
filtered_df <- start
tertiles <- quantile(filtered_df$outgroup_index, (0:3) / 3)
filtered_df$index_recoded <- with(
  filtered_df,
  cut(outgroup_index,
      tertiles,
      include.lowest = TRUE)
)

# Tidyverse version: cut_number() bins by quantiles, so it is the direct
# counterpart of the quantile() + cut() approach above (cut_interval() would
# produce equal-width ranges, which match tertiles only by coincidence).
tv_df <- start
tv_recoded <- tv_df %>%
  mutate(index_recoded = ggplot2::cut_number(outgroup_index, 3)) %>%
  pull(index_recoded)

# TRUE when every observation lands in the same group under both approaches.
all(filtered_df$index_recoded == tv_recoded)

[1] TRUE

CodePudding user response:

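cut has a simpler syntax if you want to divide the data into a fixed number of equal-width intervals. Note that these are equal-width bins, not tertiles: the groups will generally not contain equal numbers of observations.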

# Split outgroup_index into 3 intervals and store the resulting factor.
filtered_df[["index_recoded"]] <- cut(
  filtered_df[["outgroup_index"]],
  breaks = 3
)

You can also use it with labels = FALSE to get 1, 2 and 3 as output.

# Same split into 3 intervals, but labels = FALSE returns the integer
# interval codes (1, 2, 3) instead of a factor.
filtered_df[["index_recoded"]] <- cut(
  filtered_df[["outgroup_index"]],
  breaks = 3,
  labels = FALSE
)
  • Related