I have the WVS 6th wave data frame. I computed the outgroup trust index (`outgroup_index`) and I want to divide this vector into 3 groups according to tertiles.
I use base R functions to do that:
# Recode the index into three groups based on its tertiles.
# Find the tertiles of the index: the 0, 1/3, 2/3 and 1 quantiles are the cut
# points. na.rm = TRUE keeps quantile() from erroring on missing index values.
tertiles <- quantile(filtered_df$outgroup_index, 0:3 / 3, na.rm = TRUE)
# Cut the target variable into tertiles. include.lowest = TRUE closes the first
# interval on the left so the minimum value is not recoded to NA.
filtered_df$index_recoded <- cut(
  filtered_df$outgroup_index,
  breaks = tertiles,
  include.lowest = TRUE
)
But I am wondering about other possible, neater ways to do it (preferably using dplyr/tidyverse or any other packages)?
Data:
structure(list(V2 = structure(c(643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643, 643,
643, 643, 643, 643), label = "Country/region", format.spss = "F4.0", labels = c(`Not asked in survey` = -4,
Algeria = 12, Azerbaijan = 31, Argentina = 32, Australia = 36,
Armenia = 51, Brazil = 76, Belarus = 112, Chile = 152, China = 156,
`Taiwan ROC` = 158, Colombia = 170, Cyprus = 196, Ecuador = 218,
Estonia = 233, Georgia = 268, Palestine = 275, Germany = 276,
Ghana = 288, Haiti = 332, `Hong Kong SAR` = 344, India = 356,
Iraq = 368, Japan = 392, Kazakhstan = 398, Jordan = 400, `South Korea` = 410,
Kuwait = 414, Kyrgyzstan = 417, Lebanon = 422, Libya = 434, Malaysia = 458,
Mexico = 484, Morocco = 504, Netherlands = 528, `New Zealand` = 554,
Nigeria = 566, Pakistan = 586, Peru = 604, Philippines = 608,
Poland = 616, Qatar = 634, Romania = 642, Russia = 643, Rwanda = 646,
Singapore = 702, Slovenia = 705, `South Africa` = 710, Zimbabwe = 716,
Spain = 724, Sweden = 752, Thailand = 764, `Trinidad and Tobago` = 780,
Tunisia = 788, Turkey = 792, Ukraine = 804, Egypt = 818, `United States` = 840,
Uruguay = 858, Uzbekistan = 860, Yemen = 887), class = c("haven_labelled",
"vctrs_vctr", "double")), V105 = structure(c(4, 3, 3, 4, 3, 4,
4, 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 2, 2, 2, 1, 1,
2, 4, 2, 2, 2, 1, 2, 1, 4, 2, 1, 4, 2, 3, 3, 2, 3, 2, 3, 2, 3,
2, 2, 3, 3, 3, 3, 3, 3, NA, 3, 3, 4, 2, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 2, 2, 3, 3, 3, 2, 3, NA), label = "Trust: People you meet for the first time (B)", format.spss = "F3.0", labels = c(`SE:Inapplicable ; RU:Inappropriate response; HT: Dropped out` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U 00B4>t know` = -1,
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3,
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr",
"double")), V106 = structure(c(3, 2, NA, 4, 2, 4, 4, 3, 3, 4,
3, 3, 4, 4, 4, 4, NA, NA, NA, NA, 3, 2, 2, 2, 2, 2, 2, 3, 3,
3, 3, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 2, 3, 2, 2, 2, 1, 1, 2, 2,
2, 2, 1, 1, 2, 1, 4, 2, 1, 4, 2, 3, 3, 2, 2, 2, 3, 2, 3, 2, 2,
NA, 3, NA, 3, 3, 3, 2, 3, 3, 4, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 2, 2, 2, 3, 2, 2, 2, 3), label = "Trust: People of another religion (B)", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; HT: Dropped` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U 00B4>t know` = -1,
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3,
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr",
"double")), V107 = structure(c(3, 4, NA, 4, 2, 4, 4, 3, 3, 4,
3, 3, 4, 4, 4, 4, 3, 2, NA, NA, 3, 2, 2, 2, 2, 2, 2, 3, 3, 3,
3, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2,
2, 1, 1, 2, 1, 4, 2, 1, 3, 2, 3, 2, 2, 2, 2, 3, 2, 3, 2, 2, NA,
3, 2, 3, 3, 3, 2, 3, 3, 3, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 3, 2, 3, 2, 2, 2, 3), label = "Trust: People of another nationality (B)", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; HT: Dropped` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U 00B4>t know` = -1,
`Trust completely` = 1, `Trust somewhat` = 2, `Do not trust very much` = 3,
`Do not trust at all` = 4), class = c("haven_labelled", "vctrs_vctr",
"double")), V248 = structure(c(9, 8, 5, 8, 8, 8, 8, 9, 7, 9,
9, 5, 5, 6, 5, 5, 5, 5, 5, 4, 9, 9, 4, 9, 9, 3, 6, 9, 8, 9, 9,
9, NA, 9, 5, 9, 5, 7, 9, 5, 5, 9, 9, 8, 9, 9, 5, 5, 5, 9, 9,
8, 5, 8, 9, 9, 5, 8, 9, 9, 9, 7, 7, 5, 4, 6, 9, 6, 6, 9, 9, 5,
6, 7, 5, 4, 7, 7, 5, 5, 5, 5, 8, 9, 8, 9, 9, 9, 9, 9, 9, 9, 5,
9, 9, 5, 9, 8, 9, 5, 5), label = "Highest educational level attained", format.spss = "F3.0", labels = c(`AU: Inapplicable (No-school education) DE,SE:Inapplicable ;` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U 00B4>t know` = -1,
`No formal education` = 1, `Incomplete primary school` = 2, `Complete primary school` = 3,
`Incomplete secondary school: technical/ vocational type` = 4,
`Complete secondary school: technical/ vocational type` = 5,
`Incomplete secondary school: university-preparatory type` = 6,
`Complete secondary school: university-preparatory type` = 7,
`Some university-level education, without degree` = 8, `University - level education, with degree` = 9
), class = c("haven_labelled", "vctrs_vctr", "double")), V59 = structure(c(9,
5, 6, 8, 6, 7, NA, 8, 5, 3, 4, 7, 2, 1, 1, 6, 8, 6, NA, NA, 1,
5, NA, 6, 1, 2, 9, 5, 6, NA, NA, 3, 6, 6, 4, NA, 6, 6, NA, NA,
3, 9, 8, 10, 9, 6, 10, 9, 8, 9, 9, 10, 6, 4, 4, 6, 4, 10, 3,
3, 4, 3, 5, 4, 7, 3, 3, 4, 3, 7, 4, 6, 4, 1, 1, 6, 1, 1, 6, 1,
1, 4, 4, 3, 4, 4, 4, 3, 4, 4, 4, 2, 7, 3, 1, 5, 6, 7, 2, 4, 5
), label = "Satisfaction with financial situation of household", format.spss = "F3.0", labels = c(`HT: Dropped out survey;DE,SE:Inapplicable ; RU:Inappropriate` = -5,
`Not asked` = -4, `No answer` = -2, `Don<U 00B4>t know` = -1,
Dissatisfied = 1, `2` = 2, `3` = 3, `4` = 4, `5` = 5, `6` = 6,
`7` = 7, `8` = 8, `9` = 9, Satisfied = 10), class = c("haven_labelled",
"vctrs_vctr", "double")), V237 = structure(c(3, 2, 2, 2, NA,
1, 2, 2, 1, 2, 2, 2, 2, 3, 2, 1, 1, 3, 2, 2, NA, 2, 2, 3, 4,
2, 2, 1, NA, 1, 1, 1, NA, NA, NA, 1, NA, 1, 1, NA, 2, 1, 2, 1,
1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3,
2, 3, 2, 1, 2, 3, 2, 2, 2, NA, 2, 2, 4, 2, 2, 2, 1, 1, 2, 1,
2, 3, 2, 2, 1, 2, 2, 2, 3, 3, 2, 3, 2, 2, NA, 3), label = "Family savings during past year", format.spss = "F3.0", labels = c(`DE,SE:Inapplicable ; RU:Inappropriate response; BH: Missing;` = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don<U 00B4>t know` = -1,
`Save money` = 1, `Just get by` = 2, `Spent some savings and borrowed money` = 3,
`Spent savings and borrowed money` = 4), class = c("haven_labelled",
"vctrs_vctr", "double")), V105_rec = c(1, 2, 2, 1, 2, 1, 1, 1,
1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 3, 3, 3, 4, 4, 3, 1,
3, 3, 3, 4, 3, 4, 1, 3, 4, 1, 3, 2, 2, 3, 2, 3, 2, 3, 2, 3, 3,
2, 2, 2, 2, 2, 2, NA, 2, 2, 1, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 3, 3, 2, 2, 2, 3, 2, NA), V106_rec = c(2, 3, NA, 1, 3,
1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, NA, NA, NA, NA, 2, 3, 3, 3,
3, 3, 3, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, 3,
3, 4, 4, 3, 3, 3, 3, 4, 4, 3, 4, 1, 3, 4, 1, 3, 2, 2, 3, 3, 3,
2, 3, 2, 3, 3, NA, 2, NA, 2, 2, 2, 3, 2, 2, 1, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 3, 3, 3, 2), V107_rec = c(2,
1, NA, 1, 3, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 2, 3, NA, NA, 2,
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 4,
3, 3, 3, 3, 4, 4, 3, 4, 3, 3, 4, 4, 3, 4, 1, 3, 4, 2, 3, 2, 3,
3, 3, 3, 2, 3, 2, 3, 3, NA, 2, 3, 2, 2, 2, 3, 2, 2, 2, 3, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 2, 3, 2, 3, 3, 3, 2), outgroup_index = c(1.66666666666667,
2, 2, 1, 2.66666666666667, 1, 1, 1.66666666666667, 1.66666666666667,
1, 1.66666666666667, 2, 1, 1, 1, 1, 1.5, 2.5, 2, 2, 2, 3, 3,
3, 3, 3, 2.66666666666667, 2, 2, 2, 2, 1.33333333333333, 1.33333333333333,
2, 2, 2, 2, 2, 2, 2, 2, 2.66666666666667, 2, 3, 3, 3, 4, 4, 3,
2.66666666666667, 3, 3, 3.66666666666667, 4, 3, 4, 1, 3, 4, 1.33333333333333,
3, 2, 2.33333333333333, 3, 2.66666666666667, 3, 2, 3, 2, 3, 3,
2, 2, 2.5, 2, 2, 2, 3, 2, 2, 1.33333333333333, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 3, 2.66666666666667, 2.66666666666667, 2,
2.66666666666667, 3, 2.66666666666667, 2), V59_rec = structure(c(5,
3, 3, 4, 3, 4, NA, 4, 3, 2, 2, 4, 1, 1, 1, 3, 4, 3, NA, NA, 1,
3, NA, 3, 1, 1, 5, 3, 3, NA, NA, 2, 3, 3, 2, NA, 3, 3, NA, NA,
2, 5, 4, 5, 5, 3, 5, 5, 4, 5, 5, 5, 3, 2, 2, 3, 2, 5, 2, 2, 2,
2, 3, 2, 4, 2, 2, 2, 2, 4, 2, 3, 2, 1, 1, 3, 1, 1, 3, 1, 1, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 4, 2, 1, 3, 3, 4, 1, 2, 3), labels = c(`Not satisfied at all` = 1,
`Rather not satisfied` = 2, `Neither satisfied, nor not satisfied` = 3,
`Rather satisfied` = 4, Satisfied = 5), class = c("haven_labelled",
"vctrs_vctr", "double")), V248_dummy = structure(c(1, 1, 0, 1,
1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1,
0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1,
0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1,
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0), labels = c(`A university education and higher` = 1,
`No university education` = 0), class = c("haven_labelled", "vctrs_vctr",
"double")), V237_rec = structure(c(3, 2, 2, 2, NA, 1, 2, 2, 1,
2, 2, 2, 2, 3, 2, 1, 1, 3, 2, 2, NA, 2, 2, 3, 3, 2, 2, 1, NA,
1, 1, 1, NA, NA, NA, 1, NA, 1, 1, NA, 2, 1, 2, 1, 1, 1, 1, 1,
1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 2, 3, 2, 1,
2, 3, 2, 2, 2, NA, 2, 2, 3, 2, 2, 2, 1, 1, 2, 1, 2, 3, 2, 2,
1, 2, 2, 2, 3, 3, 2, 3, 2, 2, NA, 3), labels = c(`Save money` = 1,
`Just get by` = 2, `Spent savings and borrowed money` = 3), class = c("haven_labelled",
"vctrs_vctr", "double"))), row.names = c(NA, -101L), class = c("tbl_df",
"tbl", "data.frame"), label = "filelabel")
CodePudding user response:
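A bit unintuitive, but ggplot2 has the functionality you are looking for. Note that `cut_number()` creates groups with (approximately) equal numbers of observations, i.e. quantile-based groups, whereas `cut_interval()` creates groups of equal range.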
library(dplyr)

# cut_number() bins on quantiles (roughly equal number of observations per
# group), which for n = 3 is the tertile split asked for. cut_interval() would
# instead produce three equal-width bins over the range of the index.
filtered_df %>%
  mutate(index_recoded = ggplot2::cut_number(outgroup_index, 3))
And to verify the levels are the same:
# Check that the base R recoding and the ggplot2 one-liner give the same factor.
# `Data` stands for the tibble from the question (a smaller dput would be nice).
library(dplyr)

start <- Data

# Base R: quantile-based breaks passed to cut().
filtered_df <- start
tertiles <- quantile(filtered_df$outgroup_index, 0:3 / 3, na.rm = TRUE)
filtered_df$index_recoded <- cut(
  filtered_df$outgroup_index,
  breaks = tertiles,
  include.lowest = TRUE
)

# ggplot2: cut_number() is the quantile-based helper, so it matches the base R
# result by construction; cut_interval() (equal-width bins) would only agree
# when the tertiles happen to be evenly spaced.
tv_df <- start
tv_recoded <- tv_df %>%
  mutate(index_recoded = ggplot2::cut_number(outgroup_index, 3)) %>%
  pull(index_recoded)

# identical() compares values and levels at once; `==` on two factors errors
# when their level sets differ.
identical(filtered_df$index_recoded, tv_recoded)
[1] TRUE
CodePudding user response:
`cut()` has a simpler syntax if you want to divide the data into a fixed number of equal-width intervals (note that these are not quantile-based tertiles).
# Split the index into 3 equal-width intervals; the result is a factor whose
# levels are the interval labels.
filtered_df[["index_recoded"]] <- cut(
  filtered_df[["outgroup_index"]],
  breaks = 3
)
You can also use it with `labels = FALSE` to get the integer codes 1, 2 and 3 as output.
# Same 3 equal-width intervals, but labels = FALSE returns the integer bin
# codes instead of a factor.
filtered_df[["index_recoded"]] <- cut(
  filtered_df[["outgroup_index"]],
  breaks = 3,
  labels = FALSE
)