I am working with survey data where observations have been collected across 7 different countries. Since I am not interested in studying country-specific differences, I'd like to reduce the levels of the factor for my variable "income" by using a single currency. This is an example of the levels I have.
800 Pounds or less
800 – 1,600 Pounds
1,600 – 2,400 Pounds
2,400 – 3,200 Pounds
3,200 – 4,000 Pounds
4,000 – 4,800 Pounds
4,800 – 5,600 Pounds
5,600 Pounds or more
And the same for Euro and CHF. What I am trying to do is convert everything to Euro bands. I've tried:
# Recode every currency-specific income band to its Euro equivalent.
# Work on a character copy so that new labels can be assigned freely.
income_chr <- as.character(data$demo_income)

# The survey labels may contain an en dash, em dash or minus sign, whereas the
# lookup below is written with the ASCII hyphen-minus. Normalise the dashes
# first; otherwise the range labels never match and are left unchanged.
income_chr <- gsub("[\u2013\u2014\u2212]", "-", income_chr)

# Each name is the target Euro band; its value lists the labels merged into it.
euro_bands <- list(
  "1,000 Euro or less" = c(
    "800 Pounds or less", "1.000 CHF or less", "1,000 Euro or less"
  ),
  "1,000 - 2,000 Euro" = c(
    "800 - 1,600 Pounds", "1.000 - 2.000 CHF", "1,000 - 2,000 Euro"
  ),
  "2,000 - 3,000 Euro" = c(
    "1,600 - 2,400 Pounds", "2.000 - 3.000 CHF", "2,000 - 3,000 Euro"
  ),
  "3,000 - 4,000 Euro" = c(
    "2,400 - 3,200 Pounds", "3.000 - 4.000 CHF", "3,000 - 4,000 Euro"
  ),
  "4,000 - 5,000 Euro" = c(
    "3,200 - 4,000 Pounds", "4.000 - 5.000 CHF", "4,000 - 5,000 Euro"
  ),
  "5,000 - 6,000 Euro" = c(
    "4,000 - 4,800 Pounds", "5.000 - 6.000 CHF", "5,000 - 6,000 Euro"
  ),
  "6,000 - 7,000 Euro" = c(
    "4,800 - 5,600 Pounds", "6.000 - 7.000 CHF", "6,000 - 7,000 Euro"
  ),
  "7,000 Euro or more" = c(
    "5,600 Pounds or more", "7.000 CHF or more", "7,000 Euro or more"
  )
)

for (band in names(euro_bands)) {
  # %in% never yields NA, so missing answers stay NA and are not recoded.
  income_chr[income_chr %in% euro_bands[[band]]] <- band
}

data$demo_income <- income_chr
table(data$demo_income)
data$demo_income <- as.factor(data$demo_income)
But it does not work. I also tried recode_factor or
# Collapse the currency-specific income bands into the Euro bands.
# When `levels<-` is given a named list on a factor, each name is a new level
# and its value lists the existing levels that are merged into it.
# Assumes WB.Data$demo_income is a factor; the old labels must match the
# existing levels exactly, including the dash character.
# Each old label appears exactly once: listing "6.000 - 7.000 CHF" under two
# targets made the later entry win and left "7.000 CHF or more" unmapped (NA).
levels(WB.Data$demo_income) <- list(
  "1,000 Euro or less" = c(
    "800 Pounds or less", "1.000 CHF or less", "1,000 Euro or less"
  ),
  "1,000 - 2,000 Euro" = c(
    "800 - 1,600 Pounds", "1.000 - 2.000 CHF", "1,000 - 2,000 Euro"
  ),
  "2,000 - 3,000 Euro" = c(
    "1,600 - 2,400 Pounds", "2.000 - 3.000 CHF", "2,000 - 3,000 Euro"
  ),
  "3,000 - 4,000 Euro" = c(
    "2,400 - 3,200 Pounds", "3.000 - 4.000 CHF", "3,000 - 4,000 Euro"
  ),
  "4,000 - 5,000 Euro" = c(
    "3,200 - 4,000 Pounds", "4.000 - 5.000 CHF", "4,000 - 5,000 Euro"
  ),
  "5,000 - 6,000 Euro" = c(
    "4,000 - 4,800 Pounds", "5.000 - 6.000 CHF", "5,000 - 6,000 Euro"
  ),
  "6,000 - 7,000 Euro" = c(
    "4,800 - 5,600 Pounds", "6.000 - 7.000 CHF", "6,000 - 7,000 Euro"
  ),
  "7,000 Euro or more" = c(
    "5,600 Pounds or more", "7.000 CHF or more", "7,000 Euro or more"
  )
)
But nothing.
CodePudding user response:
forcats::fct_collapse()
is well-suited to this:
library(dplyr)
library(forcats)

# fct_collapse() takes the factor (or character vector) itself as its first
# argument, so it is applied to the column inside mutate() instead of piping
# the whole data frame into it.
# Each name is the new Euro band; its value lists the old levels merged into it.
# The old labels must match the existing levels exactly (including the dash
# character); fct_collapse() warns about labels it cannot find.
data <- data %>%
  mutate(
    demo_income = fct_collapse(
      demo_income,
      "1,000 Euro or less" = c(
        "800 Pounds or less", "1.000 CHF or less", "1,000 Euro or less"
      ),
      "1,000 - 2,000 Euro" = c(
        "800 - 1,600 Pounds", "1.000 - 2.000 CHF", "1,000 - 2,000 Euro"
      ),
      "2,000 - 3,000 Euro" = c(
        "1,600 - 2,400 Pounds", "2.000 - 3.000 CHF", "2,000 - 3,000 Euro"
      ),
      "3,000 - 4,000 Euro" = c(
        "2,400 - 3,200 Pounds", "3.000 - 4.000 CHF", "3,000 - 4,000 Euro"
      ),
      "4,000 - 5,000 Euro" = c(
        "3,200 - 4,000 Pounds", "4.000 - 5.000 CHF", "4,000 - 5,000 Euro"
      ),
      "5,000 - 6,000 Euro" = c(
        "4,000 - 4,800 Pounds", "5.000 - 6.000 CHF", "5,000 - 6,000 Euro"
      ),
      "6,000 - 7,000 Euro" = c(
        "4,800 - 5,600 Pounds", "6.000 - 7.000 CHF", "6,000 - 7,000 Euro"
      ),
      "7,000 Euro or more" = c(
        "5,600 Pounds or more", "7.000 CHF or more", "7,000 Euro or more"
      )
    )
  )
CodePudding user response:
It seems that your data and your code use different characters for the dash, so the corresponding entries are not matched in R. For example, comparing the 800 – 1,600 Pounds
from your example levels against the 800 - 1,600 Pounds
from your code in R gives FALSE
, because the left side uses an en dash ("–", U+2013), whereas the right side uses the ASCII hyphen-minus ("-", U+002D), which is slightly shorter:
"800 – 1,600 Pounds" == "800 - 1,600 Pounds"
[1] FALSE
If the same character is used, your code should work. E.g.:
Example data:
# Example data: one row per income label, covering all three currencies.
# stringsAsFactors = TRUE is set explicitly because data.frame() has not
# converted strings to factors by default since R 4.0.0; without it
# levels() returns NULL and the level count below would be 0.
data <- data.frame(
  demo_income = c(
    "800 Pounds or less",
    "800 - 1,600 Pounds",
    "1,600 - 2,400 Pounds",
    "2,400 - 3,200 Pounds",
    "3,200 - 4,000 Pounds",
    "4,000 - 4,800 Pounds",
    "4,800 - 5,600 Pounds",
    "5,600 Pounds or more",
    "1.000 CHF or less",
    "1.000 - 2.000 CHF",
    "2.000 - 3.000 CHF",
    "3.000 - 4.000 CHF",
    "4.000 - 5.000 CHF",
    "5.000 - 6.000 CHF",
    "6.000 - 7.000 CHF",
    "7.000 CHF or more",
    "1,000 Euro or less",
    "1,000 - 2,000 Euro",
    "2,000 - 3,000 Euro",
    "3,000 - 4,000 Euro",
    "4,000 - 5,000 Euro",
    "5,000 - 6,000 Euro",
    "6,000 - 7,000 Euro",
    "7,000 Euro or more"
  ),
  stringsAsFactors = TRUE
)
# Number of distinct income labels before recoding.
length(levels(data$demo_income))
24
After applying your code (but using the same "-" character as the data):
# Recode on a plain character column so that new labels can be assigned.
data$demo_income <- as.character(data$demo_income)

# Target Euro band -> labels that are replaced by it.
recode_to_euro <- list(
  "1,000 Euro or less" = c(
    "800 Pounds or less", "1.000 CHF or less", "1,000 Euro or less"
  ),
  "1,000 - 2,000 Euro" = c(
    "800 - 1,600 Pounds", "1.000 - 2.000 CHF", "1,000 - 2,000 Euro"
  ),
  "2,000 - 3,000 Euro" = c(
    "1,600 - 2,400 Pounds", "2.000 - 3.000 CHF", "2,000 - 3,000 Euro"
  ),
  "3,000 - 4,000 Euro" = c(
    "2,400 - 3,200 Pounds", "3.000 - 4.000 CHF", "3,000 - 4,000 Euro"
  ),
  "4,000 - 5,000 Euro" = c(
    "3,200 - 4,000 Pounds", "4.000 - 5.000 CHF", "4,000 - 5,000 Euro"
  ),
  "5,000 - 6,000 Euro" = c(
    "4,000 - 4,800 Pounds", "5.000 - 6.000 CHF", "5,000 - 6,000 Euro"
  ),
  "6,000 - 7,000 Euro" = c(
    "4,800 - 5,600 Pounds", "6.000 - 7.000 CHF", "6,000 - 7,000 Euro"
  ),
  "7,000 Euro or more" = c(
    "5,600 Pounds or more", "7.000 CHF or more", "7,000 Euro or more"
  )
)

# Overwrite every matching label with its Euro band, one band at a time.
for (euro_label in names(recode_to_euro)) {
  hits <- data$demo_income %in% recode_to_euro[[euro_label]]
  data$demo_income[hits] <- euro_label
}

# Frequency of each band after recoding.
table(data$demo_income)

# Back to a factor; only the Euro bands remain as levels.
data$demo_income <- as.factor(data$demo_income)
length(levels(data$demo_income))
1,000 - 2,000 Euro 1,000 Euro or less 2,000 - 3,000 Euro 3,000 - 4,000 Euro
3 3 3 3
4,000 - 5,000 Euro 5,000 - 6,000 Euro 6,000 - 7,000 Euro 7,000 Euro or more
3 3 3 3
8