1 1 2 3 740 780 780 780
2 1 4 0 890 890 890 890
3 2 3 5 550 550 550 550
4 2 5 10 890 250 250 400
wage_may wage_jun wage_jul wage_aug wage_sep wage_oct wage_nov
1 780 780 780 780 780 780 780
2 890 890 890 890 890 790 250
3 550 550 550 550 550 550 550
4 500 890 600 750 890 300 300
wage_dec
1 780
2 300
3 550
4 300
structure(list(id = c(1L, 1L, 2L, 2L), hire_month = c(2L, 4L,
3L, 5L), sep_month = c(3L, 0L, 5L, 10L), wage_jan = c(740L, 890L,
550L, 890L), wage_feb = c(780L, 890L, 550L, 250L), wage_mar = c(780L,
890L, 550L, 250L), wage_apr = c(780L, 890L, 550L, 400L), wage_may = c(780L,
890L, 550L, 500L), wage_jun = c(780L, 890L, 550L, 890L), wage_jul = c(780L,
890L, 550L, 600L), wage_aug = c(780L, 890L, 550L, 750L), wage_sep = c(780L,
890L, 550L, 890L), wage_oct = c(780L, 790L, 550L, 300L), wage_nov = c(780L,
250L, 550L, 300L), wage_dec = c(780L, 300L, 550L, 300L)), class = "data.frame", row.names = c(NA,
-4L))
I would like to create a column for last wage in the following way: If sep_month is 3, I would like this new column to give last_wage as the one in wage_mar; if sep_month is 4, I would like this column to give last_wage as the one in wage_apr and so forth.
I basically want to associate the sep_month to its respective wage depending on the month.
CodePudding user response:
You could do this with mutate()
and case_when()
from the dplyr
library:
df %>%
mutate(last_wage =
case_when(
sep_month == 0 ~ wage_dec,
sep_month == 1 ~ wage_jan,
sep_month == 2 ~ wage_feb,
sep_month == 3 ~ wage_mar,
sep_month == 4 ~ wage_apr,
sep_month == 5 ~ wage_may,
sep_month == 6 ~ wage_jun,
sep_month == 7 ~ wage_jul,
sep_month == 8 ~ wage_aug,
sep_month == 9 ~ wage_sep,
sep_month == 10 ~ wage_oct,
sep_month == 11 ~ wage_nov,
sep_month == 12 ~ wage_dec,
TRUE ~ NA_integer_
)
)
CodePudding user response:
Another option would be to pivot to long form and make the calculation then pivot back to wide form with tidyverse
. I also added in converting 0
to 12
, but then converted it back at the end.
library(tidyverse)
df %>%
mutate(rowid = row_number()) %>%
pivot_longer(-c(id, hire_month, sep_month, rowid), names_to = c(".value", "month"), names_sep = "_") %>%
mutate(sep_month = ifelse(sep_month == 0, 12, sep_month),
month_num = match(str_to_title(month),month.abb),
last_wage = ifelse(sep_month == month_num, wage, NA) ) %>%
select(-month_num) %>%
pivot_wider(id_cols = c(id, hire_month, sep_month, last_wage), names_from = "month", names_glue = "{.value}_{month}", values_from = wage) %>%
group_by(id, hire_month, sep_month) %>%
fill(everything(), .direction = "downup") %>%
slice(1) %>%
mutate(sep_month = ifelse(sep_month == 12, 0, sep_month))
Output
id hire_month sep_month last_wage wage_jan wage_feb wage_mar wage_apr wage_may wage_jun wage_jul wage_aug wage_sep wage_oct wage_nov wage_dec
<int> <int> <dbl> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 1 2 3 780 740 780 780 780 780 780 780 780 780 780 780 780
2 1 4 12 300 890 890 890 890 890 890 890 890 890 790 250 300
3 2 3 5 550 550 550 550 550 550 550 550 550 550 550 550 550
4 2 5 10 300 890 250 250 400 500 890 600 750 890 300 300 300