I have a small dataset weekly_data
of projects were working on, and anticipated time to be spent and duration in weeks for each of the two milestones, labeled CD and CA
# A tibble: 17 x 5
dsk_proj_number hrs_per_week_cd cd_dur_weeks hrs_per_week_ca ca_dur_weeks
<fct> <dbl> <dbl> <dbl> <dbl>
1 17061 0 0 2.43 28
2 18009 0 0 1.83 12
3 18029 0 0 2.83 24
4 19029 1.5 16 2.43 28
5 19050 0 0 2.8 20
6 20012 0 0 3.4 20
7 21016 3 8 2.43 28
8 21022 0 0 4.25 16
9 21050 0 0 3.4 20
10 21061a 17.5 24 15.8 52
11 21061b 1.5 4 7.5 8
12 21061c 7.67 12 5 12
13 21061d 0 0 0 0
14 21061e 8 1 3 1
15 21094 0 0 3 8
16 22027 0 0 0.75 8
17 22068 2.92 12 2.38 8
I want to get this into a format wheree, based on the cd_dur_weeks
and ca_dur_weeks
durations indicated, I have the estiamted number of hours by weeks, for all the weeks, like this:
> sched %>% head(15)
# A tibble: 15 x 17
`18009` `22068` `17061` `21050` `19029` `21016` `21022` `19050` `18029` `22027` `20012` `21094` `21061a` `21061b` `21061c` `21061d` `21061e`
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1.83 2.92 2.43 3.4 1.5 3 4.25 2.8 2.83 0.75 3.4 3 17.5 1.5 7.67 0 8
2 1.83 2.92 2.43 3.4 1.5 3 4.25 2.8 2.83 0.75 3.4 3 17.5 1.5 7.67 0 3
3 1.83 2.92 2.43 3.4 1.5 3 4.25 2.8 2.83 0.75 3.4 3 17.5 1.5 7.67 0 0
4 1.83 2.92 2.43 3.4 1.5 3 4.25 2.8 2.83 0.75 3.4 3 17.5 1.5 7.67 0 0
5 1.83 2.92 2.43 3.4 1.5 3 4.25 2.8 2.83 0.75 3.4 3 17.5 7.5 7.67 0 0
6 1.83 2.92 2.43 3.4 1.5 3 4.25 2.8 2.83 0.75 3.4 3 17.5 7.5 7.67 0 0
7 1.83 2.92 2.43 3.4 1.5 3 4.25 2.8 2.83 0.75 3.4 3 17.5 7.5 7.67 0 0
8 1.83 2.92 2.43 3.4 1.5 3 4.25 2.8 2.83 0.75 3.4 3 17.5 7.5 7.67 0 0
9 1.83 2.92 2.43 3.4 1.5 2.43 4.25 2.8 2.83 0 3.4 0 17.5 7.5 7.67 0 0
10 1.83 2.92 2.43 3.4 1.5 2.43 4.25 2.8 2.83 0 3.4 0 17.5 7.5 7.67 0 0
11 1.83 2.92 2.43 3.4 1.5 2.43 4.25 2.8 2.83 0 3.4 0 17.5 7.5 7.67 0 0
12 1.83 2.92 2.43 3.4 1.5 2.43 4.25 2.8 2.83 0 3.4 0 17.5 7.5 7.67 0 0
13 0 2.38 2.43 3.4 1.5 2.43 4.25 2.8 2.83 0 3.4 0 17.5 0 5 0 0
14 0 2.38 2.43 3.4 1.5 2.43 4.25 2.8 2.83 0 3.4 0 17.5 0 5 0 0
15 0 2.38 2.43 3.4 1.5 2.43 4.25 2.8 2.83 0 3.4 0 17.5 0 5 0 0
I was able to use pivot_wider()
to make the project numbers the variable names, and each row an individual week, but was forced to use for()
's and if()
's. Seems like there should be an easier way to get this done.
Here's the code I used:
sched <- data.frame(dsk_proj_number = rezvan$dsk_proj_number)
sched$weeks <- NA
sched <- sched %>% pivot_wider(names_from = dsk_proj_number, values_from = weeks)
for(proj_num in weekly_data$dsk_proj_number){
duration_cd = weekly_data[which(weekly_data$dsk_proj_number == proj_num), "cd_dur_weeks"] %>% as.numeric
duration_ca = weekly_data[which(weekly_data$dsk_proj_number == proj_num), "ca_dur_weeks"] %>% as.numeric
if(duration_cd > 0) {
sched[1:duration_cd, proj_num] = weekly_data[which(weekly_data$dsk_proj_number == proj_num), "hrs_per_week_cd"]
}
if(duration_ca > 0) {
sched[duration_cd 1:duration_ca, proj_num] = weekly_data[which(weekly_data$dsk_proj_number == proj_num), "hrs_per_week_ca"]
}
}
sched <- sched %>% mutate_all(coalesce, 0)
CodePudding user response:
You can use rep()
to repeat elements a certain number of times, and then use c()
to concatenate them into a long sequence. I use rowwise
from dplyr
to conveniently do this row-by-row.
Then you can unnest
the lists of vectors.
library(tidyverse)
sched <- weekly_data %>%
mutate(max_weeks = max(cd_dur_weeks ca_dur_weeks)) %>%
rowwise() %>%
mutate(week = list(c(rep(hrs_per_week_cd, cd_dur_weeks), rep(hrs_per_week_ca, ca_dur_weeks), rep(0, max_weeks-cd_dur_weeks-ca_dur_weeks)))) %>%
ungroup() %>%
select(dsk_proj_number, week) %>%
pivot_wider(names_from = "dsk_proj_number", values_from = week) %>%
unnest(everything())
CodePudding user response:
df %>%
select(1:3) %>%
slice(rep(1:nrow(.), cd_dur_weeks)) %>%
select(-3) %>%
mutate(milestone = 1) %>%
rename(hrs_per_week = hrs_per_week_cd) -> df1
df %>%
select(c(1,4,5)) %>%
slice(rep(1:nrow(.), ca_dur_weeks)) %>%
select(-3) %>%
mutate(milestone = 2) %>%
rename(hrs_per_week = hrs_per_week_ca) -> df2
rbind(df1, df2) %>%
arrange(dsk_proj_number, milestone) %>%
group_by(dsk_proj_number) %>%
mutate(week = seq_along(dsk_proj_number)) %>%
pivot_wider(id_cols=week, names_from=dsk_proj_number, values_from=hrs_per_week) %>%
replace(is.na(.), 0)