Pivoting and Distributing values based on Duration-CodePudding

I have a small dataset weekly_data of projects were working on, and anticipated time to be spent and duration in weeks for each of the two milestones, labeled CD and CA

# A tibble: 17 x 5
   dsk_proj_number hrs_per_week_cd cd_dur_weeks hrs_per_week_ca ca_dur_weeks
   <fct>                     <dbl>        <dbl>           <dbl>        <dbl>
 1 17061                      0               0            2.43           28
 2 18009                      0               0            1.83           12
 3 18029                      0               0            2.83           24
 4 19029                      1.5            16            2.43           28
 5 19050                      0               0            2.8            20
 6 20012                      0               0            3.4            20
 7 21016                      3               8            2.43           28
 8 21022                      0               0            4.25           16
 9 21050                      0               0            3.4            20
10 21061a                    17.5            24           15.8            52
11 21061b                     1.5             4            7.5             8
12 21061c                     7.67           12            5              12
13 21061d                     0               0            0               0
14 21061e                     8               1            3               1
15 21094                      0               0            3               8
16 22027                      0               0            0.75            8
17 22068                      2.92           12            2.38            8

I want to get this into a format wheree, based on the cd_dur_weeks and ca_dur_weeks durations indicated, I have the estiamted number of hours by weeks, for all the weeks, like this:

> sched %>% head(15)
# A tibble: 15 x 17
   `18009` `22068` `17061` `21050` `19029` `21016` `21022` `19050` `18029` `22027` `20012` `21094` `21061a` `21061b` `21061c` `21061d` `21061e`
     <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
 1    1.83    2.92    2.43     3.4     1.5    3       4.25     2.8    2.83    0.75     3.4       3     17.5      1.5     7.67        0        8
 2    1.83    2.92    2.43     3.4     1.5    3       4.25     2.8    2.83    0.75     3.4       3     17.5      1.5     7.67        0        3
 3    1.83    2.92    2.43     3.4     1.5    3       4.25     2.8    2.83    0.75     3.4       3     17.5      1.5     7.67        0        0
 4    1.83    2.92    2.43     3.4     1.5    3       4.25     2.8    2.83    0.75     3.4       3     17.5      1.5     7.67        0        0
 5    1.83    2.92    2.43     3.4     1.5    3       4.25     2.8    2.83    0.75     3.4       3     17.5      7.5     7.67        0        0
 6    1.83    2.92    2.43     3.4     1.5    3       4.25     2.8    2.83    0.75     3.4       3     17.5      7.5     7.67        0        0
 7    1.83    2.92    2.43     3.4     1.5    3       4.25     2.8    2.83    0.75     3.4       3     17.5      7.5     7.67        0        0
 8    1.83    2.92    2.43     3.4     1.5    3       4.25     2.8    2.83    0.75     3.4       3     17.5      7.5     7.67        0        0
 9    1.83    2.92    2.43     3.4     1.5    2.43    4.25     2.8    2.83    0        3.4       0     17.5      7.5     7.67        0        0
10    1.83    2.92    2.43     3.4     1.5    2.43    4.25     2.8    2.83    0        3.4       0     17.5      7.5     7.67        0        0
11    1.83    2.92    2.43     3.4     1.5    2.43    4.25     2.8    2.83    0        3.4       0     17.5      7.5     7.67        0        0
12    1.83    2.92    2.43     3.4     1.5    2.43    4.25     2.8    2.83    0        3.4       0     17.5      7.5     7.67        0        0
13    0       2.38    2.43     3.4     1.5    2.43    4.25     2.8    2.83    0        3.4       0     17.5      0       5           0        0
14    0       2.38    2.43     3.4     1.5    2.43    4.25     2.8    2.83    0        3.4       0     17.5      0       5           0        0
15    0       2.38    2.43     3.4     1.5    2.43    4.25     2.8    2.83    0        3.4       0     17.5      0       5           0        0

I was able to use pivot_wider() to make the project numbers the variable names, and each row an individual week, but was forced to use for()'s and if()'s. Seems like there should be an easier way to get this done.

Here's the code I used:

sched <- data.frame(dsk_proj_number = rezvan$dsk_proj_number)
sched$weeks <- NA
sched <- sched %>% pivot_wider(names_from = dsk_proj_number, values_from = weeks)


for(proj_num in weekly_data$dsk_proj_number){
  duration_cd = weekly_data[which(weekly_data$dsk_proj_number == proj_num), "cd_dur_weeks"] %>% as.numeric
  duration_ca = weekly_data[which(weekly_data$dsk_proj_number == proj_num), "ca_dur_weeks"] %>% as.numeric
  
  if(duration_cd > 0) {
    sched[1:duration_cd, proj_num] = weekly_data[which(weekly_data$dsk_proj_number == proj_num), "hrs_per_week_cd"]
  }
  
  if(duration_ca > 0) {
    sched[duration_cd   1:duration_ca, proj_num] = weekly_data[which(weekly_data$dsk_proj_number == proj_num), "hrs_per_week_ca"]
  }
  
  
}



sched <- sched %>% mutate_all(coalesce, 0)

CodePudding user response：

You can use rep() to repeat elements a certain number of times, and then use c() to concatenate them into a long sequence. I use rowwise from dplyr to conveniently do this row-by-row.

Then you can unnest the lists of vectors.

library(tidyverse)
sched <- weekly_data %>% 
    mutate(max_weeks = max(cd_dur_weeks   ca_dur_weeks)) %>% 
    rowwise() %>% 
    mutate(week = list(c(rep(hrs_per_week_cd, cd_dur_weeks), rep(hrs_per_week_ca, ca_dur_weeks), rep(0, max_weeks-cd_dur_weeks-ca_dur_weeks)))) %>% 
    ungroup() %>% 
    select(dsk_proj_number, week) %>% 
    pivot_wider(names_from = "dsk_proj_number", values_from = week) %>% 
    unnest(everything())

CodePudding user response：

df %>% 
    select(1:3) %>% 
    slice(rep(1:nrow(.), cd_dur_weeks)) %>% 
    select(-3) %>%
    mutate(milestone = 1) %>%
    rename(hrs_per_week = hrs_per_week_cd) -> df1

df %>% 
    select(c(1,4,5)) %>% 
    slice(rep(1:nrow(.), ca_dur_weeks)) %>% 
    select(-3) %>%
    mutate(milestone = 2) %>%
    rename(hrs_per_week = hrs_per_week_ca) -> df2

rbind(df1, df2) %>%
    arrange(dsk_proj_number, milestone) %>%
    group_by(dsk_proj_number) %>%
    mutate(week = seq_along(dsk_proj_number)) %>%
    pivot_wider(id_cols=week, names_from=dsk_proj_number, values_from=hrs_per_week) %>%
    replace(is.na(.), 0)