Home > other >  pivot_wider does not keep all the variables
pivot_wider does not keep all the variables

Time:05-07

I would like to keep the variable cat (category) in the output of my function. However, I am not able to keep it. The idea is to apply a similar function to m <- 1 - (1 - se * p2)^df$n based on the category. But in order to perform that step, I need to keep the variable category.

Here's the code:

#script3

suppressPackageStartupMessages({
  library(mc2d)
  library(tidyverse)
})

sim_one <- function() {
df<-data.frame(id=c(1:30),cat=c(rep("a",12),rep("b",18)),month=c(1:6,1,6,4,1,5,2,3,2,5,4,6,3:6,4:6,1:5,5),n=rpois(30,5))
  nr <- nrow(df)
  df$n[df$n == "0"] <- 3
  se <- rbeta(nr, 96, 6)
  epi.a <- rpert(nr, min = 1.5, mode = 2, max = 3)
  p <- 0.2
  p2 <- epi.a*p
  m <- 1 - (1 - se * p2)^df$n
  results <- data.frame(month = df$month, m, df$cat)
  results %>%
    arrange(month) %>%
    group_by(month) %>%
    mutate(n = row_number(), .groups = "drop") %>%
    pivot_wider(
    id_cols = n,
    names_from = month,
    names_glue = "m_{.name}",
    values_from =m
    ) 
}

set.seed(99)

iters <- 1000
sim_list <- replicate(iters, sim_one(), simplify = FALSE)
sim_list[[1]]
#> # A tibble: 7 x 7
#>       n    m_1    m_2    m_3    m_4   m_5    m_6
#>   <int>  <dbl>  <dbl>  <dbl>  <dbl> <dbl>  <dbl>
#> 1     1  0.970  0.623  0.905  0.998 0.929  0.980
#> 2     2  0.912  0.892  0.736  0.830 0.890  0.862
#> 3     3  0.795  0.932  0.553  0.958 0.931  0.798
#> 4     4  0.950  0.892  0.732  0.649 0.777  0.743
#> 5     5 NA     NA     NA      0.657 0.980  0.945
#> 6     6 NA     NA     NA      0.976 0.836 NA    
#> 7     7 NA     NA     NA     NA     0.740 NA

Created on 2022-05-07 by the reprex package (v2.0.1)

CodePudding user response:

If you also want to keep df.cat as a row, then you can add it to the id_cols when you pivot_wider, so that it is not dropped.

suppressPackageStartupMessages({
  library(mc2d)
  library(tidyverse)
})

sim_one <- function() {
  df <-
    data.frame(
      id = c(1:30),
      cat = c(rep("a", 12), rep("b", 18)),
      month = c(1:6, 1, 6, 4, 1, 5, 2, 3, 2, 5, 4, 6, 3:6, 4:6, 1:5, 5),
      n = rpois(30, 5)
    )
  nr <- nrow(df)
  df$n[df$n == "0"] <- 3
  se <- rbeta(nr, 96, 6)
  epi.a <- rpert(nr,
                 min = 1.5,
                 mode = 2,
                 max = 3)
  p <- 0.2
  p2 <- epi.a * p
  m <- 1 - (1 - se * p2) ^ df$n
  results <- data.frame(month = df$month, m, df$cat)
  results %>%
    arrange(month) %>%
    group_by(month) %>%
    mutate(n = row_number(), .groups = "drop") %>%
    pivot_wider(
      id_cols = c(n, df.cat),
      names_from = month,
      names_glue = "m_{.name}",
      values_from = m
    )
}


set.seed(99)

iters <- 100
sim_list <- replicate(iters, sim_one(), simplify = FALSE)
sim_list[[1]]

Output

      n df.cat    m_1    m_2    m_3    m_4    m_5    m_6
  <int> <chr>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
1     1 a       0.970  0.623  0.905  0.998  0.929  0.980
2     2 a       0.912  0.892 NA      0.830  0.890  0.862
3     3 a       0.795 NA     NA     NA     NA     NA    
4     4 b       0.950  0.892  0.732  0.649  0.777  0.743
5     3 b      NA      0.932  0.553  0.958  0.931  0.798
6     2 b      NA     NA      0.736 NA     NA     NA    
7     5 b      NA     NA     NA      0.657  0.980  0.945
8     6 b      NA     NA     NA      0.976  0.836 NA    
9     7 b      NA     NA     NA     NA      0.740 NA    

CodePudding user response:

If you drop idcols argument from pivot_wider it should keep the df.cat column.

Also mutate does not have .groups argument, so here it actually creates a new column with the name .groups. .groups argument is present in summarise.

library(mc2d)
library(tidyverse)

sim_one <- function() {
  df<-data.frame(id=c(1:30),cat=c(rep("a",12),rep("b",18)),month=c(1:6,1,6,4,1,5,2,3,2,5,4,6,3:6,4:6,1:5,5),n=rpois(30,5))
  nr <- nrow(df)
  df$n[df$n == "0"] <- 3
  se <- rbeta(nr, 96, 6)
  epi.a <- rpert(nr, min = 1.5, mode = 2, max = 3)
  p <- 0.2
  p2 <- epi.a*p
  m <- 1 - (1 - se * p2)^df$n
  results <- data.frame(month = df$month, m, df$cat)
  results %>%
    arrange(month) %>%
    group_by(month) %>%
    mutate(n = row_number()) %>%
    pivot_wider(
      names_from = month,
      names_glue = "m_{.name}",
      values_from =m
    ) 
}

set.seed(99)

iters <- 1000
sim_list <- replicate(iters, sim_one(), simplify = FALSE)
sim_list[[1]]

#  df.cat     n    m_1    m_2    m_3    m_4    m_5    m_6
#  <chr>  <int>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
#1 a          1  0.970  0.623  0.905  0.998  0.929  0.980
#2 a          2  0.912  0.892 NA      0.830  0.890  0.862
#3 a          3  0.795 NA     NA     NA     NA     NA    
#4 b          4  0.950  0.892  0.732  0.649  0.777  0.743
#5 b          3 NA      0.932  0.553  0.958  0.931  0.798
#6 b          2 NA     NA      0.736 NA     NA     NA    
#7 b          5 NA     NA     NA      0.657  0.980  0.945
#8 b          6 NA     NA     NA      0.976  0.836 NA    
#9 b          7 NA     NA     NA     NA      0.740 NA    
  • Related