Home > Software design >  R format the wide table to long table
R format the wide table to long table

Time:09-29

cutoff        KM KM_lo KM_hi  rstm rstm_lo rstm_hi           
   <chr>      <dbl> <dbl> <dbl> <dbl>   <dbl>   <dbl>           
 1 2017-01-01   2.1   1.4   4.9   7.2     3.9    10.2 
 2 2017-04-01   3.5   2.1   4.7   8.9     6.6    10.8 
 3 2017-07-01   3.7   2.8   4.2   7.2     6.2     8.4 

How do I convert this to a long table? I am struggling to create it into the format I want. I tried the gather and melt functions. The output table would look something like this

      cutoff        VAR    Val   Val-hi Val-lo
       <chr>        <chr>  <dbl> <dbl> <dbl>       
     1 2017-01-01   KM     2.1   4.9   1.4     
     2 2017-01-01   rstm   7.2   4.7   3.9     
     3 2017-07-01   KM     3.7   4.2   2.8   

Sample date

structure(list(cutoff = c("2017-01-01", "2017-04-01", "2017-07-01"
), KM = c(2.1, 3.5, 3.7), KM_lo = c(1.4, 2.1, 2.8), KM_hi = c(4.9, 
4.7, 4.2), rstm = c(7.2, 8.9, 7.2), rstm_lo = c(3.9, 6.6, 6.2
), rstm_hi = c(10.2, 10.8, 8.4)), row.names = c(NA, -3L), class = c("tbl_df", 
"tbl", "data.frame"))

CodePudding user response:

We may do

library(dplyr)
library(tidyr)
library(stringr)
df1 %>% 
   rename_with(~ str_c(., "_none"), c("KM", "rstm")) %>%
   pivot_longer(cols = -cutoff, names_to = c("VAR", ".value"), 
       names_sep = "_") %>% 
  rename_with(~ c("Val", "Val-lo", "Val-hi"), 3:5)

-output

# A tibble: 6 × 5
  cutoff     VAR     Val `Val-lo` `Val-hi`
  <chr>      <chr> <dbl>    <dbl>    <dbl>
1 2017-01-01 KM      2.1      1.4      4.9
2 2017-01-01 rstm    7.2      3.9     10.2
3 2017-04-01 KM      3.5      2.1      4.7
4 2017-04-01 rstm    8.9      6.6     10.8
5 2017-07-01 KM      3.7      2.8      4.2
6 2017-07-01 rstm    7.2      6.2      8.4

CodePudding user response:

Here is another pivot_longer approach:

library(dplyr)
library(tidyr)

df %>% 
  pivot_longer(
    -cutoff,
    names_to = c("VAR", ".value"),
    names_pattern = "(. )_(. )"
  ) %>% 
  na.omit()
  cutoff     VAR      lo    hi
  <chr>      <chr> <dbl> <dbl>
1 2017-01-01 KM      1.4   4.9
2 2017-01-01 rstm    3.9  10.2
3 2017-04-01 KM      2.1   4.7
4 2017-04-01 rstm    6.6  10.8
5 2017-07-01 KM      2.8   4.2
6 2017-07-01 rstm    6.2   8.4

CodePudding user response:

library(tidyverse)
df <-
  structure(
    list(
      cutoff = c("2017-01-01", "2017-04-01", "2017-07-01"),
      KM = c(2.1, 3.5, 3.7),
      KM_lo = c(1.4, 2.1, 2.8),
      KM_hi = c(4.9, 4.7, 4.2),
      rstm = c(7.2, 8.9, 7.2),
      rstm_lo = c(3.9, 6.6, 6.2),
      rstm_hi = c(10.2, 10.8, 8.4)
    ),
    row.names = c(NA,-3L),
    class = c("tbl_df",
              "tbl", "data.frame")
  )

df %>% 
  pivot_longer(cols = -cutoff) %>% 
  separate(col = name, into = c("name", "suffix"), sep = "_", remove = TRUE) %>% 
  mutate(id = data.table::rleid(name)) %>% 
  pivot_wider(id_cols = c(id, cutoff, name), names_from = suffix, names_prefix = "VAL_", values_from = value) %>% 
  select(-id) %>% 
  rename(VAL = VAL_NA)
#> Warning: Expected 2 pieces. Missing pieces filled with `NA` in 6 rows [1, 4, 7,
#> 10, 13, 16].
#> # A tibble: 6 x 5
#>   cutoff     name    VAL VAL_lo VAL_hi
#>   <chr>      <chr> <dbl>  <dbl>  <dbl>
#> 1 2017-01-01 KM      2.1    1.4    4.9
#> 2 2017-01-01 rstm    7.2    3.9   10.2
#> 3 2017-04-01 KM      3.5    2.1    4.7
#> 4 2017-04-01 rstm    8.9    6.6   10.8
#> 5 2017-07-01 KM      3.7    2.8    4.2
#> 6 2017-07-01 rstm    7.2    6.2    8.4

Created on 2021-09-28 by the reprex package (v2.0.1)

  • Related