Home > Net >  dcast/spread character-only dataframe with two columns in RHS
dcast/spread character-only dataframe with two columns in RHS

Time:07-25

I am trying to spread df on two columns in RHS long to wide. Dataframe I have:

dat <- data.frame(
  bm = LETTERS[seq(1,10)], 
  at4 = c("a1","a2","a3","a4","a5","a6","a7","a8","a9", "a10"),
  n1 = c("n1","n2","n3","n4","n5","n6","n7","n8","n9", "n10"),
  cat = c("x","y","z","x","y","z","x","y","z","z"),
  clas = c("k","l","k","l","k","l","k","l","k","l")) 

What gives:

bm  at4 n1  cat clas
A   a1  n1  x   k
B   a2  n2  y   l
C   a3  n3  z   k
D   a4  n4  x   l
E   a5  n5  y   k
F   a6  n6  z   l
G   a7  n7  x   k
H   a8  n8  y   l
I   a9  n9  z   k
J   a10 n10 z   l

Desired dataframe:

#The approach that works with adding dummy columns:

dat$temp1 <- 1
dat <- dcast(dat, ... ~ cat, value.var = "temp1", fun.aggregate = sum)
dat$temp1 <- 1
dat <- dcast(dat, ... ~ clas, value.var = "temp1", fun.aggregate = sum)

bm  at4 n1  x   y   z   k   l
A   a1  n1  1   0   0   1   0
B   a2  n2  0   1   0   0   1
C   a3  n3  0   0   1   1   0
D   a4  n4  1   0   0   0   1
E   a5  n5  0   1   0   1   0
F   a6  n6  0   0   1   0   1
G   a7  n7  1   0   0   1   0
H   a8  n8  0   1   0   0   1
I   a9  n9  0   0   1   1   0
J   a10 n10 0   0   1   0   1

I am wondering if it can be done in one go in dcast or spread. I tried:

dat$temp1 <- 1
dcast(dat, ... ~ cat   clas, value.var = "temp1", fun.aggregate = sum)

but it gives:

bm  at4 n1  x_k x_l y_k y_l z_k z_l
A   a1  n1  1   0   0   0   0   0
B   a2  n2  0   0   0   1   0   0
C   a3  n3  0   0   0   0   1   0
D   a4  n4  0   1   0   0   0   0
E   a5  n5  0   0   1   0   0   0
F   a6  n6  0   0   0   0   0   1
G   a7  n7  1   0   0   0   0   0
H   a8  n8  0   0   0   1   0   0
I   a9  n9  0   0   0   0   1   0
J   a10 n10 0   0   0   0   0   1

bm column should be unique for each cell after casting so it can show instead of 1. tried similar approach:

dcast(dat, ... ~ cat   clas, value.var = "bm", fun.aggregate = toString)

But dcast mix RHS columns together:

at4 n1  x_k x_l y_k y_l z_k z_l
a1  n1  A                   
a10 n10                     J
a2  n2              B       
a3  n3                  C   
a4  n4      D               
a5  n5          E           
a6  n6                      F
a7  n7  G                   
a8  n8              H       
a9  n9                  I   

CodePudding user response:

We can melt & dcast in one go:

library(data.table)
setDT(dat)

dcast(melt(dat, id = c("bm", "at4", "n1")), bm   at4   n1 ~ value, fun = length)
#    bm at4  n1 k l x y z
# 1:  A  a1  n1 1 0 1 0 0
# 2:  B  a2  n2 0 1 0 1 0
# 3:  C  a3  n3 1 0 0 0 1
# 4:  D  a4  n4 0 1 1 0 0
# 5:  E  a5  n5 1 0 0 1 0
# 6:  F  a6  n6 0 1 0 0 1
# 7:  G  a7  n7 1 0 1 0 0
# 8:  H  a8  n8 0 1 0 1 0
# 9:  I  a9  n9 1 0 0 0 1
#10:  J a10 n10 0 1 0 0 1

In the reshape2 package there is a function called recast which "wraps melting and (d)casting a data frame into a single step.", see https://github.com/hadley/reshape/blob/master/R/recast.r

CodePudding user response:

Here is a solution but pivot_wider, not with spread/dcast.
Reshape both cat and clas to wide format, then join the two results.

suppressPackageStartupMessages({
  library(dplyr)
  library(tidyr)
})

dat <- data.frame(
  bm = LETTERS[seq(1,10)], 
  at4 = c("a1","a2","a3","a4","a5","a6","a7","a8","a9", "a10"),
  n1 = c("n1","n2","n3","n4","n5","n6","n7","n8","n9", "n10"),
  cat = c("x","y","z","x","y","z","x","y","z","z"),
  clas = c("k","l","k","l","k","l","k","l","k","l")) 

inner_join(
  dat %>%
    mutate(new = 1) %>% 
    pivot_wider(
      id_cols = 1:3,
      names_from = cat,
      values_from = new,
      values_fill = 0L
    ),
  dat %>%
    mutate(new = 1L) %>%
    pivot_wider(
      id_cols = 1:3,
      names_from = clas,
      values_from = new,
      values_fill = 0L
    ), 
  by = c("bm", "at4", "n1")
)
#> # A tibble: 10 × 8
#>    bm    at4   n1        x     y     z     k     l
#>    <chr> <chr> <chr> <dbl> <dbl> <dbl> <int> <int>
#>  1 A     a1    n1        1     0     0     1     0
#>  2 B     a2    n2        0     1     0     0     1
#>  3 C     a3    n3        0     0     1     1     0
#>  4 D     a4    n4        1     0     0     0     1
#>  5 E     a5    n5        0     1     0     1     0
#>  6 F     a6    n6        0     0     1     0     1
#>  7 G     a7    n7        1     0     0     1     0
#>  8 H     a8    n8        0     1     0     0     1
#>  9 I     a9    n9        0     0     1     1     0
#> 10 J     a10   n10       0     0     1     0     1

Created on 2022-07-24 by the reprex package (v2.0.1)

  • Related