Home > OS >  Automatically order a wide data.table: dcast columns in a specific order/setcolorder based on a patt
Automatically order a wide data.table: dcast columns in a specific order/setcolorder based on a patt

Time:10-09

I have a DT like this:

id = c(rep(1,10), rep(2, 5), rep(3,12))
th = c(rep(c(0,1),5), c(0, 1, 0, 1, 0), rep(c(1,0,1),4 ))
drugs = c(rep(c("A","B","C","D","E"),2), c("A", "B", "B", "B", "A"), rep(c("C","D","c"),4 ))

DT = data.table(id, th, drugs)

DT
    id th drugs seq
 1:  1  0     A   1
 2:  1  1     B   2
 3:  1  0     C   3
 4:  1  1     D   4
 5:  1  0     E   5
 6:  1  1     A   6
 7:  1  0     B   7
 8:  1  1     C   8
 9:  1  0     D   9
10:  1  1     E  10
11:  2  0     A   1
12:  2  1     B   2
13:  2  0     B   3
14:  2  1     B   4
15:  2  0     A   5
16:  3  1     C   1
17:  3  0     D   2
18:  3  1     c   3
19:  3  1     C   4
20:  3  0     D   5
21:  3  1     c   6
22:  3  1     C   7
23:  3  0     D   8
24:  3  1     c   9
25:  3  1     C  10
26:  3  0     D  11
27:  3  1     c  12

I reshape to wide after making a counter ("seq") by id to have all drugs in one observation/id

DT_wide =  DT[, seq := seq(.N), by = .(id)][, dcast.data.table(.SD, id ~ paste0("rx", seq), value.var = c("th", "drugs"))]

Obtaining:

    DT_wide
   id th_rx1 th_rx10 th_rx11 th_rx12 th_rx2 th_rx3 th_rx4 th_rx5 th_rx6 th_rx7 th_rx8 th_rx9 drugs_rx1 drugs_rx10 drugs_rx11 drugs_rx12 drugs_rx2 drugs_rx3 drugs_rx4 drugs_rx5 drugs_rx6 drugs_rx7 drugs_rx8 drugs_rx9
1:  1      0       1      NA      NA      1      0      1      0      1      0      1      0         A          E       <NA>       <NA>         B         C         D         E         A         B         C         D
2:  2      0      NA      NA      NA      1      0      1      0     NA     NA     NA     NA         A       <NA>       <NA>       <NA>         B         B         B         A      <NA>      <NA>      <NA>      <NA>
3:  3      1       1       0       1      0      1      1      0      1      1      0      1         C          C          D          c         D         c         C         D         c         C         D         c

The Desired Output is to have the DT_wide columns ordered this way:

"id", "th_rx1","drugs_rx1", "th_rx2", "drugs_rx2",...,"th_rx12", "drugs_rx12"

Is there a better way to do the dcast or it's needed a post dcast setcolorder() with specific regex?

I tried a setcolorder with no results because of the _rx1 and _rx10 problem I find:

setcolorder(DT_wide, c("id", grep("_rx1", colnames(DT_wide), value = TRUE)))

Thank you in advance for any help! :D

CodePudding user response:

Try the following workaround

cols <- c("th", "drugs")
# a shorter way of achieving your dcast
#  numbering comes from rowid()
DT.wide <- dcast(DT, id ~ paste0("rx", rowid(id)), value.var = cols)
# new order of colnames
new_colorder <- CJ(unique(rowid(DT$id)), cols, sorted = FALSE)[, paste(cols, V1, sep = "_rx")]
# reorder the relevant columns
setcolorder(DT.wide, c(setdiff(names(DT.wide), new_colorder), new_colorder))


#    id th_rx1 drugs_rx1 th_rx2 drugs_rx2 th_rx3 drugs_rx3 th_rx4 drugs_rx4 th_rx5 drugs_rx5 th_rx6
# 1:  1      0         A      1         B      0         C      1         D      0         E      1
# 2:  2      0         A      1         B      0         B      1         B      0         A     NA
# 3:  3      1         C      0         D      1         c      1         C      0         D      1
#    drugs_rx6 th_rx7 drugs_rx7 th_rx8 drugs_rx8 th_rx9 drugs_rx9 th_rx10 drugs_rx10 th_rx11 drugs_rx11
# 1:         A      0         B      1         C      0         D       1          E      NA       <NA>
# 2:      <NA>     NA      <NA>     NA      <NA>     NA      <NA>      NA       <NA>      NA       <NA>
# 3:         c      1         C      0         D      1         c       1          C       0          D
#    th_rx12 drugs_rx12
# 1:      NA       <NA>
# 2:      NA       <NA>
# 3:       1          c

CodePudding user response:

Nearly the same approach as Wimpel's answers but different in details, e.g., the use of sprintf() and rowid(id) in dcast():

library(data.table)
library(magrittr)
DTw <- dcast(DT, id ~ sprintf("rxi", rowid(id)), value.var = c("th", "drugs"))
newcols <- DT[, CJ(max(rowid(id)) %>% seq() %>% sprintf("rxi", .), 
                   setdiff(names(.SD), "id"))][
  , c("id",paste(V2, V1, sep = "_"))]
setcolorder(DTw, newcols)
DTw
   id drugs_rx01 th_rx01 drugs_rx02 th_rx02 drugs_rx03 th_rx03 drugs_rx04 th_rx04 drugs_rx05 th_rx05 drugs_rx06 th_rx06
1:  1          A       0          B       1          C       0          D       1          E       0          A       1
2:  2          A       0          B       1          B       0          B       1          A       0       <NA>      NA
3:  3          C       1          D       0          c       1          C       1          D       0          c       1
   drugs_rx07 th_rx07 drugs_rx08 th_rx08 drugs_rx09 th_rx09 drugs_rx10 th_rx10 drugs_rx11 th_rx11 drugs_rx12 th_rx12
1:          B       0          C       1          D       0          E       1       <NA>      NA       <NA>      NA
2:       <NA>      NA       <NA>      NA       <NA>      NA       <NA>      NA       <NA>      NA       <NA>      NA
3:          C       1          D       0          c       1          C       1          D       0          c       1
  • Related