Put the values in a row which are larger than zero in a list of lists-CodePudding

I have data as follows:

# Sample input: one row per `rn` label, one integer count column per interval
# (the padded interval labels are the literal column names), plus `Sum_col`.
dat_in <- structure(
  list(
    rn = c("W", "M"),
    `      0` = c(0L, 0L),
    `[      0,     25)` = c(5L, 0L),
    `[     25,     50)` = c(0L, 0L),
    `[     25,    100)` = c(38L, 3L),
    `[     50,    100)` = c(0L, 0L),
    `[    100,    250)` = c(43L, 5L),
    `[    100,    500)` = c(0L, 0L),
    `[    250,    500)` = c(27L, 12L),
    `[    500,   1000)` = c(44L, 0L),
    `[    500,1000000]` = c(0L, 53L),
    `[   1000,   1500)` = c(0L, 0L),
    `[   1000,1000000]` = c(20L, 0L),
    `[   1500,   3000)` = c(0L, 0L),
    `[   3000,1000000]` = c(0L, 0L),
    Sum_col = c(177, 73)
  ),
  row.names = 1:2,
  class = c("data.table", "data.frame")
)

  rn       0 [      0,     25) [     25,     50) [     25,    100) [     50,    100) [    100,    250) [    100,    500) [    250,    500) [    500,   1000)
1  W       0                 5                 0                38                 0                43                 0                27                44
2  M       0                 0                 0                 3                 0                 5                 0                12                 0
  [    500,1000000] [   1000,   1500) [   1000,1000000] [   1500,   3000) [   3000,1000000] Sum_col
1                 0                 0                20                 0                 0     177
2                53                 0                 0                 0                 0      73

I would like to create a list of lists containing all non-zero values per row. For rows one and two, this would be:

# Desired output: a two-row data frame whose single list-column `freq` holds
# the non-zero values of the corresponding input row.
dat_out <- structure(
  list(
    freq = list(
      a = c(5, 38, 43, 27, 44, 20, 177),
      b = c(3, 5, 12, 53, 73)
    )
  ),
  row.names = c(NA, -2L),
  class = "data.frame"
)

                        freq
1 5, 38, 43, 27, 44, 20, 177
2           3, 5, 12, 53, 73

What would be the best way to do this?

CodePudding user response：

You can try something like:

library(data.table)
library(magrittr)

# Build one numeric vector per row containing that row's values above zero.
# Rows are addressed by position instead of by matching on `rn`: a label
# filter returns every row sharing the label, so duplicated labels would
# repeat the first match's values, and each lookup would rescan the table.
lapply(seq_len(nrow(dat_in)), function(row_idx) {
  dat_in[row_idx] %>%
    transpose() %>%     # the single row becomes the column V1
    .[2:.N, V1] %>%     # skip the first entry, which is the `rn` label
    as.numeric() %>%
    .[which(. > 0)]     # which() also discards NA comparisons
})

[[1]]
[1]   5  38  43  27  44  20 177

[[2]]
[1]  3  5 12 53 73

CodePudding user response：

Here's a tidy solution:

# Reproducible input for this answer: `rn` labels, integer counts per
# interval column (names keep their original padding), and `Sum_col`.
dat_in <- structure(
  list(
    rn = c("W", "M"),
    `      0` = c(0L, 0L),
    `[      0,     25)` = c(5L, 0L),
    `[     25,     50)` = c(0L, 0L),
    `[     25,    100)` = c(38L, 3L),
    `[     50,    100)` = c(0L, 0L),
    `[    100,    250)` = c(43L, 5L),
    `[    100,    500)` = c(0L, 0L),
    `[    250,    500)` = c(27L, 12L),
    `[    500,   1000)` = c(44L, 0L),
    `[    500,1000000]` = c(0L, 53L),
    `[   1000,   1500)` = c(0L, 0L),
    `[   1000,1000000]` = c(20L, 0L),
    `[   1500,   3000)` = c(0L, 0L),
    `[   3000,1000000]` = c(0L, 0L),
    Sum_col = c(177, 73)
  ),
  row.names = 1:2,
  class = c("data.table", "data.frame")
)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
# Keep only the entries of `v` that are larger than zero; which() also drops
# positions where the comparison is NA.
keep_positive <- function(v) v[which(v > 0)]

# One row-wise pass: gather every column except `rn` and filter it directly,
# rather than building the full vector and filtering in a second rowwise()
# step. summarise() on a row-wise frame without id columns returns a plain
# tibble, so `out` is not left row-wise.
out <- dat_in %>%
  rowwise() %>%
  summarise(freq = list(keep_positive(c_across(-rn))))
out$freq
#> [[1]]
#> [1]   5  38  43  27  44  20 177
#> 
#> [[2]]
#> [1]  3  5 12 53 73

Created on 2022-04-22 by the reprex package (v2.0.1)

CodePudding user response：

Using toString.

# Collapse each row's non-zero counts into one comma-separated string.
# `dat_in` carries the data.table class, and with data.table attached a single
# index inside `[` selects rows, not columns. Converting to a plain data frame
# first makes `-c(1, length(dat_in))` reliably drop the first (`rn`) and last
# (`Sum_col`) columns.
apply(
  as.data.frame(dat_in)[-c(1, length(dat_in))], 1,
  \(x) toString(x[x != 0])
) |>
  as.data.frame() |> setNames('freq')
#                    freq
# 1 5, 38, 43, 27, 44, 20
# 2          3, 5, 12, 53

CodePudding user response：

Using base R, you can use a loop, but this might be slow depending on the size of your data set:

# Collect the non-zero values of every row into a list, one element per row.
out <- vector("list", nrow(dat_in)) # preallocate one slot per row
# seq_len() yields an empty sequence for a zero-row table, whereas
# 1:nrow(dat_in) would iterate over c(1, 0).
for (i in seq_len(nrow(dat_in))) { # loop through rows
  vec <- as.numeric(dat_in[i, -1]) # numbers from the row, without column 1
  out[[i]] <- vec[vec != 0] # store the non-zero numbers
}

You can do the same using apply; this might be faster:

# Collect the non-zero values of every row into a list, one element per row.
# The label column is dropped before apply() so the row matrix stays numeric;
# with `rn` included the whole table is coerced to character and the numbers
# would round-trip through formatted text.
# simplify = FALSE (R >= 4.1) keeps the result a list even when every row
# happens to keep the same number of values.
out <- apply(as.matrix(as.data.frame(dat_in)[-1]), 1, function(x) {
  vec <- as.numeric(x) # also strips the column names
  vec[vec != 0]
}, simplify = FALSE)