Home > Software engineering >  List of differing length, named vectors into data frame - R
List of differing length, named vectors into data frame - R

Time:10-13

I have a list of named vectors containing dates in R and I'd like to convert it into a data frame whose columns are named after the vector names. Please note that, because the vectors differ in length, there will be NAs in the final data frame.

# Sample input: a list of 10 named Date vectors of differing lengths.
# Each value is the numeric representation of a Date (hence class = "Date");
# the names are event labels. Note that some vectors repeat a name
# (e.g. `event11` twice in the 2nd element, `event10` twice in the 5th).
d5 <- list(structure(c(event13 = 1797, event17 = 7006, event3 = -6796
), class = "Date"), structure(c(event3 = 5984, event14 = 7175, 
event11 = -4031, event20 = 8612, event2 = 5158, event5 = 1002, 
event8 = -382, event15 = 4367, event11 = 2960), class = "Date"), 
    structure(c(event7 = 4394, event13 = -1389, event9 = -4407
    ), class = "Date"), structure(c(event5 = 7729), class = "Date"), 
    structure(c(event4 = -3384, event10 = 1288, event10 = 7502, 
    event5 = -5100, event9 = -3177, event8 = -4027, event11 = -3554, 
    event16 = 9484, event3 = 1386), class = "Date"), structure(c(event10 = -6906, 
    event3 = 6966, event8 = -975, event14 = -3286, event12 = 744, 
    event11 = 7111, event15 = 9576, event12 = 2223, event9 = 10771
    ), class = "Date"), structure(c(event16 = 8764), class = "Date"), 
    structure(c(event15 = 9795, event14 = 7681, event2 = -1728, 
    event14 = 10876), class = "Date"), structure(c(event13 = -1341, 
    event11 = 1202), class = "Date"), structure(c(event3 = -666, 
    event1 = -4192, event9 = 8808, event4 = -1765), class = "Date"))
> d5
[[1]]
     event13      event17       event3 
"1974-12-03" "1989-03-08" "1951-05-25" 

[[2]]
      event3      event14      event11      event20       event2       event5       event8      event15 
"1986-05-21" "1989-08-24" "1958-12-19" "1993-07-31" "1984-02-15" "1972-09-29" "1968-12-15" "1981-12-16" 
     event11 
"1978-02-08" 

[[3]]
      event7      event13       event9 
"1982-01-12" "1966-03-14" "1957-12-08" 

[[4]]
      event5 
"1991-03-01" 
...

The output would look like this:

ID event1 event2 event3 ...
1 date1 NA date3 ...

Using do.call with rbind gives a wrong result and the following warning:

# Naive attempt: row-bind the vectors positionally. As the output below shows,
# this is NOT the desired result: shorter vectors are recycled to the longest
# length, values are aligned by position rather than by event name, the column
# names do not reflect every row's events, and the Date class is lost
# (plain numbers are printed).
as.data.frame(do.call(rbind, d5))
   event3 event14 event11 event20 event2 event5 event8 event15 event11
1    1797    7006   -6796    1797   7006  -6796   1797    7006   -6796
2    5984    7175   -4031    8612   5158   1002   -382    4367    2960
3    4394   -1389   -4407    4394  -1389  -4407   4394   -1389   -4407
4    7729    7729    7729    7729   7729   7729   7729    7729    7729
5   -3384    1288    7502   -5100  -3177  -4027  -3554    9484    1386
6   -6906    6966    -975   -3286    744   7111   9576    2223   10771
7    8764    8764    8764    8764   8764   8764   8764    8764    8764
8    9795    7681   -1728   10876   9795   7681  -1728   10876    9795
9   -1341    1202   -1341    1202  -1341   1202  -1341    1202   -1341
10   -666   -4192    8808   -1765   -666  -4192   8808   -1765    -666
Warning message:
In (function (..., deparse.level = 1)  :
  number of columns of result is not a multiple of vector length (arg 8)

How can I convert it efficiently (without first equalising the length of each vector by padding with NAs and re-sorting them to match the columns)?

CodePudding user response:

library(tidyr)
library(dplyr)

# Helper: one list element (a named Date vector) -> two-column data frame.
# The parameter must stay named `x`, because as.data.frame() names the value
# column after the expression it was called with, and the pivot below relies
# on that column being called `x`.
to_event_frame <- function(x) {
  frame <- as.data.frame(x)
  frame[["event_name"]] <- names(x)
  frame
}

# One long-format data frame per list element ...
dataframed <- lapply(d5, to_event_frame)

# ... stacked into a single long data frame with default row names
unlisted <- do.call(rbind, dataframed)
rownames(unlisted) <- NULL

# Number the occurrences of every event (1st, 2nd, ... time it appears), then
# spread the events into columns; row k holds the k-th occurrence of each event
unlisted |>
  group_by(event_name) |>
  mutate(id = row_number()) |>
  ungroup() |>
  pivot_wider(names_from = event_name, values_from = x) |>
  select(-id)

#> # A tibble: 5 × 17
#>   event13    event17    event3     event14    event11    event20    event2    
#>   <date>     <date>     <date>     <date>     <date>     <date>     <date>    
#> 1 1974-12-03 1989-03-08 1951-05-25 1989-08-24 1958-12-19 1993-07-31 1984-02-15
#> 2 1966-03-14 NA         1986-05-21 1961-01-02 1960-04-09 NA         1965-04-09
#> 3 1966-05-01 NA         1973-10-18 1991-01-12 1989-06-21 NA         NA        
#> 4 NA         NA         1989-01-27 1999-10-12 1973-04-17 NA         NA        
#> 5 NA         NA         1968-03-06 NA         NA         NA         NA        
#> # … with 10 more variables: event5 <date>, event8 <date>, event15 <date>,
#> #   event12 <date>, event7 <date>, event9 <date>, event4 <date>,
#> #   event10 <date>, event16 <date>, event1 <date>

Created on 2022-10-12 with reprex v2.0.2

CodePudding user response:

Does this satisfy your needs? Note that due to your second element, there are two columns event11.

# Turn every named Date vector into a one-row data.table (one column per
# event), then stack the rows, matching columns by name and filling the
# events an element does not have with NA.
event_rows <- lapply(d5, function(v) data.table::as.data.table(as.list(v)))
res <- data.table::rbindlist(event_rows, use.names = TRUE, fill = TRUE)

# Show the first five rows
res[1:5]
#>       event13    event17     event3    event14    event11    event20     event2
#> 1: 1974-12-03 1989-03-08 1951-05-25       <NA>       <NA>       <NA>       <NA>
#> 2:       <NA>       <NA> 1986-05-21 1989-08-24 1958-12-19 1993-07-31 1984-02-15
#> 3: 1966-03-14       <NA>       <NA>       <NA>       <NA>       <NA>       <NA>
#> 4:       <NA>       <NA>       <NA>       <NA>       <NA>       <NA>       <NA>
#> 5:       <NA>       <NA> 1973-10-18       <NA> 1960-04-09       <NA>       <NA>
#>        event5     event8    event15    event11     event7     event9     event4
#> 1:       <NA>       <NA>       <NA>       <NA>       <NA>       <NA>       <NA>
#> 2: 1972-09-29 1968-12-15 1981-12-16 1978-02-08       <NA>       <NA>       <NA>
#> 3:       <NA>       <NA>       <NA>       <NA> 1982-01-12 1957-12-08       <NA>
#> 4: 1991-03-01       <NA>       <NA>       <NA>       <NA>       <NA>       <NA>
#> 5: 1956-01-15 1958-12-23       <NA>       <NA>       <NA> 1961-04-21 1960-09-26
#>       event10    event10    event16 event12 event12 event14 event1
#> 1:       <NA>       <NA>       <NA>    <NA>    <NA>    <NA>   <NA>
#> 2:       <NA>       <NA>       <NA>    <NA>    <NA>    <NA>   <NA>
#> 3:       <NA>       <NA>       <NA>    <NA>    <NA>    <NA>   <NA>
#> 4:       <NA>       <NA>       <NA>    <NA>    <NA>    <NA>   <NA>
#> 5: 1973-07-12 1990-07-17 1995-12-20    <NA>    <NA>    <NA>   <NA>

CodePudding user response:

Assuming no duplicated events in each element of the list:

>dplyr::bind_rows(d5)
# A tibble: 10 × 17
   event13    event17    event3     event14    event11    event20    event2     event5     event8    
   <date>     <date>     <date>     <date>     <date>     <date>     <date>     <date>     <date>    
 1 1974-12-03 1989-03-08 1951-05-25 NA         NA         NA         NA         NA         NA        
 2 NA         NA         1986-05-21 1989-08-24 1958-12-19 1993-07-31 1984-02-15 1972-09-29 1968-12-15
 3 1966-03-14 NA         NA         NA         NA         NA         NA         NA         NA        
 4 NA         NA         NA         NA         NA         NA         NA         1991-03-01 NA        
 5 NA         NA         1973-10-18 NA         1960-04-09 NA         NA         1956-01-15 1958-12-23
 6 NA         NA         1989-01-27 1961-01-02 1989-06-21 NA         NA         NA         1967-05-02
 7 NA         NA         NA         NA         NA         NA         NA         NA         NA        
 8 NA         NA         NA         1991-01-12 NA         NA         1965-04-09 NA         NA        
 9 1966-05-01 NA         NA         NA         1973-04-17 NA         NA         NA         NA        
10 NA         NA         1968-03-06 NA         NA         NA         NA         NA         NA        
# … with 8 more variables: event15 <date>, event7 <date>, event9 <date>, event4 <date>, event10 <date>,
#   event16 <date>, event12 <date>, event1 <date>

Here, d5 is manually edited to remove duplicated entries:

# Same sample data as in the question, but with the repeated event names
# inside a single vector removed, so every name is unique within its element.
d5 <- list(structure(c(event13 = 1797, event17 = 7006, event3 = -6796
), class = "Date"), structure(c(event3 = 5984, event14 = 7175, 
event11 = -4031, event20 = 8612, event2 = 5158, event5 = 1002, 
event8 = -382, event15 = 4367), class = "Date"), structure(c(event7 = 4394, 
event13 = -1389, event9 = -4407), class = "Date"), structure(c(event5 = 7729), class = "Date"), 
    structure(c(event4 = -3384, event10 = 1288, event5 = -5100, 
    event9 = -3177, event8 = -4027, event11 = -3554, event16 = 9484, 
    event3 = 1386), class = "Date"), structure(c(event10 = -6906, 
    event3 = 6966, event8 = -975, event14 = -3286, event11 = 7111, 
    event15 = 9576, event12 = 2223, event9 = 10771), class = "Date"), 
    structure(c(event16 = 8764), class = "Date"), structure(c(event15 = 9795, 
    event14 = 7681, event2 = -1728), class = "Date"), structure(c(event13 = -1341, 
    event11 = 1202), class = "Date"), structure(c(event3 = -666, 
    event1 = -4192, event9 = 8808, event4 = -1765), class = "Date"))

CodePudding user response:

With base R this is an option

Get all event names

# Distinct event names across all list elements, in order of first appearance
nn <- unique(unlist(lapply(d5, names)))

For each list element, look up every event name (events the element lacks become NA), bind the results row-wise and name the columns

# Build one row per list element and one column per event name in `nn`.
#
# For each element, `x[nn]` looks the events up by name: events the element
# does not contain come back as NA, and if a name is repeated within an
# element only its first occurrence is picked. strftime() turns the dates
# into character strings, so the resulting columns are character, not Date.
#
# The rows are combined with do.call(rbind, lapply(...)) rather than
# t(sapply(...)): sapply() only returns a matrix when every result has the
# same length > 1, so with a single distinct event name it would collapse to
# a plain vector and the transposed result would have the wrong shape.
event_rows <- lapply(d5, function(x) strftime(x[nn]))
res <- data.frame(do.call(rbind, event_rows))
colnames(res) <- nn

res
      event13    event17     event3    event14    event11    event20     event2
1  1974-12-03 1989-03-08 1951-05-25       <NA>       <NA>       <NA>       <NA>
2        <NA>       <NA> 1986-05-21 1989-08-24 1958-12-19 1993-07-31 1984-02-15
3  1966-03-14       <NA>       <NA>       <NA>       <NA>       <NA>       <NA>
4        <NA>       <NA>       <NA>       <NA>       <NA>       <NA>       <NA>
5        <NA>       <NA> 1973-10-18       <NA> 1960-04-09       <NA>       <NA>
6        <NA>       <NA> 1989-01-27 1961-01-02 1989-06-21       <NA>       <NA>
7        <NA>       <NA>       <NA>       <NA>       <NA>       <NA>       <NA>
8        <NA>       <NA>       <NA> 1991-01-12       <NA>       <NA> 1965-04-09
9  1966-05-01       <NA>       <NA>       <NA> 1973-04-17       <NA>       <NA>
10       <NA>       <NA> 1968-03-06       <NA>       <NA>       <NA>       <NA>
       event5     event8    event15     event7     event9     event4    event10
1        <NA>       <NA>       <NA>       <NA>       <NA>       <NA>       <NA>
2  1972-09-29 1968-12-15 1981-12-16       <NA>       <NA>       <NA>       <NA>
3        <NA>       <NA>       <NA> 1982-01-12 1957-12-08       <NA>       <NA>
4  1991-03-01       <NA>       <NA>       <NA>       <NA>       <NA>       <NA>
5  1956-01-15 1958-12-23       <NA>       <NA> 1961-04-21 1960-09-26 1973-07-12
6        <NA> 1967-05-02 1996-03-21       <NA> 1999-06-29       <NA> 1951-02-04
7        <NA>       <NA>       <NA>       <NA>       <NA>       <NA>       <NA>
8        <NA>       <NA> 1996-10-26       <NA>       <NA>       <NA>       <NA>
9        <NA>       <NA>       <NA>       <NA>       <NA>       <NA>       <NA>
10       <NA>       <NA>       <NA>       <NA> 1994-02-12 1965-03-03       <NA>
      event16    event12     event1
1        <NA>       <NA>       <NA>
2        <NA>       <NA>       <NA>
3        <NA>       <NA>       <NA>
4        <NA>       <NA>       <NA>
5  1995-12-20       <NA>       <NA>
6        <NA> 1972-01-15       <NA>
7  1993-12-30       <NA>       <NA>
8        <NA>       <NA>       <NA>
9        <NA>       <NA>       <NA>
10       <NA>       <NA> 1958-07-11
  • Related