I have a list of named vectors containing dates in R and I'd like to convert it into a data frame whose columns are named after the vector names. Please note that, because the vectors differ in length, there will be NAs in the final data frame.
# Sample data: a list of ten named Date vectors (values are days since
# 1970-01-01). The vectors differ in length and some repeat an event name.
d5 <- list(
  structure(
    c(event13 = 1797, event17 = 7006, event3 = -6796),
    class = "Date"
  ),
  structure(
    c(
      event3 = 5984, event14 = 7175, event11 = -4031, event20 = 8612,
      event2 = 5158, event5 = 1002, event8 = -382, event15 = 4367,
      event11 = 2960
    ),
    class = "Date"
  ),
  structure(
    c(event7 = 4394, event13 = -1389, event9 = -4407),
    class = "Date"
  ),
  structure(c(event5 = 7729), class = "Date"),
  structure(
    c(
      event4 = -3384, event10 = 1288, event10 = 7502, event5 = -5100,
      event9 = -3177, event8 = -4027, event11 = -3554, event16 = 9484,
      event3 = 1386
    ),
    class = "Date"
  ),
  structure(
    c(
      event10 = -6906, event3 = 6966, event8 = -975, event14 = -3286,
      event12 = 744, event11 = 7111, event15 = 9576, event12 = 2223,
      event9 = 10771
    ),
    class = "Date"
  ),
  structure(c(event16 = 8764), class = "Date"),
  structure(
    c(event15 = 9795, event14 = 7681, event2 = -1728, event14 = 10876),
    class = "Date"
  ),
  structure(c(event13 = -1341, event11 = 1202), class = "Date"),
  structure(
    c(event3 = -666, event1 = -4192, event9 = 8808, event4 = -1765),
    class = "Date"
  )
)
> d5
[[1]]
event13 event17 event3
"1974-12-03" "1989-03-08" "1951-05-25"
[[2]]
event3 event14 event11 event20 event2 event5 event8 event15
"1986-05-21" "1989-08-24" "1958-12-19" "1993-07-31" "1984-02-15" "1972-09-29" "1968-12-15" "1981-12-16"
event11
"1978-02-08"
[[3]]
event7 event13 event9
"1982-01-12" "1966-03-14" "1957-12-08"
[[4]]
event5
"1991-03-01"
...
The output would look like this:
ID | event1 | event2 | event3 | ... |
---|---|---|---|---|
1 | date1 | NA | date3 | ... |
Using `do.call` with `rbind` does not work: the shorter vectors are recycled, the dates are reduced to plain numbers, and a warning is issued:
as.data.frame(do.call(rbind, d5))
event3 event14 event11 event20 event2 event5 event8 event15 event11
1 1797 7006 -6796 1797 7006 -6796 1797 7006 -6796
2 5984 7175 -4031 8612 5158 1002 -382 4367 2960
3 4394 -1389 -4407 4394 -1389 -4407 4394 -1389 -4407
4 7729 7729 7729 7729 7729 7729 7729 7729 7729
5 -3384 1288 7502 -5100 -3177 -4027 -3554 9484 1386
6 -6906 6966 -975 -3286 744 7111 9576 2223 10771
7 8764 8764 8764 8764 8764 8764 8764 8764 8764
8 9795 7681 -1728 10876 9795 7681 -1728 10876 9795
9 -1341 1202 -1341 1202 -1341 1202 -1341 1202 -1341
10 -666 -4192 8808 -1765 -666 -4192 8808 -1765 -666
Warning message:
In (function (..., deparse.level = 1) :
number of columns of result is not a multiple of vector length (arg 8)
How to convert it efficiently (without equalising length of each vector by adding NAs and resorting them to match the columns)?
CodePudding user response:
library(tidyr)
library(dplyr)
# Turn every list element into a two-column data frame: the dates (x) and the
# name of the event each date belongs to (event_name)
dataframed <- lapply(d5, function(dates) {
  data.frame(
    x = dates,
    event_name = names(dates),
    stringsAsFactors = FALSE
  )
})

# Stack the per-element data frames into one long data frame and discard the
# row names picked up along the way
unlisted <- do.call(rbind, dataframed)
row.names(unlisted) <- NULL

# Number the occurrences of each event, spread the events into columns (one
# row per occurrence number), then drop the helper column
unlisted |>
  group_by(event_name) |>
  mutate(id = row_number()) |>
  ungroup() |>
  pivot_wider(id_cols = id, names_from = event_name, values_from = x) |>
  select(!id)
#> # A tibble: 5 × 17
#> event13 event17 event3 event14 event11 event20 event2
#> <date> <date> <date> <date> <date> <date> <date>
#> 1 1974-12-03 1989-03-08 1951-05-25 1989-08-24 1958-12-19 1993-07-31 1984-02-15
#> 2 1966-03-14 NA 1986-05-21 1961-01-02 1960-04-09 NA 1965-04-09
#> 3 1966-05-01 NA 1973-10-18 1991-01-12 1989-06-21 NA NA
#> 4 NA NA 1989-01-27 1999-10-12 1973-04-17 NA NA
#> 5 NA NA 1968-03-06 NA NA NA NA
#> # … with 10 more variables: event5 <date>, event8 <date>, event15 <date>,
#> # event12 <date>, event7 <date>, event9 <date>, event4 <date>,
#> # event10 <date>, event16 <date>, event1 <date>
Created on 2022-10-12 with reprex v2.0.2
CodePudding user response:
Does this satisfy your needs? Note that because your second element contains `event11` twice, the result has two columns named event11.
# Convert each named Date vector into a one-row data.table (one column per
# name), then stack the rows, matching columns by name and padding with NA.
res <- data.table::rbindlist(
  lapply(d5, function(dates) {
    data.table::as.data.table(as.list(dates))
  }),
  use.names = TRUE,
  fill = TRUE
)
# Show the first five rows
res[1:5]
#> event13 event17 event3 event14 event11 event20 event2
#> 1: 1974-12-03 1989-03-08 1951-05-25 <NA> <NA> <NA> <NA>
#> 2: <NA> <NA> 1986-05-21 1989-08-24 1958-12-19 1993-07-31 1984-02-15
#> 3: 1966-03-14 <NA> <NA> <NA> <NA> <NA> <NA>
#> 4: <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 5: <NA> <NA> 1973-10-18 <NA> 1960-04-09 <NA> <NA>
#> event5 event8 event15 event11 event7 event9 event4
#> 1: <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 2: 1972-09-29 1968-12-15 1981-12-16 1978-02-08 <NA> <NA> <NA>
#> 3: <NA> <NA> <NA> <NA> 1982-01-12 1957-12-08 <NA>
#> 4: 1991-03-01 <NA> <NA> <NA> <NA> <NA> <NA>
#> 5: 1956-01-15 1958-12-23 <NA> <NA> <NA> 1961-04-21 1960-09-26
#> event10 event10 event16 event12 event12 event14 event1
#> 1: <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 2: <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 3: <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 4: <NA> <NA> <NA> <NA> <NA> <NA> <NA>
#> 5: 1973-07-12 1990-07-17 1995-12-20 <NA> <NA> <NA> <NA>
CodePudding user response:
Assuming no duplicated events in each element of the list:
# bind_rows() turns each named vector into one row, aligning columns by name
# and filling events that are absent from a vector with NA.
>dplyr::bind_rows(d5)
# A tibble: 10 × 17
event13 event17 event3 event14 event11 event20 event2 event5 event8
<date> <date> <date> <date> <date> <date> <date> <date> <date>
1 1974-12-03 1989-03-08 1951-05-25 NA NA NA NA NA NA
2 NA NA 1986-05-21 1989-08-24 1958-12-19 1993-07-31 1984-02-15 1972-09-29 1968-12-15
3 1966-03-14 NA NA NA NA NA NA NA NA
4 NA NA NA NA NA NA NA 1991-03-01 NA
5 NA NA 1973-10-18 NA 1960-04-09 NA NA 1956-01-15 1958-12-23
6 NA NA 1989-01-27 1961-01-02 1989-06-21 NA NA NA 1967-05-02
7 NA NA NA NA NA NA NA NA NA
8 NA NA NA 1991-01-12 NA NA 1965-04-09 NA NA
9 1966-05-01 NA NA NA 1973-04-17 NA NA NA NA
10 NA NA 1968-03-06 NA NA NA NA NA NA
# … with 8 more variables: event15 <date>, event7 <date>, event9 <date>, event4 <date>, event10 <date>,
# event16 <date>, event12 <date>, event1 <date>
Here, `d5` has been manually edited to remove the duplicated entries:
# Sample data with the repeated event names removed, so that the names within
# each Date vector are unique (values are days since 1970-01-01).
d5 <- list(
  structure(
    c(event13 = 1797, event17 = 7006, event3 = -6796),
    class = "Date"
  ),
  structure(
    c(
      event3 = 5984, event14 = 7175, event11 = -4031, event20 = 8612,
      event2 = 5158, event5 = 1002, event8 = -382, event15 = 4367
    ),
    class = "Date"
  ),
  structure(
    c(event7 = 4394, event13 = -1389, event9 = -4407),
    class = "Date"
  ),
  structure(c(event5 = 7729), class = "Date"),
  structure(
    c(
      event4 = -3384, event10 = 1288, event5 = -5100, event9 = -3177,
      event8 = -4027, event11 = -3554, event16 = 9484, event3 = 1386
    ),
    class = "Date"
  ),
  structure(
    c(
      event10 = -6906, event3 = 6966, event8 = -975, event14 = -3286,
      event11 = 7111, event15 = 9576, event12 = 2223, event9 = 10771
    ),
    class = "Date"
  ),
  structure(c(event16 = 8764), class = "Date"),
  structure(
    c(event15 = 9795, event14 = 7681, event2 = -1728),
    class = "Date"
  ),
  structure(c(event13 = -1341, event11 = 1202), class = "Date"),
  structure(
    c(event3 = -666, event1 = -4192, event9 = 8808, event4 = -1765),
    class = "Date"
  )
)
CodePudding user response:
With base R, this is an option.
First, get all event names:
# Collect every distinct event name, in order of first appearance
nn <- unique(unlist(lapply(d5, names)))
# Build one single-row data frame per list element and stack them.
# Indexing by name yields NA for events the element lacks and the first match
# when a name is repeated. Going through as.list() keeps each column as a Date
# (strftime() would turn the dates into character strings), and
# check.names = FALSE keeps the event names exactly as given.
res <- do.call(rbind, lapply(d5, function(x) {
  as.data.frame(as.list(setNames(x[nn], nn)), check.names = FALSE)
}))
res
event13 event17 event3 event14 event11 event20 event2
1 1974-12-03 1989-03-08 1951-05-25 <NA> <NA> <NA> <NA>
2 <NA> <NA> 1986-05-21 1989-08-24 1958-12-19 1993-07-31 1984-02-15
3 1966-03-14 <NA> <NA> <NA> <NA> <NA> <NA>
4 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
5 <NA> <NA> 1973-10-18 <NA> 1960-04-09 <NA> <NA>
6 <NA> <NA> 1989-01-27 1961-01-02 1989-06-21 <NA> <NA>
7 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
8 <NA> <NA> <NA> 1991-01-12 <NA> <NA> 1965-04-09
9 1966-05-01 <NA> <NA> <NA> 1973-04-17 <NA> <NA>
10 <NA> <NA> 1968-03-06 <NA> <NA> <NA> <NA>
event5 event8 event15 event7 event9 event4 event10
1 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
2 1972-09-29 1968-12-15 1981-12-16 <NA> <NA> <NA> <NA>
3 <NA> <NA> <NA> 1982-01-12 1957-12-08 <NA> <NA>
4 1991-03-01 <NA> <NA> <NA> <NA> <NA> <NA>
5 1956-01-15 1958-12-23 <NA> <NA> 1961-04-21 1960-09-26 1973-07-12
6 <NA> 1967-05-02 1996-03-21 <NA> 1999-06-29 <NA> 1951-02-04
7 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
8 <NA> <NA> 1996-10-26 <NA> <NA> <NA> <NA>
9 <NA> <NA> <NA> <NA> <NA> <NA> <NA>
10 <NA> <NA> <NA> <NA> 1994-02-12 1965-03-03 <NA>
event16 event12 event1
1 <NA> <NA> <NA>
2 <NA> <NA> <NA>
3 <NA> <NA> <NA>
4 <NA> <NA> <NA>
5 1995-12-20 <NA> <NA>
6 <NA> 1972-01-15 <NA>
7 1993-12-30 <NA> <NA>
8 <NA> <NA> <NA>
9 <NA> <NA> <NA>
10 <NA> <NA> 1958-07-11