R List with 3 levels and unique keys as names to data.frame, fast way-CodePudding

Hi there! I've got a nested list like this:

# Example input, nested three levels deep: source -> date -> URL -> three
# numeric values. The object is named `list` (like the base function); the
# snippets further down refer to it by that name.
list <- list(
  y = list(
    "2020-05-13" = list(url1 = c(0, 0, 0), url2 = c(0, 0, 0))
  ),
  g = list(
    "2020-05-13" = list(url1 = c(0, 0, 0), url2 = c(0, 0, 0))
  ),
  a = list(
    "2020-05-13" = list(url1 = c(0, 0, 0), url2 = c(0, 0, 0))
  )
)

The real data has many dates and many unique URLs.

I wish to get:

# A tibble: 6 x 6
  d          et         e     v     f source
  <chr>      <chr>  <dbl> <dbl> <dbl> <chr> 
1 2020-05-13 url1     0     0     0   y
2 2020-05-13 url1     0     0     0   g
3 2020-05-13 url1     0     0     0   a
4 2020-05-13 url2     0     0     0   y
5 2020-05-13 url2     0     0     0   g
6 2020-05-13 url2     0     0     0   a

I've seen this topic, but I have no parameter names, only unique values as keys. I know how to do it with loops, but I would like to know a fast way. Thank you!

CodePudding user response：

library(dplyr)
library(tidyr)
library(lubridate)

# little helper function to clean up the nested part
# Flatten one source's `date -> URL -> values` list into a data frame with
# one row per date/URL pair and one column per value.
#
# x:         named list (dates) of named lists (URLs) of vectors; the vectors
#            are assumed to all have the same length as `col_names`.
# col_names: names given to the value columns, c("e", "v", "f") by default.
#
# Returns a data.frame whose row names carry the fused date/URL key, e.g.
# "X2020.05.13.url1": as.data.frame() joins the two keys with "." and makes
# the result a syntactic name before the transpose.
make_df <- function(x, col_names = c("e", "v", "f")) {
  wide <- as.data.frame(x)        # one column per date/URL pair
  long <- as.data.frame(t(wide))  # transpose: one row per date/URL pair
  setNames(long, col_names)
}

# make things into a messy data frame: one data frame per source, stacked,
# with the date/URL key still fused inside the row names
lapply(list, make_df) %>%
  bind_rows(.id = "source") %>%
  # rownames_to_column() is exported by tibble, not by dplyr, tidyr or
  # lubridate, so it is called through its namespace
  tibble::rownames_to_column() %>%
  # clean up the columns
  mutate(
    # row names start like "X2020.05.13.url1": characters 2-12 hold the date
    d = ymd(substr(rowname, 2, 12)),
    et = substr(rowname, 13, 1e5),
    # cut at the first "." -- presumably this strips the suffix added to keep
    # stacked row names unique; it also truncates keys that contain a "."
    et = sub("\\..*", "", et)
  ) %>%
  select(d, et, e, v, f, source)
#            d   et e v f source
# 1 2020-05-13 url1 0 0 0      y
# 2 2020-05-13 url2 0 0 0      y
# 3 2020-05-13 url1 0 0 0      g
# 4 2020-05-13 url2 0 0 0      g
# 5 2020-05-13 url1 0 0 0      a
# 6 2020-05-13 url2 0 0 0      a

CodePudding user response：

We may need to convert the inner nested vector to a named vector or list

library(rrapply)
library(purrr)
library(dplyr)
# Turn every leaf vector into a named list (e, v, f), then collapse the
# nesting from the inside out: URL keys become `et`, date keys become `d`,
# and source keys become `source`.
rrapply(
  list,
  f = function(x) setNames(as.list(x), c("e", "v", "f"))
) %>%
  modify_depth(2, function(by_url) bind_rows(by_url, .id = "et")) %>%
  map(function(by_date) bind_rows(by_date, .id = "d")) %>%
  bind_rows(.id = "source")

-output

# A tibble: 6 x 6
  source d          et        e     v     f
  <chr>  <chr>      <chr> <dbl> <dbl> <dbl>
1 y      2020-05-13 url1      0     0     0
2 y      2020-05-13 url2      0     0     0
3 g      2020-05-13 url1      0     0     0
4 g      2020-05-13 url2      0     0     0
5 a      2020-05-13 url1      0     0     0
6 a      2020-05-13 url2      0     0     0

Or, in a slightly more compact way:

library(tidyr)
# Single pass: every leaf vector becomes a one-row tibble (e, v, f) and
# `how = "melt"` is requested from rrapply(). The `value` column is then
# widened into separate columns and the first three columns are renamed
# to source, d and et.
rrapply(
  list,
  f = function(x) {
    as_tibble(setNames(as.list(x), c("e", "v", "f")))
  },
  how = "melt"
) %>%
  unnest_wider(value) %>%
  rename_with(function(old) c("source", "d", "et"), .cols = 1:3)

-output

# A tibble: 6 x 6
  source d          et        e     v     f
  <chr>  <chr>      <chr> <dbl> <dbl> <dbl>
1 y      2020-05-13 url1      0     0     0
2 y      2020-05-13 url2      0     0     0
3 g      2020-05-13 url1      0     0     0
4 g      2020-05-13 url2      0     0     0
5 a      2020-05-13 url1      0     0     0
6 a      2020-05-13 url2      0     0     0

CodePudding user response：

library(tidyverse)

    list %>%
      # Stack each source's dates; `.id = "d"` stamps the date key on every
      # row, so this works for any number of dates per source.
      map_df(
        .f = function(x) bind_rows(x, .id = "d"),
        .id = "source"
      ) %>%
      # Within one source/date the three rows are the values e, v, f in order.
      group_by(source, d) %>%
      mutate(aux = c("e", "v", "f")) %>%
      ungroup() %>%
      # Every column that is not a key is a URL, whatever its name is; URLs
      # missing for a given source/date are dropped instead of yielding NA rows.
      pivot_longer(
        cols = -c(source, d, aux),
        names_to = "et",
        values_drop_na = TRUE
      ) %>%
      pivot_wider(names_from = aux, values_from = value) %>%
      select(d, et, e, v, f, source)

# A tibble: 6 x 6
  d          et        e     v     f source
  <chr>      <chr> <dbl> <dbl> <dbl> <chr> 
1 2020-05-13 url1      0     0     0 y     
2 2020-05-13 url2      0     0     0 y     
3 2020-05-13 url1      0     0     0 g     
4 2020-05-13 url2      0     0     0 g     
5 2020-05-13 url1      0     0     0 a     
6 2020-05-13 url2      0     0     0 a