Home > Net >  Create list of dataframes based on columns of dataframes in another list R
Create list of dataframes based on columns of dataframes in another list R

Time:06-14

I have a list of dataframes that look like this:

df_type1 <- data.frame(unit_name=c("A", "B", "C", "D"),
                   year_2014=c(653, 49, 563, 239),
                   year_2015=c(29, 63, 4879, 34),
                   year_2016=c(349, 340, 1, 47))

df_type2 <- data.frame(unit_name=c("A", "B", "C", "D"),
                   year_2014=c(573, 59, 563, 2999),
                   year_2015=c(39, 6390, 479, 98),
                   year_2016=c(5649, 640, 890, 12))

list_types <- list(df_type1, df_type2)

Here, values of units A to D correspond to a year.

From this list, I want to obtain a new list where each dataframe corresponds to the values of each separate year for the same units A to D. These new dataframes should look like this:

df_2014 <- data.frame(unit_name=c("A", "B", "C", "D"),
                      type1=c(653, 49, 563, 239),  # these are the values of the column year_2014 in df_type1
                      type2=c(573, 59, 563, 2999))

df_2015 <- data.frame(unit_name=c("A", "B", "C", "D"),
                      type1=c(29, 63, 4879, 34),
                      type2=c(39, 6390, 479, 98))

df_2016 <- data.frame(unit_name=c("A", "B", "C", "D"),
                      type1=c(349, 340, 1, 47),
                      type2=c(5649, 640, 890, 12))

Thanks!

CodePudding user response:

This should give:

list_types |> 
      purrr::map_dfr(~.x, .id = "TYPE") |> 
      tidyr::pivot_longer(!c(TYPE, unit_name )) |> 
      tidyr::pivot_wider(names_from  = TYPE, names_prefix = "type") |>  
      split(~name)

$year_2014
# A tibble: 4 × 4
  unit_name name      type1 type2
  <chr>     <chr>     <dbl> <dbl>
1 A         year_2014   653   573
2 B         year_2014    49    59
3 C         year_2014   563   563
4 D         year_2014   239  2999

$year_2015
# A tibble: 4 × 4
  unit_name name      type1 type2
  <chr>     <chr>     <dbl> <dbl>
1 A         year_2015    29    39
2 B         year_2015    63  6390
3 C         year_2015  4879   479
4 D         year_2015    34    98

$year_2016
# A tibble: 4 × 4
  unit_name name      type1 type2
  <chr>     <chr>     <dbl> <dbl>
1 A         year_2016   349  5649
2 B         year_2016   340   640
3 C         year_2016     1   890
4 D         year_2016    47    12

CodePudding user response:

Something like?:

lapply(2:4, function(i) cbind(list_types[[1]][1], sapply(list_types, `[`, i)))
#[[1]]
#  unit_name year_2014 year_2014
#1         A       653       573
#2         B        49        59
#3         C       563       563
#4         D       239      2999
#
#[[2]]
#  unit_name year_2015 year_2015
#1         A        29        39
#2         B        63      6390
#3         C      4879       479
#4         D        34        98
#
#[[3]]
#  unit_name year_2016 year_2016
#1         A       349      5649
#2         B       340       640
#3         C         1       890
#4         D        47        12

Or a little bit nicer and creating the data.frames.

. <- do.call(cbind, list_types)
i <- unique(grep("year", names(.), value = TRUE))
i <- setNames(i, sub("year", "df", i))
s <- paste0("type", seq_len(length(list_types)))
list2env(lapply(i, function(i) {             #Or without list2env to get a list
  cbind(.[1], setNames(.[i == names(.)], s))
}), globalenv())

df_2014
#  unit_name type1 type2
#1         A   653   573
#2         B    49    59
#3         C   563   563
#4         D   239  2999

df_2015
#  unit_name type1 type2
#1         A    29    39
#2         B    63  6390
#3         C  4879   479
#4         D    34    98

df_2016
#  unit_name type1 type2
#1         A   349  5649
#2         B   340   640
#3         C     1   890
#4         D    47    12

CodePudding user response:

Here is a data.table option using rbindlist melt dcast split

library(data.table)

split(
    dcast(
        melt(
            setDT(
                rbindlist(
                    Map(cbind,
                        type = paste0("type", seq_along(list_types)),
                        list_types
                    )
                )
            ),
            c("type", "unit_name"),
            variable.name = "year"
        ), unit_name   year ~ type
    ),
    by = "year"
)

which gives

$year_2014
   unit_name      year type1 type2
1:         A year_2014   653   573
2:         B year_2014    49    59
3:         C year_2014   563   563
4:         D year_2014   239  2999

$year_2015
   unit_name      year type1 type2
1:         A year_2015    29    39
2:         B year_2015    63  6390
3:         C year_2015  4879   479
4:         D year_2015    34    98

$year_2016
   unit_name      year type1 type2
1:         A year_2016   349  5649
2:         B year_2016   340   640
3:         C year_2016     1   890
4:         D year_2016    47    12

CodePudding user response:

   > # here we create a list with unique name of each df to use later with bind_rows
> 
> list_types <- list(df_type1 = df_type1, df_type2 = df_type2)
> 
> # Then we bind the rows for the dataframes since they have the same columns
> # Also using .id = "type" to give a variable name to each df
> 
> master_df <-  bind_rows(list_types, .id = "type")
> 
> head(master_df)
      type unit_name year_2014 year_2015 year_2016
1 df_type1         A       653        29       349
2 df_type1         B        49        63       340
3 df_type1         C       563      4879         1
4 df_type1         D       239        34        47
5 df_type2         A       573        39      5649
6 df_type2         B        59      6390       640
> 
> # we expand the df to make columns again for each df mapped to each year
> 
> expanded_df <- master_df %>%
    pivot_wider(names_from = type, values_from = starts_with("year"))
> 
> head(expanded_df)
# A tibble: 4 x 7
  unit_name year_2014_df_type1 year_2014_df_type2 year_2015_df_type1 year_2015_df_type2
  <chr>                  <dbl>              <dbl>              <dbl>              <dbl>
1 A                        653                573                 29                 39
2 B                         49                 59                 63               6390
3 C                        563                563               4879                479
4 D                        239               2999                 34                 98
# ... with 2 more variables: year_2016_df_type1 <dbl>, year_2016_df_type2 <dbl>
> 
> # Then we get separate df for each year !
> 
> df_2014 <- expanded_df %>% select(unit_name, starts_with("year_2014"))
> df_2015 <- expanded_df %>% select(unit_name, starts_with("year_2015"))
> df_2016 <- expanded_df %>% select(unit_name, starts_with("year_2016"))
> 
> # we can easily clean their names:
>   
> names(df_2014) <- str_remove (names(df_2014), "year_2014")
> names(df_2015) <- str_remove (names(df_2015), "year_2015")
> names(df_2016) <- str_remove (names(df_2016), "year_2016")
> 
> df_2014
# A tibble: 4 x 3
  unit_name `_df_type1` `_df_type2`
  <chr>           <dbl>       <dbl>
1 A                 653         573
2 B                  49          59
3 C                 563         563
4 D                 239        2999
> 
> df_2015
# A tibble: 4 x 3
  unit_name `_df_type1` `_df_type2`
  <chr>           <dbl>       <dbl>
1 A                  29          39
2 B                  63        6390
3 C                4879         479
4 D                  34          98
> 
> df_2016
# A tibble: 4 x 3
  unit_name `_df_type1` `_df_type2`
  <chr>           <dbl>       <dbl>
1 A                 349        5649
2 B                 340         640
3 C                   1         890
4 D                  47          12
  • Related