Use list names inside purrr::map_dfr function-CodePudding

I was trying something relatively simple, but having some struggles. Let's say I have two dataframes df1 and df2:

df1:

id  expenditure
1    10
2    20
1    30
2    50

df2:

id  expenditure
1    30
2    50
1    60
2    10

I also added them to a named list:

# Collect both data frames in one list whose element names are 'a' and 'b'.
table_list = list(a = df1, b = df2)

And now I want to perform some summary operation through a function and then bind those rows:

# Sum `expenditure` within each `id` of `table`.
#   table: data frame with the columns `id` and `expenditure`
# Returns one row per id with the column `total_expenditure`.
get_summary = function(table){
   # The pipeline is the last expression, so its value is returned visibly;
   # ending the body on an assignment would return it invisibly instead.
   table %>%
      group_by(id) %>%
      summarise(total_expenditure = sum(expenditure))
}

And then apply this through map_dfr:

# `.id` (with the leading dot) is map_dfr's own argument: it adds a column
# holding the name of the list element each row came from. A plain `id =`
# would be forwarded to get_summary() through `...` and fail as an unused
# argument.
summary = table_list %>% map_dfr(get_summary, .id='origin_table')

So, this will create almost what I'm looking for:

 origin_table   id   total_expenditure
      a          1       40
      a          2       70
      b          1       90
      b          2       60

But, what if I would like to do something specific depending on the element of the list that is being passed, something like this:

# Summarise `table` by id and, when the list element is flagged TRUE in
# `dummy_list`, add 1 to every total.
#   table: data frame with the columns `id` and `expenditure`
#   name:  name of the list element being processed; must be one of the
#          names of `dummy_list` ('a' or 'b')
# Returns one row per id with the column `total_expenditure`.
get_summary = function(table, name){
   # Per-element switch, looked up by the element's name
   dummy_list = c(TRUE, FALSE)
   names(dummy_list) = c('a', 'b')

   final_table = table %>% group_by(id) %>% summarise(total_expenditure= sum(expenditure))

   is_true = dummy_list[[name]] # Want to use the original name to call another list

   if(is_true) final_table = final_table %>% mutate(total_expenditure = total_expenditure + 1) 

   return(final_table)

}

This would bring something like this:

 origin_table   id   total_expenditure
      a          1       41
      a          2       71
      b          1       90
      b          2       60

So is there any way to use list names inside my function? Or any way to identify which element of my list I'm working with? Maybe map_dfr is too restricted and I have to use something else?

Edit: changed example so it is more grounded in reality

CodePudding user response：

Instead of using map, use imap, which passes the name of each list element to the function as .y (alongside the element itself as .x)

library(purrr)
library(dplyr)
# Summarise `dat` by id; the totals of the element named 'a' are increased
# by 1, every other element is returned unchanged.
#   dat:  data frame with the columns `id` and `expenditure`
#   name: name of the list element being processed (a single string)
# Returns an ungrouped table with one row per id and the column
# `total_expenditure`.
get_summary = function(dat, name){
   dat %>%
       group_by(id) %>%
        summarise(total_expenditure= sum(expenditure, na.rm = TRUE), 
              .groups = "drop") %>%
        # `name` is a scalar, so a plain if/else picks one branch for the
        # whole column
        mutate(total_expenditure = if(name=='a')
                total_expenditure + 1 else total_expenditure)

}

-testing

> table_list %>% 
    imap_dfr(~ get_summary(.x, name = .y), .id = 'origin_table')
# A tibble: 4 × 3
  origin_table    id total_expenditure
  <chr>        <int>             <dbl>
1 a                1                41
2 a                2                71
3 b                1                90
4 b                2                60

data

# Example input: two data frames with integer `id` and `expenditure` columns,
# stored in a list under the names `a` and `b`.
table_list <- list(
  a = data.frame(
    id = c(1L, 2L, 1L, 2L),
    expenditure = c(10L, 20L, 30L, 50L)
  ),
  b = data.frame(
    id = c(1L, 2L, 1L, 2L),
    expenditure = c(30L, 50L, 60L, 10L)
  )
)

CodePudding user response：

Managed to do it, by adding origin_table as a pre-existing column on the dataframes:

# Tag every row with the name of the table it belongs to.
df1 = mutate(df1, origin_table = 'a')
df2 = mutate(df2, origin_table = 'b')

Then I can extract the origin by doing the following:

# Summarise `table` by id and, when the table's origin is flagged TRUE in
# `dummy_list`, add 1 to every total.
#   table: data frame with the columns `id`, `expenditure` and
#          `origin_table`; `origin_table` is expected to hold a single
#          distinct value that is one of the names of `dummy_list`
# Returns one row per id with the column `total_expenditure`.
get_summary = function(table){
   # Per-origin switch, looked up by the value of `origin_table`
   dummy_list = c(TRUE, FALSE)
   names(dummy_list) = c('a', 'b')

   # Recover the origin from the data itself instead of from a list name
   origin = table %>% distinct(origin_table) %>% pull

   final_table = table %>% group_by(id) %>% summarise(total_expenditure= sum(expenditure))

   is_true = dummy_list[[origin ]] # Want to use the original name to call another list

   if(is_true) final_table = final_table %>% mutate(total_expenditure = total_expenditure + 1) 

   return(final_table)

}