I'm having trouble using the apply function and repeatedly get an error about different sources.
I believe that both sources are data.frames, so I can't work out why it doesn't like the apply inputs.
I would like to get to the output_desired without the need to run them individually. I appreciate I could likely do this with a loop, but I am trying to learn some neat tricks.
TL;DR: I am trying to group the `input_*` objects into one object, then run each of them through the function `result` and rbind the outputs.
library(dplyr)
library(tidyr)
## Inputs ##
# Four sample tables that share the columns V1-V4 and have three rows each.
input_1 <- structure(
  list(
    V1 = rep("Team_2022", 3),
    V2 = c("Frank", "Mary", "John"),
    V3 = rep("Sydney", 3),
    V4 = c(55, 76, 14)
  ),
  row.names = c(NA, -3L),
  class = c("data.table", "data.frame")
)
input_2 <- structure(
  list(
    V1 = rep("Team_2023", 3),
    V2 = c("Bill", "Mary", "John"),
    V3 = rep("Sydney", 3),
    V4 = c(113, 23, 10)
  ),
  row.names = c(NA, -3L),
  class = c("data.table", "data.frame")
)
input_3 <- structure(
  list(
    V1 = rep("Team_2024", 3),
    V2 = c("Frank", "Mary", "Bill"),
    V3 = rep("Sydney", 3),
    V4 = c(7, 19, 52)
  ),
  row.names = c(NA, -3L),
  class = c("data.table", "data.frame")
)
input_4 <- structure(
  list(
    V1 = rep("Team_2025", 3),
    V2 = c("Frank", "Mary", "John"),
    V3 = rep("Sydney", 3),
    V4 = c(46, 44, 88)
  ),
  row.names = c(NA, -3L),
  class = c("data.table", "data.frame")
)
## Teams ##
# Lookup table: V1 is the team id, V2 lists the members of that team in a
# single string.
teams <- structure(
  list(
    V1 = c("team1", "team2", "team3"),
    V2 = c("Mary Frank", "Mary John", "Mary Bill")
  ),
  class = "data.frame",
  row.names = c(NA, -3L)
)
## Group the inputs into one ##
# Keep only the names of the form input_<digits>. Anchoring on the numeric
# suffix stops helper objects such as input_objects or input_test from
# matching when this is re-run in the same session.
all_objects <- ls()
input_objects <- grep("^input_[0-9]+$", all_objects, value = TRUE)
# mget() looks the names up and returns a named list holding the data frames
# themselves; as.data.frame() on the names would only give a column of strings.
input_test <- mget(input_objects)
## Function ##
#' Summarise one input table per team
#'
#' Splits each member string in `teams$V2` into one row per member, looks
#' every member up in `input` by `V2`, and collapses the rows back to one row
#' per team.
#'
#' @param input A data frame with columns `V1`, `V2`, `V3` and `V4`. `V2` is
#'   the join key and `V4` is summed per team.
#' @param teams A data frame with columns `V1` (team id) and `V2` (member
#'   names in one string, separated by spaces in the sample data).
#' @return A tibble with one row per team and the columns `V1.x` (team id),
#'   `V1.y` and `V3` (taken from `input`), `V2` (members re-joined with a
#'   space) and `V4` (sum over the team's members; unmatched members count 0).
result <- function(input, teams) {
  teams %>%
    separate_rows(V2) %>%
    left_join(input, by = c("V2" = "V2")) %>%
    # Members that are missing from `input` contribute nothing to the sum.
    replace_na(list(V4 = 0)) %>%
    group_by(V1.x) %>%
    # Fill in both directions: with the default "down" a team whose first
    # listed member is missing from `input` would keep NA in that first row,
    # and first() below would then return NA for V1.y and V3.
    fill(V1.y, V3, .direction = "downup") %>%
    summarize(
      V1.y = first(V1.y),
      V2 = paste(V2, collapse = " "),
      V3 = first(V3),
      V4 = sum(V4)
    )
}
## Outputs individually ##
# Run result() once per input table, always against the same teams table.
output_1 <- result(input = input_1, teams = teams)
output_2 <- result(input = input_2, teams = teams)
output_3 <- result(input = input_3, teams = teams)
output_4 <- result(input = input_4, teams = teams)
## Join outputs ##
# Stack the four per-input summaries row-wise; this is the reference result.
output_desired <- rbind(
  output_1,
  output_2,
  output_3,
  output_4
)
## Apply over all inputs ##
# apply() is meant for matrices: it coerces its input to a character matrix
# and would hand result() plain character vectors, which left_join() cannot
# join. Fetch the data frames named in input_objects as a list, run result()
# on each element, then stack the per-input results row-wise.
output_apply <- mget(input_objects) %>%
  lapply(result, teams = teams) %>%
  bind_rows()
CodePudding user response:
Using `lapply` and `dplyr::bind_rows` you could do the following.
Note: I also fixed the creation of your `input_test` list, for which I use `lapply` and `get`.
library(dplyr)
# Names of the input_<n> objects only; requiring a digit after "input_" keeps
# input_objects and input_test themselves out of the match.
all_objects <- ls()
input_objects <- grep("^input_\\d", all_objects, value = TRUE)
# Turn the names into a list of the actual data frames.
input_test <- lapply(input_objects, get)
# Run result() on every data frame, then stack the per-input tibbles.
input_test %>%
  lapply(result, teams) %>%
  bind_rows()
#> # A tibble: 12 × 5
#> V1.x V1.y V2 V3 V4
#> <chr> <chr> <chr> <chr> <dbl>
#> 1 team1 Team_2022 Mary Frank Sydney 131
#> 2 team2 Team_2022 Mary John Sydney 90
#> 3 team3 Team_2022 Mary Bill Sydney 76
#> 4 team1 Team_2023 Mary Frank Sydney 23
#> 5 team2 Team_2023 Mary John Sydney 33
#> 6 team3 Team_2023 Mary Bill Sydney 136
#> 7 team1 Team_2024 Mary Frank Sydney 26
#> 8 team2 Team_2024 Mary John Sydney 19
#> 9 team3 Team_2024 Mary Bill Sydney 71
#> 10 team1 Team_2025 Mary Frank Sydney 90
#> 11 team2 Team_2025 Mary John Sydney 132
#> 12 team3 Team_2025 Mary Bill Sydney 44
Or using `purrr::map_df`:
# Map result() over the list of inputs and row-bind the results in one call.
purrr::map_df(.x = input_test, .f = result, teams = teams)