I have a function called tidy_normal()
from TidyDensity package. This takes in exactly one parameter per arguement, example: .mean = 1
What I want to do is build a function that will take in a quoted string from a user and the parameter list like so:
tidy_multi_dist(
.tidy_dist = "tidy_normal",
.param_list = list(
.n = 50,
.mean = c(-1, 0, 1),
.sd = 1,
.num_sims = 1)
)
What I have so far is:
tidy_multi_dist <- function(
.tidy_dist = NULL,
.param_list = list()
) {
# Check param ----
if (is.null(.tidy_dist)) {
rlang::abort(
"Please enter a 'tidy_' distribution function like 'tidy_normal'
in quotes."
)
}
if (length(.param_list) == 0) {
rlang::abort(
"Please enter some parameters for your chosen 'tidy_' distribution."
)
}
# Call used ---
td <- as.character(.tidy_dist)
# Params ----
params <- .param_list
# Params for the call ----
n <- as.integer(params$.n)
num_sims <- as.integer(params$.num_sims)
x <- seq(1, num_sims, 1)
# Final parameter list
final_params_list <- params[which(!names(params) %in% c(".n", ".num_sims"))]
# Set the grid to make the calls ----
param_grid <- expand.grid(final_params_list)
df <- tidyr::expand_grid(
n = n,
param_grid,
sim = as.integer(x)
) #%>%
#group_by_all()
func_parm_list <- as.list(df)
# Run call on the grouped df ----
dff <- df %>%
dplyr::rowwise() %>%
dplyr::mutate(results = list(do.call(td, func_parm_list))) # fails here
#df %>% rowwise() %>% mutate(results = list(do.call(td, list(.n = n, .num_sims = num_sims,.mean = .mean, .sd = .sd)))) %>% unnest(results)
# Get the attributes to be used later on ----
atb <- dff$results[[1]] %>% attributes()
# Make Dist Type for column ----
dist_type <- stringr::str_remove(atb$tibble_type, "tidy_") %>%
stringr::str_replace_all(pattern = "_", " ") %>%
stringr::str_to_title()
# Get column names from the param_grid in order to make teh dist_type column ----
cols <- names(param_grid)
dff$dist_name <- paste0(
paste0(dist_type, " c("),
apply(dff[, cols], 1, paste0, collapse = ", "),
")"
)
df_unnested_tbl <- dff %>%
tidyr::unnest(results) %>%
dplyr::ungroup() %>%
dplyr::select(sim_number, dist_name, x:q) %>%
dplyr::mutate(dist_name = as.factor(dist_name)) %>%
dplyr::arrange(sim_number, dist_name)
# Attach attributes ----
attr(df_unnested_tbl, "all") <- atb
attr(df_unnested_tbl, "tbl") <- "tidy_multi_tibble"
# Return ----
return(df_unnested_tbl)
}
The error message I am getting is:
> df %>%
#dplyr::rowwise() %>%
dplyr::mutate(results = list(do.call(td, func_parm_list)))
Error in `dplyr::mutate()`:
! Problem while computing `results = list(do.call(td, func_parm_list))`.
i The error occurred in group 1: n = 500, .mean = -1, .sd = 1, sim = 1.
Caused by error in `tidy_normal()`:
! unused arguments (n = c(500, 500, 500), sim = c(1, 1, 1))
Run `rlang::last_error()` to see where the error occurred.
Since I do not know what distribution the user is going to enter I wanted this function to be dynamic and use do.call
instead of making an explicit rlang::call2
for each possibility.
I am not sure how to proceed from here as everything is failing, I assume because my do.call is wrong.
CodePudding user response:
The function parameter names should match with the column names of 'df' i.e. if we look at the output of 'df' inside the function
> df
# A tibble: 3 × 4
n .mean .sd sim
<int> <dbl> <dbl> <int>
1 50 -1 1 1
2 50 0 1 1
3 50 1 1 1
and the arguments of the tidynormal
are
> formalArgs(tidy_normal)
[1] ".n" ".mean" ".sd" ".num_sims"
In the below code, the column names is changed to match the formalArgs
as well as make use pmap
to apply the function rowwise
(which would be faster than rowwise
)
...
names(df) <- formalArgs(td)
...
dff <- df %>% mutate(result = purrr::pmap(cur_data(), match.fun(td)))
...
We may have to change the function to
tidy_multi_dist <- function(
.tidy_dist = NULL,
.param_list = list()
) {
# Check param ----
if (is.null(.tidy_dist)) {
rlang::abort(
"Please enter a 'tidy_' distribution function like 'tidy_normal'
in quotes."
)
}
if (length(.param_list) == 0) {
rlang::abort(
"Please enter some parameters for your chosen 'tidy_' distribution."
)
}
# Call used ---
td <- as.character(.tidy_dist)
# Params ----
params <- .param_list
# Params for the call ----
n <- as.integer(params$.n)
num_sims <- as.integer(params$.num_sims)
x <- seq(1, num_sims, 1)
# Final parameter list
final_params_list <- params[which(!names(params) %in% c(".n", ".num_sims"))]
# Set the grid to make the calls ----
param_grid <- expand.grid(final_params_list)
df <- tidyr::expand_grid(
n = n,
param_grid,
sim = as.integer(x)
) #%>%
#group_by_all()
#func_parm_list <- as.list(df)
names(df) <- formalArgs(td)
# Run call on the grouped df ----
#dff <- df %>%
# dplyr::rowwise() %>%
# dplyr::mutate(results = list(do.call(td, func_parm_list))) # fails here
dff <- df %>% mutate(results = purrr::pmap(cur_data(), match.fun(td)))
#df %>% rowwise() %>% mutate(results = list(do.call(td, list(.n = n, .num_sims = num_sims,.mean = .mean, .sd = .sd)))) %>% unnest(results)
# Get the attributes to be used later on ----
atb <- dff$results[[1]] %>% attributes()
# Make Dist Type for column ----
dist_type <- stringr::str_remove(atb$tibble_type, "tidy_") %>%
stringr::str_replace_all(pattern = "_", " ") %>%
stringr::str_to_title()
# Get column names from the param_grid in order to make teh dist_type column ----
cols <- names(param_grid)
dff$dist_name <- paste0(
paste0(dist_type, " c("),
apply(dff[, cols], 1, paste0, collapse = ", "),
")"
)
df_unnested_tbl <- dff %>%
tidyr::unnest(results) %>%
dplyr::ungroup() %>%
dplyr::select(sim_number, dist_name, x:q) %>%
dplyr::mutate(dist_name = as.factor(dist_name)) %>%
dplyr::arrange(sim_number, dist_name)
# Attach attributes ----
attr(df_unnested_tbl, "all") <- atb
attr(df_unnested_tbl, "tbl") <- "tidy_multi_tibble"
# Return ----
return(df_unnested_tbl)
}
-testing
> out <- tidy_multi_dist(
.tidy_dist = "tidy_normal",
.param_list = list(
.n = 50,
.mean = c(-1, 0, 1),
.sd = 1,
.num_sims = 1)
)
> out
# A tibble: 150 × 8
sim_number dist_name x y dx dy p q
<fct> <fct> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 Gaussian c(-1, 1) 1 -0.879 -4.90 0.000211 0 -Inf
2 1 Gaussian c(-1, 1) 2 -1.70 -4.74 0.000585 0 -3.05
3 1 Gaussian c(-1, 1) 3 -1.72 -4.59 0.00142 0 -2.74
4 1 Gaussian c(-1, 1) 4 -0.577 -4.43 0.00306 0 -2.54
5 1 Gaussian c(-1, 1) 5 -1.87 -4.28 0.00583 0 -2.39
6 1 Gaussian c(-1, 1) 6 -0.779 -4.13 0.00990 0 -2.27
7 1 Gaussian c(-1, 1) 7 0.342 -3.97 0.0151 5.73e-300 -2.16
8 1 Gaussian c(-1, 1) 8 -2.28 -3.82 0.0212 1.12e-268 -2.07
9 1 Gaussian c(-1, 1) 9 -0.875 -3.66 0.0278 4.06e-239 -1.98
10 1 Gaussian c(-1, 1) 10 -1.77 -3.51 0.0350 2.70e-211 -1.90
# … with 140 more rows
> str(out)
tibble [150 × 8] (S3: tbl_df/tbl/data.frame)
$ sim_number: Factor w/ 1 level "1": 1 1 1 1 1 1 1 1 1 1 ...
$ dist_name : Factor w/ 3 levels "Gaussian c(-1, 1)",..: 1 1 1 1 1 1 1 1 1 1 ...
$ x : int [1:150] 1 2 3 4 5 6 7 8 9 10 ...
$ y : num [1:150] -1.901 -3.809 -1.186 -2.821 -0.666 ...
$ dx : num [1:150] -5.08 -4.93 -4.78 -4.63 -4.48 ...
$ dy : num [1:150] 0.000212 0.000573 0.001367 0.002882 0.005368 ...
$ p : num [1:150] 0 0 0 0 0 ...
$ q : num [1:150] -Inf -3.05 -2.74 -2.54 -2.39 ...
- attr(*, "all")=List of 10
..$ class : chr [1:3] "tbl_df" "tbl" "data.frame"
..$ row.names : int [1:50] 1 2 3 4 5 6 7 8 9 10 ...
..$ names : chr [1:7] "sim_number" "x" "y" "dx" ...
..$ .mean : num -1
..$ .sd : num 1
..$ .n : int 50
..$ .num_sims : int 1
..$ tibble_type: chr "tidy_gaussian"
..$ ps : num [1:50] -50 -48 -46 -44 -42 -40 -38 -36 -34 -32 ...
..$ qs : num [1:50] 0 0.0204 0.0408 0.0612 0.0816 ...
- attr(*, "tbl")= chr "tidy_multi_tibble"