Home > Blockchain >  Use do.call to run a function with a given list of parameters by the end user
Use do.call to run a function with a given list of parameters by the end user

Time:02-16

I have a function called tidy_normal() from TidyDensity package. This takes in exactly one parameter per arguement, example: .mean = 1

What I want to do is build a function that will take in a quoted string from a user and the parameter list like so:

tidy_multi_dist(
  .tidy_dist = "tidy_normal", 
  .param_list = list(
    .n = 50, 
    .mean = c(-1, 0, 1), 
    .sd = 1, 
    .num_sims = 1)
  )

What I have so far is:

tidy_multi_dist <- function(
  .tidy_dist = NULL,
  .param_list = list()
) {

  # Check param ----
  if (is.null(.tidy_dist)) {
    rlang::abort(
      "Please enter a 'tidy_' distribution function like 'tidy_normal' 
      in quotes."
    )
  }

  if (length(.param_list) == 0) {
    rlang::abort(
      "Please enter some parameters for your chosen 'tidy_' distribution."
    )
  }

  # Call used ---
  td <- as.character(.tidy_dist)

  # Params ----
  params <- .param_list

  # Params for the call ----
  n <- as.integer(params$.n)
  num_sims <- as.integer(params$.num_sims)
  x <- seq(1, num_sims, 1)

  # Final parameter list
  final_params_list <- params[which(!names(params) %in% c(".n", ".num_sims"))]

  # Set the grid to make the calls ----
  param_grid <- expand.grid(final_params_list)

  df <- tidyr::expand_grid(
    n = n,
    param_grid,
    sim = as.integer(x)
  ) #%>%
    #group_by_all()
  
  func_parm_list <- as.list(df)

  # Run call on the grouped df ----
  dff <- df %>%
    dplyr::rowwise() %>%
    dplyr::mutate(results = list(do.call(td, func_parm_list))) # fails here

  #df %>% rowwise() %>% mutate(results = list(do.call(td, list(.n = n, .num_sims = num_sims,.mean = .mean, .sd = .sd)))) %>% unnest(results)

  # Get the attributes to be used later on ----
  atb <- dff$results[[1]] %>% attributes()

  # Make Dist Type for column ----
  dist_type <- stringr::str_remove(atb$tibble_type, "tidy_") %>%
    stringr::str_replace_all(pattern = "_", " ") %>%
    stringr::str_to_title()

  # Get column names from the param_grid in order to make teh dist_type column ----
  cols <- names(param_grid)

  dff$dist_name <- paste0(
    paste0(dist_type, " c("),
    apply(dff[, cols], 1, paste0, collapse = ", "),
    ")"
  )

  df_unnested_tbl <- dff %>%
    tidyr::unnest(results) %>%
    dplyr::ungroup() %>%
    dplyr::select(sim_number, dist_name, x:q) %>%
    dplyr::mutate(dist_name = as.factor(dist_name)) %>%
    dplyr::arrange(sim_number, dist_name)

  # Attach attributes ----
  attr(df_unnested_tbl, "all") <- atb
  attr(df_unnested_tbl, "tbl") <- "tidy_multi_tibble"

  # Return ----
  return(df_unnested_tbl)

}

The error message I am getting is:

> df %>%
      #dplyr::rowwise() %>%
      dplyr::mutate(results = list(do.call(td, func_parm_list)))
Error in `dplyr::mutate()`:
! Problem while computing `results = list(do.call(td, func_parm_list))`.
i The error occurred in group 1: n = 500, .mean = -1, .sd = 1, sim = 1.
Caused by error in `tidy_normal()`:
! unused arguments (n = c(500, 500, 500), sim = c(1, 1, 1))
Run `rlang::last_error()` to see where the error occurred.

Since I do not know what distribution the user is going to enter I wanted this function to be dynamic and use do.call instead of making an explicit rlang::call2 for each possibility.

I am not sure how to proceed from here as everything is failing, I assume because my do.call is wrong.

CodePudding user response:

The function parameter names should match with the column names of 'df' i.e. if we look at the output of 'df' inside the function

> df
# A tibble: 3 × 4
      n .mean   .sd   sim
  <int> <dbl> <dbl> <int>
1    50    -1     1     1
2    50     0     1     1
3    50     1     1     1

and the arguments of the tidynormal are

> formalArgs(tidy_normal)
[1] ".n"        ".mean"     ".sd"       ".num_sims"

In the below code, the column names is changed to match the formalArgs as well as make use pmap to apply the function rowwise (which would be faster than rowwise)

...
names(df) <- formalArgs(td)
...
dff <- df %>% mutate(result = purrr::pmap(cur_data(), match.fun(td)))
...

We may have to change the function to

tidy_multi_dist <- function(
  .tidy_dist = NULL,
  .param_list = list()
) {

  # Check param ----
  if (is.null(.tidy_dist)) {
    rlang::abort(
      "Please enter a 'tidy_' distribution function like 'tidy_normal' 
      in quotes."
    )
  }

  if (length(.param_list) == 0) {
    rlang::abort(
      "Please enter some parameters for your chosen 'tidy_' distribution."
    )
  }

  # Call used ---
  td <- as.character(.tidy_dist)

  # Params ----
  params <- .param_list

  # Params for the call ----
  n <- as.integer(params$.n)
  num_sims <- as.integer(params$.num_sims)
  x <- seq(1, num_sims, 1)

  # Final parameter list
  final_params_list <- params[which(!names(params) %in% c(".n", ".num_sims"))]

  # Set the grid to make the calls ----
  param_grid <- expand.grid(final_params_list)

  df <- tidyr::expand_grid(
    n = n,
    param_grid,
    sim = as.integer(x)
  ) #%>%
    #group_by_all()
  
  #func_parm_list <- as.list(df)

 names(df) <- formalArgs(td)
  # Run call on the grouped df ----
  #dff <- df %>%
  #  dplyr::rowwise() %>%
  #  dplyr::mutate(results = list(do.call(td, func_parm_list))) # fails here

 dff <- df %>% mutate(results = purrr::pmap(cur_data(), match.fun(td)))
 
  #df %>% rowwise() %>% mutate(results = list(do.call(td, list(.n = n, .num_sims = num_sims,.mean = .mean, .sd = .sd)))) %>% unnest(results)

  # Get the attributes to be used later on ----
  atb <- dff$results[[1]] %>% attributes()

  # Make Dist Type for column ----
  dist_type <- stringr::str_remove(atb$tibble_type, "tidy_") %>%
    stringr::str_replace_all(pattern = "_", " ") %>%
    stringr::str_to_title()

  # Get column names from the param_grid in order to make teh dist_type column ----
  cols <- names(param_grid)

  dff$dist_name <- paste0(
    paste0(dist_type, " c("),
    apply(dff[, cols], 1, paste0, collapse = ", "),
    ")"
  )

  df_unnested_tbl <- dff %>%
    tidyr::unnest(results) %>%
    dplyr::ungroup() %>%
    dplyr::select(sim_number, dist_name, x:q) %>%
    dplyr::mutate(dist_name = as.factor(dist_name)) %>%
    dplyr::arrange(sim_number, dist_name)

  # Attach attributes ----
  attr(df_unnested_tbl, "all") <- atb
  attr(df_unnested_tbl, "tbl") <- "tidy_multi_tibble"

  # Return ----
  return(df_unnested_tbl)

}

-testing

> out <- tidy_multi_dist(
    .tidy_dist = "tidy_normal", 
    .param_list = list(
      .n = 50, 
      .mean = c(-1, 0, 1), 
      .sd = 1, 
      .num_sims = 1)
    )
> out
# A tibble: 150 × 8
   sim_number dist_name             x      y    dx       dy         p       q
   <fct>      <fct>             <int>  <dbl> <dbl>    <dbl>     <dbl>   <dbl>
 1 1          Gaussian c(-1, 1)     1 -0.879 -4.90 0.000211 0         -Inf   
 2 1          Gaussian c(-1, 1)     2 -1.70  -4.74 0.000585 0           -3.05
 3 1          Gaussian c(-1, 1)     3 -1.72  -4.59 0.00142  0           -2.74
 4 1          Gaussian c(-1, 1)     4 -0.577 -4.43 0.00306  0           -2.54
 5 1          Gaussian c(-1, 1)     5 -1.87  -4.28 0.00583  0           -2.39
 6 1          Gaussian c(-1, 1)     6 -0.779 -4.13 0.00990  0           -2.27
 7 1          Gaussian c(-1, 1)     7  0.342 -3.97 0.0151   5.73e-300   -2.16
 8 1          Gaussian c(-1, 1)     8 -2.28  -3.82 0.0212   1.12e-268   -2.07
 9 1          Gaussian c(-1, 1)     9 -0.875 -3.66 0.0278   4.06e-239   -1.98
10 1          Gaussian c(-1, 1)    10 -1.77  -3.51 0.0350   2.70e-211   -1.90
# … with 140 more rows

> str(out)
tibble [150 × 8] (S3: tbl_df/tbl/data.frame)
 $ sim_number: Factor w/ 1 level "1": 1 1 1 1 1 1 1 1 1 1 ...
 $ dist_name : Factor w/ 3 levels "Gaussian c(-1, 1)",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ x         : int [1:150] 1 2 3 4 5 6 7 8 9 10 ...
 $ y         : num [1:150] -1.901 -3.809 -1.186 -2.821 -0.666 ...
 $ dx        : num [1:150] -5.08 -4.93 -4.78 -4.63 -4.48 ...
 $ dy        : num [1:150] 0.000212 0.000573 0.001367 0.002882 0.005368 ...
 $ p         : num [1:150] 0 0 0 0 0 ...
 $ q         : num [1:150] -Inf -3.05 -2.74 -2.54 -2.39 ...
 - attr(*, "all")=List of 10
  ..$ class      : chr [1:3] "tbl_df" "tbl" "data.frame"
  ..$ row.names  : int [1:50] 1 2 3 4 5 6 7 8 9 10 ...
  ..$ names      : chr [1:7] "sim_number" "x" "y" "dx" ...
  ..$ .mean      : num -1
  ..$ .sd        : num 1
  ..$ .n         : int 50
  ..$ .num_sims  : int 1
  ..$ tibble_type: chr "tidy_gaussian"
  ..$ ps         : num [1:50] -50 -48 -46 -44 -42 -40 -38 -36 -34 -32 ...
  ..$ qs         : num [1:50] 0 0.0204 0.0408 0.0612 0.0816 ...
 - attr(*, "tbl")= chr "tidy_multi_tibble"
  • Related