Two inputs to map() but only iterate over one of them-CodePudding

I am trying to write a function that iterates over a vector using map2(), but which also takes another input which is fixed on each call.

For example, this code takes just the one input, vars:

library(carData)
library(purrr)
library(tidyverse)
library(Matching)

vars <- c("lfp", "lwg", "inc")
names(vars) <- vars

matching_fcn <- function(.x){
  
  matching_df <- Mroz %>% 
  mutate(wc = case_when(wc == "yes" ~ "TRUE", 
                        wc == "no" ~ "FALSE")) %>% 
  drop_na(k5, k618, age, wc, hc, .x)

matching_df$wc <- as.logical(matching_df$wc)
  ps1 <- glm(wc ~ k5   k618   age   hc, 
             family = binomial, data = matching_df)
  
  pscore <- ps1$fitted.values
  matching_df <- cbind(matching_df, pscore)
  
  Y <- matching_df[[.x]]
  Tr <- as.logical(matching_df$wc)
  
  psm1 <- Matching::Match(
    Y = Y, 
    Tr = Tr, 
    X = pscore, 
    estimand = "ATT", 
    M = 1, 
    replace = TRUE, 
    caliper = 0.05, 
    version = "fast")
  p <- 1 - pnorm(abs(psm1$est.noadj/psm1$se.standard))
  with(psm1, tibble(dv=.x, est=est.noadj, se=se.standard, p=p, ndrops=ndrops))
}

purrr::map_df(
  .x =  tidyselect::all_of(vars),
  .f = matching_fcn)

But if I want to run the same models on a different df which has the same variable names, i will need to copy the whole function again and change the line matching_df <- .... which is sub-optimal.

I tried to solve for this using map2() but it's returning an error that the dimensions of .x and .y are not the same (naturally) because it's attempting to iterate over .y as well as .x.

What I want is to be able to set up the function like this:

matching_fcn <- function(.x, .y){
  
  matching_df <- .y %>% ...

And call it like this:

purrr::map2_df(
  .x = tidyselect::all_of(vars),
  .y = df1,
  .f = matching_fcn)

purrr::map2_df(
  .x = tidyselect::all_of(vars),
  .y = df2,
  .f = matching_fcn)

Etc. Is this possible?

CodePudding user response：

If I have understood correctly I don't think you should use map2 here. As @Limey mentioned you can include data as argument in the function.

library(carData)
library(purrr)
library(tidyverse)
library(Matching)

vars <- c("lfp", "lwg", "inc")
names(vars) <- vars

matching_fcn <- function(data, .x){
  
  matching_df <- data %>% 
    mutate(wc = case_when(wc == "yes" ~ "TRUE", 
                          wc == "no" ~ "FALSE")) %>% 
    drop_na(k5, k618, age, wc, hc, .x)
  
  matching_df$wc <- as.logical(matching_df$wc)
  ps1 <- glm(wc ~ k5   k618   age   hc, 
             family = binomial, data = matching_df)
  
  pscore <- ps1$fitted.values
  matching_df <- cbind(matching_df, pscore)
  
  Y <- matching_df[[.x]]
  Tr <- as.logical(matching_df$wc)
  
  psm1 <- Matching::Match(
    Y = Y, 
    Tr = Tr, 
    X = pscore, 
    estimand = "ATT", 
    M = 1, 
    replace = TRUE, 
    caliper = 0.05, 
    version = "fast")
  p <- 1 - pnorm(abs(psm1$est.noadj/psm1$se.standard))
  with(psm1, tibble(dv=.x, est=est.noadj, se=se.standard, p=p, ndrops=ndrops))
}

Call this as -

purrr::map_df(
  .x =  tidyselect::all_of(vars),
  .f = ~matching_fcn(Mroz, .x))

#  dv      est     se        p ndrops
#  <chr> <dbl>  <dbl>    <dbl>  <dbl>
#1 lfp   0.175 0.0450 5.07e- 5      5
#2 lwg   0.375 0.0571 2.49e-11      5
#3 inc   4.84  1.09   4.82e- 6      5