I wrote an R function to compute the median by group:
# Example data: 60 rows with an alternating 1/2 indicator, a value drawn
# without replacement from 500:1000, and a Yes/No label derived from the
# indicator.
varA <- rep(1:2, times = 30)
df1 <- data.frame(varA)
df1$var1 <- sample(500:1000, nrow(df1))
df1 <- mutate(df1, outcome = ifelse(varA == 1, "Yes", "No"))
# Intended to print the median of column `var` and the row count for each
# level of `group_var`, plus an extra "Total" row covering all rows.
# `var` and `group_var` are column names passed as strings.
# NOTE: as written, every group reports the same (overall) median; see the
# comment inside summarise() below.
ctn_me<- function(df, var, group_var) {
# Coerce the grouping column to character so it can be combined with "Total".
df[[group_var]]<-as.character(df[[group_var]])
# df[[var]]<-as.numeric(df[[var]])
tbl1<-df %>%
# Append a copy of all rows relabelled "Total" so the overall figures appear
# as one more group.
bind_rows(mutate(., !!group_var := 'Total')) %>%
dplyr::group_by(gpvar=.[[group_var]])%>%
dplyr::summarise(
# `.` is the magrittr placeholder for the whole data frame piped into
# summarise(), so `.[[var]]` is the entire column rather than the current
# group's rows. That is why the median is identical for every group, while
# n() (which is group-aware) is correct.
median=median(.[[var]], na.rm = TRUE),
N = n())
print(tbl1)
}
# Run the function on the example data; both column names are given as strings.
ctn_me(df1, "var1", "outcome")
It gave me results like this:
#### gpvar median N
#### <chr> <dbl> <int>
#### 1 No 734 30
#### 2 Total 734 60
#### 3 Yes 734 30
So it counts the number of rows within each group correctly, but for the median it returned the overall median instead of the median of each group.
This gave me the results I wanted:
# Same summary written inline with bare column names: stack a relabelled
# "Total" copy under the data, then take the median and row count per outcome.
df1 %>%
  bind_rows(mutate(., outcome = "Total")) %>%
  dplyr::group_by(outcome) %>%
  dplyr::summarise(median = median(var1, na.rm = TRUE), N = n())
# A tibble: 3 x 3
# outcome median N
# <chr> <dbl> <int>
# 1 No 713 30
# 2 Total 734 60
# 3 Yes 788. 30
I was trying to figure out what was wrong with my R function. Can anyone let me know? Thanks!
CodePudding user response:
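Try this. Inside dplyr verbs, refer to a column whose name is stored in a string through the `.data` pronoun (tidy evaluation) rather than through the pipe's `.`: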
# Median and row count of `var` for each level of `group_var`, plus an
# overall "Total" row.
#
# df        : data frame containing both columns.
# var       : name (string) of the column whose median is taken.
# group_var : name (string) of the grouping column.
#
# Prints the summary table; print() hands the same table back invisibly.
ctn_me <- function(df, var, group_var) {
  # Fail early with a readable message when a column name is missing or is
  # not a single string.
  stopifnot(
    is.character(var), length(var) == 1L, var %in% names(df),
    is.character(group_var), length(group_var) == 1L,
    group_var %in% names(df)
  )
  # Coerce the grouping column to character so it can be combined with "Total".
  df[[group_var]] <- as.character(df[[group_var]])
  # df[[var]] <- as.numeric(df[[var]])
  tbl1 <- df %>%
    # Append a copy of all rows relabelled "Total" so the overall figures
    # appear as one more group.
    bind_rows(mutate(., !!group_var := "Total")) %>%
    # .data[[name]] resolves the column within the current group; a bare
    # .[[name]] would pull the whole column from the piped data frame.
    dplyr::group_by(.data[[group_var]]) %>%
    dplyr::summarise(
      median = median(.data[[var]], na.rm = TRUE),
      N = n()
    )
  print(tbl1)
}
CodePudding user response:
The docs state that you need to specifically reference ".data" within the summarise() function:
"When you have an env-variable that is a character vector, you need to index into the .data pronoun with [[, like summarise(df, mean = mean(.data[[var]]))."
In this case, you need to change .[[var]] to .data[[var]] inside summarise(), i.e.
library(tidyverse)
# Fix the RNG seed so the sampled values (and the output below) are
# reproducible.
set.seed(123)

# Example data: alternating 1/2 indicator, a value sampled without
# replacement from 500:1000, and a Yes/No label derived from the indicator.
varA <- rep(1:2, times = 30)
df1 <- data.frame(varA)
df1$var1 <- sample(500:1000, nrow(df1))
df1 <- mutate(df1, outcome = ifelse(varA == 1, "Yes", "No"))
# Median and row count of `var` for each level of `group_var`, plus an
# overall "Total" row.
#
# df        : data frame containing both columns.
# var       : name (string) of the column whose median is taken.
# group_var : name (string) of the grouping column.
#
# Returns a tibble with columns gpvar, median_group and N.
ctn_me <- function(df, var, group_var) {
  # Coerce the grouping column to character first; otherwise a numeric
  # grouping column cannot be row-bound with the "Total" label.
  df[[group_var]] <- as.character(df[[group_var]])
  df %>%
    # Append a copy of all rows relabelled "Total" so the overall figures
    # appear as one more group.
    bind_rows(mutate(., !!group_var := "Total")) %>%
    # Look both columns up through the .data pronoun instead of the pipe's
    # `.`, which always refers to the whole piped data frame.
    group_by(gpvar = .data[[group_var]]) %>%
    summarise(
      median_group = median(.data[[var]], na.rm = TRUE),
      N = n()
    )
}
# Summarise var1 by outcome; column names are passed as strings.
ctn_me(df1, "var1", "outcome")
#> # A tibble: 3 × 3
#> gpvar median_group N
#> <chr> <dbl> <int>
#> 1 No 740. 30
#> 2 Total 754 60
#> 3 Yes 776. 30
Created on 2022-07-19 by the reprex package (v2.0.1)
Original answer:
If you use a different syntax inside the summarise() function it works as expected, so I think it's something to do with the summarise() function:
library(tidyverse)
# Seed the RNG so the example reproduces the output shown below.
set.seed(123)

# Build the example data: a 1/2 indicator repeated 30 times, a value sampled
# without replacement from 500:1000, and a Yes/No label from the indicator.
varA <- rep(c(1L, 2L), times = 30)
df1 <- data.frame(varA)
df1$var1 <- sample(500:1000, nrow(df1))
df1 <- mutate(df1, outcome = ifelse(varA == 1, "Yes", "No"))
# Median and row count of `var` per level of `group_var`, with an extra
# "Total" row. `var` and `group_var` are column names given as strings.
# Returns a tibble with columns gpvar, median_group and N.
ctn_me <- function(df, var, group_var) {
  # Turn the column name into a symbol once so it can be injected below.
  value_col <- sym(var)

  # Original rows followed by a copy relabelled "Total".
  with_total <- bind_rows(df, mutate(df, !!group_var := "Total"))

  # Group on the (full) grouping column of the stacked data.
  grouped <- group_by(with_total, gpvar = with_total[[group_var]])

  summarise(
    grouped,
    median_group = median(!!value_col, na.rm = TRUE),
    N = n()
  )
}
# Apply the function to the example data, naming the columns as strings.
ctn_me(df1, "var1", "outcome")
#> # A tibble: 3 × 3
#> gpvar median_group N
#> <chr> <dbl> <int>
#> 1 No 740. 30
#> 2 Total 754 60
#> 3 Yes 776. 30
Created on 2022-07-19 by the reprex package (v2.0.1)