I have some data in this format:
#> # A tibble: 3 × 5
#> item cost blue pink black
#> <int> <int> <int> <int> <int>
#> 1 1 4 1 0 1
#> 2 2 10 1 0 1
#> 3 3 3 0 1 1
The output I would like is the relative frequency of the color columns. Items can have more than one color, so the relative frequencies don't have to sum to 1.
#> color rel_freq
#> <chr> <dbl>
#> 1 blue 0.67
#> 2 pink 0.33
#> 3 black 1.00
I can do it manually for one color, say, blue:
library(tidyverse)
# Example data: one row per item, with a 0/1 indicator column for each color.
df <- tibble::tibble(
  item  = 1:3,
  cost  = c(4L, 10L, 3L),
  blue  = c(1L, 1L, 0L),
  pink  = c(0L, 0L, 1L),
  black = c(1L, 1L, 1L)
)
# Percentage of items flagged blue, returned as a one-row tibble labelled
# with the color name.
df %>%
  count(blue, name = "count") %>%
  mutate(rel_freq = count / sum(count) * 100) %>%
  filter(blue == 1) %>%
  mutate(color = "blue") %>%
  select(color, rel_freq)
which gives
#> color rel_freq
#> <chr> <dbl>
#> 1 blue 66.7
but when I put it into a function, I can't figure out how to pass in an argument so that it can be treated as a column (using "curly-curly" notation) and as a character variable (that's what I'm stuck on).
#' Relative frequency (in percent) of one color indicator column.
#'
#' @param input_color Bare (unquoted) name of a 0/1 indicator column, e.g.
#'   `blue`. To start from a string, inject a symbol instead:
#'   `calc_rel_freq(!!rlang::sym("blue"))`.
#' @param data Data frame holding the indicator columns. Defaults to the
#'   script-level `df`, which is what the function read before this argument
#'   existed.
#' @return A tibble with `color` (the column name as a string) and `rel_freq`
#'   (percentage of rows where the column equals 1). It has one row, or zero
#'   rows when no row equals 1.
calc_rel_freq <- function(input_color, data = df) {
  # Capture the label up front, outside the data mask. Inside mutate(),
  # deparse(substitute({{ input_color }})) does not see the caller's
  # expression, so it cannot recover the column name.
  color_label <- rlang::as_label(rlang::enquo(input_color))

  data %>%
    group_by({{ input_color }}) %>%
    summarise(count = n()) %>%
    mutate(rel_freq = count / sum(count) * 100) %>%
    filter({{ input_color }} == 1) %>%
    # .env makes sure the local string is used even if `data` happens to
    # contain a column called `color_label`.
    mutate(color = .env$color_label) %>%
    select(color, rel_freq)
}

calc_rel_freq(blue)
My final goal is to be able to use a function like this:
input_colors <- c("blue", "pink", "black")
# Use the name the function was defined under (calc_rel_freq). The colors are
# supplied as strings here, so the function has to accept a column name given
# as a string for this call to produce the desired result.
map(input_colors, calc_rel_freq)
Code for data input below:
library(tidyverse)
# Reproducible input: three items, their cost, and one 0/1 flag per color.
df <- tibble::tibble(
  item  = 1:3,
  cost  = c(4L, 10L, 3L),
  blue  = c(1L, 1L, 0L),
  pink  = c(0L, 0L, 1L),
  black = c(1L, 1L, 1L)
)
df
CodePudding user response:
Since your desired output is the relative frequencies, you can do that more directly with
# Stack the color indicators into (name, value) pairs, then average the 0/1
# values per color: the mean of an indicator is its relative frequency.
df %>%
  pivot_longer(cols = c(blue, pink, black)) %>%
  group_by(name) %>%
  summarise(rel_freq = mean(value))
# name rel_freq
# <chr> <dbl>
# 1 black 1
# 2 blue 0.667
# 3 pink 0.333
If you really want just one color, you can add a `filter()` step at the end.
CodePudding user response:
We can use dplyr with `across()`.
library(dplyr)
# Mean of each 0/1 color column, returned as one wide row.
summarise(df, across(.cols = blue:black, .fns = mean))
# A tibble: 1 × 3
blue pink black
<dbl> <dbl> <dbl>
1 0.667 0.333 1
`across()` can also work with a character vector of column names, using the `all_of()` selection helper:
library(dplyr)
# Column names held as strings; all_of() turns them into a column selection.
input_colors <- c("blue", "pink", "black")
summarise(df, across(.cols = all_of(input_colors), .fns = mean))
If we really want to use non-standard evaluation to turn character elements into column selections inside a dplyr function, we can convert each string to a symbol (`sym()`) and unquote it (`!!`), using the rlang package:
library(dplyr)
library(purrr)
library(rlang)
# For each color name: convert the string to a symbol, unquote it with !! as
# the across() selection, and column-bind the one-column results.
map_dfc(
  input_colors,
  function(color) summarise(df, across(!!sym(color), mean))
)
# A tibble: 1 × 3
blue pink black
<dbl> <dbl> <dbl>
1 0.667 0.333 1