I have the following data structure:
country age sex
x 10 m
y 20 f
x 12 m
y 40 m
I want to group my data according to the country and get a calculation of the percentage of my sex variable according to it, resulting in some table like this:
country mean_age percent_m percent_f
x 11 100% 0%
y 20 50% 50%
Thank you in advance!
CodePudding user response:
A possible solution:
library(tidyverse)
df %>%
mutate(age = as.character(age)) %>%
pivot_longer(-country) %>%
pivot_wider(country, values_fn = ~ {if (any(str_detect(.x, "\\d")))
mean(as.numeric(.x)) else proportions(table(.x))["m"]}) %>%
mutate(mean_age = age, percent_m = sex, percent_f = 1- percent_m, age = NULL,
sex = NULL)
#> # A tibble: 2 × 4
#> country mean_age percent_m percent_f
#> <chr> <dbl> <dbl> <dbl>
#> 1 x 11 1 0
#> 2 y 30 0.5 0.5
Another possible solution:
library(tidyverse)
inner_join(
df %>%
mutate(name = "mean_age") %>%
pivot_wider(country, values_from = age, values_fn = mean),
df %>%
mutate(name = "percent_m") %>%
pivot_wider(country, values_from = sex,
values_fn = ~ proportions(table(.x))["m"])) %>%
mutate(percent_f = 1 - percent_m)
#> Joining, by = "country"
#> # A tibble: 2 × 4
#> country mean_age percent_m percent_f
#> <chr> <dbl> <dbl> <dbl>
#> 1 x 11 1 0
#> 2 y 30 0.5 0.5
CodePudding user response:
library(tidyverse)
df <- data.frame(country = c("x", "x", "x", "x", "x", "y", "y", "y"),
age = c(10, 20, 12, 40, 23, 17, 21, 19),
sex = c("m", "f", "f", "m", "f", "m", "f", "f"))
df1 <- df %>%
pivot_wider(names_from = sex, values_from = sex) %>%
group_by(country) %>%
summarise(age_mean = mean(age),
m = length(na.omit(m)),
f = length(na.omit(f))) %>%
mutate(m_perc = (m / (m f)) * 100,
f_perc = (f / (m f)) * 100)
> df1
# A tibble: 2 x 6
country age_mean m f m_perc f_perc
<chr> <dbl> <int> <int> <dbl> <dbl>
1 x 21 2 3 40 60
2 y 19 1 2 33.3 66.7