I have a df that looks like this:
head(df)
site year common_name
1 2019 Silverside
1 2019 Silverside
1 2019 Sailfin molly
2 2019 Silverside
1 2020 Silverside
I like the function 'aggregate', but not sure how to summarize the frequency of occurrence and proportion of the total for each individual level of a group (for each species). My ultimate goal looks like this:
site year total_fish total_Silverside %_Silverside ...etc. (all other 'common_name's)
1 2019 3 2 67%
2 2019 1 1 100%
1 2020 1 1 100%
CodePudding user response:
{dplyr} can do it.
library(tidyverse)
text <- "site, year, common_name
1, 2019, Silverside
1, 2019, Silverside
1, 2019, Sailfin molly
2, 2019, Silverside
1, 2020, Silverside"
data <- read.table(textConnection(text), sep = ",", header = TRUE) %>%
as_tibble() %>%
mutate_all(trimws)
absolutes <- data %>%
group_by(site, year) %>%
summarize(total = n(), as_tibble(table(common_name))) %>%
spread(common_name, n, fill = 0) %>%
ungroup()
#> `summarise()` has grouped output by 'site', 'year'. You can override using the `.groups` argument.
shares <- absolutes %>%
mutate(., across(data$common_name, ~ .x / !!.$total)) %>%
rename_with(~ paste0("share ", .x), data$common_name)
inner_join(absolutes, shares) %>%
rename_with(~ paste0("absolute ", .x), data$common_name)
#> Joining, by = c("site", "year", "total")
#> # A tibble: 3 × 7
#> site year total `absolute Sailfin molly` `absolute Silver… `share Sailfin m…
#> <chr> <chr> <int> <dbl> <dbl> <dbl>
#> 1 1 2019 3 1 2 0.333
#> 2 1 2020 1 0 1 0
#> 3 2 2019 1 0 1 0
#> # … with 1 more variable: share Silverside <dbl>
Created on 2021-12-09 by the reprex package (v2.0.1)
CodePudding user response:
Another possible solution:
library(tidyverse)
df %>%
pivot_wider(id_cols = c(site, year), names_from = common_name,
values_from = common_name, values_fn = length, names_prefix = "total_",
values_fill = 0) %>%
mutate(tot = rowSums(select(.,starts_with("total_")))) %>%
mutate(across(
starts_with("total_"), ~100* .x /tot, .names="perc_{str_remove(col,'total_')}")) %>% as.data.frame()
#> site year total_Silverside total_Sailfin molly tot perc_Silverside
#> 1 1 2019 2 1 3 66.66667
#> 2 2 2019 1 0 1 100.00000
#> 3 1 2020 1 0 1 100.00000
#> perc_Sailfin molly
#> 1 33.33333
#> 2 0.00000
#> 3 0.00000