I have this data frame (comma-separated format here)...
state, sex, numerator, denominator, num_divide_denom
AL, male, 10, 20, 0.5
AL, female, 20, 30, 0.66
AL, unknown, 40, 50, 0.8
FL, male, 10, 20, 0.5
FL, female, 20, 30, 0.66
FL, unknown, 40, 50, 0.8
structure(list(state = c("AL", "AL", "AL", "FL", "FL", "FL"),
sex = c(" male", " female", " unknown", " male", " female",
" unknown"), numerator = c(10L, 20L, 40L, 10L, 20L, 40L),
denominator = c(20L, 30L, 50L, 20L, 30L, 50L), num_divide_denom = c(0.5,
0.66, 0.8, 0.5, 0.66, 0.8)), class = "data.frame", row.names = c(NA,
-6L))
How do I add rows that show, for each state, the total "numerator" and "denominator" values where "sex" is not "unknown," along with the value of (numerator / denominator) for "num_divide_denom?"
CodePudding user response:
We can group by 'state', and create the column by dividing the sum
of 'numerator' where the 'sex' is not 'unknown' with the corresponding 'denominator'
library(dplyr)
df1 <- df1 %>%
group_by(state) %>%
mutate(num_divide_denom =sum(numerator[sex != 'unknown'],
na.rm = TRUE)/sum(denominator[sex != 'unknown'], na.rm = TRUE)) %>%
ungroup
-output
df1
# A tibble: 6 × 5
state sex numerator denominator num_divide_denom
<chr> <chr> <int> <int> <dbl>
1 AL male 10 20 0.6
2 AL female 20 30 0.6
3 AL unknown 40 50 0.6
4 FL male 10 20 0.6
5 FL female 20 30 0.6
6 FL unknown 40 50 0.6
If we need to create new rows
library(janitor)
df1 %>%
filter(sex != 'unknown') %>%
group_by(state) %>%
group_modify(~ adorn_totals(.x, name = 'known')) %>%
ungroup %>%
mutate(num_divide_denom = numerator/denominator) %>%
bind_rows(df1 %>%
filter(sex == 'unknown')) %>%
arrange(state)
-output
# A tibble: 8 × 5
state sex numerator denominator num_divide_denom
<chr> <chr> <int> <int> <dbl>
1 AL male 10 20 0.5
2 AL female 20 30 0.667
3 AL known 30 50 0.6
4 AL unknown 40 50 0.8
5 FL male 10 20 0.5
6 FL female 20 30 0.667
7 FL known 30 50 0.6
8 FL unknown 40 50 0.8
data
df1 <- structure(list(state = c("AL", "AL", "AL", "FL", "FL", "FL"),
sex = c("male", "female", "unknown", "male", "female", "unknown"
), numerator = c(10L, 20L, 40L, 10L, 20L, 40L), denominator = c(20L,
30L, 50L, 20L, 30L, 50L), num_divide_denom = c(0.5, 0.66,
0.8, 0.5, 0.66, 0.8)), row.names = c(NA, -6L), class = "data.frame")
CodePudding user response:
library(tidyverse)
df1 %>%
group_by(state) %>%
group_modify(
~bind_rows(
.x,
tibble(
sex = 'known',
numerator = sum(.x$numerator[.x$sex != 'unknown']),
denominator = sum(.x$denominator[.x$sex != 'unknown']),
num_divide_denom = numerator / denominator
)
)
)
# A tibble: 8 × 5 # Groups: state [2] state sex numerator denominator num_divide_denom <chr> <chr> <int> <int> <dbl> 1 AL male 10 20 0.5 2 AL female 20 30 0.66 3 AL unknown 40 50 0.8 4 AL known 30 50 0.6 5 FL male 10 20 0.5 6 FL female 20 30 0.66 7 FL unknown 40 50 0.8 8 FL known 30 50 0.6
Or
df1 %>%
group_by(state) %>%
summarise(
numerator = sum(numerator[sex != 'unknown']),
denominator = sum(denominator[sex != 'unknown']),
sex = 'known',
num_divide_denom = numerator / denominator
) %>% bind_rows(df1)