This is my sample data
grid_id_2 building_area_in area new_bu_class
1 50871 352 10001 Public
2 51448 54 10001 Others
3 51448 0 10001 Others
4 51451 555 10001 Others
5 51450 1610 10001 Others
6 51451 555 10001 Others
> dput(data_subset)
structure(list(grid_id_2 = c(50871L, 51448L, 51448L, 51451L,
51450L, 51451L, 51450L, 50682L, 50681L, 50682L), building_area_in = c(352L,
54L, 0L, 555L, 1610L, 555L, 1610L, 12L, 219L, 818L), area = c(10001L,
10001L, 10001L, 10001L, 10001L, 10001L, 10001L, 10001L, 10001L,
10001L), new_bu_class = c("Public", "Others", "Others", "Others",
"Others", "Others", "Others", "Public", "Public", "Public")), row.names = c(NA,
10L), class = "data.frame")
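I am trying to compute the share of each grid cell that is covered by each building class. That is, for each "grid_id_2", I want to sum "building_area_in" separately for each "new_bu_class" and divide that sum by "area", and I also want the total building area per grid id.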
My expected output will look like this:
grid_id_2 building_area_in area new_bu_class class_propotion_Others class_propotion_Public total_area
1 50871 352 10001 Public 0 0.035 352
2 51448 54 10001 Others 0.005 0 54
3 51451 555 10001 Others 0.11 0 1110
4 51450 1610 10001 Others 0.16 0 1610
Thank you in advance!
CodePudding user response:
library(dplyr)

# Build one row per `grid_id_2` holding the share of the grid cell's `area`
# covered by each building class, plus the total building area in that cell.
# `data_subset` is the data frame shown in the question's dput() output.
data_subset %>%
  group_by(grid_id_2) %>%
  mutate(
    # Sum the class's building area over the whole grid cell before dividing,
    # so a cell with several buildings of one class gets their combined share
    # (e.g. 555 + 555 = 1110 -> 1110 / 10001 = 0.11). A class that is absent
    # from the cell sums to 0.
    class_prop_others =
      sum(building_area_in[new_bu_class == "Others"], na.rm = TRUE) / area,
    class_prop_public =
      sum(building_area_in[new_bu_class == "Public"], na.rm = TRUE) / area,
    total_area = sum(building_area_in, na.rm = TRUE)
  ) %>%
  # Drop the grouping so later verbs do not silently operate per grid cell.
  ungroup() %>%
  # Keep the first row of each grid cell; the computed columns are identical
  # across a cell's rows as long as `area` is constant within the cell.
  distinct(grid_id_2, .keep_all = TRUE)