# Example data: ten people identified by `id`, each with an `age`.
id <- 1:10
age <- c(100, 7, 23, 66, 34, 67, 45, 50, 99, 7)
data <- data.frame(id = id, age = age)

# Bin the ages into the half-open intervals (0, 50] and (50, 100].
data[["age_stage"]] <- cut(data[["age"]], breaks = c(0, 50, 100))
id age age_stage
1 1 100 (50,100]
2 2 7 (0,50]
3 3 23 (0,50]
4 4 66 (50,100]
5 5 34 (0,50]
6 6 67 (50,100]
7 7 45 (0,50]
8 8 50 (0,50]
9 9 99 (50,100]
10 10 7 (0,50]
I want to calculate the proportion of people who are older than 50. How should I do this?
CodePudding user response:
You can compute the proportion in each age group using prop.table:
# Tabulate the age bands, then turn the counts into shares of the total.
prop.table(table(data[["age_stage"]]))
(0,50] (50,100]
0.6 0.4
CodePudding user response:
A dplyr solution:
library(tidyverse)
# Rebuild the example data: ten ids with ages, binned into two age bands.
id <- 1:10
age <- c(100, 7, 23, 66, 34, 67, 45, 50, 99, 7)
data <- data.frame(id = id, age = age)
data[["age_stage"]] <- cut(data[["age"]], breaks = c(0, 50, 100))

# Count the rows in each age band, then express each count as a share of the
# total, rounded to two decimals.
data %>%
  group_by(age_stage) %>%
  summarise(cnt = n()) %>%
  mutate(props = round(cnt / sum(cnt), 2))
#> # A tibble: 2 x 3
#> age_stage cnt props
#> <fct> <int> <dbl>
#> 1 (0,50] 6 0.6
#> 2 (50,100] 4 0.4
CodePudding user response:
# I would prefer a function that I can reuse (or apply with lapply) if my data changes.
#' Proportion of rows whose age is strictly greater than 50
#'
#' Reads the `age` column from the object passed in, rather than relying on a
#' variable named `age` existing in the calling or global environment.
#'
#' @param data A data frame (or list) with a numeric `age` element.
#' @return A single number in [0, 1]: the share of ages above 50. `NA` if any
#'   age is missing, `NaN` if `data` has no rows.
older_than_50 <- function(data) {
  # `[[` avoids the partial name matching that `$` performs on data frames.
  ages <- data[["age"]]
  if (is.null(ages)) {
    stop("`data` must contain an `age` column", call. = FALSE)
  }
  # The mean of a logical vector is the fraction of TRUE values.
  mean(ages > 50)
}
# Call the helper with the example data frame; at top level the returned
# value is printed.
older_than_50(data)