I have a CSV file like the one below; once read into R, the data frame is called df_plane:
Situation | flight_uses | People-ID |
---|---|---|
1 | 1 | 1 |
2 | 1 | 1 |
3 | 0 | 1 |
1 | 1 | 2 |
2 | 1 | 2 |
3 | 1 | 2 |
1 | 1 | 3 |
2 | 0 | 3 |
3 | 1 | 3 |
1 | 1 | 4 |
2 | 1 | 4 |
3 | 0 | 4 |
1 | 1 | 5 |
2 | 0 | 5 |
3 | 0 | 5 |
1 | 1 | 6 |
2 | 1 | 6 |
3 | NA | 6 |
1 | NA | 7 |
2 | 1 | 7 |
3 | 1 | 7 |
1 | 1 | 8 |
2 | 0 | 8 |
3 | 0 | 8 |
1 | NA | 9 |
2 | NA | 9 |
3 | 1 | 9 |
1 | 1 | 10 |
2 | 1 | 10 |
3 | 0 | 10 |
1 | 0 | 11 |
2 | 0 | 11 |
3 | 0 | 11 |
I would like to find out what percentage of people use an airplane in situation 2. Is there a more efficient way than the code below? With the code below I only get the counts and have to calculate the percentage manually.
# Cross-tabulate situation against flight usage (counts only, so the
# percentage still has to be worked out by hand). The column is spelled
# `Situation` in the data shown above; column names are case-sensitive.
table(select(df_plane, Situation, flight_uses))
CodePudding user response:
You can use `mean()` to calculate the proportion:
> with(df_plane, {
+   # Treat a missing answer as 0 ("does not fly") so it stays in the denominator.
+   uses_plane <- replace(flight_uses, is.na(flight_uses), 0)
+   mean(uses_plane[Situation == 2])
+ })
[1] 0.5454545
CodePudding user response:
Are you asking: of those rows where `Situation == 2`, what is the percentage where `flight_uses == 1`?
dplyr approach
dplyr
is useful for these types of manipulations:
library(dplyr)
# Percentage of Situation-2 rows where flight_uses is 1. Rows with a missing
# flight_uses are dropped from the numerator (na.rm = TRUE) but still counted
# by n(), i.e. they are treated as "does not fly".
df_plane |>
  filter(Situation == 2) |>
  summarise(
    percent_using_plane = sum(flight_uses == 1, na.rm = TRUE) / n() * 100
  )
# percent_using_plane
# 1 54.54545
base R
If you want to stick with the base R table syntax (which seems fine in this case but can become unwieldy once calculations get more complicated), you were nearly there:
# Percentage of each flight_uses value among the Situation-2 rows. Rows whose
# flight_uses is NA are left out of the counts but still count in the
# denominator, so the percentages need not sum to 100.
local({
  situation_2 <- df_plane[df_plane$Situation == 2, ]
  table(situation_2$flight_uses) / nrow(situation_2) * 100
})
# 0 1
# 36.36364 54.54545
CodePudding user response:
You can use functions from the `janitor` package.
library(tidyverse)
library(janitor)
#>
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#>
#> chisq.test, fisher.test
# Same 33 rows as the question's table, built column by column: each of the
# 11 people appears once per situation (1, 2, 3), in that order.
df_plane <- tibble::tibble(
  Situation = rep(1:3, times = 11L),
  flight_uses = c(
    1L, 1L, 0L, # person 1
    1L, 1L, 1L, # person 2
    1L, 0L, 1L, # person 3
    1L, 1L, 0L, # person 4
    1L, 0L, 0L, # person 5
    1L, 1L, NA, # person 6
    NA, 1L, 1L, # person 7
    1L, 0L, 0L, # person 8
    NA, NA, 1L, # person 9
    1L, 1L, 0L, # person 10
    0L, 0L, 0L  # person 11
  ),
  `People-ID` = rep(1:11, each = 3L)
) |>
  # Normalise the column names to snake_case: situation, flight_uses, people_id.
  clean_names()
# Two-way count table (situation x flight_uses), converted to row-wise
# percentages and formatted with one decimal place; the arguments below are
# janitor's defaults, spelled out for clarity.
df_plane |>
  tabyl(situation, flight_uses) |>
  adorn_percentages(denominator = "row") |>
  adorn_pct_formatting(digits = 1)
#> situation 0 1 NA_
#> 1 9.1% 72.7% 18.2%
#> 2 36.4% 54.5% 9.1%
#> 3 54.5% 36.4% 9.1%
Created on 2022-10-26 with reprex v2.0.2
In Situation 2, 54.5% of people use an airplane.
CodePudding user response:
Use `with()` instead of `dplyr::select()` and wrap the result in `proportions()`.
# Column-wise (margin = 2) proportions: each Situation column sums to 1, and
# missing flight_uses answers are kept as their own <NA> row.
with(df_plane, table(flight_uses, Situation, useNA = "ifany")) |>
  proportions(margin = 2)
# Situation
# flight_uses 1 2 3
# 0 0.09090909 0.36363636 0.54545455
# 1 0.72727273 0.54545455 0.36363636
# <NA> 0.18181818 0.09090909 0.09090909