Efficient way to calculate percentage of a specific value in a specific time-CodePudding

I have a CSV file like the one below. In R it is loaded as a data frame called df_plane:

Situation	flight_uses	People-ID
1	1	1
2	1	1
3	0	1
1	1	2
2	1	2
3	1	2
1	1	3
2	0	3
3	1	3
1	1	4
2	1	4
3	0	4
1	1	5
2	0	5
3	0	5
1	1	6
2	1	6
3	NA	6
1	NA	7
2	1	7
3	1	7
1	1	8
2	0	8
3	0	8
1	NA	9
2	NA	9
3	1	9
1	1	10
2	1	10
3	0	10
1	0	11
2	0	11
3	0	11

I would like to find out what percentage of people use an airplane in situation 2. Is there a more efficient way to do this than the code below? With that code I still have to calculate the percentage manually.

# Cross-tabulate Situation against flight_uses (raw counts, not percentages).
# The column is spelled `Situation` in the data, so it must be selected with
# that exact capitalisation.
table(select(df_plane, Situation, flight_uses))

CodePudding user response：

You can use mean() to calculate the proportion (here missing values are replaced by 0 before averaging):

> # Mean of flight_uses over the Situation-2 rows, with NA replaced by 0.
> with(
+   df_plane,
+   mean(replace(flight_uses, is.na(flight_uses), 0)[Situation == 2])
+ )
[1] 0.5454545

CodePudding user response：

Are you asking, of those rows where Situation==2, what is the percent where flight_uses==1?

dplyr approach

dplyr is useful for these types of manipulations:

library(dplyr)
# Percentage of Situation-2 rows where flight_uses equals 1.
# n() counts every Situation-2 row, so rows with a missing flight_uses stay
# in the denominator; na.rm = TRUE only keeps them out of the numerator.
df_plane |>
  filter(Situation == 2) |>
  summarise(
    percent_using_plane = sum(flight_uses == 1, na.rm = TRUE) / n() * 100
  )
#   percent_using_plane
# 1            54.54545

base R

If you want to stick with the base R table syntax (which seems fine in this case but can become unwieldy once calculations get more complicated), you were nearly there:

# Percentage of each flight_uses value among the Situation-2 rows.
# table() leaves NA out of the counts while nrow() counts every row, so the
# percentages do not sum to 100 when flight_uses has missing values.
situation_2 <- df_plane[df_plane$Situation == 2, ]
table(situation_2$flight_uses) / nrow(situation_2) * 100
#        0        1
# 36.36364 54.54545

CodePudding user response：

You can use functions from the janitor package.

library(tidyverse)
library(janitor)
#> 
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#> 
#>     chisq.test, fisher.test

# Example data: 11 people, each recorded once in situations 1, 2 and 3.
# flight_uses is coded 0/1, with NA where the value is missing.
# clean_names() then normalises the column names (e.g. Situation -> situation).
df_plane <- tibble::tibble(
  Situation = rep(1:3, times = 11),
  flight_uses = c(
    1L, 1L, 0L,                          # person 1
    1L, 1L, 1L,                          # person 2
    1L, 0L, 1L,                          # person 3
    1L, 1L, 0L,                          # person 4
    1L, 0L, 0L,                          # person 5
    1L, 1L, NA_integer_,                 # person 6
    NA_integer_, 1L, 1L,                 # person 7
    1L, 0L, 0L,                          # person 8
    NA_integer_, NA_integer_, 1L,        # person 9
    1L, 1L, 0L,                          # person 10
    0L, 0L, 0L                           # person 11
  ),
  `People-ID` = rep(1:11, each = 3)
) |>
  clean_names()


# Cross-tabulate situation by flight_uses, convert the counts to row-wise
# percentages (each situation sums to 100%), and format them for display.
# The arguments spelled out below are the janitor defaults.
df_plane |>
  tabyl(situation, flight_uses) |>
  adorn_percentages(denominator = "row") |>
  adorn_pct_formatting(digits = 1)
#>  situation     0     1   NA_
#>          1  9.1% 72.7% 18.2%
#>          2 36.4% 54.5%  9.1%
#>          3 54.5% 36.4%  9.1%

^{Created on 2022-10-26 with reprex v2.0.2}

In Situation 2, 54.5% of passengers use an airplane.

CodePudding user response：

Use with instead of dplyr::select and wrap it in proportions.

# Column-wise proportions: each Situation column sums to 1, and missing
# flight_uses values are kept as their own <NA> row.
with(df_plane, table(flight_uses, Situation, useNA = "ifany")) |>
  proportions(margin = 2)
#            Situation
# flight_uses          1          2          3
#        0    0.09090909 0.36363636 0.54545455
#        1    0.72727273 0.54545455 0.36363636
#        <NA> 0.18181818 0.09090909 0.09090909

Situation	flight_uses	People-ID
1	1	1
2	1	1
3	0	1
1	1	2
2	1	2
3	1	2
1	1	3
2	0	3
3	1	3
1	1	4
2	1	4
3	0	4
1	1	5
2	0	5
3	0	5
1	1	6
2	1	6
3	NA	6
1	NA	7
2	1	7
3	1	7
1	1	8
2	0	8
3	0	8
1	NA	9
2	NA	9
3	1	9
1	1	10
2	1	10
3	0	10
1	0	11
2	0	11
3	0	11

Situation	flight_uses	People-ID
1	1	1
2	1	1
3	0	1
1	1	2
2	1	2
3	1	2
1	1	3
2	0	3
3	1	3
1	1	4
2	1	4
3	0	4
1	1	5
2	0	5
3	0	5
1	1	6
2	1	6
3	NA	6
1	NA	7
2	1	7
3	1	7
1	1	8
2	0	8
3	0	8
1	NA	9
2	NA	9
3	1	9
1	1	10
2	1	10
3	0	10
1	0	11
2	0	11
3	0	11

Situation	flight_uses	People-ID
1	1	1
2	1	1
3	0	1
1	1	2
2	1	2
3	1	2
1	1	3
2	0	3
3	1	3
1	1	4
2	1	4
3	0	4
1	1	5
2	0	5
3	0	5
1	1	6
2	1	6
3	NA	6
1	NA	7
2	1	7
3	1	7
1	1	8
2	0	8
3	0	8
1	NA	9
2	NA	9
3	1	9
1	1	10
2	1	10
3	0	10
1	0	11
2	0	11
3	0	11