Home > Software engineering >  Efficient way to calculate percentage of a specific value in a specific time
Efficient way to calculate percentage of a specific value in a specific time

Time:10-26

I have a CSV file like the one below; in R it is loaded as a data frame called df_plane.

Situation flight_uses People-ID
1 1 1
2 1 1
3 0 1
1 1 2
2 1 2
3 1 2
1 1 3
2 0 3
3 1 3
1 1 4
2 1 4
3 0 4
1 1 5
2 0 5
3 0 5
1 1 6
2 1 6
3 NA 6
1 NA 7
2 1 7
3 1 7
1 1 8
2 0 8
3 0 8
1 NA 9
2 NA 9
3 1 9
1 1 10
2 1 10
3 0 10
1 0 11
2 0 11
3 0 11

I would like to find out what percentage of people use the airplane in situation 2. Is there a more efficient way than the code below? It only gives me the counts, so I still have to calculate the percentage manually.

# Cross-tabulate Situation against flight_uses (counts only, no percentages).
# select() comes from dplyr; column names are case-sensitive, so `Situation`
# must be capitalised to match the data.
table(select(df_plane, Situation, flight_uses))

CodePudding user response:

You can use mean to calculate the proportion. Here missing values (NA) are first replaced by 0, i.e. they are counted as not using the plane:

> with(df_plane, mean(ifelse(is.na(flight_uses), 0, flight_uses)[Situation == 2]))
[1] 0.5454545

CodePudding user response:

Are you asking, of those rows where Situation==2, what is the percent where flight_uses==1?

dplyr approach

dplyr is useful for these types of manipulations:

library(dplyr)
# Share (in percent) of Situation-2 rows whose flight_uses equals 1.
# na.rm = TRUE drops missing values from the numerator only; n() still counts
# every Situation-2 row, so rows with NA flight_uses stay in the denominator.
df_plane |>
  filter(Situation == 2) |>
  summarise(
    percent_using_plane = sum(flight_uses == 1, na.rm = TRUE) / n() * 100
  )
#   percent_using_plane
# 1            54.54545

base R

If you want to stick with the base R table syntax (which seems fine in this case but can become unwieldy once calculations get more complicated), you were nearly there:

# Percentage of each flight_uses value among the Situation-2 rows.
# The denominator is the number of Situation-2 rows, including rows whose
# flight_uses is NA, so the printed percentages need not sum to 100.
local({
  situation_2 <- df_plane[df_plane$Situation == 2, ]
  table(situation_2$flight_uses) / nrow(situation_2) * 100
})
#        0        1
# 36.36364 54.54545

CodePudding user response:

You can use functions from the janitor package.

library(tidyverse)
library(janitor)
#> 
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#> 
#>     chisq.test, fisher.test

# Rebuild the question's data row by row (one row per person and situation).
# clean_names() then normalises the column names, e.g. `Situation` becomes
# `situation`, which is the name used by the tabyl() call that follows.
df_plane <- tibble::tribble(
  ~Situation, ~flight_uses, ~`People-ID`,
          1L,           1L,           1L,
          2L,           1L,           1L,
          3L,           0L,           1L,
          1L,           1L,           2L,
          2L,           1L,           2L,
          3L,           1L,           2L,
          1L,           1L,           3L,
          2L,           0L,           3L,
          3L,           1L,           3L,
          1L,           1L,           4L,
          2L,           1L,           4L,
          3L,           0L,           4L,
          1L,           1L,           5L,
          2L,           0L,           5L,
          3L,           0L,           5L,
          1L,           1L,           6L,
          2L,           1L,           6L,
          3L,           NA,           6L,
          1L,           NA,           7L,
          2L,           1L,           7L,
          3L,           1L,           7L,
          1L,           1L,           8L,
          2L,           0L,           8L,
          3L,           0L,           8L,
          1L,           NA,           9L,
          2L,           NA,           9L,
          3L,           1L,           9L,
          1L,           1L,          10L,
          2L,           1L,          10L,
          3L,           0L,          10L,
          1L,           0L,          11L,
          2L,           0L,          11L,
          3L,           0L,          11L
  ) |> 
  clean_names()


# Cross-tabulate situation (rows) by flight_uses (columns), convert each count
# to a share of its row (adorn_percentages() defaults to row-wise), and format
# the shares as percentages. Missing flight_uses values get their own column.
df_plane |> 
  tabyl(situation, flight_uses) |> 
  adorn_percentages() |> 
  adorn_pct_formatting()
#>  situation     0     1   NA_
#>          1  9.1% 72.7% 18.2%
#>          2 36.4% 54.5%  9.1%
#>          3 54.5% 36.4%  9.1%

Created on 2022-10-26 with reprex v2.0.2

In Situation 2, 54.5% of people use the airplane.

CodePudding user response:

Use with instead of dplyr::select and wrap it in proportions.

# Column-wise proportions (margin = 2) of flight_uses within each Situation.
# useNA = "ifany" tabulates NA as its own level whenever it occurs.
with(df_plane, table(flight_uses, Situation, useNA = "ifany")) |>
  proportions(margin = 2)
#            Situation
# flight_uses          1          2          3
#        0    0.09090909 0.36363636 0.54545455
#        1    0.72727273 0.54545455 0.36363636
#        <NA> 0.18181818 0.09090909 0.09090909
  •  Tags:  
  • r
  • Related