Home > Software design >  generate confidence interval for binned data
generate confidence interval for binned data

Time:06-21

I am trying to calculate binomial proportion confidence interval for my data. Here I just use the iris dataset.

# break the length into different size

iris$Slen <- cut(iris$Sepal.Length, 
                breaks = c(4,5,6,8))

#need to compute confidence interval for binomial proportion within each group and size bin

alpha <- as.numeric(0.05) 

#function to calculate conf.int.

CI <-function(n,r) {
  f1=qf(1-alpha/2, 2*r, 2*(n-r 1), lower.tail=FALSE)
  f2=qf(alpha/2, 2*(r 1), 2*(n-r), lower.tail=FALSE)
  pl=(1 (n-r 1)/(r*f1))^(-1)
  pu=(1 (n-r)/((r 1)*f2))^(-1)
  }

First I calculate the number of species in each of the size bin and then calculate the relative proportion using the code below:

library(dplyr)

iris_count <- iris %>% 
  group_by(Slen, Species) %>% 
  summarise(TotalParticle = n())%>%
  mutate(RelAbund = TotalParticle/sum(TotalParticle))

Now I want to calculate the CI for the iris_count data. n is the total number in each bin and r is the RelAbund*n in the CI function. Ex. for Slen (4,5] setosa n is 28 3 1 and r is 0.875*32.

How can I compute this directly and not manually for each case?

CodePudding user response:

iris$Slen <- cut(iris$Sepal.Length, 
                 breaks = c(4,5,6,8))
alpha <- as.numeric(0.05) 
CI <-function(n,r) {
  f1=qf(1-alpha/2, 2*r, 2*(n-r 1), lower.tail=FALSE)
  f2=qf(alpha/2, 2*(r 1), 2*(n-r), lower.tail=FALSE)
  pl=(1 (n-r 1)/(r*f1))^(-1)
  pu=(1 (n-r)/((r 1)*f2))^(-1)
  c(pl, pu) # note how I changed your function here so it returns the output you're looking for
}
library(dplyr)
iris_count <- iris %>% 
  group_by(Slen, Species) %>% 
  summarise(TotalParticle = n())%>%
  mutate(RelAbund = TotalParticle/sum(TotalParticle))

totals <- aggregate(iris_count$TotalParticle, list(iris_count$Slen), sum)
colnames(totals)[which(colnames(totals) == 'Group.1')] <- 'Slen'
new_df <- as.data.frame(merge(iris_count, totals, 'Slen'))
new_df$x <- as.numeric(new_df$x)
colnames(new_df)
confint <- list(NULL)
for (i in seq_len(nrow(new_df))) {
  confint[[i]] <- CI(new_df[i, which(colnames(new_df) == "x")], new_df[i, which(colnames(new_df) == "RelAbund")] * new_df[i, which(colnames(new_df) == "x")])
}
new_df$lower_CI <- sapply(confint, function (x) {
  x[1]
})
new_df$upper_CI <- sapply(confint, function (x) {
  x[2]
})
new_df
#    Slen    Species TotalParticle  RelAbund  x     lower_CI  upper_CI
# 1 (4,5]     setosa            28 0.8750000 32 0.7100515798 0.9648693
# 2 (4,5] versicolor             3 0.0937500 32 0.0197671802 0.2502270
# 3 (4,5]  virginica             1 0.0312500 32 0.0007908686 0.1621710
# 4 (5,6]     setosa            22 0.3859649 57 0.2599546810 0.5242516
# 5 (5,6] versicolor            27 0.4736842 57 0.3398483226 0.6103478
# 6 (5,6]  virginica             8 0.1403509 57 0.0625948901 0.2579454
# 7 (6,8] versicolor            20 0.3278689 61 0.2130663358 0.4600191
# 8 (6,8]  virginica            41 0.6721311 61 0.5399808516 0.7869337

CodePudding user response:

You may be reinventing the wheel here. The built in function prop.test will give you binomial confidence intervals. Here's a full reprex:

library(tidyverse)

iris$Slen <- cut(iris$Sepal.Length, 
                breaks = c(4,5,6,8))

iris %>% 
  group_by(Slen, Species) %>% 
  summarise(TotalParticle = n()) %>%
  mutate(RelAbund = TotalParticle/sum(TotalParticle),
         lower = sapply(TotalParticle, 
                  function(x) prop.test(x, sum(TotalParticle))$conf.int[1]),
         upper = sapply(TotalParticle, 
                  function(x) prop.test(x, sum(TotalParticle))$conf.int[2]))
#> `summarise()` has grouped output by 'Slen'. You can override using the
#> `.groups` argument.
#> # A tibble: 8 x 6
#> # Groups:   Slen [3]
#>   Slen  Species    TotalParticle RelAbund   lower upper
#>   <fct> <fct>              <int>    <dbl>   <dbl> <dbl>
#> 1 (4,5] setosa                28   0.875  0.701   0.959
#> 2 (4,5] versicolor             3   0.0938 0.0245  0.262
#> 3 (4,5] virginica              1   0.0312 0.00163 0.180
#> 4 (5,6] setosa                22   0.386  0.263   0.524
#> 5 (5,6] versicolor            27   0.474  0.342   0.609
#> 6 (5,6] virginica              8   0.140  0.0668  0.263
#> 7 (6,8] versicolor            20   0.328  0.216   0.461
#> 8 (6,8] virginica             41   0.672  0.539   0.784

Created on 2022-06-20 by the reprex package (v2.0.1)

  •  Tags:  
  • r
  • Related