I am trying to calculate binomial proportion confidence interval for my data. Here I just use the iris dataset.
# break the length into different size
iris$Slen <- cut(iris$Sepal.Length,
breaks = c(4,5,6,8))
#need to compute confidence interval for binomial proportion within each group and size bin
alpha <- as.numeric(0.05)
#function to calculate conf.int.
CI <-function(n,r) {
f1=qf(1-alpha/2, 2*r, 2*(n-r 1), lower.tail=FALSE)
f2=qf(alpha/2, 2*(r 1), 2*(n-r), lower.tail=FALSE)
pl=(1 (n-r 1)/(r*f1))^(-1)
pu=(1 (n-r)/((r 1)*f2))^(-1)
}
First I calculate the number of species in each of the size bin and then calculate the relative proportion using the code below:
library(dplyr)
iris_count <- iris %>%
group_by(Slen, Species) %>%
summarise(TotalParticle = n())%>%
mutate(RelAbund = TotalParticle/sum(TotalParticle))
Now I want to calculate the CI for the iris_count
data. n
is the total number in each bin and r
is the RelAbund*n
in the CI function. Ex. for Slen (4,5] setosa n
is 28 3 1 and r
is 0.875*32.
How can I compute this directly and not manually for each case?
CodePudding user response:
iris$Slen <- cut(iris$Sepal.Length,
breaks = c(4,5,6,8))
alpha <- as.numeric(0.05)
CI <-function(n,r) {
f1=qf(1-alpha/2, 2*r, 2*(n-r 1), lower.tail=FALSE)
f2=qf(alpha/2, 2*(r 1), 2*(n-r), lower.tail=FALSE)
pl=(1 (n-r 1)/(r*f1))^(-1)
pu=(1 (n-r)/((r 1)*f2))^(-1)
c(pl, pu) # note how I changed your function here so it returns the output you're looking for
}
library(dplyr)
iris_count <- iris %>%
group_by(Slen, Species) %>%
summarise(TotalParticle = n())%>%
mutate(RelAbund = TotalParticle/sum(TotalParticle))
totals <- aggregate(iris_count$TotalParticle, list(iris_count$Slen), sum)
colnames(totals)[which(colnames(totals) == 'Group.1')] <- 'Slen'
new_df <- as.data.frame(merge(iris_count, totals, 'Slen'))
new_df$x <- as.numeric(new_df$x)
colnames(new_df)
confint <- list(NULL)
for (i in seq_len(nrow(new_df))) {
confint[[i]] <- CI(new_df[i, which(colnames(new_df) == "x")], new_df[i, which(colnames(new_df) == "RelAbund")] * new_df[i, which(colnames(new_df) == "x")])
}
new_df$lower_CI <- sapply(confint, function (x) {
x[1]
})
new_df$upper_CI <- sapply(confint, function (x) {
x[2]
})
new_df
# Slen Species TotalParticle RelAbund x lower_CI upper_CI
# 1 (4,5] setosa 28 0.8750000 32 0.7100515798 0.9648693
# 2 (4,5] versicolor 3 0.0937500 32 0.0197671802 0.2502270
# 3 (4,5] virginica 1 0.0312500 32 0.0007908686 0.1621710
# 4 (5,6] setosa 22 0.3859649 57 0.2599546810 0.5242516
# 5 (5,6] versicolor 27 0.4736842 57 0.3398483226 0.6103478
# 6 (5,6] virginica 8 0.1403509 57 0.0625948901 0.2579454
# 7 (6,8] versicolor 20 0.3278689 61 0.2130663358 0.4600191
# 8 (6,8] virginica 41 0.6721311 61 0.5399808516 0.7869337
CodePudding user response:
You may be reinventing the wheel here. The built in function prop.test
will give you binomial confidence intervals. Here's a full reprex:
library(tidyverse)
iris$Slen <- cut(iris$Sepal.Length,
breaks = c(4,5,6,8))
iris %>%
group_by(Slen, Species) %>%
summarise(TotalParticle = n()) %>%
mutate(RelAbund = TotalParticle/sum(TotalParticle),
lower = sapply(TotalParticle,
function(x) prop.test(x, sum(TotalParticle))$conf.int[1]),
upper = sapply(TotalParticle,
function(x) prop.test(x, sum(TotalParticle))$conf.int[2]))
#> `summarise()` has grouped output by 'Slen'. You can override using the
#> `.groups` argument.
#> # A tibble: 8 x 6
#> # Groups: Slen [3]
#> Slen Species TotalParticle RelAbund lower upper
#> <fct> <fct> <int> <dbl> <dbl> <dbl>
#> 1 (4,5] setosa 28 0.875 0.701 0.959
#> 2 (4,5] versicolor 3 0.0938 0.0245 0.262
#> 3 (4,5] virginica 1 0.0312 0.00163 0.180
#> 4 (5,6] setosa 22 0.386 0.263 0.524
#> 5 (5,6] versicolor 27 0.474 0.342 0.609
#> 6 (5,6] virginica 8 0.140 0.0668 0.263
#> 7 (6,8] versicolor 20 0.328 0.216 0.461
#> 8 (6,8] virginica 41 0.672 0.539 0.784
Created on 2022-06-20 by the reprex package (v2.0.1)