I would like to automate the collection of summary statistics that arise from t-tests. In the example below I have nested variables `Age`, `Location`, and `Treatment`. For each `Age` & `Location` I would like to run a t-test based on `Treatment`, which has the two categorical levels `Control` & `Treatment`. Put another way, I would like to know about the difference between the Control and Treatment means at each `Location` for each `Age`.
I would like to run the t-tests using the `col_t_welch` function in `matrixTests` because the output already has several of the summary statistics I'm looking for (i.e., `mean.diff`, `stderr`, and `pvalue`). How could I set up my data frame (`df1`) to be able to run a for-loop for a nested t-test?
Reproducible Example:
library(matrixTests)
library(ggplot2)
# Example data: 3 ages x 2 locations x 2 treatments x 3 replicates = 36 rows.
# The seed is fixed so the simulated values are reproducible.
set.seed(123)
x <- c("Age", "Location", "Treatment", "Value")
df1 <- setNames(
  data.frame(
    # Each age spans 12 consecutive rows.
    as.factor(rep(c(1, 2, 3), each = 12)),
    # Within an age, 6 rows per location; the pattern repeats for every age.
    as.factor(rep(rep(c("Central", "North"), each = 6), times = 3)),
    # Within a location, 3 Control rows followed by 3 Treatment rows.
    as.factor(rep(rep(c("Control", "Treatment"), each = 3), times = 6)),
    # Simulated response: normal(mean = 200, sd = 25), rounded to integers.
    round(rnorm(36, 200, 25), 0)
  ),
  x
)
# Run one Welch t-test (Control vs Treatment) for every Age x Location cell of
# df1 and collect the p-value, mean difference and standard error of each test.
# Results are stored in Age-major order (all locations of age 1, then age 2, ...).
# col_t_welch() has no data/subset interface, so each cell's Control and
# Treatment values are extracted first and passed as `x` and `y`.
ages <- levels(df1$Age)
locations <- levels(df1$Location)
n_tests <- length(ages) * length(locations)
i <- 1
p <- numeric(length = n_tests)
mean_diff <- numeric(length = n_tests)
SE_diff <- numeric(length = n_tests)
for (j in ages) {
  for (k in locations) {
    # Rows belonging to the current Age/Location cell.
    cell <- df1[df1$Age == j & df1$Location == k, ]
    ttest <- col_t_welch(
      x = cell$Value[cell$Treatment == "Control"],
      y = cell$Value[cell$Treatment == "Treatment"]
    )
    p[i] <- ttest$pvalue
    mean_diff[i] <- ttest$mean.diff
    SE_diff[i] <- ttest$stderr
    i <- i + 1
  }
}
The ideal final data frame would look like `d2` below.
# Target summary table: one row per Age x Location combination holding the
# Control-vs-Treatment Welch t-test results for that cell.
# expand.grid() varies Age fastest, so rows are ordered
# (1, Central), (2, Central), (3, Central), (1, North), (2, North), (3, North).
d2 <- expand.grid(
  Age = c(1, 2, 3),
  Location = c("Central", "North"),
  mean_diff = NA_real_,
  SE_diff = NA_real_,
  pvalue = NA_real_
)

# Fill each row by selecting the matching cell of df1 by value rather than by
# hard-coded row positions, so the result does not depend on df1's row order.
for (r in seq_len(nrow(d2))) {
  # Compare as character so factor levels and numeric ages match reliably.
  in_cell <- df1$Age == as.character(d2$Age[r]) &
    df1$Location == as.character(d2$Location[r])
  fit <- col_t_welch(
    x = df1$Value[in_cell & df1$Treatment == "Control"],
    y = df1$Value[in_cell & df1$Treatment == "Treatment"]
  )
  d2$mean_diff[r] <- fit$mean.diff
  d2$SE_diff[r] <- fit$stderr
  d2$pvalue[r] <- fit$pvalue
}
d2
CodePudding user response:
I think this might help you
Libraries
library(matrixTests)
library(tidyverse)
Data
# Example data: 3 ages x 2 locations x 2 treatments x 3 replicates = 36 rows.
# The seed is fixed so the simulated values are reproducible.
set.seed(123)
x <- c("Age", "Location", "Treatment", "Value")
df1 <- setNames(
  data.frame(
    # Each age spans 12 consecutive rows.
    as.factor(rep(c(1, 2, 3), each = 12)),
    # Within an age, 6 rows per location; the pattern repeats for every age.
    as.factor(rep(rep(c("Central", "North"), each = 6), times = 3)),
    # Within a location, 3 Control rows followed by 3 Treatment rows.
    as.factor(rep(rep(c("Control", "Treatment"), each = 3), times = 6)),
    # Simulated response: normal(mean = 200, sd = 25), rounded to integers.
    round(rnorm(36, 200, 25), 0)
  ),
  x
)
How to
# One Welch t-test (Control vs Treatment) per Age x Location cell of df1.
df1 %>%
  # Nest the remaining column(s) into one tibble per Age/Location/Treatment.
  group_nest(Age, Location, Treatment) %>%
  # Put the Control and Treatment tibbles side by side on the same row.
  pivot_wider(names_from = Treatment, values_from = data) %>%
  mutate(
    test = map2(Control, Treatment, function(ctrl, trt) col_t_welch(ctrl, trt))
  ) %>%
  # Spread the per-cell test results into ordinary columns.
  unnest(test) %>%
  # Keep only the identifiers and the statistics of interest.
  select(Age, Location, pvalue, mean.diff, stderr)
Result
# A tibble: 6 x 5
Age Location pvalue mean.diff stderr
<fct> <fct> <dbl> <dbl> <dbl>
1 1 Central 0.675 -9.67 21.3
2 1 North 0.282 -22 17.7
3 2 Central 0.925 -3 28.4
4 2 North 0.570 9.33 14.6
5 3 Central 0.589 -14.7 25.0
6 3 North 0.311 -11.3 8.59