I am trying to simplify my code by using custom (user-defined) functions in R.
I want to turn the following into functions:
# Decision tree for comparing `count` across the `species` groups:
#   1. Shapiro-Wilk normality test on the response.
#   2. If p > 0.05: Bartlett's test for equal variances, followed by a
#      one-way ANOVA (p > 0.05) or a Welch ANOVA (otherwise).
#   3. If the Shapiro-Wilk p-value is not > 0.05: Fligner-Killeen test.
shapiro <- shapiro.test(data$count)
if(shapiro$p.value > 0.05) {
# NOTE(review): bartlett.test() is provided by the stats package, so loading
# tidyverse here looks unnecessary -- confirm before removing.
require(tidyverse)
bart <- bartlett.test(count ~ species, data=data)
}
# `bart` is only assigned in the branch above, hence the exists() guard.
# NOTE(review): exists() would also find a `bart` left over from an earlier
# run in the same session.
if ((shapiro$p.value > 0.05) && exists("bart")){
if(bart$p.value > 0.05) {
OneWay <- aov(count ~ species, data = data)
oneway <- summary(OneWay) # normal distribution and equal variance
} else {
welch <- oneway.test(count ~ species, data = data) # Welch ANOVA: normal distribution, unequal variance
}
} else {
fligner <- fligner.test(count ~ species, data = data)
# if the distribution is not normal, perform the Fligner-Killeen homogeneity of variance test
}
The first set I have tried:
# First attempt: wrap the Shapiro-Wilk test in a function that takes the data
# frame and the column to test.
# NOTE: `df$contvar` looks up a column literally named "contvar"; it does not
# use the value passed as the `contvar` argument. When `df` has no such
# column, `$` yields NULL and shapiro.test() rejects it as non-numeric.
perform_shapiro <- function(df, contvar) {
shapiro.test(df$contvar)
}
perform_shapiro(df = data, contvar = count)
Which returns the error: Error in shapiro.test(df$contvar) : is.numeric(x) is not TRUE.
The second set is successful with:
# Second attempt: run Bartlett's test only when the Shapiro-Wilk p-value is
# above 0.05.
# NOTE(review): the function is defined as `perform_barlett` but the call
# below uses `perform_bartlett` -- presumably a typo in one of the two names.
# NOTE(review): `shapiro` is not an argument; it is looked up outside the
# function, so it must already exist when the function is called. If its
# p-value is not above 0.05, the body of the `if` is skipped, the literal
# formula `contvar ~ catvar` is never evaluated, and the function returns
# NULL without any error.
perform_barlett <- function(df, contvar, catvar) {
if(shapiro$p.value > 0.05) {
require(tidyverse)
bart <- bartlett.test(contvar ~ catvar, data=df)
return(bart)
}
}
perform_bartlett(data, count, species)
But the third also has a new error:
# Third attempt: choose between a one-way ANOVA, a Welch ANOVA and the
# Fligner-Killeen test, printing whichever result is computed.
# NOTE(review): `shapiro` and `bart` are not arguments; they are looked up
# outside the function.
# NOTE: the formula `contvar ~ catvar` is taken literally: its variables are
# the symbols `contvar` and `catvar`, not the columns named in the call.
# They are not columns of `df`, so they resolve to the function arguments,
# and evaluating the unquoted argument (e.g. `count`) outside the data frame
# fails with "object ... not found".
perform_oneway_welch_fligner <- function(df, contvar, catvar){
if ((shapiro$p.value > 0.05) && exists("bart")){
if(bart$p.value > 0.05) {
OneWay <- aov(contvar ~ catvar, data = df)
oneway <- summary(OneWay) # normal distribution and equal variance
print(oneway)
} else {
welch <- oneway.test(contvar ~ catvar, data = df) # Welch ANOVA: normal distribution, unequal variance
print(welch)
}
} else {
fligner <- fligner.test(contvar ~ catvar, data = df)
# if the distribution is not normal, perform the Fligner-Killeen homogeneity of variance test
print(fligner)
}
}
perform_oneway_welch_fligner(data, count, species)
Error in eval(predvars, data, env) : object 'count' not found.
Data
# Example data: the built-in iris data set with the response column renamed.
data <- iris
# rename columns
# NOTE(review): only "Sepal.Length" is renamed, so the grouping column keeps
# its original name `Species` (capital S) -- check calls that use `species`.
names(data)[names(data) == "Sepal.Length"] <- "count"
FYI, the Shapiro-Wilk p-value is < 0.05, so bart should not exist, and hence the Fligner-Killeen result should be returned. contvar = continuous variable, catvar = categorical variable.
I thought the errors at first were due to $ and ~, but the second function works well and it has a tilde formula inside, so now I am unsure. Thanks!
CodePudding user response:
Here is a full solution that addresses the OP's comments.
#' Run a Shapiro-Wilk normality test on one column of a data frame.
#'
#' @param df A data frame.
#' @param contvar Name of the continuous column to test, given unquoted
#'   (e.g. `count`) or as a string.
#' @return The `htest` object returned by `shapiro.test()`.
perform_shapiro <- function(df, contvar) {
  # Capture the bare column name so callers can write `contvar = count`.
  contvar_expr <- substitute(contvar)
  contvar <- as.character(contvar_expr)
  # Fail early with a clear message: a missing column would otherwise reach
  # shapiro.test() as NULL and trigger "is.numeric(x) is not TRUE".
  if (length(contvar) != 1L || !(contvar %in% names(df))) {
    stop(
      "column '", paste(deparse(contvar_expr), collapse = ""),
      "' not found in `df`",
      call. = FALSE
    )
  }
  # `[[` extracts the column by its name held in a string; `$` would not.
  shapiro.test(df[[contvar]])
}
#' Bartlett's test of equal variances, run only if the response looks normal.
#'
#' @param df A data frame.
#' @param contvar Unquoted name of the continuous (response) column.
#' @param catvar Unquoted name of the categorical (grouping) column.
#' @return The `htest` object from `bartlett.test()` when the Shapiro-Wilk
#'   p-value exceeds 0.05; otherwise `NULL`, invisibly.
perform_bartlett <- function(df, contvar, catvar) {
  # Turn the bare column names into strings and build `contvar ~ catvar`.
  response_name <- as.character(substitute(contvar))
  group_name <- as.character(substitute(catvar))
  test_formula <- reformulate(group_name, response_name)
  normality <- shapiro.test(df[[response_name]])
  # Guard clause: skip Bartlett's test when normality is rejected.
  if (!(normality$p.value > 0.05)) {
    return(invisible(NULL))
  }
  bartlett.test(test_formula, data = df)
}
#' Pick and run the group-comparison test suggested by the data.
#'
#' Decision tree:
#' * Shapiro-Wilk p-value not above 0.05: Fligner-Killeen test.
#' * Otherwise Bartlett's test decides between a one-way ANOVA
#'   (p > 0.05, equal variances) and a Welch ANOVA (unequal variances).
#'
#' @param df A data frame.
#' @param contvar Unquoted name of the continuous (response) column.
#' @param catvar Unquoted name of the categorical (grouping) column.
#' @return The result of `fligner.test()`, `summary(aov())` or
#'   `oneway.test()`, depending on the branch taken.
perform_oneway_welch_fligner <- function(df, contvar, catvar) {
  # Turn the bare column names into strings and build `contvar ~ catvar`.
  contvar <- as.character(substitute(contvar))
  catvar <- as.character(substitute(catvar))
  fmla <- reformulate(catvar, contvar)
  shapiro <- shapiro.test(df[[contvar]])
  if (shapiro$p.value > 0.05) {
    # Bartlett's test is only needed (and only run) on the normal branch, so
    # a failure in it can no longer abort the Fligner-Killeen path.
    bart <- bartlett.test(fmla, data = df)
    if (bart$p.value > 0.05) {
      # Normal distribution and equal variance: classic one-way ANOVA.
      summary(aov(fmla, data = df))
    } else {
      # Normal distribution, unequal variance: Welch ANOVA.
      oneway.test(fmla, data = df)
    }
  } else {
    # Not normally distributed: Fligner-Killeen homogeneity of variance test.
    fligner.test(fmla, data = df)
  }
}
# Example run on iris, with Sepal.Length renamed to `count`.
data <- iris
names(data)[names(data) == "Sepal.Length"] <- "count"
perform_shapiro(df = data, contvar = count)
#>
#> Shapiro-Wilk normality test
#>
#> data: df[[contvar]]
#> W = 0.97609, p-value = 0.01018
# returns NULL invisibly: the Shapiro-Wilk p-value above is not > 0.05, so
# Bartlett's test is skipped
perform_bartlett(data, count, Species)
perform_oneway_welch_fligner(data, count, Species)
#>
#> Fligner-Killeen test of homogeneity of variances
#>
#> data: count by Species
#> Fligner-Killeen:med chi-squared = 11.618, df = 2, p-value = 0.003
Created on 2023-01-06 with reprex v2.0.2
CodePudding user response:
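The first function can be fixed by having the wrapper take the column vector itself instead of a data frame plus a column name (note that this is a named wrapper function, not an anonymous one):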
# Thin wrapper around shapiro.test(): takes the numeric vector itself, so the
# caller selects the column (e.g. `data$count`).
perform_shapiro <- function(x) {
  normality <- shapiro.test(x)
  normality
}
# Pass the column itself rather than a data frame plus a column name.
perform_shapiro(data$count)
(Sorry, after playing around a bit I figured out that this works! However, the same approach, i.e. replacing the formulas with x, did not work in the third function.)