Calculate correlation between two variables on two different scales by class-CodePudding

I combined two data sets into one with three variables: class, total1, and total2. total1 is the individual average score for each student; total2 is the average of the teacher's assessment for each class. The two average scores are on different scales.

df
    class   total1   total2
    A       4.9      6.7
    A       3.8      6.7
    A       4.2      6.7
    B       4.5      7.2
    B       3.9      7.2 
    B       4.1      7.2
    C       3.5      6.5
    C       4.4      6.5
    C       3.6      6.5

I want to calculate the correlation between total1 and total2, with r and the p-value. I used this code, but I was not able to get the average score of total1 by class:

library("ggpubr")
# Scatter plot of total1 against total2 for every row of df, with a fitted
# regression line, its confidence interval, and the Spearman correlation
# coefficient annotated on the plot.
ggscatter(
  df,
  x = "total2",
  y = "total1",
  add = "reg.line",
  conf.int = TRUE,
  cor.coef = TRUE,
  cor.method = "spearman",
  xlab = "Teacher score",
  ylab = "Student score"
)

CodePudding user response：

# Example data from the question, kept as whitespace-delimited text.
s <- "class   total1   total2
    A       4.9      6.7
    A       3.8      6.7
    A       4.2      6.7
    B       4.5      7.2
    B       3.9      7.2 
    B       4.1      7.2
    C       3.5      6.5
    C       4.4      6.5
    C       3.6      6.5
"

df <- read.table(text = s, header = TRUE)

# One data frame per class.
dfs <- split(df, df$class)

# Per-class means of the two score columns, one row per class.
# Columns are selected by name rather than by position, and vapply() is used
# instead of sapply() so the result is guaranteed to be a numeric matrix with
# one column per score (sapply() changes its return type with its input).
score_cols <- c("total1", "total2")
avg_m <- t(vapply(
  dfs,
  function(class_df) colMeans(class_df[score_cols]),
  FUN.VALUE = setNames(numeric(length(score_cols)), score_cols)
))

# Correlate the class-level means. cor.test() defaults to Pearson's r; pass
# method = "spearman" for the rank correlation used in the question's plot.
# With only three classes the test has a single degree of freedom.
res <- cor.test(x = avg_m[, "total1"], y = avg_m[, "total2"])
res$estimate  # cor 0.5
res$p.value   # [1] 0.6666667

CodePudding user response：

You can use the dplyr library.

library(dplyr)

# Example data: three students in each of the classes A, B and C.
class <- rep(c("A", "B", "C"), each = 3)
total1 <- c(
  4.9, 3.8, 4.2,
  4.5, 3.9, 4.1,
  3.5, 4.4, 3.6
)
total2 <- rep(c(6.7, 7.2, 6.5), each = 3)

df <- data.frame(class = class, total1 = total1, total2 = total2)

# Collapse to one row per class holding the mean of each score column.
sum_data <- df %>%
  group_by(class) %>%
  summarise(
    total1_mean = mean(total1),
    total2_mean = mean(total2)
  )

sum_data

# A tibble: 3 x 3
#  class total1_mean total2_mean
#  <chr>       <dbl>       <dbl>
#1 A            4.3          6.7
#2 B            4.17         7.2
#3 C            3.83         6.5



# Pearson correlation test (the cor.test() default) on the per-class means.
res <- cor.test(
  x = sum_data$total1_mean,
  y = sum_data$total2_mean
)
res

# Printed result:
#
#   Pearson's product-moment correlation
#
# data:  sum_data$total1_mean and sum_data$total2_mean
# t = 0.57735, df = 1, p-value = 0.6667
# alternative hypothesis: true correlation is not equal to 0
# sample estimates:
# cor
# 0.5

# Pull the individual pieces out of the test result.
res$p.value   # [1] 0.6666667
res$estimate  # cor 0.5