Home > Software engineering >  R: Compute multiple correlations by group (and save output to csv file)
R: Compute multiple correlations by group (and save output to csv file)

Time:09-17

Is there a way to make a file with the correlation statistic between the raw number of fish observed ("num") and each environmental data column ("temp", "do", etc.) by species ("group")?

I'd also like the correlations between the means and medians of num and the environmental factors.

I'd also like to be able to choose which correlation method to use (Pearson correlation, Kendall rank correlation, Spearman correlation, etc.)

My data:

# Example data set `zeros` (dput() output): a data.frame with 40 rows.
# Columns:
#   year   - factor with levels "2019", "2020"
#   season - factor with levels "dry", "wet"
#   site   - factor with levels "1" to "5"
#   group  - factor with the species name
#            ("Hardhead silverside", "Sailfin molly")
#   num    - raw number of fish observed (per the question text)
#   temp, sal, do, depth - environmental readings (columns 6 to 9)
zeros <- structure(list(year = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L), .Label = c("2019", "2020"), class = "factor"), season = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("dry", "wet"), class = "factor"), 
    site = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 
    1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 1L, 1L, 2L, 2L, 3L, 
    3L, 4L, 4L, 5L, 5L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L
    ), .Label = c("1", "2", "3", "4", "5"), class = "factor"), 
    group = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 
    1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 
    2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L
    ), .Label = c("Hardhead silverside", "Sailfin molly"), class = "factor"), 
    num = c(0, 8, 0, 9, 0, 13, 0, 9, 0, 10, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 7, 0, 2, 
    0, 3, 0, 13, 0), temp = c(23L, 36L, 35L, 34L, 30L, 28L, 18L, 
    19L, 33L, 33L, 25L, 20L, 33L, 23L, 36L, 32L, 28L, 17L, 34L, 
    31L, 26L, 34L, 26L, 35L, 15L, 25L, 26L, 20L, 18L, 14L, 23L, 
    17L, 26L, 17L, 17L, 19L, 29L, 31L, 18L, 15L), sal = c(12.5, 
    25.5, 8.5, 15.5, 17.5, 27.5, 9.5, 31.5, 1.5, 34.5, 25.5, 
    21.5, 10.5, 8.5, 32.5, 19.5, 6.5, 5.5, 15.5, 28.5, 6.5, 3.5, 
    29.5, 13.5, 7.5, 16.5, 3.5, 28.5, 22.5, 5.5, 9.5, 12.5, 29.5, 
    24.5, 8.5, 32.5, 37.5, 3.5, 12.5, 19.5), do = c(9.66, 7.66, 
    1.66, 14.66, 15.66, 1.66, 14.66, 15.66, 0.66, 5.66, 10.66, 
    11.66, 4.66, 0.66, 13.66, 1.66, 13.66, 6.66, 6.66, 10.66, 
    9.66, 15.66, 9.66, 15.66, 4.66, 13.66, 1.66, 11.66, 6.66, 
    8.66, 12.66, 0.66, 6.66, 0.66, 9.66, 16.66, 1.66, 10.66, 
    15.66, 10.66), depth = c(120L, 161L, 52L, 52L, 43L, 105L, 
    165L, 23L, 79L, 136L, 41L, 59L, 65L, 118L, 122L, 69L, 137L, 
    88L, 152L, 105L, 108L, 79L, 96L, 80L, 22L, 110L, 157L, 118L, 
    126L, 93L, 156L, 64L, 74L, 24L, 111L, 113L, 157L, 78L, 121L, 
    130L)), class = "data.frame", row.names = c(NA, -40L))

CodePudding user response:

The first part of your question is straightforward:

# Correlate the raw fish count ("num") with each environmental column,
# separately for every species ("group").

# Environmental columns, selected by name rather than by position so the code
# keeps working if columns are added or reordered.
env.vars <- c("temp", "sal", "do", "depth")

# Correlation method handed to cor(): "pearson", "kendall" or "spearman".
cor.method <- "pearson"

# One data frame per species.
zeros.spl <- split(zeros, zeros$group)

# vapply() (unlike sapply()) always returns a numeric matrix with one row per
# environmental variable and one column per species; the row names come from
# the named FUN.VALUE template, so no separate dimnames fix-up is needed.
zeros.cors <- vapply(
  zeros.spl,
  function(x) {
    # cor() returns a 1 x length(env.vars) matrix; drop() flattens it.
    drop(cor(x[["num"]], x[, env.vars, drop = FALSE], method = cor.method))
  },
  FUN.VALUE = setNames(numeric(length(env.vars)), env.vars)
)
# Output below is for cor.method = "pearson".
zeros.cors
#       Hardhead silverside Sailfin molly
# temp           -0.3080334    0.36174046
# sal             0.1393580    0.47095129
# do              0.2544695   -0.06646818
# depth           0.1296208    0.08777425
t(zeros.cors)
#                           temp       sal          do      depth
# Hardhead silverside -0.3080334 0.1393580  0.25446948 0.12962078
# Sailfin molly        0.3617405 0.4709513 -0.06646818 0.08777425

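Use write.csv(zeros.cors, file="results.csv") or write.csv(t(zeros.cors), file="results.csv"), depending on whether you want the environmental variables or the species as rows. To choose a different correlation method, add the method argument to the cor() call, e.g. method = "spearman"; the options are "pearson" (the default), "kendall" and "spearman".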

The second question is not clear. The mean or median of a group is a single value, so it cannot be correlated with the environmental variables. You can, however, compute the means and medians by group with aggregate:

# Per-species summaries of the fish count and the environmental readings.
# Columns are picked by name; grouping by an unnamed list yields the
# default "Group.1" label in the output.
summary.cols <- c("num", "temp", "sal", "do", "depth")
species <- list(zeros$group)

# Group means.
aggregate(zeros[summary.cols], by = species, FUN = mean)
#               Group.1  num  temp   sal   do  depth
# 1 Hardhead silverside 1.45 25.95 15.35 8.51 105.20
# 2       Sailfin molly 2.45 25.00 18.90 9.06  90.25

# Group medians.
aggregate(zeros[summary.cols], by = species, FUN = median)
#               Group.1 num temp  sal    do depth
# 1 Hardhead silverside   0   26 11.5  9.66 115.5
# 2       Sailfin molly   0   24 19.5 10.66  90.5
  • Related