I have a table of water quality data. Currently it looks like this, except the last three columns are populated with data:
But I'd like to change it into this format:
Example Dataframe
structure(list(season = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L), levels = c("Winter",
"Spring", "Summer", "Autumn"), class = "factor"), site = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L,
6L), levels = c("1", "2", "3", "4", "5", "6"), class = "factor"),
Temp = c("7.2(1.56)", "7.05(1.91)", "6.3(1.7)", "6.25(2.33)",
"6.2(2.4)", "5.4(2.4)", "11.77(2.75)", "12.5(4.62)", "11.6(3.68)",
"11.13(3.81)", "11(3.67)", "13.57(4.15)", "13(1.51)", "15.13(1.65)",
"14.4(0.75)", "14.93(1.19)", "14.97(1.29)", "21(3.24)"),
pH = c("7.44(0.29)", "7.38(0.28)", "7.52(0.1)", "7.53(0.12)",
"7.38(0.06)", "7.56(0.26)", "7.21(0.1)", "7.2(0.13)", "7.35(0.08)",
"7.44(0.06)", "7.46(0.02)", "7.72(0.11)", "7.35(0.1)", "7.48(0.12)",
"7.44(0.05)", "7.12(0.14)", "7.15(0.03)", "7.86(0.38)"),
`DO` = c("9(0)", "9.1(0.42)", "8.25(0.07)",
"8.85(0.49)", "9.25(0.64)", "9(0.42)", "8.73(1.32)", "8.13(2.85)",
"7.37(1.16)", "8.3(1.5)", "8.47(1.21)", "9.2(0.79)", "7.43(1.21)",
"5.63(3.33)", "7.07(1.12)", "4.77(2.5)", "5(1.1)", "7.87(1.07)"
), `EC` = c("337.5(55.86)", "333(41.01)",
"321.5(51.62)", "322(32.53)", "309(25.46)", "300.5(30.41)",
"407.67(13.58)", "404(12.29)", "376.33(8.08)", "337.33(8.5)",
"333.67(13.5)", "290.67(9.24)", "474(7.21)", "464.33(8.33)",
"409(4.36)", "389.33(30.27)", "368.67(19.6)", "327.67(18.58)"
)), row.names = c(NA, 18L), class = "data.frame")
CodePudding user response:
Update on OP request: version1:
df %>%
pivot_longer(-c(season,site)) %>%
pivot_wider(names_from = season, values_from = value) %>%
arrange(name, site) %>%
relocate(name, site) %>%
mutate(name = factor(name, levels = colnames(df[-c(1:2)]))) %>%
arrange(name) %>%
group_by(name) %>%
mutate(name = ifelse(row_number()==1, as.character(name), "")) %>%
print(n=30)
name site Winter Spring Summer
<chr> <fct> <chr> <chr> <chr>
1 "Temp" 1 7.2(1.56) 11.77(2.75) 13(1.51)
2 "" 2 7.05(1.91) 12.5(4.62) 15.13(1.65)
3 "" 3 6.3(1.7) 11.6(3.68) 14.4(0.75)
4 "" 4 6.25(2.33) 11.13(3.81) 14.93(1.19)
5 "" 5 6.2(2.4) 11(3.67) 14.97(1.29)
6 "" 6 5.4(2.4) 13.57(4.15) 21(3.24)
7 "pH" 1 7.44(0.29) 7.21(0.1) 7.35(0.1)
8 "" 2 7.38(0.28) 7.2(0.13) 7.48(0.12)
9 "" 3 7.52(0.1) 7.35(0.08) 7.44(0.05)
10 "" 4 7.53(0.12) 7.44(0.06) 7.12(0.14)
11 "" 5 7.38(0.06) 7.46(0.02) 7.15(0.03)
12 "" 6 7.56(0.26) 7.72(0.11) 7.86(0.38)
13 "DO" 1 9(0) 8.73(1.32) 7.43(1.21)
14 "" 2 9.1(0.42) 8.13(2.85) 5.63(3.33)
15 "" 3 8.25(0.07) 7.37(1.16) 7.07(1.12)
16 "" 4 8.85(0.49) 8.3(1.5) 4.77(2.5)
17 "" 5 9.25(0.64) 8.47(1.21) 5(1.1)
18 "" 6 9(0.42) 9.2(0.79) 7.87(1.07)
19 "EC" 1 337.5(55.86) 407.67(13.58) 474(7.21)
20 "" 2 333(41.01) 404(12.29) 464.33(8.33)
21 "" 3 321.5(51.62) 376.33(8.08) 409(4.36)
22 "" 4 322(32.53) 337.33(8.5) 389.33(30.27)
23 "" 5 309(25.46) 333.67(13.5) 368.67(19.6)
24 "" 6 300.5(30.41) 290.67(9.24) 327.67(18.58)
version2:
library(dplyr)
library(tidyr)
df %>%
pivot_longer(-c(season,site)) %>%
pivot_wider(names_from = season, values_from = value) %>%
arrange(name, site) %>%
relocate(name, site) %>%
mutate(name = factor(name, levels = colnames(df[-c(1:2)]))) %>%
arrange(name) %>%
print(n=30)
name site Winter Spring Summer
<fct> <fct> <chr> <chr> <chr>
1 Temp 1 7.2(1.56) 11.77(2.75) 13(1.51)
2 Temp 2 7.05(1.91) 12.5(4.62) 15.13(1.65)
3 Temp 3 6.3(1.7) 11.6(3.68) 14.4(0.75)
4 Temp 4 6.25(2.33) 11.13(3.81) 14.93(1.19)
5 Temp 5 6.2(2.4) 11(3.67) 14.97(1.29)
6 Temp 6 5.4(2.4) 13.57(4.15) 21(3.24)
7 pH 1 7.44(0.29) 7.21(0.1) 7.35(0.1)
8 pH 2 7.38(0.28) 7.2(0.13) 7.48(0.12)
9 pH 3 7.52(0.1) 7.35(0.08) 7.44(0.05)
10 pH 4 7.53(0.12) 7.44(0.06) 7.12(0.14)
11 pH 5 7.38(0.06) 7.46(0.02) 7.15(0.03)
12 pH 6 7.56(0.26) 7.72(0.11) 7.86(0.38)
13 DO 1 9(0) 8.73(1.32) 7.43(1.21)
14 DO 2 9.1(0.42) 8.13(2.85) 5.63(3.33)
15 DO 3 8.25(0.07) 7.37(1.16) 7.07(1.12)
16 DO 4 8.85(0.49) 8.3(1.5) 4.77(2.5)
17 DO 5 9.25(0.64) 8.47(1.21) 5(1.1)
18 DO 6 9(0.42) 9.2(0.79) 7.87(1.07)
19 EC 1 337.5(55.86) 407.67(13.58) 474(7.21)
20 EC 2 333(41.01) 404(12.29) 464.33(8.33)
21 EC 3 321.5(51.62) 376.33(8.08) 409(4.36)
22 EC 4 322(32.53) 337.33(8.5) 389.33(30.27)
23 EC 5 309(25.46) 333.67(13.5) 368.67(19.6)
24 EC 6 300.5(30.41) 290.67(9.24) 327.67(18.58)
First answer: We could do it this way:
library(dplyr)
library(tidyr)
df %>%
pivot_longer(-c(season,site)) %>%
pivot_wider(names_from = season, values_from = value) %>%
arrange(name, site) %>%
relocate(name, site)
# A tibble: 24 x 5
name site Winter Spring Summer
<chr> <fct> <chr> <chr> <chr>
1 DO 1 9(0) 8.73(1.32) 7.43(1.21)
2 DO 2 9.1(0.42) 8.13(2.85) 5.63(3.33)
3 DO 3 8.25(0.07) 7.37(1.16) 7.07(1.12)
4 DO 4 8.85(0.49) 8.3(1.5) 4.77(2.5)
5 DO 5 9.25(0.64) 8.47(1.21) 5(1.1)
6 DO 6 9(0.42) 9.2(0.79) 7.87(1.07)
7 EC 1 337.5(55.86) 407.67(13.58) 474(7.21)
8 EC 2 333(41.01) 404(12.29) 464.33(8.33)
9 EC 3 321.5(51.62) 376.33(8.08) 409(4.36)
10 EC 4 322(32.53) 337.33(8.5) 389.33(30.27)
# ... with 14 more rows
# i Use `print(n = ...)` to see more rows
CodePudding user response:
Is this what you're looking for? Using tidyr
df %>% pivot_longer(cols = c(Temperature:`Dissolved Organic Carbon`)) %>%
pivot_wider(names_from = season, values_from = value)
Output
site name Winter
<fct> <chr> <chr>
1 1 Temperature 7.2(1.56)
2 1 pH 7.44(0.29)
3 1 Dissolved Oxygen 9(0)
4 1 Electrical Conductivity 337.5(55.86)
5 1 Suspended Sediment 5.94(0.73)
6 1 Total Phosphorus 73.5(3.54)
7 1 Soluble Reactive Phosphorus 19(2.83)
8 1 Particulate Phosphorus 54.5(6.36)
9 1 Dissolved Ammonia 0.08(0)
10 1 Dissolved Nitrite 0.05(0.03)