The below df shows individual observations of fish along transects. Fish family was recorded along with its length and its biomass. I would like to first create a new column which shows the number of observations over biomass 2.5 grams for the given family in each particular row at the given transect (I'm aware there are mulptiple observations for families at each transect in some cases which should give repeated values in many rows).
This is what the first 8 rows looks like:
ID Family Length.mm. Biomass.g.
1 2019_Tela_AD_1_10 Pomacentridae 27.29 0.438
2 2019_Tela_AD_1_10 Haemulidae 51.84 2.713
3 2019_Tela_AD_1_10 Pomacentridae 54.38 3.127
4 2019_Tela_AD_1_10 Pomacentridae 54.65 2.417
5 2019_Tela_AD_1_10 Pomacentridae 55.46 2.532
6 2019_Tela_AD_1_10 Pomacentridae 55.46 2.532
7 2019_Tela_AD_1_10 Pomacentridae 55.46 2.532
8 2019_Tela_AD_1_10 Pomacentridae 55.46 2.532
I've previously used
`ave(Fish$ID, Fish[,c("ID","Family")], FUN=length)`
to obtain counts by Family but am not sure how to incorporate the biomass condition.
'ID' is an indicator of transect (only the first transect is shown above - there are lots more!).
I'll then turn that into a wider table (without biomass or length columns) with each column being a family and the table just containing ID and counts for each family column.
If interested, here is the dput of the first 100 rows:
structure(list(ID = c("2019_Tela_AD_1_10", "2019_Tela_AD_1_10",
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10",
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10",
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10",
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10",
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10",
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10",
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10",
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10",
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10",
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10",
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10",
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10",
"2019_Tela_AD_1_10", "2019_Tela_AD_1_10", "2019_Tela_AD_1_10",
"2019_Tela_AD_2_10", "2019_Tela_AD_2_10", "2019_Tela_AD_2_10",
"2019_Tela_AD_2_10", "2019_Tela_AD_2_10", "2019_Tela_AD_2_10",
"2019_Tela_AD_2_10", "2019_Tela_AD_2_10", "2019_Tela_AD_2_10",
"2019_Tela_AD_2_10", "2019_Tela_AD_2_10", "2019_Tela_AD_2_10",
"2019_Tela_AD_2_10", "2019_Tela_AD_2_10", "2019_Tela_AD_2_10",
"2019_Tela_AD_2_10", "2019_Tela_AD_2_10", "2019_Tela_AD_2_10",
"2019_Tela_AD_2_10", "2019_Tela_AD_2_10", "2019_Tela_AD_2_10",
"2019_Tela_AD_2_10", "2019_Tela_AD_2_10", "2019_Tela_AD_2_10",
"2019_Tela_AD_2_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10",
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10",
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10",
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10",
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10",
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10",
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10",
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10",
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10",
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10",
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10", "2019_Tela_AD_3_10",
"2019_Tela_AD_3_10", "2019_Tela_AD_3_10"), Family = c("Pomacentridae",
"Haemulidae", "Pomacentridae", "Pomacentridae", "Pomacentridae",
"Pomacentridae", "Pomacentridae", "Pomacentridae", "Pomacentridae",
"Pomacentridae", "Pomacentridae", "Pomacentridae", "Pomacentridae",
"Pomacentridae", "Pomacentridae", "Pomacentridae", "Pomacentridae",
"Serranidae", "Serranidae", "Pomacentridae", "Pomacentridae",
"Pomacentridae", "Pomacentridae", "Pomacentridae", "Pomacentridae",
"Pomacentridae", "Haemulidae", "Pomacentridae", "Pomacentridae",
"Acanthuridae", "Haemulidae", "Haemulidae", "Haemulidae", "Scaridae",
"Carangidae", "Scaridae", "Labridae", "Scaridae", "Scaridae",
"Haemulidae", "Scaridae", "Scaridae", "Scaridae", "Scaridae",
"Scaridae", "Pomacentridae", "Pomacentridae", "Pomacentridae",
"Pomacentridae", "Scaridae", "Pomacentridae", "Pomacentridae",
"Pomacentridae", "Pomacentridae", "Pomacentridae", "Pomacentridae",
"Pomacentridae", "Scaridae", "Pomacentridae", "Pomacentridae",
"Haemulidae", "Pomacentridae", "Pomacentridae", "Haemulidae",
"Lutjanidae", "Scaridae", "Pomacentridae", "Pomacentridae", "Pomacanthidae",
"Pomacentridae", "Pomacentridae", "Pomacentridae", "Pomacentridae",
"Pomacentridae", "Labridae", "Pomacentridae", "Pomacentridae",
"Pomacentridae", "Pomacentridae", "Pomacentridae", "Pomacentridae",
"Pomacentridae", "Pomacentridae", "Pomacentridae", "Pomacentridae",
"Pomacentridae", "Pomacentridae", "Pomacentridae", "Pomacentridae",
"Pomacentridae", "Labridae", "Chaetodontidae", "Haemulidae",
"Haemulidae", "Pomacentridae", "Labridae", "Pomacentridae", "Pomacentridae",
"Pomacentridae", "Pomacentridae"), Length.mm. = c(27.29, 51.84,
54.38, 54.65, 55.46, 55.46, 55.46, 55.46, 59.6, 72.06, 74.9,
74.9, 74.9, 75.2, 75.51, 76.51, 79.31, 80, 80, 84.66, 86.02,
86.4, 100, 108.62, 110.77, 127.25, 127.61, 134.12, 145.45, 147.45,
166.56, 191.05, 197.06, 211.48, 220.37, 266.71, 350, 350, 364.09,
406, 520, 22.06, 37.22, 37.75, 46.52, 47.58, 52.24, 52.32, 53.86,
57.5, 62.28, 63.14, 65.06, 70.2, 70.55, 73.86, 75.35, 79.82,
81.63, 109.64, 120.57, 124, 144.68, 165.7, 223.12, 290.51, 16.51,
30.75, 32.32, 37.66, 40.63, 43.08, 44.37, 45.34, 45.71, 47.42,
48.77, 49.97, 50.45, 52.23, 54.05, 55.46, 57.74, 61.47, 69.08,
74.49, 74.9, 74.9, 77.17, 77.56, 79.34, 80.62, 86.08, 92.24,
94.43, 97.6, 98.17, 100, 100, 100), Biomass.g. = c(0.438, 2.713,
3.127, 2.417, 2.532, 2.532, 2.532, 2.532, 4.164, 5.778, 8.517,
8.517, 8.517, 8.625, 6.696, 9.103, 10.185, 8.481, 8.481, 12.496,
10.099, 13.319, 16.232, 23.651, 39.36, 38.525, 35.431, 45.301,
92.096, 64.373, 55.903, 168.912, 186.322, 141.371, 99.718, 338.696,
542.347, 814.991, 694.545, 1838.5, 1973.521, 0.177, 0.872, 0.911,
1.481, 1.562, 2.757, 2.77, 3.033, 3.29, 4.259, 4.99, 5.48, 6.159,
6.256, 8.152, 8.676, 8.95, 11.146, 28.07, 29.292, 41.257, 66.861,
107.605, 173.794, 441.892, 0.056, 0.525, 1.223, 0.747, 1.255,
1.508, 1.253, 1.342, 0.755, 1.546, 2.223, 2.161, 2.472, 2.755,
3.068, 2.532, 3.771, 4.588, 5.058, 8.372, 8.517, 8.517, 9.349,
7.285, 4.491, 18.362, 12.047, 14.559, 17.589, 8.216, 19.859,
16.232, 16.232, 16.232)), row.names = c(NA, 100L), class = "data.frame")
Thanks in advance!
CodePudding user response:
You can do this with dplyr
:
library(dplyr)
Fish %>%
group_by(Family) %>%
mutate(
heavy = ifelse(`Biomass.g.` > 2.5, TRUE, FALSE),
n_heavy = sum(heavy)
)
# A tibble: 100 x 6
# # Groups: Family [10]
# ID Family Length.mm. Biomass.g. heavy n_heavy
# <chr> <chr> <dbl> <dbl> <lgl> <int>
# 1 2019_Tela_AD_1_10 Pomacentridae 27.3 0.438 FALSE 53
# 2 2019_Tela_AD_1_10 Haemulidae 51.8 2.71 TRUE 10
# 3 2019_Tela_AD_1_10 Pomacentridae 54.4 3.13 TRUE 53
# 4 2019_Tela_AD_1_10 Pomacentridae 54.6 2.42 FALSE 53
# 5 2019_Tela_AD_1_10 Pomacentridae 55.5 2.53 TRUE 53
# 6 2019_Tela_AD_1_10 Pomacentridae 55.5 2.53 TRUE 53
# 7 2019_Tela_AD_1_10 Pomacentridae 55.5 2.53 TRUE 53
# 8 2019_Tela_AD_1_10 Pomacentridae 55.5 2.53 TRUE 53
# 9 2019_Tela_AD_1_10 Pomacentridae 59.6 4.16 TRUE 53
# 10 2019_Tela_AD_1_10 Pomacentridae 72.1 5.78 TRUE 53
CodePudding user response:
You can try this, using dplyr
and tidyr
df %>%
group_by(ID, Family) %>%
summarize(ct = sum(Biomass.g.>2.5)) %>%
pivot_wider(id_cols = ID, names_from = Family, values_from = ct)
Output:
ID Acanthuridae Carangidae Haemulidae Labridae Pomacentridae Scaridae Serranidae Lutjanidae Chaetodontidae Pomacanthidae
<chr> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 2019_Tela_AD_1_10 1 1 6 1 23 5 2 NA NA NA
2 2019_Tela_AD_2_10 NA NA 2 NA 14 3 NA 1 NA NA
3 2019_Tela_AD_3_10 NA NA 2 2 16 NA NA NA 1 0
If you wanted to collapse over transect ID (I wasn't sure from your post, above), you could do this:
df %>%
group_by(Family) %>%
summarize(ct = sum(Biomass.g.>2.5)) %>%
pivot_wider(names_from = Family, values_from = ct)
Output:
Acanthuridae Carangidae Chaetodontidae Haemulidae Labridae Lutjanidae Pomacanthidae Pomacentridae Scaridae Serranidae
<int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1 1 1 1 10 3 1 0 53 8 2