A snippet of my data:
# Example data for the question: a six-row excerpt in which each row is one
# individual organism. Loads the tidyverse for the dplyr pipelines used below.
library(tidyverse)
dat <- data.frame(
stringsAsFactors = FALSE,
row.names = c("1", "2", "3", "670", "59370", "59375"),
ID = c(1L, 2L, 3L, 268L, 3L, 3L),
# Length of the organism.
length_mm = c(14.601, 11.574, 7.001, 7.305, 24.506, 7.219),
# Sample site code. The first character is the sample type ("D" or "G") and
# characters 6-7 are parsed as the depth by the pipelines further down.
site = c("D-01-03M-WA","D-01-03M-WA",
"D-01-03M-WA","D-01-05M-WA","G-X7-75M-WA","G-X7-75M-WA"),
# log.afdw1 through nonlin.sodw1: one biomass estimate per organism for each
# estimation method.
log.afdw1 = c(6.4396563567129,
3.23142511160909,0.726792175730107,0.824517630590715,
29.9457850283007,0.796040186720814),
log.afdw2 = c(6.83116004521472,
3.16164401319114,0.596970091233782,0.687331750382688,
38.039003453732,0.660863287198884),
log.afdw3 = c(5.40642096578834,
2.81176957087929,0.683309845006421,0.77013084746403,
23.2139922541224,0.744889092946985),
log.afdw4 = c(4.43005757719623,
2.41188371975938,0.647148616791328,0.723292930805774,
17.1769499887209,0.701220368198762),
log.afdw5 = c(4.91535767152539,
2.63537716416517,0.684039365068915,0.766672097761578,
19.7212543501157,0.742695016919477),
log.afdw6 = c(5.25316585945686,
2.7067928227097,0.644703505395234,0.7278556852439,23.0280244990698,
0.703666129879028),
log.afdw7 = c(5.34976189684225,
2.79136680132969,0.683142630889635,0.769484342581568,
22.8047844780934,0.744387184411018),
log.afdw8 = c(9.59959041371451,
4.5717429498668,0.918280128730922,1.05176429111161,50.1563500630291,
1.01273601111021),
log.afdw9 = c(5.25940741628465,
2.83560704415382,0.744944854871346,0.834083719301313,
20.8410150709269,0.80822807578474),
log.afdw10 = c(8.25532268455156,
4.22907589170667,0.994700666741216,1.12418855519932,
36.6599500662328,1.08650554944192),
log.afdw11 = c(4.80315596034959,
2.53780177614021,0.63817772219286,0.71718829207643,19.9101328037613,
0.694240624022961),
log.afdw12 = c(5.3838113973112,
2.91619829757294,0.773858573573689,0.865720925131079,
21.1141739006647,0.839083279455717),
log.afdw13 = c(6.43643677924419,
3.47262140045958,0.913672357649406,1.02287043204772,
25.4655400986121,0.991197830333234),
log.sodw1 = c(13.6907966326722,
7.20015333540567,1.79250022924751,2.0161366960534,57.3421515504154,
1.95116475628179),
log.sodw2 = c(12.4813126053132,
6.28383005741153,1.42344526220503,1.61386922664491,57.615566337857,
1.55839095902752),
nonlin.sodw1 = c(21.3518748610558,
11.6166341853875,3.11223945699587,3.47887302732492,82.9177226084439,
3.37258924318284)
)
# Print the data frame for inspection.
dat
A quick data overview: each row represents an individual organism with a length given by `length_mm`,
which was collected at a sample site given by `site`,
which provides info about the type of sample (starts with "D" for SCUBA-collected and "G" for a grab-type sample). Columns `log.afdw1` through `nonlin.sodw1`
are all estimates of biomass for each organism by different methods. I need to compile (sum) the biomass for each site per square meter for each method (sum of biomass in a sample / area of the sampler). The areas of the two samplers are given by:
# Sampler footprints used as divisors when scaling summed biomass to a
# per-area value: ek_area is applied to "G" samples and frame_area to "D"
# samples. The / 10000 presumably converts cm^2 to m^2 -- confirm the units.
ek_area <- (15.24 * 15.24) / 10000
frame_area <- (22.1 * 26) / 10000
Based on this SO question:
I thought I could do the following:
# Asker's attempt, reported below to fail with an error; kept as written.
# Sampler areas (same values as given earlier in the question).
ek_area <- 15.24*15.24/10000
frame_area <- 22.1*26/10000
dat2 <- dat %>%
# Depth is parsed from characters 6-7 of the site code.
mutate(depth_m=as.numeric(substr(site,6,7))) %>%
# Sum every biomass column within each site/depth combination.
group_by(site,depth_m) %>%
summarise(across(log.afdw1:nonlin.sodw1,sum)) %>%
ungroup() %>%
# Sample type is the first character of the site code.
mutate(samp_type=substr(site,1,1)) %>%
group_by(samp_type,site,depth_m) %>%
# NOTE(review): every group here contains a single samp_type, so one of the
# two lambdas subsets down to a zero-length vector, which mutate() presumably
# cannot recycle to the group size -- the likely source of the error; confirm.
mutate(across(log.afdw1:nonlin.sodw1,
.fns = list(~.[which(samp_type=='D')]/frame_area,
~.[which(samp_type=='G')]/ek_area)))
But I keep getting an error. I could just do this with base R and bracket notation, but this is something I come across relatively often, so I'm hoping someone can help with a dplyr version.
UPDATE: Expected output:
# Reference result, built the long way by splitting on sample type.
# Step 1: total each biomass column per site/depth, then tag every site with
# its sample type (first character of the site code).
dat3 <- dat %>%
  mutate(depth_m = as.numeric(substr(site, 6, 7))) %>%
  group_by(site, depth_m) %>%
  summarise(across(log.afdw1:nonlin.sodw1, sum)) %>%
  ungroup() %>%
  mutate(samp_type = substr(site, 1, 1))

# Step 2: divide each subset by the area of its own sampler
# ("D" rows by frame_area, "G" rows by ek_area).
divedat <- dat3[dat3$samp_type == "D", ] %>%
  mutate(across(log.afdw1:nonlin.sodw1, function(x) x / frame_area))
grabdat <- dat3[dat3$samp_type == "G", ] %>%
  mutate(across(log.afdw1:nonlin.sodw1, function(x) x / ek_area))

# Step 3: stack the two subsets back together, "D" rows first.
dat_out <- rbind(divedat, grabdat)
dat_out
CodePudding user response:
We could remove 'samp_type' from the grouping and either extract the values in the columns for each samp_type
and concatenate them (`c`):
library(dplyr)

# Option 1: subset by sample type and concatenate.
# After the first summarise() there is one row per site/depth, and samp_type
# is derived from site, so within each group exactly one of the two subsets
# below is non-empty and c() returns a single value.
dat %>%
  mutate(depth_m = as.numeric(substr(site, 6, 7))) %>%
  group_by(site, depth_m) %>%
  summarise(across(log.afdw1:nonlin.sodw1, sum), .groups = "drop") %>%
  mutate(samp_type = substr(site, 1, 1)) %>%
  group_by(site, depth_m) %>%
  summarise(
    across(
      log.afdw1:nonlin.sodw1,
      ~ c(
        .x[samp_type == "D"] / frame_area,
        .x[samp_type == "G"] / ek_area
      )
    ),
    .groups = "drop"
  )
Or use a condition with `ifelse`/`case_when`
and do the division (or even create a column of area values based on the samp_type and then do the division):
# Option 2: choose the divisor with case_when().
# Biomass is first summed per site/depth; each sum is then divided by
# frame_area for "D" samples or by ek_area for "G" samples.
dat %>%
  mutate(depth_m = as.numeric(substr(site, 6, 7))) %>%
  group_by(site, depth_m) %>%
  summarise(across(log.afdw1:nonlin.sodw1, sum), .groups = "drop") %>%
  mutate(samp_type = substr(site, 1, 1)) %>%
  group_by(site, depth_m) %>%
  summarise(
    across(
      log.afdw1:nonlin.sodw1,
      ~ case_when(
        samp_type == "D" ~ .x / frame_area,
        samp_type == "G" ~ .x / ek_area
      )
    ),
    .groups = "drop"
  )
-output
# A tibble: 3 × 18
site depth_m log.afdw1 log.afdw2 log.afdw3 log.afdw4 log.afdw5 log.afdw6 log.afdw7 log.afdw8 log.afdw9 log.afdw10 log.afdw11 log.afdw12
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 D-01-03… 3 112. 119. 94.1 77.1 85.5 91.4 93.1 167. 91.5 144. 83.6 93.7
2 D-01-05… 5 14.3 12.0 13.4 12.6 13.3 12.7 13.4 18.3 14.5 19.6 12.5 15.1
3 G-X7-75… 75 1324. 1666. 1032. 770. 881. 1022. 1014. 2203. 932. 1625. 887. 945.
# … with 4 more variables: log.afdw13 <dbl>, log.sodw1 <dbl>, log.sodw2 <dbl>, nonlin.sodw1 <dbl>