Home > Software engineering >  How do you use dplyr::across to mutate by group?
How do you use dplyr::across to mutate by group?

Time:10-11

A snippet of my data:

# Load the tidyverse (dplyr supplies the verbs used on this data further down).
library(tidyverse)
# Six-row sample of the data: one row per individual organism.
#   ID         - integer identifier
#   length_mm  - organism length
#   site       - sample-site code; the first character is the sample type
#                ("D" or "G")
#   log.afdw1 ... nonlin.sodw1 - biomass estimates for the same organism
#                produced by different methods
dat <- data.frame(
  stringsAsFactors = FALSE,
  # Non-contiguous row names; presumably the row positions in the full
  # data set this sample was taken from - TODO confirm.
  row.names = c("1", "2", "3", "670", "59370", "59375"),
  ID = c(1L, 2L, 3L, 268L, 3L, 3L),
  length_mm = c(14.601, 11.574, 7.001, 7.305, 24.506, 7.219),
  site = c("D-01-03M-WA","D-01-03M-WA",
           "D-01-03M-WA","D-01-05M-WA","G-X7-75M-WA","G-X7-75M-WA"),
  # Biomass estimates, one column per method.
  log.afdw1 = c(6.4396563567129,
                3.23142511160909,0.726792175730107,0.824517630590715,
                29.9457850283007,0.796040186720814),
  log.afdw2 = c(6.83116004521472,
                3.16164401319114,0.596970091233782,0.687331750382688,
                38.039003453732,0.660863287198884),
  log.afdw3 = c(5.40642096578834,
                2.81176957087929,0.683309845006421,0.77013084746403,
                23.2139922541224,0.744889092946985),
  log.afdw4 = c(4.43005757719623,
                2.41188371975938,0.647148616791328,0.723292930805774,
                17.1769499887209,0.701220368198762),
  log.afdw5 = c(4.91535767152539,
                2.63537716416517,0.684039365068915,0.766672097761578,
                19.7212543501157,0.742695016919477),
  log.afdw6 = c(5.25316585945686,
                2.7067928227097,0.644703505395234,0.7278556852439,23.0280244990698,
                0.703666129879028),
  log.afdw7 = c(5.34976189684225,
                2.79136680132969,0.683142630889635,0.769484342581568,
                22.8047844780934,0.744387184411018),
  log.afdw8 = c(9.59959041371451,
                4.5717429498668,0.918280128730922,1.05176429111161,50.1563500630291,
                1.01273601111021),
  log.afdw9 = c(5.25940741628465,
                2.83560704415382,0.744944854871346,0.834083719301313,
                20.8410150709269,0.80822807578474),
  log.afdw10 = c(8.25532268455156,
                 4.22907589170667,0.994700666741216,1.12418855519932,
                 36.6599500662328,1.08650554944192),
  log.afdw11 = c(4.80315596034959,
                 2.53780177614021,0.63817772219286,0.71718829207643,19.9101328037613,
                 0.694240624022961),
  log.afdw12 = c(5.3838113973112,
                 2.91619829757294,0.773858573573689,0.865720925131079,
                 21.1141739006647,0.839083279455717),
  log.afdw13 = c(6.43643677924419,
                 3.47262140045958,0.913672357649406,1.02287043204772,
                 25.4655400986121,0.991197830333234),
  log.sodw1 = c(13.6907966326722,
                7.20015333540567,1.79250022924751,2.0161366960534,57.3421515504154,
                1.95116475628179),
  log.sodw2 = c(12.4813126053132,
                6.28383005741153,1.42344526220503,1.61386922664491,57.615566337857,
                1.55839095902752),
  nonlin.sodw1 = c(21.3518748610558,
                   11.6166341853875,3.11223945699587,3.47887302732492,82.9177226084439,
                   3.37258924318284)
)
# Print the sample for inspection.
dat

A quick data overview: each row represents an individual organism with a length given by length_mm, collected at the sample site given by site. The site code also encodes the type of sample (it starts with "D" for SCUBA-collected samples and "G" for grab-type samples). The columns log.afdw1 through nonlin.sodw1 are all estimates of biomass for each organism by different methods. I need to compile (sum) the biomass for each site per square meter for each method (sum of biomass in a sample / area of the sampler). The areas of the two samplers are given by:

# Sampler footprints: side lengths multiplied together, then divided by 10000
# (presumably cm^2 -> m^2, since biomass is wanted per square meter).
# ek_area is applied to "G" (grab) samples, frame_area to "D" (SCUBA) samples.
ek_area <- (15.24 * 15.24) / 10000
frame_area <- (22.1 * 26) / 10000

Based on this SO question:

mutate by group in R

I thought I could do the following:

# Sampler areas: ek_area divides "G" samples, frame_area divides "D" samples.
ek_area <- 15.24*15.24/10000
frame_area <- 22.1*26/10000

# Attempted pipeline (this is the version that raises an error):
# sum each biomass column per site/depth, then try to divide by the area
# that matches the sample type.
dat2 <- dat %>% 
  # Depth is read from characters 6-7 of the site code (e.g. "03" -> 3).
  mutate(depth_m=as.numeric(substr(site,6,7))) %>% 
  group_by(site,depth_m) %>% 
  summarise(across(log.afdw1:nonlin.sodw1,sum)) %>% 
  ungroup() %>% 
  # Sample type is the first character of the site code ("D" or "G").
  mutate(samp_type=substr(site,1,1)) %>% 
  group_by(samp_type,site,depth_m) %>% 
  # NOTE(review): samp_type is a grouping variable here, so every group holds
  # a single sample type. The subset for the *other* type then selects no
  # values, which is presumably what mutate() rejects - confirm against the
  # actual error message. Both functions are also applied to every column
  # (two functions in the list), rather than one per sample type.
  mutate(across(log.afdw1:nonlin.sodw1,
                .fns = list(~.[which(samp_type=='D')]/frame_area,
                            ~.[which(samp_type=='G')]/ek_area)))

But I keep getting an error. I could just do this with base R and bracket notation, but this is something I come across relatively often, so I'm hoping someone can help with a dplyr version.

UPDATE: Expected output:

 # Total every biomass estimate per site/depth, then tag each row with its
 # sampler type (first character of the site code).
 dat3 <- dat %>%
   mutate(depth_m = as.numeric(substr(site, 6, 7))) %>%
   group_by(site, depth_m) %>%
   summarise(across(log.afdw1:nonlin.sodw1, sum)) %>%
   ungroup() %>%
   mutate(samp_type = substr(site, 1, 1))

 # Take the rows of each sampler type and scale them by that sampler's area:
 # "D" rows by frame_area, "G" rows by ek_area.
 divedat <- dat3[dat3$samp_type == "D", ] %>%
   mutate(across(log.afdw1:nonlin.sodw1, ~ .x / frame_area))
 grabdat <- dat3[dat3$samp_type == "G", ] %>%
   mutate(across(log.afdw1:nonlin.sodw1, ~ .x / ek_area))

 # Stack the two subsets again: "D" rows first, "G" rows second.
 dat_out <- rbind(divedat, grabdat)
dat_out

CodePudding user response:

We could remove 'samp_type' from the grouping and either extract the values in each column for each samp_type, divide them by the matching area, and concatenate the results with c():

library(dplyr)

# Sum the biomass columns per site/depth, then divide each total by the area
# of the sampler that matches the site's sample type.
dat %>%
  mutate(depth_m = as.numeric(substr(site, 6, 7))) %>%
  group_by(site, depth_m) %>%
  summarise(across(log.afdw1:nonlin.sodw1, sum), .groups = "drop") %>%
  mutate(samp_type = substr(site, 1, 1)) %>%
  group_by(site, depth_m) %>%
  summarise(
    across(
      log.afdw1:nonlin.sodw1,
      # Only the subset matching the group's sample type is non-empty, so
      # c() returns just that scaled value.
      ~ c(
        .x[samp_type == "D"] / frame_area,
        .x[samp_type == "G"] / ek_area
      )
    ),
    .groups = "drop"
  )

Or use a condition with ifelse/case_when and do the division (or even create a column of area values based on samp_type and then do the division):

# Same aggregation, but the divisor is chosen with a vectorised condition.
dat %>%
  mutate(depth_m = as.numeric(substr(site, 6, 7))) %>%
  group_by(site, depth_m) %>%
  summarise(across(log.afdw1:nonlin.sodw1, sum), .groups = "drop") %>%
  mutate(samp_type = substr(site, 1, 1)) %>%
  group_by(site, depth_m) %>%
  summarise(
    across(
      log.afdw1:nonlin.sodw1,
      # "D" totals are divided by frame_area, "G" totals by ek_area.
      ~ case_when(
        samp_type == "D" ~ .x / frame_area,
        samp_type == "G" ~ .x / ek_area
      )
    ),
    .groups = "drop"
  )

-output

# A tibble: 3 × 18
  site     depth_m log.afdw1 log.afdw2 log.afdw3 log.afdw4 log.afdw5 log.afdw6 log.afdw7 log.afdw8 log.afdw9 log.afdw10 log.afdw11 log.afdw12
  <chr>      <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>      <dbl>      <dbl>      <dbl>
1 D-01-03…       3     112.      119.       94.1      77.1      85.5      91.4      93.1     167.       91.5      144.        83.6       93.7
2 D-01-05…       5      14.3      12.0      13.4      12.6      13.3      12.7      13.4      18.3      14.5       19.6       12.5       15.1
3 G-X7-75…      75    1324.     1666.     1032.      770.      881.     1022.     1014.     2203.      932.      1625.       887.       945. 
# … with 4 more variables: log.afdw13 <dbl>, log.sodw1 <dbl>, log.sodw2 <dbl>, nonlin.sodw1 <dbl>
  • Related