Home > front end >  Combine rows into one row and merge information
Combine rows into one row and merge information

Time:08-20

I have these data

df <- structure(list(Site = c("2B", "2B", "2B", "2B", "2B", "2C", "2C", 
"2C", "2C", "2C", "FS", "FS", "FS", "FS", "HE", "HE", "HE"), 
    Year = c(2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 
    2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014, 2014), Maxcount_site = c(46L, 
    46L, 46L, 46L, 46L, 25L, 25L, 25L, 25L, 25L, 19L, 19L, 19L, 
    19L, 10L, 10L, 10L), Status = c("New Capture", "New Capture", 
    "Retrap", "Retrap", "Retrap", "New Capture", "New Capture", 
    "Retrap", "Retrap", "Retrap", "New Capture", "New Capture", 
    "Retrap", "Retrap", "New Capture", "New Capture", "Retrap"
    ), Name = c("bluti", "greti", "bluti", "greti", "marti", 
    "bluti", "greti", "bluti", "greti", "marti", "bluti", "greti", 
    "bluti", "greti", "bluti", "greti", "bluti"), maxcount = c(17L, 
    3L, 14L, 11L, 1L, 2L, 2L, 13L, 5L, 3L, 7L, 1L, 9L, 2L, 5L, 
    1L, 4L), blutinew = c(17L, NA, NA, NA, NA, 2L, NA, NA, NA, 
    NA, 7L, NA, NA, NA, 5L, NA, NA), blutiretrap = c(NA, NA, 
    14L, NA, NA, NA, NA, 13L, NA, NA, NA, NA, 9L, NA, NA, NA, 
    4L), gretinew = c(NA, 3L, NA, NA, NA, NA, 2L, NA, NA, NA, 
    NA, 1L, NA, NA, NA, 1L, NA), gretiretrap = c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_), martinew = c(NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_), martiretrap = c(NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_, 
    NA_integer_)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -17L), groups = structure(list(Site = c("2B", 
"2C", "FS", "HE"), .rows = structure(list(1:5, 6:10, 11:14, 15:17), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -4L), .drop = TRUE))

head(df)
Site   Year Maxcount_site Status      Name  maxcount blutinew blutiretrap gretinew gretiretrap martinew martiretrap
  <chr> <dbl>         <int> <chr>       <chr>    <int>    <int>       <int>    <int>       <int>    <int>       <int>
1 2B     2014            46 New Capture bluti       17       17          NA       NA          NA       NA          NA
2 2B     2014            46 New Capture greti        3       NA          NA        3          NA       NA          NA
3 2B     2014            46 Retrap      bluti       14       NA          14       NA          NA       NA          NA
4 2B     2014            46 Retrap      greti       11       NA          NA       NA          NA       NA          NA
5 2B     2014            46 Retrap      marti        1       NA          NA       NA          NA       NA          NA
6 2C     2014            25 New Capture bluti        2        2          NA       NA          NA       NA          NA
> 

Let's say they describe the observation of individuals at a Site in a Year, whether they are new observations or re-observed Status, the max number of all individuals Maxcount_site, then those observations broken down into categories e.g blutinew is all the observations for bluti that are New Capture and so on.

I would like to combine all rows with the same Site, Year into one row so that it looks like this, while removing the cols Name and status and maxcount

Site Year Maxcount_site blutinew blutiretrap gretinew gretiretrap martinew martiretrap
1   2B 2014            46       17          14        3          11        0           1
2   2C 2014            25        2          13        2           5        0           3
3   FS 2014            19        7           9        1           2        0           0
4   HE 2014            10        5           4        1           0        0           0

CodePudding user response:

This is the job for a simple group_by and summarize

library(tidyverse)
df |> 
  group_by(Site, Year) |> 
  summarize(across(-c(Maxcount_site, Status, Name), sum, na.rm = TRUE)) |> 
  ungroup()

# A tibble: 4 × 9
  Site   Year maxcount blutinew blutiretrap gretinew gretiretrap martinew martiretrap
  <chr> <dbl>    <int>    <int>       <int>    <int>       <int>    <int>       <int>
1 2B     2014       46       17          14        3           0        0           0
2 2C     2014       25        2          13        2           0        0           0
3 FS     2014       19        7           9        1           0        0           0
4 HE     2014       10        5           4        1           0        0           0

CodePudding user response:

Using aggregate from base R

aggregate(.~ Site   Year, subset(df, 
   select= -c(Maxcount_site, Status, Name)), sum, na.rm = TRUE, na.action = NULL)
  Site Year maxcount blutinew blutiretrap gretinew gretiretrap martinew martiretrap
1   2B 2014       46       17          14        3           0        0           0
2   2C 2014       25        2          13        2           0        0           0
3   FS 2014       19        7           9        1           0        0           0
4   HE 2014       10        5           4        1           0        0           0
  • Related