Here are my data.
> df
VAR1 VAR2 VAR3 VAR4 VAR5 VAR6 VAR7 VAR8 Group
ACSK 16 4 0 25 0 0 7 16 Group3
CKPD 9 19 0 17 0 11 6 13 Group3
AEXF 10 0 0 36 0 0 0 0 Group1
AGNS 34 0 77 30 0 0 0 0 Group3
AGRL 19 0 0 52 0 0 0 0 Group2
AJEH 5 0 40 8 0 0 0 13 Group3
AOKS 34 0 47 43 0 0 0 0 Group2
AYKR 14 0 186 18 0 0 0 0 Group2
MIEY 20 226 0 102 0 0 201 0 Group1
BLHR 13 0 0 50 0 0 0 0 Group1
I want to sum the number by row grouped by the Group
variable.
VAR1 VAR2 VAR3 VAR4 VAR5 VAR6 VAR7 VAR8 Group
1 43 226 0 ... 201 0 Group1
2 67 0 117 ... 0 0 Group2
3 64 23 233 ... 13 42 Group3
I tried the code below but it did not work.
df %>%
group_by(Group) %>%
apply(., 1, sum) %>%
rbind(.)
data
structure(list(VAR1 = c(16L, 9L, 10L, 34L, 19L, 5L, 34L, 14L,
20L, 13L), VAR2 = c(4L, 19L, 0L, 0L, 0L, 0L, 0L, 0L, 226L, 0L
), VAR3 = c(0, 0, 0, 77, 0, 40, 47, 186, 0, 0), VAR4 = c(25L,
17L, 36L, 30L, 52L, 8L, 43L, 18L, 102L, 50L), VAR5 = c(0, 0,
0, 0, 0, 0, 0, 0, 0, 0), VAR6 = c(0, 11, 0, 0, 0, 0, 0, 0, 0,
0), VAR7 = c(7L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 201L, 0L), VAR8 = c(16L,
13L, 0L, 0L, 0L, 13L, 0L, 0L, 0L, 0L), Group = c("Group3", "Group3",
"Group1", "Group3", "Group2", "Group3", "Group2", "Group2", "Group1",
"Group1")), row.names = c("ACSK", "CKPD", "AEXF", "AGNS", "AGRL",
"AJEH", "AOKS", "AYKR", "MIEY", "BLHR"), class = "data.frame")
CodePudding user response:
I am not sure what is your expected output but as a general advice I would suggest not to mix base R and tidyverse
approaches.
If you want to calculate rowwise sum then you don't need to group_by
at all.
library(dplyr)
df %>%
mutate(result = rowSums(select(., starts_with('VAR'))))
# VAR1 VAR2 VAR3 VAR4 VAR5 VAR6 VAR7 VAR8 Group result
#ACSK 16 4 0 25 0 0 7 16 Group3 68
#CKPD 9 19 0 17 0 11 6 13 Group3 75
#AEXF 10 0 0 36 0 0 0 0 Group1 46
#AGNS 34 0 77 30 0 0 0 0 Group3 141
#AGRL 19 0 0 52 0 0 0 0 Group2 71
#AJEH 5 0 40 8 0 0 0 13 Group3 66
#AOKS 34 0 47 43 0 0 0 0 Group2 124
#AYKR 14 0 186 18 0 0 0 0 Group2 218
#MIEY 20 226 0 102 0 0 201 0 Group1 549
#BLHR 13 0 0 50 0 0 0 0 Group1 63
Further, if you want to sum
the result
by Group
you may further do -
df %>%
mutate(result = rowSums(select(., starts_with('VAR')))) %>%
group_by(Group) %>%
summarise(result = sum(result))
# Group result
# <chr> <dbl>
#1 Group1 658
#2 Group2 413
#3 Group3 350
For the updated question you may do -
df %>%
group_by(Group) %>%
summarise(across(starts_with('VAR'), sum))
# Group VAR1 VAR2 VAR3 VAR4 VAR5 VAR6 VAR7 VAR8
#1 Group1 43 226 0 188 0 0 201 0
#2 Group2 67 0 233 113 0 0 0 0
#3 Group3 64 23 117 80 0 11 13 42
Or in base R -
aggregate(.~Group, df, sum)
CodePudding user response:
To sum within groups and across rows:
# Load packages
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Generate data
df <- data.frame(VAR1 = sample(x = 0:20, size = 12),
VAR2 = sample(x = 0:20, size = 12),
VAR3 = sample(x = 0:20, size = 12),
Group = rep(c('Group 1', 'Group 2', 'Group 3'), 4))
# Sum
df %>%
# Sum each column by group
group_by(Group) %>%
summarise(across(.cols = where(is.numeric), ~sum(.x))) %>%
# Sum each row
rowwise() %>%
mutate(Total = sum(c_across(cols = where(is.numeric)))) %>%
ungroup()
#> # A tibble: 3 × 5
#> Group VAR1 VAR2 VAR3 Total
#> <chr> <int> <int> <int> <int>
#> 1 Group 1 26 39 40 105
#> 2 Group 2 51 41 43 135
#> 3 Group 3 39 30 42 111
Created on 2021-12-02 by the reprex package (v1.0.0)