Home > Mobile >  summing up values in R dataframe without aggregation
summing up values in R dataframe without aggregation

Time:02-09

How can I add up row values in a dataframe based on conditions without having to aggregate the whole table?

I have this df:

  town party votes
1    a     A     1
2    a     B     2
3    a     C     3
4    b     A     4
5    b     B     5
6    b     C     6
7    c     A     7
8    c     B     8
9    c     C     9

I would like to add the votes of one party to those of another by town, without touching the values of the third one.

Basically, I want to run df$votes[df$party == "A"] = df$votes[df$party == "A"] + df$votes[df$party == "B"] for each category of df$town.

CodePudding user response:

I'm interpreting your pseudo-code as wanting to only update party "A" to the sum of both "A" and "B"'s votes.

base R

# Split df by town, overwrite party "A"'s votes in each piece with the
# combined "A" + "B" total for that town, then stack the pieces back together.
do.call(
  rbind,
  by(df, df$town, function(town_rows) {
    is_a_or_b <- town_rows$party %in% c("A", "B")
    combined_votes <- sum(town_rows$votes[is_a_or_b])
    town_rows$votes[town_rows$party == "A"] <- combined_votes
    town_rows
  })
)
#     town party votes
# a.1    a     A     3
# a.2    a     B     2
# a.3    a     C     3
# b.4    b     A     9
# b.5    b     B     5
# b.6    b     C     6
# c.7    c     A    15
# c.8    c     B     8
# c.9    c     C     9

dplyr

library(dplyr)
# Within each town, party "A" takes the combined "A" + "B" vote total; every
# other row keeps its own votes.
df %>%
  group_by(town) %>%
  mutate(
    votes = if_else(
      party == "A",
      sum(votes[party %in% c("A", "B")]),
      votes
    )
  ) %>%
  ungroup()
# # A tibble: 9 x 3
#   town  party votes
#   <chr> <chr> <int>
# 1 a     A         3
# 2 a     B         2
# 3 a     C         3
# 4 b     A         9
# 5 b     B         5
# 6 b     C         6
# 7 c     A        15
# 8 c     B         8
# 9 c     C         9

data.table

library(data.table)
# Build a data.table from df; setDT(df) is the usual in-place alternative.
DT <- as.data.table(df)
# Per town, party "A" receives the combined "A" + "B" vote total; all other
# rows keep their own votes. `:=` updates DT by reference.
DT[
  ,
  votes := fifelse(party == "A", sum(votes[party %in% c("A", "B")]), votes),
  by = town
]
#      town  party votes
#    <char> <char> <int>
# 1:      a      A     3
# 2:      a      B     2
# 3:      a      C     3
# 4:      b      A     9
# 5:      b      B     5
# 6:      b      C     6
# 7:      c      A    15
# 8:      c      B     8
# 9:      c      C     9

CodePudding user response:

You can try mutate with dplyr if you want to keep the structure of the dataframe

library(dplyr)

# Add a `sum` column: within each town, every non-"C" row gets the combined
# votes of all non-"C" parties, while "C" rows keep their own votes.
df %>%
  group_by(town) %>%
  mutate(
    sum = ifelse(party != "C", sum(votes[party != "C"]), votes)
  ) %>%
  ungroup()
# A tibble: 9 × 4
  town  party votes   sum
  <chr> <chr> <int> <int>
1 a     A         1     3
2 a     B         2     3
3 a     C         3     3
4 b     A         4     9
5 b     B         5     9
6 b     C         6     6
7 c     A         7    15
8 c     B         8    15
9 c     C         9     9

Another way using summarise

# Collapse to one row per town holding the combined votes of all non-"C"
# parties; "C" rows are dropped before summing.
df %>%
  filter(party != "C") %>%
  group_by(town) %>%
  summarise(sum = sum(votes))
# A tibble: 3 × 2
  town    sum
  <chr> <int>
1 a         3
2 b         9
3 c        15

CodePudding user response:

tidyverse

# Example data: towns "a", "b" and "c", each listing parties "A", "B" and "C",
# with integer vote counts running from 1 to 9.
df <- data.frame(
  town = rep(c("a", "b", "c"), each = 3L),
  party = rep(c("A", "B", "C"), times = 3L),
  votes = 1:9,
  stringsAsFactors = FALSE
)

library(tidyverse)
# Group rows by town and by whether the party is "A" or "B", then give each
# row its group's concatenated party names and summed votes. The helper
# grouping column is dropped at the end.
df %>%
  group_by(town, grp_party = party %in% c("A", "B")) %>%
  mutate(
    new_party = paste0(party, collapse = ""),
    new_votes = sum(votes)
  ) %>%
  ungroup() %>%
  select(-grp_party)
#> # A tibble: 9 x 5
#>   town  party votes new_party new_votes
#>   <chr> <chr> <int> <chr>         <int>
#> 1 a     A         1 AB                3
#> 2 a     B         2 AB                3
#> 3 a     C         3 C                 3
#> 4 b     A         4 AB                9
#> 5 b     B         5 AB                9
#> 6 b     C         6 C                 6
#> 7 c     A         7 AB               15
#> 8 c     B         8 AB               15
#> 9 c     C         9 C                 9

Created on 2022-02-08 by the reprex package (v2.0.1)

data.table

library(data.table)
# Replace each row's votes with its group total. Groups are formed per town
# and by whether the party is "A" or "B", so "A" and "B" rows both receive the
# combined "A" + "B" total, and the remaining parties of a town are pooled
# together (here that is just "C").
# `votes` is summed by name instead of via lapply(.SD, sum): the latter only
# works while .SD holds the single column `votes`, and breaks as soon as df
# carries any additional column.
# setDT() converts df to a data.table by reference; the trailing [] prints it.
setDT(df)[, votes := sum(votes), by = list(town, party %in% c("A", "B"))][]
#>    town party votes
#> 1:    a     A     3
#> 2:    a     B     3
#> 3:    a     C     3
#> 4:    b     A     9
#> 5:    b     B     9
#> 6:    b     C     6
#> 7:    c     A    15
#> 8:    c     B    15
#> 9:    c     C     9

Created on 2022-02-08 by the reprex package (v2.0.1)

  •  Tags:  
  • Related