the data example is like this below:
A B C
0 1 2
0 1 2
1 10 15
1 5 15
0 2 5
0 3 5
1 20 50
1 30 50
Above A and B is the original data, and C is the column that I want to create. C is the group sum of B based on the same and adjacent A value.Even though there are some A=0, if they are not adjacent, then it should be not sum together.
CodePudding user response:
tidyverse
library(tidyverse)
df <- data.frame(
A = c(0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L),
B = c(1L, 1L, 10L, 5L, 2L, 3L, 20L, 30L)
)
df %>%
group_by(grp = data.table::rleid(A)) %>%
mutate(res = sum(B)) %>%
ungroup() %>%
select(-grp)
#> # A tibble: 8 × 3
#> A B res
#> <int> <int> <int>
#> 1 0 1 2
#> 2 0 1 2
#> 3 1 10 15
#> 4 1 5 15
#> 5 0 2 5
#> 6 0 3 5
#> 7 1 20 50
#> 8 1 30 50
Created on 2022-04-27 by the reprex package (v2.0.1)
data.table
library(data.table)
setDT(df)[, res := sum(B), by = rleid(A)][]
#> A B res
#> 1: 0 1 2
#> 2: 0 1 2
#> 3: 1 10 15
#> 4: 1 5 15
#> 5: 0 2 5
#> 6: 0 3 5
#> 7: 1 20 50
#> 8: 1 30 50
Created on 2022-04-27 by the reprex package (v2.0.1)
base
df$res <- with(df, ave(B, data.table::rleid(A), FUN = sum))
df
#> A B res
#> 1 0 1 2
#> 2 0 1 2
#> 3 1 10 15
#> 4 1 5 15
#> 5 0 2 5
#> 6 0 3 5
#> 7 1 20 50
#> 8 1 30 50
Created on 2022-04-27 by the reprex package (v2.0.1)
CodePudding user response:
library(tidyverse)
data <- tribble(
~A, ~B, ~C,
0, 1, 2,
0, 1, 2,
1, 10, 15,
1, 5, 15,
0, 2, 5,
0, 3, 5,
1, 20, 50,
1, 30, 50
)
data <- data %>% select(-C)
rle_group <- function(x) {
rle <- rle(x)
rle$values <- rle$values %>%
length() %>%
seq()
inverse.rle(rle)
}
data %>%
mutate(
group = rle_group(A)
) %>%
group_by(group) %>%
mutate(
C = sum(B)
)
#> # A tibble: 8 × 4
#> # Groups: group [4]
#> A B group C
#> <dbl> <dbl> <int> <dbl>
#> 1 0 1 1 2
#> 2 0 1 1 2
#> 3 1 10 2 15
#> 4 1 5 2 15
#> 5 0 2 3 5
#> 6 0 3 3 5
#> 7 1 20 4 50
#> 8 1 30 4 50
Created on 2022-04-27 by the reprex package (v2.0.0)
CodePudding user response:
A dplyr
solution without any rle
functions:
library(dplyr)
df %>%
group_by(grp = cumsum(c(TRUE, diff(A) != 0))) %>%
mutate(C = sum(B)) %>%
ungroup() %>%
select(-grp)
# A tibble: 8 x 3
A B C
<int> <int> <int>
1 0 1 2
2 0 1 2
3 1 10 15
4 1 5 15
5 0 2 5
6 0 3 5
7 1 20 50
8 1 30 50
Its base
equivalent:
within(df, C <- ave(B, cumsum(c(TRUE, diff(A) != 0)), FUN = sum))
A B C
1 0 1 2
2 0 1 2
3 1 10 15
4 1 5 15
5 0 2 5
6 0 3 5
7 1 20 50
8 1 30 50