Home > other >  Cumulative sum of occurrences by date
Cumulative sum of occurrences by date

Time:08-12

Let's say I have to following dataframe

 dput(df)
structure(list(Date_time = structure(c(1641025800, 1641025800, 
1641025800, 1641025800, 1641025800, 1641025800, 1641025800, 1641025800, 
1641027600, 1641027600, 1641027600, 1641027600, 1641027600, 1641027600, 
1641027600, 1641027600, 1641027600, 1641027600, 1641027600, 1641027600, 
1641027600, 1651396800, 1651396800, 1651396800, 1651396800, 1651396800, 
1651396800, 1651396800, 1651396800, 1651396800, 1651401000, 1651401000, 
1651401000, 1651401000, 1651401000, 1669966200, 1669966200, 1669966200, 
1669966200, 1669966200, 1669966200, 1669966200, 1669966200, 1669969800, 
1669969800, 1669969800, 1669969800, 1669969800, 1669969800, 1669969800, 
1669969800, 1669969800, 1669969800, 1669969800, 1669969800), class = c("POSIXct", 
"POSIXt"), tzone = "Europe/London"), Category = c("heat", "heat", 
"heat", "heat", "heat", "heat", "heat", "heat", "cold", "cold", 
"cold", "cold", "cold", "cold", "cold", "medium", "medium", "medium", 
"medium", "medium", "medium", "heat", "heat", "heat", "heat", 
"cold", "cold", "cold", "cold", "cold", "cold", "cold", "medium", 
"medium", "medium", "heat", "heat", "heat", "heat", "heat", "cold", 
"cold", "cold", "cold", "cold", "cold", "cold", "medium", "medium", 
"medium", "medium", "medium", "medium", "heat", "heat"), SubCat = c("r", 
"r", "r", "r", "n", "n", "n", "r", "r", "r", "r", "n", "n", "n", 
"n", "r", "r", "r", "n", "n", "n", "n", "n", "n", "r", "r", "r", 
"r", "n", "n", "n", "n", "r", "r", "r", "r", "r", "r", "n", "n", 
"n", "n", "n", "n", "r", "r", "r", "r", "n", "n", "r", "r", "r", 
"n", "n"), Site = c("1a", "1a", "1a", "1a", "1a", "1a", "1a", 
"1a", "1a", "1a", "1b", "1b", "1b", "1b", "1b", "1b", "1b", "1b", 
"1b", "1b", "1b", "2c", "2c", "2c", "2c", "2c", "2c", "2c", "2c", 
"2c", "2c", "2c", "2c", "2c", "2c", "7c", "7c", "7c", "7c", "7c", 
"7c", "7c", "7c", "7c", "7c", "7c", "7c", "7c", "7c", "7c", "7c", 
"7c", "7c", "7c", "7c")), row.names = c(NA, -55L), class = "data.frame")
> 

I would to produce some plots of the cumalative sums of each Category, by date, subcategory( and maybe Site too, but not sure if that will make sense). Essentially sum the total of each category per day, then add that to the next day in the dataframes total and so on. So that I end with something like below that I can plot

         Date Category Subcategory Count
1  01/01/2022     Heat           r     5
2  01/01/2022     Cold           r     6
3  01/01/2022   Medium           r     9
4  01/01/2022     Heat           n     3
5  01/01/2022     Cold           n     6
6  01/01/2022   Medium           n    10
7  05/01/2022     Heat           r     3
8  05/01/2022     Cold           r     6
9  05/01/2022   Medium           r     9
10 05/01/2022     Heat           n     4
11 05/01/2022     Cold           n     8
12 05/01/2022   Medium           n    12
13 12/01/2022     Heat           r     3
14 12/01/2022     Cold           r     6
15 12/01/2022   Medium           r    10
16 12/01/2022     Heat           n     3
17 12/01/2022     Cold           n     3
18 12/01/2022   Medium           n     5

CodePudding user response:

dplyr solution:

library(dplyr)
structure(list(Date_time = structure(c(1641025800, 1641025800, 
                                       1641025800, 1641025800, 1641025800, 1641025800, 1641025800, 1641025800, 
                                       1641027600, 1641027600, 1641027600, 1641027600, 1641027600, 1641027600, 
                                       1641027600, 1641027600, 1641027600, 1641027600, 1641027600, 1641027600, 
                                       1641027600, 1651396800, 1651396800, 1651396800, 1651396800, 1651396800, 
                                       1651396800, 1651396800, 1651396800, 1651396800, 1651401000, 1651401000, 
                                       1651401000, 1651401000, 1651401000, 1669966200, 1669966200, 1669966200, 
                                       1669966200, 1669966200, 1669966200, 1669966200, 1669966200, 1669969800, 
                                       1669969800, 1669969800, 1669969800, 1669969800, 1669969800, 1669969800, 
                                       1669969800, 1669969800, 1669969800, 1669969800, 1669969800), class = c("POSIXct", 
                                                                                                              "POSIXt"), tzone = "Europe/London"), Category = c("heat", "heat", 
                                                                                                                                                                "heat", "heat", "heat", "heat", "heat", "heat", "cold", "cold", 
                                                                                                                                                                "cold", "cold", "cold", "cold", "cold", "medium", "medium", "medium", 
                                                                                                                                                                "medium", "medium", "medium", "heat", "heat", "heat", "heat", 
                                                                                                                                                                "cold", "cold", "cold", "cold", "cold", "cold", "cold", "medium", 
                                                                                                                                                                "medium", "medium", "heat", "heat", "heat", "heat", "heat", "cold", 
                                                                                                                                                                "cold", "cold", "cold", "cold", "cold", "cold", "medium", "medium", 
                                                                                                                                                                "medium", "medium", "medium", "medium", "heat", "heat"), SubCat = c("r", 
                                                                                                                                                                                                                                    "r", "r", "r", "n", "n", "n", "r", "r", "r", "r", "n", "n", "n", 
                                                                                                                                                                                                                                    "n", "r", "r", "r", "n", "n", "n", "n", "n", "n", "r", "r", "r", 
                                                                                                                                                                                                                                    "r", "n", "n", "n", "n", "r", "r", "r", "r", "r", "r", "n", "n", 
                                                                                                                                                                                                                                    "n", "n", "n", "n", "r", "r", "r", "r", "n", "n", "r", "r", "r", 
                                                                                                                                                                                                                                    "n", "n"), Site = c("1a", "1a", "1a", "1a", "1a", "1a", "1a", 
                                                                                                                                                                                                                                                        "1a", "1a", "1a", "1b", "1b", "1b", "1b", "1b", "1b", "1b", "1b", 
                                                                                                                                                                                                                                                        "1b", "1b", "1b", "2c", "2c", "2c", "2c", "2c", "2c", "2c", "2c", 
                                                                                                                                                                                                                                                        "2c", "2c", "2c", "2c", "2c", "2c", "7c", "7c", "7c", "7c", "7c", 
                                                                                                                                                                                                                                                        "7c", "7c", "7c", "7c", "7c", "7c", "7c", "7c", "7c", "7c", "7c", 
                                                                                                                                                                                                                                                        "7c", "7c", "7c", "7c")), row.names = c(NA, -55L), class = "data.frame") %>% 
    select(-Site, Subcategory = SubCat) %>% 
    group_by_all() %>% 
    count(name = 'Count') %>% 
    arrange(Date_time) %>% 
    group_by(Category, Subcategory) %>% 
    mutate(
        Count = cumsum(Count)
    )
# A tibble: 20 x 4
# Groups:   Category, Subcategory [6]
   Date_time           Category Subcategory Count
   <dttm>              <chr>    <chr>       <int>
 1 2022-01-01 08:30:00 heat     n               3
 2 2022-01-01 08:30:00 heat     r               5
 3 2022-01-01 09:00:00 cold     n               4
 4 2022-01-01 09:00:00 cold     r               3
 5 2022-01-01 09:00:00 medium   n               3
 6 2022-01-01 09:00:00 medium   r               3
 7 2022-05-01 10:20:00 cold     n               6
 8 2022-05-01 10:20:00 cold     r               6
 9 2022-05-01 10:20:00 heat     n               6
10 2022-05-01 10:20:00 heat     r               6
11 2022-05-01 11:30:00 cold     n               8
12 2022-05-01 11:30:00 medium   r               6
13 2022-12-02 07:30:00 cold     n              11
14 2022-12-02 07:30:00 heat     n               8
15 2022-12-02 07:30:00 heat     r               9
16 2022-12-02 08:30:00 cold     n              12
17 2022-12-02 08:30:00 cold     r               9
18 2022-12-02 08:30:00 heat     n              10
19 2022-12-02 08:30:00 medium   n               5
20 2022-12-02 08:30:00 medium   r              10

CodePudding user response:

Something like this?

library(tidyverse)

df %>% 
  count(Date_time, Category, SubCat) %>% 
  mutate(x = cumsum(n)) %>% 
  ggplot(aes(x = Date_time, y=x, color=SubCat, group=SubCat)) 
  geom_line() 
  geom_point() 
  facet_wrap(.~Category)

enter image description here

  • Related