I have a dataset, where I import data from SQL, that looks like this:
ID Name Date
1 John 2021-09-05
1 John 2021-08-14
1 John 2021-08-21
2 Kate 2021-03-04
3 Mary 2021-07-10
3 Mary 2021-06-13
4 Joel 2021-05-10
4 Joel 2021-05-31
4 Joel 2021-04-13
4 Joel 2021-07-11
4 Joel 2021-03-31
5 Mike 2021-04-02
5 Mike 2021-02-07
I want to group this by floor_date(df, ‘month’) And to then summarize by how many occurrences per month to give an output in a horizontal table looking like following:
Name | Feb 21 | Mar 21 | Apr 21 | May 21 | Jun 21 | Jul 21 | ....
John 0 0 0 0 0 0 ....
Joel 0 1 1 2 0 1 ....
Kate 0 1 0 0 0 0 ....
Mary 0 0 0 0 1 1 ....
Mike 1 0 1 0 0 0 ....
CodePudding user response:
I think you're just looking for table
library(tidyverse)
library(lubridate)
df %>%
group_by(Date = floor_date(Date, "month")) %>%
select(-ID) %>%
table()
#> Date
#> Name 2021-02-01 2021-03-01 2021-04-01 2021-05-01 2021-06-01 2021-07-01
#> Joel 0 1 1 2 0 1
#> John 0 0 0 0 0 0
#> Kate 0 1 0 0 0 0
#> Mary 0 0 0 0 1 1
#> Mike 1 0 1 0 0 0
#> Date
#> Name 2021-08-01 2021-09-01
#> Joel 0 0
#> John 2 1
#> Kate 0 0
#> Mary 0 0
#> Mike 0 0
If you prefer the output to be a tibble / data frame, you can do:
df %>%
group_by(Date = floor_date(Date, "month")) %>%
select(-ID) %>%
table() %>%
as_tibble() %>%
pivot_wider(names_from = Date, values_from = n)
#> # A tibble: 5 x 9
#> Name `2021-02-01` `2021-03-01` `2021-04-01` `2021-05-01` `2021-06-01` `2021-07-01` `2021-08-01` `2021-09-01`
#> <chr> <int> <int> <int> <int> <int> <int> <int> <int>
#> 1 Joel 0 1 1 2 0 1 0 0
#> 2 John 0 0 0 0 0 0 2 1
#> 3 Kate 0 1 0 0 0 0 0 0
#> 4 Mary 0 0 0 0 1 1 0 0
#> 5 Mike 1 0 1 0 0 0 0 0
Created on 2022-10-04 with reprex v2.0.2
Data from question in reproducible format
df <- structure(list(ID = c(1L, 1L, 1L, 2L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 5L, 5L), Name = c("John", "John", "John", "Kate", "Mary",
"Mary", "Joel", "Joel", "Joel", "Joel", "Joel", "Mike", "Mike"
), Date = structure(c(18875, 18853, 18860, 18690, 18818, 18791,
18757, 18778, 18730, 18819, 18717, 18719, 18665), class = "Date")),
row.names = c(NA, -13L), class = "data.frame")
CodePudding user response:
One possible solution:
library(tidyr)
library(dplyr)
df |>
mutate(Date = strftime(as.Date(Date), "%b-%y")) |>
pivot_wider(id_cols=Name, names_from=Date, values_from=Date, values_fn=length, values_fill=0)
# A tibble: 5 x 9
Name `Sep-21` `Aug-21` `Mar-21` `Jul-21` `Jun-21` `May-21` `Apr-21` `Feb-21`
<chr> <int> <int> <int> <int> <int> <int> <int> <int>
1 John 1 2 0 0 0 0 0 0
2 Kate 0 0 1 0 0 0 0 0
3 Mary 0 0 0 1 1 0 0 0
4 Joel 0 0 1 1 0 2 1 0
5 Mike 0 0 0 0 0 0 1 1
Or using the built-in functions xtabs
and transform
df |>
transform(Date = strftime(as.Date(Date), "%b-%y")) |>
xtabs(formula=~Name Date)
Date
Name Apr-21 Aug-21 Feb-21 Jul-21 Jun-21 Mar-21 May-21 Sep-21
Joel 1 0 0 1 0 1 2 0
John 0 2 0 0 0 0 0 1
Kate 0 0 0 0 0 1 0 0
Mary 0 0 0 1 1 0 0 0
Mike 1 0 1 0 0 0 0 0
Or using data.table
library(data.table)
dcast(setDT(df), Name ~ strftime(as.Date(Date), "%b-%y"), fun=length)
Name Apr-21 Aug-21 Feb-21 Jul-21 Jun-21 Mar-21 May-21 Sep-21
1: Joel 1 0 0 1 0 1 2 0
2: John 0 2 0 0 0 0 0 1
3: Kate 0 0 0 0 0 1 0 0
4: Mary 0 0 0 1 1 0 0 0
5: Mike 1 0 1 0 0 0 0 0