Home > Software engineering >  Geom_tile plots non-existent discontinuities in data for time axis
Geom_tile plots non-existent discontinuities in data for time axis

Time:10-06

If I have the following dataframe called data

year    month   id  group   returns
2016    2   asset_a group1  0.11592118
2016    3   asset_a group1  0.104526128
2016    4   asset_a group1  0.244925532
2016    5   asset_a group1  0.252377372
2016    6   asset_a group1  0.282602889
2016    7   asset_a group1  0.607148925
2016    8   asset_a group1  0.257815581
2016    9   asset_a group1  0.202712468
2016    10  asset_a group1  0.177455704
2016    11  asset_a group1  0.208526305
2016    12  asset_a group1  0.179808043
2017    1   asset_a group1  0.204425208
2017    2   asset_a group1  0.167787787
2017    3   asset_a group1  0.122357671
2017    4   asset_a group1  0.095889965
2017    5   asset_a group1  0.180117687
2017    6   asset_a group1  0.146912234
2017    7   asset_a group1  0.286743829
2017    8   asset_a group1  0.201531197
2017    9   asset_a group1  0.166819132
2017    10  asset_a group1  0.136262625
2017    11  asset_a group1  0.128844762
2017    12  asset_a group1  0.147595906
2018    1   asset_a group1  0.099843877
2018    2   asset_a group1  0.1928918
2018    3   asset_a group1  0.188344307
2018    4   asset_a group1  0.155801889
2018    5   asset_a group1  0.185813076
2018    6   asset_a group1  0.217531263
2018    7   asset_a group1  0.269840901
2018    8   asset_a group1  0.267351364
2018    9   asset_a group1  0.183753448
2018    10  asset_a group1  0.195182592
2018    11  asset_a group1  0.228886115
2018    12  asset_a group1  0.166964407

and in order to plot it in a heatmap I create a date vector with

data <- data %>%
 mutate(date= make_datetime(year, month))

I get a database structure of

 $ year     : int [1:564] 2016 2016 2016 2016 2016 2016 2016 2016 2016 2016 ...
 $ month    : int [1:564] 2 2 2 2 2 2 2 2 3 3 ...
 $ id       : chr [1:564] "asset_a" "asset_b" "asset_c" "asset_d" ...
 $ group    : chr [1:564] "group1" "group2" "group3" "group4" ...
 $ returns  : num [1:564] 0.115 0.3 0.105 0.245 0.28 ...
 $ date     : POSIXct[1:564], format: "2016-02-01" "2016-02-01" "2016-02-01" "2016-02-01" ...

and inputting that into the ggplot heatmap

data %>% 
  ggplot(aes(x = date, y = asset))    
  geom_tile(aes(fill = returns))  
  theme_classic()  
  scale_fill_gradientn(colours=c("#66bf7b", "#a1d07e", "#dce182", 
                                 "#ffeb84",
                                 "#fedb81", "#faa075", "#faa075"),
                       values=rescale(c(-3, -2, -1,
                                        0,
                                        1, 2, 3)),
                       guide="colorbar")  
 
  labs(x="",y="")

I get

enter image description here

Why did the ggplot create missing data out of nowhere, given that my data in the dataframe is without any monthly discontinuities? How can I fix it so that there are no white gaps in between the dates, is it related to hours and seconds in the date format?

If I plot the dates as characters I get the desired result, however, in that case, how can I reduce the number of ticks on the date axis to be readable?

enter image description here

UPDATE: The output according to stefan's suggestion didn't solve it because each asset id should have its own heatmap row. Right now, they are plotted on top of each other.

enter image description here

UPDATE 2

For me this didn't work

breaks <- sort(unique(as.numeric(factor(data$id)))) - .5
labels <- levels(factor(data$id))

Typing out manually:

  mutate(xmin = date,
         xmax = date   months(1),
         ymin = case_when(
               id == "asset_a" ~ 0,
               id == "asset_b" ~ 1,
               id == "asset_c" ~ 2,
               id == "asset_d" ~ 3,
               id == "asset_e" ~ 4,
               id == "asset_f" ~ 5,
               id == "asset_g" ~ 6,
               id == "asset_h" ~ 7,
               id == "asset_i" ~ 8,
         ),
         ymax = case_when(
               id == "asset_a" ~ 1,
               id == "asset_b" ~ 2,
               id == "asset_c" ~ 3,
               id == "asset_d" ~ 4,
               id == "asset_e" ~ 5,
               id == "asset_f" ~ 6,
               id == "asset_g" ~ 7,
               id == "asset_h" ~ 8,
               id == "asset_i" ~ 9)
  )

solved the problem and each asset id is stacked on top of each other.

CodePudding user response:

Not 100% sure what's the issue but my guess is that geom_tile chooses the same width and height for each tile. However, because months differ in the number of days you get the discontinuities.

One option to achieve your desired result while still making use of a date or date time would be to switch to geom_rect which however needs some additional steps to compute the coordinates of the four corners:

EDIT To make the example more in line with your real data I added a two more assets where I simply replicated your example data but added some random noise to the returns. I also fixed a bug in my original code which resulted in wrong axis labels as I missed to sort the values when computing the breaks.

library(ggplot2)
library(dplyr)
library(lubridate)
library(scales)

set.seed(123)

data2 <- data
data2$id <- "asset_b"
data2$returns <- data2$returns   runif(nrow(data2), 0, .2)

data3 <- data
data3$id <- "asset_c"
data3$returns <- data3$returns   runif(nrow(data3), 0, .2)
data <- bind_rows(data2, data, data3)

data <- data %>%
  mutate(date = make_datetime(year, month),
         xmin = date,
         xmax = date   months(1),
         ymin = as.numeric(factor(id)) - 1,
         ymax = as.numeric(factor(id)))

breaks <- sort(unique(as.numeric(factor(data$id)))) - .5
labels <- levels(factor(data$id))

data %>% 
  ggplot(aes(x = date))    
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = returns))  
  scale_y_continuous(breaks = breaks, labels = labels)  
  theme_classic()  
  scale_fill_gradientn(colours=c("#66bf7b", "#a1d07e", "#dce182", 
                                 "#ffeb84",
                                 "#fedb81", "#faa075", "#faa075"),
                       values=rescale(c(-3, -2, -1,
                                        0,
                                        1, 2, 3)),
                       guide="colorbar")  
  
  labs(x="",y="")

Another option to get your desired result would be to convert your date column to a character as you already suggested in your post. Here we have to do some data wrangling to set up the axis breaks, labels and limits to mimic the date axis:

data <- data %>%
  mutate(date = make_datetime(year, month))

limits <- expand.grid(
  year = 2016:2018, 
  month = 1:12
) %>% 
  add_row(year = 2019, month = 1) %>% 
  mutate(date = make_datetime(year, month)) %>% 
  pull(date) %>% 
  sort()
breaks <- make_datetime(2016:2019, 1)

data %>% 
  ggplot(aes(x = as.character(date), y = id))    
  geom_tile(aes(fill = returns))  
  scale_x_discrete(breaks = as.character(breaks), labels = year(breaks), limits = as.character(limits))  
  theme_classic()  
  scale_fill_gradientn(colours=c("#66bf7b", "#a1d07e", "#dce182", 
                                 "#ffeb84",
                                 "#fedb81", "#faa075", "#faa075"),
                       values=rescale(c(-3, -2, -1,
                                        0,
                                        1, 2, 3)),
                       guide="colorbar")  
  
  labs(x="",y="")

DATA

structure(list(year = c(2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 
2016L, 2016L, 2016L, 2016L, 2016L, 2017L, 2017L, 2017L, 2017L, 
2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2018L, 
2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 2018L, 
2018L, 2018L), month = c(2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 
11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), id = c("asset_a", 
"asset_a", "asset_a", "asset_a", "asset_a", "asset_a", "asset_a", 
"asset_a", "asset_a", "asset_a", "asset_a", "asset_a", "asset_a", 
"asset_a", "asset_a", "asset_a", "asset_a", "asset_a", "asset_a", 
"asset_a", "asset_a", "asset_a", "asset_a", "asset_a", "asset_a", 
"asset_a", "asset_a", "asset_a", "asset_a", "asset_a", "asset_a", 
"asset_a", "asset_a", "asset_a", "asset_a"), group = c("group1", 
"group1", "group1", "group1", "group1", "group1", "group1", "group1", 
"group1", "group1", "group1", "group1", "group1", "group1", "group1", 
"group1", "group1", "group1", "group1", "group1", "group1", "group1", 
"group1", "group1", "group1", "group1", "group1", "group1", "group1", 
"group1", "group1", "group1", "group1", "group1", "group1"), 
    returns = c(0.11592118, 0.104526128, 0.244925532, 0.252377372, 
    0.282602889, 0.607148925, 0.257815581, 0.202712468, 0.177455704, 
    0.208526305, 0.179808043, 0.204425208, 0.167787787, 0.122357671, 
    0.095889965, 0.180117687, 0.146912234, 0.286743829, 0.201531197, 
    0.166819132, 0.136262625, 0.128844762, 0.147595906, 0.099843877, 
    0.1928918, 0.188344307, 0.155801889, 0.185813076, 0.217531263, 
    0.269840901, 0.267351364, 0.183753448, 0.195182592, 0.228886115, 
    0.166964407)), class = "data.frame", row.names = c(NA, -35L
))
  • Related