Home > Back-end >  Compute trend of moisture index time series in different subperiods
Compute trend of moisture index time series in different subperiods

Time:11-23

desired_output_sample I have following data:

#1. dates of 15 day frequency: 
dates = seq(as.Date("2016-09-01"), as.Date("2020-07-30"), by=15) #96 times observation  

#2. water content in crops corresponding to the times given.  
water <- c(0.5702722, 0.5631781, 0.5560839, 0.5555985, 0.5519783, 0.5463459, 
0.5511598, 0.546652, 0.5361545, 0.530012, 0.5360571, 0.5396569, 
0.5683526, 0.6031535, 0.6417821, 0.671358, 0.7015542, 0.7177007, 
0.7103561, 0.7036985, 0.6958607, 0.6775161, 0.6545367, 0.6380155, 
0.6113306, 0.5846186, 0.5561815, 0.5251135, 0.5085149, 0.495352, 
0.485819, 0.4730029, 0.4686458, 0.4616468, 0.4613918, 0.4615532, 
0.4827496, 0.5149105, 0.5447824, 0.5776764, 0.6090217, 0.6297454, 
0.6399422, 0.6428941, 0.6586344, 0.6507473, 0.6290631, 0.6011123, 
0.5744375, 0.5313527, 0.5008027, 0.4770338, 0.4564025, 0.4464508, 
0.4309046, 0.4351668, 0.4490393, 0.4701232, 0.4911582, 0.5162941, 
0.5490387, 0.5737573, 0.6031149, 0.6400073, 0.6770058, 0.7048311, 
0.7255012, 0.739107, 0.7338938, 0.7265202, 0.6940718, 0.6757214, 
0.6460862, 0.6163091, 0.5743775, 0.5450822, 0.5057753, 0.4715266, 
0.4469859, 0.4303232, 0.4187793, 0.4119401, 0.4201316, 0.426369, 
0.4419331, 0.4757525, 0.5070846, 0.5248457, 0.5607567, 0.5859825, 
0.6107531, 0.6201754, 0.6356589, 0.6336177, 0.6275579, 0.6214981)

I want to compute trend of the water content or moisture data corresponding to different subperiods. Lets say: one trend from 2016 - 09-01 to 2019-11-30. and other trend from 2019-12-15 to the last date (in this case 2020-07-27).

And I want to make a plot like the one attached. Appreciate your help. Can be in R or in python.

CodePudding user response:

To draw a trend line, you can look on this tutorial https://www.statology.org/ggplot-trendline/

Or on this stackoverflow question Draw a trend line using ggplot

To split your dataset in two groups you simply need to do something like this (in R).

data <- data.frame(dates, water)
#This neat trick allows you to turn a logical value into a number
data$group <- 1   (data$dates > "2019-11-30")
old <- subset(data,group == 1)
new <- subset(data,group == 2)

For the plots:

library(ggplot2)
ggplot(old,aes(x = dates, y = water))  
  geom_smooth(method = "lm", col = "blue")   
  geom_point()
ggplot(new,aes(x = dates, y = water))  
  geom_smooth(method = "lm", col = "red")   
  geom_point()

CodePudding user response:

Here is a full-fledged example with added labels:

library(dplyr)
library(ggplot2)
dates <-  seq(as.Date("2016-09-01"), as.Date("2020-07-30"), by=15)
wc <- as.numeric(strsplit("0.5702722 0.5631781 0.5560839 0.5555985 0.5519783 0.5463459 0.5511598 0.5466520 0.5361545 0.5300120 0.5360571 0.5396569 0.5683526 0.6031535 0.6417821 0.6713580 0.7015542 0.7177007 0.7103561 0.7036985 0.6958607 0.6775161 0.6545367 0.6380155 0.6113306 0.5846186 0.5561815 0.5251135 0.5085149 0.4953520 0.4858190 0.4730029 0.4686458 0.4616468 0.4613918 0.4615532 0.4827496 0.5149105 0.5447824 0.5776764 0.6090217 0.6297454 0.6399422 0.6428941 0.6586344 0.6507473 0.6290631 0.6011123 0.5744375 0.5313527 0.5008027 0.4770338 0.4564025 0.4464508 0.4309046 0.4351668 0.4490393 0.4701232 0.4911582 0.5162941 0.5490387 0.5737573 0.6031149 0.6400073 0.6770058 0.7048311 0.7255012 0.7391070 0.7338938 0.7265202 0.6940718 0.6757214 0.6460862 0.6163091 0.5743775 0.5450822 0.5057753 0.4715266 0.4469859 0.4303232 0.4187793 0.4119401 0.4201316 0.4263690 0.4419331 0.4757525 0.5070846 0.5248457 0.5607567 0.5859825 0.6107531 0.6201754 0.6356589 0.6336177 0.6275579 0.6214981", " |\\n")[[1]])

data <- data.frame(date=dates, water_content=wc) %>% 
    mutate(group = ifelse(date <= as.Date("2019-11-30"), "g1", "g2"))

# calculate linear regression and create labels
lmo <- data %>% 
    group_by(group) %>% 
    summarise(res=list(stats::lm(water_content ~ date, data = cur_data()))) %>% 
    .$res
lab <- sapply(lmo, \(x) 
    paste("Slope=", signif(x$coef[[2]], 5),
        "\nAdj R2=", signif(summary(x)$adj.r.squared, 5),
        "\nP=", signif(summary(x)$coef[2,4], 5)))

ggplot(data=data, aes(x=date, y=water_content, col=group))   
    geom_point()  
    stat_smooth(geom="smooth", method="lm")  
    geom_text(aes(date, y, label=lab),
        data=data.frame(data %>% group_by(group) %>% 
                summarise(date=first(date)), y=Inf, lab=lab), 
        vjust=1, hjust=.2)

Created on 2022-11-23 with reprex v2.0.2

CodePudding user response:

Here is a way. Create a grouping variable by dates, coerce it to factor and geom_smooth will draw the two regression lines.

suppressPackageStartupMessages({
  library(ggplot2)
  library(ggpubr)
})

df1 <- data.frame(dates, water)

breakpoint <- as.Date("2019-11-30")
df1$group <- factor(df1$dates > breakpoint, labels = c("before", "after"))

ggplot(df1, aes(dates, water, colour = group))  
  geom_line()  
  geom_point(shape = 21, fill = 'white')  
  geom_smooth(formula = y ~ x, method = lm)  
  geom_vline(xintercept = breakpoint, linetype = "dotdash", linewidth = 1)  
  stat_cor(label.y = c(0.43, 0.38), show.legend = FALSE)  
  stat_regline_equation(label.y = c(0.45, 0.4), show.legend = FALSE)  
  scale_color_manual(values = c(before = 'red', after = 'blue'))  
  theme_bw(base_size = 15) 

Created on 2022-11-23 with reprex v2.0.2

  • Related