Here's the data:
data <-tibble::tribble(
~PROPNUM, ~initial_date, ~third_date, ~OIL, ~GAS, ~Prod_date,
"49-005-61202", "2011-09-30", "2011-11-30", 775L, 0L, "9/30/2011",
"49-005-61202", "2011-09-30", "2011-11-30", 2210L, 477L, "10/31/2011",
"49-005-61202", "2011-09-30", "2011-11-30", 1455L, 1008L, "11/30/2011",
"49-005-61202", "2011-09-30", "2011-11-30", 1054L, 875L, "12/31/2011",
"49-005-61202", "2011-09-30", "2011-11-30", 992L, 3003L, "1/31/2012",
"49-005-61202", "2011-09-30", "2011-11-30", 828L, 745L, "2/29/2012",
"49-005-61202", "2011-09-30", "2011-11-30", 485L, 533L, "3/31/2012",
"49-005-61202", "2011-09-30", "2011-11-30", 1084L, 821L, "4/30/2012",
"49-005-61202", "2011-09-30", "2011-11-30", 677L, 623L, "5/31/2012",
"49-005-61202", "2011-09-30", "2011-11-30", 658L, 598L, "6/30/2012",
"49-005-61202", "2011-09-30", "2011-11-30", 643L, 624L, "7/31/2012",
"49-005-61202", "2011-09-30", "2011-11-30", 608L, 619L, "8/31/2012",
"49-005-61202", "2011-09-30", "2011-11-30", 559L, 561L, "9/30/2012",
"49-005-61202", "2011-09-30", "2011-11-30", 558L, 567L, "10/31/2012",
"49-005-61202", "2011-09-30", "2011-11-30", 514L, 491L, "11/30/2012",
"49-005-61202", "2011-09-30", "2011-11-30", 531L, 462L, "12/31/2012",
"49-005-61202", "2011-09-30", "2011-11-30", 481L, 480L, "1/31/2013",
"49-005-61202", "2011-09-30", "2011-11-30", 436L, 424L, "2/28/2013",
"49-005-61202", "2011-09-30", "2011-11-30", 449L, 488L, "3/31/2013",
"49-005-61202", "2011-09-30", "2011-11-30", 429L, 495L, "4/30/2013",
"49-005-61202", "2011-09-30", "2011-11-30", 422L, 511L, "5/31/2013",
"49-005-61202", "2011-09-30", "2011-11-30", 395L, 497L, "6/30/2013",
"49-005-61202", "2011-09-30", "2011-11-30", 382L, 519L, "7/31/2013",
"49-005-61202", "2011-09-30", "2011-11-30", 391L, 519L, "8/31/2013"
)
Thank you so much for the reply!
I tested on my own data using AKrun and PaulS's method, but it's returning 0 obs . 2 variables, the warning I got is "All formats failed to parse. No formats found. " I uploaded the data again, this one should reproduce my problem.
CodePudding user response:
Using base R
data$Prod_date <- as.Date(data$Prod_date, "%m/%d/%Y")
data$initial_date <- as.Date(data$initial_date)
data$third_date <- as.Date(data$third_date)
subdat <- subset(data, Prod_date <= third_date & Prod_date >= initial_date)
aggregate(OIL ~ PROPNUM, subdat, mean)
-output
PROPNUM OIL
1 49-005-61202 1480
CodePudding user response:
Here is one that is similar to @PaulS one, but using between
from dyplr
and it is also much longer:-)
library(dplyr)
library(lubridate)
data %>%
mutate(across(contains("date"), mdy)) %>%
rowwise() %>%
mutate(helper = ifelse(between(Prod_date, initial_date, third_date), 1, 0)) %>%
group_by(PROPNUM, helper) %>%
summarise(Avg_OIL = mean(OIL)) %>%
filter(helper == 1) %>%
select(-helper)
PROPNUM Avg_OIL
<fct> <dbl>
1 49-005-27619 1613.
2 49-005-61086 2154.
3 49-005-61202 1480
4 49-005-61268 7063
CodePudding user response:
A possible solution, which is based on lubridate::mdy
, to filter out all cases outside the dates interval, and dplyr
:
library(dplyr)
library(lubridate)
data %>%
group_by(PROPNUM) %>%
filter(mdy(Prod_date) <= mdy(third_date) & mdy(Prod_date) >= mdy(initial_date)) %>%
summarise(avgOil = mean(OIL))
#> # A tibble: 4 × 2
#> PROPNUM avgOil
#> <fct> <dbl>
#> 1 49-005-27619 1613.
#> 2 49-005-61086 2154.
#> 3 49-005-61202 1480
#> 4 49-005-61268 7063.
CodePudding user response:
You can use