Function `foo` first summarizes the values in the given dataframe by `p1` and then by `p2`, where `p1` and `p2` are offset aliases.
import pandas as pd
import numpy as np
# Function
def foo(d, p1, p2, brk):
    """Count, per ``p2`` window, the ``p1`` windows whose sum exceeds ``brk``.

    Args:
        d: DataFrame to summarize. ``pd.Grouper(freq=...)`` is applied without
            a key, so the frame is expected to have a datetime-like index.
        p1: Offset alias of the first (inner) grouping window, e.g. ``"7D"``.
        p2: Offset alias of the second (outer) grouping window, e.g. ``"1M"``.
        brk: Threshold that each ``p1``-window sum must strictly exceed.

    Returns:
        DataFrame with one row per ``p2`` window; each cell holds the number
        of ``p1`` windows in that ``p2`` window whose sum was greater than
        ``brk``.
    """
    # NOTE: the result is only meaningful when p2 spans more than p1
    # (``assert p2 > p1``). That check is not implemented here: comparing the
    # alias strings directly would not compare the window lengths.

    # Group the argument ``d`` rather than a module-level ``df`` so the
    # function works on whatever frame the caller passes in.
    exceeds = d.groupby(pd.Grouper(freq=p1)).sum().gt(brk)
    return exceeds.groupby(pd.Grouper(freq=p2)).sum()
# Data: hourly timestamps from 2017-01-01 to 2017-03-31 with seeded random
# values in a "val" column, indexed by "datetime".
np.random.seed(42)
stamps = pd.date_range("2017-01-01", "2017-03-31", freq="1H")
df = (
    pd.DataFrame({"datetime": stamps})
    .assign(val=np.random.sample(len(stamps)))  # one sample per timestamp
    .set_index("datetime")
)
foo(df, "7D", "1M", 80)
# val
# datetime
# 2017-01-31 4
# 2017-02-28 3
# 2017-03-31 3
The goal is to implement `assert p2 > p1` so that the result of `foo` is meaningful. One way would be to convert both `p1` and `p2` to `Timedelta` and compare them. However, some aliases, such as `1M`, aren't valid for conversion to `Timedelta`.
`pandas.Timedelta("1M")` gives the following warning:
FutureWarning: Units 'M', 'Y' and 'y' do not represent unambiguous timedelta values and will be removed in a future version
`pd.Grouper(freq="1M") > pd.Grouper(freq="7D")` gives the following error:
TypeError: '>' not supported between instances of 'TimeGrouper' and 'TimeGrouper'
What is the proper way to compare two grouper `freq` windows?
CodePudding user response:
Based on this answer, you could probably do something like this:
def foo(d, p1, p2, brk):
    """Count, per ``p2`` window, the ``p1`` windows whose sum exceeds ``brk``.

    Args:
        d: DataFrame to summarize. ``pd.Grouper(freq=...)`` is applied without
            a key, so the frame is expected to have a datetime-like index.
        p1: Offset alias of the first (inner) grouping window, e.g. ``"7D"``.
        p2: Offset alias of the second (outer) grouping window; must span
            more time than ``p1``.
        brk: Threshold that each ``p1``-window sum must strictly exceed.

    Returns:
        DataFrame with one row per ``p2`` window; each cell holds the number
        of ``p1`` windows in that ``p2`` window whose sum was greater than
        ``brk``.

    Raises:
        AssertionError: If one period of ``p2`` is not longer than one period
            of ``p1``.
    """
    from pandas.tseries.frequencies import to_offset

    # Offsets cannot be compared directly, so each one is applied to a
    # timestamp and the resulting spans are compared. A fixed reference is
    # used instead of the current time so the check gives the same answer on
    # every run.
    anchor = pd.Timestamp("2000-01-01")

    def _span(alias):
        # Length of one full period: step onto the offset's own boundary
        # first, then measure the distance to the next boundary. Measuring
        # from an arbitrary instant would shorten anchored offsets such as
        # month end when that instant is already close to the boundary.
        offset = to_offset(alias)
        aligned = anchor + offset
        return (aligned + offset) - aligned

    assert _span(p2) > _span(p1), 'p1 must be less than p2'

    exceeds = d.groupby(pd.Grouper(freq=p1)).sum().gt(brk)
    return exceeds.groupby(pd.Grouper(freq=p2)).sum()
Test:
>>> foo(df, "7D", "1M", 80)
---------------------------------------------------------------------------
AssertionError Traceback (most recent call last)
Input In [51], in <cell line: 1>()
----> 1 foo(df, "7D", "1M", 80)
Input In [50], in foo(d, p1, p2, brk)
3 from datetime import datetime
4 tmp = datetime.now()
----> 5 assert tmp + to_offset(p1) > tmp + to_offset(p2), 'p1 must be less than p2'
7 s1 = d.groupby(pd.Grouper(freq=p1)).sum().gt(brk)
8 s2 = s1.groupby(pd.Grouper(freq=p2)).sum()
AssertionError: p1 must be less than p2