I have a function that takes a dataframe column and returns a boolean mask based on certain conditions:
def is_downtrending(close):
out = np.full(close.shape, False)
for i in range(close.shape[0]):
# if we've had two consecutive red days
if (close[i] < close[i - 1]) and (close[i - 1] < close[i - 2]):
out[i] = True
else:
out[i] = False
return out
Normally I call it by passing in a column:
ohlc['is_downtrending'] = is_downtrending(ohlc['close'])
But how can I make this work using groupby
? When I try:
df['is_downtrending'] = df.groupby("stock_id").apply(is_downtrending)
I get the following error:
Traceback (most recent call last):
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/indexes/base.py", line 3629, in get_loc
return self._engine.get_loc(casted_key)
File "pandas/_libs/index.pyx", line 136, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 163, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 5198, in pandas._libs.hashtable.PyObjectHashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 5206, in pandas._libs.hashtable.PyObjectHashTable.get_item
KeyError: 0
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/dan/Documents/code/wolfhound/add_indicators_daily.py", line 29, in <module>
df['is_downtrending'] = df.groupby("stock_id").apply(is_downtrending)
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/groupby/groupby.py", line 1423, in apply
result = self._python_apply_general(f, self._selected_obj)
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/groupby/groupby.py", line 1464, in _python_apply_general
values, mutated = self.grouper.apply(f, data, self.axis)
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/groupby/ops.py", line 761, in apply
res = f(group)
File "/home/dan/Documents/code/wolfhound/add_indicators_daily.py", line 11, in is_downtrending
if (close[i] < close[i - 1]) and (close[i - 1] < close[i - 2]):
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/frame.py", line 3505, in __getitem__
indexer = self.columns.get_loc(key)
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/indexes/base.py", line 3631, in get_loc
raise KeyError(key) from err
KeyError: 0
When I try passing in the column:
df['is_downtrending'] = df.groupby("stock_id").apply(is_downtrending('close'))
The error changes to:
Traceback (most recent call last):
File "/home/dan/Documents/code/wolfhound/add_indicators_daily.py", line 29, in <module>
df['is_downtrending'] = df.groupby("stock_id").apply(is_downtrending('close'))
File "/home/dan/Documents/code/wolfhound/add_indicators_daily.py", line 7, in is_downtrending
out = np.full(close.shape, False)
AttributeError: 'str' object has no attribute 'shape'
Here's what the df looks like:
index date symbol stock_id open high low close volume vwap
0 0 2021-10-11 BVN 13 7.69 7.98 7.5600 7.61 879710 7.782174
1 1 2021-10-12 BVN 13 7.67 8.08 7.5803 8.02 794436 7.967061
2 2 2021-10-13 BVN 13 8.12 8.36 8.0900 8.16 716012 8.231286
3 3 2021-10-14 BVN 13 8.26 8.29 8.0500 8.28 586091 8.185899
4 4 2021-10-15 BVN 13 8.18 8.44 8.0600 8.44 1278409 8.284539
... ... ... ... ... ... ... ... ... ... ...
227774 227774 2022-10-04 ERIC 11000 6.27 6.32 6.2400 6.29 14655189 6.280157
227775 227775 2022-10-05 ERIC 11000 6.17 6.31 6.1500 6.29 10569193 6.219965
227776 227776 2022-10-06 ERIC 11000 6.20 6.25 6.1800 6.22 7918812 6.217198
227777 227777 2022-10-07 ERIC 11000 6.17 6.19 6.0800 6.10 9671252 6.135976
227778 227778 2022-10-10 ERIC 11000 6.13 6.15 6.0200 6.04 6310661 6.066256
[227779 rows x 10 columns]
I've tried Code Different's suggestion:
import pandas as pd
from IPython.display import display
import sqlite3 as sql
import numpy as np
conn = sql.connect('allStockData.db')
# get everything inside daily_ohlc and add to a dataframe
df = pd.read_sql_query("SELECT * from daily_ohlc_init", conn)
df["is_downtrending"] = (
df["close"]
.groupby(['stock_id']).diff() # diff between current close and previous close
.groupby(['stock_id']).rolling(2) # consider the diff of the last n days
.apply(lambda diff: (diff < 0).all()) # true if they are all < 0
).fillna(0)
df.to_sql('daily_ohlc_init_with_indicators', if_exists='replace', con=conn, index=True)
Which gives the error:
Traceback (most recent call last):
File "/home/dan/Documents/code/wolfhound/add_indicators_daily.py", line 13, in <module>
.groupby(['stock_id']).diff() # diff between current close and previous close
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/series.py", line 1922, in groupby
return SeriesGroupBy(
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/groupby/groupby.py", line 882, in __init__
grouper, exclusions, obj = get_grouper(
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/groupby/grouper.py", line 882, in get_grouper
raise KeyError(gpr)
KeyError: 'stock_id'
And trying Ynjxsjmh suggestion threw the error:
raise KeyError(key) from err
KeyError: -1
So I changed the code to:
def is_downtrending(close):
out = np.full(close.shape, False)
for i in close.index: # <--- changes here
# if we've had two consecutive red days
if i > 3:
if (close[i] < close[i - 1]) and (close[i - 1] < close[i - 2]):
out[i] = True
else:
out[i] = False
return out
df['is_downtrending'] = df.groupby("stock_id", as_index=False)["close"].transform(is_downtrending)
Which gives the error:
Traceback (most recent call last):
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/indexes/base.py", line 3629, in get_loc
return self._engine.get_loc(casted_key)
File "pandas/_libs/index.pyx", line 136, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/index.pyx", line 163, in pandas._libs.index.IndexEngine.get_loc
File "pandas/_libs/hashtable_class_helper.pxi", line 2131, in pandas._libs.hashtable.Int64HashTable.get_item
File "pandas/_libs/hashtable_class_helper.pxi", line 2140, in pandas._libs.hashtable.Int64HashTable.get_item
KeyError: -1
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "/home/dan/Documents/code/wolfhound/add_indicators_daily.py", line 25, in <module>
df['is_downtrending'] = df.groupby("stock_id", as_index=False)["close"].transform(is_downtrending)
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/groupby/generic.py", line 1184, in transform
return self._transform(
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/groupby/groupby.py", line 1642, in _transform
return self._transform_general(func, *args, **kwargs)
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/groupby/generic.py", line 1156, in _transform_general
path, res = self._choose_path(fast_path, slow_path, group)
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/groupby/generic.py", line 1208, in _choose_path
res = slow_path(group)
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/groupby/generic.py", line 1201, in <lambda>
slow_path = lambda group: group.apply(
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/frame.py", line 8848, in apply
return op.apply().__finalize__(self, method="apply")
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/apply.py", line 733, in apply
return self.apply_standard()
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/apply.py", line 857, in apply_standard
results, res_index = self.apply_series_generator()
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/apply.py", line 873, in apply_series_generator
results[i] = self.f(v)
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/groupby/generic.py", line 1202, in <lambda>
lambda x: func(x, *args, **kwargs), axis=self.axis
File "/home/dan/Documents/code/wolfhound/add_indicators_daily.py", line 17, in is_downtrending
if (close[i] < close[i - 1]) and (close[i - 1] < close[i - 2]):
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/series.py", line 958, in __getitem__
return self._get_value(key)
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/series.py", line 1069, in _get_value
loc = self.index.get_loc(label)
File "/home/dan/.local/lib/python3.10/site-packages/pandas/core/indexes/base.py", line 3631, in get_loc
raise KeyError(key) from err
KeyError: -1
CodePudding user response:
When you call df.groupby("...").apply(is_downtrending)
, you are passing a dataframe to the function. Your function does not take a dataframe, hence the error.
You can send only one column into the function, as a Series:
df.groupby("stock_id")["close"].apply(is_downtrending)
However, I think your function does not calculate the red day correctly.
If your intention is downtrend = n red days in a row, try this:
stock_id = ohlc["stock_id"]
n = 2
df["is_downtrending"] = (
ohlc["close"]
.groupby(stock_id).diff() # diff between current close and previous close
.groupby(stock_id).rolling(n) # consider the diff of the last n days
.apply(lambda diff: (diff < 0).all()) # true if they are all < 0
).fillna(0)
CodePudding user response:
You can try with groupby.transform
and update Series[idx]
to Series.iloc[idx]
def is_downtrending(close):
out = np.full(close.shape, False)
for i in range(close.shape[0]):
# if we've had two consecutive red days
if (close.iloc[i] < close.iloc[i - 1]) and (close.iloc[i - 1] < close.iloc[i - 2]): # <--- changes here
out[i] = True
else:
out[i] = False
return out
def is_downtrending2(close):
out = np.full(close.shape, False)
for i in close.index: # <--- changes here
# if we've had two consecutive red days
if (close[i] < close[i - 1]) and (close[i - 1] < close[i - 2]):
out[i] = True
else:
out[i] = False
return out
df['is_downtrending'] = df.groupby("stock_id", as_index=False)["close"].transform(is_downtrending)