Function to GetMaxValueOffset?-CodePudding

I'm looking for a one line column calculation that can find out the index OFFSET from the current row backwards, using a window size.

For example, given this dataset:

import numpy as np
import pandas as pd

# Create a reproducible, static dataframe.
# 1 minute SPY data. Skip to the bottom...
df = pd.DataFrame([
    {
        "time": "2021-10-26 9:30",
        "open": "457.2",
        "high": "457.29",
        "low": "456.78",
        "close": "456.9383",
        "volume": "594142"
    },
    {
        "time": "2021-10-26 9:31",
        "open": "456.94",
        "high": "457.07",
        "low": "456.8",
        "close": "456.995",
        "volume": "194061"
    },
    {
        "time": "2021-10-26 9:32",
        "open": "456.99",
        "high": "457.22",
        "low": "456.84",
        "close": "457.21",
        "volume": "186114"
    },
    {
        "time": "2021-10-26 9:33",
        "open": "457.22",
        "high": "457.45",
        "low": "457.2011",
        "close": "457.308",
        "volume": "294158"
    },
    {
        "time": "2021-10-26 9:34",
        "open": "457.31",
        "high": "457.4",
        "low": "457.25",
        "close": "457.32",
        "volume": "172574"
    },
    {
        "time": "2021-10-26 9:35",
        "open": "457.31",
        "high": "457.48",
        "low": "457.18",
        "close": "457.44",
        "volume": "396668"
    },
    {
        "time": "2021-10-26 9:36",
        "open": "457.48",
        "high": "457.6511",
        "low": "457.44",
        "close": "457.57",
        "volume": "186777"
    },
    {
        "time": "2021-10-26 9:37",
        "open": "457.5699",
        "high": "457.73",
        "low": "457.5699",
        "close": "457.69",
        "volume": "187596"
    },
    {
        "time": "2021-10-26 9:38",
        "open": "457.7",
        "high": "457.73",
        "low": "457.54",
        "close": "457.63",
        "volume": "185570"
    },
    {
        "time": "2021-10-26 9:39",
        "open": "457.63",
        "high": "457.64",
        "low": "457.31",
        "close": "457.59",
        "volume": "164707"
    },
    {
        "time": "2021-10-26 9:40",
        "open": "457.59",
        "high": "457.72",
        "low": "457.46",
        "close": "457.7199",
        "volume": "167438"
    },
    {
        "time": "2021-10-26 9:41",
        "open": "457.72",
        "high": "457.8",
        "low": "457.68",
        "close": "457.72",
        "volume": "199951"
    },
    {
        "time": "2021-10-26 9:42",
        "open": "457.73",
        "high": "457.74",
        "low": "457.6",
        "close": "457.62",
        "volume": "152134"
    },
    {
        "time": "2021-10-26 9:43",
        "open": "457.6",
        "high": "457.65",
        "low": "457.45",
        "close": "457.5077",
        "volume": "142530"
    },
    {
        "time": "2021-10-26 9:44",
        "open": "457.51",
        "high": "457.64",
        "low": "457.4001",
        "close": "457.61",
        "volume": "122575"
    },
    {
        "time": "2021-10-26 9:45",
        "open": "457.61",
        "high": "457.76",
        "low": "457.58",
        "close": "457.75",
        "volume": "119886"
    },
    {
        "time": "2021-10-26 9:46",
        "open": "457.74",
        "high": "457.75",
        "low": "457.37",
        "close": "457.38",
        "volume": "183157"
    },
    {
        "time": "2021-10-26 9:47",
        "open": "457.42",
        "high": "457.49",
        "low": "457.37",
        "close": "457.44",
        "volume": "128542"
    },
    {
        "time": "2021-10-26 9:48",
        "open": "457.43",
        "high": "457.49",
        "low": "457.33",
        "close": "457.44",
        "volume": "154181"
    },
    {
        "time": "2021-10-26 9:49",
        "open": "457.43",
        "high": "457.5898",
        "low": "457.42",
        "close": "457.47",
        "volume": "163063"
    },
    {
        "time": "2021-10-26 9:50",
        "open": "457.45",
        "high": "457.59",
        "low": "457.44",
        "close": "457.555",
        "volume": "96229"
    },
    {
        "time": "2021-10-26 9:51",
        "open": "457.56",
        "high": "457.61",
        "low": "457.31",
        "close": "457.4217",
        "volume": "110380"
    },
    {
        "time": "2021-10-26 9:52",
        "open": "457.42",
        "high": "457.56",
        "low": "457.42",
        "close": "457.47",
        "volume": "107518"
    },
    {
        "time": "2021-10-26 9:53",
        "open": "457.475",
        "high": "457.51",
        "low": "457.4",
        "close": "457.48",
        "volume": "78062"
    },
    {
        "time": "2021-10-26 9:54",
        "open": "457.49",
        "high": "457.57",
        "low": "457.42",
        "close": "457.46",
        "volume": "133883"
    },
    {
        "time": "2021-10-26 9:55",
        "open": "457.47",
        "high": "457.56",
        "low": "457.45",
        "close": "457.51",
        "volume": "98998"
    },
    {
        "time": "2021-10-26 9:56",
        "open": "457.51",
        "high": "457.54",
        "low": "457.43",
        "close": "457.43",
        "volume": "110237"
    },
    {
        "time": "2021-10-26 9:57",
        "open": "457.43",
        "high": "457.65",
        "low": "457.375",
        "close": "457.65",
        "volume": "98794"
    },
    {
        "time": "2021-10-26 9:58",
        "open": "457.66",
        "high": "457.69",
        "low": "457.35",
        "close": "457.45",
        "volume": "262154"
    },
    {
        "time": "2021-10-26 9:59",
        "open": "457.45",
        "high": "457.47",
        "low": "457.33",
        "close": "457.4",
        "volume": "74685"
    },
    {
        "time": "2021-10-26 10:00",
        "open": "457.41",
        "high": "457.48",
        "low": "457.18",
        "close": "457.38",
        "volume": "166617"
    },
    {
        "time": "2021-10-26 10:01",
        "open": "457.39",
        "high": "457.7",
        "low": "457.39",
        "close": "457.5",
        "volume": "265649"
    },
    {
        "time": "2021-10-26 10:02",
        "open": "457.51",
        "high": "457.57",
        "low": "457.39",
        "close": "457.53",
        "volume": "131947"
    },
    {
        "time": "2021-10-26 10:03",
        "open": "457.53",
        "high": "457.54",
        "low": "457.4",
        "close": "457.51",
        "volume": "80111"
    },
    {
        "time": "2021-10-26 10:04",
        "open": "457.51",
        "high": "457.62",
        "low": "457.5",
        "close": "457.6101",
        "volume": "117174"
    },
    {
        "time": "2021-10-26 10:05",
        "open": "457.621",
        "high": "457.64",
        "low": "457.51",
        "close": "457.58",
        "volume": "168758"
    },
    {
        "time": "2021-10-26 10:06",
        "open": "457.58",
        "high": "457.64",
        "low": "457.46",
        "close": "457.61",
        "volume": "84076"
    },
    {
        "time": "2021-10-26 10:07",
        "open": "457.62",
        "high": "457.7401",
        "low": "457.62",
        "close": "457.66",
        "volume": "125156"
    },
    {
        "time": "2021-10-26 10:08",
        "open": "457.665",
        "high": "457.69",
        "low": "457.5",
        "close": "457.67",
        "volume": "116919"
    },
    {
        "time": "2021-10-26 10:09",
        "open": "457.69",
        "high": "457.72",
        "low": "457.5",
        "close": "457.57",
        "volume": "102551"
    },
    {
        "time": "2021-10-26 10:10",
        "open": "457.56",
        "high": "457.75",
        "low": "457.56",
        "close": "457.7",
        "volume": "109165"
    },
    {
        "time": "2021-10-26 10:11",
        "open": "457.7",
        "high": "457.725",
        "low": "457.63",
        "close": "457.66",
        "volume": "146209"
    },
    {
        "time": "2021-10-26 10:12",
        "open": "457.665",
        "high": "457.88",
        "low": "457.64",
        "close": "457.86",
        "volume": "210620"
    },
    {
        "time": "2021-10-26 10:13",
        "open": "457.855",
        "high": "457.96",
        "low": "457.83",
        "close": "457.95",
        "volume": "159975"
    },
    {
        "time": "2021-10-26 10:14",
        "open": "457.95",
        "high": "458.02",
        "low": "457.93",
        "close": "457.95",
        "volume": "152042"
    },
    {
        "time": "2021-10-26 10:15",
        "open": "457.96",
        "high": "458.15",
        "low": "457.96",
        "close": "458.08",
        "volume": "146047"
    },
    {
        "time": "2021-10-26 10:16",
        "open": "458.085",
        "high": "458.17",
        "low": "457.99",
        "close": "458.15",
        "volume": "100732"
    },
    {
        "time": "2021-10-26 10:17",
        "open": "458.17",
        "high": "458.33",
        "low": "458.155",
        "close": "458.245",
        "volume": "235072"
    },
    {
        "time": "2021-10-26 10:18",
        "open": "458.25",
        "high": "458.29",
        "low": "458.14",
        "close": "458.16",
        "volume": "422002"
    },
    {
        "time": "2021-10-26 10:19",
        "open": "458.17",
        "high": "458.2801",
        "low": "458.1699",
        "close": "458.28",
        "volume": "114611"
    },
    {
        "time": "2021-10-26 10:20",
        "open": "458.29",
        "high": "458.39",
        "low": "458.24",
        "close": "458.37",
        "volume": "241797"
    },
    {
        "time": "2021-10-26 10:21",
        "open": "458.37",
        "high": "458.42",
        "low": "458.31",
        "close": "458.345",
        "volume": "124824"
    },
    {
        "time": "2021-10-26 10:22",
        "open": "458.33",
        "high": "458.49",
        "low": "458.33",
        "close": "458.47",
        "volume": "132125"
    }
])

... and using the below code...

# Convert close to numeric re: the .csv to .json
# converter tool I used online...
df['close'] = pd.to_numeric(df['close'])

# Define the lookback length
lookback_window = 20

# Define a blank target column
df['MaxIndexOffset'] = None

# WORKING EXAMPLE
# Iterate through the df
for i in range(lookback_window, len(df)):

    # Convert the current chunk of rows to a list and reverse it
    curr_range = df['close'].iloc[i-lookback_window:i].to_list()
    curr_range.reverse()
    # Then find the max between 0 and the first instance of the max
    # value. The first instance (because we reversed the order) will
    # be the number I need. If the current row IS the max value, then
    # the offset will be 0. If the max value is 2 bars ago, it'll be 2, etc.
    x1 = max(0, curr_range.index(max(curr_range)))
    df['MaxIndexOffset'].iloc[i] = x1

# Export the correct answer dataset to compare the next function
# to below
df.to_csv("correct_answers.csv", index=False)

# NON-WORKING EXAMPLE
# What I'd like to do is NOT use a for loop to do the above. I envision
# we can use np.where() here? But I don't know how yet. Psuedo might look
# something like:
df['MaxIndexOffset2'] = i - (df['close'].rolling(window=lookback_window).tolist().reverse().index(max(df['close'].rolling(window=lookback_window))))
# ... but obviously doesn't work. "AttributeError: 'Rolling' object has no attribute 'to_list'"

# Check for matches
df.to_csv("correct_answers2.csv", index=False)

...is there a way to do this without the for loop? I'm looking to get the offset (i.e. number of rows back from the current row, up to the lookback_window) of the max value in that window. If there are multiples of the same max value, it should just return the closest one from the current row. Any ideas??? Thanks!

CodePudding user response：

Reverse the close column, then apply np.argmax on a rolling window of size equal to lookback_window then shift the result upwards and assign it to the column MaxIndexOffset

df['MaxIndexOffset'] = df['close'][::-1].rolling(lookback_window)\
                         .apply(np.argmax, raw=True).shift(-lookback_window   1)

                time      open      high       low     close  volume  MaxIndexOffset
0    2021-10-26 9:30     457.2    457.29    456.78  456.9383  594142             NaN
1    2021-10-26 9:31    456.94    457.07     456.8  456.9950  194061             NaN
2    2021-10-26 9:32    456.99    457.22    456.84  457.2100  186114             NaN
3    2021-10-26 9:33    457.22    457.45  457.2011  457.3080  294158             NaN
4    2021-10-26 9:34    457.31     457.4    457.25  457.3200  172574             NaN
5    2021-10-26 9:35    457.31    457.48    457.18  457.4400  396668             NaN
6    2021-10-26 9:36    457.48  457.6511    457.44  457.5700  186777             NaN
7    2021-10-26 9:37  457.5699    457.73  457.5699  457.6900  187596             NaN
8    2021-10-26 9:38     457.7    457.73    457.54  457.6300  185570             NaN
9    2021-10-26 9:39    457.63    457.64    457.31  457.5900  164707             NaN
10   2021-10-26 9:40    457.59    457.72    457.46  457.7199  167438             NaN
11   2021-10-26 9:41    457.72     457.8    457.68  457.7200  199951             NaN
12   2021-10-26 9:42    457.73    457.74     457.6  457.6200  152134             NaN
13   2021-10-26 9:43     457.6    457.65    457.45  457.5077  142530             NaN
14   2021-10-26 9:44    457.51    457.64  457.4001  457.6100  122575             NaN
15   2021-10-26 9:45    457.61    457.76    457.58  457.7500  119886             NaN
16   2021-10-26 9:46    457.74    457.75    457.37  457.3800  183157             NaN
17   2021-10-26 9:47    457.42    457.49    457.37  457.4400  128542             NaN
18   2021-10-26 9:48    457.43    457.49    457.33  457.4400  154181             NaN
19   2021-10-26 9:49    457.43  457.5898    457.42  457.4700  163063             4.0
20   2021-10-26 9:50    457.45    457.59    457.44  457.5550   96229             5.0
21   2021-10-26 9:51    457.56    457.61    457.31  457.4217  110380             6.0
22   2021-10-26 9:52    457.42    457.56    457.42  457.4700  107518             7.0
23   2021-10-26 9:53   457.475    457.51     457.4  457.4800   78062             8.0
24   2021-10-26 9:54    457.49    457.57    457.42  457.4600  133883             9.0
25   2021-10-26 9:55    457.47    457.56    457.45  457.5100   98998            10.0
26   2021-10-26 9:56    457.51    457.54    457.43  457.4300  110237            11.0
27   2021-10-26 9:57    457.43    457.65   457.375  457.6500   98794            12.0
28   2021-10-26 9:58    457.66    457.69    457.35  457.4500  262154            13.0
29   2021-10-26 9:59    457.45    457.47    457.33  457.4000   74685            14.0
30  2021-10-26 10:00    457.41    457.48    457.18  457.3800  166617            15.0
31  2021-10-26 10:01    457.39     457.7    457.39  457.5000  265649            16.0
32  2021-10-26 10:02    457.51    457.57    457.39  457.5300  131947            17.0
33  2021-10-26 10:03    457.53    457.54     457.4  457.5100   80111            18.0
34  2021-10-26 10:04    457.51    457.62     457.5  457.6101  117174            19.0
35  2021-10-26 10:05   457.621    457.64    457.51  457.5800  168758             8.0
36  2021-10-26 10:06    457.58    457.64    457.46  457.6100   84076             9.0
37  2021-10-26 10:07    457.62  457.7401    457.62  457.6600  125156             0.0
38  2021-10-26 10:08   457.665    457.69     457.5  457.6700  116919             0.0
39  2021-10-26 10:09    457.69    457.72     457.5  457.5700  102551             1.0
40  2021-10-26 10:10    457.56    457.75    457.56  457.7000  109165             0.0
41  2021-10-26 10:11     457.7   457.725    457.63  457.6600  146209             1.0
42  2021-10-26 10:12   457.665    457.88    457.64  457.8600  210620             0.0
43  2021-10-26 10:13   457.855    457.96    457.83  457.9500  159975             0.0
44  2021-10-26 10:14    457.95    458.02    457.93  457.9500  152042             0.0
45  2021-10-26 10:15    457.96    458.15    457.96  458.0800  146047             0.0
46  2021-10-26 10:16   458.085    458.17    457.99  458.1500  100732             0.0
47  2021-10-26 10:17    458.17    458.33   458.155  458.2450  235072             0.0
48  2021-10-26 10:18    458.25    458.29    458.14  458.1600  422002             1.0
49  2021-10-26 10:19    458.17  458.2801  458.1699  458.2800  114611             0.0
50  2021-10-26 10:20    458.29    458.39    458.24  458.3700  241797             0.0
51  2021-10-26 10:21    458.37    458.42    458.31  458.3450  124824             1.0
52  2021-10-26 10:22    458.33    458.49    458.33  458.4700  132125             0.0