Home > Back-end >  Find minima and maxima of DataFrame by chronological order
Find minima and maxima of DataFrame by chronological order

Time:10-09

I have a pandas DataFrame from which I extract local minima and maxima. It works well so far, but how can I place them into a single list ordered by date (chronological order)? They are currently separated into two lists, and I only want one list of price values, in chronological order.

import pandas as pd
import numpy as np
import yfinance
from scipy.signal import argrelextrema
import matplotlib.dates as mpl_dates


def extract_data(symbol='GBPJPY=X', start_date='2022-09-25',
                 end_date='2022-10-08', interval='1h'):
    """Download price history with yfinance and pass it to find_extrema.

    The defaults reproduce the original hard-coded request, so calling
    ``extract_data()`` with no arguments behaves as before.

    Args:
        symbol: Ticker symbol handed to ``yfinance.Ticker``.
        start_date: Value forwarded as ``start`` to ``Ticker.history``.
        end_date: Value forwarded as ``end`` to ``Ticker.history``.
        interval: Value forwarded as ``interval`` to ``Ticker.history``.

    Returns:
        The DataFrame restricted to the Date/Open/High/Low/Close columns,
        after it has been passed to ``find_extrema``.
    """
    history = yfinance.Ticker(symbol).history(
        interval=interval, start=start_date, end=end_date)

    # Copy the index into a regular column, then convert each timestamp to
    # a Matplotlib date number.
    history['Date'] = pd.to_datetime(history.index)
    history['Date'] = history['Date'].apply(mpl_dates.date2num)

    df = history.loc[:, ['Date', 'Open', 'High', 'Low', 'Close']]

    # Call function to find Min-Max Extrema
    find_extrema(df)

    return df

def find_extrema(df, n=10):
    """Flag local minima and maxima of ``df['Close']`` and print them.

    Two columns are written to ``df`` in place, ``'min'`` and ``'max'``.
    Each holds the close price on the rows that ``argrelextrema`` selects
    and NaN on every other row.

    Args:
        df: DataFrame with a numeric ``Close`` column.
        n: Number of points to be checked before and after each candidate;
            forwarded to ``argrelextrema`` as ``order``.

    Returns:
        A tuple ``(min_values_list, max_values_list)`` with the selected
        close prices, each list in the row order of ``df``.
    """
    closes = df['Close'].to_numpy()

    # Positional row numbers of the selected points. The comparators are the
    # non-strict ones, so ties with neighbouring values are selected too.
    min_rows = argrelextrema(closes, np.less_equal, order=n)[0]
    max_rows = argrelextrema(closes, np.greater_equal, order=n)[0]

    # Assignment aligns on the index, leaving NaN on unselected rows.
    df['min'] = df['Close'].iloc[min_rows]
    df['max'] = df['Close'].iloc[max_rows]

    # dropna() removes the NaN placeholders without a manual loop.
    min_values_list = df['min'].dropna().tolist()
    max_values_list = df['max'].dropna().tolist()

    print(f"Min: {min_values_list}")
    print(f"Max: {max_values_list}")

    return min_values_list, max_values_list


# Script entry point: fetch the price history and print the extrema lists.
extract_data()

CodePudding user response:

Option 1

  • First, use df.to_numpy to convert columns min and max to a np.array.
  • Get rid of all the NaN values by selecting from the array using np.logical_not applied to a boolean mask (created with np.isnan).
# Two-column array: one row per DataFrame row, columns are (min, max).
arr = df[['min', 'max']].to_numpy()

# Selecting with a same-shape boolean mask returns a flat array read row by
# row, so the kept values follow the DataFrame's row order.
not_nan = ~np.isnan(arr)
value_list = arr[not_nan].tolist()

print(value_list)

[159.7030029296875,
 154.8979949951172,
 160.7830047607422,
 165.43800354003906,
 149.55799865722656,
 162.80499267578125,
 156.6529998779297,
 164.31900024414062,
 156.125,
 153.13499450683594,
 161.3520050048828,
 156.9340057373047,
 162.52200317382812,
 155.7740020751953,
 160.98500061035156,
 161.83700561523438]

Option 2

Rather more cumbersome:

n = 10

# get the positional indices for `min` and `max` in two arrays
_min = argrelextrema(df.Close.values, np.less_equal, order=n)[0]
_max = argrelextrema(df.Close.values, np.greater_equal, order=n)[0]

# create columns (assuming you need this for other purposes as well)
df['min'] = df.iloc[_min]['Close']
df['max'] = df.iloc[_max]['Close']

# create lists for `min` and `max`
min_values_list = df['min'].dropna().tolist()
max_values_list = df['max'].dropna().tolist()

# join the lists; values and indices are concatenated in the same order so
# that position i in one list still corresponds to position i in the other
value_list2 = min_values_list + max_values_list
value_idxs = _min.tolist() + _max.tolist()

# finally, sort `value_list2` based on `value_idxs` (row position)
value_list2 = [x for _, x in sorted(zip(value_idxs, value_list2))]

# check if result is the same:
value_list2 == value_list
# True
  • Related