Python updating stock option field into three fields-CodePudding

I've been struggling with this problem for sometime. I'm a hobby developer and self taught, but no where near intermediate. My for loop appears to work properly, until I try to add to the data frame using the if, elsif, else statement.

Instead of updating on each row the for loop updates all records in the column to the same value.

Why is this?

There should be different values for the contract_date, contract_type, and strike_price.

from numpy import dtype
import pandas as pd
import requests
import urllib.parse
from datetime import datetime
from dateutil import tz
     
s = requests.Session()
    
    
headers = {       #match headers on API request
        'Accept':'*',
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36'
        }
    #print('Enter a Ticker to Pull Data From')
    #ticker = input()
ticker = 'SPY'
    
tickerurl = f'https://cdn.cboe.com/api/global/delayed_quotes/options/{ticker}.json'
data = s.get(tickerurl).json()
    
lookupdata = data['data']['options']
df = pd.json_normalize(data['data']['options'])
df['ticker'] = ticker
  
for contract in lookupdata:
    y = contract['option'].replace(ticker,'')
    contract_date = f'20{y[0:2]}-{y[2:4]}-{y[4:6]}'
    z = y.replace(y[0:6],'')
    contract_type = "Call" if z[0] == 'C' else "Put"
    strikeprice = z.replace(z[0],'')
    strike_price = float(strikeprice)/1000
    df['contract_date'] = contract_date
    df['contract_type'] = contract_type
    df['strike_price'] = strike_price
     
print(df)

What is the proper way to update each row of the dataframe instead of the entire column?

I've tried the following with a bunch of variations and keep coming up with the same result or an endless loop that doesn't add to the dataframe properly.

    from numpy import dtype
    import pandas as pd
    import requests
    import urllib.parse
    from datetime import datetime
    from dateutil import tz
         
    s = requests.Session()
        
        
    headers = {       #match headers on API request
            'Accept':'*',
            'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36'
            }
        #print('Enter a Ticker to Pull Data From')
        #ticker = input()
    ticker = 'SPY'
        
    tickerurl = f'https://cdn.cboe.com/api/global/delayed_quotes/options/{ticker}.json'
    data = s.get(tickerurl).json()
        
    lookupdata = data['data']['options']
    df = pd.json_normalize(data['data']['options'])
    df['ticker'] = ticker
    df2 = pd.DataFrame()  
    for contract in lookupdata:
        y = contract['option'].replace(ticker,'')
        contract_date = f'20{y[0:2]}-{y[2:4]}-{y[4:6]}'
        z = y.replace(y[0:6],'')
        contract_type = "Call" if z[0] == 'C' else "Put"
        strikeprice = z.replace(z[0],'')
        strike_price = float(strikeprice)/1000
        df['contract_date'] = contract_date
        df['contract_type'] = contract_type
        df['strike_price'] = strike_price
        df2 = df.append(df2) 
    
    print(df2)

# tried the following as well:

#    df2.append(df)
#df2 = pd.concat(df2)
#print(df2)

# gives the error TypeError: first argument must be an iterable of pandas objects, you passed an object of type "DataFrame"


    #trying the following gives a key error on contract_date
    
    #    df2.append(df)
    #    df2 = pd.concat(df2['contract_date']['contract_type'][strike_price])
    #print(df2)

#   df2.append(df)
#    df2 = #pd.concat((df2['contract_date'],df2['contract_type'],df2['strike_price']), #ignore_index=True)
#print(df2)

CodePudding user response：

I think you are making this far more difficult than it needs to be. You have a df via pd.json_normalize() that contains a column like this:

import pandas as pd

data = {'option': ['SPY220829C00310000','SPY220829P00310000']}

df = pd.DataFrame(data)

print(df)

               option
0  SPY220829C00310000
1  SPY220829P00310000

On Wikipedia you can find the standard format of the "blocks" that make up these codes. What you want to do, is translate those blocks into a regex pattern, and then use pd.Series.str.extract to retrieve the individual blocks and assign them to individual columns.

# read as (string up to 6 letters)(6 digits)(1 cap letter)(rest of digits)
pattern = r'([A-Z]{0,6})(\d{6})([A-Z])(\d )'

df[['ticker','contract_date','contract_type','strike_price']] = \
    df.option.str.extract(pattern, expand=True)

print(df)

               option ticker contract_date contract_type strike_price
0  SPY220829C00310000    SPY        220829             C     00310000
1  SPY220829P00310000    SPY        220829             P     00310000

Next, you can alter the formats of the newly created columns:

df.contract_date = pd.to_datetime(df.contract_date, format='%y%m%d')
df.contract_type = df.contract_type.map({'P':'Put','C':'Call'})
df.strike_price = df.strike_price.astype(float)/1000

print(df)

               option ticker contract_date contract_type  strike_price
0  SPY220829C00310000    SPY    2022-08-29          Call         310.0
1  SPY220829P00310000    SPY    2022-08-29           Put         310.0

CodePudding user response：

Not exactly sure what the problem is, but I guess instead of:

df = pd.json_normalize(...)
df2 = pd.DataFrame()  
for contract in lookupdata:
    # ... do something
    df2 = df.append(df2)

You actully want to do something like this?

df = pd.json_normalize(...)
for contract in lookupdata:
    df2 = pd.DataFrame()
    # ... do something
    df = df.append(df2)