Import data from another dataframe for matching cells-CodePudding

I have two data frames that I import from the excel sheets. There is some information I need to import from auxiliary dataframe to main dataframe if there is a matching. My code:

auxdf =pd.DataFrame({'prod':['look','duck','chik']},index=['prod_team1','prod_team2','prod_team3'])

auxdf =     

            prod
prod_team1  look
prod_team2  duck
prod_team3  chik
 
# Main dataframe after importing from an excel sheet

maindf = 
            col1                     col2
mar_team1   aoo                      auxdf['prod_team1']   
mar_team2   auxdf.loc['prod_team2']  bla
mar_team3   foo                      auxdf['prod_team3']

# I want to import information from auxdf into maindf
for i in range(len(mdf)):
  for j in range(len(mdf.columns)):
    # Check if a cell value has a string called 'auxdf', if so, change its value
    if 'auxdf' in maindf[maindf.columns[0]].iloc[0]: 
       maindf[maindf.columns[0]].iloc[0] = eval(maindf[maindf.columns[0]].iloc[0])

Expected output:

maindf = 
            col1      col2
mar_team1   aoo       look   
mar_team2   duck      bla
mar_team3   foo       chik

Need help to find most pythonic way of doing it. Thanks

CodePudding user response：

You can create Series by DataFrame.stack, get matched values by Series.str.extract, so possible mapping by Series.map with auxdf['prod'], last reshape back by Series.unstack:

s = maindf.stack()
s1 = s.str.extract(r"auxdf.*\['(.*?)'\]", expand=False)
print (s1)
mar_team1  col1           NaN
           col2    prod_team1
mar_team2  col1    prod_team2
           col2           NaN
mar_team3  col1           NaN
           col2    prod_team3
dtype: object

df = s1.map(auxdf['prod']).fillna(s).unstack()
print (df)
           col1  col2
mar_team1   aoo  look
mar_team2  duck   bla
mar_team3   foo  chik

CodePudding user response：

You can use a regex with str.replace to match and replace the auxdf strings with the matching value:

out = maindf.apply(lambda s: s.str.replace(
        r'auxdf.*\[\'(\w )\'\]',                 # extract key
        lambda m: auxdf['prod'].get(m.group(1)), # replace by matching value
        regex=True)

Output:

           col1  col2
mar_team1   aoo  look
mar_team2  duck   bla
mar_team3   foo  chik

CodePudding user response：

I hope it works for your solution,

import pandas as pd
import re
auxdf =pd.DataFrame({'prod':['look','duck','chik']},index=['prod_team1','prod_team2','prod_team3'])

maindf = pd.DataFrame(
    {
        "col1": ["aoo", "auxdf.loc['prod_team2']", "foo"],
        "col2": ["auxdf['prod_team1']", "bla", "auxdf['prod_team3']"]
    }, index=['mar_team1', 'mar_team2', 'mar_team3']
)
def replaceFunc(col):
    c = re.findall("auxdf\.?\w*\['(.*)'\]", col)
    if len(c) > 0:
        col = c[0]
        for i, v in zip(auxdf.values, auxdf.index):
            if v == col:
                col = i[0]
    return col
maindf['col1'] = maindf['col1'].apply(replaceFunc)
maindf['col2'] = maindf['col2'].apply(replaceFunc)
maindf