I want to compare the sku of source and destination file and if destination sku matches with source, i want to copy the meta_title, meta_description and description of that row
import pandas as pd
source = pd.read_csv('/content/source-source.csv')
destination = pd.read_csv('/content/out.csv')
for i in range(0, len(source)):
try:
source_sku = source.iloc[i]['sku']
destination_sku = destination.iloc[i]['sku']
source_meta_title = source.iloc[i]['meta_title']
source_meta_description = source.iloc[i]['meta_description']
source_description = source.iloc[i]['description']
if source_sku == destination_sku:
destination.loc[i, 'meta_title'] = source_meta_title
destination.loc[i, 'meta_description'] = source_meta_description
destination.loc[i, 'description'] = source_description
destination.to_csv('merged.csv', index=False, encoding='utf-8-sig')
except ValueError:
break
source.csv
sku meta_title meta_description description
a ab ab ab
b bb bb bb
destination.csv
sku meta_title meta_description description
a bb bb bb
b ab ab ab
merged.csv
sku meta_title meta_description description
a ab ab ab
b bb bb bb
my code is working now, but if the number of lines changes increase, then it throw error
CodePudding user response:
I think this is what you want. It keeps the set of SKUs from destination and replaces the values for SKUs that are also in source.
import pandas as pd
source = pd.read_csv('/content/source-source.csv')
destination = pd.read_csv('/content/out.csv')
destination = destination.merge(
source,
how='left',
on=['sku'],
suffixes=('_old', ''),
)
value_cols = ['meta_title', 'meta_description', 'description']
for c in value_cols:
destination[c] = destination[c].fillna(destination[f'{c}_old'])
destination = destination.drop(columns=[
f'{c}_old'
for c
in value_cols
])
destination.to_csv('merged.csv', index=False, encoding='utf-8-sig')
CodePudding user response:
If in the merged dataset, you want to keep the rows in destination and source with a different value in ['sku'], you could try doing this way
import pandas as pd
source = pd.DataFrame({'sku':['a','b', 'c'], 'meta_title':['ab','bb', 'ac'], 'meta_description':['ab','bb', 'ac'], 'description':['ab','bb', 'ac']})
destination = pd.DataFrame({'sku':['a','b', 'd'], 'meta_title':['bb','ab', 'dd'], 'meta_description':['bb','ab', 'dd'], 'description':['bb','ab', 'dd']})
df = source.merge(destination, how='outer', on='sku', indicator=True, suffixes=['', '_2'])
cols = ['sku', 'meta_title', 'meta_description', 'description', '_merge']
df = df[cols].loc[df['sku'].drop_duplicates().index].reset_index(drop=True)
# To check only rows with the same SKUs
df[df['_merge']=='both']