I am trying to use df.update(), but my DataFrames have different sizes. I want to fill up the smaller df with dummy rows so that it matches the shape of the bigger df. Here's a minimal example:
import pandas as pd
import numpy as np
# Frame to be updated: three keyed rows whose feature cells all hold the
# placeholder string "INVALID".
df = pd.DataFrame(
    {
        "Feat_A": ["INVALID"] * 3,
        "Feat_B": ["INVALID"] * 3,
        "Key": [12, 25, 99],
    }
)

# Smaller frame holding the real values; it has no row for Key 25.
data = {"Feat_A": [1, np.nan], "Feat_B": [np.nan, 2], "Key": [12, 99]}
result = pd.DataFrame(data)
# df.update(result) does not work because the two frames have different sizes/shapes
# result should be
# Feat_A Feat_B Key
# 0 1.0 NaN 12
# NaN NaN NaN NaN
# 2 NaN 2.0 99
# df.update(result) should work now
CodePudding user response:
This did it:
# Re-key `result` onto df's Key order so the two frames line up row for row.
# Keys found only in df turn into all-NaN rows, and update() only copies
# non-NA values, so those rows leave df untouched.
key_order = df.set_index("Key").index
padded = result.set_index("Key").reindex(key_order).reset_index()
df.update(padded)
CodePudding user response:
Does this meet your needs? I modified your example to use unique DataFrame values so that proper alignment can be confirmed:
# Modified example: each placeholder names its own column and key, which makes
# the row alignment easy to verify by eye.
df = pd.DataFrame(
    {
        "Feat_A": ["INVALID_A12", "INVALID_A25", "INVALID_A99"],
        "Feat_B": ["INVALID_B12", "INVALID_B25", "INVALID_B99"],
        "Key": [12, 25, 99],
    }
).set_index("Key")  # align on Key rather than on row position

data = {"Feat_A": [1, np.nan], "Feat_B": [np.nan, 2], "Key": [12, 99]}
# Index by Key as well, then adopt df's index and columns; a key that exists
# only in df (here 25) becomes an all-NaN row.
result = pd.DataFrame(data).set_index("Key").reindex_like(df)

# Update in place.
# NOTE(review): the direction here is result <- df, so every cell of `result`
# (including the real values 1 and 2) is replaced by df's placeholder, as the
# printed output shows. The question asked for the opposite direction,
# df.update(result) -- confirm which one is intended.
result.update(df)
print(result)
Feat_A Feat_B
Key
12 INVALID_A12 INVALID_B12
25 INVALID_A25 INVALID_B25
99 INVALID_A99 INVALID_B99