I inherited a code base that relies heavily on `DataFrame.assign` and dict unpacking for arguments, which is not something I have seen much before.
I was doing some tests today and I guess I must have hit an edge case; I have been looking for an explanation and/or a solution to this for the last few hours.
I cannot share the data, but I have managed to create the following MRE.
import sys
import pandas as pd
# Report the interpreter and pandas versions used for this run.
print("python", sys.version)
print("pandas", pd.__version__)
# template df for the output format
def template_df() -> pd.DataFrame:
    """Return an empty DataFrame that defines the output column layout.

    The frame has no rows; it only fixes the names and the order of the
    output columns so they can be filled in afterwards.

    Returns:
        A new, empty DataFrame with the columns ``id``, ``subid``,
        ``empty``, ``dat1``, ``dat2``, ``dat3`` and ``dat4``.
    """
    return pd.DataFrame(
        columns=["id", "subid", "empty", "dat1", "dat2", "dat3", "dat4"]
    )
# input data looks like this: three rows, only the last one has a non-null id1
df = pd.DataFrame({
"id1": [None, None, "0039"],
"id2": ["10", "12", "a1"],
"dat": [601, 482, 890],
})
# filter on id2 like 'a%' (boolean mask; only "a1" starts with "a")
m1 = df["id2"].str.startswith("a")
# start building output with input data and constant data
# The template has no rows of its own, so the rows of output come from the
# Series selected out of df with m1.
# NOTE(review): the selected row presumably keeps its label from df instead
# of being renumbered from 0 -- confirm by inspecting output.index.
output = template_df().assign(**{
"id": df.loc[m1, "id2"],
"subid": df.loc[m1, "id1"],
"dat1": df.loc[m1, "dat"],
"dat2": "constant2",
})
# filter for id1 = '0039' (str.match tests the pattern at the start of each string)
m2 = output["subid"].str.match("0039")
# add data for id1 = '0039' only
# NOTE: this is the statement that raises the KeyError reported for this script.
output[m2] = output[m2].assign(**{"dat3": "dependent3", "dat4": "dependent4"})
When I execute the code above, I get:
% python soq.py
python 3.9.12 (main, Mar 26 2022, 15:51:15)
[Clang 13.1.6 (clang-1316.0.21.2)]
pandas 1.4.1
Traceback (most recent call last):
File "/path/to/src/dir/soq.py", line 35, in <module>
output[m2] = output[m2].assign(**{"dat3": "dependent3", "dat4": "dependent4"})
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/frame.py", line 3643, in __setitem__
self._setitem_array(key, value)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/frame.py", line 3678, in _setitem_array
self.iloc[indexer] = value
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 716, in __setitem__
iloc._setitem_with_indexer(indexer, value, self.name)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 1688, in _setitem_with_indexer
self._setitem_with_indexer_split_path(indexer, value, name)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 1724, in _setitem_with_indexer_split_path
self._setitem_with_indexer_frame_value(indexer, value, name)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 1813, in _setitem_with_indexer_frame_value
self._setitem_single_column(loc, val, pi)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 1875, in _setitem_single_column
ser = value[np.argsort(pi)]
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/series.py", line 984, in __getitem__
return self._get_with(key)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/series.py", line 1019, in _get_with
return self.loc[key]
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 967, in __getitem__
return self._getitem_axis(maybe_callable, axis=axis)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 1191, in _getitem_axis
return self._getitem_iterable(key, axis=axis)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 1132, in _getitem_iterable
keyarr, indexer = self._get_listlike_indexer(key, axis)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexing.py", line 1327, in _get_listlike_indexer
keyarr, indexer = ax._get_indexer_strict(key, axis_name)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 5782, in _get_indexer_strict
self._raise_if_missing(keyarr, indexer, axis_name)
File "/path/to/src/dir/.venv/lib/python3.9/site-packages/pandas/core/indexes/base.py", line 5842, in _raise_if_missing
raise KeyError(f"None of [{key}] are in the [{axis_name}]")
KeyError: "None of [Int64Index([0], dtype='int64')] are in the [index]"
CodePudding user response:
You are not using `assign()` properly. You should instead use `.loc`:
output.loc[m2, ['dat3', 'dat4']] = ["dependent3", "dependent4"]
See this for more info.