Subset pandas column based on specific columns-CodePudding

I want to keep the rows of a pandas DataFrame only if the columns from column 7 onwards are not null. My code raised TypeError: unhashable type: 'Index'.

import pandas as pd
import numpy as np

merged_df = merged_df.dropna(merged_df.columns[7:])

Traceback:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
/tmp/ipykernel_17/1347313789.py in <module>
      1 # Drop NA
----> 2 merged_df = merged_df.dropna(merged_df.columns[7:])
      3 merged_df

/opt/conda/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    309                     stacklevel=stacklevel,
    310                 )
--> 311             return func(*args, **kwargs)
    312 
    313         return wrapper

/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py in dropna(self, axis, how, thresh, subset, inplace)
   5942             raise TypeError("supplying multiple axes to axis is no longer supported.")
   5943 
-> 5944         axis = self._get_axis_number(axis)
   5945         agg_axis = 1 - axis
   5946 

/opt/conda/lib/python3.7/site-packages/pandas/core/generic.py in _get_axis_number(cls, axis)
    544     def _get_axis_number(cls, axis: Axis) -> int:
    545         try:
--> 546             return cls._AXIS_TO_AXIS_NUMBER[axis]
    547         except KeyError:
    548             raise ValueError(f"No axis named {axis} for object type {cls.__name__}")

TypeError: unhashable type: 'Index'

CodePudding user response:

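The error occurs because the first positional parameter of DataFrame.dropna is axis, so the column Index you passed was interpreted as an axis; the columns have to be passed with the subset keyword instead.

I think you need to keep only the rows where all values from column 7 onwards are non-missing - build a mask with notna and add DataFrame.all: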

merged_df[merged_df.iloc[:,7:].notna().all(axis=1)]

If you need to keep the rows that have at least one non-null value in those columns:

merged_df[merged_df.iloc[:,7:].notna().any(axis=1)]

EDIT:

merged_df.insert(0,'a4',4) 
print (merged_df)
   a4  a3  a2  a1  a0  a    b    c    d
0   4   3   2   1   0  1  2.0  3.0  4.0
1   4   3   2   1   0  7  NaN  NaN  NaN
2   4   3   2   1   0  7  7.0  9.0  NaN


df1 = merged_df[merged_df.iloc[:,7:].notna().all(axis=1)]
print (df1)
   a4  a3  a2  a1  a0  a    b    c    d
0   4   3   2   1   0  1  2.0  3.0  4.0

df2 = merged_df[merged_df.iloc[:,7:].notna().any(axis=1)]
print (df2)
   a4  a3  a2  a1  a0  a    b    c    d
0   4   3   2   1   0  1  2.0  3.0  4.0
2   4   3   2   1   0  7  7.0  9.0  NaN

df1 = merged_df.dropna(subset=merged_df.columns[7:].tolist())
print (df1)
   a4  a3  a2  a1  a0  a    b    c    d
0   4   3   2   1   0  1  2.0  3.0  4.0