How to drop rows with NA based on a range of columns-CodePudding

I want to drop rows in the pandas DataFrame meth_clin_sub_nt_kipan if values in the columns meth_clin_sub_nt_kipan.iloc[:,7:-1] are NA.

import pandas as pd
import numpy as np

# Drop rows if cg* columns has NA
meth_clin_sub_nt_2_kipan = meth_clin_sub_nt_kipan.dropna(subset=meth_clin_sub_nt_kipan.iloc[:,7:-1],inplace=True)

Traceback:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/tmp/ipykernel_3010/559698406.py in <module>
      1 # Drop rows if cg* columns has NA
----> 2 meth_clin_sub_nt_2_kipan = meth_clin_sub_nt_kipan.dropna(subset=meth_clin_sub_nt_kipan.iloc[:,7:-1],inplace=True)


/opt/conda/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
    309                     stacklevel=stacklevel,
    310                 )
--> 311             return func(*args, **kwargs)
    312 
    313         return wrapper

/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py in dropna(self, axis, how, thresh, subset, inplace)
   5948         if subset is not None:
   5949             ax = self._get_axis(agg_axis)
-> 5950             indices = ax.get_indexer_for(subset)
   5951             check = indices == -1
   5952             if check.any():

/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_indexer_for(self, target, **kwargs)
   5273         """
   5274         if self._index_as_unique:
-> 5275             return self.get_indexer(target, **kwargs)
   5276         indexer, _ = self.get_indexer_non_unique(target)
   5277         return indexer

/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_indexer(self, target, method, limit, tolerance)
   3435         # returned ndarray is np.intp
   3436         method = missing.clean_reindex_fill_method(method)
-> 3437         target = self._maybe_cast_listlike_indexer(target)
   3438 
   3439         self._check_indexing_method(method, limit, tolerance)

/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in _maybe_cast_listlike_indexer(self, target)
   5706         Analogue to maybe_cast_indexer for get_indexer instead of get_loc.
   5707         """
-> 5708         return ensure_index(target)
   5709 
   5710     @final

/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in ensure_index(index_like, copy)
   6334     else:
   6335 
-> 6336         return Index(index_like, copy=copy)
   6337 
   6338 

/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in __new__(cls, data, dtype, copy, name, tupleize_cols, **kwargs)
    474             raise cls._scalar_data_error(data)
    475         elif hasattr(data, "__array__"):
--> 476             return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs)
    477         else:
    478 

/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in __new__(cls, data, dtype, copy, name, tupleize_cols, **kwargs)
    467 
    468             klass = cls._dtype_to_subclass(arr.dtype)
--> 469             arr = klass._ensure_array(arr, dtype, copy)
    470             disallow_kwargs(kwargs)
    471             return klass._simple_new(arr, name)

/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/numeric.py in _ensure_array(cls, data, dtype, copy)
    169         if subarr.ndim > 1:
    170             # GH#13601, GH#20285, GH#27125
--> 171             raise ValueError("Index data must be 1-dimensional")
    172 
    173         subarr = np.asarray(subarr)

ValueError: Index data must be 1-dimensional

Data:

meth_clin_sub_nt_kipan.iloc[0,0:19].to_dict()

{'admin.disease_code': 'kirp',
 'days_to_death': nan,
 'vital_status': 'alive',
 'age_at_initial_pathologic_diagnosis': 53.0,
 'gender': 'male',
 'karnofsky_performance_score': nan,
 'survival': 'lts',
 'cg00000029': 0.461440642939772,
 'cg00000165': 0.143910373119058,
 'cg00000236': 0.847164847154162,
 'cg00000289': 0.737361955793681,
 'cg00000292': 0.716794733144112,
 'cg00000321': 0.351877113536983,
 'cg00000363': 0.248986769373366,
 'cg00000622': 0.0121360989202765,
 'cg00000658': 0.876303885229884,
 'cg00000721': 0.944311384947134,
 'cg00000734': 0.0490407302658151,
 'cg00000769': 0.0200484962577958}

CodePudding user response：

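The subset argument of dropna expects column labels, but meth_clin_sub_nt_kipan.iloc[:,7:-1] is a two-dimensional DataFrame, which is why pandas raises "Index data must be 1-dimensional". Pass the column names instead: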

meth_clin_sub_nt_2_kipan = meth_clin_sub_nt_kipan.dropna(subset=meth_clin_sub_nt_kipan.columns[7:-1])

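By the way, if you assign the DataFrame with the NaN rows dropped to a new variable, you do not need inplace=True. In fact, with inplace=True dropna returns None, so the new variable would end up as None. inplace=True is useful when you want to modify your current DataFrame without assigning it back to itself, so this: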

meth_clin_sub_nt_kipan.dropna(subset=meth_clin_sub_nt_kipan.columns[7:-1], inplace=True)

is equivalent to this:

meth_clin_sub_nt_kipan = meth_clin_sub_nt_kipan.dropna(subset=meth_clin_sub_nt_kipan.columns[7:-1])