I want to drop rows from the pandas DataFrame meth_clin_sub_nt_kipan
if any of the columns in meth_clin_sub_nt_kipan.iloc[:,7:-1]
contain NA.
import pandas as pd
import numpy as np
# Drop rows if cg* columns has NA
meth_clin_sub_nt_2_kipan = meth_clin_sub_nt_kipan.dropna(subset=meth_clin_sub_nt_kipan.iloc[:,7:-1],inplace=True)
Traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/tmp/ipykernel_3010/559698406.py in <module>
1 # Drop rows if cg* columns has NA
----> 2 meth_clin_sub_nt_2_kipan = meth_clin_sub_nt_kipan.dropna(subset=meth_clin_sub_nt_kipan.iloc[:,7:-1],inplace=True)
/opt/conda/lib/python3.7/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
/opt/conda/lib/python3.7/site-packages/pandas/core/frame.py in dropna(self, axis, how, thresh, subset, inplace)
5948 if subset is not None:
5949 ax = self._get_axis(agg_axis)
-> 5950 indices = ax.get_indexer_for(subset)
5951 check = indices == -1
5952 if check.any():
/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_indexer_for(self, target, **kwargs)
5273 """
5274 if self._index_as_unique:
-> 5275 return self.get_indexer(target, **kwargs)
5276 indexer, _ = self.get_indexer_non_unique(target)
5277 return indexer
/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in get_indexer(self, target, method, limit, tolerance)
3435 # returned ndarray is np.intp
3436 method = missing.clean_reindex_fill_method(method)
-> 3437 target = self._maybe_cast_listlike_indexer(target)
3438
3439 self._check_indexing_method(method, limit, tolerance)
/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in _maybe_cast_listlike_indexer(self, target)
5706 Analogue to maybe_cast_indexer for get_indexer instead of get_loc.
5707 """
-> 5708 return ensure_index(target)
5709
5710 @final
/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in ensure_index(index_like, copy)
6334 else:
6335
-> 6336 return Index(index_like, copy=copy)
6337
6338
/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in __new__(cls, data, dtype, copy, name, tupleize_cols, **kwargs)
474 raise cls._scalar_data_error(data)
475 elif hasattr(data, "__array__"):
--> 476 return Index(np.asarray(data), dtype=dtype, copy=copy, name=name, **kwargs)
477 else:
478
/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in __new__(cls, data, dtype, copy, name, tupleize_cols, **kwargs)
467
468 klass = cls._dtype_to_subclass(arr.dtype)
--> 469 arr = klass._ensure_array(arr, dtype, copy)
470 disallow_kwargs(kwargs)
471 return klass._simple_new(arr, name)
/opt/conda/lib/python3.7/site-packages/pandas/core/indexes/numeric.py in _ensure_array(cls, data, dtype, copy)
169 if subarr.ndim > 1:
170 # GH#13601, GH#20285, GH#27125
--> 171 raise ValueError("Index data must be 1-dimensional")
172
173 subarr = np.asarray(subarr)
ValueError: Index data must be 1-dimensional
Data:
meth_clin_sub_nt_kipan.iloc[0,0:19].to_dict()
{'admin.disease_code': 'kirp',
'days_to_death': nan,
'vital_status': 'alive',
'age_at_initial_pathologic_diagnosis': 53.0,
'gender': 'male',
'karnofsky_performance_score': nan,
'survival': 'lts',
'cg00000029': 0.461440642939772,
'cg00000165': 0.143910373119058,
'cg00000236': 0.847164847154162,
'cg00000289': 0.737361955793681,
'cg00000292': 0.716794733144112,
'cg00000321': 0.351877113536983,
'cg00000363': 0.248986769373366,
'cg00000622': 0.0121360989202765,
'cg00000658': 0.876303885229884,
'cg00000721': 0.944311384947134,
'cg00000734': 0.0490407302658151,
'cg00000769': 0.0200484962577958}
CodePudding user response:
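The subset argument of dropna expects column labels, not a DataFrame; passing the 2-D slice from .iloc is what raises "Index data must be 1-dimensional". Pass the column names instead: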
meth_clin_sub_nt_2_kipan = meth_clin_sub_nt_kipan.dropna(subset=meth_clin_sub_nt_kipan.columns[7:-1])
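By the way, if you are assigning the DataFrame with the NaN rows dropped to a new variable, you do not need inplace=True; with inplace=True, dropna returns None, so the new variable would end up holding None.
inplace=True is useful if you want to modify your current DataFrame without assigning it back to itself, so this: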
meth_clin_sub_nt_kipan.dropna(subset=meth_clin_sub_nt_kipan.columns[7:-1], inplace=True)
is equivalent to this:
meth_clin_sub_nt_kipan = meth_clin_sub_nt_kipan.dropna(subset=meth_clin_sub_nt_kipan.columns[7:-1])