Home > OS >  Python pandas delete rows by uncertain number of columns' value
Python pandas delete rows by uncertain number of columns' value

Time:10-13

I want to delete rows in pandas dataframe by uncertain number of columns' value.

I create a string which contains python code, then use exec() to execute it. Is there a alternative way without exec() to handle uncertain number of conditions like this but work in pandas?

This is my code:

import pandas as pd


class DelTest:
    def __init__(self, df) -> None:
        self.df = df

    def deleteRows(self, conditions):
        conditions = [f"(self.df['{c['col']}'] == {c['val']})" for c in conditions]
        code = f"self.df = self.df.drop(self.df[{' & '.join(conditions)}].index)"
        exec(code)
        return self.df


# delete if df['value1'] == 0
condition1 = [{'col': 'value1', 'val': 0}]
# delete if df['value1'] == 0 & df['value2'] == 0
condition2 = [{'col': 'value1', 'val': 0}, {'col': 'value2', 'val': 0}]

df = pd.DataFrame(data=[['A', 2, 0],
                        ['B', 5, 1],
                        ['C', 0, 1],
                        ['X', 0, 1],
                        ['X', 0, 0]],
                  columns=['name', 'value1', 'value2'])

print(df)
'''
  name  value1  value2
0    A       2       0
1    B       5       1
2    C       0       1
3    X       0       1
4    X       0       0
'''
print(DelTest(df).deleteRows(condition1))
'''
  name  value1  value2
0    A       2       0
1    B       5       1
'''
print(DelTest(df).deleteRows(condition2))
'''
  name  value1  value2
0    A       2       0
1    B       5       1
2    C       0       1
3    X       0       1
'''

CodePudding user response:

Let us try with merge

cond = pd.DataFrame(condition2).set_index('col').T
out = df.merge(cond,how='left',indicator = True).query('_merge == "left_only"')
Out[209]: 
  name  value1  value2     _merge
0    A       2       0  left_only
1    B       5       1  left_only
2    C       0       1  left_only
3    X       0       1  left_only
#cond = pd.DataFrame(condition1).set_index('col').T
#out = df.merge(cond,how='left',indicator = True).query('_merge == "left_only"')
#Out[210]: 
#  name  value1  value2     _merge
#0    A       2       0  left_only
#1    B       5       1  left_only
  • Related