I have a problem. I want to loop over every row of the DataFrame and check whether its `text` contains one of the values in a list. If a row contains one of these values, a new column `test` should be set to True for that row; otherwise it should be False.
I get the following error: `TypeError: 'bool' object does not support item assignment`
Dataframe
customerId text
0 1 Something with Cat
1 3 That is a huge dog
2 3 Hello agian
Code
import pandas as pd
import copy
import re
# Sample data: two columns, three rows.
d = dict(
    customerId=[1, 3, 3],
    text=["Something with Cat", "That is a huge dog", "Hello agian"],
)
df = pd.DataFrame(d)
# Keywords to look for in the 'text' column.
my_list = ["cat", "dog", "mouse"]
# Row callback used with df.apply(..., axis=1): intended to record in x['test']
# whether x['text'] matches any entry of my_list (case-insensitive regex search).
def f(x):
match = False
for element in my_list:
# BUG (source of the reported TypeError): this rebinds the row argument `x`
# to a bool. After a match the loop breaks and the x['test'] assignment
# below is attempted on that bool instead of on the row.
x = bool(re.search(element, x['text'], re.IGNORECASE))
if(x):
#print(forwarder)
match = True
break
# The flag is stored in its string form ('True' / 'False').
x['test'] = str(match)
return x
#print(match)
# Create the result column first, then run f on every row (axis=1 passes rows).
df["test"] = None
df = df.apply(lambda row: f(row), axis=1)
What I want
customerId text test
0 1 Something with Cat True
1 3 That is a huge dog True
2 3 Hello agian False
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
File <timed exec>:13, in <module>
File ~\Anaconda3\lib\site-packages\pandas\core\frame.py:8839, in DataFrame.apply(self, func, axis, raw, result_type, args, **kwargs)
8828 from pandas.core.apply import frame_apply
8830 op = frame_apply(
8831 self,
8832 func=func,
(...)
8837 kwargs=kwargs,
8838 )
-> 8839 return op.apply().__finalize__(self, method="apply")
File ~\Anaconda3\lib\site-packages\pandas\core\apply.py:727, in FrameApply.apply(self)
724 elif self.raw:
725 return self.apply_raw()
--> 727 return self.apply_standard()
File ~\Anaconda3\lib\site-packages\pandas\core\apply.py:851, in FrameApply.apply_standard(self)
850 def apply_standard(self):
--> 851 results, res_index = self.apply_series_generator()
853 # wrap results
854 return self.wrap_results(results, res_index)
File ~\Anaconda3\lib\site-packages\pandas\core\apply.py:867, in FrameApply.apply_series_generator(self)
864 with option_context("mode.chained_assignment", None):
865 for i, v in enumerate(series_gen):
866 # ignore SettingWithCopy here in case the user mutates
--> 867 results[i] = self.f(v)
868 if isinstance(results[i], ABCSeries):
869 # If we have a view on v, we need to make a copy because
870 # series_generator will swap out the underlying data
871 results[i] = results[i].copy(deep=False)
File <timed exec>:13, in <lambda>(x)
File <timed exec>:9, in f(x)
TypeError: 'bool' object does not support item assignment
CodePudding user response:
What about just using:
# Join the keywords into one regex alternation and test the whole column at once.
keyword_pattern = '|'.join(my_list)
df['test'] = df['text'].str.contains(keyword_pattern, case=False)
Or, if your words might contain special characters:
import re
# Escape each keyword so any regex metacharacters in it are matched literally.
escaped_keywords = [re.escape(word) for word in my_list]
df['test'] = df['text'].str.contains('|'.join(escaped_keywords), case=False)
output:
customerId text test
0 1 Something with Cat True
1 3 That is a huge dog True
2 3 Hello agian False
CodePudding user response:
You can use `re.compile` to build the pattern you want (`cat|dog|mouse`), then use `apply` on the `text` column.
import re
my_list = ['cat', 'dog', 'mouse']
# Compile a single case-insensitive alternation of all keywords once.
pattern = re.compile('|'.join(my_list), flags=re.IGNORECASE)
# search() returns a match object or None; bool() turns that into True/False.
df['test'] = df['text'].apply(lambda text: bool(pattern.search(text)))
print(df)
Output:
customerId text test
0 1 Something with Cat True
1 3 That is a huge dog True
2 3 Hello agian False
CodePudding user response:
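The assignment `x = bool(re.search(...))` reuses the name `x`, which is also the parameter of `f(x)`. After that line `x` is a bool rather than the row, so `x['test'] = ...` fails. Give the search result a different name: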
def f(x, keywords=None):
    """Flag a row whose 'text' matches any keyword.

    Each keyword is used as a regular expression and searched for in
    ``x['text']`` case-insensitively; the search stops at the first hit.

    Args:
        x: A mapping-like row (e.g. the Series passed by
            ``df.apply(f, axis=1)``) with a 'text' entry. It is modified
            in place: its 'test' entry is set.
        keywords: Iterable of regex patterns to look for. Defaults to the
            module-level ``my_list`` when omitted.

    Returns:
        The same row object, with ``x['test']`` set to the string 'True'
        or 'False'.
    """
    if keywords is None:
        keywords = my_list
    match = False
    for element in keywords:
        # Keep the search result in its own name: assigning it to `x` would
        # replace the row with a bool and break the x['test'] write below.
        found = bool(re.search(element, x['text'], re.IGNORECASE))
        if found:
            match = True
            break
    # Stored in string form, as in the original snippet.
    x['test'] = str(match)
    return x