def clean_text (text):
'''Text Preprocessing '''
# Convert words to lower case
text = text.lower()
#Expand contractions
if True:
text = text.split()
new_text= []
for word in text:
if word in contractions:
new_text.append(contractions [word])
else:
new_text.append(word)
text = "".join(new_text)
# Format words and remove unwanted characters
text = re.sub(r'https?:\/\/[\r\n],"[\r\n]"', '', text, flags=re.MULTILINE)
text = re.sub(r'\<a href', ' ', text)
text = re.sub(r'&', '', text)
text- re.sub(r'[_"\-;%()| &=*%.,!?:#$@\[\]/]',' ', text)
text = re.sub(r'<br />', ' ', text)
text = re.sub(r'\'', ' ', text)
#remove stopwords
if remove_stopwords:
text = text.split()
stops = set(stopwords.words ("english"))
text = [w for w in text if not w in stops]
text = "" .join(text)
# Tokenize each word
text = nltk.WordPunctTokenizer().tokenize(text)
text = nltk.TreebankWordTokenizer().tokenize(text)
text = nltk.WordPunctTokenizer().tokenize(text)
#Lemmatize each token
lemm = nltk.stem.WordNetLemmatizer()
text = list(map(lambda word:list(map(lemm.lemmatize, word)), text))
return text
when I run the above code it runs without an issue.
but when I run the below code using the above def it shows "argument of type 'module' is not iterable'
sentences_train = list(map(clean_text, sentences_train))
I have attached an image of the error for reference.
I have tried different ways to solve this but it make the error worse. if someone can help me with this and tell me why this occurs it would be really nice. thank you! any suggestions will be considered.
CodePudding user response:
The error seems to be caused by the contractions
. I don't know how you created the contractions
but keep in mind that you can only do if word in contractions:
if the contractions
is a list of words like
contractions = ["abc", "xyz", "123"]
.
CodePudding user response:
There seems to be a miss match with your import of contradictions
and what you actually want to iterate over to check if word
is in something (Without a complete example difficult to say). See example below for the same error. The solution would be to use contradictions.[something iterable from this module]
import os
sent = ['1', '2', '3']
something_it = ['2']
def return_str(s):
# if s in something_it: # <- something like this
if s in os:
return 'x'
else:
return s
another_list = list(map(return_str, sent))
print(another_list)