I am trying to get a list of positive words and negative words with my code but it keeps giving me an empty list. Also I'd like to know how to make my text all lowercase so it can properly append to the positive and negative lists which are all lower case.
def main():
text = open ('dataset.csv','r').read()
SC = [ ',' , '.' , '”' , '’' , '"' , "'" , '?' , '!' , '&', '@', '*' , '#' ]
for i in SC:
text = ''.join(text.split(i))
text.replace (i,'')
outfile = open ('dataset_new.csv','w')
outfile.write(text)
outfile.close()
infile = outfile = open ('dataset_new.csv','r')
pos_words = []
neg_words = []
count = 0
lines = infile.readlines()
for line in lines:
print (line)
print ('Your positive words are', pos_words)
print ('There are', len(pos_words), 'positive words in this text')
print ('Your negative words are', neg_words)
print ('There are', len(neg_words), 'negative words in this text')
count =1
positive_txt = open('positives.txt','r')
negative_txt = open('negatives.txt','r')
positive_list = pos_list(positive_txt)
negative_list = neg_list(negative_txt)
pos_words = positivewords(lines, pos_words, positive_list)
neg_words = negativewords(lines, neg_words, negative_list)
def pos_list(positive_txt):
data = positive_txt.read()
positive_list = data.replace('\n','.').split('.')
return positive_list
def neg_list(negative_txt):
data = negative_txt.read()
negative_list = data.replace('\n','.').split('.')
return negative_list
def positivewords(lines, pos_words, positive_list):
w = len(positive_list)
for i in range(w):
if positive_list[i] in lines:
pos_words.append(positive_list[i])
i =1
return pos_words
def negativewords(lines, neg_words, negative_list):
w = len(negative_list)
for i in range(w):
if negative_list[i] in lines:
neg_words.append(negative_list[i])
i =1
return neg_words
main()
CodePudding user response:
I think ur running into trouble because you're using lists.
try something like this:
import csv
SC = { ',' , '.' , '”' , '’' , '"' , "'" , '?' , '!' , '&', '@', '*' , '#' }
def main():
filename = input("Enter the filename: ")
sub = filename ".txt"
infile = open(sub, 'r')
out_file = open ("NewFile3.csv", 'w')
writer = csv.DictWriter(out_file, FIELDS)
writer.writeheader()
data1 = {}
data2 = {}
data3 = {}
Basically just put everything in dictionaries like your positive and negative and then convert/append to a final dictionary or list. Also in regards to making it lowercase u can just take your final string output and say ".lower())"
so say:
x = "STACK OVERFLOW"
print(x.lower()) # would output "stack overflow"
CodePudding user response:
I did not analyse your code in detail, but your list is empty because you initialize it as empty and don't change it before printing.
Check that these lines initializes your lists as empty:
...
pos_words = []
neg_words = []
...
Then you print the content of these lists without making any change to them.
I think you probably want to move the piece of code that prints the content of the lists (the code below) to the end of main()
function:
# move this code to the end of main() function
for line in lines:
print (line)
print ('Your positive words are', pos_words)
print ('There are', len(pos_words), 'positive words in this text')
print ('Your negative words are', neg_words)
print ('There are', len(neg_words), 'negative words in this text')
count =1
Finally, if you want to make a string lowercase, just use the lower
function. As this example:
def pos_list(positive_txt):
data = positive_txt.read().lower()
positive_list = data.replace('\n','.').split('.')
return positive_list
Notice the .lower()
function call in the first line.
I made a full solution so you can check what's wrong with your code:
dataset.txt
You are cool!
He is uglY.
Is she pRetty? I think she is bad.
Ok, this dog is a bad guy.
How pretty?!
positives.txt
cool
pretty
negatives.txt
ugly
bad
main.py
import re
DATASET_FILE = 'dataset.txt'
POSITIVEWORDS_FILE = 'positives.txt'
NEGATIVEWORDS_FILE = 'negatives.txt'
def main():
with open(POSITIVEWORDS_FILE, 'r') as positivefile:
positive_words = positivefile.read().split()
with open(NEGATIVEWORDS_FILE, 'r') as negativefile:
negative_words = negativefile.read().split()
positive_found = []
negative_found = []
with open(DATASET_FILE,'r') as dataset:
for line in dataset:
# remove all non alphanumeric chars (except spaces and "-")
words = re.sub('[^\w\s-]', '', line).lower().split()
for word in words:
if word in positive_words:
positive_found.append(word)
if word in negative_words:
negative_found.append(word)
# remove duplicates from the list turning the list into a set
unique_positivewords = set(positive_found)
unique_negativewords = set(negative_found)
print(f'Your positive words are {list(unique_positivewords)}.')
print(f'There are {len(positive_found)} positive words in this text.')
print(f'Your negative words are {list(unique_negativewords)}.')
print(f'There are {len(negative_found)} negative words in this text.')
main()