I'm using the txt file: https://drive.google.com/file/d/1-VrWf7aqiqvnshVQ964zYsqaqRkcUoL1/view?usp=sharin
I'm running the script:
data = f.read()
ny_sum=0
ny_count=0
sf_sum=0
sf_count=0
for line in data.split('\n'):
print(line)
parts = line.split('\t')
city = parts[2]
amount = float(parts[4])
if city == 'San Francisco':
sf_sum = sf_sum amount
elif city == 'New York':
ny_sum = ny_sum amount
ny_count = ny_count 1
ny_avg = ny_sum / ny_count
sf_avg = sf_sum / sf_count
#print(ny_avg, sf_avg)
f = open('result_file.txt', 'w')
f.write('The average transaction amount based on {} transactions in New York is {}\n'.format(ny_count, ny_avg))
f.write('The average transaction amount based on {} transactions in San Francisco is {}\n'.format(sf_count, sf_avg))
if ny_avg>sf_avg:
f.write('New York has higher average transaction amount than San Francisco\n')
else:
f.write('San Francisco has higher average transaction amount than New York\n')
f.close()
And I ALWAYS get the error: ValueError: could not convert string to float: ''
I'm pretty new-ish to Python and I'm really not sure what I'm doing wrong here. I'm trying to get averages for New York and San Francisco, then export the results AND the comparison to a txt results file
CodePudding user response:
I have re-arranged the code. I agree with BrutusFocus that the splits are making it difficult to read exactly the location on each row. I have set it so if it sees the location at any point in the row, it counts it.
with open("data.txt", "r") as f:
data = f.read()
ny_sum=0
ny_count=0
sf_sum=0
sf_count=0
for line in data.split('\n'):
parts = line.split('\t')
city = parts[2]
amount = float(parts[4])
print(city, amount)
if "New York" in line:
ny_sum = ny_sum amount
ny_count = ny_count 1
elif "San Francisco" in line:
sf_sum = sf_sum amount
sf_count = sf_count 1
ny_avg = ny_sum / ny_count
sf_avg = sf_sum / sf_count
#print(ny_avg, sf_avg)
f = open('result_file.txt', 'w')
f.write('The average transaction amount based on {} transactions in New York is
{}\n'.format(ny_count, ny_avg))
f.write('The average transaction amount based on {} transactions in San
Francisco is {}\n'.format(sf_count, sf_avg))
if ny_avg>sf_avg:
f.write('New York has higher average transaction amount than San Francisco\n')
else:
f.write('San Francisco has higher average transaction amount than New York\n')
f.close()
CodePudding user response:
This should give you what you're looking for:
from collections import defaultdict
sums = defaultdict(lambda: [0.0, 0])
with open('New Purchases.txt') as pfile:
for line in [line.strip().split('\t') for line in pfile.readlines()]:
if len(line) > 5:
sums[line[2]][0] = float(line[4])
sums[line[2]][1] = 1
for k in ['San Francisco', 'New York']:
v = sums.get(k, [0.0, 1])
print(f'Average for {k} = ${v[0]/v[1]:.2f}')
The check for the number of tokens is necessary because your file seems to have at least one incomplete line (the last line in the file)