I am trying to find the most common value located within a column in a CSV file, and return that value.
I am only allowed to import the file that I'm using.
I cannot use pandas.
Here is what I tried to do:
# Asker's attempt, reproduced as posted (it does not run as written).
# Goal: return the most common value found in `column_name` of the CSV `filename`.
def get_longest_value_from_col(filename, column_name):
with open(filename, 'r') as csvfile:
# NOTE: `csv` is used here, but no `import csv` appears in this snippet.
reader = csv.DictReader(csvfile)
# Neither `counts` nor `reader` is read again below.
counts = {}
# `filename` is a string (see the call below), so calling it raises TypeError.
num = filename(column_name)
# Iterates over the characters of the path string, not over the CSV rows.
for i in filename:
curr_frequency = filename.count(i)
# `counter` is never initialised before this comparison.
if curr_frequency > counter:
counter = curr_frequency
num = i
return num
# Example call: print the result for the 'the_location' column.
print(str(
get_longest_value_from_col(
filename='personal_data.csv',
column_name='the_location'
)
))
CodePudding user response:
As a simple solution, you can update your function, as below:
import csv
def get_longest_value_from_col(filename, column_name):
    """Return the most common value in one column of a CSV file.

    Args:
        filename: Path of the CSV file; its first row is used as the header.
        column_name: Header name of the column to inspect.

    Returns:
        The value that occurs most often in the column. On a tie, the value
        that appears first in the file wins. ``None`` if the file contains
        no data rows.

    Raises:
        KeyError: If a data row has no ``column_name`` field.
    """
    # newline='' lets the csv module handle line endings itself, as the
    # csv documentation recommends.
    with open(filename, 'r', newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        # Count in a dict: a generator has no .count() method, and calling
        # list.count() once per row would be quadratic in the row count.
        counts = {}
        for row in reader:
            value = row[column_name]
            counts[value] = counts.get(value, 0) + 1
    # Dicts keep insertion order and max() returns the first maximal key,
    # so ties go to the value seen first in the file.
    return max(counts, key=counts.get, default=None)
# Look up the most frequent location in the sample file, then show it.
most_common_location = get_longest_value_from_col(
    'personal_data.csv', 'the_location'
)
print(most_common_location)
Or we can follow what uozcan12 suggested, which lets us use a generator as a more efficient iterable (see the Python docs):
import csv
from collections import Counter
def get_longest_value_from_col(filename, column_name):
    """Return the most common value in one column of a CSV file.

    Args:
        filename: Path of the CSV file; its first row is used as the header.
        column_name: Header name of the column to inspect.

    Returns:
        The value that occurs most often in the column, or ``None`` if the
        file contains no data rows.

    Raises:
        KeyError: If a data row has no ``column_name`` field.
    """
    # newline='' lets the csv module handle line endings itself, as the
    # csv documentation recommends.
    with open(filename, 'r', newline='') as csvfile:
        reader = csv.DictReader(csvfile)
        # The generator feeds rows to Counter one at a time, so the column
        # is never materialised as a list. It must be consumed while the
        # file is still open.
        column_values = (row[column_name] for row in reader)
        top = Counter(column_values).most_common(1)
    # most_common(1) is an empty list when nothing was counted; indexing it
    # directly would raise IndexError.
    return top[0][0] if top else None
# Compute the result first, then print it.
top_location = get_longest_value_from_col('personal_data.csv', 'the_location')
print(top_location)
CodePudding user response:
If you don't want to import anything then:
COLUMN = 'the_location'  # header name of the column to analyse
SEP = ','  # field separator used in the file


def get_columns(line):
    """Split one line of the file into its fields.

    The line is split on ``SEP`` and surrounding whitespace (including the
    trailing newline) is stripped from every field. This is a plain split:
    quoted fields that contain the separator are not handled.
    """
    return [field.strip() for field in line.split(SEP)]


def get_longest_value_from_col(filename):
    """Return the most common value in the ``COLUMN`` column of *filename*.

    The first line of the file is treated as the header. Blank lines and
    rows too short to contain the column are ignored.

    Args:
        filename: Path of the delimited text file to read.

    Returns:
        The value that occurs most often in the column. On a tie, the value
        that appears first in the file wins. ``None`` if the column is not
        in the header (a message is printed) or if there are no data rows.
    """
    with open(filename) as csv_file:
        # An empty file yields '' here, which is reported like a missing
        # column instead of leaking StopIteration.
        header = next(csv_file, '')
        # Keep the try body minimal so that only a failed header lookup is
        # reported as a missing column.
        try:
            column_index = get_columns(header).index(COLUMN)  # header columns
        except ValueError:
            print(f"Couldn't find column {COLUMN}")
            return None
        data = {}
        for line in csv_file:
            if not line.strip():
                continue  # blank line
            fields = get_columns(line)
            if column_index >= len(fields):
                continue  # row too short to contain the column
            cdata = fields[column_index]
            data[cdata] = data.get(cdata, 0) + 1
    # Dicts keep insertion order and max() returns the first maximal key,
    # so ties go to the value seen first; default covers "no data rows".
    return max(data, key=data.get, default=None)
# Show the most frequent value of the configured column in the sample file.
most_common_value = get_longest_value_from_col('personal_data.csv')
print(most_common_value)