I'm trying to 'translate' code that uses the pandas module into code that does not use pandas.
The code looks like this:
import pandas as pd

# Load the reviews; the 'book_title' and 'stars_given' columns are used below.
data = pd.read_csv('review.csv')
data  # bare expression: only displays the table in a notebook/REPL

# Collect the distinct book titles. Going through a set removes duplicates,
# so the order of temp_list is arbitrary.
titles = data['book_title']
temp = list(titles)
temp_set = set(temp)
temp_list = list(temp_set)
temp_list

# Keep only the two columns needed and index the rows by book title.
data_simple = data.filter(items=['book_title', 'stars_given'])
data_simple = data_simple.set_index('book_title')

# Compute the mean rating for every title.
result_table = []
for title in temp_list:
    # Select rows whose index label EQUALS the title. filter(like=title, axis=0)
    # keeps every label that merely contains the title as a substring, so a
    # title such as "abc" would also pull in the ratings of "abcd".
    book_data = data_simple[data_simple.index == title]
    average = book_data['stars_given'].mean()
    result_table.append([title, average])

result = pd.DataFrame(data=result_table, columns=['book_title', 'average_rating'])
result

# Write one row per title, without the DataFrame index column.
result.to_csv('average_rating.csv', index=False, encoding='cp949')
(check out the picture; my typing may not be accurate)
Can somebody please help me rewrite this code without the pandas module, using only built-in modules (for example, starting with 'import csv')?
CodePudding user response:
Suggest using:
- CSV Module
- List comprehension for filtering data
Code
import csv

# Load data using the csv module.
# newline='' is what the csv documentation recommends for files handed to
# csv.reader, so that newlines embedded in quoted fields are parsed correctly.
with open('review.csv', 'r', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    columns = next(csv_reader)  # header row: list of column names
    # csv.reader yields an empty list for a blank line; skip those so the
    # column lookups below cannot raise IndexError.
    data = [row for row in csv_reader if row]
print(data)

# Names of unique book titles
titles = columns.index('book_title')  # column index holding the book titles
temp = [row[titles] for row in data]  # list comprehension for the titles column
temp_set = set(temp)  # a set drops duplicates; resulting order is arbitrary
temp_list = list(temp_set)
print(temp_list)

# Filter to book_title and stars_given (use list comprehension)
item_columns = [columns.index('book_title'), columns.index('stars_given')]
# List comprehension which places book_title and stars_given in the first two columns
data_simple = [[row[i] for i in item_columns] for row in data]
print(data_simple)

# Mean of stars by title
# Dictionary to look up column indexes for book_title and stars_given
data_simple_columns = {'book_title': 0, 'stars_given': 1}
result_table = []
for title in temp_list:
    # Rows with exactly this title (use list comprehension to filter)
    book_data = [row for row in data_simple
                 if row[data_simple_columns['book_title']] == title]
    # Sum up the stars for the book. The csv module returns strings, so convert;
    # float() also accepts fractional ratings such as "3.5", which int() rejects.
    sum_ = sum(float(row[data_simple_columns['stars_given']]) for row in book_data)
    count = len(book_data)  # never 0: every title in temp_list came from data
    average = sum_ / count
    result_table.append((title, average))  # store each as a tuple
print(result_table)

# Create the resulting CSV
with open('average_rating.csv', 'w', newline='', encoding='cp949') as csv_file:
    csv_writer = csv.writer(csv_file, delimiter=',')
    csv_writer.writerow(['book_title', 'average_rating'])  # header
    csv_writer.writerows(result_table)
Test
File: review.csv
book_title,stars_given,comment
abc,5,loved it
def,3,okay to watch
bce,2,too long
abc,4,very funny
File: average_rating.csv
book_title,average_rating
def,3.0
abc,4.5
bce,2.0
CodePudding user response:
I think NumPy could do it:
import numpy as np
# Read the comma-separated file with loadtxt(), keeping every field as a string
arr = np.loadtxt("review.csv", delimiter=",", dtype=str)
I am not sure, but try NumPy.