How can I rewrite this Python code without using pandas? - CodePudding

I'm trying to 'translate' code that uses the pandas module into code that does not use pandas.

The code looks like this:

import pandas as pd
# Compute the average 'stars_given' per 'book_title' from review.csv and
# write the result to average_rating.csv.
data = pd.read_csv('review.csv')
data  # bare expression: displayed in a notebook, no effect in a script

# Collect the distinct book titles. Going through a set removes duplicates,
# so the order of temp_list (and of the output rows) is not guaranteed.
titles = data['book_title']
temp = list(titles)
temp_set = set(temp)
temp_list = list(temp_set)
temp_list

# Keep only the two columns needed and index the rows by title.
data_simple = data.filter(items=['book_title', 'stars_given'])
data_simple = data_simple.set_index('book_title')

result_table = []
for title in temp_list:
    # Select rows whose index equals the title exactly. filter(like=title)
    # is a substring match, so a title such as "abc" would also pull in the
    # ratings of "abcd" and skew the average.
    book_data = data_simple[data_simple.index == title]
    average = book_data['stars_given'].mean()
    result_table.append([title, average])

result = pd.DataFrame(data=result_table, columns=['book_title', 'average_rating'])
result
result.to_csv('average_rating.csv', index=False, encoding='cp949')

(check out the picture; my typing may not be accurate)

Can somebody please help me change this code so that it uses only built-in modules (for example, beginning with 'import csv') instead of the pandas module?

CodePudding user response：

Suggest using:

CSV Module
List comprehension for filtering data

Code

import csv

# Load data using the csv module.
# newline='' is what the csv module expects for files it reads, so that
# line endings inside quoted fields are interpreted correctly.
with open('review.csv', 'r', newline='') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    columns = next(csv_reader)   # header row: list of column names
    # csv.reader yields an empty list for a blank line; skip those so the
    # column lookups below cannot fail with IndexError.
    data = [row for row in csv_reader if row]

print(data)

# Names of unique book titles
titles = columns.index('book_title')   # column index with book titles
temp = [row[titles] for row in data]
temp_set = set(temp)                   # de-duplicate; order is not guaranteed
temp_list = list(temp_set)
print(temp_list)

# Filter to book_title and stars_given (use list comprehension)
item_columns = [columns.index('book_title'), columns.index('stars_given')]
# List comprehension which places book_title and stars_given in first two columns
data_simple = [[row[i] for i in item_columns] for row in data]
print(data_simple)

# Mean of stars by title
# Dictionary to look up column indexes for book_title and stars_given
data_simple_columns = {'book_title': 0, 'stars_given': 1}
title_col = data_simple_columns['book_title']
stars_col = data_simple_columns['stars_given']

# Group the ratings by title in a single pass instead of rescanning every
# row for each title. float() accepts both "4" and "4.5"; int() would raise
# ValueError on a fractional rating.
stars_by_title = {}
for row in data_simple:
    stars_by_title.setdefault(row[title_col], []).append(float(row[stars_col]))

result_table = []
for title in temp_list:
    stars = stars_by_title[title]   # every title in temp_list has >= 1 row
    average = sum(stars) / len(stars)
    result_table.append((title, average))   # store each as tuple

print(result_table)

# Create resulting CSV
with open('average_rating.csv', 'w', newline='', encoding='cp949') as csv_file:
    csv_writer = csv.writer(csv_file, delimiter=',')
    csv_writer.writerow(['book_title', 'average_rating'])  # Header
    csv_writer.writerows(result_table)

Test

File: review.csv

book_title,stars_given,comment
abc,5,loved it
def,3,okay to watch
bce,2,too long
abc,4,very funny

File: average_rating.csv

book_title,average_rating
def,3.0
abc,4.5
bce,2.0

CodePudding user response：

I think NumPy could do this.

 import numpy as np
  
# Read the comma-separated file with NumPy's loadtxt(), keeping every field as a string.
arr = np.loadtxt("review.csv", dtype=str, delimiter=",")

I am not sure, but try NumPy.