When I open the generated csv file there are some unwanted double quotes in the column track_uri that I cannot remove. Please can you tell me how to remove them?
The code of the program is here:
import json
import csv
import os
input_data_dir = 'C:\\Users\\ProjectsDesktop\\dataset\\'
output_data_dir = 'C:\\Users\\Projects\\'
p_t_cols = ["pid", "track_uri"]
list_track_uri=[]
for filename in os.listdir(input_data_dir):
if('.json' in filename):
filepath = input_data_dir filename
print(filepath)
data = json.load(open(filepath))
playlists = data["playlists"]
for p in playlists:
line = [p["name"], p["collaborative"], p["pid"], p["modified_at"], p["num_tracks"], p["num_albums"], p["num_followers"]]
pid = p["pid"]
tracks = p["tracks"]
for t in tracks:
line = [t["artist_name"], t["track_uri"], t["artist_uri"], t["track_name"], t["album_uri"], t["duration_ms"], t["album_name"]]
track_uri = t["track_uri"]
line = [pid, track_uri]
list_track_uri.append(track_uri)
uris= set(list_track_uri)
import random
final_uris=set(random.sample(uris, 500))
print(len(final_uris))
final_uris = ','.join(final_uris)
path=r'C:\\Users\\user\\Desktop\\ML Datasets\\'
filenames = os.listdir(path)
with open('submission.csv', mode='w', newline='', encoding='utf-8') as accounts:
writer=csv.writer(accounts)
for filename in sorted(filenames):
if filename.startswith("challenge_set") and filename.endswith(".json"):
fullpath = os.sep.join((path, filename))
f = open(fullpath)
js = f.read()
f.close()
mpd_slice = json.loads(js)
writer.writerow(["pid", "track_uri"])
for playlist in mpd_slice["playlists"]:
final_uris.replace('"', '')
writer.writerow([playlist["pid"], final_uris])
Here is a screenshot from the output file
CodePudding user response:
Change
writer.writerow([playlist["pid"], final_uris])
to
writer.writerow([playlist["pid"], *final_uris])
CodePudding user response:
After a few hours I found the solution to this problem and I post it if anyone in the future will need it.
I changed this line into this by adding quotechar= " "
writer=csv.writer(accounts, quotechar = " ")
Thank you all for your help.