My code is as follows:
import json
import pandas as pd
from difflib import SequenceMatcher
description_file = open("description.json", encoding="utf-8")
nodescription_file = open("nodescription.json", encoding="utf-8")
desc = json.load(description_file)
nodesc = json.load(nodescription_file)
stack = []
def get_values():
data = []
for pages in nodesc:
for rows in pages["dataRows"]:
skip = False
if skip:
break
email = ""
companyname = rows["columnValues"]["companyName"][0]["name"]
fullname = ""
firmName = rows["columnValues"]["firm"][0]["name"]
pbid = rows["columnValues"]["companyName"][0]["pbId"]
managementposition = rows["columnValues"]["managementPosition"][0]["value"]
if len(rows["columnValues"]["email"]):
email = rows["columnValues"]["email"][0]["value"]
else:
email = "No email"
if len(rows["columnValues"]["fullName"]):
fullname = rows["columnValues"]["fullName"][0]["name"]
for desc_rows in desc["dataRows"]:
pbid_desc = desc_rows["columnValues"]["investorName"][0]["pbId"]
description = ""
if len(desc_rows["columnValues"]["description"]):
description = desc_rows["columnValues"]["description"][0]["value"]
else:
description = "No description"
if pbid == pbid_desc:
data.append({"Full Name": fullname, "Email": email, "Company Name": companyname, "Position":managementposition, "Description": description})
save_data(data, "file7.csv")
def similar(a, b): #Dont use this anymore, pbid was relational
return SequenceMatcher(None, a, b).ratio()
def instack(string):
for i in stack:
if string == i:
return True
return False
def save_data(data, name):
pd.read_json(json.dumps(data, ensure_ascii=False).encode('utf8'), encoding='utf-8').to_csv(name, encoding="utf-8")
get_values()
description_file.close()
nodescription_file.close()
I am getting an error of:
Traceback (most recent call last):
File "/Users/dan/Desktop/Upwork/main.py", line 69, in <module>
get_values()
File "/Users/dan/Desktop/Upwork/main.py", line 53, in get_values
save_data(data, "file7.csv")
File "/Users/dan/Desktop/Upwork/main.py", line 68, in save_data
pd.read_json(json.dumps(data, ensure_ascii=False).encode('utf8'), encoding='utf-8').to_csv(name, encoding="utf-8")
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/util/_decorators.py", line 207, in wrapper
return func(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/util/_decorators.py", line 311, in wrapper
return func(*args, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/io/json/_json.py", line 588, in read_json
json_reader = JsonReader(
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/io/json/_json.py", line 673, in __init__
data = self._get_data_from_filepath(filepath_or_buffer)
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/io/json/_json.py", line 710, in _get_data_from_filepath
self.handles = get_handle(
File "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/pandas/io/common.py", line 826, in get_handle
raise TypeError(
TypeError: Expected file path name or file-like object, got <class 'bytes'> type
Please help - I am a total noob. Thank you so much. The above is not code it is an error and the stackoverflow autobot is making be write more of a questions because it cannot discern the code text from the result text - or I am just a noob. Please help. Thank you so much in advance. I am on PC using Visual Studio and have already installed pip pandas and pip cdifflib is not installing for some reason with exit code 1120 legacy install failure.
CodePudding user response:
The error you get is because read_json
expects as a fisrt argument either a path to a file or a file-like object, but you're giving it a bytes string.
I think what should work is to convert your bytes string into a bytes buffer that is a file-like object, with io.BytesIO :
pd.read_json(io.BytesIO(json.dumps(data, ensure_ascii=False).encode('utf8')), encoding='utf-8').to_csv(name, encoding="utf-8")