I am uploading a file from the front end and trying to read it in the backend to extract some data from it. I have written the following code, which fails in all scenarios:
Views.py
# View subclass that renders the upload form on GET and processes a submitted upload on POST.
# NOTE(review): indentation was lost in this paste; the nesting implied by the comments below is inferred and should be confirmed.
class UserInfo(View):
# Template rendered by both get() and post().
template_name = "Recruit/recruit.html"
# GET: render the template with a form constructed without any data.
def get(self, request):
user = UserInformationFrom()
return render(request, self.template_name, {"form": user})
# POST: build the form from the submitted fields and uploaded files, then process the upload.
def post(self, request):
user = UserInformationFrom(request.POST, request.FILES)
output = dict()
# File extensions (compared in lower case) for which readHTML is called.
HTMLExtensionList = ['.html','.htm']
if user.is_valid():
savedUser = user.save()
# Name of the uploaded file; only its extension is used below.
filename = user['file'].data.name
name, extension = os.path.splitext(filename)
if extension.lower() in HTMLExtensionList:
# The value passed under the keyword `filename` is the uploaded file object itself, not a path string.
output = readHTML(filename=user['file'].data)
# Copy the extracted values onto the object returned by user.save().
# NOTE(review): if these assignments sit outside the extension check, `output` is still the empty dict for other file types and the lookups raise KeyError - confirm the intended nesting.
savedUser.email = output['Email']
savedUser.mobile = output['Phone']
savedUser.Zipcode = output['zipCode']
savedUser.state = output['state']
savedUser.upload_by = request.user
savedUser.updated = timezone.now()
savedUser.save()
return render(request, self.template_name, {"form": user})
# NOTE(review): presumably pairs with `if user.is_valid():` so an invalid form is re-rendered - confirm.
else:
return render(request, self.template_name, {"form": user})
DataExtract.py
def readHTML(filename):
    """Extract contact fields from the HTML file located at ``filename``.

    The file is opened as UTF-8 text and parsed with BeautifulSoup. ``style``
    and ``script`` elements are removed, the remaining strings are joined
    with single spaces, and that text is passed to the extraction helpers.

    Returns a dict with the keys ``Email``, ``Phone``, ``zipCode`` and
    ``state``.
    """
    with open(filename, "r", encoding='utf-8') as html_file:
        document = BeautifulSoup(html_file)

    # Drop style and script elements before collecting the text.
    for unwanted in document(['style', 'script']):
        unwanted.decompose()

    text = ' '.join(document.stripped_strings)
    return {
        "Email": ExtractEmail(text),
        "Phone": findPhone(text),
        "zipCode": extractZipCode(text),
        "state": extractState(text),
    }
I am getting the following error:
expected str, bytes or os.PathLike object, not InMemoryUploadedFile
The error is raised in DataExtract, where I try to open the file. I tried this solution, but it is still not working:
expected str, bytes or os.PathLike object, not InMemoryUploadedFile
CodePudding user response:
Well, since your `readHTML` function expects a filename, you need to pass it one, not the file object itself.
Refactor `readHTML` into a function that can read its input from a string:
def read_html_string(s):
    """Parse the HTML markup ``s`` and extract contact fields from its text.

    ``style`` and ``script`` elements are removed from the parsed document,
    the remaining strings are joined with single spaces, and the result is
    passed to each extraction helper in turn.

    Returns a dict with the keys ``Email``, ``Phone``, ``zipCode`` and
    ``state``.
    """
    parsed = BeautifulSoup(s)
    # Calling find_all is what calling the soup object directly does.
    for node in parsed.find_all(["style", "script"]):
        node.decompose()
    visible_text = " ".join(parsed.stripped_strings)

    result = {}
    result["Email"] = ExtractEmail(visible_text)
    result["Phone"] = findPhone(visible_text)
    result["zipCode"] = extractZipCode(visible_text)
    result["state"] = extractState(visible_text)
    return result
# If you still need this for something...
def readHTML(filename):
    """Read the UTF-8 text file at ``filename`` and return ``read_html_string`` of its contents."""
    with open(filename, "r", encoding="utf-8") as handle:
        contents = handle.read()
    return read_html_string(contents)
Then just do
# Pass the uploaded file's contents (the result of .read()) to read_html_string, not the file object.
# NOTE(review): if an earlier step (e.g. user.save()) already read the upload, the stream may be at EOF and a seek(0) may be needed first - confirm.
output = read_html_string(user['file'].data.read())
in your view function.
CodePudding user response:
Try to pass the InMemoryUploadedFile directly to the BeautifulSoup
class like this:
def readHTML(file):
    """Extract contact fields from ``file``, which is handed straight to BeautifulSoup.

    No ``open`` call is made here. ``style`` and ``script`` elements are
    removed, the remaining strings are joined with single spaces, and each
    extraction helper is applied to that text.

    Returns a dict with the keys ``Email``, ``Phone``, ``zipCode`` and
    ``state``.
    """
    markup = BeautifulSoup(file)
    for element in markup(['style', 'script']):
        element.decompose()
    content = ' '.join(markup.stripped_strings)

    # Output key paired with the helper that produces its value, in call order.
    extractors = (
        ("Email", ExtractEmail),
        ("Phone", findPhone),
        ("zipCode", extractZipCode),
        ("state", extractState),
    )
    return {key: extract(content) for key, extract in extractors}
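Obviously, the error comes from this line: `with open(filename, "r", encoding='utf-8') as file`,
so you do not need to call `open` to be able to read the file.
Note that the parameter is now named `file`, so the call in the view must change from `readHTML(filename=user['file'].data)` to `readHTML(user['file'].data)`.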
source: https://tutorialmeta.com/question/expected-str-bytes-or-os-pathlike-object-not-inmemoryuploadedfile