I have a class, and in that class I have a method that calls multiple other methods.
The problem I am facing is that the parameter file_name is duplicated: it is passed both to the combined method and to one of the methods it calls.
As a result, when I call the combined method, it returns an empty list: [].
This is the combined method:
def show_extracted_data_from_file(self, file_name):
    """Extract the text of ``file_name`` and format it as a table.

    Runs the text extraction, then collects the fruit quantities, fruit
    names and fruit costs from the filter methods and joins them row by row.

    Args:
        file_name: Path of the file to extract text from.

    Returns:
        One line per fruit containing quantity, name and cost separated by
        tabs, joined with newlines. ``zip`` stops at the shortest of the
        three lists, so the result is an empty string as soon as one of the
        filter methods returns nothing.
    """
    self.extractingText.extract_text_from_image(file_name)
    total_fruit = self.filter_verdi_total_number_fruit()
    fruit_name = self.filter_verdi_fruit_name()
    fruit_total_cost = self.filter_verdi_total_fruit_cost(file_name)
    return "\n".join(
        "{} \t {} \t {}".format(a, b, c)
        for a, b, c in zip(total_fruit, fruit_name, fruit_total_cost)
    )
And this is the method filter_verdi_total_fruit_cost:
def filter_verdi_total_fruit_cost(self, file_name):
    """Return the last column of every recognised fruit line as a float.

    A line is kept when it has more than two whitespace-separated tokens and
    its second token is one of ``self.extractingText.list_fruit``.

    NOTE(review): the lines are taken from ``file_name`` itself. When the
    caller passes a file path, there is only the path to split and nothing
    matches, so the result is ``[]``. The extracted text is stored on
    ``self.extractingText.text_factuur_verdi`` and is never read here.
    """
    # Switches the locale of the whole process so locale.atof() parses
    # numbers according to the 'Dutch' locale.
    locale.setlocale(locale.LC_ALL, locale='Dutch')
    # NOTE(review): the combined method has already run this extraction, so
    # this second call repeats the same work.
    self.extractingText.extract_text_from_image(file_name)
    return [
        locale.atof(items[-1]) for items in (
            token.split() for token in file_name.split('\n')
        ) if len(items) > 2 and items[1] in self.extractingText.list_fruit
    ]
this method returns the following data:
[123.2, 2772.0, 46.2, 577.5, 69.3, 3488.16, 137.5, 500.0, 1000.0, 2000.0, 1000.0, 381.25]
As you can see, file_name is passed twice.
So when I call the method show_extracted_data_from_file in views.py:
# Only uploads whose path ends in '.pdf' are passed to the extraction method.
if uploadfile.image.path.endswith('.pdf'):
    content = filter_text.show_extracted_data_from_file(uploadfile.image.path)
    print(content)
it produces an empty list: []
Question: how can I avoid passing the parameter file_name twice, so that the method returns the correct results?
These are the two other methods that I am calling in the combined method:
def filter_verdi_total_number_fruit(self):
    """Return the quantity that precedes each known fruit name.

    Searches only the first extracted text
    (``self.extractingText.text_factuur_verdi[0]``) for a number, optionally
    with dot-separated groups, followed by at least one non-word character
    and one of the names in ``self.extractingText.list_fruit``.

    Returns:
        The matched quantities as strings, in order of appearance.

    Raises:
        IndexError: If no text has been extracted yet.
    """
    # Names are escaped so they are matched literally inside the alternation.
    fruit_names = '|'.join(
        re.escape(word) for word in self.extractingText.list_fruit
    )
    regex = r"(\d*(?:\.\d+)*)\s*\W+(?:" + fruit_names + ')'
    return re.findall(regex, self.extractingText.text_factuur_verdi[0])
def filter_verdi_fruit_name(self):
    """Return every known fruit name that follows a quantity.

    Uses the same pattern as the quantity filter, but captures the fruit
    name instead of the number. Only the first extracted text
    (``self.extractingText.text_factuur_verdi[0]``) is searched.

    Returns:
        The matched fruit names, in order of appearance.

    Raises:
        IndexError: If no text has been extracted yet.
    """
    # Names are escaped so they are matched literally inside the alternation.
    fruit_names = '|'.join(
        re.escape(word) for word in self.extractingText.list_fruit
    )
    regex = r"(?:\d*(?:\.\d+)*)\s*\W+(" + fruit_names + ')'
    return re.findall(regex, self.extractingText.text_factuur_verdi[0])
So this is the other class:
class ExtractingTextFromFile:
    """Extracts text from an uploaded file and holds the known product names."""

    def __init__(self):
        # Filled by extract_text_from_image(); one text string per image of
        # the converted file.
        self.text_factuur_verdi = []
        # Names the filter methods look for. 'Peen Waspeen' must stay ahead
        # of 'Peen': the filters build a regex alternation from this list,
        # and the first alternative that matches wins.
        self.list_fruit = ['Appels', 'Ananas', 'Peen Waspeen',
                           'Tomaten Cherry', 'Sinaasappels',
                           'Watermeloenen', 'Rettich', 'Peren', 'Peen',
                           'Mandarijnen', 'Meloenen', 'Grapefruit']

    def extract_text_from_image(self, filename):
        """Run OCR on every image of ``filename`` and store the results.

        Any previously extracted text is discarded first.

        Args:
            filename: Path of the file to convert and read.

        Returns:
            ``self.text_factuur_verdi``: the list of recognised text strings,
            one per image in the converted sequence.
        """
        # NOTE(review): `wi` is presumably wand.image.Image and `Image` the
        # PIL image module -- confirm against the file's imports.
        self.text_factuur_verdi = []
        pdf_file = wi(filename=filename, resolution=300)
        all_images = pdf_file.convert('jpeg')
        for page in all_images.sequence:
            # Re-encode each frame as a JPEG blob so it can be opened from memory.
            jpeg_bytes = wi(image=page).make_blob('jpeg')
            page_image = Image.open(io.BytesIO(jpeg_bytes))
            text = pytesseract.image_to_string(page_image, lang='eng')
            self.text_factuur_verdi.append(text)
        return self.text_factuur_verdi
CodePudding user response:
@AndrewRyan has the right idea.
I presume calling extract_text_from_image
just adds the attribute list_fruit.
There are two routes you can take. From what you are commenting you'll probably just go with #1, but I gave #2 as another option in case you ever want to call filter_verdi_total_fruit_cost
by itself.
Path 1: Just remove it.
- Note:
filter_verdi_total_fruit_cost
is only called from show_extracted_data_from_file
def show_extracted_data_from_file(self, file_name):
    """Extract the text of ``file_name`` once and format it as a table.

    Args:
        file_name: Path of the file to extract text from.

    Returns:
        One line per fruit containing quantity, name and cost separated by
        tabs, joined with newlines. The result is an empty string when any
        of the three filter methods returns nothing.
    """
    # Extract the text once. The result is stored in
    # `self.extractingText.text_factuur_verdi`, which the filter methods
    # below read, so none of them needs the file name.
    self.extractingText.extract_text_from_image(file_name)
    total_fruit = self.filter_verdi_total_number_fruit()
    fruit_name = self.filter_verdi_fruit_name()
    fruit_total_cost = self.filter_verdi_total_fruit_cost()
    return "\n".join(
        "{} \t {} \t {}".format(a, b, c)
        for a, b, c in zip(total_fruit, fruit_name, fruit_total_cost)
    )
def filter_verdi_total_fruit_cost(self):
    """Return the cost of every recognised fruit line as a float.

    Reads the text that was already extracted into
    ``self.extractingText.text_factuur_verdi`` (first entry only). A line is
    kept when it has more than two whitespace-separated tokens and its
    second token is one of ``self.extractingText.list_fruit``; the last
    token of such a line is parsed as the cost.

    Returns:
        The parsed costs, in order of appearance.

    Raises:
        IndexError: If no text has been extracted yet.
        locale.Error: If the 'Dutch' locale is not available.
    """
    # setlocale() changes the locale of the whole process; it is needed so
    # locale.atof() accepts numbers written in the 'Dutch' locale format.
    # NOTE(review): 'Dutch' looks like a Windows-style locale name; other
    # platforms may need e.g. 'nl_NL.UTF-8' -- confirm for the deployment.
    locale.setlocale(locale.LC_ALL, locale='Dutch')
    # Parse the extracted text, not a file name: this method no longer
    # receives one.
    text = self.extractingText.text_factuur_verdi[0]
    known_fruit = self.extractingText.list_fruit
    return [
        locale.atof(items[-1])
        for items in (line.split() for line in text.split('\n'))
        if len(items) > 2 and items[1] in known_fruit
    ]
Path 2: Check whether the text has already been extracted - if not, extract it; if so, continue.
- Note: use this if you want to be able to call
filter_verdi_total_fruit_cost
on its own.
def show_extracted_data_from_file(self, file_name):
    """Extract the text of ``file_name`` and format it as a table.

    Args:
        file_name: Path of the file to extract text from. It is also passed
            on to ``filter_verdi_total_fruit_cost``.

    Returns:
        One line per fruit containing quantity, name and cost separated by
        tabs, joined with newlines. The result is an empty string when any
        of the three filter methods returns nothing.
    """
    # Extract the text; the result is stored in
    # `self.extractingText.text_factuur_verdi`.
    self.extractingText.extract_text_from_image(file_name)
    total_fruit = self.filter_verdi_total_number_fruit()
    fruit_name = self.filter_verdi_fruit_name()
    fruit_total_cost = self.filter_verdi_total_fruit_cost(file_name)
    return "\n".join(
        "{} \t {} \t {}".format(a, b, c)
        for a, b, c in zip(total_fruit, fruit_name, fruit_total_cost)
    )
def filter_verdi_total_fruit_cost(self, file_name):
    """Return the cost of every recognised fruit line as a float.

    Can be called on its own: when no text has been extracted yet, the file
    is extracted first; otherwise the text already stored on
    ``self.extractingText`` is reused and ``file_name`` is not read.

    A line is kept when it has more than two whitespace-separated tokens and
    its second token is one of ``self.extractingText.list_fruit``; the last
    token of such a line is parsed as the cost. Only the first extracted
    text is searched.

    Args:
        file_name: Path of the file to extract when nothing is stored yet.

    Returns:
        The parsed costs, in order of appearance.

    Raises:
        locale.Error: If the 'Dutch' locale is not available.
    """
    # setlocale() changes the locale of the whole process; it is needed so
    # locale.atof() accepts numbers written in the 'Dutch' locale format.
    locale.setlocale(locale.LC_ALL, locale='Dutch')
    reader = self.extractingText
    # `list_fruit` is always present on the reader, so it cannot tell whether
    # extraction ran; the extracted text itself is the reliable signal.
    # getattr() covers a reader on which the attribute was never set.
    if not getattr(reader, 'text_factuur_verdi', None):
        reader.extract_text_from_image(file_name)
    # NOTE(review): text left over from a previously extracted file is
    # reused as-is; confirm that callers never switch files in between.
    text = reader.text_factuur_verdi[0]
    return [
        locale.atof(items[-1])
        for items in (line.split() for line in text.split('\n'))
        if len(items) > 2 and items[1] in reader.list_fruit
    ]