How to reduce called parameters in methods?-CodePudding

I have a class and in that class I have a method that calls multiple methods in it.

But the problem I am facing now is that the method that calls the other methods passes the same parameter more than once.

And so when I call the method that contains the other method calls, it returns an empty list: [].

So this is the method with the multiple methods in it:

 def show_extracted_data_from_file(self, file_name):
     """Return the extracted invoice rows as tab-separated text lines.

     Text extraction is run on ``file_name`` first; afterwards the quantity,
     fruit-name and total-cost lists are combined element by element into one
     line per row. Rows stop at the shortest of the three lists.
     """
     self.extractingText.extract_text_from_image(file_name)
     quantities = self.filter_verdi_total_number_fruit()
     names = self.filter_verdi_fruit_name()
     costs = self.filter_verdi_total_fruit_cost(file_name)

     rows = []
     for row in zip(quantities, names, costs):
         rows.append("{} \t {} \t {}".format(*row))
     return "\n".join(rows)

and this is the method: filter_verdi_total_fruit_cost:

   def filter_verdi_total_fruit_cost(self, file_name):
       """Return the total cost of every recognised fruit line in ``file_name``.

       The file is run through ``self.extractingText.extract_text_from_image``
       and the first page of the resulting text is scanned line by line. A line
       counts when it has more than two whitespace-separated tokens and its
       second token is listed in ``self.extractingText.list_fruit``; the last
       token of such a line is parsed as a Dutch-formatted number.

       Returns a list of floats, empty when extraction produced no pages.
       Raises ``locale.Error`` when the 'Dutch' locale is not available.
       """
       # NOTE(review): 'Dutch' looks like a Windows locale name; other platforms
       # presumably need something like 'nl_NL.UTF-8' -- confirm for deployment.
       locale.setlocale(locale.LC_ALL, locale='Dutch')

       # Parse the extracted text, not ``file_name``: the path string contains
       # no invoice lines, so splitting it always produced an empty list.
       pages = self.extractingText.extract_text_from_image(file_name)
       if not pages:
           return []

       return [
           locale.atof(items[-1])
           for items in (line.split() for line in pages[0].split('\n'))
           if len(items) > 2 and items[1] in self.extractingText.list_fruit
       ]

this method returns the following data:

[123.2, 2772.0, 46.2, 577.5, 69.3, 3488.16, 137.5, 500.0, 1000.0, 2000.0, 1000.0, 381.25]

You can see that I am passing file_name twice.

And so when I call the method show_extracted_data_from_file in views.py:

# Fragment from views.py: this branch runs when the uploaded file's path ends in '.pdf'.
if uploadfile.image.path.endswith('.pdf'):
                    # Build the combined text for the uploaded file and print it for inspection.
                    content = filter_text.show_extracted_data_from_file(uploadfile.image.path)
                    print(content)

It produces an empty list: []

Question: how can I avoid passing the parameter file_name twice so that the correct results are returned?

These are the two other methods that I call in the combined method:

  def filter_verdi_total_number_fruit(self):
      """Return the number that precedes each known fruit name on the first page.

      Searches ``self.extractingText.text_factuur_verdi[0]`` for a (possibly
      dotted) number, optional whitespace, at least one non-word character and
      then one of the names in ``self.extractingText.list_fruit``. Only the
      number is captured, so the result is a list of strings in order of
      appearance; a capture may be empty when no digits precede the name.
      """
      # The `+` operators and `+` quantifiers had been stripped from this
      # statement, which left it syntactically invalid; they are restored here.
      fruit_alternatives = '|'.join(
          re.escape(word) for word in self.extractingText.list_fruit)
      regex = r"(\d*(?:\.\d+)*)\s*\W+(?:" + fruit_alternatives + ')'
      return re.findall(regex, self.extractingText.text_factuur_verdi[0])

    def filter_verdi_fruit_name(self):
        """Return each known fruit name found on the first page, in order.

        Uses the same pattern shape as the quantity lookup -- an optional
        (possibly dotted) number, optional whitespace, at least one non-word
        character, then a name from ``self.extractingText.list_fruit`` -- but
        captures the fruit name instead of the number.
        """
        # The `+` operators and `+` quantifiers had been stripped from this
        # statement, which left it syntactically invalid; they are restored here.
        fruit_alternatives = '|'.join(
            re.escape(word) for word in self.extractingText.list_fruit)
        regex = r"(?:\d*(?:\.\d+)*)\s*\W+(" + fruit_alternatives + ')'
        return re.findall(regex, self.extractingText.text_factuur_verdi[0])

So this is the other class:

class ExtractingTextFromFile:
    """Extracts text from the pages of a file via OCR and keeps it per page."""

    def __init__(self):
        # Instance attributes (set on every new extractor object).

        # Extracted text, one string per page; filled by
        # extract_text_from_image(). The attribute was previously misspelled
        # as ``tex_factuur_verdi``, so ``text_factuur_verdi`` did not exist
        # until the first extraction had run.
        self.text_factuur_verdi = []
        # Names that the filter methods look for in the extracted text.
        self.list_fruit = ['Appels', 'Ananas', 'Peen Waspeen',
                           'Tomaten Cherry', 'Sinaasappels',
                           'Watermeloenen', 'Rettich', 'Peren', 'Peen',
                           'Mandarijnen', 'Meloenen', 'Grapefruit', 'Rettich']

    def extract_text_from_image(self, filename):
        """Run OCR over every page of ``filename`` and return the page texts.

        The file is loaded at resolution 300 and converted to JPEG; each entry
        of the converted image's ``sequence`` is turned into a JPEG blob,
        opened with ``Image.open`` and passed to
        ``pytesseract.image_to_string`` with ``lang='eng'``.

        Any previously stored text is discarded. The returned list is the same
        object that is stored in ``self.text_factuur_verdi``.
        """
        # NOTE(review): `wi` is presumably wand.image.Image and `Image` the
        # PIL module -- confirm against the file's imports.
        self.text_factuur_verdi = []
        pdf_file = wi(filename=filename, resolution=300)
        all_images = pdf_file.convert('jpeg')

        for page in all_images.sequence:
            page_image = wi(image=page)
            jpeg_bytes = page_image.make_blob('jpeg')
            pil_image = Image.open(io.BytesIO(jpeg_bytes))

            text = pytesseract.image_to_string(pil_image, lang='eng')
            self.text_factuur_verdi.append(text)

        return self.text_factuur_verdi

CodePudding user response：

@AndrewRyan has the right idea

I presume calling extract_text_from_image just populates the attribute text_factuur_verdi (list_fruit is already set in __init__)
Two routes you can go, from what you are commenting you'll probably just go with #1.. but I gave #2 as another option in case you'd ever want to call filter_verdi_total_fruit_cost by itself

Path 1, Just remove it.

Note: filter_verdi_total_fruit_cost is only called from show_extracted_data_from_file

def show_extracted_data_from_file(self, file_name):
    """Extract ``file_name`` once and return its rows as tab-separated lines.

    The extraction result is kept on ``self.extractingText``; the three filter
    methods are then called without a file name and their results are combined
    element by element, stopping at the shortest list.
    """
    # Single extraction step shared by all three filters below.
    self.extractingText.extract_text_from_image(file_name)

    columns = (
        self.filter_verdi_total_number_fruit(),
        self.filter_verdi_fruit_name(),
        self.filter_verdi_total_fruit_cost(),
    )

    lines = ["{} \t {} \t {}".format(*row) for row in zip(*columns)]
    return "\n".join(lines)

def filter_verdi_total_fruit_cost(self):
    """Return the total cost of every recognised fruit line already extracted.

    Reads the first page of ``self.extractingText.text_factuur_verdi``, so
    extraction must have been run beforehand. A line counts when it has more
    than two whitespace-separated tokens and its second token is listed in
    ``self.extractingText.list_fruit``; the last token of such a line is
    parsed as a Dutch-formatted number.

    Returns a list of floats, empty when no text has been extracted.
    Raises ``locale.Error`` when the 'Dutch' locale is not available.
    """
    # NOTE(review): 'Dutch' looks like a Windows locale name; other platforms
    # presumably need something like 'nl_NL.UTF-8' -- confirm for deployment.
    locale.setlocale(locale.LC_ALL, locale='Dutch')

    # This version takes no ``file_name`` argument, so the old reference to it
    # raised NameError; the stored extraction result is parsed instead.
    pages = self.extractingText.text_factuur_verdi
    if not pages:
        return []

    return [
        locale.atof(items[-1])
        for items in (line.split() for line in pages[0].split('\n'))
        if len(items) > 2 and items[1] in self.extractingText.list_fruit
    ]

Path 2, Check if it's already extracted- if not, extract; if so, continue

Note: if you wanted to just call filter_verdi_total_fruit_cost

def show_extracted_data_from_file(self, file_name):
    """Extract ``file_name`` and return its rows as tab-separated lines.

    The cost filter also receives ``file_name`` in this variant. The three
    result lists are combined element by element, stopping at the shortest.
    """
    # Run the extraction before any of the filters is called.
    self.extractingText.extract_text_from_image(file_name)

    amounts = self.filter_verdi_total_number_fruit()
    fruits = self.filter_verdi_fruit_name()
    totals = self.filter_verdi_total_fruit_cost(file_name)

    template = "{} \t {} \t {}"
    formatted = (
        template.format(amount, fruit, total)
        for amount, fruit, total in zip(amounts, fruits, totals)
    )
    return "\n".join(formatted)

def filter_verdi_total_fruit_cost(self, file_name):
    """Return the total cost of every recognised fruit line.

    Uses the text already stored in ``self.extractingText.text_factuur_verdi``
    when there is any; otherwise ``file_name`` is extracted first. Stored text
    is reused as-is, so it is not checked against ``file_name``.

    Only the first page is scanned. A line counts when it has more than two
    whitespace-separated tokens and its second token is listed in
    ``self.extractingText.list_fruit``; the last token of such a line is
    parsed as a Dutch-formatted number.

    Returns a list of floats, empty when extraction produced no pages.
    Raises ``locale.Error`` when the 'Dutch' locale is not available.
    """
    # NOTE(review): 'Dutch' looks like a Windows locale name; other platforms
    # presumably need something like 'nl_NL.UTF-8' -- confirm for deployment.
    locale.setlocale(locale.LC_ALL, locale='Dutch')

    # The previous check, hasattr(self, 'list_fruit'), looked at the wrong
    # object and the wrong attribute, so it could not tell whether text had
    # been extracted. Test the stored page texts themselves.
    pages = getattr(self.extractingText, 'text_factuur_verdi', None)
    if not pages:
        pages = self.extractingText.extract_text_from_image(file_name)
    if not pages:
        return []

    # Parse the extracted text rather than the ``file_name`` path string.
    return [
        locale.atof(items[-1])
        for items in (line.split() for line in pages[0].split('\n'))
        if len(items) > 2 and items[1] in self.extractingText.list_fruit
    ]