How to extract a long view's code block into a smaller method? - CodePudding

I have a Django application. It has a long method in which a user can upload a file, and the content of the file is then shown in a textarea.

Because of the SOLID principles (single responsibility in particular), the code that is responsible for extracting the data from the file has to be in a separate method.

So this is the views.py:


class ReadingFile(View):
    """View that shows the upload form and displays an uploaded file's text."""

    def get(self, request):
        """Render "main/create_profile.html" with a fresh, unbound ``ProfileForm``."""
        form = ProfileForm()
        return render(request, "main/create_profile.html", {
            "form": form
        })
        
    # NOTE(review): declared without ``self``, so it cannot be called as a normal
    # instance method. Presumably the intended home for the PDF-extraction code
    # in ``post`` below -- confirm.
    def extractingtextfromimage():
        """Empty placeholder; does nothing yet."""
        pass

    def post(self, request):
        """Store the uploaded file and re-render the page with its content.

        When the submitted form is valid, the upload is saved through
        ``UploadFile``. If the upload's name ends with ``.pdf`` the stored file
        is converted to JPEG and every entry of the converted image's
        ``sequence`` (presumably one per PDF page) is run through
        ``pytesseract``; any other file is read as text. ``content`` is
        therefore a list of strings in the PDF case and a single string
        otherwise. When the form is invalid, the submitted form is rendered
        again and no ``content`` is passed to the template.
        """
        submitted_form = ProfileForm(request.POST, request.FILES)
        content = ''

        if submitted_form.is_valid():
            uploadfile = UploadFile(image=request.FILES["upload_file"])

            # str() of the uploaded file object; used below only for the
            # '.pdf' suffix check.
            name_of_file = str(request.FILES['upload_file'])
            uploadfile.save()
            print('path of the file is:::', uploadfile.image.name)            

            # The stored file is opened in text mode for every upload, but the
            # handle ``f`` is only read in the non-PDF branch; the PDF branch
            # goes through ``uploadfile.image.path`` instead.
            with open(os.path.join(settings.MEDIA_ROOT,
                                   f"{uploadfile.image}"), 'r') as f:

                print("Now its type is ", type(name_of_file))
                print(uploadfile.image.path)

                # reading PDF file
                if name_of_file.endswith('.pdf'):
                    # ``wi`` is presumably wand.image.Image (import not shown
                    # here) -- confirm.
                    pdfFile = wi(filename= uploadfile.image.path , resolution=300)
                    text_factuur_verdi = []

                    image = pdfFile.convert('jpeg')
                    imageBlobs = []

                    # First pass: turn every entry of the sequence into a JPEG blob.
                    for img in image.sequence:
                        imgPage = wi(image=img)
                        imageBlobs.append(imgPage.make_blob('jpeg'))

                    # Second pass: reopen each blob from memory and run
                    # pytesseract on it with lang='eng'.
                    for imgBlob in imageBlobs:
                        image = Image.open(io.BytesIO(imgBlob))
                        text = pytesseract.image_to_string(image, lang='eng')
                        text_factuur_verdi.append(text)

                    # ``content`` becomes the list of per-entry OCR strings.
                    content = text_factuur_verdi
                    print(text_factuur_verdi)
                    
                # ENDING Reading pdf file

                else:
                    # Non-PDF upload: show the file's text as-is.
                    content = f.read()
                    print(content)

            # Success: render a fresh form together with the extracted content.
            return render(request, "main/create_profile.html", {
                'form': ProfileForm(),
                "content": content
            })

        # Invalid form: render the submitted form again, without content.
        return render(request, "main/create_profile.html", {
            "form": submitted_form,
        })

My question is about the block of code between the comment `# reading PDF file` and the comment `# ENDING Reading pdf file`.

How can I move that block of code into a separate method?

I am really stuck on that part.

CodePudding user response：

Frankly, I don't understand what your problem is.

I can't test it, but I would simply move the code into a method, pass the values it needs as parameters, and use `return` to send back the result, so it could be something like this:

class ReadingFile(View):

    # ... other functions ...

    def read_pdf_file(self, uploadfile):
        """Return the OCR text found in the uploaded PDF.

        Args:
            uploadfile: object whose ``image.path`` is the path of the stored
                PDF file.

        Returns:
            A list holding one ``pytesseract`` result string per entry of the
            converted image's ``sequence``.
        """
        source_pdf = wi(filename=uploadfile.image.path, resolution=300)
        as_jpeg = source_pdf.convert('jpeg')

        # Stage 1: serialise every sequence entry to an in-memory JPEG blob.
        jpeg_blobs = [
            wi(image=frame).make_blob('jpeg')
            for frame in as_jpeg.sequence
        ]

        # Stage 2: reopen each blob from memory and OCR it.
        page_texts = [
            pytesseract.image_to_string(Image.open(io.BytesIO(blob)), lang='eng')
            for blob in jpeg_blobs
        ]

        print(page_texts)
        return page_texts

    def post(self, request):

        # Only the changed branch is shown; "# ... code ..." stands for the
        # parts of the original method that are left out here.
        # ... code ...

                # reading PDF file
                if name_of_file.endswith('.pdf'):

                    # The inline OCR code is replaced by one call to the helper.
                    content = self.read_pdf_file(uploadfile)

                # ENDING Reading pdf file
                else:
                    # ``f`` is presumably the handle from the elided
                    # ``with open(...)`` around this branch -- confirm.
                    content = f.read()

        # ... code ...

BTW:

I would reduce the code to a single for-loop and pass only the filename instead of `uploadfile`:

    def read_pdf_file(self, filename):
        """Return the OCR text of the PDF stored at ``filename``.

        Args:
            filename: path of the PDF file to read.

        Returns:
            A list holding one ``pytesseract`` result string per entry of the
            converted image's ``sequence``. Use ``'\\n'.join(...)`` on the
            result if a single string is needed.
        """
        page_texts = []

        source_pdf = wi(filename=filename, resolution=300)
        as_jpeg = source_pdf.convert('jpeg')

        # One pass: each entry goes blob -> in-memory image -> text.
        for frame in as_jpeg.sequence:
            jpeg_bytes = wi(image=frame).make_blob('jpeg')
            picture = Image.open(io.BytesIO(jpeg_bytes))
            page_texts.append(pytesseract.image_to_string(picture, lang='eng'))

        return page_texts

    # ... later ...

    # Call site: hand over only the stored file's path, not the whole object.
    content = self.read_pdf_file(uploadfile.image.path)

And I think the calling code should be:

# Choose the reader from the stored file's extension: OCR for PDFs, a plain
# text read for everything else.
if uploadfile.image.path.endswith('.pdf'):
    content = self.read_pdf_file(uploadfile.image.path)
else:
    # Assuming ``image`` is a Django file field on file-system storage,
    # ``image.path`` is already the absolute path of the stored file. Joining
    # it onto settings.MEDIA_ROOT adds nothing, because os.path.join discards
    # every component that precedes an absolute one.
    with open(uploadfile.image.path) as f:
        content = f.read()