Home > Back-end > Python: encrypting multiple PDF files in a loop, but from the second file onward each output contains the merged content of the previous files
Python: encrypting multiple PDF files in a loop, but from the second file onward each output contains the merged content of the previous files

Time:12-14

What I'm trying to do is read all the PDF files from a selected directory, loop over every PDF found there, and protect each file with a given password.

Let's say I have 5 files to protect with a given password. The first file is protected successfully and has the correct content, but the second through fifth files have their content merged with the content of the previous files.

For example, the second output file contains the first file's content followed by the second file's content, and the third output file contains the content of the first, second and third files.

This is the code :

import os
import datetime
from PyPDF2 import PdfFileReader, PdfFileWriter

def is_encrypted(filename: str) -> bool:
    """Report whether the PDF stored at *filename* is flagged as encrypted.

    The file is opened in binary mode and parsed with a non-strict
    ``PdfFileReader``; the reader's ``isEncrypted`` value is returned.
    """
    with open(filename, 'rb') as stream:
        reader = PdfFileReader(stream, strict=False)
        encrypted = reader.isEncrypted
    return encrypted

# Password-protect every PDF found in today's dated folder.
#
# The input folder is "<cwd>\<dd-mm-YYYY>". Each input file is expected to be
# named "<name>_<password>.pdf": the last "_"-separated token of the name is
# used as the password and the remainder becomes the output filename under
# "payslip-encrypted/".
curdate = datetime.date.today()
folder = os.getcwd() + "\\" + curdate.strftime("%d-%m-%Y")

pdf2merge = []

for filename in os.listdir(folder):
    #print(filename)
    if filename.endswith('.pdf'):
        pdf2merge.append(filename)

pdf2merge.sort()

# One writer is created here, before the loop, and is reused for every file.
pdf_writer = PdfFileWriter()

for counter, filename in enumerate(pdf2merge):
    path_file = folder + '/' + filename
    # Drop the extension, then split off the trailing "_<password>" token.
    splitExt = filename.split('.')
    splitExt.pop()
    joinName = '_'.join(splitExt)
    splitFile = joinName.split('_')
    password = splitFile.pop()
    enc_filename = str('_'.join(splitFile)) + ".pdf"
    output_file = "payslip-encrypted/" + enc_filename

    if is_encrypted(path_file):
        print("PDF File " + filename + " is already encrypted.")

    try:
        pdfFile = open(path_file, 'rb')
        pdf_reader = PdfFileReader(pdfFile, strict=False)

        # Append every page of the current input to the writer.
        for page_number in range(pdf_reader.numPages):
            pdf_writer.addPage(pdf_reader.getPage(page_number))
    except utils.PdfReadError:
        print("Error while reading PDF file " + filename)

    pdf_writer.encrypt(user_pwd=password, use_128bit=True)

    # The input file stays open until after write(); it is closed last.
    pdfOut = open(output_file, "wb")
    pdf_writer.write(pdfOut)
    pdfOut.close()
    pdfFile.close()

    print("PDF file " + os.getcwd() + "\\payslip-encrypted\\" + enc_filename + " encrypted successfully")

Please correct the code so that each output file contains only the content of its own input file.

CodePudding user response:

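Just move pdf_writer = PdfFileWriter() into the loop. A writer created once outside the loop keeps every page that has been added to it, so each output file also contains the pages of all the files processed before it.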

import os
import datetime
from PyPDF2 import PdfFileReader, PdfFileWriter

def is_encrypted(filename: str) -> bool:
    """Return the ``isEncrypted`` flag of the PDF located at *filename*.

    The file is read in binary mode through a ``PdfFileReader`` created with
    ``strict=False`` and is closed again before the function returns.
    """
    with open(filename, 'rb') as pdf_stream:
        encrypted_flag = PdfFileReader(pdf_stream, strict=False).isEncrypted
    return encrypted_flag

# Password-protect every PDF found in today's dated folder, one output file
# per input file.
#
# The input folder is "<cwd>/<dd-mm-YYYY>". Each input file is expected to be
# named "<name>_<password>.pdf": the last "_"-separated token of the name is
# used as the password and the remainder becomes the output filename under
# "payslip-encrypted/".

# The except clause below needs `utils`, which the import lines at the top of
# the listing do not bring into scope.
from PyPDF2 import utils

curdate = datetime.date.today()
folder = os.path.join(os.getcwd(), curdate.strftime("%d-%m-%Y"))

# Make sure the destination exists so opening the output file cannot fail
# merely because the directory is missing.
output_dir = "payslip-encrypted"
os.makedirs(output_dir, exist_ok=True)

pdf2merge = sorted(
    name for name in os.listdir(folder) if name.endswith('.pdf')
)

for filename in pdf2merge:
    path_file = os.path.join(folder, filename)

    # Drop the extension, then split off the trailing "_<password>" token.
    stem = '_'.join(filename.split('.')[:-1])
    name_parts = stem.split('_')
    password = name_parts.pop()
    enc_filename = '_'.join(name_parts) + ".pdf"
    output_file = os.path.join(output_dir, enc_filename)

    if is_encrypted(path_file):
        print("PDF File " + filename + " is already encrypted.")
        # Nothing to do for a file that is already protected.
        continue

    # A fresh writer per file: a shared writer would keep the pages of every
    # previously processed file and merge them into each later output.
    pdf_writer = PdfFileWriter()

    try:
        # The source must stay open until write() finishes, so the output is
        # written inside this `with` block.
        with open(path_file, 'rb') as pdf_file:
            pdf_reader = PdfFileReader(pdf_file, strict=False)

            for page_number in range(pdf_reader.numPages):
                pdf_writer.addPage(pdf_reader.getPage(page_number))

            pdf_writer.encrypt(user_pwd=password, use_128bit=True)

            with open(output_file, "wb") as pdf_out:
                pdf_writer.write(pdf_out)
    except utils.PdfReadError:
        print("Error while reading PDF file " + filename)
        # Do not report success for a file that could not be read.
        continue

    print("PDF file " + os.getcwd() + "\\payslip-encrypted\\" + enc_filename + " encrypted successfully")
  • Related