Home > OS >  How to remove sheets based on array with python openpyxl
How to remove sheets based on array with python openpyxl

Time:09-15

I need to remove sheets that names are in array. Unfortunetely this: tempWb.remove(wsToRemoveNameArray[wsToRemoveIndex]) , and this: del tempWb[wsToRemoveNameArray[wsToRemoveIndex]] dont want to work with my code: Anyone know how to deal with it?

def splitExcelFiles(InputPath, OutputPath, fileNameArray):
for file in range(0, len(fileNameArray)):
    tempFile = InputPath   '\\'  fileNameArray[file]
    tempWb =   load_workbook(tempFile)
    wsToRemoveNameArray = []

    if(len(tempWb.sheetnames)==1):
        #new wb 
        tempWb.save(str(OutputPath)   '\\'   str(tempWb.sheetnames)   '.xlsx')
    else:
        for ws in range (0,len(tempWb.sheetnames)):
            newName = tempWb.sheetnames[ws]
            wsToRemoveNameArray = []

            #copyWs = tempWb.copy_worksheet[ws]

            # #This section will save the names to remove other sheets from ws
            for wsToRemoveName in range (0,len(tempWb.sheetnames)):
                if newName != tempWb.sheetnames[wsToRemoveName]:
                    #print(tempWb.sheetnames[wsToRemoveName])
                    wsToRemoveNameArray.append(str(tempWb.sheetnames[wsToRemoveName]))

            for wsToRemoveIndex in range (0, len(wsToRemoveNameArray)):
            #     tem
                #tempWb.remove(wsToRemoveNameArray[wsToRemoveIndex])
                #del tempWb[wsToRemoveNameArray[wsToRemoveIndex]]
            #     tempWb.

            # print(len(wsToRemoveNameArray))
            tempWb.save(str(OutputPath)   '\\'   newName   '.xlsx')

CodePudding user response:

First things first, some general tips:

  • Use the pathlib library whenever you deal with Paths. It makes things a lot easier. There is never a good reason to include the path delimiter in your code.

  • In python, it's common to write variables and functions with an underscore: save_path instead of savePath

Now that we have that out of the way, here is a tested example. Just change the directories to match yours.

from openpyxl import load_workbook, Workbook
from pathlib import Path
import shutil


def make_absolute_path(path, name, suffix=".xlsx"):
    input_file_path = path / name
    return input_file_path.with_suffix(suffix)


def remove_all_sheets_except_filename(input_path, output_path, filenames):
    output_files_path = []
    for i in range(0, len(filenames)):
        input_file_path = make_absolute_path(input_path, filenames[i])
        output_file_path = make_absolute_path(output_path, filenames[i])

        if not Path.is_file(input_file_path):
            print(f"Skipping {input_file_path}: Not valid file " f"path. ")
            continue

        shutil.copyfile(input_file_path, output_file_path)

        wb_source: Workbook = load_workbook(filename=output_file_path)
        sheets = wb_source.worksheets

        if len(sheets) == 1:
            save_path = make_absolute_path(output_path, str(wb_source.sheetnames[0]))
            wb_source.save(save_path)
            output_files_path.append(str(save_path))
        else:
            for sheet in wb_source.sheetnames:
                if not sheet == input_file_path.stem:
                    wb_source.remove(wb_source[sheet])

            if len(wb_source.worksheets) == 1:
                save_path = make_absolute_path(
                    output_path, str(wb_source.sheetnames[0])
                )
                wb_source.save(save_path)
                output_files_path.append(str(save_path))
            else:
                print(
                    f"Failed to process {input_file_path} with following "
                    f"sheets: {','.join(wb_source.worksheets)}."
                )
                raise ValueError("")

    return output_files_path


def main():

    # Adjust to where you have the xlsx files and where you want them
    input_directory = Path("path/to/your/xlsx/files")
    output_directory = Path("path/to/the/output/directory")

    file_names = ["input", "foo", "bar"]
    paths = remove_all_sheets_except_filename(
        input_directory, output_directory, file_names
    )


if __name__ == "__main__":
    main()

``



  • Related