I would like to create the code which extracts the certaine data and its file name from multiple excels to .txt format. I wrote the code as below, but it shows the all file names in folder to each data in .txt format. How to extract the each file name..? My goal is to have the extracted data displayed as "filename" "data from column10" in .txt format.
import pathlib
import openpyxl
import os.path
import glob
f = open('data.txt', 'w')
path = pathlib.Path(r"..\file")
for path_obj in path.glob("*.xlsx"):
wb = openpyxl.load_workbook(path_obj)
sheetnames = wb.sheetnames
for sheetname in sheetnames:
sheet = wb[sheetname]
for row in range(2, sheet.max_row 1):
if sheet["A" str(row)].fill.start_color.index == 'FFFF0000':
file_path = glob.glob(r"C:\\python\\file\*.xlsx")
name_list = [os.path.splitext(os.path.basename(file))[0] for file in file_path]
f.write(str(name_list) " ")
f.write(str(sheet.cell(row=row, column=10).value))
f.write("\n")
f.close()
CodePudding user response:
Please see if this is what you are looking for. As mentioned by Vlad, I don't believe you need to use glob twice. The path_obj
already has the file name. So, update you code like below and see if it is what you are looking for...
import glob, os
import pathlib
import openpyxl
from openpyxl.styles import PatternFill
f = open('data.txt', 'w')
path = pathlib.Path(r"SampleDir") ##My test dir, you can change it
for path_obj in path.glob("*.xlsx"):
wb = openpyxl.load_workbook(path_obj)
sheetnames = wb.sheetnames
for sheetname in sheetnames:
sheet = wb[sheetname]
for row in range(2, sheet.max_row 1):
if sheet["A" str(row)].fill.start_color.index == 'FFFF0000':
#f.write(str(path_obj) " ") ## This will add the file name incl. path
f.write(os.path.basename(path_obj) " ") ## REPLACE for only filename
f.write(str(sheet.cell(row=row, column=10).value))
f.write("\n")
f.close()