I'm trying to read the name of the folder at a specific position in a file path. My current code:
import os
# search for and input multiple files
def get_files(source):
    """Return the path of every file found under ``source``, recursively.

    Args:
        source: Directory to search.

    Returns:
        A list of paths, each built by joining the directory that
        ``os.walk`` is visiting with a file name found in it. The list is
        empty when ``os.walk`` yields nothing for ``source``.
    """
    matches = []
    # The sub-directory names reported by os.walk are not needed here:
    # os.walk descends into them on its own.
    for root, _dirnames, filenames in os.walk(source):
        matches.extend(os.path.join(root, filename) for filename in filenames)
    return matches
def parse(files):
    """Print the folder path and customer folder name of each file.

    For every path in ``files`` a one-element list is printed. It holds a
    dict with the keys ``'file_path'`` (the directory containing the file)
    and ``'customer_name'`` (the sixth backslash-separated component of
    that directory).

    NOTE(review): the indentation of this snippet was lost; ``print`` is
    assumed to run once per file because ``metadata`` is re-created for
    every file.

    Args:
        files: Iterable of file paths that use backslash separators.

    Raises:
        IndexError: If a file's directory has fewer than six
            backslash-separated components.
    """
    for file in files:
        xml_information = {}
        metadata = []
        # Get the file path
        filepath = os.path.dirname(file)
        xml_information['file_path'] = '%s' % filepath
        # Get customer name
        # Index 5 is the sixth component; a directory with only five
        # components has no such entry and raises IndexError here.
        customer = filepath.split("\\")[5]
        xml_information['customer_name'] = '%s' % customer
        metadata.append(xml_information)
        print(metadata)
# Root folder handed to get_files; every file found beneath it is parsed.
path = 'C:\\Users\\quan.nguyen\\SAGE\\Lania Thompson - Searching Project Files'
parse(files=get_files(source=path))
My program searches through the folders, finds the files, and reports back their folder paths. However, I would like to read the folder path as well as the name of the folder in the sixth position, which is the customer name. When I run customer = filepath.split("\\")[5]
it reports an error:
Traceback (most recent call last):
File "*hidden*", line 33, in <module>
parse(get_files(path))
File "*hidden*", line 26, in parse
customer = filepath.split("\\")[5]
~~~~~~~~~~~~~~~~~~~~^^^
IndexError: list index out of range
However, when I run it with customer = filepath.split("\\")[4]
the program works, but it reads the last folder specified in path,
which is Lania Thompson - Searching Project Files. The result is as follows:
[{'file_path': 'C:\\Users\\quan.nguyen\\SAGE\\Lania Thompson - Searching Project Files\\Hazor Ltd\\PCS Mah\\Machine', 'customer_name': 'Lania Thompson - Searching Project Files'}]
My expected result is Hazor Ltd:
[{'file_path': 'C:\\Users\\quan.nguyen\\SAGE\\Lania Thompson - Searching Project Files\\Hazor Ltd\\PCS Mah\\Machine', 'customer_name': 'Hazor Ltd'}]
The names are made up, except for my own name.
CodePudding user response:
So I have worked out the code using the pathlib library. The code is:
import os
from pathlib import Path
# search for and input multiple files
def get_files(source):
    """Return the path of every file found under ``source``, recursively.

    Args:
        source: Directory to search.

    Returns:
        A list of paths, each built by joining the directory that
        ``os.walk`` is visiting with a file name found in it. The list is
        empty when ``os.walk`` yields nothing for ``source``.
    """
    matches = []
    # The sub-directory names reported by os.walk are not needed here:
    # os.walk descends into them on its own.
    for root, _dirnames, filenames in os.walk(source):
        matches.extend(os.path.join(root, filename) for filename in filenames)
    return matches
def parse(files):
    """Print the customer folder name for each file path in ``files``.

    The customer is the component at index 5 of the directory that
    contains the file, as split by ``pathlib`` (``Path.parts`` counts the
    drive/root as index 0). For every file that has such a component, a
    one-element list holding ``{'Customer': <folder name>}`` is printed.

    Files whose directory has fewer than six components are skipped, so a
    file lying directly in a shallow folder neither raises ``IndexError``
    nor has its own name reported as the customer.

    Args:
        files: Iterable of file paths (strings or path-like objects).
    """
    for file in files:
        # Look at the folder containing the file, not the file itself, so
        # that a file name can never land on the customer index. Only the
        # path text is inspected; the file system is not scanned again.
        f = Path(file).parent
        if len(f.parts) <= 5:
            # No component at index 5: the file is not inside a customer
            # folder, so there is nothing to report for it.
            continue
        xml_information = {}
        metadata = []
        # Get customer name
        xml_information['Customer'] = f.parts[5]
        metadata.append(xml_information)
        print(metadata)
# Root folder handed to get_files; every file found beneath it is parsed.
path = 'C:\\Users\\quan.nguyen\\SAGE\\Lania Thompson - Searching Project Files'
parse(files=get_files(source=path))
Change the index [5]
in xml_information['Customer'] = f.parts[5]
to the position of the folder whose name you want to read.