I have a function that organizes files in a particular directory on the basis of its name,
Basically what the function does is split the files in the directory based on their name then create folders with that name, then all the files containing that name will be moved into that folder. For example if there are two files wave.png
and wave-edited.png
it will create a folder named wave
and because those two files contain the keyword wave
they will be moved into that folder. I am stuck figuring out how to get the keyword
List of file_names = ['ghosts-edited.png', 'ghosts.png', 'wave.png', 'wave-edited.png', '10-14-day', '12-11-day']
Expected output
['ghosts', 'wave', 'day']
Code:
def name_category():
sub_file_names = []
file_names = []
delimiters = ['.', ',', '!', ' ', '-', ';', '?', '*', '!', '@', '#', '$', '%', '^', '&', '(', ')', '_', '/', '|', '<', '>']
try:
for filename in os.listdir(folder_to_track):
filename = filename.lower()
file_names.append(filename)
sub_file_names.append(max(re.findall(r'[A-Za-z] ',filename),key=len)) # I want to replace this method
sub_file_names = list(set(sub_file_names))
file_mappings = collections.defaultdict()
for filename in os.listdir(folder_to_track):
if not os.path.isdir(os.path.join(folder_to_track, filename)):
for sub_file_name in sub_file_names:
file_mappings.setdefault(sub_file_name, []).append(filename)
for folder_name, folder_items in file_mappings.items():
folder_path = os.path.join(folder_to_track, folder_name)
if not os.path.exists(folder_path):
os.mkdir(folder_path)
for filename in file_names:
filename = filename.lower()
i = 1
regexPattern = '|'.join(map(re.escape, delimiters))
splittedstring = re.split(regexPattern, filename, 0)
if folder_name in splittedstring:
new_name = filename
file_exits = os.path.isfile(folder_path '\\' new_name)
while file_exits:
i = 1
new_name = os.path.splitext(folder_to_track '\\' new_name)[0] str(i) os.path.splitext(folder_to_track '\\' new_name)[1]
new_name = new_name.split("\\")[4]
file_exits = os.path.isfile(folder_path "\\" new_name)
src = folder_to_track "\\" filename
new_name = folder_path "\\" new_name
os.rename(src, new_name)
except Exception as e:
print(e)
sub_file_names when printed:
['ghosts', 'wave', 'edited']
Right now I am filtering the keywords using the biggest word in the filename called sub_file_name.
CodePudding user response:
IIUC, you would like a method that could obtain the names before the first non-word character.
Code
def find_prefixes(strings):
'''
Finds common prefix in strings before non-alphanumeric character
'''
prefixes = set() # set of prefixes
pattern = re.compile('[^a-zA-Z0-9]') # pattern to detect non-alphanumeric character
for string in strings:
# Add first non-integer list
arr = pattern.split(string) # single split on non-letter character
for prefix in arr:
if not prefix.isdigit(): # first that is not a number
prefixes.add(prefix) # add to prefix
break
return list(prefixes)
Test
file_names = ['10-12-day', '12-11-day', 'ghosts-edited.png', 'ghosts.png', 'wave.png', 'wave-edited.png']
print(find_prefixes(file_names))
# Output: ['wave', 'ghosts', 'day']
CodePudding user response:
You can create a dictionary where keys will be desired folder names and values will be lists with filenames. For example:
names = [
"ghosts-edited.png",
"ghosts.png",
"wave.png",
"wave-edited.png",
"another.png",
"just-edited.png",
]
out = {}
for n in names:
if "-edited" in n:
out.setdefault(n.rsplit("-", maxsplit=1)[0], []).append(n)
else:
out.setdefault(n.split(".")[0], []).append(n)
print(out)
Prints:
{
"ghosts": ["ghosts-edited.png", "ghosts.png"],
"wave": ["wave.png", "wave-edited.png"],
"another": ["another.png"],
"just": ["just-edited.png"],
}