I have large sets of files, for example:
965_0000000005_0000000001_20211105153826.json
965_0000000005_0000000001_20211105153826.jpg
965_0000000005_0000000002_20211105153826.json
965_0000000005_0000000002_20211105153826.jpg
965_0000000005_0000000003_20211105153826.json
965_0000000005_0000000003_20211105153826.json
.
.
.
965_0000000005_0000000010_20211105153826.json
965_0000000005_0000000010_20211105153826.jpg
965_0000000006_0000000001_20211105153826.json
965_0000000006_0000000001_20211105153826.json
965_0000000007_0000000001_20211105153826.jpg
965_0000000007_0000000001_20211105153826.json
965_0000000008_0000000001_20211105153826.jpg
965_0000000008_0000000001_20211105153826.json
965_0000000009_0000000001_20211105153826.json
965_0000000009_0000000002_20211105153826.jpg
965_0000000010_0000000002_20211105153826.json
965_0000000010_0000000003_20211105153826.jpg
965_0000000011_0000000001_20211105153826.json
965_0000000011_0000000001_20211105153826.jpg
965_0000000011_0000000002_20211105153826.json
965_0000000011_0000000002_20211105153826.jpg
............
965_0000000011_0000000010_20211105153826.json
965_0000000011_0000000010_20211105153826.jpg
I want to copy and sort them together by giving input like:
Enter product numbers:965-1000;100-110; 71
output :
965...jpeg file ok copied
965...json file ok copied
...
71 file doesn't exist.
The files should then be sorted into folders like this:
20220627
if 20220627_XXXX_NG_2_anomaly else 20220627_XXXX_OK_2_nomaly
gray_images -jpeg files
5_0000000001_
5_0000000002_
5_0000000003_
5_0000000004_
5_0000000010_
Json -json files
5_0000000001_
5_0000000002_
5_0000000003_
5_0000000004_
5_0000000010_
if 20220627_XXXX_NG_3_anomaly
else 20220627_XXXX_OK_3_nomaly
gray_images -jpeg files
6_0000000001_
7_0000000001_
8_0000000001_
9_0000000001_
10_0000000001_
Json -json files
6_0000000001_
7_0000000001_
8_0000000001_
9_0000000001_
10_0000000001_
if 20220627_XXXX_NG_4_anomaly else 20220627_XXXX_OK_4_nomaly
gray_images -jpeg files
11_000000001_
.
.
11_0000000010_
Json -json files
11_000000001_
.
.
11_0000000010_
Each product has 25 JPEG files (10-5-10); the number of JSON files depends on the annotation. I am stuck on sorting the files. Please help me with the programming.
CodePudding user response:
One easy method of separating files into specific subdirectories is to use the os.path
library: incrementally build the subdirectory names until you reach the base directory, then copy each file there based on its file extension.
For example:
import os
def output_path(filename, status, product, outpaths):
    """Build the destination path for *filename* inside today's dated folder.

    The result has the form
    ``<YYYYMMDD>/<YYYYMMDD>_<product>_<num>_<status>/json/<filename>``.

    Args:
        filename: Underscore-separated file name; its second field selects
            the group the file belongs to.
        status: Text used verbatim as the last part of the subdirectory name.
        product: Product name embedded in the subdirectory name.
        outpaths: Dict mapping a group field to its running number. It is
            updated in place, so pass the same dict for every file of a run.

    Returns:
        The relative destination path as a string. Nothing is created on disk.
    """
    # Format the date explicitly: str(dt.today()) also carries the time of day
    # (with microseconds), which would give every call a different folder name
    # containing spaces and colons.
    today = dt.today().strftime("%Y%m%d")
    parts = filename.split("_")
    # The first time a group is seen it receives the next free number;
    # len(outpaths) is evaluated before the key is inserted.
    num = outpaths.setdefault(parts[1], len(outpaths))
    subdir = "_".join([today, product, str(num), status])
    return os.path.join(today, subdir, "json", filename)
If you are still working on the same program as last time you could integrate it like this.
import os
import shutil
from datetime import datetime as dt
def gen_range(text):
    """Expand a ``"start-end"`` string into the list of integers it covers.

    Both endpoints are included, so ``"561-568"`` yields 561 through 568.
    Whitespace around either number is ignored.

    Raises:
        ValueError: If *text* is not exactly two integers joined by ``-``.
    """
    start, end = (int(part) for part in text.split("-"))
    # range() excludes its stop value; add one so the upper bound the user
    # typed is part of the result.
    return list(range(start, end + 1))
def get_user_input():
    """Prompt for product numbers and return them as a list of strings.

    The answer is a ``;``-separated list whose items are either a single
    number (``123``) or a range (``561-568``) that is expanded by
    ``gen_range``. Whitespace around items is ignored and empty items (for
    example after a trailing ``;``) are skipped.

    Returns:
        The requested numbers as strings, in the order they were entered.

    Raises:
        ValueError: If an item is neither a number nor a range, or if no
            number was entered at all.
    """
    nums = input("Enter search values - example: (123;135;561-568;12)\n")
    values = []
    for token in nums.split(";"):
        token = token.strip()
        if not token:
            continue
        if "-" in token:
            # Extend rather than assign, so a range does not discard the
            # values collected before it.
            values.extend(gen_range(token))
        elif token.isdigit():
            # Go through int so single values are normalised the same way
            # as the numbers produced by a range.
            values.append(int(token))
        else:
            raise ValueError(f"invalid search value: {token!r}")
    if not values:
        raise ValueError("no search values entered")
    return [str(value) for value in values]
def get_input_values():
    """Interactively collect the parameters for one copy run.

    Asks for the product numbers, the product name and the status, echoes
    them back and repeats the whole dialogue until the user confirms.

    Returns:
        A dict with the keys ``"numbers"``, ``"status"`` and ``"product"``.
    """
    while True:
        try:
            nums = get_user_input()
        except ValueError as err:
            # A typo in the numbers should re-ask, not abort the program.
            detail = f": {err}" if str(err) else ""
            print(f"Invalid search values{detail}")
            continue
        product = input("Enter product name: ")
        # Strip so that an answer such as "1 " is still recognised.
        status = input("Enter the product status\nEnter 0 for OK\nEnter 1 for NG\nEnter other for others\n").strip()
        d = {"numbers": nums, "status": status, "product": product}
        print(d)
        correct = input("Is the above information correct? yes/no: ")
        # Accept "yes"/"y" in any letter case; anything else starts over.
        if correct.strip().lower() in ("yes", "y"):
            return d
def output_path(filename, status, product, outpaths):
    """Return the destination path for *filename*, creating its directory.

    The result has the form
    ``<YYYYMMDD>/<YYYYMMDD>_<product>_<num>_<label>/json/<filename>`` where
    ``<label>`` is ``anomaly`` for status ``"1"`` and ``nomaly`` otherwise.

    Args:
        filename: Underscore-separated file name; its second field selects
            the group the file belongs to.
        status: Status code as entered by the user (``"0"`` for OK, ``"1"``
            for NG, anything else for others).
        product: Product name embedded in the subdirectory name.
        outpaths: Dict mapping a group field to its running number. It is
            updated in place, so pass the same dict for every file of a run.

    Returns:
        The relative destination path as a string. The directory part is
        created on disk if it does not exist yet.
    """
    # Format the date explicitly: str(dt.today()) also carries the time of day
    # (with microseconds), which would give every file its own folder whose
    # name contains spaces and colons.
    today = dt.today().strftime("%Y%m%d")
    parts = filename.split("_")
    # The first time a group is seen it receives the next free number;
    # len(outpaths) is evaluated before the key is inserted.
    num = outpaths.setdefault(parts[1], len(outpaths))
    # The prompt defines 1 as NG, and NG is the anomalous case; every other
    # answer is filed under "nomaly".
    status = "anomaly" if status == "1" else "nomaly"
    subdir = "_".join([today, product, str(num), status])
    path = os.path.join(today, subdir, "json")
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)
    return os.path.join(path, filename)
def copyfiles(numbers=None, status=None, product=None):
    """Copy the ``.json`` files of the requested products into sorted folders.

    Walks the current directory tree, and for every ``.json`` file whose
    first underscore-separated field is one of *numbers*, copies it to the
    path returned by ``output_path``. One line is printed per copied or
    failed file, and one line per requested number that matched no file.

    Args:
        numbers: Product numbers to look for, as strings. ``None`` is
            treated as an empty list.
        status: Status code passed through to ``output_path``.
        product: Product name passed through to ``output_path``.
    """
    # Tolerate the declared default of None instead of failing on it.
    requested = list(numbers) if numbers else []
    wanted = set(requested)  # constant-time membership test per file
    found = set()
    outpaths = {}
    # NOTE(review): the walk starts at the directory that also receives the
    # output, so dated folders left by an earlier run are scanned again.
    # Confirm whether they should be excluded.
    for root, _, files in os.walk(os.path.normcase('./')):
        for filename in files:
            part = filename.split('_')[0]
            ext = os.path.splitext(filename)[1]
            if part not in wanted or ext.lower() != ".json":
                continue
            found.add(part)
            origin = os.path.join(root, filename)
            try:
                dest = output_path(filename, status, product, outpaths)
                shutil.copyfile(origin, dest)
            except OSError as err:
                # Only file-system failures are expected here; report the
                # reason instead of hiding it behind a bare except.
                print(f"NG==>No {part} failed to copy: {err}")
            else:
                print(f"OK==>No {filename} has been copied.")
    for number in requested:
        if number not in found:
            print(f"NG==>No {number} does not exist")
if __name__ == "__main__":
    # Ask the user for the run parameters, then hand the confirmed answers
    # to copyfiles as keyword arguments.
    copyfiles(**get_input_values())