Home > other >  Create subfolders based on image labels in csv
Create subfolders based on image labels in csv

Time:10-21

I want to create subfolders with the related images for each class. Right now, I only have a single folder with all images. I created a .csv with image ids and labels. I was able to create the subfolders based on the labels, but I can't move the images into the related subfolders.

My folder structure looks like this:

  • data (images)
    • img1.jpg
    • img2.jpg
    • ...jpg
  • labels.csv (image_id, label)
  • notebook.ipynb
import shutil, os
import pandas as pd

# Load the (Image_id, Manu_Series_Year) table and order it by label.
labels = pd.read_csv("labels.csv")
labels = labels.sort_values('Manu_Series_Year')

# One entry per distinct label; each one becomes a subfolder name.
class_names = list(labels.Manu_Series_Year.unique())

train_images = 'train'  # folder the images are read from
train_cat = 'train_'    # parent folder for the per-class subfolders

#creating subfolders
for c in class_names:
    # exist_ok=True lets the script be re-run without FileExistsError.
    os.makedirs(os.path.join(train_cat, c), exist_ok=True)

#moving the image files to their respective categories
for c in class_names: # Category Name
    dest = os.path.join(train_cat, c)
    for i in labels[labels['Manu_Series_Year']==c]['Image_id']: # Image Id
        # str() because Image_id may be parsed as a number by read_csv.
        # NOTE(review): the resulting path must match the file name on disk
        # exactly (presumably including the extension, e.g. ".jpg") --
        # otherwise shutil.move raises FileNotFoundError.
        get_image = os.path.join(train_images, str(i)) # Path to Images
        shutil.move(get_image, dest)
Traceback 
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.9_3.9.2032.0_x64__qbz5n2kfra8p0\lib\shutil.py in move(src, dst, copy_function)
    813     try:
--> 814         os.rename(src, real_dst)
    815     except OSError:

FileNotFoundError: [WinError 3] Das System kann den angegebenen Pfad nicht finden: 'C://Users/deniz/OneDrive/MA/Dataset/archive_unbekannt/train\\1' -> 'C://Users/deniz/OneDrive/MA/Dataset/archive_unbekannt/train_/Acura ILX 2013'

During handling of the above exception, another exception occurred:

FileNotFoundError                         Traceback (most recent call last)
<ipython-input-28-8d99c8eb764e> in <module>
     18     for i in list(labels[labels['Manu_Series_Year']==c]['Image_id']): # Image Id
     19         get_image = os.path.join('C://Users/deniz/OneDrive/MA/Dataset/archive_unbekannt/train', str(i)) # Path to Images
---> 20         move_image_to_cat = shutil.move(get_image, 'C://Users/deniz/OneDrive/MA/Dataset/archive_unbekannt/train_/' + c)

C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.9_3.9.2032.0_x64__qbz5n2kfra8p0\lib\shutil.py in move(src, dst, copy_function)
    832             rmtree(src)
    833         else:
--> 834             copy_function(src, real_dst)
    835             os.unlink(src)
    836     return real_dst

C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.9_3.9.2032.0_x64__qbz5n2kfra8p0\lib\shutil.py in copy2(src, dst, follow_symlinks)
    441     if os.path.isdir(dst):
    442         dst = os.path.join(dst, os.path.basename(src))
--> 443     copyfile(src, dst, follow_symlinks=follow_symlinks)
    444     copystat(src, dst, follow_symlinks=follow_symlinks)
    445     return dst

C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.9_3.9.2032.0_x64__qbz5n2kfra8p0\lib\shutil.py in copyfile(src, dst, follow_symlinks)
    263     else:
    264         try:
--> 265             with open(src, 'rb') as fsrc, open(dst, 'wb') as fdst:
    266                 # macOS
    267                 if _HAS_FCOPYFILE:

FileNotFoundError: [Errno 2] No such file or directory: 'train\\1'

UPDATE

import shutil, os
import pandas as pd

# Load the (Image_id, Manu_Series_Year) table and order it by label.
labels = pd.read_csv("labels.csv")
labels = labels.sort_values('Manu_Series_Year')

# One entry per distinct label.
class_names = list(labels.Manu_Series_Year.unique())

train_images = 'data'  # folder the images are read from
train_cat = 'data_'    # prefix for the per-class destination folders

#creating subfolders
for c in class_names:
    # Destination folder is the prefix directly followed by the class name.
    dest = train_cat + c
    # Create the folder the images are actually moved into; if it did not
    # exist, shutil.move would rename the first image to `dest` instead.
    os.makedirs(dest, exist_ok=True)
    for i in labels[labels['Manu_Series_Year']==c]['Image_id']: # Image Id
        get_image = os.path.join(train_images, str(i)) # Path to Images
        shutil.move(get_image, dest)

UPDATE2

# should go in the right direction
# works
import shutil, os
import pandas as pd

# read label
labels = pd.read_csv("labels.csv")
# sort label
labels = labels.sort_values('Manu_Series_Year')

# take unique labels
class_names = list(labels.Manu_Series_Year.unique())

# loc of train data
train_images = 'data'
# loc of labels with img
train_cat = 'data_'

#creating subfolders
for c in class_names:
    # One subfolder per class underneath train_cat.
    dest = os.path.join(train_cat, c)
    # exist_ok=True lets the script be re-run without FileExistsError.
    os.makedirs(dest, exist_ok=True)
    for i in labels[labels['Manu_Series_Year']==c]['Image_id']: # Image Id
        # str() so that an Image_id parsed as a number does not make
        # os.path.join raise TypeError.
        get_image = os.path.join(train_images, str(i)) # Path to Images
        shutil.move(get_image, dest)

CodePudding user response:

I believe it's this part:

#creating subfolders
for i in class_names:
    os.makedirs(os.path.join('train_', i))

that creates folders like root/train_/[class_name]

then you try to move your images with:

move_image_to_cat = shutil.move(get_image, 'train_/' + c)

Suggestions

  • Just do one single loop over your class names (you're looping twice)
  • Store the destination path to a variable

Code:

#creating subfolders and moving the images in a single pass
for c in class_names:
    # Build train_/<class name>; os.path.join inserts the path separator.
    dest = os.path.join('train_', c)
    # exist_ok=True lets the script be re-run without FileExistsError.
    os.makedirs(dest, exist_ok=True)
    for i in labels[labels['Manu_Series_Year']==c]['Image_id']: # Image Id
        # str() so that an Image_id parsed as a number does not make
        # os.path.join raise TypeError.
        get_image = os.path.join('data', str(i)) # Path to Images
        shutil.move(get_image, dest)
  • Related