DataFrame creation
# Creating DataFrame of image and mask
all_val_img = sorted([os.path.join(VAL_DIR,i) for i in os.listdir(VAL_DIR)])
all_val_mask = sorted([os.path.join(VAL_MASK_DIR,i) for i in os.listdir(VAL_MASK_DIR)])
#DataFrame
val_data_df = pd.DataFrame(zip(all_val_img,all_val_mask), columns = ['photos', 'mask'])
I have a DataFrame that looks like this (below), and I want to create a tensor dataset out of it.
photos mask
4691 dataset/val2017/000000546556.jpg dataset/panoptic_val2017/000000546556.png
1191 dataset/val2017/000000140286.jpg dataset/panoptic_val2017/000000140286.png
3041 dataset/val2017/000000351823.jpg dataset/panoptic_val2017/000000351823.png
2552 dataset/val2017/000000294163.jpg dataset/panoptic_val2017/000000294163.png
3070 dataset/val2017/000000356169.jpg dataset/panoptic_val2017/000000356169.png
I converted the DataFrame into a tensor dataset and want to map a function over it to load the images.
val_data = tf.data.Dataset.from_tensor_slices(val_data_df)
So I wrote a function to map over the dataset, but it did not work.
def make_it_image(image, label):
    image_raw = tf.io.read_file(image)
    image = tf.image.decode_image(image_raw)
    label_raw = tf.io.read_file(label)
    label = tf.image.decode_image(label_raw)
    # normalize
    image = image / 255
    label = label / 255
    return image, label
When I mapped the function, the result was:
val_data = val_data.map(make_it_image).cache().batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
Error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-72-c6fd8ebb8233> in <module>
----> 1 val_data = val_data.map(make_it_image).cache().batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
10 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/autograph/impl/api.py in wrapper(*args, **kwargs)
690 except Exception as e: # pylint:disable=broad-except
691 if hasattr(e, 'ag_error_metadata'):
--> 692 raise e.ag_error_metadata.to_exception(e)
693 else:
694 raise
TypeError: in user code:
TypeError: tf__make_it_image() missing 1 required positional argument: 'label'
Or, tell me how to create a dataset from two image directories, one of images and one of masks?
CodePudding user response:
You can create a dataset from images, masks, and labels. One approach is to read the image and mask files (TIFF in this example) into an HDF5 database buffer, then build your dataset from that buffer.
Sample: reading TIFF images, masks, and labels into an HDF5 database buffer.
import os
from os.path import exists
import tensorflow as tf
import tensorflow_io as tfio
import h5py
import matplotlib.pyplot as plt
import random
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
None
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
config = tf.config.experimental.set_memory_growth(physical_devices[0], True)
print(physical_devices)
print(config)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Variables
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
PATH_FILE = os.path.join('F:\\datasets\\downloads\\Actors\\train\\Pikaploy', '*.tif')
PATH_MASK = os.path.join('F:\\datasets\\downloads\\Actors\\train\\Pikaploy', '*.tif')
files = tf.data.Dataset.list_files(PATH_FILE)
filters = tf.data.Dataset.list_files(PATH_MASK)
labels = tf.ones( shape=(len(files), 1), dtype=tf.dtypes.int32, name="label" )
database_buffer = "F:\\models\\buffer\\" os.path.basename(__file__).split('.')[0] "\\TF_DataSets_01.h5"
database_buffer_dir = os.path.dirname(database_buffer)
if not exists(database_buffer_dir):
    os.mkdir(database_buffer_dir)
    print("Create directory: " + database_buffer_dir)
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
: Functions
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""
def create(datasetpath, image_datasetname, n, values, imagemask_datasetname, m, mask_values, label_datasetname, o, labels):
    with h5py.File(datasetpath, mode='a') as h5f:
        grp = h5f.create_group('group_01')
        dset = []
        dset.append(grp.create_dataset(image_datasetname, (n, 32 * 32 * 4), maxshape=(100, None), dtype='f', chunks=(n, 32 * 32 * 4)))
        dset.append(grp.create_dataset(imagemask_datasetname, (m, 32 * 32 * 4), maxshape=(100, None), dtype='f', chunks=(m, 32 * 32 * 4)))
        dset.append(grp.create_dataset(label_datasetname, (o, 1), maxshape=(100, None), dtype='f', chunks=(o, 1)))
        # Images: decode each TIFF, resize, and store flattened
        icount = 0
        for file in values.take(n):
            image = tf.io.read_file(file)
            image = tfio.experimental.image.decode_tiff(image, index=0)
            image = tf.image.resize(image, [32, 32], method='nearest')
            dset[0][icount] = tf.constant(image, shape=(1, 32 * 32 * 4))
            icount = icount + 1
        # Masks: same decode / resize / flatten path
        icount = 0
        for file in mask_values.take(m):
            image = tf.io.read_file(file)
            image = tfio.experimental.image.decode_tiff(image, index=0)
            image = tf.image.resize(image, [32, 32], method='nearest')
            dset[1][icount] = tf.constant(image, shape=(1, 32 * 32 * 4))
            icount = icount + 1
        # Labels
        for icount in range(o):
            dset[2][icount] = tf.constant(labels[icount], shape=(1, 1))
        h5f.flush()
    return dset
# Invoke method
dset = create(database_buffer, 'image', 5, files, 'mask', 5, filters, 'label', 5, labels)
# Visualize dataset train sample
hdf5_file = h5py.File(database_buffer, mode='r')
print( hdf5_file['group_01']['image'][0: 5] )
print( hdf5_file['group_01']['mask'][0: 5] )
print( hdf5_file['group_01']['label'][0: 5] )
# random pickup
index = random.randint(0, 4)  # randint is inclusive; the buffer holds 5 samples (indices 0-4)
image = hdf5_file['group_01']['image'][index]
mask = hdf5_file['group_01']['mask'][index]
label = hdf5_file['group_01']['label'][index]
image = tf.keras.preprocessing.image.array_to_img(
    tf.constant(image, shape=(32, 32, 4)),
    data_format=None,
    scale=True
)
plt.imshow(image)
plt.show()
print( ' ... ' )
Output:
C:\Python310>python.exe F:\temp\Python\test_tf_image_mask.py
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
None
2022-10-07 21:38:37.643829: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-10-07 21:38:38.181025: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4632 MB memory: -> device: 0, name: NVIDIA GeForce GTX 1060 6GB, pci bus id: 0000:01:00.0, compute capability: 6.1
2022-10-07 21:38:38.210686: I tensorflow_io/core/kernels/cpu_check.cc:128] Your CPU supports instructions that this TensorFlow IO binary was not compiled to use: SSE SSE2 SSE3 SSE4.1 SSE4.2 AVX AVX2 FMA
[[220. 220. 220. ... 16. 16. 255.]
[214. 212. 200. ... 155. 123. 255.]
[241. 226. 207. ... 7. 5. 255.]
[207. 111. 69. ... 36. 27. 255.]
[ 92. 97. 103. ... 116. 128. 255.]]
[[125. 126. 121. ... 61. 60. 255.]
[207. 111. 69. ... 36. 27. 255.]
[ 92. 97. 103. ... 116. 128. 255.]
[ 67. 67. 67. ... 223. 223. 255.]
[182. 175. 165. ... 226. 216. 255.]]
[[1.]
[1.]
[1.]
[1.]
[1.]]
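If you then want a tf.data.Dataset back out of the buffer, you can read the arrays from the HDF5 file and slice them. A minimal sketch, assuming the group_01 layout and the flattened 32x32x4 shape used above:
import h5py
import tensorflow as tf

# Read the stored arrays back from the HDF5 buffer created above
with h5py.File(database_buffer, mode='r') as h5f:
    images = h5f['group_01']['image'][:]   # shape (5, 32*32*4)
    masks = h5f['group_01']['mask'][:]     # shape (5, 32*32*4)
    labels = h5f['group_01']['label'][:]   # shape (5, 1)

# Restore the spatial shape and build a dataset of (image, mask, label) triples
images = images.reshape(-1, 32, 32, 4)
masks = masks.reshape(-1, 32, 32, 4)
dataset = tf.data.Dataset.from_tensor_slices((images, masks, labels))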
CodePudding user response:
Combine the data. Passing the whole DataFrame to from_tensor_slices makes each element a single rank-1 string tensor of shape (2,), which is why your two-argument mapping function failed with the TypeError above. Pass the two columns as a tuple instead, so each element becomes an (image_path, mask_path) pair:
val_data = tf.data.Dataset.from_tensor_slices((np.array(all_val_img),
                                               np.array(all_val_mask)))
Map a decoding function over the dataset:
def make_image(x, y):
    image = tf.io.read_file(x)
    image = tf.image.decode_png(image, channels=3)
    image = image / 255
    label = tf.io.read_file(y)
    label = tf.image.decode_png(label, channels=3)
    label = label / 255
    return image, label
val_data = val_data.map(make_image)
and it works:
val_data
# <MapDataset element_spec=(TensorSpec(shape=(None, None, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, None, 3), dtype=tf.float32, name=None))>
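From here the rest of the question's pipeline applies, with one caveat: the element_spec above has shape (None, None, 3) because the COCO images vary in size, so batch will fail on ragged shapes unless each pair is resized to a fixed size first. A minimal sketch, using a hypothetical 256x256 target size and the BATCH_SIZE from the question:
IMG_SIZE = 256  # hypothetical target size; use whatever your model expects

def resize_pair(image, label):
    # Resize image and mask to a fixed shape so batching works;
    # nearest-neighbor keeps mask values from being interpolated
    image = tf.image.resize(image, [IMG_SIZE, IMG_SIZE])
    label = tf.image.resize(label, [IMG_SIZE, IMG_SIZE], method='nearest')
    return image, label

val_data = (val_data
            .map(resize_pair, num_parallel_calls=tf.data.AUTOTUNE)
            .cache()
            .batch(BATCH_SIZE)
            .prefetch(tf.data.AUTOTUNE))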