ValueError: Cannot feed value of shape (20, 3000) for Tensor 'Placeholder:0', which has sh-CodePudding

as an exercise I am trying to train a classification model using Tensorflow v1 without using keras nor sklearn nor any other library that would greatly ease my life.. lol.

I keep getting this error though and I can't figure out what's wrong about my code :

# imports
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O

import os,shutil, cv2, itertools

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior() # use Tensorflow v1

For simplicity I resized every image to 64 x 64.

Since there are 3 channels, size of input vector X wiz n is 64 x 64 x 3 (=12288).

To reduce computation time I only use 3000 images from the training data.

TRAIN_DATA_DIR_PREFIX = '/kaggle/input/cat-and-dog/training_set/training_set/'
TRAIN_DATA_DOG_DIR = TRAIN_DATA_DIR_PREFIX   'dogs/'
TRAIN_DATA_CAT_DIR = TRAIN_DATA_DIR_PREFIX   'cats/'

N_x = 64 * 64 * 3

TRAIN_DOG = [(TRAIN_DATA_DOG_DIR i, 1) for i in os.listdir(TRAIN_DATA_DOG_DIR) if i.endswith(".jpg")]
TRAIN_DOG = TRAIN_DOG[:1500]

TRAIN_CAT = [(TRAIN_DATA_CAT_DIR i, 0) for i in os.listdir(TRAIN_DATA_CAT_DIR) if i.endswith(".jpg")]
TRAIN_CAT = TRAIN_CAT[:1500]

TRAIN_DATA = TRAIN_DOG   TRAIN_CAT

print('TRAIN_DATA :', len(TRAIN_DATA))

def read_image(file_path):
    image = cv2.imread(file_path, cv2.IMREAD_COLOR)
    return cv2.resize(image, (64, 64),interpolation=cv2.INTER_CUBIC)

def prepare(data):
    X = np.ndarray((N_x,len(TRAIN_DATA)), dtype=np.uint8)
    print ("X shape is {}".format(X.shape))
    Y = np.zeros((1,len(TRAIN_DATA)))
    print ("Y shape is {}".format(Y.shape))
    for i,(image_path, y) in enumerate(data):
        Y[0,i] = y
        image = read_image(image_path)
        X[:,i] = np.squeeze(image.reshape((N_x,1)))
    return X,Y

X,Y = prepare(TRAIN_DATA)

Output:

TRAIN_DATA : 3000
X shape is (12288, 3000)
Y shape is (1, 3000)

Then I define my tf_train function:

def tf_train(X, Y, batch_size=20):
    # Dataset (inputs and labels)
    
    D=200
    
    x = tf.placeholder(tf.float32, [batch_size, D])
    y_ = tf.placeholder(tf.float32, [batch_size, 1])

    # random variable
    W = tf.Variable(tf.random_normal([D, 1],stddev=0.1))
    
    z=tf.matmul(x,W)
    
    # map x to inferred output function
    # a = tf.sigmoid(tf.matmul(x, W))
    
    # Define loss and optimizer
    # cross_entropy = tf.reduce_mean(-(y_*tf.log(a) (1-y_)*tf.log(1-a)))
    cross_entropy = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=z, labels=y_))
    
    train_step = tf.train.GradientDescentOptimizer(1e-2).minimize(cross_entropy)
    
    sess = tf.InteractiveSession()
    tf.initialize_all_variables().run() #initializes W and a
    
    # Train
    for epoch in range (3000):
        idx = np.random.choice(len(TRAIN_DATA), batch_size, replace=False)
        #idx = np.random.permutation(np.arange(X.shape[1])) [:batch_size]
        _, l = sess.run([train_step, cross_entropy], feed_dict={x: X[idx], y_: Y[0,idx]})
        if epoch%100 == 0:
            print('loss: ' str(l))

And then when I start training my model :

tf_train(X,Y)

I get this following output:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
/tmp/ipykernel_34/1897119706.py in <module>
----> 1 tf_train(X,Y)

/tmp/ipykernel_34/2072834761.py in tf_train(X, Y, batch_size)
     28         idx = np.random.choice(len(TRAIN_DATA), batch_size, replace=False)
     29         #idx = np.random.permutation(np.arange(X.shape[1])) [:batch_size]
---> 30         _, l = sess.run([train_step, cross_entropy], feed_dict={x: X[idx], y_: Y[0,idx]})
     31         if epoch%100 == 0:
     32             print('loss: ' str(l))

/opt/conda/lib/python3.7/site-packages/tensorflow/python/client/session.py in run(self, fetches, feed_dict, options, run_metadata)
    966     try:
    967       result = self._run(None, fetches, feed_dict, options_ptr,
--> 968                          run_metadata_ptr)
    969       if run_metadata:
    970         proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)

/opt/conda/lib/python3.7/site-packages/tensorflow/python/client/session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
   1165                 'Cannot feed value of shape %r for Tensor %r, '
   1166                 'which has shape %r' %
-> 1167                 (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
   1168           if not self.graph.is_feedable(subfeed_t):
   1169             raise ValueError('Tensor %s may not be fed.' % subfeed_t)

ValueError: Cannot feed value of shape (20, 3000) for Tensor 'Placeholder:0', which has shape '(20, 200)'

Why do I keep getting this error message ?

CodePudding user response：

You need to reshape your data because at the moment it is interpreted as (12288, 3000) which means 12288 samples with 3000 features for each sample. You probably want the shape (3000, 12288) or even (3000, 64, 64, 3), so try this:

import tensorflow as tf

X = tf.random.normal((12288, 3000))
Y = tf.random.normal((1, 3000))

X = tf.reshape(X, (3000, 64, 64, 3)) # or tf.transpose(X) if you want (3000, 12288)
Y = tf.transpose(Y)
print(X.shape)
print(Y.shape)

(3000, 64, 64, 3)
(3000, 1)

And your code here is causing the actual problem:

D = 200
x = tf.placeholder(tf.float32, [batch_size, D])

D should be 12288 or 64 x 64 x 3, depending on how you will reshape your data.