I have a multi-class classification problem, and I want to compile my model:
# Input layer that converts the feature columns into one dense tensor.
feature_layer = DenseFeatures(feature_columns)

# Classifier: one hidden ReLU layer, softmax over the 3 classes.
model = Sequential()
model.add(feature_layer)
model.add(Dense(32, activation='relu'))
model.add(Dense(3, activation='softmax'))
So I use categorical_crossentropy loss:
# NOTE(review): 'categorical_crossentropy' expects one-hot encoded labels of
# shape (batch, 3); the datasets built below yield raw (string) labels, which
# is the source of the error described in this question.
model.compile(optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy'])
# train_ds / val_ds are tf.data datasets produced by df_to_dataset below.
model.fit(train_ds,
validation_data=val_ds,
epochs=10)
But, of course, it gives me an error. I know about the to_categorical
method, but it doesn't accept a BatchDataset as a parameter, which is what train_ds and val_ds are.
Please point me in the right direction.
UPDATE: I tried to do something like this:
def df_to_dataset(df, shuffle=True, batch_size=32):
    """Build a batched tf.data.Dataset of (features_dict, one_hot_label) pairs from a DataFrame."""
    df = df.copy()
    # Separate the target column from the feature columns.
    labels = df.pop('class')
    ds = tf.data.Dataset.from_tensor_slices((dict(df), labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(df))
    # NOTE(review): tf.one_hot requires integer indices, but the 'class'
    # column holds strings (tf.string in the element spec shown below) —
    # this is what triggers the "DataType string not in list of allowed
    # values" error reported underneath. Labels must be mapped to ints first.
    ds = ds.batch(batch_size).map(lambda x, y: (x, tf.one_hot(y, depth=3)))
    return ds
batch_size = 32
train_ds = df_to_dataset(train, batch_size=batch_size) # error
val_ds = df_to_dataset(val, shuffle=False, batch_size=batch_size)
test_ds = df_to_dataset(test, shuffle=False, batch_size=batch_size)
And it gives me:
Value passed to parameter 'indices' has DataType string not in list of allowed values: uint8, int32, int64
My class column has string values (it tells whether the object is a star, galaxy, or quasar; the other columns are int/float), but I popped it:
def df_to_dataset(df, shuffle=True, batch_size=32):
    """Build an (optionally shuffled) tf.data.Dataset of (features_dict, label) pairs.

    NOTE(review): batch_size is accepted but never used in this version —
    the returned dataset is unbatched; presumably batching was dropped just
    to inspect the element spec printed below.
    """
    df = df.copy()
    # 'class' holds the string labels (tf.string in the element spec below).
    labels = df.pop('class')
    ds = tf.data.Dataset.from_tensor_slices((dict(df), labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(df))
    return ds

df_to_dataset(df)
labels:
<ShuffleDataset shapes: ({objid: (), ra: (), dec: (), u: (), g: (), r: (), i: (), z: (), run: (), rerun: (), camcol: (), field: (), specobjid: (), redshift: (), plate: (), mjd: (), fiberid: ()}, ()), types: ({objid: tf.float64, ra: tf.float64, dec: tf.float64, u: tf.float64, g: tf.float64, r: tf.float64, i: tf.float64, z: tf.float64, run: tf.int64, rerun: tf.int64, camcol: tf.int64, field: tf.int64, specobjid: tf.float64, redshift: tf.float64, plate: tf.int64, mjd: tf.int64, fiberid: tf.int64}, tf.string)>
CodePudding user response:
You can either convert your labels to one-hot encoded labels and use the categorical_crossentropy
loss function:
# Map integer labels to one-hot vectors; depth=3 matches the 3 softmax units.
# NOTE(review): assumes the dataset labels are already integers — with
# string labels this raises the DataType error quoted above.
one_hot_encoded_train_ds = train_ds.map(lambda x, y: (x, tf.one_hot(y, depth=3)))
one_hot_encoded_val_ds = val_ds.map(lambda x, y: (x, tf.one_hot(y, depth=3)))
Or change your loss function to sparse_categorical_crossentropy
and leave your labels the way they are as integers:
# With the sparse variant of the loss, integer labels are used directly —
# no one-hot encoding step is needed.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
Update: Convert your string labels to integers:
def df_to_dataset(df, shuffle=True, batch_size=32):
    """Convert a DataFrame with a string 'class' column into a batched tf.data.Dataset.

    The string labels (STAR / GALAXY / QSO) are mapped to integer ids
    (0 / 1 / 2) so they can be consumed directly by
    sparse_categorical_crossentropy.

    Args:
        df: pandas DataFrame with feature columns plus a 'class' label column.
        shuffle: whether to shuffle the examples (use False for val/test sets).
        batch_size: number of examples per yielded batch.

    Returns:
        A tf.data.Dataset yielding (features_dict, int_label) batches.
    """
    df = df.copy()
    labels = df.pop('class')
    # Fixed class-name -> integer-id mapping; a lookup on an unexpected
    # label fails fast with a KeyError rather than silently producing NaN.
    label_ids = {'STAR': 0, 'GALAXY': 1, 'QSO': 2}
    converted_labels = np.array([label_ids[l] for l in labels.to_list()])
    ds = tf.data.Dataset.from_tensor_slices((dict(df), converted_labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(df))
    # Bug fix: batch_size was previously accepted but never applied, so the
    # function returned an unbatched dataset; Keras model.fit expects the
    # dataset itself to be batched (as in the earlier version at the top).
    ds = ds.batch(batch_size)
    return ds

df_to_dataset(df)