I found keras tutorial and when was following it got error.
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")
print(f"Total videos for training: {len(train_df)}")
print(f"Total videos for testing: {len(test_df)}")
center_crop_layer = layers.CenterCrop(IMG_SIZE, IMG_SIZE)
def crop_center(frame):
cropped = center_crop_layer(frame[None, ...])
cropped = cropped.numpy().squeeze()
return cropped
# Following method is modified from this tutorial:
# https://www.tensorflow.org/hub/tutorials/action_recognition_with_tf_hub
def load_video(path, max_frames=0):
cap = cv2.VideoCapture(path)
frames = []
try:
while True:
ret, frame = cap.read()
if not ret:
break
frame = crop_center(frame)
frame = frame[:, :, [2, 1, 0]]
frames.append(frame)
if len(frames) == max_frames:
break
finally:
cap.release()
return np.array(frames)
def build_feature_extractor():
feature_extractor = keras.applications.DenseNet121(
weights="imagenet",
include_top=False,
pooling="avg",
input_shape=(IMG_SIZE, IMG_SIZE, 3),
)
preprocess_input = keras.applications.densenet.preprocess_input
inputs = keras.Input((IMG_SIZE, IMG_SIZE, 3))
preprocessed = preprocess_input(inputs)
outputs = feature_extractor(preprocessed)
return keras.Model(inputs, outputs, name="feature_extractor")
feature_extractor = build_feature_extractor()
# Label preprocessing with StringLookup.
label_processor = keras.layers.StringLookup(
num_oov_indices=0, vocabulary=np.unique(train_df["tag"]), mask_token=None
)
print(label_processor.get_vocabulary())
def prepare_all_videos(df, root_dir):
num_samples = len(df)
video_paths = df["video_name"].values.tolist()
labels = df["tag"].values
labels = label_processor(labels[..., None]).numpy()
# `frame_features` are what we will feed to our sequence model.
frame_features = np.zeros(
shape=(num_samples, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
)
# For each video.
for idx, path in enumerate(video_paths):
# Gather all its frames and add a batch dimension.
frames = load_video(os.path.join(root_dir, path))
# Pad shorter videos.
if len(frames) < MAX_SEQ_LENGTH:
diff = MAX_SEQ_LENGTH - len(frames)
padding = np.zeros((diff, IMG_SIZE, IMG_SIZE, 3))
frames = np.concatenate(frames, padding)
frames = frames[None, ...]
# Initialize placeholder to store the features of the current video.
temp_frame_features = np.zeros(
shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
)
# Extract features from the frames of the current video.
for i, batch in enumerate(frames):
video_length = batch.shape[0]
length = min(MAX_SEQ_LENGTH, video_length)
for j in range(length):
if np.mean(batch[j, :]) > 0.0:
temp_frame_features[i, j, :] = feature_extractor.predict(
batch[None, j, :]
)
else:
temp_frame_features[i, j, :] = 0.0
frame_features[idx,] = temp_frame_features.squeeze()
return frame_features, labels
When i call prepare_all_videos and pass train_df or test_df to it, this error occurs:
81 diff = MAX_SEQ_LENGTH - len(frames)
82 padding = np.zeros((diff, IMG_SIZE, IMG_SIZE, 3))
---> 83 frames = np.concatenate(frames, padding)
84
85 frames = frames[None, ...]
<__array_function__ internals> in concatenate(*args, **kwargs)
TypeError: only integer scalar arrays can be converted to a scalar index
That's how test_df looks like:
video_name tag
0 v_CricketShot_g01_c01.avi CricketShot
1 v_CricketShot_g01_c02.avi CricketShot
2 v_CricketShot_g01_c03.avi CricketShot
3 v_CricketShot_g01_c04.avi CricketShot
4 v_CricketShot_g01_c05.avi CricketShot
... ... ...
219 v_TennisSwing_g07_c03.avi TennisSwing
220 v_TennisSwing_g07_c04.avi TennisSwing
221 v_TennisSwing_g07_c05.avi TennisSwing
222 v_TennisSwing_g07_c06.avi TennisSwing
223 v_TennisSwing_g07_c07.avi TennisSwing
What's wrong? How to fix it? This tutorial has colab and you can run it if you want.
CodePudding user response:
Most probably it's because frame
is returning an empty array, so concat is failing. So add a condition to check the length of the frame,
frames = load_video(os.path.join(root_dir, path))
if len(frames) == 0:
continue
# Pad shorter videos.
if len(frames) < MAX_SEQ_LENGTH:
diff = MAX_SEQ_LENGTH - len(frames)
padding = np.zeros((diff, IMG_SIZE, IMG_SIZE, 3))