I'm new to Machine Learning and Neural Nets and am experimenting with a configuration I found.
I thought this might have something to do with the shuffling and the size of shuffle_buffer, but the behaviour also occurs for different shuffle_buffer sizes, e.g. 20.
What is the reason for this behaviour, and how can I prevent it?
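From what I understand, tf.data's shuffle keeps a buffer of buffer_size elements and samples uniformly from it, so buffer_size=1 should mean no shuffling at all, and only a buffer at least as large as the number of windows gives a full shuffle. A minimal standalone sketch (separate from the code below) to see this:

import tensorflow as tf
ds = tf.data.Dataset.range(10)
print(list(ds.shuffle(1).as_numpy_iterator()))   # buffer of 1: order unchanged -> [0, 1, ..., 9]
print(list(ds.shuffle(10).as_numpy_iterator()))  # buffer >= dataset size: a full random permutation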
The code:
#%% Initializing
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
#%% Defining the series
series = np.array([ 218, 555, 550, 563, 2492, 2848, 4041, 1302, 1040, 1073, 1392, 2093,
1870, 2328, 2102, 2844, 1730, 2431, 1974, 2450, 1975, 1415, 2568, 2831,
3011, 2576, 2825, 3327, 3539, 3392, 2949, 3283, 3854, 3918, 2639, 3826,
3980, 3134, 3997, 2708, 3257, 3435, 3337, 2571, 3370, 4277, 3482, 2804,
3253, 2979, 2458, 2306, 2482, 3209, 3915, 1292, 931, 2748, 2874, 2089,
2660, 3205, 3093, 1389, 834, 1914, 2568, 2831, 2129, 3138, 2841, 2318,
2653, 1598, 1779, 1529, 2190, 2180, 1737, 1845, 2511, 1922, 3679, 3277,
2633, 2064, 2802, 2853, 2220, 1987, 2491, 1867, 3593, 1998, 2425, 3226,
2143, 3466, 3327, 3283, 3011, 2552, 2844, 2501, 1575, 1829, 3086, 3345,
1905, 1192, 2772, 3667, 4223, 4117, 2113, 2312, 2615, 3126, 2581, 3265,
3682, 3355, 1820, 2989, 2806, 3333, 2395, 2777, 2189, 2628, 2379, 1867])
time = range(1, len(series) + 1)
#%% Prepare Data for NN
def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    dataset = tf.data.Dataset.from_tensor_slices(series)
    dataset = dataset.window(window_size + 1, shift=1, drop_remainder=True)
    dataset = dataset.flat_map(lambda window: window.batch(window_size + 1))
    dataset = dataset.shuffle(shuffle_buffer).map(lambda window: (window[:-4], window[-1]))  # window[:-4] because we want to predict the value 4 weeks ahead, not next week's value
    dataset = dataset.batch(batch_size).prefetch(1)
    return dataset
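#%% Sanity check (illustrative, not part of the original code): peek at one batch
# Each window has window_size + 1 values; the input window[:-4] has length
# window_size - 3 and the label is a single scalar, so with window_size=4 and
# batch_size=4 a batch should have shapes (4, 1) and (4,).
for x_batch, y_batch in windowed_dataset(series, window_size=4, batch_size=4, shuffle_buffer=1).take(1):
    print(x_batch.shape, y_batch.shape)  # expected: (4, 1) (4,)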
split_time = len(time)-16
time_train = time[:split_time]
x_train = series[:split_time]
# Normalize using statistics of the training data only, so that the model does not "see" the validation data before validation
train_mean = x_train.mean()
train_std = x_train.std()
x_train_norm = (x_train - train_mean) / train_std
series_norm = (series - train_mean) / train_std
time_valid = time[split_time:]
x_valid = series[split_time:]
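#%% Quick check (illustrative): the normalization is invertible using the
# training statistics; the same inverse mapping is used to un-normalize the
# forecasts further below.
assert np.allclose(x_train_norm * train_std + train_mean, x_train)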
def plot_series(time, series, format="-", start=0, end=None):
    plt.plot(time[start:end], series[start:end], format)
    plt.xlabel("Time")
    plt.ylabel("Value")
    plt.grid(True)
plt.figure(figsize=(10, 6))
plot_series(time_train, x_train)
plt.show()
plt.figure(figsize=(10, 6))
plot_series(time_valid, x_valid)
plt.show()
#%% Build the dataset
w_size = 4
b_size = 4
sb_size = 1
w_size_adjusted = w_size - 3  # each window has w_size + 1 values; the input window[:-4] therefore has w_size - 3 values
dataset = windowed_dataset(series_norm, window_size = w_size, batch_size = b_size, shuffle_buffer = sb_size)
#%% Build and train the model
l0 = tf.keras.layers.Dense(1, input_shape=[w_size_adjusted])
model = tf.keras.models.Sequential([l0])
model.compile(loss="mse", optimizer=tf.keras.optimizers.SGD(learning_rate=1e-6, momentum=0.9))  # original: learning_rate=1e-6; also tried tf.keras.optimizers.Adam(learning_rate=1e-6)
model.fit(dataset, epochs=100, verbose=0)
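#%% Optional diagnostic (not in the original code): inspect the learned weights.
# With w_size_adjusted = 1 the layer holds a (1, 1) kernel and a (1,) bias, so
# the whole model is y = w * x + b; comparing these values across runs shows
# how much the random initialization (see the answer below) moves the result.
print(l0.get_weights())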
#%% Forecast data
forecast = []
for time_step in range(len(series) - w_size_adjusted + 1):
    forecast.append(model.predict(series_norm[time_step:time_step + w_size_adjusted][np.newaxis]))
forecast_subset = forecast[split_time - w_size_adjusted + 1:]
results_norm = np.array(forecast_subset)[:, 0, 0]
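#%% Alternative (a sketch, equivalent to the loop above): build all input
# windows as one array and call predict() once, which is much faster than one
# predict() call per window.
windows = np.array([series_norm[t:t + w_size_adjusted]
                    for t in range(len(series) - w_size_adjusted + 1)])
results_norm_batched = model.predict(windows)[split_time - w_size_adjusted + 1:, 0]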
#%% Unnormalize
results = (results_norm * train_std) + train_mean
#%% Plot
plt.figure(figsize=(10, 6))
plot_series(time_valid, x_valid)
plot_series(time_valid, results)
plt.title('window_size: {0}, batch_size: {1}, shuffle_buffer: {2}'.format(w_size, b_size, sb_size))
plt.show()
Answer:
Using this does the trick:
l0 = tf.keras.layers.Dense(1, input_shape=[w_size_adjusted], kernel_initializer="ones")
Essentially, the initial weight values have too much of an impact because there is too little training data.
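If the goal is run-to-run reproducibility rather than constant initial weights, a seeded initializer (or a global random seed) is an alternative worth trying; this is a sketch, not something claimed by the answer above:

l0 = tf.keras.layers.Dense(1, input_shape=[w_size_adjusted],
                           kernel_initializer=tf.keras.initializers.GlorotUniform(seed=42))
# or fix all of TensorFlow's randomness once, before building the model:
tf.random.set_seed(42)

With so little data and a model that has only a single weight and bias, either option removes the run-to-run variance that random initialization otherwise introduces.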