### Preprocessing


In [0]:
import numpy as np 
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def downcast_dtypes(df):
    """Shrink the numeric columns of ``df`` to narrower dtypes to save memory.

    * ``float64`` columns are converted to ``float32``.
    * ``int64`` / ``int32`` columns are converted to ``int16``, but only when
      every value in the column fits into the ``int16`` range.  Columns holding
      larger values are left untouched, because a blind cast would silently
      wrap them around and corrupt the data.

    Columns of any other dtype are not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to downcast.  It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, returned for convenient re-assignment.
    """
    float_cols = [c for c in df if df[c].dtype == "float64"]
    int_cols = [c for c in df if df[c].dtype in ["int64", "int32"]]

    # Skip the assignment entirely when there is nothing to convert.
    if float_cols:
        df[float_cols] = df[float_cols].astype(np.float32)

    if int_cols:
        bounds = np.iinfo(np.int16)
        ints = df[int_cols]
        if len(ints) == 0:
            # No rows means no value can overflow; min()/max() would be NaN here.
            safe_cols = int_cols
        else:
            fits = (ints.min() >= bounds.min) & (ints.max() <= bounds.max)
            safe_cols = [c for c, ok in zip(int_cols, fits) if ok]
        if safe_cols:
            df[safe_cols] = df[safe_cols].astype(np.int16)

    return df

timesteps = 21   # length of the look-back window, in days
startDay = 350   # number of leading days dropped from the history
dt = pd.read_csv("sales_train_validation.csv")
dt = downcast_dtypes(dt)
dt = dt.T
# After transposing, the first six rows are the id columns of the CSV; drop
# them together with the first `startDay` days so that only day rows remain.
dt = dt.iloc[6 + startDay:]

# Feature scaling: MinMaxScaler rescales every column to [0, 1] independently.
sc = MinMaxScaler(feature_range = (0, 1))
dt_scaled = sc.fit_transform(dt)

# Build the supervised samples with a sliding window: the input is the
# `timesteps` scaled days preceding day i, the target is scaled day i itself.
# The bounds come from the scaled array rather than from hard-coded dataset
# sizes, so the cell keeps working if the number of days or series changes.
n_days = dt_scaled.shape[0]
X_train, y_train = [], []
for i in range(timesteps, n_days):
    X_train.append(dt_scaled[i-timesteps:i])
    y_train.append(dt_scaled[i])

del dt_scaled
X_train = np.array(X_train)
y_train = np.array(y_train)

### Training
**LSTM Model with Keras**

In [0]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout

# Stacked LSTM regressor: four LSTM layers of growing width, each followed by
# dropout, and a dense output layer producing one value per target column.
regressor = Sequential()
layer_1_units=50
# input_shape is (window length, number of features), taken from X_train.
regressor.add(LSTM(units = layer_1_units, return_sequences = True, input_shape = (X_train.shape[1], X_train.shape[2])))
regressor.add(Dropout(0.2))
layer_2_units=200
regressor.add(LSTM(units = layer_2_units, return_sequences = True))
regressor.add(Dropout(0.2))
layer_3_units=400
regressor.add(LSTM(units = layer_3_units, return_sequences = True))
regressor.add(Dropout(0.2))
layer_4_units=800
# The last LSTM layer omits return_sequences, so it emits a single vector per sample.
regressor.add(LSTM(units = layer_4_units))
regressor.add(Dropout(0.2))
# Size the output layer from the targets instead of hard-coding the number of
# series, so the model always matches y_train.
regressor.add(Dense(units = y_train.shape[1]))
regressor.compile(optimizer = 'adam', loss = 'mean_squared_error')

epoch_no=40
batch_size_RNN=44
regressor.fit(X_train, y_train, epochs = epoch_no, batch_size = batch_size_RNN)

Using TensorFlow backend.


Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.callbacks.History at 0x7fc8c9cfcef0>

### Testing

In [0]:
horizon = 28  # number of future days to forecast

# Seed the forecast with the last `timesteps` observed days, scaled with the
# scaler fitted during preprocessing.
inputs = dt.iloc[-timesteps:]
inputs = sc.transform(inputs)
n_series = inputs.shape[1]

# X_test holds the scaled history followed by every scaled forecast made so
# far.  It is allocated once up front instead of being rebuilt with np.append
# on each step.
X_test = np.empty((1, timesteps + horizon, n_series), dtype=inputs.dtype)
X_test[0, :timesteps] = inputs
predictions = []

# Recursive multi-step forecast: each predicted day is written back into the
# buffer and becomes part of the input window of the following steps.
for j in range(timesteps, timesteps + horizon):
    predicted_scaled = regressor.predict(X_test[:, j - timesteps:j])
    X_test[0, j] = predicted_scaled[0]
    # Keep the forecast in original units for the submission.
    predictions.append(sc.inverse_transform(predicted_scaled))

# submission
# Each entry of `predictions` has shape (1, n_series); stack them into
# (horizon, n_series) and transpose so that rows are series and columns are days.
forecast = np.concatenate(predictions, axis=0)
submission = pd.DataFrame(data=forecast.T, columns=[f"F{i}" for i in range(1, horizon + 1)])
# The evaluation rows reuse the validation forecasts.
submission = pd.concat((submission, submission), ignore_index=True)

sample_submission = pd.read_csv("sample_submission.csv")
# Fail loudly on a row-count mismatch instead of writing NaN or truncated ids.
if len(sample_submission) != len(submission):
    raise ValueError(
        f"sample_submission has {len(sample_submission)} rows, "
        f"but {len(submission)} forecast rows were produced"
    )
submission.insert(0, "id", sample_submission["id"].values)
submission.to_csv("submission.csv", index=False)