In [2]:
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import GRU, Dense
from keras.layers import LSTM
from keras  import callbacks
from keras import optimizers
import pandas as pd 
import tensorflow as tf
import numpy as np

Using TensorFlow backend.


In [None]:
# Load the passenger series from CSV, using the 'Month' column as the index.
df = pd.read_csv('datasets/international-airline-passengers.csv', index_col='Month')
print(df.head())
# Quick visual check of the freshly loaded frame.
df.plot()

# Data cleansing

In [None]:
# Keep only the passenger counts. The explicit copy makes `df` an independent
# frame, so the column assignment below cannot trigger SettingWithCopyWarning.
columns_to_keep = ['Passengers']
df = df[columns_to_keep].copy()
# Multiply the counts by 1000 with a vectorized operation instead of a per-row
# lambda. NOTE: re-running this cell on its own multiplies the column again.
df['Passengers'] = df['Passengers'] * 1000
df.index.names = ['Month']
# Order the rows by the 'Month' index (rebinding rather than mutating in place).
df = df.sort_index()
print('Total rows: {}'.format(len(df)))
df.head()

In [None]:
# Summary statistics for the cleaned frame.
df.describe()

In [None]:
# Plot the frame again now that it holds only the 'Passengers' column.
df.plot()

Null values?

In [None]:
# Number of missing entries in each column.
df.isna().sum()

In [None]:
# Labels of the columns that hold at least one missing value ...
null_columns = df.columns[df.isna().any(axis=0)]
# ... and the missing-value count for each of those columns.
df.loc[:, null_columns].isna().sum()

In [None]:
# Preview the rows containing a missing value, restricted to the affected columns.
print(df.loc[df.isna().any(axis=1), null_columns].head())

In [None]:
# Drop every row that has a missing value, modifying df in place.
df.dropna(inplace=True)

In [None]:
# Re-count the missing entries per column after dropping incomplete rows.
df.isna().sum()

In [None]:
# Histogram of the frame's values, using 10 bins.
df.hist(bins=10)

In [None]:
# Count the rows whose passenger value is exactly zero.
int((df['Passengers'] == 0).sum())

# Scaled data

In [None]:
# Report the extremes of the data before any scaling is applied.
print('Min {}'.format(np.min(df)))
print('Max {}'.format(np.max(df)))

In [None]:
# Cast the frame to 32-bit floats before it is scaled.
dataset = df.astype(np.float32)

In [None]:
# Rescale the values into [0, 1]: fit the scaler on `dataset`, then transform it.
# NOTE(review): the scaler is fitted on the full series before the train/test
# split, so test-set extremes influence the scaling — confirm this is intended.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset)
scaled = scaler.transform(dataset)

In [None]:
# Report the extremes of the scaled array as a sanity check on the scaling.
print('Min {}'.format(np.min(scaled)))
print('Max {}'.format(np.max(scaled)))

In [None]:
# Split by position: the first 70% of the rows form the training set and the
# remaining rows form the test set (no shuffling).
train_size = int(len(scaled) * 0.70)
# Fixed: this was `len(scaled - train_size)`, which subtracts train_size from
# every element and then takes the length, i.e. it always equals len(scaled).
test_size = len(scaled) - train_size
train, test = scaled[:train_size, :], scaled[train_size:, :]
print('train: {}\ntest: {}'.format(len(train), len(test)))

In [None]:
def create_dataset(dataset, look_back=1):
    """Build sliding-window samples from the first column of a 2-D array.

    For each start index ``i``, the input window is
    ``dataset[i:i + look_back, 0]`` and the target is
    ``dataset[i + look_back, 0]``.

    Args:
        dataset: 2-D NumPy array; only column 0 is read.
        look_back: number of consecutive values in each input window.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape ``(n, look_back)`` and ``y``
        has shape ``(n,)``, with ``n = len(dataset) - look_back``. When the
        input is too short to produce any window, empty arrays of shape
        ``(0, look_back)`` and ``(0,)`` are returned so that callers can still
        rely on ``X.shape[1]``.
    """
    print(len(dataset), look_back)
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        dataX.append(dataset[i:i + look_back, 0])
        dataY.append(dataset[i + look_back, 0])
    if not dataX:
        # np.array([]) would be 1-D with shape (0,); keep X two-dimensional.
        return (np.empty((0, look_back), dtype=dataset.dtype),
                np.empty((0,), dtype=dataset.dtype))
    return np.array(dataX), np.array(dataY)

In [None]:
# Window length: how many consecutive past values feed each prediction.
look_back = 1
# Build (window, next value) pairs separately for the train and test splits.
X_train, y_train = create_dataset(train, look_back)
X_test, y_test = create_dataset(test, look_back)
print(X_train.shape, X_test.shape, sep='\n')

In [None]:
# Add a trailing axis of size 1 so both inputs are 3-D, matching the
# (batch_size, look_back, 1) input shape declared on the LSTM layer.
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
print(X_train.shape, X_test.shape, sep='\n')

In [None]:
# Stateful LSTM with 4 units followed by a single-output dense layer.
# A stateful layer needs a fixed batch size, hence batch_input_shape.
batch_size = 1
model = Sequential([
    LSTM(4, batch_input_shape=(batch_size, look_back, 1), stateful=True),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')

In [None]:
# The LSTM is stateful, so its state carries over from one batch to the next.
# The samples must therefore be fed in their original order (shuffle=False),
# and the state must be cleared by hand after each pass over the data; a single
# fit(epochs=100, shuffle=True) call does neither.
for epoch in range(100):
    model.fit(X_train, y_train, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
    model.reset_states()

In [None]:
# Predict with the same batch size the stateful model was built with.
trainPredict = model.predict(X_train, batch_size=batch_size)
testPredict = model.predict(X_test, batch_size=batch_size)

# The model ends in Dense(1), so predictions are column vectors of shape (n, 1),
# while the targets are 1-D with shape (n,). Subtracting them directly broadcasts
# to an (n, n) matrix and yields a wrong RMSE; flatten the predictions first.
# Scores are in the scaled units of y_train / y_test, not passenger counts.
trainScore = np.sqrt(np.mean((y_train - trainPredict.ravel()) ** 2))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = np.sqrt(np.mean((y_test - testPredict.ravel()) ** 2))
print('Test Score: %.2f RMSE' % (testScore))

In [None]:
plt.figure(figsize=(20,5))

x = np.arange(scaled.shape[0])
plt.plot(scaled, 'k', label='scaled series')
# Prediction i targets the value look_back steps after the start of its window,
# so the train predictions start at position look_back rather than 0.
plt.plot(x[look_back:look_back + trainPredict.shape[0]], trainPredict, 'b', label='train prediction')
# The test predictions cover the final rows of the series. Slicing from an
# explicit start index also behaves correctly when there are no predictions.
plt.plot(x[x.size - testPredict.shape[0]:], testPredict, 'r', label='test prediction')
plt.xlabel('time step')
plt.ylabel('scaled passengers')
plt.legend()