In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

tf.keras.backend.set_floatx("float64")  # we want to carry out GP calculations in 64 bit
# Set the TensorFlow logger's level to INFO.
tf.get_logger().setLevel("INFO")

In [None]:
# from google.colab import drive
# drive.mount('/content/drive/')

# import os
# import sys
# os.chdir('/content/drive/MyDrive/ML_Project')

# ROOT_DIR = '/content/drive/MyDrive/Data/ML_Project'
# sys.path.append(ROOT_DIR)


In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
def return_data(fold, month, with_scaling, station_id=None):
  """Load one fold/month split of the PM2.5 data as features and targets.

  Reads ``train_data_<month>.csv.gz`` and ``test_data_<month>.csv.gz`` from
  ``data/beijing-18/time_feature/fold<fold>/`` (relative to the current
  working directory).

  Args:
    fold: Fold identifier, formatted into the directory name ``fold<fold>``.
    month: Month tag used in the file names (e.g. ``'mar'``).
    with_scaling: If truthy, fit a ``MinMaxScaler`` on the training features
      and apply it to both the training and the test features.
    station_id: Optional. When not ``None``, only test rows whose
      ``station_id`` column equals this value are kept. Training rows are
      never filtered.

  Returns:
    ``(train_input, train_output, test_input, test_output)`` where the inputs
    are DataFrames without the ``station_id``, ``time``, ``filled`` and
    ``PM25_Concentration`` columns and the outputs are NumPy arrays of
    ``PM25_Concentration``. ``test_output`` is ``None`` when the test file
    has no ``PM25_Concentration`` column.

  Raises:
    KeyError: If a required non-feature column is missing from either file,
      or the target column is missing from the training file.
  """
  target_col = 'PM25_Concentration'
  non_feature_cols = ['station_id', 'time', 'filled']
  fold_dir = f'data/beijing-18/time_feature/fold{fold}'

  train_input = pd.read_csv(f'{fold_dir}/train_data_{month}.csv.gz')
  test_input = pd.read_csv(f'{fold_dir}/test_data_{month}.csv.gz')

  if station_id is not None:
    test_input = test_input[test_input['station_id'] == station_id]

  train_output = np.array(train_input[target_col])
  train_input = train_input.drop(columns=non_feature_cols + [target_col])

  # The test file may come without the target column. Check for it explicitly
  # instead of relying on a bare ``except`` around ``drop``; the original read
  # the target before that fallback and therefore could never reach it.
  if target_col in test_input.columns:
    test_output = np.array(test_input[target_col])
    test_input = test_input.drop(columns=non_feature_cols + [target_col])
  else:
    test_output = None
    test_input = test_input.drop(columns=non_feature_cols)

  if with_scaling:
    # Fit on training features only so no test statistics leak into the scaler.
    scaler = MinMaxScaler().fit(train_input)
    train_input = pd.DataFrame(scaler.transform(train_input), columns=list(train_input.columns))
    test_input = pd.DataFrame(scaler.transform(test_input), columns=list(test_input.columns))
  return train_input, train_output, test_input, test_output


In [None]:
# for fold in [0]:
#     train_input,train_output,test_input,test_output = return_data(fold=fold,month='mar',with_scaling=True)
#     train_output = train_output.reshape(-1,1)
#     print(train_input.shape,train_output.shape)
   
#     print("Fold: ",fold)
#     print("Data received")

In [None]:
# np.random.seed(42)
# X=np.random.rand(100,2)
# # noise=np.random.normal(0,1,100)
# Y=-8*X[:,0] - 6*X[:,1] + 3
# X=X
# # print(X)
# # X=X.reshape(-1,1)
# Y=Y.reshape(-1,1)
# print(X.shape)
# print


In [None]:
# plt.plot(train_input.iloc[:,1], train_output, "kx")
# plt.xlabel("X")
# plt.ylabel("Y")


In [None]:
# !pip install gpflux

In [None]:
# `os` is not imported by any earlier live cell (the Colab cell that imported
# it is commented out), so import it here to survive Restart & Run All.
import os

# Enumerate GPUs in PCI bus order and expose only device 0 to this process.
# NOTE(review): TensorFlow is already imported above; confirm these variables
# are still picked up before the first GPU initialisation in your setup.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = '0'

In [10]:
import gpflux

from gpflux.architectures import Config, build_constant_input_dim_deep_gp
from gpflux.models import DeepGP

for fold in [0]:
    # Load the March split of this fold with min-max scaled features.
    train_input, train_output, test_input, test_output = return_data(
        fold=fold, month='mar', with_scaling=True
    )
    # Turn the targets into a single-column 2-D array.
    train_output = train_output.reshape(-1, 1)
    print(train_input.shape, train_output.shape)

    print("Fold: ", fold)
    print("Data received")

    # Deep GP hyperparameters, then the model itself built from the training inputs.
    config = Config(
        num_inducing=25,
        inner_layer_qsqrt_factor=1e-5,
        likelihood_noise_variance=1e-2,
        whiten=True,
    )
    deep_gp: DeepGP = build_constant_input_dim_deep_gp(
        train_input, num_layers=10, config=config
    )

    # Keras workflow: obtain the training model, compile it with an optimizer, then fit.
    training_model: tf.keras.Model = deep_gp.as_training_model()
    training_model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01))

    # Reduce the learning rate every time the training loss (the ELBO objective) plateaus.
    lr_on_plateau = tf.keras.callbacks.ReduceLROnPlateau(
        monitor="loss", factor=0.95, patience=3, min_lr=1e-6, verbose=0
    )
    # Write logs (e.g., hyperparameters, KLs, etc.) to TensorBoard.
    tensorboard_logger = gpflux.callbacks.TensorBoard()
    # Save the model's weights.
    weight_checkpoint = tf.keras.callbacks.ModelCheckpoint(
        filepath="ckpts/", save_weights_only=True, verbose=0
    )
    callbacks = [lr_on_plateau, tensorboard_logger, weight_checkpoint]

    fit_data = {"inputs": train_input, "targets": train_output}
    history = training_model.fit(
        fit_data,
        batch_size=12,
        epochs=20,
        callbacks=callbacks,
        verbose=1,
    )

In [None]:
# # From the `DeepGP` model we instantiate a training model which is a `tf.keras.Model`
# training_model: tf.keras.Model = deep_gp.as_training_model()

# # Following the Keras procedure we need to compile and pass a optimizer,
# # before fitting the model to data
# training_model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01))

# callbacks = [
#     # Create callback that reduces the learning rate every time the ELBO plateaus
#     tf.keras.callbacks.ReduceLROnPlateau("loss", factor=0.95, patience=3, min_lr=1e-6, verbose=0),
#     # Create a callback that writes logs (e.g., hyperparameters, KLs, etc.) to TensorBoard
#     gpflux.callbacks.TensorBoard(),
#     # Create a callback that saves the model's weights
#     tf.keras.callbacks.ModelCheckpoint(filepath="ckpts/", save_weights_only=True, verbose=0),
# ]

# history = training_model.fit(
#     {"inputs": train_input, "targets": train_output},
#     batch_size=12,
#     epochs=50,
#     callbacks=callbacks,
#     verbose=0,
# )

In [None]:
# Two side-by-side panels: training objective (left) and learning rate (right).
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 3))
ax1, ax2 = axes

# Left panel: the loss values recorded by Keras during `fit`.
loss_curve = history.history["loss"]
ax1.plot(loss_curve)
ax1.set_xlabel("Iteration")
ax1.set_ylabel("Objective = neg. ELBO")

# Right panel: the learning rate values recorded in the same history.
lr_curve = history.history["lr"]
ax2.plot(lr_curve)
ax2.set_xlabel("Iteration")
ax2.set_ylabel("Learning rate")

In [None]:
# def plot(model, X, Y, ax=None):
#     if ax is None:
#         fig, ax = plt.subplots()

#     x_margin = 1.0
#     N_test = 100
#     X_test = np.linspace(X.min() - x_margin, X.max() + x_margin, N_test).reshape(-1, 1)
#     out = model(X_test)

#     mu = out.f_mean.numpy().squeeze()
#     var = out.f_var.numpy().squeeze()
#     X_test = X_test.squeeze()
#     lower = mu - 2 * np.sqrt(var)
#     upper = mu + 2 * np.sqrt(var)

#     ax.set_ylim(Y.min() - 0.5, Y.max() + 0.5)
#     ax.plot(X.iloc[:,0], Y, "kx", alpha=0.5)
#     ax.plot(X_test, mu, "C1")

#     ax.fill_between(X_test, lower, upper, color="C1", alpha=0.3)


# Build the prediction model from `deep_gp` (created in the training cell) and
# print its repr; the evaluation cell below calls `.predict` on it.
prediction_model = deep_gp.as_prediction_model()
print(prediction_model)
# plot(prediction_model,test_input,test_output)

In [None]:
# prediction_model = deep_gp.as_prediction_model()
from sklearn.metrics import mean_squared_error

# Predict on the held-out features; `test_input` is converted to a NumPy array first.
Test_pred = prediction_model.predict(np.array(test_input))

# Root-mean-squared error on the test set. The `squared=False` flag of
# `mean_squared_error` is deprecated and removed in recent scikit-learn
# releases, so take the square root of the MSE explicitly. Arguments follow
# the documented (y_true, y_pred) order; the value is the same either way.
err = np.sqrt(mean_squared_error(test_output, Test_pred))


In [None]:
# Show the test-set error stored in `err` by the evaluation cell.
print(err)

In [None]:
#41.72707824766764
#41.62436334687556