# Importing Libraries

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import os.path as op
import pickle
import cupy as cp

# Data Fetching

In [None]:
# Load every (node, month) CSV once and stack the rows into two float32
# matrices: A1 (5 input columns) and U1 (7 output columns).
node=['150','149','147','144','142','140','136','61']
mon=['Apr','Mar','Aug','Jun','Jul','Sep','May','Oct']

# Positional CSV columns used as model inputs / outputs.
INPUT_COLS = [1, 2, 3, 15, 16]
OUTPUT_COLS = [5, 6, 7, 8, 17, 18, 19]

# read_csv returns the `usecols` columns in file order, so after reading the
# union of both sets we locate each wanted column by its rank in the sorted list.
ALL_COLS = sorted(INPUT_COLS + OUTPUT_COLS)
INPUT_POS = [ALL_COLS.index(c) for c in INPUT_COLS]
OUTPUT_POS = [ALL_COLS.index(c) for c in OUTPUT_COLS]

# Seed with empty arrays so the result keeps shape (0, 5) / (0, 7) even if no
# file is loaded; chunks are gathered in lists and concatenated once, instead
# of re-copying the accumulated array with np.append on every iteration.
inp_chunks = [np.empty((0, len(INPUT_COLS)), dtype='float32')]
out_chunks = [np.empty((0, len(OUTPUT_COLS)), dtype='float32')]

for node_id in node:
    for month in mon:
        csv_path = 'data_gkv/AT510_Node_' + str(node_id) + '_' + str(month) + '19_OutputFile.csv'
        # Parse each file a single time (the original parsed it twice).
        frame = pd.read_csv(csv_path, usecols=ALL_COLS, low_memory=False)

        inp = frame.iloc[:, INPUT_POS].to_numpy(dtype='float32')
        out = frame.iloc[:, OUTPUT_POS].to_numpy(dtype='float32')

        inp_chunks.append(inp)
        out_chunks.append(out)

A1 = np.concatenate(inp_chunks, axis=0)
U1 = np.concatenate(out_chunks, axis=0)

print(A1)
print(U1)


# Min Max Scaler

In [None]:
from sklearn.preprocessing import MinMaxScaler
import warnings
# Use one scaler per matrix. Re-fitting a single MinMaxScaler on U1 after A1
# discards the feature min/max, so new inputs could no longer be scaled the
# same way; keeping both fitted objects makes transform / inverse_transform
# available for inputs and targets alike.
x_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()
X1 = x_scaler.fit_transform(A1)
Y1 = y_scaler.fit_transform(U1)

# Backward-compatible alias: in the original cell `scaler_obj` ended up fitted
# on the targets (U1), so it keeps pointing at the target scaler.
scaler_obj = y_scaler

# NOTE(review): both scalers are fitted on the full data set before the
# train/test split further down, so test-set min/max values influence the
# scaling — confirm whether this is acceptable for the evaluation.

# Silence UserWarning messages emitted by the cells that follow.
warnings.filterwarnings(action='ignore', category=UserWarning)

# Parameter Tuning

In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import RandomizedSearchCV

def hyperParameterTuning(X_train, y_train):
    """Run a randomized hyper-parameter search for a multi-output XGBoost model.

    A ``RandomizedSearchCV`` over an ``XGBRegressor`` is wrapped in a
    ``MultiOutputRegressor``, so an independent search (5 sampled candidates,
    2-fold CV) is fitted for every target column of ``y_train``.

    Parameters
    ----------
    X_train : array-like
        Training features passed to ``MultiOutputRegressor.fit``.
    y_train : array-like
        Training targets; one search is run per target column.

    Returns
    -------
    dict
        ``best_params_`` of the search fitted for the FIRST target column
        only; the best parameters found for the other targets are not
        returned.
    """
    # Candidate values sampled by the randomized search.
    param_tuning = {
        'learning_rate': [0.01, 0.1],
        'max_depth': [7, 8, 10],
        'min_child_weight': [1, 3, 5],
        'subsample': [0.5, 0.7],
        'n_estimators': [80, 90, 100, 200, 400, 1000],
        'objective': ['reg:squarederror', 'count:poisson'],
    }

    # NOTE(review): `tree_method='gpu_hist'` / `gpu_id` are deprecated in
    # XGBoost >= 2.0 (replaced by tree_method='hist', device='cuda') — confirm
    # against the installed version before changing.
    xgb_model = XGBRegressor(tree_method='gpu_hist', gpu_id=0)

    gsearch = RandomizedSearchCV(
        estimator=xgb_model,
        param_distributions=param_tuning,
        n_iter=5,
        cv=2,
        verbose=2,
        random_state=0,
        n_jobs=-1,
    )

    # Fit on the function's own arguments. The original referenced the global
    # `x_train` here, which ignored `X_train` and raised NameError whenever
    # that global had not been defined yet.
    grid_result = MultiOutputRegressor(gsearch).fit(X_train, y_train)

    return grid_result.estimators_[0].best_params_

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 25% of the scaled samples for testing; the fixed seed keeps the
# split reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    X1,
    Y1,
    test_size=0.25,
    random_state=0,
)

# Search hyper-parameters on the training portion only, then display them as
# the cell output.
params = hyperParameterTuning(x_train, y_train)
params

# Model

In [None]:
# Splitting data into training and testing datasets (same seed and ratio as
# the tuning cell, so the split is identical).
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(X1,Y1,test_size=0.25,random_state=0)

from sklearn.multioutput import MultiOutputRegressor
from xgboost import XGBRegressor

# Creating the XGBRegressor object; MultiOutputRegressor fits one copy of it
# per target column.
# NOTE(review): these hyper-parameters are hard-coded and do not read the
# `params` dict produced by the tuning cell (subsample=0.9 is not even part of
# the searched grid) — confirm this is intentional.
model1 = MultiOutputRegressor(
    XGBRegressor(
        tree_method='gpu_hist',
        gpu_id=0,
        objective='count:poisson',
        max_depth=10,
        learning_rate=0.1,
        min_child_weight=3,
        n_estimators=400,
        subsample=0.9,
    )
)

# Training the model
model_fit1 = model1.fit(x_train, y_train)
print("Model training is Done!!")

# Saving the fitted model to disk. The context manager guarantees the file is
# flushed and closed; the original passed a bare open() to pickle.dump and
# never closed the handle.
filename1 = 'xgboost.sav'
with open(filename1, 'wb') as model_file:
    pickle.dump(model_fit1, model_file)

# Error Analysis

In [None]:
from sklearn.metrics import r2_score
from sklearn import metrics
# Labels for the seven target columns; the plotting cell uses them as y-axis
# labels, indexed by column.
train_sizes = ['NO2', 'O3', 'NO', 'CO', 'PM1', 'PM2.5', 'PM10']

# Predictions for both splits.
y_train_pred1 = model1.predict(x_train)
y_test_pred1 = model1.predict(x_test)

# R2 score on each split.
r2_train1 = r2_score(y_train, y_train_pred1)
r2_test1 = r2_score(y_test, y_test_pred1)

print('r2 score on train data ' + str(r2_train1))
print('r2 score on test data ' + str(r2_test1))

# Error metrics on the held-out test split.
xgboost_mae = metrics.mean_absolute_error(y_test, y_test_pred1)
xgboost_mse = metrics.mean_squared_error(y_test, y_test_pred1)
xgboost_rmse = np.sqrt(xgboost_mse)

print('Mean Absolute Error:', xgboost_mae)
print('Mean Squared Error:', xgboost_mse)
print('Root Mean Squared Error:', xgboost_rmse)
print(' \n')

# y-test vs y-predict

In [None]:
# Printing y_test and the model's predictions for it.
print("Y_Test:",y_test)
print("Y_Test_Predict:",y_test_pred1)

from matplotlib import style

style.use('ggplot')

# One figure per target column, driven by the label list so the number of
# plots always matches the labels instead of a hard-coded range(0, 7).
for i, target_name in enumerate(train_sizes):
    fig, ax = plt.subplots(figsize=(12, 10))
    # The legend labels make the actual and predicted curves distinguishable.
    # `markersize` was dropped: it has no effect when no marker is drawn.
    ax.plot(y_test[:, i], linewidth=3, label='Actual (y_test)')
    ax.plot(y_test_pred1[:, i], linewidth=2, label='Predicted')
    ax.set_xlabel('Test sample index')
    ax.set_ylabel(target_name)
    ax.set_title(target_name + ': actual vs predicted')
    ax.legend()
    plt.show()
    # Release the figure so repeated runs do not accumulate open figures.
    plt.close(fig)