# Importing Libraries

In [None]:
import os.path as op
import pickle

import cupy as cp
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.multioutput import MultiOutputRegressor

# Data Fetching

In [None]:
# Load every (node, month) CSV once and stack the rows into two float32 arrays:
#   A1 -> model inputs  (file columns 1, 2, 3, 15, 16)
#   U1 -> model targets (file columns 5, 6, 7, 8, 17, 18, 19)
# NOTE(review): what each column position means is not visible here -- confirm
# against the CSV header.
input_cols = [1, 2, 3, 15, 16]
output_cols = [5, 6, 7, 8, 17, 18, 19]
node = ['150', '149', '147', '144', '142', '140', '136', '61']
mon = ['Apr', 'Mar', 'Aug', 'Jun', 'Jul', 'Sep', 'May', 'Oct']

# read_csv ignores the order of `usecols` and returns the selected columns in
# file order, so an entry's index in the sorted list is also its position in
# the loaded frame.
wanted_cols = sorted(input_cols + output_cols)
input_pos = [wanted_cols.index(c) for c in input_cols]
output_pos = [wanted_cols.index(c) for c in output_cols]

# Seed each list with an empty, correctly shaped array so the final result is
# still a (0, n_cols) float32 array when nothing is loaded.
input_chunks = [np.empty((0, len(input_cols)), dtype='float32')]
output_chunks = [np.empty((0, len(output_cols)), dtype='float32')]

for j in node:
  for i in mon:
    # Parse each file a single time and slice both column groups from it.
    frame = pd.read_csv('data_gkv/AT510_Node_'+str(j)+'_'+str(i)+'19_OutputFile.csv',usecols=wanted_cols,low_memory=False)

    inp = np.array(frame.iloc[:, input_pos], dtype='float32')
    out = np.array(frame.iloc[:, output_pos], dtype='float32')

    input_chunks.append(inp)
    output_chunks.append(out)

# Concatenate once at the end instead of np.append inside the loop, which
# re-copies the whole accumulated array on every iteration.
A1 = np.concatenate(input_chunks, axis=0)
U1 = np.concatenate(output_chunks, axis=0)

print(A1)
print(U1)


# Min Max Scaler

In [None]:
from sklearn.preprocessing import MinMaxScaler
import warnings

# Use one scaler per array. Re-fitting a single shared scaler on U1 would
# discard the min/max learned from A1, making it impossible to scale new
# inputs the same way or to map X1 back to the original units.
x_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()
X1 = x_scaler.fit_transform(A1)
Y1 = y_scaler.fit_transform(U1)

# Backward-compatible alias: the previously shared scaler object ended up
# fitted on U1, so `scaler_obj` keeps referring to the target scaler.
scaler_obj = y_scaler

# NOTE(review): both scalers are fitted on the full data set before any
# train/test split, so test-set ranges influence the scaling -- confirm this
# is acceptable for the reported metrics.

# Suppress UserWarning messages for the remainder of the session.
warnings.filterwarnings(action='ignore', category=UserWarning)

# Parameter Tuning

In [None]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor

def hyperparameterRF(x_train,y_train):
    """Run a randomised hyper-parameter search for a random-forest regressor.

    A ``RandomizedSearchCV`` (100 sampled candidates, 2-fold CV, all cores)
    is wrapped in ``MultiOutputRegressor``, so an independent search is
    fitted for every target column of ``y_train``.

    Parameters
    ----------
    x_train : array-like
        Training features.
    y_train : array-like
        Training targets, one column per output.

    Returns
    -------
    dict
        ``best_params_`` of the search fitted for the FIRST target column
        only; the searches for the remaining targets are fitted but their
        results are not returned.
    """
    random_grid = {
        'bootstrap': [True, False],
        'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, None],
        # 1.0 (use all features) is what 'auto' meant for regressors; the
        # string 'auto' is no longer accepted by current scikit-learn.
        'max_features': [1.0, 'sqrt'],
        'min_samples_leaf': [1, 2, 4],
        'min_samples_split': [2, 5, 10],
        'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000],
    }
    # Seed the forest as well as the search so repeated runs give the same result.
    rf = RandomForestRegressor(random_state=42)
    rf_random = RandomizedSearchCV(
        estimator=rf,
        param_distributions=random_grid,
        n_iter=100,
        cv=2,
        verbose=2,
        random_state=42,
        n_jobs=-1,
    )

    # MultiOutputRegressor clones and fits the search once per target column.
    grid_result = MultiOutputRegressor(rf_random).fit(x_train, y_train)

    return grid_result.estimators_[0].best_params_
    

In [None]:
from sklearn.model_selection import train_test_split

# Hold out half of the scaled data; only the training half is passed to the
# hyper-parameter search below.
x_train, x_test, y_train, y_test = train_test_split(
    X1,
    Y1,
    test_size=0.5,
    random_state=0,
)

# Keep the selected hyper-parameters and display them as the cell output.
params = hyperparameterRF(x_train, y_train)
params

# Model

In [None]:
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor

# Splitting Data into training and testing dataset (80 % train / 20 % test)
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(X1,Y1,test_size=0.2,random_state=42)

# SVR predicts a single target, so MultiOutputRegressor fits one linear-kernel
# SVR per column of y_train.
# NOTE(review): `gamma` is documented as a coefficient for the rbf/poly/sigmoid
# kernels, so it presumably has no effect with kernel='linear' -- confirm.
model7 = MultiOutputRegressor(SVR(kernel='linear',C=1e1, gamma='auto'))

model_fit7=model7.fit(x_train,y_train)

print("Model Training Done !!")

# Dumping Model into a file

filename7 = 'svm.sav'
# Use a context manager so the file handle is flushed and closed even if
# pickling raises, instead of leaking the handle returned by open().
with open(filename7, 'wb') as model_file:
    pickle.dump(model_fit7, model_file)

In [None]:
# Reload the fitted model from disk and check that it can still predict.
# NOTE: pickle.load executes code embedded in the file -- only load model
# files that this notebook (or another trusted source) produced.
with open(filename7, 'rb') as model_file:  # closes the handle after loading
    loaded_model_fit7 = pickle.load(model_file)
loaded_model_fit7.predict(x_test)

# Error Analysis

In [None]:
from sklearn import metrics
from sklearn.metrics import r2_score

# Labels for the seven target columns (used as y-axis labels when plotting).
# NOTE(review): presumably in the same order as the columns of U1 -- confirm.
train_sizes = ['NO2', 'O3', 'NO', 'CO', 'PM1', 'PM2.5', 'PM10']

# Predictions of the fitted model on both splits.
y_train_pred7 = model_fit7.predict(x_train)
y_test_pred7 = model_fit7.predict(x_test)

# R^2 on each split (r2_score's default averaging over the outputs).
r2_train7 = r2_score(y_train, y_train_pred7)
r2_test7 = r2_score(y_test, y_test_pred7)

print('r2 score on train data '+ str(r2_train7))
print('r2 score on test data '+ str(r2_test7))

# Test-set error metrics; the RMSE is the square root of the MSE computed above.
svm_mae = metrics.mean_absolute_error(y_test, y_test_pred7)
svm_mse = metrics.mean_squared_error(y_test, y_test_pred7)
svm_rmse = np.sqrt(svm_mse)

print('Mean Absolute Error:', svm_mae)
print('Mean Squared Error:', svm_mse)
print('Root Mean Squared Error:', svm_rmse)

# y-test vs y-predict

In [None]:
# printing y_test and y_test_predict
print("Y_Test:",y_test)
print("Y_Test_Predict:",y_test_pred7)

from matplotlib import style
style.use('ggplot')

# One figure per target column: actual test values vs. model predictions,
# plotted against the row position in the test split.
# Iterating over the labels keeps the loop in step with `train_sizes` instead
# of relying on a hard-coded column count.
for i, pollutant in enumerate(train_sizes):
  fig, ax = plt.subplots(figsize=[12,10])
  ax.plot(y_test[:,i], linewidth=3, markersize=12, label='Actual')
  ax.plot(y_test_pred7[:,i], linewidth=2, markersize=12, label='Predicted')
  ax.set_xlabel('X')
  ax.set_ylabel(pollutant)
  # Without a legend the two lines cannot be told apart.
  ax.legend()
  plt.show()
  # Release the figure so repeated runs do not accumulate open figures.
  plt.close(fig)