In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from pandasql import sqldf
import matplotlib.pyplot as plt
import seaborn as sns
import optuna
import lightgbm as lgb

from sklearn.metrics import mean_absolute_error,mean_squared_error
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# Read only the four columns used below; the file is ';'-separated and writes decimals with a comma.
data = pd.read_csv(
    '../input/air-quality-data-set/AirQuality.csv',
    delimiter=';',
    decimal=",",
    usecols=['Date', 'Time', 'CO(GT)', 'T'],
)

In [3]:
# Discard every row whose CO(GT) or T reading is negative, then summarise what is left.
has_negative_reading = (data['CO(GT)'] < 0) | (data['T'] < 0)
data = data[~has_negative_reading]
data.describe([.01,.1,.2,.3,.4,.5,.6,.7,.8,.9,.97])

In [4]:
# NOTE: despite their names, percentile95 is the 0.75 quantile of T and percentile5 is
# the 0 quantile (i.e. the minimum); the names are kept so nothing else breaks.
percentile95 = data['T'].quantile(0.75)
percentile5 = data['T'].quantile(0)
upper_limit, lower_limit = percentile95, percentile5
print(upper_limit)
# Keep rows strictly inside (lower_limit, upper_limit); rows equal to either bound are removed.
inside_limits = (data['T'] < upper_limit) & (data['T'] > lower_limit)
data = data[inside_limits]
data.describe()

In [5]:
# Side-by-side distributions of the two series: CO(GT) on the left, T on the right.
fig, (ax_co, ax_t) = plt.subplots(1, 2, figsize=(16, 5))
sns.distplot(data['CO(GT)'], ax=ax_co)
sns.distplot(data['T'], ax=ax_t)
plt.show()

In [6]:
# Box plot of CO(GT). The series is passed as `x=` explicitly instead of positionally, so the
# orientation does not depend on how a given seaborn release interprets a positional argument.
sns.boxplot(x=data['CO(GT)'])

In [7]:
# Box plot of T. The series is passed as `x=` explicitly instead of positionally, so the
# orientation does not depend on how a given seaborn release interprets a positional argument.
sns.boxplot(x=data['T'])

In [8]:
# Join the Date and Time text columns into one string and use '-' instead of '/' as separator,
# which is the layout the parsing step in the next cell expects.
date_and_time = data['Date'].astype(str) + " " + data['Time'].astype(str)
data['DateTime'] = date_and_time
data['DateTime'] = data['DateTime'].str.replace('/','-')

In [9]:
# Parse the combined string as day-month-year with '.'-separated time parts; values that do not
# match the format become NaT because of errors='coerce'.
data['DateTime'] = pd.to_datetime(data['DateTime'], format='%d-%m-%Y %H.%M.%S', errors='coerce')
# Round-trip the frame through pandasql.
# NOTE(review): this presumably returns a fresh frame with a default index and DateTime as text,
# which later cells appear to rely on when they merge on the DateTime index — confirm before removing.
data=sqldf("select * from data")

In [10]:
# Remove every row that still has a missing value in any column.
# Date and Time are kept on purpose: the SQL feature query partitions by Time.
data = data.dropna()

In [11]:
# Use the DateTime column as the row index from here on.
data = data.set_index(keys='DateTime')

In [12]:

# Preview the first rows of the indexed frame instead of dumping the whole table.
data.head()

In [13]:
from dateutil.relativedelta import relativedelta

In [14]:
def queryToRun(dataset):
    """Build the SQL text that adds the lag / rank / moving-average feature columns.

    The query reads from a table named ``data`` and returns every original column plus:

    * ``COPrev`` / ``TPrev``: the previous value of CO(GT) / T for the same ``Time`` value,
      ordered by calendar day.
    * ``CoCumeDist`` / ``TCumeDist``: ``CUME_DIST()`` of CO(GT) / T within each calendar day.
    * ``CO{n}Dma`` / ``T{n}Dma`` for n = 6..2: the average, rounded to 3 decimals, over the
      current row and the ``n`` preceding rows with the same ``Time`` value.

    The calendar day is obtained with ``DATE(DateTime)``. The previous ``CAST(DateTime as Date)``
    does not produce a date in SQLite: the cast yields the leading integer of the text (the
    year), so rows were neither ordered nor partitioned by day.

    Parameters
    ----------
    dataset : any
        Not used when building the text; the table name is always ``data``. Kept so existing
        callers continue to work.

    Returns
    -------
    str
        The complete ``select`` statement.
    """
    # Window shared by the lag and moving-average columns (closing parenthesis added per use).
    same_time_by_day = "OVER (PARTITION BY Time ORDER BY DATE(DateTime)"
    # Window used for the within-day cumulative distribution.
    within_day = "OVER (PARTITION BY DATE(DateTime) ORDER BY "

    columns = [
        "LAG([CO(GT)],1) " + same_time_by_day + ") COPrev",
        "LAG([T],1) " + same_time_by_day + ") TPrev",
        "CUME_DIST() " + within_day + "[CO(GT)]) AS CoCumeDist",
        "CUME_DIST() " + within_day + "[T]) AS TCumeDist",
    ]

    average = "ROUND(AVG({src}) {win} ROWS BETWEEN {n} PRECEDING AND CURRENT ROW),3) {pre}{n}Dma"
    for source, prefix in (("[CO(GT)]", "CO"), ("[T]", "T")):
        for preceding in range(6, 1, -1):
            columns.append(
                average.format(src=source, win=same_time_by_day, n=preceding, pre=prefix)
            )

    return "select *," + ",".join(columns) + " from data"

def filterQuery(dateFrom,dateTo):
    """Return the SQL text selecting the rows of table ``df`` whose ``datetime`` lies between
    ``dateFrom`` and ``dateTo``; both bounds are inserted verbatim as quoted string literals."""
    pieces = (
        "select * from df where datetime between '",
        dateFrom,
        "' and '",
        dateTo,
        "'",
    )
    return "".join(pieces)

def testTrain(dFrame,fromDate,toDate,testFrom,testTo):
    """Add the SQL feature columns to ``dFrame`` and cut out a training and a test window.

    Parameters
    ----------
    dFrame : pandas.DataFrame
        Source frame. It is handed to the feature query as the table ``data``.
    fromDate, toDate : str
        Bounds of the training window, passed to ``filterQuery``.
    testFrom, testTo : str
        Bounds of the test window, passed to ``filterQuery``.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(df_train, df_test)``, each indexed by ``DateTime``.
    """
    # Pass the frame explicitly. Without an `env` argument pandasql looks the table name up
    # among the caller's variables, so `dFrame` was ignored and the notebook-level `data`
    # was queried instead.
    df = sqldf(queryToRun(dFrame), {"data": dFrame})
    # One conversion is enough; a second to_datetime on an already converted column does nothing.
    df['DateTime'] = pd.to_datetime(df['DateTime'], format='%Y-%m-%d %H:%M:%S.%f')
    df = df.set_index('DateTime')

    # filterQuery refers to the table `df`, so expose exactly that name.
    tables = {"df": df}
    df_train = sqldf(filterQuery(fromDate, toDate), tables).set_index('DateTime')
    df_test = sqldf(filterQuery(testFrom, testTo), tables).set_index('DateTime')
    return df_train, df_test

In [15]:
# First split: training window '2004-03-11' .. '2004-03-18', test window '2004-03-18' .. '2004-03-19'.
df_train, df_test = testTrain(
    data,
    fromDate='2004-03-11',
    toDate='2004-03-18',
    testFrom='2004-03-18',
    testTo='2004-03-19',
)

In [16]:
# First five rows of the training window.
df_train.head(5)

In [17]:
# First five rows of the test window.
df_test.head(5)

In [18]:
# Model inputs: the previous-value and moving-average columns produced by the SQL feature query.
feature_cols = [
    'COPrev', 'TPrev',
    'CO6Dma', 'CO5Dma', 'CO4Dma', 'CO3Dma', 'CO2Dma',
    'T6Dma', 'T5Dma', 'T4Dma', 'T3Dma', 'T2Dma',
]
x_train, x_test = df_train[feature_cols], df_test[feature_cols]
# Targets are kept as one-column frames.
yco_train, yco_test = df_train[['CO(GT)']], df_test[['CO(GT)']]
yt_train, yt_test = df_train[['T']], df_test[['T']]

In [19]:
class Objective():
    """Optuna objective that tunes LightGBM regression parameters for the CO(GT) target.

    Each call trains one booster with the parameters suggested by the trial and returns the
    mean absolute error of its predictions on the same data it was trained on. The booster of
    the most recent call is remembered so that ``callback`` can keep the best one.

    Parameters
    ----------
    train_x, train_y : optional
        Training features / target. When omitted, the notebook-level ``x_train`` and
        ``yco_train`` are read at call time, which is the original behaviour.
    """

    def __init__(self, train_x=None, train_y=None):
        self.best_booster = None   # booster of the best trial seen by `callback`
        self._booster = None       # booster trained by the most recent `__call__`
        self._train_x = train_x
        self._train_y = train_y

    def __call__(self, trial):
        """Train one booster for ``trial`` and return its training-set mean absolute error."""
        param = {
            "objective": "regression",
            "metric": "mape",
            "verbosity": -1,
            "boosting_type": "gbdt",
            "min_gain_to_split": trial.suggest_loguniform("min_gain_to_split", 0.3, 1.0),
            "max_depth": trial.suggest_int("max_depth", 75, 150),
            "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 10.0),
            "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 10.0),
            "num_leaves": trial.suggest_int("num_leaves", 150, 300),
            # "n_estimators" is an alias of "num_iterations" in LightGBM; suggesting both left
            # the number of boosting rounds ambiguous, so only one of them is searched.
            "num_iterations": trial.suggest_int("num_iterations", 20, 400),
            "early_stopping_rounds": trial.suggest_int("early_stopping_rounds", 20, 100),
            "learning_rate": trial.suggest_loguniform("learning_rate", 0.001, 0.1),
            "max_bin": trial.suggest_int("max_bin", 600, 2000),
        }
        trainx = x_train if self._train_x is None else self._train_x
        trainy = yco_train if self._train_y is None else self._train_y
        dtrain = lgb.Dataset(trainx, label=trainy, free_raw_data=False)

        # The only evaluation set is the training set, which LightGBM reports under the name
        # "training"; the pruning callback must watch that name and must actually be passed to
        # lgb.train, otherwise the study's pruner never receives intermediate values.
        pruning_callback = optuna.integration.LightGBMPruningCallback(
            trial, "mape", valid_name="training"
        )
        gbm = lgb.train(
            param, dtrain, valid_sets=[dtrain], verbose_eval=False,
            callbacks=[pruning_callback],
        )

        self._booster = gbm

        # Score the raw predictions: the target is continuous, so rounding them to whole
        # numbers first would distort the error being minimised.
        preds = gbm.predict(trainx)
        return mean_absolute_error(trainy, preds)

    def callback(self, study, trial):
        """Optuna study callback: keep the last trained booster when ``trial`` is the best trial."""
        if study.best_trial == trial:
            self.best_booster = self._booster

In [20]:
class Objective1():
    """Optuna objective that tunes LightGBM regression parameters for the T target.

    Each call trains one booster with the parameters suggested by the trial and returns the
    mean absolute error of its predictions on the same data it was trained on. The booster of
    the most recent call is remembered so that ``callback`` can keep the best one.

    Parameters
    ----------
    train_x, train_y : optional
        Training features / target. When omitted, the notebook-level ``x_train`` and
        ``yt_train`` are read at call time, which is the original behaviour.
    """

    def __init__(self, train_x=None, train_y=None):
        self.best_booster = None   # booster of the best trial seen by `callback`
        self._booster = None       # booster trained by the most recent `__call__`
        self._train_x = train_x
        self._train_y = train_y

    def __call__(self, trial):
        """Train one booster for ``trial`` and return its training-set mean absolute error."""
        param = {
            "objective": "regression",
            "metric": "mape",
            "verbosity": -1,
            "boosting_type": "gbdt",
            "min_gain_to_split": trial.suggest_loguniform("min_gain_to_split", 0.3, 1.0),
            "max_depth": trial.suggest_int("max_depth", 75, 150),
            "lambda_l1": trial.suggest_loguniform("lambda_l1", 1e-8, 10.0),
            "lambda_l2": trial.suggest_loguniform("lambda_l2", 1e-8, 10.0),
            "num_leaves": trial.suggest_int("num_leaves", 150, 300),
            # "n_estimators" is an alias of "num_iterations" in LightGBM; suggesting both left
            # the number of boosting rounds ambiguous, so only one of them is searched.
            "num_iterations": trial.suggest_int("num_iterations", 20, 400),
            "early_stopping_rounds": trial.suggest_int("early_stopping_rounds", 20, 100),
            "learning_rate": trial.suggest_loguniform("learning_rate", 0.001, 0.1),
            "max_bin": trial.suggest_int("max_bin", 600, 2000),
        }
        trainx = x_train if self._train_x is None else self._train_x
        trainy = yt_train if self._train_y is None else self._train_y
        dtrain = lgb.Dataset(trainx, label=trainy, free_raw_data=False)

        # The only evaluation set is the training set, which LightGBM reports under the name
        # "training"; the pruning callback must watch that name and must actually be passed to
        # lgb.train, otherwise the study's pruner never receives intermediate values.
        pruning_callback = optuna.integration.LightGBMPruningCallback(
            trial, "mape", valid_name="training"
        )
        gbm = lgb.train(
            param, dtrain, valid_sets=[dtrain], verbose_eval=False,
            callbacks=[pruning_callback],
        )

        self._booster = gbm

        # Score the raw predictions: the target is continuous, so rounding them to whole
        # numbers first would distort the error being minimised.
        preds = gbm.predict(trainx)
        return mean_absolute_error(trainy, preds)

    def callback(self, study, trial):
        """Optuna study callback: keep the last trained booster when ``trial`` is the best trial."""
        if study.best_trial == trial:
            self.best_booster = self._booster

In [21]:
from warnings import simplefilter

simplefilter("ignore", category=RuntimeWarning)
optuna.logging.set_verbosity(optuna.logging.ERROR)


def _run_study(objective, n_trials=500):
    """Minimise `objective` with a median-pruned Optuna study, print the best parameters and
    return the finished study."""
    study = optuna.create_study(
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=10), direction="minimize"
    )
    # Register objective.callback: it is what copies the booster of the best trial into
    # objective.best_booster. Without it best_booster stays None after optimisation.
    study.optimize(objective, n_trials=n_trials, callbacks=[objective.callback])

    print("Best trial:")
    print("  Params: ")
    for key, value in study.best_trial.params.items():
        print("    {}: {}".format(key, value))
    return study


# Parameters for the CO(GT) model.
objective = Objective()
study = _run_study(objective)
trial_co = study.best_trial
best_model_co = objective.best_booster

# Parameters for the T model.
objective = Objective1()
study = _run_study(objective)
trial_t = study.best_trial
best_model_t = objective.best_booster

In [22]:
def model(train_x,train_y_co,train_y_t,test_x):
    """Fit one LightGBM booster per target and predict both targets for ``test_x``.

    The CO(GT) booster uses the parameters of the notebook-level ``trial_co`` and the T
    booster those of ``trial_t``.

    Parameters
    ----------
    train_x : training features shared by both boosters.
    train_y_co : CO(GT) training target (second argument).
    train_y_t : T training target (third argument).
    test_x : features to predict for.

    Returns
    -------
    tuple
        ``(y_pred_co, y_pred_t)``: the CO(GT) predictions first, then the T predictions.
    """
    def _fit_predict(target, tuned_params):
        # The training set doubles as the only evaluation set; the previously built
        # `lgb_valid` datasets were never used and are no longer created.
        train_set = lgb.Dataset(train_x, target)
        booster = lgb.train(dict(tuned_params), train_set, 5000, valid_sets=[train_set], verbose_eval=50)
        return booster.predict(test_x)

    y_pred_co = _fit_predict(train_y_co, trial_co.params)
    y_pred_t = _fit_predict(train_y_t, trial_t.params)
    return y_pred_co, y_pred_t

In [23]:
# Forecast the current test window.
# model() takes (train_x, train_y_co, train_y_t, test_x) and returns (CO forecast, T forecast).
# The targets used to be passed in swapped order and the outputs stored under swapped names,
# which fitted each target with the parameters tuned for the other one.
y_pred_co,y_pred_t = model(x_train,yco_train,yt_train,x_test)
yy=pd.DataFrame()
yy['T']=y_pred_t
yy['CO(GT)']=y_pred_co
yy.index=yt_test.index
yy_new=yy
# Observed values next to the forecasts, aligned on the DateTime index.
yy_new['T_org']=df_test['T']
yy_new['co_org']=df_test['CO(GT)']

In [24]:
def MAPE(y_true, y_pred):
    """Mean absolute percentage error, in percent, of ``y_pred`` against ``y_true``.

    Both inputs are converted to numpy arrays; the absolute error is divided element-wise by
    ``y_true`` before averaging.
    """
    actual = np.array(y_true)
    forecast = np.array(y_pred)
    relative_error = np.abs((actual - forecast) / actual)
    return np.mean(relative_error) * 100

In [25]:
# `yy_new.T` is the DataFrame transpose, not the column named 'T'; select the column by key.
t_pred_18=MAPE(yy_new['T_org'],yy_new['T'])
t_co_18=MAPE(yy_new['co_org'],yy_new['CO(GT)'])
print(t_pred_18,t_co_18)

In [27]:
# Walk-forward step: training window '2004-03-12' .. '2004-03-19', test window '2004-03-19' .. '2004-03-20'.
feature_cols=['COPrev','TPrev','CO6Dma','CO5Dma','CO4Dma', 'CO3Dma','CO2Dma','T6Dma','T5Dma','T4Dma', 'T3Dma', 'T2Dma']

# "org" forecast: made from `data` as it stands before the previous cell's forecasts (`yy`) are
# merged in. The features come from this window's split; the x_train/x_test left over from the
# previous cell belong to the previous window and would forecast the wrong day.
df_train,df_test=testTrain(data,'2004-03-12','2004-03-19','2004-03-19','2004-03-20')
# model() takes (train_x, train_y_co, train_y_t, test_x) and returns (CO forecast, T forecast).
y_pred_co_org,y_pred_t_org = model(df_train[feature_cols],df_train[['CO(GT)']],df_train[['T']],df_test[feature_cols])

# Merge the previous forecasts into `data`. After fillna the *_y columns hold the forecast where
# one exists and the stored value everywhere else, so those columns (not the untouched *_x ones)
# become the series used for this step.
df_1 = pd.merge(data,yy, left_index=True, right_index=True, how='left')
df_1['T_y']=df_1['T_y'].fillna(df_1['T_x'])
df_1['CO(GT)_y']=df_1['CO(GT)_y'].fillna(df_1['CO(GT)_x'])
data=df_1[['Date','Time','CO(GT)_y','T_y']].copy()
data.columns=['Date','Time','CO(GT)','T']

df_train,df_test=testTrain(data,'2004-03-12','2004-03-19','2004-03-19','2004-03-20')
x_train=df_train[feature_cols]
yco_train=df_train[['CO(GT)']]
yt_train=df_train[['T']]
x_test=df_test[feature_cols]
yco_test=df_test[['CO(GT)']]
yt_test=df_test[['T']]
y_pred_co,y_pred_t = model(x_train,yco_train,yt_train,x_test)

yy=pd.DataFrame()
yy['T']=y_pred_t
yy['CO(GT)']=y_pred_co
yy['T_org_pred']=y_pred_t_org
yy['CO(GT)_org_pred']=y_pred_co_org
yy.index=yt_test.index
yy_new=yy
# Observed values of the test window, aligned on the DateTime index.
yy_new['T_org']=df_test['T']
yy_new['co_org']=df_test['CO(GT)']
yy_new


In [28]:
# `yy_new.T` is the DataFrame transpose, not the column named 'T'; select the columns by key.
t_pred_19=MAPE(yy_new['T_org'],yy_new['T'])
t_co_19=MAPE(yy_new['co_org'],yy_new['CO(GT)'])
t_pred_org_19=MAPE(yy_new['T_org'],yy_new['T_org_pred'])
t_co_org_19=MAPE(yy_new['co_org'],yy_new['CO(GT)_org_pred'])
print(t_pred_19,t_co_19,t_pred_org_19,t_co_org_19)

In [29]:
# Walk-forward step: training window '2004-03-13' .. '2004-03-20', test window '2004-03-20' .. '2004-03-21'.
feature_cols=['COPrev','TPrev','CO6Dma','CO5Dma','CO4Dma', 'CO3Dma','CO2Dma','T6Dma','T5Dma','T4Dma', 'T3Dma', 'T2Dma']

# "org" forecast: made from `data` as it stands before the previous cell's forecasts (`yy`) are
# merged in. The features come from this window's split; the x_train/x_test left over from the
# previous cell belong to the previous window and would forecast the wrong day.
df_train,df_test=testTrain(data,'2004-03-13','2004-03-20','2004-03-20','2004-03-21')
# model() takes (train_x, train_y_co, train_y_t, test_x) and returns (CO forecast, T forecast).
y_pred_co_org,y_pred_t_org = model(df_train[feature_cols],df_train[['CO(GT)']],df_train[['T']],df_test[feature_cols])

# Merge the previous forecasts into `data`. After fillna the *_y columns hold the forecast where
# one exists and the stored value everywhere else, so those columns (not the untouched *_x ones)
# become the series used for this step.
df_1 = pd.merge(data,yy, left_index=True, right_index=True, how='left')
df_1['T_y']=df_1['T_y'].fillna(df_1['T_x'])
df_1['CO(GT)_y']=df_1['CO(GT)_y'].fillna(df_1['CO(GT)_x'])
data=df_1[['Date','Time','CO(GT)_y','T_y']].copy()
data.columns=['Date','Time','CO(GT)','T']

df_train,df_test=testTrain(data,'2004-03-13','2004-03-20','2004-03-20','2004-03-21')
x_train=df_train[feature_cols]
yco_train=df_train[['CO(GT)']]
yt_train=df_train[['T']]
x_test=df_test[feature_cols]
yco_test=df_test[['CO(GT)']]
yt_test=df_test[['T']]
y_pred_co,y_pred_t = model(x_train,yco_train,yt_train,x_test)

yy=pd.DataFrame()
yy['T']=y_pred_t
yy['CO(GT)']=y_pred_co
yy['T_org_pred']=y_pred_t_org
yy['CO(GT)_org_pred']=y_pred_co_org
yy.index=yt_test.index
yy_new=yy
# Observed values of the test window, aligned on the DateTime index.
yy_new['T_org']=df_test['T']
yy_new['co_org']=df_test['CO(GT)']
yy_new



In [30]:
# Walk-forward step: training window '2004-03-14' .. '2004-03-21', test window '2004-03-21' .. '2004-03-22'.
feature_cols=['COPrev','TPrev','CO6Dma','CO5Dma','CO4Dma', 'CO3Dma','CO2Dma','T6Dma','T5Dma','T4Dma', 'T3Dma', 'T2Dma']

# "org" forecast: made from `data` as it stands before the previous cell's forecasts (`yy`) are
# merged in. It must use the same window as the main forecast below (the first split of this
# cell used to repeat the previous cell's dates) and features from this window's split rather
# than the x_train/x_test left over from the previous cell.
df_train,df_test=testTrain(data,'2004-03-14','2004-03-21','2004-03-21','2004-03-22')
# model() takes (train_x, train_y_co, train_y_t, test_x) and returns (CO forecast, T forecast).
y_pred_co_org,y_pred_t_org = model(df_train[feature_cols],df_train[['CO(GT)']],df_train[['T']],df_test[feature_cols])

# Merge the previous forecasts into `data`. After fillna the *_y columns hold the forecast where
# one exists and the stored value everywhere else, so those columns (not the untouched *_x ones)
# become the series used for this step.
df_1 = pd.merge(data,yy, left_index=True, right_index=True, how='left')
df_1['T_y']=df_1['T_y'].fillna(df_1['T_x'])
df_1['CO(GT)_y']=df_1['CO(GT)_y'].fillna(df_1['CO(GT)_x'])
data=df_1[['Date','Time','CO(GT)_y','T_y']].copy()
data.columns=['Date','Time','CO(GT)','T']

df_train,df_test=testTrain(data,'2004-03-14','2004-03-21','2004-03-21','2004-03-22')
x_train=df_train[feature_cols]
yco_train=df_train[['CO(GT)']]
yt_train=df_train[['T']]
x_test=df_test[feature_cols]
yco_test=df_test[['CO(GT)']]
yt_test=df_test[['T']]
y_pred_co,y_pred_t = model(x_train,yco_train,yt_train,x_test)

yy=pd.DataFrame()
yy['T']=y_pred_t
yy['CO(GT)']=y_pred_co
yy['T_org_pred']=y_pred_t_org
yy['CO(GT)_org_pred']=y_pred_co_org
yy.index=yt_test.index
yy_new=yy
# Observed values of the test window, aligned on the DateTime index.
yy_new['T_org']=df_test['T']
yy_new['co_org']=df_test['CO(GT)']
yy_new


In [31]:
# `yy_new.T` is the DataFrame transpose, not the column named 'T'; select the columns by key.
t_pred_20=MAPE(yy_new['T_org'],yy_new['T'])
t_co_20=MAPE(yy_new['co_org'],yy_new['CO(GT)'])
t_pred_org_20=MAPE(yy_new['T_org'],yy_new['T_org_pred'])
t_co_org_20=MAPE(yy_new['co_org'],yy_new['CO(GT)_org_pred'])
print(t_pred_20,t_co_20,t_pred_org_20,t_co_org_20)

In [32]:
# Walk-forward step: training window '2004-03-15' .. '2004-03-22', test window '2004-03-22' .. '2004-03-23'.
feature_cols=['COPrev','TPrev','CO6Dma','CO5Dma','CO4Dma', 'CO3Dma','CO2Dma','T6Dma','T5Dma','T4Dma', 'T3Dma', 'T2Dma']

# "org" forecast: made from `data` as it stands before the previous cell's forecasts (`yy`) are
# merged in. The features come from this window's split; the x_train/x_test left over from the
# previous cell belong to the previous window and would forecast the wrong day.
df_train,df_test=testTrain(data,'2004-03-15','2004-03-22','2004-03-22','2004-03-23')
# model() takes (train_x, train_y_co, train_y_t, test_x) and returns (CO forecast, T forecast).
y_pred_co_org,y_pred_t_org = model(df_train[feature_cols],df_train[['CO(GT)']],df_train[['T']],df_test[feature_cols])

# Merge the previous forecasts into `data`. After fillna the *_y columns hold the forecast where
# one exists and the stored value everywhere else, so those columns (not the untouched *_x ones)
# become the series used for this step.
df_1 = pd.merge(data,yy, left_index=True, right_index=True, how='left')
df_1['T_y']=df_1['T_y'].fillna(df_1['T_x'])
df_1['CO(GT)_y']=df_1['CO(GT)_y'].fillna(df_1['CO(GT)_x'])
data=df_1[['Date','Time','CO(GT)_y','T_y']].copy()
data.columns=['Date','Time','CO(GT)','T']

df_train,df_test=testTrain(data,'2004-03-15','2004-03-22','2004-03-22','2004-03-23')
x_train=df_train[feature_cols]
yco_train=df_train[['CO(GT)']]
yt_train=df_train[['T']]
x_test=df_test[feature_cols]
yco_test=df_test[['CO(GT)']]
yt_test=df_test[['T']]
y_pred_co,y_pred_t = model(x_train,yco_train,yt_train,x_test)

yy=pd.DataFrame()
yy['T']=y_pred_t
yy['CO(GT)']=y_pred_co
yy['T_org_pred']=y_pred_t_org
yy['CO(GT)_org_pred']=y_pred_co_org
yy.index=yt_test.index
yy_new=yy
# Observed values of the test window, aligned on the DateTime index.
yy_new['T_org']=df_test['T']
yy_new['co_org']=df_test['CO(GT)']
yy_new


In [33]:
# `yy_new.T` is the DataFrame transpose, not the column named 'T'; select the columns by key.
t_pred_21=MAPE(yy_new['T_org'],yy_new['T'])
t_co_21=MAPE(yy_new['co_org'],yy_new['CO(GT)'])
t_pred_org_21=MAPE(yy_new['T_org'],yy_new['T_org_pred'])
t_co_org_21=MAPE(yy_new['co_org'],yy_new['CO(GT)_org_pred'])
print(t_pred_21,t_co_21,t_pred_org_21,t_co_org_21)

In [34]:
# Walk-forward step: training window '2004-03-16' .. '2004-03-23', test window '2004-03-23' .. '2004-03-24'.
feature_cols=['COPrev','TPrev','CO6Dma','CO5Dma','CO4Dma', 'CO3Dma','CO2Dma','T6Dma','T5Dma','T4Dma', 'T3Dma', 'T2Dma']

# "org" forecast: made from `data` as it stands before the previous cell's forecasts (`yy`) are
# merged in. The features come from this window's split; the x_train/x_test left over from the
# previous cell belong to the previous window and would forecast the wrong day.
df_train,df_test=testTrain(data,'2004-03-16','2004-03-23','2004-03-23','2004-03-24')
# model() takes (train_x, train_y_co, train_y_t, test_x) and returns (CO forecast, T forecast).
y_pred_co_org,y_pred_t_org = model(df_train[feature_cols],df_train[['CO(GT)']],df_train[['T']],df_test[feature_cols])

# Merge the previous forecasts into `data`. After fillna the *_y columns hold the forecast where
# one exists and the stored value everywhere else, so those columns (not the untouched *_x ones)
# become the series used for this step.
df_1 = pd.merge(data,yy, left_index=True, right_index=True, how='left')
df_1['T_y']=df_1['T_y'].fillna(df_1['T_x'])
df_1['CO(GT)_y']=df_1['CO(GT)_y'].fillna(df_1['CO(GT)_x'])
data=df_1[['Date','Time','CO(GT)_y','T_y']].copy()
data.columns=['Date','Time','CO(GT)','T']

df_train,df_test=testTrain(data,'2004-03-16','2004-03-23','2004-03-23','2004-03-24')
x_train=df_train[feature_cols]
yco_train=df_train[['CO(GT)']]
yt_train=df_train[['T']]
x_test=df_test[feature_cols]
yco_test=df_test[['CO(GT)']]
yt_test=df_test[['T']]
y_pred_co,y_pred_t = model(x_train,yco_train,yt_train,x_test)

yy=pd.DataFrame()
yy['T']=y_pred_t
yy['CO(GT)']=y_pred_co
yy['T_org_pred']=y_pred_t_org
yy['CO(GT)_org_pred']=y_pred_co_org
yy.index=yt_test.index
yy_new=yy
# Observed values of the test window, aligned on the DateTime index.
yy_new['T_org']=df_test['T']
yy_new['co_org']=df_test['CO(GT)']
yy_new


In [35]:
# `yy_new.T` is the DataFrame transpose, not the column named 'T'; select the columns by key.
t_pred_22=MAPE(yy_new['T_org'],yy_new['T'])
t_co_22=MAPE(yy_new['co_org'],yy_new['CO(GT)'])
t_pred_org_22=MAPE(yy_new['T_org'],yy_new['T_org_pred'])
t_co_org_22=MAPE(yy_new['co_org'],yy_new['CO(GT)_org_pred'])
print(t_pred_22,t_co_22,t_pred_org_22,t_co_org_22)

In [37]:
# Walk-forward step: training window '2004-03-17' .. '2004-03-24', test window '2004-03-24' .. '2004-03-25'.
feature_cols=['COPrev','TPrev','CO6Dma','CO5Dma','CO4Dma', 'CO3Dma','CO2Dma','T6Dma','T5Dma','T4Dma', 'T3Dma', 'T2Dma']

# "org" forecast: made from `data` as it stands before the previous cell's forecasts (`yy`) are
# merged in. The features come from this window's split; the x_train/x_test left over from the
# previous cell belong to the previous window and would forecast the wrong day.
df_train,df_test=testTrain(data,'2004-03-17','2004-03-24','2004-03-24','2004-03-25')
# model() takes (train_x, train_y_co, train_y_t, test_x) and returns (CO forecast, T forecast).
y_pred_co_org,y_pred_t_org = model(df_train[feature_cols],df_train[['CO(GT)']],df_train[['T']],df_test[feature_cols])

# Merge the previous forecasts into `data`. After fillna the *_y columns hold the forecast where
# one exists and the stored value everywhere else, so those columns (not the untouched *_x ones)
# become the series used for this step.
df_1 = pd.merge(data,yy, left_index=True, right_index=True, how='left')
df_1['T_y']=df_1['T_y'].fillna(df_1['T_x'])
df_1['CO(GT)_y']=df_1['CO(GT)_y'].fillna(df_1['CO(GT)_x'])
data=df_1[['Date','Time','CO(GT)_y','T_y']].copy()
data.columns=['Date','Time','CO(GT)','T']

df_train,df_test=testTrain(data,'2004-03-17','2004-03-24','2004-03-24','2004-03-25')
x_train=df_train[feature_cols]
yco_train=df_train[['CO(GT)']]
yt_train=df_train[['T']]
x_test=df_test[feature_cols]
yco_test=df_test[['CO(GT)']]
yt_test=df_test[['T']]
y_pred_co,y_pred_t = model(x_train,yco_train,yt_train,x_test)

yy=pd.DataFrame()
yy['T']=y_pred_t
yy['CO(GT)']=y_pred_co
yy['T_org_pred']=y_pred_t_org
yy['CO(GT)_org_pred']=y_pred_co_org
yy.index=yt_test.index
yy_new=yy
# Observed values of the test window, aligned on the DateTime index.
yy_new['T_org']=df_test['T']
yy_new['co_org']=df_test['CO(GT)']
yy_new


In [38]:
# `yy_new.T` is the DataFrame transpose, not the column named 'T'; select the columns by key.
# NOTE(review): these *_22 names are also assigned by an earlier scoring cell, so the values
# computed there are overwritten here. The names are kept because the summary cells at the end
# of the notebook read them — consider giving this window its own suffix everywhere.
t_pred_22=MAPE(yy_new['T_org'],yy_new['T'])
t_co_22=MAPE(yy_new['co_org'],yy_new['CO(GT)'])
t_pred_org_22=MAPE(yy_new['T_org'],yy_new['T_org_pred'])
t_co_org_22=MAPE(yy_new['co_org'],yy_new['CO(GT)_org_pred'])
print(t_pred_22,t_co_22,t_pred_org_22,t_co_org_22)

In [40]:
# Walk-forward step: training window '2004-03-18' .. '2004-03-25', test window '2004-03-25' .. '2004-03-26'.
feature_cols=['COPrev','TPrev','CO6Dma','CO5Dma','CO4Dma', 'CO3Dma','CO2Dma','T6Dma','T5Dma','T4Dma', 'T3Dma', 'T2Dma']

# "org" forecast: made from `data` as it stands before the previous cell's forecasts (`yy`) are
# merged in. The features come from this window's split; the x_train/x_test left over from the
# previous cell belong to the previous window and would forecast the wrong day.
df_train,df_test=testTrain(data,'2004-03-18','2004-03-25','2004-03-25','2004-03-26')
# model() takes (train_x, train_y_co, train_y_t, test_x) and returns (CO forecast, T forecast).
y_pred_co_org,y_pred_t_org = model(df_train[feature_cols],df_train[['CO(GT)']],df_train[['T']],df_test[feature_cols])

# Merge the previous forecasts into `data`. After fillna the *_y columns hold the forecast where
# one exists and the stored value everywhere else, so those columns (not the untouched *_x ones)
# become the series used for this step.
df_1 = pd.merge(data,yy, left_index=True, right_index=True, how='left')
df_1['T_y']=df_1['T_y'].fillna(df_1['T_x'])
df_1['CO(GT)_y']=df_1['CO(GT)_y'].fillna(df_1['CO(GT)_x'])
data=df_1[['Date','Time','CO(GT)_y','T_y']].copy()
data.columns=['Date','Time','CO(GT)','T']

df_train,df_test=testTrain(data,'2004-03-18','2004-03-25','2004-03-25','2004-03-26')
x_train=df_train[feature_cols]
yco_train=df_train[['CO(GT)']]
yt_train=df_train[['T']]
x_test=df_test[feature_cols]
yco_test=df_test[['CO(GT)']]
yt_test=df_test[['T']]
y_pred_co,y_pred_t = model(x_train,yco_train,yt_train,x_test)

yy=pd.DataFrame()
yy['T']=y_pred_t
yy['CO(GT)']=y_pred_co
yy['T_org_pred']=y_pred_t_org
yy['CO(GT)_org_pred']=y_pred_co_org
yy.index=yt_test.index
yy_new=yy
# Observed values of the test window, aligned on the DateTime index.
yy_new['T_org']=df_test['T']
yy_new['co_org']=df_test['CO(GT)']
yy_new


In [41]:
# `yy_new.T` is the DataFrame transpose, not the column named 'T'; select the columns by key.
t_pred_23=MAPE(yy_new['T_org'],yy_new['T'])
t_co_23=MAPE(yy_new['co_org'],yy_new['CO(GT)'])
t_pred_org_23=MAPE(yy_new['T_org'],yy_new['T_org_pred'])
t_co_org_23=MAPE(yy_new['co_org'],yy_new['CO(GT)_org_pred'])
print(t_pred_23,t_co_23,t_pred_org_23,t_co_org_23)

In [42]:
# MAPE per scored window: first line for T, second line for CO(GT).
for window_scores in (
    (t_pred_18, t_pred_19, t_pred_20, t_pred_21, t_pred_22, t_pred_23),
    (t_co_18, t_co_19, t_co_20, t_co_21, t_co_22, t_co_23),
):
    print(*window_scores)

In [44]:
# MAPE of the "org" forecasts per scored window: first line for T, second line for CO(GT).
for window_scores in (
    (t_pred_org_19, t_pred_org_20, t_pred_org_21, t_pred_org_22, t_pred_org_23),
    (t_co_org_19, t_co_org_20, t_co_org_21, t_co_org_22, t_co_org_23),
):
    print(*window_scores)