In [1]:
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split, TimeSeriesSplit, GridSearchCV
from sklearn import pipeline
import sklearn
import xgboost
from xgboost import XGBRegressor
import numpy as np
import pandas as pd
import math
from sklearn.metrics import r2_score, mean_squared_error
from numpy.random import seed
from numpy.random import normal
import joblib

import warnings
warnings.filterwarnings("ignore")

pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.mode.chained_assignment = None

In [2]:
df_real = pd.read_csv('../Data/All_p_w_m.csv')
df_real.W = np.round(df_real.W, decimals=1)
df_real.sort_values(['P','Exp','W'], ascending=True, inplace=True)
df_real = df_real[['P','Exp','W','M']]
df_real.head()

Unnamed: 0,P,Exp,W,M
0,1,1,0.0,0.0
1901,1,1,0.2,0.697278
3802,1,1,0.4,1.291415
5703,1,1,0.6,1.854972
7604,1,1,0.8,2.348394


In [4]:
df_data = pd.read_csv('../Data/generated_Synthetic_exp_data.csv')
print(df_data.shape)
df_data.head()

(2292606, 6)


Unnamed: 0,W,P,Est_M,Est_S,Exp,M
0,0.0,1,0.0,0.0,1,0.0
1,0.1,1,0.426763,0.000816,1,0.425324
2,0.2,1,0.852853,0.001632,1,0.8546
3,0.3,1,1.277595,0.002449,1,1.28164
4,0.4,1,1.700321,0.003265,1,1.698087


In [5]:
real_filled = []

df_data.sort_values(['P','Exp','W'], ascending=True, inplace=True)
df_real.sort_values(['P','Exp','W'], ascending=True, inplace=True)

for p in range(1,7):
    for exp in range(1, 1901):
        gen = df_data.loc[np.logical_and(df_data.P==p, df_data.Exp==exp), ['W','M']]
        real = df_real.loc[np.logical_and(df_real.P==p, df_real.Exp==exp), ['W','M']]
        
        i=0
        for w in gen.W.unique():
            i+=1
            if i==5:
                break
                
            m = gen.M[gen.W==w].values[0]
            if w in real.W:
                m = real.M[real.W==w].values[0]
                
            real_filled.append([p, exp, w, m])
            
df_real_filled = pd.DataFrame(real_filled, columns=['P','Exp','W','M'])
df_real_filled.sample(10)

Unnamed: 0,P,Exp,W,M
41138,6,785,0.2,0.026496
39079,6,270,0.3,0.039746
18580,3,846,0.0,0.0
20393,3,1299,0.1,0.048676
45181,6,1796,0.1,0.013236
7598,1,1900,0.2,0.851384
38268,6,68,0.0,0.0
33488,5,773,0.0,0.0
16197,3,250,0.1,0.048665
11593,2,999,0.1,0.105307


In [6]:
test = []
allWs = sorted(np.round(df_data.W.unique(), decimals=1))
loaded_model = [
    joblib.load('../model/GB_exp_Yes.sav'), 
    joblib.load('../model/RF_exp_Yes.sav'), 
    joblib.load('../model/XGB_exp_Yes.sav')
]

for p in range(1,7):
    for exp in range(1, 1901):
        tmpGB = []
        tmpRF = []
        tmpXGB = []
        
        W = sorted(df_real_filled.W[np.logical_and(df_real_filled.P==p, df_real_filled.Exp==exp)].unique())
        
        for w in W[:4]:
            m = df_real_filled.M[np.logical_and(df_real_filled.P==p, np.logical_and(df_real_filled.Exp==exp, df_real_filled.W==w))].values[0]
            test.append([p,exp,w,m,m,m])
            
            tmpGB.append(w)
            tmpGB.append(m)
            tmpRF.append(w)
            tmpRF.append(m)
            tmpXGB.append(w)
            tmpXGB.append(m)
            
        #print('real m=', df_real.M[np.logical_and(df_real.P==p, np.logical_and(df_real.Exp==exp, df_real.W==W[4]))].values[0])
        
        term = 1
        W_new = [x for x in allWs if x>W[3]]
        #print(len(W_new))
        for w in W_new:
            #print(w)
            b = [p, exp, w]
            
            tmpGB.append(w)
            tmpRF.append(w)
            tmpXGB.append(w)
            
            a = [p,exp]
            a.extend(tmpGB)
            a = np.array(a).reshape(1,-1)
            #print(a)
            m = loaded_model[0][p-1].predict(a)[0]
            #print('m=',m)
            b.append(m)
            tmpGB.append(m)
            
            a = [p,exp]
            a.extend(tmpRF)
            a = np.array(a).reshape(1,-1)
            #print(a)
            m = loaded_model[1][p-1].predict(a)[0]
            #print('m=',m)
            b.append(m)
            tmpRF.append(m)
            
            a = [p,exp]
            a.extend(tmpXGB)
            a = np.array(a).reshape(1,-1)
            #print(a)
            m = loaded_model[2][p-1].predict(a)[0]
            #print('m=',m)
            b.append(m)
            tmpXGB.append(m)
            
            #print(tmpLR)
            #print(tmpGB)
            #print(tmpRF)
            #print(tmpXGB)
            
            test.append(b)
            
            tmpGB.pop(0)
            tmpGB.pop(0)
            
            tmpRF.pop(0)
            tmpRF.pop(0)
            
            tmpXGB.pop(0)
            tmpXGB.pop(0)
            
            #term += 1
            
            #if term==3:
             #   break
                
            #print('\n\n')
        
    print(p, 'Done')
        
#test
df_test = pd.DataFrame(test, columns=['P','Exp','W','M_GB','M_RF','M_XGB'])
del [test]
df_test.to_csv('../Data/Real_data_projected_v2.csv', index = False)
print('Saving done...')
df_test.sample(10)

AttributeError: 'HalfSquaredError' object has no attribute 'get_init_raw_predictions'