In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor, BaggingRegressor, AdaBoostRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_percentage_error as mape
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV, ShuffleSplit, LeaveOneOut, cross_val_score
from sklearn.model_selection import cross_val_predict, cross_validate, KFold, learning_curve
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.gaussian_process.kernels import RationalQuadratic, RBF, Matern, DotProduct
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
from sklearn.kernel_ridge import KernelRidge
from joblib import dump, load
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.manifold import TSNE
from catboost import CatBoostRegressor
from sklearn.ensemble import RandomForestRegressor, BaggingRegressor, VotingRegressor
from tqdm import tqdm
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeRegressor
from plotfunc import plot_result, cross_validate_and_plot, plot_learning_curve, plot_with_err

import seaborn as sns

In [2]:
# Load the source table; the path is relative to the notebook's working directory.
data = pd.read_csv('data_tc.csv')

In [16]:
# Show the 'yeld strength' column (spelling must match the CSV header exactly).
# NOTE(review): presumably the regression target used downstream — confirm.
data['yeld strength']

0     230.0
1     330.0
2     320.0
3     360.0
4     420.0
5     480.0
6     465.0
7     350.0
8     330.0
9     314.0
10    320.0
11    420.0
12    480.0
13    680.0
14    765.0
15    450.0
16    462.0
17    780.0
18    358.0
19    620.0
20    718.0
21    710.0
22    565.0
23    240.0
24    240.0
25    196.0
26    320.0
27    260.0
28    530.0
29    480.0
30    350.0
31    309.0
32    785.0
33    531.0
34    446.0
35    700.0
36    750.0
37    652.0
38    343.0
Name: yeld strength, dtype: float64

In [3]:
# Input columns selected from `data` for the estimators: the element-symbol
# columns first, then three heat-treatment ("HT") columns for each of the
# two stages, in stage order.
columns = [
    'Fe', 'Cr', 'Ni', 'Mo', 'Mn', 'C', 'Ti', 'Si', 'Nb', 'V',
    'P', 'B', 'Al', 'N', 'S', 'Co', 'Cu', 'W', 'As',
] + [
    f'HT stage {stage} {attribute}'
    for stage in (1, 2)
    for attribute in ('temperature', 'duration', 'cooling')
]

# Construct hard_model

In [6]:
# Serialized estimator files. Entry k of `model_names` is paired with entry k
# of `features`, so the two lists must stay in the same order.
model_names = [
    'cb_vf_fcc_a1#2.joblib',
    'cb_nd_fcc_a1#2.joblib',
    'cb_mr_fcc_a1#2.joblib',
    'cb_vf_hcp_a3#2.joblib',
    'cb_nd_hcp_a3#2.joblib',
    'rf_mr_hcp_a3#2.joblib',
    'rf_vf_m7c3_d101.joblib',
    'cb_nd_m7c3_d101.joblib',
    'tree_mr_m7c3_d101.joblib',
]

# Predicted-quantity labels: the VF / ND / MR prefixes for each of the three
# suffixes, grouped by suffix.
features = [
    f'{quantity} {suffix}'
    for suffix in ('FCC_A1#2', 'HCP_A3#2', 'M7C3_D101')
    for quantity in ('VF', 'ND', 'MR')
]

In [7]:
# Build `hard_model`: a dict mapping each label in `features` to the estimator
# loaded from the file at the same position in `model_names`.
#
# NOTE: joblib.load unpickles the file, which can execute arbitrary code —
# only load model files that come from a trusted source.
if len(model_names) != len(features):
    # The lists are paired purely by position; without this check a mismatch
    # would surface as an opaque IndexError or as silently missing entries.
    raise ValueError(
        f'model_names ({len(model_names)}) and features ({len(features)}) '
        'must have the same length'
    )
hard_model = {
    feature: load(path)
    for feature, path in zip(features, model_names)
}

In [8]:
# Display the mapping to check which estimator was loaded under each feature key.
hard_model

{'VF FCC_A1#2': <catboost.core.CatBoostRegressor at 0x23284e66830>,
 'ND FCC_A1#2': <catboost.core.CatBoostRegressor at 0x23284e66fe0>,
 'MR FCC_A1#2': <catboost.core.CatBoostRegressor at 0x232857c54e0>,
 'VF HCP_A3#2': <catboost.core.CatBoostRegressor at 0x232857c5f30>,
 'ND HCP_A3#2': <catboost.core.CatBoostRegressor at 0x232857c67a0>,
 'MR HCP_A3#2': RandomForestRegressor(random_state=42),
 'VF M7C3_D101': RandomForestRegressor(random_state=42),
 'ND M7C3_D101': <catboost.core.CatBoostRegressor at 0x232857c7130>,
 'MR M7C3_D101': DecisionTreeRegressor(max_depth=9, random_state=4)}

# hard_model prediction

In [9]:
# Estimator input: the `columns` subset of `data` as a freshly copied NumPy
# array, one row per row of `data` and one column per entry of `columns`.
X = data[columns].to_numpy(copy=True)

In [11]:
# Start from an empty frame; the prediction loop adds one column per feature.
preds = pd.DataFrame()

In [12]:
# Add one column to `preds` per label in `features`, holding the output of the
# estimator stored under that same key in `hard_model` when applied to `X`.
for feature_name in features:
    estimator = hard_model[feature_name]
    preds[feature_name] = estimator.predict(X)

In [19]:
# Display the prediction table.
# NOTE(review): the saved output of this cell (In [19]) includes the
# 'yeld strength' column, which is only added by the In [17] cell further
# down — the cells were run out of order. On Restart & Run All this display
# will not contain that column; consider moving this cell below that one.
preds

Unnamed: 0,VF FCC_A1#2,ND FCC_A1#2,MR FCC_A1#2,VF HCP_A3#2,ND HCP_A3#2,MR HCP_A3#2,VF M7C3_D101,ND M7C3_D101,MR M7C3_D101,yeld strength
0,0.001017,24.412848,0.455605,5.786607e-06,19.474488,1.076015,0.001518,16.8341,15.180947,230.0
1,0.007573,23.827195,1.432287,3.136883e-06,20.933134,3.436912,0.00187,19.33757,7.083446,330.0
2,0.001993,21.224937,6.551577,0.005145547,20.357543,15.905469,0.005549,19.529613,32.813126,320.0
3,0.00249,20.827932,9.655582,0.004137499,20.621059,13.059021,0.007505,19.767398,31.458874,360.0
4,0.002548,21.00068,8.439891,0.002842151,21.471394,8.07134,0.010585,19.940485,31.055496,420.0
5,0.002185,20.854257,9.084984,0.004604583,20.62222,12.963942,0.006169,19.656833,32.552389,480.0
6,0.002506,21.700901,4.919732,0.006058586,21.004032,9.230492,0.009233,20.169687,25.208551,465.0
7,0.009172,21.225926,10.958368,4.412969e-06,18.031291,6.418122,0.000289,17.788558,16.506298,350.0
8,0.005258,22.125553,4.549308,8.32259e-06,20.589058,3.576355,0.004086,19.231043,39.841369,330.0
9,0.001027,20.871049,6.955508,0.00528112,20.231953,15.629223,0.008049,19.560416,38.842894,314.0


In [17]:
# Attach the 'yeld strength' column from `data` (key spelling matches the
# source table); pandas aligns the values to `preds` by row index.
preds = preds.assign(**{'yeld strength': data['yeld strength']})

In [18]:
# Write the table to disk, relative to the working directory. With the default
# index=True the row index is written as an unnamed first column.
preds.to_csv('data_tc_pred.csv')