In [1]:
from ml4pdm.data import Dataset, DatasetSummary
from ml4pdm.parsing import DatasetParser
from ml4pdm.evaluation import Evaluator
from sklearn.pipeline import make_pipeline
from ml4pdm.evaluation.metrics import loss_asymmetric, score_performance, loss_false_positive_rate, loss_false_negative_rate
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
from random import random
from sklearn.ensemble import RandomForestRegressor
from copy import deepcopy
from ml4pdm.transformation import TimeSeriesImputer, UniToMultivariateWrapper, PytsTransformWrapper, PytsSupportedAlgorithm, DatasetToSklearn, EMDSignalWrapper, PywtWrapper, AttributeFilter, TSFreshWrapper, TSFreshFeatureCalculators

In [2]:
# Load the C-MAPSS data through the ml4pdm parser. With test=True the call
# result is unpacked into two datasets: one for training, one for testing.
cmapss_splits = DatasetParser.get_cmapss_data(test=True)
train_dataset, test_dataset = cmapss_splits

In [3]:
# Derive the training instances from the raw training dataset.
# NOTE(review): presumably each series is cut `cut_repeats` times and the target
# is bounded by min_length/max_length -- confirm against the ml4pdm documentation.
prepared_training_dataset = train_dataset.generate_simple_cut_dataset(
    cut_repeats=5,
    min_length=5,
    max_length=155,
)

# Sanity check: number of generated instances, then the smallest and largest target.
cut_instances = prepared_training_dataset.data
cut_targets = prepared_training_dataset.target
print(len(cut_instances))
print(min(cut_targets))
print(max(cut_targets))

500
5.0
155.0


In [4]:
# Inner pipeline handed to UniToMultivariateWrapper: impute the time series,
# then apply the pyts BOSS transformation.
boss_feature_pipeline = make_pipeline(
    TimeSeriesImputer(),
    PytsTransformWrapper(PytsSupportedAlgorithm.BOSS),
)

# Full pipeline: attribute filtering -> wrapped BOSS feature extraction ->
# conversion to an sklearn-compatible representation -> random forest regressor.
# NOTE(review): the meaning of the AttributeFilter arguments ([0, 1, 2], 3) is not
# visible in this notebook -- confirm against the ml4pdm documentation.
pipeline = make_pipeline(
    AttributeFilter([0, 1, 2], 3),
    UniToMultivariateWrapper(boss_feature_pipeline, n_jobs=15),
    DatasetToSklearn(),
    RandomForestRegressor(n_estimators=50, min_samples_leaf=4, n_jobs=15),
)

In [5]:
# Metrics handed to the Evaluator; the report below reads the results positionally,
# so this order must stay in sync with the labels in the report template.
evaluation_metrics = [
    loss_asymmetric,
    mean_squared_error,
    score_performance,
    mean_absolute_error,
    mean_absolute_percentage_error,
    loss_false_positive_rate,
    loss_false_negative_rate,
]

# NOTE(review): the first and third Evaluator arguments are passed as None; their
# meaning is not visible in this notebook.
evaluator = Evaluator(None, [pipeline], None, evaluation_metrics)

# Fit on the prepared training data, evaluate on the test data and keep the first
# entry of the returned results (only one pipeline is evaluated here).
results = evaluator.evaluate_train_test_split(prepared_training_dataset, test_dataset)[0]

# Scale the entries at positions 2, 4, 5 and 6 by 100 in place; assuming results
# follow the metric order, these are the values reported as percentages.
for percent_position in (2, 4, 5, 6):
    results[percent_position] *= 100

report_template = (
    "S:\t{:.2f}\n"
    "MSE:\t{:.2f}\n"
    "A(%):\t{:.2f}\n"
    "MAE:\t{:.2f}\n"
    "MAPE:\t{:.2f}\n"
    "FPR(%):\t{:.2f}\n"
    "FNR(%):\t{:.2f}"
)
print(report_template.format(*results))

S:	809.95
MSE:	441.67
A(%):	49.00
MAE:	16.68
MAPE:	37.34
FPR(%):	19.00
FNR(%):	32.00


In [6]:
# Print each test target next to the prediction stored by the evaluator for the
# first (and only) pipeline, separated by a tab.
first_pipeline_predictions = evaluator.full_y_pred_per_pipeline[0]
for position, predicted in enumerate(first_pipeline_predictions):
    actual = test_dataset.target[position]
    print(actual, "\t", predicted)

112.0 	 138.39065462315463
98.0 	 126.71889105339108
69.0 	 60.120063364739835
82.0 	 80.68816527916528
91.0 	 90.20083982683984
93.0 	 89.51295404595405
91.0 	 82.03505411255412
95.0 	 76.06890648240649
111.0 	 116.15733405483407
96.0 	 73.73505411255414
97.0 	 103.77469053169054
124.0 	 77.5382597957598
95.0 	 65.35080613830614
107.0 	 123.72978671328674
83.0 	 118.60254667554668
84.0 	 103.26273154623154
50.0 	 68.07353896103896
28.0 	 66.96675036075035
87.0 	 94.48813691863693
16.0 	 17.37259928306987
57.0 	 86.61664291264293
111.0 	 126.84063292263296
113.0 	 93.80405422355423
20.0 	 46.740918470418464
145.0 	 132.64666433076727
119.0 	 111.93674153624156
66.0 	 77.90689610389612
97.0 	 85.470715416936
90.0 	 77.0402303807304
115.0 	 73.94682034632032
8.0 	 12.263473914973915
48.0 	 77.04600910200911
106.0 	 103.79041092241091
7.0 	 16.33075568875569
11.0 	 30.483611832611825
19.0 	 33.96931879231879
21.0 	 57.47448801783011
50.0 	 68.65967471417471
142.0 	 136.31205538905542
28.0

In [12]:
import os
from ml4pdm.parsing import PipelineConfigParser

# Round trip: write the pipeline configuration to a JSON file, read it back and
# evaluate the reloaded pipeline with the same metrics as the original one.
# NOTE(review): the output recorded for this cell is a FileNotFoundError for an
# empty path (''). Which call raises it is not visible here, and the recorded
# output may be stale -- re-run on a fresh kernel and confirm.
PIPELINE_CONFIGURATION_FILENAME = os.path.join('.','PipelineConfiguration.json')

PipelineConfigParser.save_to_file(pipeline, path=PIPELINE_CONFIGURATION_FILENAME)
loaded_pipeline = PipelineConfigParser.parse_from_file(path=PIPELINE_CONFIGURATION_FILENAME)

# The report below reads the results positionally, so this order must stay in
# sync with the labels in the report template.
roundtrip_metrics = [
    loss_asymmetric,
    mean_squared_error,
    score_performance,
    mean_absolute_error,
    mean_absolute_percentage_error,
    loss_false_positive_rate,
    loss_false_negative_rate,
]
evaluator = Evaluator(None, [loaded_pipeline], None, roundtrip_metrics)
results = evaluator.evaluate_train_test_split(prepared_training_dataset, test_dataset)[0]

# Scale the entries at positions 2, 4, 5 and 6 by 100 in place; assuming results
# follow the metric order, these are the values reported as percentages.
for percent_position in (2, 4, 5, 6):
    results[percent_position] *= 100

roundtrip_report_template = (
    "S:\t{:.2f}\n"
    "MSE:\t{:.2f}\n"
    "A(%):\t{:.2f}\n"
    "MAE:\t{:.2f}\n"
    "MAPE:\t{:.2f}\n"
    "FPR(%):\t{:.2f}\n"
    "FNR(%):\t{:.2f}"
)
print(roundtrip_report_template.format(*results))

FileNotFoundError: [Errno 2] No such file or directory: ''