### IMPORTS

In [2]:
import ipynb.fs.full.processing as processing
import ipynb.fs.full.training as training
import ipynb.fs.full.storage as storage
import ipynb.fs.full.misc as misc
import ipynb.fs.full.splitting as splitting
import ipynb.fs.full.features as features
import ipynb.fs.full.ensemble as ensemble
import ipynb.fs.full.profit as profit
import json

### CREATE REGRESSION DATASET

In [2]:
def create_regression_dataset(config):
    """Build the feature-enriched dataset used by the regression ensemble.

    The baseline frame comes from ``processing.create_dataframe(config)`` and
    is then passed, together with ``config['features']``, to ``features.add``.

    Args:
        config: Pipeline configuration; must contain a ``'features'`` entry.

    Returns:
        Whatever ``features.add`` returns (callers in this notebook treat it
        as a pandas DataFrame -- TODO confirm against the features module).
    """
    # CREATE BASELINE DATAFRAME
    baseline = processing.create_dataframe(config)

    # ADD TYPE FEATURES & HAND THE RESULT STRAIGHT BACK
    return features.add(baseline, config['features'])

### TRAIN & VALIDATE PIPELINE ENSEMBLE

In [3]:
def create_pipeline(config):
    """Train, validate and persist a regression + classifier pipeline.

    Steps, in order:
      1. Build the regression dataset and drop its ``'close'`` column.
      2. Split it with ``splitting.general`` and train the regression ensemble.
      3. Calibrate a decision machine on the regression training table and
         train the classifier ensemble on the relabeled table.
      4. Run both ensembles on the test split and build confusion matrixes.
      5. Save ensembles, predictions and metrics via ``storage.save_pipeline``.

    Args:
        config: Training configuration. Keys read directly here are
            ``config['splitting']['train_split']`` and
            ``config['classification_ensemble']['decision']``; the whole
            object is also forwarded to ``create_regression_dataset``,
            ``ensemble.regression``, ``ensemble.classifier`` and stored
            with the pipeline.

    Returns:
        The value returned by ``storage.save_pipeline`` (the pipeline name).
    """

    # CREATE REGRESSION DATASET & DROP THE CLOSE PRICE COLUMN
    # (NON-INPLACE DROP: THE FRAME HANDED BACK BY THE HELPER IS LEFT UNTOUCHED)
    regression_dataset = create_regression_dataset(config).drop(columns=['close'])

    # SPLIT INTO TRAIN & TEST DATASETS
    primary_dataset = splitting.general(
        regression_dataset,
        config['splitting']['train_split']
    )

    # PRINT MSG
    print('\n### START TRAINING REGRESSION ENSEMBLE\n')

    # TRAIN THE REGRESSION ENSEMBLE
    regression_ensemble, regression_table = ensemble.regression(primary_dataset, config)

    # CREATE A DECISION MACHINE
    decision_machine = misc.decision_machine()

    # PUT REGRESSION LABELS THROUGH IT
    regression_labels = decision_machine.calibrate(
        regression_table,
        config['classification_ensemble']['decision']
    )

    # REPLACE OLD LABELS WITH NEW ONES
    labeled_regression_table = misc.replace_labels(
        regression_table,
        regression_labels
    )

    # PRINT MSG
    print('\n### START TRAINING CLASSIFIER ENSEMBLE\n')

    # TRAIN THE CLASSIFIER ENSEMBLE
    classifier_ensemble, classifier_table = ensemble.classifier(
        labeled_regression_table,
        config
    )

    # CREATE A CONFUSION MATRIX FOR TRAIN PREDICTIONS
    classifier_matrixes = misc.train_matrixes(
        labeled_regression_table,
        classifier_table
    )

    # PRINT MSG
    print('\n### PERFORMING VALIDATION')

    # CREATE VALIDATION DATASET WITH REGRESSION ENSEMBLE PREDICTIONS
    validation_dataset = regression_ensemble.predict(primary_dataset['test'])

    # CLONE DATASET & ADD THE TRAILING LABELS, ONE PER PREDICTION ROW.
    # AN EXPLICIT START INDEX IS USED INSTEAD OF [-n:] BECAUSE [-0:] WOULD
    # SELECT *EVERY* LABEL WHEN THERE ARE NO PREDICTION ROWS AT ALL.
    validation_with_labels = validation_dataset.copy()
    test_labels = primary_dataset['test']['labels']
    first_label = max(len(test_labels) - len(validation_dataset), 0)
    validation_with_labels['label'] = test_labels[first_label:]

    # PREDICT WITH THE CLASSIFIER ENSEMBLE
    classifier_predictions = classifier_ensemble.predict({
        'features': validation_dataset.to_numpy(),
        'labels': []
    })

    # PUT TRUE LABELS THROUGH DECISION MACHINE
    matrix_labels = decision_machine.convert(validation_with_labels)

    # CREATE A CONFUSION MATRIX FOR VALIDATION PREDICTIONS
    classifier_matrixes = misc.validation_matrixes(
        classifier_matrixes,
        classifier_predictions,
        matrix_labels
    )

    # STITCH TOGETHER REGRESSION FITTING METRICS
    regression_fitting = misc.regression_fitting_metrics(regression_ensemble)

    # PRINT MSG
    print('### SAVING PIPELINE')

    # SAVE EVERYTHING
    pipeline_name = storage.save_pipeline({
        'config': config,
        'regression_ensemble': regression_ensemble,
        'classifier_ensemble': classifier_ensemble,
        'predictions': {
            'regression': {
                'training': {
                    'graph': 'line',
                    'data': json.loads(regression_table.to_json())
                },
                'validation': {
                    'graph': 'line',
                    'data': json.loads(validation_with_labels.to_json())
                }
            },
            'classifiers': classifier_matrixes
        },
        'regression_fitting': regression_fitting
    })

    # PRINT MSG
    print('### FINISHED\n')

    return pipeline_name

### LOAD & PREDICT WITH PIPELINE

In [4]:
def use_pipeline(name, config):
    """Load a stored pipeline, run it on a new dataset and score the result.

    The stored pipeline config is reused, except that its ``'data'`` entry is
    replaced by ``config['data']``. The regression ensemble's predictions are
    fed to the classifier ensemble, the closing prices are attached to the
    classifier predictions, and the result is passed to
    ``profit.weighted_position_investing``.

    Args:
        name: Identifier of the stored pipeline, as accepted by
            ``storage.load_pipeline``.
        config: Prediction configuration; ``config['data']`` is read here and
            the whole object is forwarded to the profit calculation.

    Returns:
        tuple: ``(pred_name, profit_json)`` where ``pred_name`` is the value
        returned by ``storage.save_prediction`` and ``profit_json`` is
        ``profit_metric.to_json()``.
    """

    # PRINT MSG
    print('\n### SERIALIZING PIPELINE\n')

    # LOAD (DESERIALIZE) THE REGRESSION & CLASSIFIER ENSEMBLE
    regression_ensemble, classifier_ensemble, pipeline_config = storage.load_pipeline(name)

    # REPLACE OLD DATASET WITH THE ONE NAMED IN THE PREDICTION CONFIG
    pipeline_config['data'] = config['data']

    # PRINT MSG
    print('\n### CREATING REGRESSION DATASET')

    # ADD FEATURES & CREATE REGRESSION DATASET
    regression_dataset = create_regression_dataset(pipeline_config)

    # DROP LABEL COLUMN (NON-INPLACE) & POP CLOSE COLUMN
    regression_dataset = regression_dataset.drop(columns=['label'])
    closing_prices = regression_dataset.pop('close')

    # PRINT MSG
    print('### CREATING CLASSIFIER DATASET')

    # PREDICT WITH REGRESSION ENSEMBLE (PLACEHOLDER LABELS, ONE PER ROW)
    regression_predictions = regression_ensemble.predict({
        'features': regression_dataset.to_numpy(),
        'labels': [0] * len(regression_dataset)
    })

    # PRINT MSG
    print('### PREDICTING VALUES')

    # PREDICT WITH CLASSIFIER ENSEMBLE
    classifier_predictions = classifier_ensemble.predict({
        'features': regression_predictions.to_numpy(),
        'labels': []
    })

    # PRINT MSG
    print('### CALCULATING PROFIT/LOSS')

    # ATTACH THE TRAILING CLOSING PRICES, ONE PER PREDICTION ROW.
    # AN EXPLICIT START INDEX IS USED INSTEAD OF [-n:] BECAUSE [-0:] WOULD
    # SELECT *EVERY* CLOSING PRICE WHEN THERE ARE NO PREDICTIONS AT ALL.
    closing_values = closing_prices.to_numpy()
    first_close = max(len(closing_values) - len(classifier_predictions), 0)
    classifier_predictions['close'] = closing_values[first_close:]

    # RUN PREDICTIONS THROUGH A HEURISTIC PROFIT CALC
    profit_metric = profit.weighted_position_investing(
        classifier_predictions,
        config
    )

    # PRINT MSG
    print('### SAVING PREDICTION')

    # SAVE THE PROFIT METRIC
    pred_name = storage.save_prediction(name, profit_metric)

    # PRINT MSG
    print('### FINISHED\n')

    # PARSE AS JSON & RETURN
    return pred_name, profit_metric.to_json()

### TESTING CREATE

In [5]:
#train_config = storage.load_yaml('configs/train_config.yaml')

In [6]:
#regression_dataset = create_regression_dataset(train_config)

In [7]:
#name = create_pipeline(train_config)

### TESTING USAGE

In [8]:
#pred_config = storage.load_yaml('configs/pred_config.yaml')

In [9]:
#foob = use_pipeline('PIPELINE-1603544359', pred_config)

In [10]:
#foob.head(5)

In [11]:
#prof = profit.weighted_position_investing(foob, pred_config)

In [12]:
#foo = prof.to_json()

In [13]:
#foo