In [1]:
import sys
import os
import subprocess
import zipfile
import numpy as np

from oodles import Framework
from oodles import Signal
from oodles import monitor
from oodles import ModelSignal, AnnotationMethod, Anomaly

from dataset import input_to_dataset_transformation, read_json, write_json
from model import run_real_world_inference, get_accuracy
from pushup_signal import pushup_signal
from train import train_model
from contextlib import redirect_stdout
# Path of the training-data JSON file; it is also passed to the framework
# below as training_args["orig_training_file"].
orig_training_file = 'data/training_data.json'

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
# NOTE: this cell is currently disabled (every line below is commented out).
# If re-enabled it would, when the "data" directory does not exist, fetch
# data.zip with wget, extract it into the working directory, and rewrite the
# training file with the first 1000 records after a seeded shuffle
# (np.random.seed(1)).
# NOTE(review): as written, a failed download is only reported by the bare
# `except`, so the ZipFile open that follows would still be attempted --
# handle that case before uncommenting.
# data_dir = "data"
# remote_url = "https://oodles-dev-training-data.s3.amazonaws.com/data.zip"
# if not os.path.exists(data_dir):
#     try:
#         file_downloaded_ok = subprocess.check_output("wget " + remote_url, shell=True)
#     except:
#         print("Could not load training data")
#     with zipfile.ZipFile("data.zip", 'r') as zip_ref:
#         zip_ref.extractall("./")

#     full_training_data = read_json(orig_training_file)
#     np.random.seed(1)
#     np.random.shuffle(full_training_data)
#     reduced_training_data = full_training_data[0:1000]
#     write_json(orig_training_file, reduced_training_data)

In [3]:
# --- Input datasets -------------------------------------------------------
# Test set used for the final accuracy comparison (evaluation_args below).
golden_testing_file = 'data/golden_testing_data.json'
# Inputs fed through the monitored model by run_real_world_inference.
real_world_test_cases = 'data/real_world_testing_data.json'

# --- Framework storage / annotation ---------------------------------------
# Passed to the framework as training_args["fold_name"].
data_save_fold_name = 'oodles_smart_data'
# Arguments for the MASTER_FILE annotation method.
annotation_args = dict(master_file='data/master_annotation_data.json')

In [4]:
# Framework configuration, assembled one section at a time.
cfg = {}

# Define your signal to identify edge cases: the custom "Pushup" signal
# combined (via `|`) with the model's binary-entropy confidence signal.
cfg["checks"] = [
    {
        'type': Anomaly.EDGE_CASE,
        "signal_formulae": (
            Signal("Pushup", pushup_signal)
            | Signal(
                ModelSignal.BINARY_ENTROPY_CONFIDENCE,
                is_model_signal=True,
                extra_args={'conf_threshold': 0.8},
            )
        ),
    }
]

# Connect training pipeline to annotate data and retrain the model
cfg["training_args"] = {
    "data_transformation_func": input_to_dataset_transformation,
    "annotation_method": {
        "method": AnnotationMethod.MASTER_FILE,
        "args": annotation_args,
    },
    "training_func": train_model,
    "fold_name": data_save_fold_name,
    "orig_training_file": orig_training_file,
}

# Connect evaluation pipeline to test retrained model against original model
cfg["evaluation_args"] = {
    "inference_func": get_accuracy,
    "golden_testing_dataset": golden_testing_file,
    "metrics_to_check": ['accuracy'],
}

# The framework instance is what @monitor attaches to further down.
framework = Framework(cfg)

@monitor(framework)
def model_predict(args):
    """Run the model's prediction with stdout diverted to a log file.

    `args` is indexed with the keys 'model' and 'kps'; the return value is
    whatever `args['model'].predict(args['kps'])` returns.

    Anything printed to stdout during the prediction is written to
    'evaluation_logs.txt'. The file is opened in 'w' mode, so every call
    overwrites the log left by the previous call.
    """
    with open('evaluation_logs.txt', 'w') as log_file, redirect_stdout(log_file):
        predictions = args['model'].predict(args['kps'])
    return predictions

Deleting the folder:  oodles_smart_data


In [5]:
# Train the initial model on `orig_training_file` -- the same path that is
# passed to the framework as training_args["orig_training_file"] -- instead
# of repeating the literal, so the two cannot drift apart.
train_model(orig_training_file, 'initial_model_', logistic_reg=True)

Training on:  data/training_data.json  which has  15788  data-points
Training on:  data/training_data.json  which has  15788  data-points
Model saved at:  trained_models/initial_model_


In [6]:
# Run the real-world test cases against the 'initial_model_' model, with
# predictions routed through the @monitor-decorated `model_predict`.
run_real_world_inference(
    real_world_test_cases,
    'initial_model_',
    model_predict,
    logistic_reg=True,
)