In [9]:
import azureml.core
from azureml.core import Workspace
from azureml.core import Experiment
import os
import shutil

ws = Workspace.from_config()
exp_name = 'c4ts-customlib'
exp = Experiment(workspace = ws, name = exp_name)

project_folder = './{}'.format(exp_name)
os.makedirs(project_folder, exist_ok=True)
shutil.copy('AssetData_Historical.csv', project_folder)

Found the config file in: C:\Users\zhpek\Desktop\C4TS-Challenge4\aml_config\config.json


'./c4ts-customlib\\AssetData_Historical.csv'

In [30]:
%%writefile ./c4ts-customlib/utils.py

import pandas as pd
import numpy as np
from sklearn.decomposition import PCA


def cleanLongLat(l):
    split = l.str.split(',', expand=True)
    split = (split[0]+'.'+[i if len(i)>1 else i+'0' for i in split[1]]).astype(float)
    return(split)
    
class scaler:
    def __init__(self, x = None):
        if type(x) == pd.core.frame.DataFrame:
            self.fit(x)
        elif x == None:
            self.x = None
            self.mean = None
            self.var = None
        else:
            raise Exception('Require pandas.DF input')


    def fit(self, x):
        self.x = x
        self.mean = x.mean()
        self.var = x.var()

    def scale(self, new_x):
        result = (new_x - self.mean) / np.sqrt(self.var)
        return (result)
    
def prepare(X, fit = False, scaler_obj = None, pca_obj = None):
    
    if fit:
        s = scaler(X)
        pca = PCA()
        X_ = s.scale(X)
        X_ = pca.fit_transform(X_)
        X_ = X_[:,:10]
        
        return(X_, s, pca)
    else:
        if scaler_obj == None or pca_obj == None:
            raise Exception('Non fitting requires scaler/pca obj')
        X_ = pca_obj.transform(scaler_obj.scale(X))[:,:10]
        return X_

Overwriting ./c4ts-customlib/utils.py


In [41]:
%%writefile ./c4ts-customlib/train.py

import os
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from azureml.core.run import Run
from sklearn.externals import joblib
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import GradientBoostingClassifier

from utils import *
    
os.makedirs('./outputs', exist_ok=True)

# Data Preparation
df = pd.read_csv('AssetData_Historical.csv')
df.drop(['Machine_ID', 'District'], axis=1, inplace=True)
df['Latitude'] = cleanLongLat(df['Latitude'])
df['Longitude'] = cleanLongLat(df['Longitude'])
X = df.drop('Failure_NextHour', 1)
y = df['Failure_NextHour']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1, stratify = y)

X_prep, s, pca= prepare(X_train, fit = True)

run = Run.get_context()

data = {"train": {"X": X_train, "y": y_train},
        "test": {"X": X_test, "y": y_test}}

param_grid = {'learning_rate': [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3],
              'n_estimators': [100, 200, 300, 400, 500]
}

model = GradientBoostingClassifier(loss = 'exponential')
kf = StratifiedKFold(n_splits = 5, shuffle = True)
gridsearch = GridSearchCV(model, param_grid, 
                          scoring = 'f1_weighted',
                          n_jobs = -1,
                          cv = kf)
weights = y_train * 3 + 1
result = gridsearch.fit(X_prep, y_train, sample_weight = weights)

run.log('bestScore', result.best_score_)
run.log('bestParam', result.best_params_)
run.log('valMean', result.cv_results_['mean_test_score'])
run.log('valStd', result.cv_results_['std_test_score'])
run.log('valParams', result.cv_results_['params'])

#################
#Fit Final Model#
#################

X, s, pca = prepare(X)
best_model = result.estimator.fit(X)

pickle.dump(s, open('./outputs/scaler.pkl', 'wb'))
pickle.dump(pca,open('./outputs/pca_transform.pkl','wb'))

import time
model_name = 'GBT_{}'.format(time.time())
with open(model_name, 'wb') as f:
    joblib.dump(value = best_model, filename = './outputs/{}.pkl'.format(model_name))

Overwriting ./c4ts-customlib/train.py


In [43]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.compute import ComputeTarget#, AmlCompute

cpu_cluster = ComputeTarget(workspace=ws, name= "pekamlcompute")
run_config = RunConfiguration(framework="python")
run_config.target = cpu_cluster.name
run_config.environment.docker.enabled = True
run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['scikit-learn'])

from azureml.core import Run
from azureml.core import ScriptRunConfig

src = ScriptRunConfig(source_directory=project_folder, 
                      script='train.py', 
                      run_config=run_config) 
run = exp.submit(config=src)
run

Experiment,Id,Type,Status,Details Page,Docs Page
c4ts-customlib,c4ts-customlib_1548053495769,azureml.scriptrun,Starting,Link to Azure Portal,Link to Documentation


In [44]:
%%time
# Shows output of the run on stdout.
run.wait_for_completion(show_output=True)

RunId: c4ts-customlib_1548053495769

Streaming azureml-logs/20_image_build_log.txt

2019/01/21 06:51:47 Using acb_vol_b3b8514b-587e-43bc-830b-2cdb8393e8f7 as the home volume
2019/01/21 06:51:47 Creating Docker network: acb_default_network, driver: 'bridge'
2019/01/21 06:51:47 Successfully set up Docker network: acb_default_network
2019/01/21 06:51:47 Setting up Docker configuration...
2019/01/21 06:51:48 Successfully set up Docker configuration
2019/01/21 06:51:48 Logging in to registry: pekamlws1688522486.azurecr.io
2019/01/21 06:51:50 Successfully logged into pekamlws1688522486.azurecr.io
2019/01/21 06:51:50 Executing step ID: acb_step_0. Working directory: '', Network: 'acb_default_network'
2019/01/21 06:51:50 Obtaining source code and scanning for dependencies...
2019/01/21 06:51:51 Successfully obtained source code and scanned for dependencies
2019/01/21 06:51:51 Launching container with name: acb_step_0
Sending build context to Docker daemon  3.489MB

Step 1/12 : FROM mcr.microso


xz-5.2.4             | 366 KB    |            |   0% [0m[91m
xz-5.2.4             | 366 KB    | ########## | 100% [0m[91m

ncurses-6.0          | 920 KB    |            |   0% [0m[91m
ncurses-6.0          | 920 KB    | #######9   |  79% [0m[91m
ncurses-6.0          | 920 KB    | #########1 |  92% [0m[91m
ncurses-6.0          | 920 KB    | ########## | 100% [0m[91m

libgcc-ng-8.2.0      | 7.6 MB    |            |   0% [0m[91m
libgcc-ng-8.2.0      | 7.6 MB    | #######5   |  76% [0m[91m
libgcc-ng-8.2.0      | 7.6 MB    | #########3 |  94% [0m[91m
libgcc-ng-8.2.0      | 7.6 MB    | ########## | 100% [0m[91m

numpy-1.15.4         | 47 KB     |            |   0% [0m[91m
numpy-1.15.4         | 47 KB     | ########## | 100% [0m[91m

certifi-2018.11.29   | 146 KB    |            |   0% [0m[91m
certifi-2018.11.29   | 146 KB    | ########## | 100% [0m[91m

scipy-1.1.0          | 17.7 MB   |            |   0% [0m[91m
scipy-1.1.0          | 17.7 MB   | ###5       |  

    Uninstalling wheel-0.32.3:
      Successfully uninstalled wheel-0.32.3
Successfully installed PyJWT-1.7.1 PyOpenSSL-18.0.0 SecretStorage-2.3.1 adal-1.2.0 antlr4-python3-runtime-4.7.2 applicationinsights-0.11.7 argcomplete-1.9.4 asn1crypto-0.24.0 azure-cli-command-modules-nspkg-2.0.2 azure-cli-core-2.0.55 azure-cli-nspkg-3.0.3 azure-cli-profile-2.1.2 azure-cli-telemetry-1.0.0 azure-common-1.1.17 azure-graphrbac-0.53.0 azure-mgmt-authorization-0.51.1 azure-mgmt-containerregistry-2.6.0 azure-mgmt-keyvault-1.1.0 azure-mgmt-nspkg-3.0.2 azure-mgmt-resource-2.1.0 azure-mgmt-storage-3.1.1 azure-nspkg-3.0.2 azure-storage-blob-1.4.0 azure-storage-common-1.4.0 azure-storage-nspkg-3.1.0 azureml-core-1.0.2 azureml-defaults-1.0.2 backports.tempfile-1.0 backports.weakref-1.0.post1 bcrypt-3.1.6 cffi-1.11.5 chardet-3.0.4 colorama-0.4.1 contextlib2-0.5.5 cryptography-2.4.2 docker-3.7.0 docker-pycreds-0.4.0 futures-3.1.1 humanfriendly-4.17 idna-2.8 isodate-0.6.0 jmespath-0.9.3 jsonpickle-1.0 knack-0.

{'runId': 'c4ts-customlib_1548053495769',
 'target': 'pekamlcompute',
 'status': 'Failed',
 'startTimeUtc': '2019-01-21T07:01:14.392529Z',
 'endTimeUtc': '2019-01-21T07:05:06.103667Z',
 'properties': {'azureml.runsource': 'experiment',
  'ContentSnapshotId': 'bfd7f6a6-40f1-4ca7-87dd-ffd34da05dd6'},
 'runDefinition': {'Script': 'train.py',
  'Arguments': [],
  'SourceDirectoryDataStore': None,
  'Framework': 0,
  'Communicator': 0,
  'Target': 'pekamlcompute',
  'DataReferences': {},
  'JobName': None,
  'AutoPrepareEnvironment': True,
  'MaxRunDurationSeconds': None,
  'NodeCount': 1,
  'Environment': {'Python': {'InterpreterPath': 'python',
    'UserManagedDependencies': False,
    'CondaDependencies': {'name': 'project_environment',
     'dependencies': ['python=3.6.2',
      {'pip': ['azureml-defaults==1.0.2']},
      'scikit-learn']}},
   'EnvironmentVariables': {'EXAMPLE_ENV_VAR': 'EXAMPLE_VALUE'},
   'Docker': {'BaseImage': 'mcr.microsoft.com/azureml/base:0.2.0',
    'Enabled': T

In [45]:
run.get_metrics()

{}