In [1]:
# Authenticate if we haven't already

from azureml.core.authentication import InteractiveLoginAuthentication

credentials = InteractiveLoginAuthentication()

In [None]:
from azureml.core import Workspace

ws = Workspace.from_config(auth=credentials)
print(ws.name, ws.location, ws.resource_group, ws.location, sep = '\t')

In [3]:
from azureml.core import Experiment
exp = Experiment(workspace=ws, name='Round4')

In [4]:
from azureml.core.runconfig import RunConfiguration

run_config = RunConfiguration()
run_config.environment.python.user_managed_dependencies = True

In [5]:
from azureml.core import ScriptRunConfig

# run a trial from the train.py code in your current directory
config = ScriptRunConfig(source_directory='.',
                         script='train-round4.py',
                         run_config=run_config)

run = exp.submit(config)

run.wait_for_completion(show_output=True)

RunId: Round4_1552924865_58d8da70

Streaming azureml-logs/60_control_log.txt

Streaming log file azureml-logs/60_control_log.txt
Running: ['python', 'azureml-setup/run_script.py', 'python', 'azureml-setup/context_manager_injector.py', '-i', 'ProjectPythonPath:context_managers.ProjectPythonPath', '-i', 'OutputCollection:context_managers.RunHistory', 'train-round4.py']
Logging experiment running status in history service.
Streaming log file azureml-logs/80_driver_log.txt

Streaming azureml-logs/80_driver_log.txt

Accuracy of DecisionTreeClassifier is 0.3352894105094692
Accuracy of RidgeClassifier is 0.3486263003467591
Accuracy of RandomForestClassifier is 0.36009602560682846
Accuracy of LogisticRegression is 0.327554014403841


The experiment completed successfully. Finalizing run...
Logging experiment finalizing status in history service
Cleaning up all outstanding Run operations, waiting 300.0 seconds
2 items cleaning up...
Cleanup took 0.2035079002380371 seconds

Execution Summary
Run

{'runId': 'Round4_1552924865_58d8da70',
 'target': 'local',
 'status': 'Completed',
 'startTimeUtc': '2019-03-18T16:01:07.135275Z',
 'endTimeUtc': '2019-03-18T16:02:22.441761Z',
 'properties': {'azureml.runsource': 'experiment',
  'ContentSnapshotId': '9ec38a41-79ad-40f1-897e-15c89c10b1a0'},
 'runDefinition': {'Script': 'train-round4.py',
  'Arguments': [],
  'SourceDirectoryDataStore': None,
  'Framework': 0,
  'Communicator': 0,
  'Target': 'local',
  'DataReferences': {},
  'JobName': None,
  'AutoPrepareEnvironment': True,
  'MaxRunDurationSeconds': None,
  'NodeCount': 1,
  'Environment': {'Python': {'InterpreterPath': 'python',
    'UserManagedDependencies': True,
    'CondaDependencies': {'name': 'project_environment',
     'dependencies': ['python=3.6.2', {'pip': ['azureml-defaults']}]},
    'BaseCondaEnvironment': None},
   'EnvironmentVariables': {'EXAMPLE_ENV_VAR': 'EXAMPLE_VALUE'},
   'Docker': {'BaseImage': 'mcr.microsoft.com/azureml/base:0.2.2',
    'Enabled': False,
    

In [6]:
run.id

'Round4_1552924865_58d8da70'

In [8]:
# Register and download the model
model = run.register_model(model_name='best_model', model_path='outputs/best_model.pkl', tags={'run_id': run.id})

model.download(exist_ok=True)

'best_model.pkl'

In [None]:
# Test the input/output code in score.py

In [13]:
from sklearn.externals import joblib

clf = joblib.load('best_model.pkl')
clf

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [14]:
import json
import numpy as np

with open('./input.json') as f:
    data = json.load(f)

values = data['Inputs']['input1']['Values']

values

[['2',
  'Skittles',
  '3',
  '299',
  '0',
  '1',
  '1',
  '7',
  '0',
  '1',
  '1',
  '2',
  '2',
  '2',
  '1',
  '1',
  '100',
  '41326',
  '8480853f516546f6cf33aa88cd76c379',
  '0',
  '-',
  '86e1089a3',
  '2'],
 ['1',
  'Benji',
  '2',
  '299',
  '0',
  '1',
  '1',
  '7',
  '0',
  '1',
  '1',
  '2',
  '2',
  '2',
  '1',
  '1',
  '100',
  '41328',
  '9590853f516546f6cf33aa88cd76c379',
  '0',
  '-',
  '87e1089a3',
  '1']]

In [15]:
import pandas as pd

df = pd.DataFrame(values, columns=['Type', 'Name', 'Age', 'Breed1', 'Breed2', 'Gender', 'Color1', 'Color2',
       'Color3', 'MaturitySize', 'FurLength', 'Vaccinated', 'Dewormed',
       'Sterilized', 'Health', 'Quantity', 'Fee', 'State', 'RescuerID',
       'VideoAmt', 'Description', 'PetID', 'PhotoAmt'])

df

Unnamed: 0,Type,Name,Age,Breed1,Breed2,Gender,Color1,Color2,Color3,MaturitySize,...,Sterilized,Health,Quantity,Fee,State,RescuerID,VideoAmt,Description,PetID,PhotoAmt
0,2,Skittles,3,299,0,1,1,7,0,1,...,2,1,1,100,41326,8480853f516546f6cf33aa88cd76c379,0,-,86e1089a3,2
1,1,Benji,2,299,0,1,1,7,0,1,...,2,1,1,100,41328,9590853f516546f6cf33aa88cd76c379,0,-,87e1089a3,1


In [16]:
df['IsFree'] = df['Fee'] == 0
df['IsMulticolor'] = (df['Color2'] != 0) | (df['Color3'] != 0)
df['IsPurebreed'] = df['Breed2'] == 0

df.drop(['Name', 'RescuerID', 'PetID', 'Description'], axis=1, inplace=True)

df

Unnamed: 0,Type,Age,Breed1,Breed2,Gender,Color1,Color2,Color3,MaturitySize,FurLength,...,Sterilized,Health,Quantity,Fee,State,VideoAmt,PhotoAmt,IsFree,IsMulticolor,IsPurebreed
0,2,3,299,0,1,1,7,0,1,1,...,2,1,1,100,41326,0,2,False,True,False
1,1,2,299,0,1,1,7,0,1,1,...,2,1,1,100,41328,0,1,False,True,False


In [18]:
y = clf.predict(df)
output = np.column_stack([df, y]).tolist()

output

[['2',
  '3',
  '299',
  '0',
  '1',
  '1',
  '7',
  '0',
  '1',
  '1',
  '2',
  '2',
  '2',
  '1',
  '1',
  '100',
  '41326',
  '0',
  '2',
  False,
  True,
  False,
  2],
 ['1',
  '2',
  '299',
  '0',
  '1',
  '1',
  '7',
  '0',
  '1',
  '1',
  '2',
  '2',
  '2',
  '1',
  '1',
  '100',
  '41328',
  '0',
  '1',
  False,
  True,
  False,
  1]]