# Connect to Azure ML workspace

In [1]:
import azureml.core
from azureml.core import Workspace
from azureml.core.run import Run
from azureml.core.experiment import Experiment

# Report which Azure ML SDK version this kernel is running.
print("SDK version:", azureml.core.VERSION)

# Identifiers of the Azure ML workspace this notebook connects to.
subscription_id = "79451499-b2c0-4513-8dea-ef7f37173fbb"
resource_grp = "league"
workspace = "league-ws"

experiment_name = "league_predict_temp"
model_name = "leaguepredict.mml" # in case you want to change the name, keep the .mml extension

# Build the workspace handle from the identifiers above.
ws = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_grp,
    workspace_name=workspace,
)

# The return value is discarded; presumably this call only confirms the
# workspace is reachable -- TODO confirm.
ws.get_details()

# One line per workspace property.
print(
    "\n".join(
        (
            'Workspace name: ' + ws.name,
            'Azure region: ' + ws.location,
            'Subscription id: ' + ws.subscription_id,
            'Resource group: ' + ws.resource_group,
        )
    )
)

# Read the final feature table from Cosmos DB

In [2]:
import os
import urllib
import pprint
import numpy as np
import shutil
import time

In [3]:
## Dataset
# Connection settings for the Cosmos DB collection that holds the final
# feature matrix.
#
# The account master key is a credential and must not be stored in the
# notebook; it is read from the COSMOS_MASTER_KEY environment variable.
# NOTE(review): the key that used to be hardcoded here has been exposed in the
# notebook history and should be rotated.
cosmos_master_key = os.environ.get("COSMOS_MASTER_KEY")
if not cosmos_master_key:
    raise RuntimeError(
        "COSMOS_MASTER_KEY is not set; export the Cosmos DB account key "
        "before running this cell."
    )

readConfig = {
    "Endpoint": "https://lolpredict.documents.azure.com:443/",
    "Masterkey": cosmos_master_key,
    "Database": "league_extraction",
    "Collection": "FINAL_FEATURES_MATRIX_9112",
}

# Load the collection through the Cosmos DB Spark connector.
final_features_matrix = (
    spark.read.format("com.microsoft.azure.cosmosdb.spark")
    .options(**readConfig)
    .load()
)

In [4]:
# Convert the Spark DataFrame read from Cosmos DB into a pandas DataFrame.
pfinal = final_features_matrix.toPandas()

# Train the machine-learning model and save it to the Azure ML workspace

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [6]:
# Show every column name so the columns to drop can be chosen below.
pfinal.columns.tolist()

In [7]:
# Columns excluded from the feature matrix: the document metadata fields,
# the row identifiers and the target column `label`.
dropped = [
    '_attachments',
    '_etag',
    '_rid',
    '_self',
    '_ts',
    'id',
    'label',
    'match_id',
]

# Any other column whose name contains one of these substrings is dropped too.
identifier_markers = ("summonerId_inv", "name", 'champion_inv')

for column in pfinal.columns:
    if column in dropped:
        # Already listed; never add a column twice.
        continue
    if any(marker in column for marker in identifier_markers):
        dropped.append(column)

In [8]:
# Split the data into the target (`label`) and the feature matrix (everything
# except the columns collected in `dropped`), then hold out 30% for testing.
y = pfinal['label']
x = pfinal.drop(dropped, axis=1)

# Fixed seed so that the split, and therefore the reported accuracies, are
# the same on every Restart & Run All.
RANDOM_SEED = 42
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=.3, random_state=RANDOM_SEED
)
print("There are {} training instances and {} test instance".format(len(x_train),len(x_test)))

In [10]:
# NOTE(review): `model_name` is assigned again in the training cell before it
# is read, so this value is not the one used for the saved file.
model_name = "lrm_league_predict.pkl"

In [11]:
from azureml.core import Experiment

# Experiment handle under which the training run below is logged.
# NOTE(review): the `experiment_name` variable set in the first cell is not
# used here; the experiment name is given literally.
experiment = Experiment(name="league-exp-final", workspace=ws)


In [12]:
# Train a logistic-regression classifier on the training split, report train
# and test accuracy, log the test accuracy to an Azure ML run, and upload the
# pickled model to that run.
try:
    # Standalone joblib; newer scikit-learn releases no longer ship
    # `sklearn.externals.joblib`.
    import joblib
except ImportError:
    # Fallback for old environments that only have the vendored copy.
    from sklearn.externals import joblib

run = experiment.start_logging()
lrm = LogisticRegression()

lrm.fit(x_train, y_train)

predicted = lrm.predict(x_test)
score = lrm.score(x_test, y_test)
score_train = lrm.score(x_train, y_train)
print("Train accuracy:{}".format(score_train))
print("Test accuracy:{}".format(score))

# `np.float` was only an alias of the builtin `float` and has been removed
# from NumPy; the builtin performs the same conversion.
run.log('score', float(score))

model_name = "LRM_MODEL_LEAGUE.pkl"
filename = "outputs/" + model_name
# joblib.dump does not create missing directories, so make sure the target
# directory exists on a fresh kernel or working directory.
os.makedirs(os.path.dirname(filename), exist_ok=True)
joblib.dump(value=lrm, filename=filename)
run.upload_file(name=model_name, path_or_stream=filename)
run.complete()

# This produces a pickle file in the experiment; download the model file and deploy it offline using Azure.