# Initiate + Create Workspace

In [2]:
from azureml.core import Workspace
from azureml.core import Environment
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core import Experiment
import os

# from azureml.core.authentication import InteractiveLoginAuthentication

ws = Workspace.from_config()
ds = ws.get_default_datastore()
print(ws.name, "loaded successfully")

aml-aks-poc loaded successfully


# Creating Training Folder

In [3]:
folder_training_script = './jobcode'
os.makedirs(folder_training_script, exist_ok=True)

print('Done')

Done


# Upload data by using get_default_datastore()

In [33]:
ds.upload(src_dir='./jobsdata', target_path='jobsdata', overwrite=True, show_progress=True)

print('Uploaded Jobs Data')

Uploading an estimated of 2 files
Uploading ./jobsdata\jobs.csv
Uploaded ./jobsdata\jobs.csv, 1 files out of an estimated total of 2
Uploading ./jobsdata\jobs_old.csv
Uploaded ./jobsdata\jobs_old.csv, 2 files out of an estimated total of 2
Uploaded 2 files
Uploaded Jobs Data


In [18]:
ds.upload(src_dir='./jobcode', target_path='jobcode', overwrite=True, show_progress=True)




Uploading an estimated of 1 files
Uploading ./jobcode\train.py
Uploaded ./jobcode\train.py, 1 files out of an estimated total of 1
Uploaded 1 files


$AZUREML_DATAREFERENCE_8bc3699dab9c4c7a9f246e609a9a4640

# Create Compute Cluster

In [4]:
# Naming the cluster and setting minimal and maximal number of nodes 
compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
min_nodes = os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0)
max_nodes = os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 1)

# Choosing environment variables 
vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

provisioning_config = AmlCompute.provisioning_configuration(
    vm_size = vm_size, min_nodes = min_nodes, max_nodes = max_nodes)

# Creating the cluster
compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

print('Compute target created')

Compute target created


# Create ML Model

In [54]:
%%writefile $folder_training_script/train.py

import argparse
import os
import numpy as np
import pandas as pd
import glob

from azureml.core import Run
# from utils import load_data

import joblib

from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import cross_val_score

# let user feed in 2 parameters, the dataset to mount or download, and the regularization rate of the logistic regression model
parser = argparse.ArgumentParser()
parser.add_argument('--data-folder', type=str, dest='data_folder', help='data folder mounting point')
parser.add_argument('--max-depth', type=int, dest='max_depth', default=5, help='max depth')
args = parser.parse_args()

###
data_folder = os.path.join(args.data_folder, 'jobsdata')
print('Data folder:', data_folder)

job_data = pd.read_csv(os.path.join(data_folder, 'jobs.csv'))

                        
X = job_data.drop(columns =["quality"])
y = job_data["quality"]

clf = DecisionTreeRegressor(random_state=0,max_depth = args.max_depth)
rmse= np.mean(np.sqrt(-cross_val_score(clf, X, y, scoring="neg_mean_squared_error", cv = 5)))
print('RMSE is', rmse)

# Get the experiment run context
run = Run.get_context()

run.log('max depth', np.float(args.max_depth))
run.log('rmse', np.float(rmse))

os.makedirs('outputs', exist_ok=True)

clf.fit(X,y)
# file saved in the outputs folder is automatically uploaded into experiment record
joblib.dump(value=clf, filename='outputs/job_model.pkl')

run.complete()

Overwriting ./jobcode/train.py


# Creating Environment

In [55]:
job_env = Environment("job-experiment-env-5")
job_env.python.user_managed_dependencies = False # Let Azure ML manage dependencies
job_env.docker.enabled = True # Docker container

job_packages = CondaDependencies.create(conda_packages=['scikit-learn', "numpy", "pandas", "joblib"])

# Add the dependencies to the environment
job_env.python.conda_dependencies = job_packages

print(job_env.name, 'defined.')



job-experiment-env-5 defined.


# Registering env to Azure ML Workspace

In [56]:
job_env.register(workspace=ws)

{
    "assetId": "azureml://locations/eastus2/workspaces/6f7360e0-658a-4233-8141-019587c2881d/environments/job-experiment-env-5/versions/1",
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221010.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "buildContext": null,
        "enabled": true,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "job-experiment-en

# Creating Estimator

In [57]:
from azureml.train.estimator import Estimator

script_params = {
    '--data-folder': ds.as_mount(),
    '--max-depth': 10
}

registered_env = Environment.get(ws, 'job-experiment-env-5')

# Create an estimator
estimator = Estimator(source_directory=folder_training_script,
                      script_params=script_params,
                      compute_target = compute_target, # Run the experiment on the remote compute target
                      environment_definition = registered_env,
                      entry_script='train.py')



# Create Jobs (Experiment)

In [58]:
#Create an experiment
experiment = Experiment(workspace = ws, name = "job_expt")

print('Experiment created')

Experiment created


# Submit Experiment with Estimator Information

In [81]:
run = experiment.submit(config=estimator)
run



Experiment,Id,Type,Status,Details Page,Docs Page
job_expt,job_expt_1668252103_63a04a17,azureml.scriptrun,Preparing,Link to Azure Machine Learning studio,Link to Documentation


# Register ML Model

In [61]:
model = run.register_model(model_name='job_model',
                           model_path='outputs/job_model.pkl',
                           tags = {'area': "jobs", 'type': "sklearn"},
                           description = "salary prediction")

print(model.name, model.id, model.version, sep='\t')

job_model	job_model:1	1


# Inference

In [82]:
%%writefile $folder_training_script/score.py
import json
import joblib
import numpy as np
from azureml.core.model import Model

# Called when the service is loaded
def init():
    global model
    # Get the path to the registered model file and load it
    model_path = Model.get_model_path('job_model')
    model = joblib.load(model_path)

# Called when a request is received
def run(raw_data):
    # Get the input data as a numpy array
    data = np.array(json.loads(raw_data)['data'])
    # Get a prediction from the model
    predictions = model.predict(data)
    log_txt = 'Data:' + str(data) + ' - Predictions:' + str(predictions)
    print(log_txt)
    # Return the predictions as any JSON serializable format
    return predictions.tolist()

Overwriting ./jobcode/score.py


# Inference Dependencies

In [88]:
from azureml.core.conda_dependencies import CondaDependencies

# Add the dependencies for your model
myenv = CondaDependencies()
myenv.add_conda_package("scikit-learn")
myenv.add_conda_package("numpy")
myenv.add_conda_package("joblib")
# myenv.add_conda_package("json")
# myenv.add_conda_package("azure-ml-api-sdk")

# Save the environment config as a .yml file
env_file = './jobcode/env.yml'
with open(env_file,"w") as f:
    f.write(myenv.serialize_to_string())
print("Saved dependency info in", env_file)

Saved dependency info in ./jobcode/env.yml


# Inference Config

In [89]:
from azureml.core.model import InferenceConfig

classifier_inference_config = InferenceConfig(runtime= "python",
                                              source_directory = './jobcode',
                                              entry_script="score.py",
                                              conda_file="env.yml")

# Inference Cluster

In [None]:
from azureml.core.compute import ComputeTarget, AksCompute

cluster_name = 'aks-cluster'
compute_config = AksCompute.provisioning_configuration(cluster_purpose = AksCompute.ClusterPurpose.DEV_TEST, vm_size="standard_d11")
production_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
production_cluster.wait_for_completion(show_output=True)

In [None]:
# from azureml.core.compute import ComputeTarget, AksCompute
# from azureml.core.webservice import AksWebservice, Webservice
# from azureml.core import Model

# cluster_name = 'aks-cluster'
# aks_target = AksCompute(ws, "cpucluster")

# deployment_config = AksWebservice.deploy_configuration(cpu_cores=1, memory_gb=1, autoscale_enabled=True, autoscale_target_utilization=30, autoscale_max_replicas=3, autoscale_min_replicas=1)

# service = Model.deploy(ws, "myservice", [model], classifier_inference_config, deployment_config, aks_target)
# service.wait_for_deployment(show_output=True)
# print(service.state)
# print(service.get_logs())

In [75]:
from azureml.core.webservice import AksWebservice

classifier_deploy_config = AksWebservice.deploy_configuration(cpu_cores = 1,
                                                              memory_gb = 1)

# Deploy Model to AKS Cluster

In [90]:
from azureml.core.model import Model

model = ws.models['job_model']
service = Model.deploy(workspace=ws,
                       name = 'job-service',
                       models = [model],
                       inference_config = classifier_inference_config,
                       deployment_config = classifier_deploy_config,
                       deployment_target = production_cluster)
service.wait_for_deployment(show_output = True)

To leverage new model deployment capabilities, AzureML recommends using CLI/SDK v2 to deploy models as online endpoint, 
please refer to respective documentations 
https://docs.microsoft.com/azure/machine-learning/how-to-deploy-managed-online-endpoints /
https://docs.microsoft.com/azure/machine-learning/how-to-deploy-managed-online-endpoint-sdk-v2 /
https://docs.microsoft.com/azure/machine-learning/how-to-attach-kubernetes-anywhere 
For more information on migration, see https://aka.ms/acimoemigration. 
  service = Model.deploy(workspace=ws,


Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-11-12 17:18:08+05:30 Creating Container Registry if not exists.
2022-11-12 17:18:08+05:30 Registering the environment.
2022-11-12 17:18:09+05:30 Building image..
2022-11-12 17:28:27+05:30 Creating resources in AKS.
2022-11-12 17:28:27+05:30 Submitting deployment to compute.
2022-11-12 17:28:27+05:30 Checking the status of deployment job-service..
2022-11-12 17:31:47+05:30 Checking the status of inference endpoint job-service.
Succeeded
AKS service creation operation finished, operation "Succeeded"


In [91]:
# service.update(enable_app_insights=True)

In [None]:
endpoint = service.scoring_uri
print(endpoint)

In [None]:
primary_key, secondary_key = service.get_keys()
primary_key

# Obtain Cred from Local File

In [4]:
import csv

filename = r"C:\Users\VIGNEN\Documents\Azure POC\Passwords.csv"

rows = []
fields = []

with open(filename, 'r') as csvfile:
	csvreader = csv.reader(csvfile)
	
	fields = next(csvreader)

	for row in csvreader:
		rows.append(row)

endpoint = rows[0][0]
primary_key = rows[0][1]
sqlPassword = rows[0][2]

# Azure SQL Server

In [22]:
import pyodbc
server = 'aml-aks-poc.database.windows.net'
database = 'aks-aml-poc-inputdb'
username = 'viknhat'
# sqlPassword = '*********'
driver = '{ODBC Driver 17 for SQL Server}'

## Obtain Prediction Data from Azure SQL

In [26]:
dbdata = []

with pyodbc.connect('DRIVER='+driver+';SERVER=tcp:'+server+';PORT=1433;DATABASE='+database+';UID='+username+';PWD=' + sqlPassword) as conn:
    with conn.cursor() as cursor:
        cursor.execute("SELECT * FROM WineData")
        row = cursor.fetchone()
        while row:
            dbrow = []
            for element in row:
                dbrow.append(element)
            dbdata.append(dbrow)
            row = cursor.fetchone()

print(dbdata)

[[7.6, 1.58, 0.0, 2.1, 0.137, 5.0, 9.0, 0.99476, 3.5, 0.4, 10.9], [8.4, 0.635, 0.36, 2.0, 0.089, 15.0, 55.0, 0.99745, 3.31, 0.57, 10.4], [8.3, 0.85, 0.14, 2.5, 0.093, 13.0, 54.0, 0.99724, 3.36, 0.54, 10.1], [6.0, 0.31, 0.47, 3.6, 0.067, 18.0, 42.0, 0.99549, 3.39, 0.66, 11.0], [6.7, 0.32, 0.44, 2.4, 0.061, 24.0, 34.0, 0.99484, 3.29, 0.8, 11.6], [7.4, 0.36, 0.3, 1.8, 0.074, 17.0, 24.0, 0.99419, 3.24, 0.7, 11.4]]


## Perform Predictions

In [33]:
import requests
import json

# Set the content type in the request headers
request_headers = {"Content-Type": "application/json",
                   "Authorization": "Bearer " + primary_key}

responses = []

for inputdata in dbdata:
    # Convert the array to JSON
    json_data = json.dumps({"data": [inputdata]})

    # Post to Azure (AKS)
    response = requests.post(url=endpoint,
                            data=json_data,
                            headers=request_headers)
    responses.append(response.json())

print(responses)

[[3.0], [4.666666666666667], [5.0], [6.0], [6.785714285714286], [8.0]]


In [46]:
# outputToDB = ""

# for value in responses:
#     outputToDB = outputToDB + str(value[0]) + ", "
# outputToDB = outputToDB[:-2]
# print(outputToDB)

3.0, 4.666666666666667, 5.0, 6.0, 6.785714285714286, 8.0


## Post Predicted Data into Azure SQL Server Database

In [48]:
database = 'aks-aml-poc-outputdb'

with pyodbc.connect('DRIVER='+driver+';SERVER=tcp:'+server+';PORT=1433;DATABASE='+database+';UID='+username+';PWD=' + sqlPassword) as conn:
    with conn.cursor() as cursor:
        for value in responses:
            cursor.execute("INSERT INTO PredictedData VALUES (" + str(value[0]) + ");")

## Verify Data

In [49]:
database = 'aks-aml-poc-outputdb'

with pyodbc.connect('DRIVER='+driver+';SERVER=tcp:'+server+';PORT=1433;DATABASE='+database+';UID='+username+';PWD=' + sqlPassword) as conn:
    with conn.cursor() as cursor:
        cursor.execute("SELECT * FROM PredictedData")
        row = cursor.fetchone()
        while row:
            for element in row:
                print(element)
            row = cursor.fetchone()

3.0
4.666666666666667
5.0
6.0
6.785714285714286
8.0


# Azure Synapse

In [1]:
import os, uuid, sys
from azure.storage.filedatalake import DataLakeServiceClient
from azure.core._match_conditions import MatchConditions
from azure.storage.filedatalake._models import ContentSettings

## Connect to Data Lake

In [13]:
storage_account_name = "amlakspocdatalake"
storage_account_key = "Y/AMeI4WYrOJWarsp0D/Y4K4Sn87KxDFi1zp/JITQgqODaAJG2t7m4sY3iUCzq2ONDjM73by0zb2+ASt3f+XMA=="
    
try:  
    global service_client

    service_client = DataLakeServiceClient(account_url="{}://{}.dfs.core.windows.net".format(
        "https", storage_account_name), credential=storage_account_key)

except Exception as e:
    print(e)

## Create Container

In [16]:
try:
    global file_system_client

    file_system_client = service_client.create_file_system(file_system="datalakefs")

except Exception as e:
    print(e)

The specified container already exists.
RequestId:f4df1d47-801e-000d-3f24-07b98c000000
Time:2022-12-03T14:35:04.5227642Z
ErrorCode:ContainerAlreadyExists
Content: <?xml version="1.0" encoding="utf-8"?><Error><Code>ContainerAlreadyExists</Code><Message>The specified container already exists.
RequestId:f4df1d47-801e-000d-3f24-07b98c000000
Time:2022-12-03T14:35:04.5227642Z</Message></Error>


## Create Directory

In [17]:
try:
    file_system_client.create_directory("my-directory")

except Exception as e:
    print(e)

## Upload to Directory

In [20]:
try:

    file_system_client = service_client.get_file_system_client(file_system="datalakefs")

    directory_client = file_system_client.get_directory_client("my-directory")
    
    file_client = directory_client.get_file_client("wine_data.csv")

    local_file = open("C:\\Users\\VIGNEN\\Documents\\Azure POC\\aml-aks-poc\\jobsdata\\jobs_dynamic.csv",'r')

    file_contents = local_file.read()

    file_client.upload_data(file_contents, overwrite=True)

except Exception as e:
    print(e)

## Download from Directory