## Workspace 정보 가져오기

In [1]:
from azureml.core import Workspace

# Fetch the workspace settings via Workspace.from_config().
ws = Workspace.from_config()

# The real values are masked in this notebook; the trailing comment on each
# row names the workspace attribute that would normally be printed.
masked_value = 'XXXXXXXXXXXXXXXXXX'
workspace_summary = [
    'Workspace name\t: ' + masked_value,     # ws.name
    '\nAzure region\t: ' + masked_value,     # ws.location
    '\nSubscription ID\t: ' + masked_value,  # ws.subscription_id
    '\nResource Group\t: ' + masked_value,   # ws.resource_group
]

# Joining with a single space matches print()'s default separator.
print(' '.join(workspace_summary))

Workspace name	: XXXXXXXXXXXXXXXXXX 
Azure region	: XXXXXXXXXXXXXXXXXX 
Subscription ID	: XXXXXXXXXXXXXXXXXX 
Resource Group	: XXXXXXXXXXXXXXXXXX


## 실험 공간 생성

In [2]:
from azureml.core import Experiment

# Create the experiment handle.
#   workspace: the workspace object obtained earlier in this notebook
#   name:      the name of the experiment
experiment = Experiment(
    workspace=ws,
    name='diabetes_experiment',
)

### Diabetes Dataset 가져오기

In [3]:
from azureml.opendatasets import Diabetes
from sklearn.model_selection import train_test_split

# Load the Diabetes open dataset as a pandas DataFrame and drop rows that
# contain missing values.
diabetes_frame = Diabetes.get_tabular_dataset().to_pandas_dataframe()
x_df = diabetes_frame.dropna()

# pop() removes the 'Y' column from x_df and returns it, so x_df keeps only
# the feature columns and y_df holds the target.
y_df = x_df.pop('Y')

# Split into train and test sets: 20% held out for testing, fixed seed.
split_parts = train_test_split(
    x_df,
    y_df,
    test_size=0.2,
    random_state=66,
)
X_train, X_test, y_train, y_test = split_parts

## Model 학습 및 기록 (Ridge)

In [4]:
import math
import os

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23. Prefer the standalone joblib package (a scikit-learn dependency) and
# fall back to the bundled copy only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

alphas = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

# joblib.dump does not create missing directories, so make sure the target
# folder exists before the first model is written.
os.makedirs('outputs', exist_ok=True)

# Train one Ridge model per alpha; each alpha gets its own logged run.
for alpha in alphas:
    run = experiment.start_logging()  # start a new run that records logs
    run.log('alpha', alpha)

    model = Ridge(alpha=alpha)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    # RMSE is the square root of the mean squared error.
    rmse = math.sqrt(mean_squared_error(y_test, y_pred))
    # The metric name must be exactly 'rmse': the best-run search later in
    # this notebook reads run.get_metrics()['rmse'].
    run.log('rmse', rmse)

    # Build the model file name and its local path.
    model_name = 'model_alpha_{}.pkl'.format(str(alpha))
    filename = 'outputs/{}'.format(model_name)

    # Save the fitted model locally.
    joblib.dump(value=model, filename=filename)

    # Upload the saved model file to the run in the Azure ML service.
    run.upload_file(name=model_name, path_or_stream=filename)
    run.complete()  # mark the run as finished

    print(f'{alpha} experiment completed')



0.1 experiment completed
0.2 experiment completed
0.3 experiment completed
0.4 experiment completed
0.5 experiment completed
0.6 experiment completed
0.7 experiment completed
0.8 experiment completed
0.9 experiment completed
1 experiment completed


In [5]:
# Show the description of the experiment.
experiment # the Experiment object itself; as a bare expression the notebook renders it

Name,Workspace,Report Page,Docs Page
diabetes_experiment,labuser37ml,Link to Azure Machine Learning studio,Link to Documentation


## Best Model 탐색 후 다운로드

In [7]:
# Scan every run of the experiment and remember the one with the lowest RMSE.
minimum_rmse = None
minimum_rmse_runid = None

for run in experiment.get_runs():
    run_metrics = run.get_metrics()

    # Look the metric up under 'rmse', and also under 'rmse: ' because the
    # training loop in this notebook has logged it under that name. Runs
    # that carry neither key are skipped instead of raising KeyError.
    run_rmse = run_metrics.get('rmse', run_metrics.get('rmse: '))
    if run_rmse is None:
        continue

    # The first usable run seeds the minimum; later runs replace it only when
    # their RMSE is strictly lower.
    if minimum_rmse is None or run_rmse < minimum_rmse:
        minimum_rmse = run_rmse
        # run.id is the run's ID; using it avoids a get_details() call per run.
        minimum_rmse_runid = run.id

print('Best run_id: {}\nBest rmse:{}'.format(minimum_rmse_runid, str(minimum_rmse)))

Exception: Malformed metric value

## Best run model을 저장

In [None]:
from azureml.core import Run

# Re-open the best run by its ID.
best_run = Run(experiment=experiment, run_id=minimum_rmse_runid)

# Download the first file listed for that run.
# NOTE(review): this assumes the first listed file is the model file - confirm.
first_file_name = str(best_run.get_file_names()[0])
best_run.download_file(name=first_file_name)

## CSV파일로 저장

In [None]:
import numpy as np
from azureml.core import Dataset

# Write the training features and labels to local comma-separated files.
for csv_name, training_part in (('features.csv', X_train), ('labels.csv', y_train)):
    np.savetxt(csv_name, training_part, delimiter=',')

# Upload both files to the workspace's default datastore, replacing any
# existing copies under the target path.
datastore = ws.get_default_datastore()
local_csv_files = ['./features.csv', './labels.csv']
datastore.upload_files(
    files=local_csv_files,
    target_path='diabetes-experiment/',
    overwrite=True,
)

# Build tabular datasets that point at the uploaded files.
features_location = (datastore, 'diabetes-experiment/features.csv')
labels_location = (datastore, 'diabetes-experiment/labels.csv')
input_dataset = Dataset.Tabular.from_delimited_files(path=[features_location])
output_dataset = Dataset.Tabular.from_delimited_files(path=[labels_location])

## Model 등록 & 사용

In [None]:
import sklearn
from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration

# Local path of the file to register: the first file name listed for the
# best run, relative to the current directory.
best_model_file = str(best_run.get_file_names()[0])

# Collect the registration arguments first so the call itself stays short.
registration_args = {
    'workspace': ws,
    'model_name': 'diabetes-experiment-model',
    'model_path': f"./{best_model_file}",
    'model_framework': Model.Framework.SCIKITLEARN,
    'model_framework_version': sklearn.__version__,
    'sample_input_dataset': input_dataset,
    'sample_output_dataset': output_dataset,
    'resource_configuration': ResourceConfiguration(cpu=1, memory_in_gb=0.5),
    'description': 'Ridge regression model to predict diabetes progression.',
    'tags': {'area': 'diabetes', 'type': 'regression'},
}
model = Model.register(**registration_args)

# Report the name and version of the registered model.
print('Name:', model.name)
print('Version:', model.version)

### Deploy (배포)

In [None]:
service_name = 'diabetes-service'

# Deploy the registered model as a service, overwriting any existing service
# with the same name, then block until deployment finishes while echoing
# its output.
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    overwrite=True,
)
service.wait_for_deployment(show_output=True)

### Test (테스트) & Service

In [None]:
import json

# Request body: the test features as nested lists plus the method to call.
request_body = {
    'data': X_test.values.tolist(),
    'method': 'predict',
}
input_payload = json.dumps(request_body)

# Send the JSON payload to the deployed service and show what it returns.
output = service.run(input_payload)
print(output)