# Hands-On Workshop Exercise
---

For this exercise, your goal is to replace the scikit-learn model in the specified cell with a SAS Viya ML model.

Sample code is already provided to load the data, transform it, train a model, and register it. However, the registration step will fail as written, because the model registration code is specifically for a SAS model and the model specified is a scikit-learn model.

You should not need to modify any code other than the specified cell and the first cell, where you set your name.


In [None]:
# Your name; it is appended to the project name and to the registered model name
# in later cells of this notebook.
name = "tztsai"

In [None]:
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from pathlib import Path
import os
import requests
from sasctl import Session
from sasctl.services import model_repository as mr, model_management as mm

In [None]:
# Import data

# All 2021 Census GCP (SA2) tables sit in one folder and share one file-name
# pattern, so the folder is spelled out once instead of once per table.
census_dir = Path("/workspaces/myfolder/Academic_ViyaWorkbench/data/2021 Census GCP Statistical Area 2 for AUS")
census_tables = ("G02", "G33", "G34", "G35", "G38")

# Store DataFrames in a dictionary keyed by the lower-case table id ('g02', 'g33', ...)
gcp = {
    table.lower(): pd.read_csv(census_dir / f"2021Census_{table}_AUST_SA2.csv")
    for table in census_tables
}

# Keep the individual names as well; the data-wrangling cell refers to them directly
g02, g33, g34, g35, g38 = (gcp[table.lower()] for table in census_tables)

In [None]:
# Data Wrangling

# --- g02 ---
# Drop the median measures that, per the original author, are covered by the other tables
g02_redundant_medians = [
    'Median_mortgage_repay_monthly',
    'Median_tot_prsnl_inc_weekly',
    'Median_tot_fam_inc_weekly',
    'Median_tot_hhd_inc_weekly',
]
g02_filtered = g02.drop(columns=g02_redundant_medians)

# --- g33 ---
# Keep the columns whose name contains both "HI_" and "_Tot",
# with 'SA2_CODE_2021' placed first
g33_has_hi = g33.columns.str.contains("HI_")
g33_has_tot = g33.columns.str.contains("_Tot")
g33_col_mask = g33.columns[g33_has_hi & g33_has_tot].insert(0, 'SA2_CODE_2021')
g33_filtered = g33[g33_col_mask]

# --- g34 ---
# Drop the total and not-stated household columns
g34_unwanted = ['Num_MVs_per_dweling_Tot', 'Num_MVs_NS', 'Total_dwelings']
g34_filtered = g34.drop(columns=g34_unwanted)

# --- g35 ---
# Keep the columns whose name contains "_Total", minus the overall 'Total_Total',
# with 'SA2_CODE_2021' placed first
g35_total_cols = g35.columns[g35.columns.str.contains("_Total")]
g35_col_mask = g35_total_cols.drop('Total_Total').insert(0, 'SA2_CODE_2021')
g35_filtered = g35[g35_col_mask]

# --- g38 ---
# Keep the columns whose name contains both "M_" and "_Tot",
# with 'SA2_CODE_2021' placed first
g38_has_m = g38.columns.str.contains("M_")
g38_has_tot = g38.columns.str.contains("_Tot")
g38_col_mask = g38.columns[g38_has_m & g38_has_tot].insert(0, 'SA2_CODE_2021')
g38_filtered = g38[g38_col_mask]

# Joining dataframes

def merge_dataframes(dataframes, common_column):
    """Inner-join a series of DataFrames on a shared column.

    dataframes: any iterable of DataFrames (list, tuple, generator, ...).
        They are merged left to right, so the first one determines the
        leading columns of the result.
    common_column: column label (or list of labels) present in every
        DataFrame; passed to ``DataFrame.merge`` as ``on``.

    Returns the merged DataFrame. When only one DataFrame is supplied it is
    returned as-is (no copy).

    Raises ValueError if ``dataframes`` is empty.
    """
    # Work through an iterator so that inputs without indexing/slicing
    # support (e.g. generators) are accepted too.
    remaining = iter(dataframes)
    try:
        merged_df = next(remaining)
    except StopIteration:
        raise ValueError("merge_dataframes needs at least one DataFrame") from None

    # Iteratively merge the remaining DataFrames; 'inner' keeps only the
    # key values that appear in every DataFrame merged so far.
    for df in remaining:
        merged_df = merged_df.merge(df, on=common_column, how='inner')

    return merged_df

# Inner-join the five filtered tables on the SA2 area code
filtered_tables = [
    g02_filtered,
    g33_filtered,
    g34_filtered,
    g35_filtered,
    g38_filtered,
]
census_joined = merge_dataframes(filtered_tables, 'SA2_CODE_2021')
# Last expression of the cell, so the notebook displays (rows, columns)
census_joined.shape

In [None]:
# Prepare features and target
def prepare_features(df, target):
    """Split ``df`` into a feature frame and a target column.

    df: DataFrame holding both the features and the target.
    target: label of the target column.

    Returns ``(X, y)``: ``X`` has every column of ``df`` except ``target``,
    and ``y`` is ``df[target]``.
    """
    feature_cols = df.columns.drop(target)
    return df[feature_cols], df[target]

In [None]:
# Use the 'HI_300_399_Tot' column as the target; every other column becomes a feature
X, y = prepare_features(census_joined, 'HI_300_399_Tot')
# Last expression of the cell, so the notebook displays the feature column names
X.columns

In [None]:
## Split out holdout
from sklearn.model_selection import train_test_split
# test_size=0.3 holds out 30% of the rows for validation; the fixed random_state
# makes the split reproducible between runs
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.3, random_state=0)

# Start Code Section
---

Edit the cell below.

Make sure your model variable is called `wb_model`.

In [None]:
# Placeholder scikit-learn model for the exercise: replace it with a SAS Viya ML model.
# register_model() further down calls wb_model.export(), so whatever is bound to
# wb_model here must provide an export() method.
wb_model = GradientBoostingClassifier()
wb_model.fit(X_train, y_train)

# End Code Section
---
Do not edit anything after this point in the notebook.

In [None]:
## get access token for viya env using refresh token. change to your own viya server and preferred authentication method.
url = "https://apgtps2demo.gtp.unx.sas.com"
auth_url = f"{url}/SASLogon/oauth/token"
## reading long-lived refresh token from txt file
refresh_token_path = Path(f'/workspaces/{os.environ["DEFAULT_MOUNTNAME"]}/Academic_ViyaWorkbench/apgtps2demo_refresh_token.txt')
# Remove every whitespace character (not just '\n'), so a file saved with
# Windows line endings or a trailing space still yields a clean token.
refresh_token = "".join(refresh_token_path.read_text().split())

# Passing a dict lets requests form-encode the values, so a token containing
# characters such as '+', '&' or '=' is transmitted intact.
payload = {
    'grant_type': 'refresh_token',
    'refresh_token': refresh_token,
}
headers = {
  'Accept': 'application/json',
  'Content-Type': 'application/x-www-form-urlencoded',
  # HTTP Basic credentials: base64 of "sas.cli:" (client id with an empty secret)
  'Authorization': 'Basic c2FzLmNsaTo=',
}

# NOTE(review): verify=False disables TLS certificate verification. It is kept
# because the original code relies on it for this server; point `verify` at the
# CA bundle instead when one is available.
# The timeout stops the cell from hanging forever if the server is unreachable.
response = requests.post(auth_url, headers=headers, data=payload, verify=False, timeout=30)
# Surface an HTTP error (e.g. expired refresh token) here, instead of an
# unhelpful KeyError on 'access_token' below.
response.raise_for_status()
access_token = response.json()['access_token']

In [None]:
# Open the sasctl session against the same server the token was requested from.
# Reusing `url` (set in the token cell) keeps the two in sync when the notebook is
# pointed at a different Viya server.
# NOTE(review): verify_ssl=False disables TLS certificate verification for the session.
st = Session(url, token=access_token, verify_ssl=False)
# Last expression of the cell, so the notebook displays the session object
st

In [None]:
# Project name used for the create/lookup below; the participant's name is appended
# to the fixed prefix.
project_name = "Academic WorkshopWorkbench HandsOn "+name

In [None]:
## Create a project
# Look the project up first and create it only when it does not exist yet.
# The previous bare `except:` treated every failure (authentication, network,
# missing repository, even KeyboardInterrupt) as "project already exists";
# now such errors propagate with their real cause.
project = mr.get_project(project_name)
if project is None:
    repository = mr.get_repository('DMRepository')
    project_create = mr.create_project(
        {
        'name':project_name,
        'description':'CBR WB Hands-on Exercise',
        'function':'classification',
        'targetLevel':'binary',
        },
        repository
    )
    print("Created a new project!")
    # Re-read the project so `project` is obtained the same way in both branches
    project = mr.get_project(project_name)
else:
    print("Retrieving already created project")

In [None]:
def register_model(model_name, model, project):
    """Upload ``model`` to the model repository as an ASTORE under ``project``.

    model_name: name given to the registered model; the uploaded file is
        named ``<model_name>.astore``.
    model: object whose ``export()`` result is sent as the file content.
    project: object whose ``id`` attribute identifies the target project.

    Prints the result of the POST to ``/models``; returns nothing.
    """
    # Form fields that accompany the uploaded file
    fields = {
        "name": model_name,
        "projectId": project.id,
        "type": "ASTORE",
    }
    # (file name, file content) pair for the multipart upload
    upload = (f"{model_name}.astore", model.export())
    response = mr.post("/models", files={"files": upload}, data=fields)
    print(response)

In [None]:
# Display the model object that is about to be registered
wb_model

#### Model Registration
---

In [None]:
# Register the workshop model in the project; the participant's name is appended
# to the model name.
register_model("MyModel "+name, wb_model, project) 