## Setup
### Import libraries

In [1]:
#!pip install ipython-autotime
%load_ext autotime
import os
from tabulate import tabulate
import numpy as np
from google.cloud import aiplatform as aip
from google.cloud import bigquery
import pandas as pd

time: 3.2 s (started: 2022-11-26 12:31:30 -06:00)


### Set configurations and constants

In [2]:
# --- Project-level identifiers ---
NOTEBOOK = "Vertex_AI_Streamlit"
PROJECT = "babyweight-prediction"
REGION = "us-central1"
APPNAME = "bw-prediction"

# --- Storage / data locations ---
BUCKET = "b_w_bucket"
GCS_BUCKET = f"gs://{BUCKET}"  # bucket name in gs:// URI form
BQ_DATASET = "bw_dataset"

# Relative path to a JSON file under key/; exported below under the
# GOOGLE_APPLICATION_CREDENTIALS environment variable.
GOOGLE_APPLICATION_CREDENTIALS = "key/babyweight-prediction-ff79f406c099.json"

# Mirror the settings into the process environment in one step.
os.environ.update({
    "REGION": REGION,
    "PROJECT": PROJECT,
    "BUCKET": BUCKET,
    "GOOGLE_APPLICATION_CREDENTIALS": GOOGLE_APPLICATION_CREDENTIALS,
})

time: 827 µs (started: 2022-11-26 12:31:36 -06:00)


---
## 1. Retrieve the Endpoint

In [3]:
# Initialise the Vertex AI SDK with the project, region and staging bucket
# defined in the configuration cell.
aip.init(project=PROJECT, location=REGION, staging_bucket=GCS_BUCKET)

time: 367 µs (started: 2022-11-26 12:31:37 -06:00)


In [4]:
# Resource name of the endpoint to query, in
# projects/<number>/locations/<region>/endpoints/<id> form.
ENDPOINT_NAME = (
    "projects/69318036822/locations/us-central1"
    "/endpoints/4074389870305345536"
)

time: 373 µs (started: 2022-11-26 12:31:38 -06:00)


In [5]:
# Handle to the endpoint named by ENDPOINT_NAME; used for every
# predict/explain call below.
endpoint = aip.Endpoint(
    endpoint_name=ENDPOINT_NAME, project=PROJECT, location=REGION
)

time: 18.6 ms (started: 2022-11-26 12:31:39 -06:00)


## 2. Make Online Predictions
### 2.1. One Sample

In [6]:
# Request payload: a list holding one sample. Every feature value is passed
# as a string, including the numeric and boolean ones.
instance = [
    {
        "is_male": "true",
        "mother_age": "45.0",
        "plurality": "Single(1)",
        "gestation_weeks": "35.0",
        "cigarette_use": "true",
        "alcohol_use": "false",
    }
]

time: 430 µs (started: 2022-11-26 12:31:41 -06:00)


### Get Prediction

In [7]:
# Send the sample to the endpoint and display the full Prediction response.
endpoint.predict(instances=instance)

Prediction(predictions=[{'value': 6.105488300323486, 'upper_bound': 4.929884910583496, 'lower_bound': 2.85607647895813}], deployed_model_id='3368705715412664320', model_version_id='1', model_resource_name='projects/69318036822/locations/us-central1/models/697490594242494464', explanations=None)

time: 1.85 s (started: 2022-11-26 12:31:43 -06:00)


In [8]:
# Keep only the point estimate ('value') of the first returned prediction.
predicted_value = (
    endpoint.predict(instances=instance)
    .predictions[0]["value"]
)
predicted_value

6.105488300323486

time: 829 ms (started: 2022-11-26 12:31:46 -06:00)


## 3. Explanation
### 3.1. Feature attributions for one sample

In [9]:
# Request an explanation for the same sample; the response is consumed by
# get_feature_attributions below.
explain = endpoint.explain(instances=instance)

time: 1.04 s (started: 2022-11-26 12:31:49 -06:00)


In [21]:
# Names of the features whose attributions are reported; they match the keys
# of each dict in `instance`.
FEATURE_COLUMNS = [
    "is_male", "mother_age", "plurality",
    "gestation_weeks", "cigarette_use", "alcohol_use",
]

time: 439 µs (started: 2022-11-26 12:37:00 -06:00)


In [22]:
def get_feature_attributions(
        prediction_expl, instance_index, feature_columns=None, instances=None):
    """Build a feature-attribution table for one explained instance.

    Args:
        prediction_expl: Explanation response. It must expose
            ``explanations[i].attributions[0]`` carrying a
            ``baseline_output_value`` and a ``feature_attributions`` mapping
            keyed by feature name.
        instance_index: Position of the instance of interest, used to index
            both ``prediction_expl.explanations`` and ``instances``.
        feature_columns: Feature names to report. ``None`` (the default)
            uses the notebook-level ``FEATURE_COLUMNS``, looked up when the
            function is called so a re-run configuration cell is honoured.
        instances: The request payload (sequence of feature dicts) that the
            explanation was computed for. ``None`` (the default) falls back
            to the notebook-level ``instance`` variable, which is what this
            function always read before the parameter existed.

    Returns:
        A list of ``[name, value, attribution]`` rows: a ``Baseline_Score``
        row first, then one row per feature sorted by attribution in
        descending order, then a ``Final_Prediction`` row holding the
        baseline plus the sum of all listed attributions.

    Raises:
        KeyError: if a requested feature is missing from the instance or
            from the attribution mapping.
        IndexError: if ``instance_index`` is out of range.
    """
    # Resolve notebook-level fallbacks lazily so explicit arguments win.
    if feature_columns is None:
        feature_columns = FEATURE_COLUMNS
    if instances is None:
        instances = instance

    attribution = prediction_expl.explanations[instance_index].attributions[0]
    baseline_score = attribution.baseline_output_value
    feature_values = instances[instance_index]

    rows = []
    # Accumulate in feature order, starting from the baseline.
    total_att_val = baseline_score
    for key in feature_columns:
        att_val = attribution.feature_attributions[key]
        total_att_val += att_val
        rows.append([key, feature_values[key], att_val])

    # Largest positive contribution first; sorted() is stable, so ties keep
    # the order given by feature_columns.
    feature_attributions_rows = sorted(
        rows, key=lambda row: row[2], reverse=True)
    feature_attributions_rows.insert(0, ["Baseline_Score", "--", baseline_score])
    feature_attributions_rows.append(["Final_Prediction", "--", total_att_val])

    return feature_attributions_rows

time: 1.11 ms (started: 2022-11-26 12:37:01 -06:00)


In [23]:
# Attribution rows for the first (index 0) explained sample.
feature_attributions_rows = get_feature_attributions(
    prediction_expl=explain,
    instance_index=0,
)

time: 715 µs (started: 2022-11-26 12:37:02 -06:00)


In [24]:
def generate_dataframe(feature_attributions_rows=None):
    """Turn attribution rows into a DataFrame plus its three column lists.

    Args:
        feature_attributions_rows: Sequence of ``[feature, value,
            contribution]`` rows, as produced by
            ``get_feature_attributions``. ``None`` (the default) computes the
            rows for instance 0 of the notebook-level ``explain`` response,
            which is what the function always did when called with no
            arguments.

    Returns:
        A 4-tuple ``(df, feature_list, feature_values,
        feature_contributions)`` where ``df`` has the columns ``Feature``,
        ``Value`` and ``Contribution`` and each list holds one column of the
        rows, in row order.
    """
    if feature_attributions_rows is None:
        feature_attributions_rows = get_feature_attributions(explain, 0)

    # Split the rows column-wise; comprehensions also handle an empty input.
    feature_list = [row[0] for row in feature_attributions_rows]
    feature_values = [row[1] for row in feature_attributions_rows]
    feature_contributions = [row[2] for row in feature_attributions_rows]

    df = pd.DataFrame({
        'Feature': feature_list,
        'Value': feature_values,
        'Contribution': feature_contributions,
    })

    return df, feature_list, feature_values, feature_contributions


time: 1.11 ms (started: 2022-11-26 12:37:03 -06:00)


In [25]:
# Build the attribution table and keep its individual columns as lists.
(
    df,
    feature_list,
    feature_values,
    feature_contributions,
) = generate_dataframe()

time: 1.13 ms (started: 2022-11-26 12:37:03 -06:00)


In [26]:
# Display the attribution table: baseline row, per-feature contributions,
# and the final prediction row.
df

Unnamed: 0,Feature,Value,Contribution
0,Baseline_Score,--,7.805862
1,is_male,true,0.0
2,plurality,Single(1),0.0
3,alcohol_use,false,0.0
4,mother_age,45.0,-0.029131
5,cigarette_use,true,-0.23636
6,gestation_weeks,35.0,-1.434883
7,Final_Prediction,--,6.105488


time: 6.2 ms (started: 2022-11-26 12:37:06 -06:00)
