# 03_online_predictions_explanations

This notebook retrieves the Endpoint deployed by the pipeline and uses it to query the model for online predictions and explanations. It covers the following steps:
 
1. Retrieve the Endpoint
2. Make Online Predictions with Explanations


---
## Setup

### Import libraries

In [None]:
#!pip install ipython-autotime
%load_ext autotime

In [None]:
#!pip install google-cloud-aiplatform
#!pip install --upgrade google-cloud-bigquery

In [None]:
import os
from tabulate import tabulate
import numpy as np
from google.cloud import aiplatform as aip
from google.cloud import bigquery

### Set configurations and constants

In [None]:
# Project-level settings shared by the cells below.
NOTEBOOK = "Vertex_AI_Streamlit"
REGION = "us-central1"
PROJECT = "babyweight-prediction"
BUCKET = "b_w_bucket"
BQ_DATASET = "bw_dataset"
APPNAME = "bw-prediction"

# Path (relative to the working directory) of the credentials JSON file.
GOOGLE_APPLICATION_CREDENTIALS = "key/babyweight-prediction-ff79f406c099.json"

# Export the settings to the process environment in a single step.
os.environ.update(
    {
        "REGION": REGION,
        "PROJECT": PROJECT,
        "BUCKET": BUCKET,
        "GOOGLE_APPLICATION_CREDENTIALS": GOOGLE_APPLICATION_CREDENTIALS,
    }
)

In [None]:
GCS_BUCKET=f"gs://{BUCKET}"

In [None]:
SERVICE_ACCOUNT = "69318036822-compute@developer.gserviceaccount.com"

---
## 1. Retrieve the Endpoint

In [None]:
# Configure the Vertex AI SDK with this notebook's project, region and staging bucket.
aip.init(project=PROJECT, location=REGION, staging_bucket=GCS_BUCKET)

In [None]:
#https://us-central1-aiplatform.googleapis.com/v1/projects/69318036822/locations/us-central1/endpoints/4074389870305345536

In [None]:
ENDPOINT_NAME = 'projects/69318036822/locations/us-central1/endpoints/4074389870305345536'

In [None]:
# Build a handle for the endpoint identified by ENDPOINT_NAME.
endpoint = aip.Endpoint(endpoint_name=ENDPOINT_NAME, project=PROJECT, location=REGION)

## 2. Make Online Predictions with Explanations

### 2.1. One sample

In [None]:
# A single prediction instance; every feature value is passed as a string.
s = [
    dict(
        is_male='true',
        mother_age='25.0',
        plurality='Single(1)',
        gestation_weeks='40.0',
        cigarette_use='true',
        alcohol_use='false',
    )
]

#### Get prediction

In [None]:
endpoint.predict(s)

In [None]:
# Query the endpoint with `s` and keep the 'value' field of the first prediction.
predicted_value = endpoint.predict(s).predictions[0]['value']
predicted_value

#### Get explanation

In [None]:
explain=endpoint.explain(s)

In [None]:
explain

In [None]:
baseline_output_value=explain.explanations[0].attributions[0].baseline_output_value
baseline_output_value

In [None]:
instance_output_value=explain.explanations[0].attributions[0].instance_output_value
instance_output_value

In [None]:
explain.explanations[0].attributions

In [None]:
# Feature names looked up in each instance dict and in the attribution mapping.
FEATURE_COLUMNS = [
    "plurality", "mother_age", "is_male",
    "alcohol_use", "cigarette_use", "gestation_weeks",
]

In [None]:
def get_feature_attributions(
    prediction_expl, instance_index, feature_columns=FEATURE_COLUMNS, instances=None):
    """Return the per-feature attributions for one explained instance.

    Args:
        prediction_expl: Explanation response exposing
            ``explanations[i].attributions[0].feature_attributions``.
        instance_index: Position of the instance in both
            ``prediction_expl.explanations`` and ``instances``.
        feature_columns: Feature names to include in the result.
        instances: The list of instance dicts that was sent for explanation.
            When omitted, the notebook-level ``test_instances`` variable is
            used, so existing two-argument calls keep working.

    Returns:
        A list of ``[feature_name, feature_value, attribution_value]`` rows,
        sorted by attribution value with the largest first.

    Raises:
        KeyError: If a name in ``feature_columns`` is missing from the
            instance or from the attribution mapping.
        IndexError: If ``instance_index`` is out of range.
    """
    if instances is None:
        # Backward-compatible fallback: earlier calls relied on this global.
        instances = test_instances

    instance = instances[instance_index]
    attribution = prediction_expl.explanations[instance_index].attributions[0]
    feature_attributions = attribution.feature_attributions

    rows = [
        [key, instance[key], feature_attributions[key]]
        for key in feature_columns
    ]
    return sorted(rows, key=lambda row: row[2], reverse=True)

In [None]:
test_instances=s

In [None]:
feature_attributions_rows = get_feature_attributions(explain, 0)
feature_attributions_rows

In [None]:
feature_attributions_rows[2][1]

In [None]:
# Feature names, in the same order as the attribution rows.
feature_list = [row[0] for row in feature_attributions_rows]

In [None]:
feature_list

In [None]:
# Input value of each feature, in the same order as the attribution rows.
feature_values = [row[1] for row in feature_attributions_rows]

In [None]:
feature_values

In [None]:
# Attribution value of each feature, in the same order as the attribution rows.
feature_contributions = [row[2] for row in feature_attributions_rows]

In [None]:
feature_contributions

In [None]:
import plotly.graph_objects as go

# Vertical waterfall with one "relative" bar per feature. The measure list is
# derived from the data so it always matches the number of features plotted.
fig = go.Figure(go.Waterfall(
    name = "Feature contribution", orientation = "v",
    measure = ["relative"] * len(feature_list),
    x = feature_list,
    textposition = "outside",
    text=feature_values,
    y = feature_contributions,
    connector = {"line":{"color":"rgb(63, 63, 63)"}},
))

fig.update_layout(
        title = "Feature Contribution",
        showlegend = True
)

fig.show()

In [None]:
import plotly.graph_objects as go

# Horizontal variant of the feature-contribution waterfall. The measure list
# is derived from the data so it always matches the number of features plotted.
fig = go.Figure(go.Waterfall(
    name = "Feature contribution", orientation = "h",
    measure = ["relative"] * len(feature_list),
    y = feature_list,
    x = feature_contributions,
    text=feature_values,
    textposition = "outside",
    connector = {"mode":"between", "line":{"width":4, "color":"rgb(0, 0, 0)", "dash":"solid"}}
))

# update_layout returns the figure, so leaving it as the last expression
# renders the chart in the notebook.
fig.update_layout(title = "Feature Contribution")


In [None]:
# NOTE(review): get_all_prediction_explanations_with_graphs is not defined or
# imported in any cell visible in this notebook — confirm where it comes from;
# as written this call raises NameError on a fresh kernel.
get_all_prediction_explanations_with_graphs(test_instances=s)

In [None]:
explain_dict=explain.predictions[0]

In [None]:
explain_dict

In [None]:
names = list(explain_dict.keys())
values = list(explain_dict.values())

In [None]:
names

In [None]:
values

In [None]:
# Reuse the explanation response already held in `explain` rather than
# sending another request to the endpoint for the same instance.
baseline = explain.explanations[0].attributions[0].baseline_output_value
baseline

In [None]:
# Reuse the explanation response already held in `explain` rather than
# sending another request to the endpoint for the same instance.
instance = explain.explanations[0].attributions[0].instance_output_value
instance

### 2.2. Multiple samples

In [None]:
#bqclient = bigquery.Client(project=PROJECT)
#
#BQ_TABLE_ID = "hospital22.surgery.surgical-duration-prediction22_prepped_20000"
#query = f"""
#SELECT *
#FROM `{BQ_TABLE_ID}`
#WHERE splits='TEST'
#LIMIT 3
#"""
#
#df_test_3 = bqclient.query(query = query).to_dataframe()
#df_test_3.head()

In [None]:
#test_instances = df_test_3.drop(columns=['case_min', 'splits']).astype(str).to_dict(orient='records')
#test_instances

#### Get the predictions

In [None]:
#predicted_values =[]
#for i in range(len(test_instances)):
#    predicted_values.append(endpoint.predict(test_instances).predictions[i]['value'])
#
#predicted_values

In [None]:
# NOTE(review): get_all_prediction_explanations_with_graphs is not defined or
# imported in any cell visible in this notebook — confirm where it comes from;
# as written this call raises NameError on a fresh kernel.
prediction_expl = get_all_prediction_explanations_with_graphs(test_instances)

#### NOTE: The following is a shorter way to collect the feature attribution values, but it does not show the Baseline Score.

In [None]:
import matplotlib.pyplot as plt  # `plt` is used below but was never imported
import pandas as pd

explanations = endpoint.explain(test_instances)

# Plot the third instance when it exists; otherwise fall back to the last
# available one so a single-sample `test_instances` does not raise IndexError.
instance_index = min(2, len(explanations.explanations) - 1)
df = pd.DataFrame.from_dict(
    explanations.explanations[instance_index].attributions[0].feature_attributions,
    orient='index',
)
df.columns = ["Attribution value"]
df.sort_values(by='Attribution value').plot(kind='barh')
plt.show()

## References:

- https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/quests/vertex-ai/vertex-ai-qwikstart/lab_exercise.ipynb