# 03_online_predictions_explanations

This notebook retrieves the Endpoint deployed by the pipeline and uses it to query the model for online predictions and explanations, in the following steps:
 
1. Retrieve the Endpoint
2. Make Online Predictions with Explanations


---
## Setup

### Import libraries

In [107]:
# One-time install of the extension loaded below (kept commented out; uncomment on a fresh environment):
#!pip install ipython-autotime
# Load the autotime IPython extension; the "time: ..." line under each cell output is produced by it.
%load_ext autotime

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime
time: 2.23 ms (started: 2022-11-26 12:08:59 -06:00)


In [108]:
#!pip install google-cloud-aiplatform
#!pip install --upgrade google-cloud-bigquery

time: 390 µs (started: 2022-11-26 12:08:59 -06:00)


In [109]:
import os
from tabulate import tabulate
import numpy as np
from google.cloud import aiplatform as aip
from google.cloud import bigquery

time: 928 µs (started: 2022-11-26 12:08:59 -06:00)


### Set configurations and constants

In [110]:
# --- Project-level settings used throughout the notebook -------------------
NOTEBOOK = 'Vertex_AI_Streamlit'
REGION = "us-central1"
PROJECT = 'babyweight-prediction'
BUCKET = 'b_w_bucket'
BQ_DATASET = "bw_dataset"
APPNAME = "bw-prediction"

# Relative path to the key file exported below as GOOGLE_APPLICATION_CREDENTIALS.
# NOTE(review): presumably a service-account key; the path is relative to the
# notebook's working directory -- confirm before running from elsewhere.
GOOGLE_APPLICATION_CREDENTIALS = 'key/babyweight-prediction-ff79f406c099.json'

# Mirror the settings into the process environment in a single step.
os.environ.update({
    "REGION": REGION,
    "PROJECT": PROJECT,
    "BUCKET": BUCKET,
    "GOOGLE_APPLICATION_CREDENTIALS": GOOGLE_APPLICATION_CREDENTIALS,
})

time: 1.09 ms (started: 2022-11-26 12:08:59 -06:00)


In [111]:
# gs:// URI form of BUCKET; passed to aip.init() as the staging bucket.
GCS_BUCKET=f"gs://{BUCKET}"

time: 614 µs (started: 2022-11-26 12:08:59 -06:00)


In [112]:
# NOTE(review): not referenced by any other cell visible in this notebook -- confirm it is still needed.
SERVICE_ACCOUNT = "69318036822-compute@developer.gserviceaccount.com"

time: 513 µs (started: 2022-11-26 12:08:59 -06:00)


---
## 1. Retrieve the Endpoint

In [113]:
# Initialise the Vertex AI SDK with the project, region and staging bucket configured above.
aip.init(project=PROJECT, location=REGION, staging_bucket=GCS_BUCKET)

time: 742 µs (started: 2022-11-26 12:08:59 -06:00)


In [114]:
#https://us-central1-aiplatform.googleapis.com/v1/projects/69318036822/locations/us-central1/endpoints/4074389870305345536

time: 536 µs (started: 2022-11-26 12:08:59 -06:00)


In [115]:
# Fully-qualified resource name of the deployed endpoint (project number / location / endpoint id).
ENDPOINT_NAME = 'projects/69318036822/locations/us-central1/endpoints/4074389870305345536'

time: 433 µs (started: 2022-11-26 12:08:59 -06:00)


In [116]:
# Handle to the already-deployed endpoint; used below for predict() and explain().
endpoint = aip.Endpoint(
    endpoint_name=ENDPOINT_NAME,
    project=PROJECT,
    location=REGION,
)

time: 36.5 ms (started: 2022-11-26 12:08:59 -06:00)


## 2. Make Online Predictions

### 2.1. One sample

In [117]:
# A single request instance. Every feature value is passed as a string.
s = [
    {
        'is_male': 'true',
        'mother_age': '25.0',
        'plurality': 'Single(1)',
        'gestation_weeks': '40.0',
        'cigarette_use': 'true',
        'alcohol_use': 'false',
    },
]

time: 562 µs (started: 2022-11-26 12:08:59 -06:00)


#### Get prediction

In [118]:
# Send the sample to the endpoint and display the full Prediction object returned.
endpoint.predict(s)

Prediction(predictions=[{'value': 7.632030487060547, 'lower_bound': 4.640171051025391, 'upper_bound': 5.133586883544922}], deployed_model_id='3368705715412664320', model_version_id='1', model_resource_name='projects/69318036822/locations/us-central1/models/697490594242494464', explanations=None)

time: 539 ms (started: 2022-11-26 12:08:59 -06:00)


In [119]:
# Issues a fresh online prediction and keeps only the 'value' field of the first (only) result.
predicted_value = endpoint.predict(s).predictions[0]['value']
predicted_value

7.632030487060547

time: 367 ms (started: 2022-11-26 12:08:59 -06:00)


#### Get explanation

In [120]:
# Request the prediction together with its feature attributions; later cells read from `explain`.
explain=endpoint.explain(s)

time: 931 ms (started: 2022-11-26 12:09:00 -06:00)


In [121]:
explain

Prediction(predictions=[{'lower_bound': 4.640171051025391, 'value': 7.632030487060547, 'upper_bound': 5.133586883544922}], deployed_model_id='3368705715412664320', model_version_id=None, model_resource_name=None, explanations=[attributions {
  baseline_output_value: 7.8058624267578125
  instance_output_value: 7.632030487060547
  feature_attributions {
    struct_value {
      fields {
        key: "alcohol_use"
        value {
          number_value: 0.0
        }
      }
      fields {
        key: "cigarette_use"
        value {
          number_value: -0.2182244658470154
        }
      }
      fields {
        key: "gestation_weeks"
        value {
          number_value: 0.2012025713920593
        }
      }
      fields {
        key: "is_male"
        value {
          number_value: 0.0
        }
      }
      fields {
        key: "mother_age"
        value {
          number_value: -0.1568100452423096
        }
      }
      fields {
        key: "plurality"
        value {
   

time: 9.71 ms (started: 2022-11-26 12:09:01 -06:00)


In [122]:
# baseline_output_value of the first attribution of the first explained instance.
baseline_output_value=explain.explanations[0].attributions[0].baseline_output_value
baseline_output_value

7.8058624267578125

time: 4.48 ms (started: 2022-11-26 12:09:01 -06:00)


In [123]:
# instance_output_value of the same attribution; in the captured output it equals the predicted 'value'.
instance_output_value=explain.explanations[0].attributions[0].instance_output_value
instance_output_value

7.632030487060547

time: 5.33 ms (started: 2022-11-26 12:09:01 -06:00)


In [124]:
explain.explanations[0].attributions

[baseline_output_value: 7.8058624267578125
instance_output_value: 7.632030487060547
feature_attributions {
  struct_value {
    fields {
      key: "alcohol_use"
      value {
        number_value: 0.0
      }
    }
    fields {
      key: "cigarette_use"
      value {
        number_value: -0.2182244658470154
      }
    }
    fields {
      key: "gestation_weeks"
      value {
        number_value: 0.2012025713920593
      }
    }
    fields {
      key: "is_male"
      value {
        number_value: 0.0
      }
    }
    fields {
      key: "mother_age"
      value {
        number_value: -0.1568100452423096
      }
    }
    fields {
      key: "plurality"
      value {
        number_value: 0.0
      }
    }
  }
}
output_index: -1
approximation_error: 0.000995961366158731
output_name: "value"
]

time: 9.65 ms (started: 2022-11-26 12:09:01 -06:00)


In [125]:
# Feature names reported by get_feature_attributions (its default `feature_columns`).
FEATURE_COLUMNS = [
    'plurality', 'mother_age', 'is_male',
    'alcohol_use', 'cigarette_use', 'gestation_weeks',
]

time: 454 µs (started: 2022-11-26 12:09:01 -06:00)


In [126]:
def get_feature_attributions(
    prediction_expl, instance_index, feature_columns=FEATURE_COLUMNS, instances=None):
    """Return the per-feature attributions for one explained instance.

    Args:
        prediction_expl: Object returned by ``endpoint.explain(...)``. Only
            ``explanations[instance_index].attributions[0].feature_attributions``
            is read from it.
        instance_index: Position of the instance, used to index both the
            explanations and the list of request instances.
        feature_columns: Names of the features to report.
        instances: The request instances (list of dicts keyed by feature
            name) that were sent to the endpoint. When omitted, the
            notebook-level ``test_instances`` variable is used, which keeps
            existing two-argument calls working.

    Returns:
        A list of ``[feature_name, feature_value, attribution_value]`` rows,
        sorted by attribution value in descending order. The baseline score
        is not part of the returned rows.
    """
    attribution = prediction_expl.explanations[instance_index].attributions[0]

    if instances is None:
        # Backward-compatible fallback: older callers rely on the global
        # `test_instances` being assigned before this function is called.
        instances = test_instances
    instance = instances[instance_index]

    rows = [
        [name, instance[name], attribution.feature_attributions[name]]
        for name in feature_columns
    ]
    # sorted() is stable, so features with equal attribution keep the order
    # in which they appear in `feature_columns`.
    return sorted(rows, key=lambda row: row[2], reverse=True)

time: 1.11 ms (started: 2022-11-26 12:09:01 -06:00)


In [127]:
# get_feature_attributions() reads this global, so it must be assigned before that function is called.
test_instances=s

time: 398 µs (started: 2022-11-26 12:09:01 -06:00)


In [128]:
# Attribution rows ([feature, value, attribution]) for instance 0, sorted by attribution descending.
feature_attributions_rows = get_feature_attributions(explain, 0)
feature_attributions_rows

[['gestation_weeks', '40.0', 0.2012025713920593],
 ['plurality', 'Single(1)', 0.0],
 ['is_male', 'true', 0.0],
 ['alcohol_use', 'false', 0.0],
 ['mother_age', '25.0', -0.1568100452423096],
 ['cigarette_use', 'true', -0.2182244658470154]]

time: 4.6 ms (started: 2022-11-26 12:09:01 -06:00)


In [129]:
feature_attributions_rows[2][1]

'true'

time: 3.43 ms (started: 2022-11-26 12:09:01 -06:00)


In [130]:
# Column 0 of every attribution row: the feature name, in row order.
feature_list = [row[0] for row in feature_attributions_rows]

time: 494 µs (started: 2022-11-26 12:09:01 -06:00)


In [131]:
feature_list

['gestation_weeks',
 'plurality',
 'is_male',
 'alcohol_use',
 'mother_age',
 'cigarette_use']

time: 2.68 ms (started: 2022-11-26 12:09:01 -06:00)


In [132]:
# Column 1 of every attribution row: the feature value that was sent to the endpoint.
feature_values = [row[1] for row in feature_attributions_rows]

time: 666 µs (started: 2022-11-26 12:09:01 -06:00)


In [133]:
feature_values

['40.0', 'Single(1)', 'true', 'false', '25.0', 'true']

time: 3.24 ms (started: 2022-11-26 12:09:01 -06:00)


In [134]:
# Column 2 of every attribution row: the attribution (contribution) value of the feature.
feature_contributions = [row[2] for row in feature_attributions_rows]

time: 523 µs (started: 2022-11-26 12:09:01 -06:00)


In [135]:
feature_contributions

[0.2012025713920593, 0.0, 0.0, 0.0, -0.1568100452423096, -0.2182244658470154]

time: 4.63 ms (started: 2022-11-26 12:09:01 -06:00)


In [136]:
import plotly.graph_objects as go

# Vertical waterfall chart with one bar per feature: bar height is the
# feature's attribution, the label above it is the feature value.
fig = go.Figure(go.Waterfall(
    name="Feature contribution",  # legend label (legend is enabled below)
    orientation="v",
    # One "relative" step per plotted feature. The previous hard-coded list
    # had 8 entries (including a trailing "total") for only 6 data points.
    measure=["relative"] * len(feature_list),
    x=feature_list,
    textposition="outside",
    text=feature_values,
    y=feature_contributions,
    connector={"line": {"color": "rgb(63, 63, 63)"}},
))

fig.update_layout(
    title="Feature Contribution",
    showlegend=True,
)

fig.show()

ModuleNotFoundError: No module named 'plotly'

time: 32.7 ms (started: 2022-11-26 12:09:01 -06:00)


In [None]:
import plotly.graph_objects as go

# Horizontal variant of the feature-contribution waterfall: features on the
# y axis, attribution values on the x axis.
fig = go.Figure(go.Waterfall(
    name="Feature contribution",
    orientation="h",
    # Keep `measure` the same length as the plotted data (one relative step
    # per feature) instead of a fixed 8-entry list.
    measure=["relative"] * len(feature_list),
    y=feature_list,
    x=feature_contributions,
    text=feature_values,
    textposition="outside",
    connector={"mode": "between", "line": {"width": 4, "color": "rgb(0, 0, 0)", "dash": "solid"}},
))

# update_layout() is the last expression of the cell, so the figure it
# returns is what the notebook displays. The title now describes this chart
# rather than the plotly sample it was copied from.
fig.update_layout(title="Feature Contribution")


In [None]:
# NOTE(review): get_all_prediction_explanations_with_graphs is not defined or imported in any
# cell visible in this notebook; on a fresh kernel this call raises NameError -- confirm where
# it is supposed to come from.
get_all_prediction_explanations_with_graphs(test_instances=s)

In [None]:
# First prediction of the explain() response: a dict with 'value', 'lower_bound' and 'upper_bound'.
explain_dict=explain.predictions[0]

In [None]:
explain_dict

In [None]:
# Split the prediction dict into parallel lists of field names and field values.
names = list(explain_dict.keys())
values = list(explain_dict.values())

In [None]:
names

In [None]:
values

In [None]:
# Read the baseline from the explanation already held in `explain` rather
# than sending another online explain request for the same instance.
baseline = explain.explanations[0].attributions[0].baseline_output_value
baseline

In [None]:
# Read the instance output from the explanation already held in `explain`
# rather than sending another online explain request for the same instance.
instance = explain.explanations[0].attributions[0].instance_output_value
instance

### 2.2. Multiple samples

In [None]:
#bqclient = bigquery.Client(project=PROJECT)
#
#BQ_TABLE_ID = "hospital22.surgery.surgical-duration-prediction22_prepped_20000"
#query = f"""
#SELECT *
#FROM `{BQ_TABLE_ID}`
#WHERE splits='TEST'
#LIMIT 3
#"""
#
#df_test_3 = bqclient.query(query = query).to_dataframe()
#df_test_3.head()

In [None]:
#test_instances = df_test_3.drop(columns=['case_min', 'splits']).astype(str).to_dict(orient='records')
#test_instances

#### Get the predictions

In [None]:
#predicted_values =[]
#for i in range(len(test_instances)):
#    predicted_values.append(endpoint.predict(test_instances).predictions[i]['value'])
#
#predicted_values

In [None]:
# NOTE(review): get_all_prediction_explanations_with_graphs is not defined or imported in any
# cell visible in this notebook; on a fresh kernel this call raises NameError -- confirm where
# it is supposed to come from.
prediction_expl = get_all_prediction_explanations_with_graphs(test_instances)

#### NOTE: The following is a shorter way to collect the feature attribution values, but it does not show the Baseline Score.

In [None]:
import pandas as pd

explanations = endpoint.explain(test_instances)

# The chart was hard-coded to the third explained instance (index 2), which
# raises IndexError when fewer than three instances are sent. Keep index 2
# when it exists, otherwise fall back to the last available instance.
instance_index = min(2, len(explanations.explanations) - 1)
attributions = explanations.explanations[instance_index].attributions[0].feature_attributions

# Copy the attribution map into a plain dict so pandas builds one row per feature.
df = pd.DataFrame.from_dict(dict(attributions), orient='index')
df.columns = ["Attribution value"]

# `plt` is never imported in this notebook, so the former `plt.show()` raised
# NameError. The notebook renders the pandas plot on its own; the trailing
# semicolon only suppresses the Axes repr.
df.sort_values(by='Attribution value').plot(kind='barh');

## References:

- https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/quests/vertex-ai/vertex-ai-qwikstart/lab_exercise.ipynb