# 03_online_predictions_explanations

This notebook retrieves the Endpoint deployed by the pipeline and use it to query the model for online predictions and explanations, including the following steps:
 
1. Retrieve the Endpoint
2. Make Online Predictions with Explanations


---
## Setup

### Import libraries

In [1]:
#!pip install ipython-autotime
%load_ext autotime

time: 0 ns (started: 2022-11-25 14:38:47 -06:00)


In [2]:
#!pip install google-cloud-aiplatform
#!pip install --upgrade google-cloud-bigquery

time: 0 ns (started: 2022-11-25 14:38:47 -06:00)


In [3]:
import os
from tabulate import tabulate
import numpy as np
from google.cloud import aiplatform as aip
from google.cloud import bigquery

time: 5.84 s (started: 2022-11-25 14:38:47 -06:00)


### Set configurations and constants

In [4]:
NOTEBOOK ='Vertex_AI_Streamlit'
REGION = "us-central1"
PROJECT = 'babyweight-prediction'
BUCKET = 'b_w_bucket'
BQ_DATASET = "bw_dataset"
APPNAME = "bw-prediction"

GOOGLE_APPLICATION_CREDENTIALS = 'key/babyweight-prediction-ff79f406c099.json'

os.environ["REGION"] = REGION
os.environ["PROJECT"] = PROJECT
os.environ["BUCKET"] = BUCKET

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = GOOGLE_APPLICATION_CREDENTIALS

time: 0 ns (started: 2022-11-25 14:38:53 -06:00)


In [5]:
GCS_BUCKET=f"gs://{BUCKET}"

time: 0 ns (started: 2022-11-25 14:38:53 -06:00)


In [6]:
SERVICE_ACCOUNT = "69318036822-compute@developer.gserviceaccount.com"

time: 0 ns (started: 2022-11-25 14:38:53 -06:00)


---
## 1. Retrieve the Endpoint

In [7]:
aip.init(
    project=PROJECT, 
    location=REGION, 
    staging_bucket=GCS_BUCKET)

time: 16 ms (started: 2022-11-25 14:38:53 -06:00)


In [8]:
#https://us-central1-aiplatform.googleapis.com/v1/projects/69318036822/locations/us-central1/endpoints/4074389870305345536

time: 0 ns (started: 2022-11-25 14:38:53 -06:00)


In [9]:
ENDPOINT_NAME = 'projects/69318036822/locations/us-central1/endpoints/4074389870305345536'

time: 0 ns (started: 2022-11-25 14:38:53 -06:00)


In [10]:
endpoint = aip.Endpoint(
    project=PROJECT,
    location=REGION,
    endpoint_name=ENDPOINT_NAME
)

time: 47 ms (started: 2022-11-25 14:38:53 -06:00)


## 2. Make Online Predictions

### 2.1. One sample

In [175]:
s= [
    {'is_male': 'true',
     'mother_age': '25.0',
     'plurality': 'Single(1)',
     'gestation_weeks': '40.0',
     'cigarette_use': 'true',
     'alcohol_use': 'false',
    },
]

time: 0 ns (started: 2022-11-25 16:16:29 -06:00)


#### Get prediction

In [12]:
endpoint.predict(s)

Prediction(predictions=[{'value': 4.160363674163818, 'upper_bound': 3.925392627716064, 'lower_bound': 1.637687683105469}], deployed_model_id='3368705715412664320', model_version_id='1', model_resource_name='projects/69318036822/locations/us-central1/models/697490594242494464', explanations=None)

time: 344 ms (started: 2022-11-25 14:38:53 -06:00)


In [13]:
predicted_value = endpoint.predict(s).predictions[0]['value']
predicted_value

4.160363674163818

time: 188 ms (started: 2022-11-25 14:38:53 -06:00)


#### Get explanation

In [14]:
explain=endpoint.explain(s)

time: 657 ms (started: 2022-11-25 14:38:54 -06:00)


In [15]:
explain

Prediction(predictions=[{'upper_bound': 3.925392627716064, 'value': 4.160363674163818, 'lower_bound': 1.637687683105469}], deployed_model_id='3368705715412664320', model_version_id=None, model_resource_name=None, explanations=[attributions {
  baseline_output_value: 7.8058624267578125
  instance_output_value: 4.160363674163818
  feature_attributions {
    struct_value {
      fields {
        key: "alcohol_use"
        value {
          number_value: -0.1959028542041779
        }
      }
      fields {
        key: "cigarette_use"
        value {
          number_value: -0.1505577117204666
        }
      }
      fields {
        key: "gestation_weeks"
        value {
          number_value: -3.054098650813103
        }
      }
      fields {
        key: "is_male"
        value {
          number_value: -0.2580978572368622
        }
      }
      fields {
        key: "mother_age"
        value {
          number_value: 0.01315832138061523
        }
      }
      fields {
        key:

time: 0 ns (started: 2022-11-25 14:38:54 -06:00)


In [16]:
baseline_output_value=explain.explanations[0].attributions[0].baseline_output_value
baseline_output_value

7.8058624267578125

time: 0 ns (started: 2022-11-25 14:38:54 -06:00)


In [17]:
instance_output_value=explain.explanations[0].attributions[0].instance_output_value
instance_output_value

4.160363674163818

time: 0 ns (started: 2022-11-25 14:38:54 -06:00)


In [43]:
explain.explanations[0].attributions

[baseline_output_value: 7.8058624267578125
instance_output_value: 4.160363674163818
feature_attributions {
  struct_value {
    fields {
      key: "alcohol_use"
      value {
        number_value: -0.1959028542041779
      }
    }
    fields {
      key: "cigarette_use"
      value {
        number_value: -0.1505577117204666
      }
    }
    fields {
      key: "gestation_weeks"
      value {
        number_value: -3.054098650813103
      }
    }
    fields {
      key: "is_male"
      value {
        number_value: -0.2580978572368622
      }
    }
    fields {
      key: "mother_age"
      value {
        number_value: 0.01315832138061523
      }
    }
    fields {
      key: "plurality"
      value {
        number_value: 0.0
      }
    }
  }
}
output_index: -1
approximation_error: 0.0009752713450417257
output_name: "value"
]

time: 0 ns (started: 2022-11-25 15:01:22 -06:00)


In [44]:
FEATURE_COLUMNS = [
    'plurality',
    'mother_age',
    'is_male',
    'alcohol_use',
    'cigarette_use',
    'gestation_weeks'
]

time: 0 ns (started: 2022-11-25 15:01:27 -06:00)


In [160]:
def get_feature_attributions(
    prediction_expl, instance_index, feature_columns=FEATURE_COLUMNS):
    """Returns the feature attributions with the baseline for a prediction example"""

    rows = []
    attribution = prediction_expl.explanations[instance_index].attributions[0]
    baseline_score = attribution.baseline_output_value
    total_att_val = baseline_score
    for key in feature_columns:
        feature_val = test_instances[instance_index][key]
        att_val = attribution.feature_attributions[key]
        total_att_val += att_val
        rows.append([key,feature_val,att_val])

    feature_attributions_rows = sorted(rows, key=lambda row: row[2], reverse=True)
    #feature_attributions_rows.insert(0,["Baseline_Score", "--", baseline_score])
    #feature_attributions_rows.append(["Final_Prediction", "--", total_att_val])

    return feature_attributions_rows

time: 0 ns (started: 2022-11-25 16:09:01 -06:00)


In [176]:
test_instances=s

time: 0 ns (started: 2022-11-25 16:16:36 -06:00)


In [177]:
feature_attributions_rows = get_feature_attributions(explain, 0)
feature_attributions_rows

[['mother_age', '25.0', 0.01315832138061523],
 ['plurality', 'Single(1)', 0.0],
 ['cigarette_use', 'true', -0.1505577117204666],
 ['alcohol_use', 'false', -0.1959028542041779],
 ['is_male', 'true', -0.2580978572368622],
 ['gestation_weeks', '40.0', -3.054098650813103]]

time: 0 ns (started: 2022-11-25 16:16:37 -06:00)


In [178]:
feature_attributions_rows[2][1]

'true'

time: 0 ns (started: 2022-11-25 16:16:38 -06:00)


In [179]:
feature_list=[]
for i in range(len(feature_attributions_rows)):
    feature=feature_attributions_rows[i][0]
    feature_list.append(feature)

time: 0 ns (started: 2022-11-25 16:16:39 -06:00)


In [180]:
feature_list

['mother_age',
 'plurality',
 'cigarette_use',
 'alcohol_use',
 'is_male',
 'gestation_weeks']

time: 16 ms (started: 2022-11-25 16:16:39 -06:00)


In [181]:
feature_values=[]
for e in range(len(feature_attributions_rows)):
    feature=feature_attributions_rows[e][1]
    feature_values.append(feature)

time: 0 ns (started: 2022-11-25 16:16:39 -06:00)


In [182]:
feature_values

['25.0', 'Single(1)', 'true', 'false', 'true', '40.0']

time: 0 ns (started: 2022-11-25 16:16:39 -06:00)


In [183]:
feature_contributions=[]
for c in range(len(feature_attributions_rows)):
    feature=feature_attributions_rows[c][2]
    feature_contributions.append(feature)

time: 0 ns (started: 2022-11-25 16:16:40 -06:00)


In [184]:
feature_contributions

[0.01315832138061523,
 0.0,
 -0.1505577117204666,
 -0.1959028542041779,
 -0.2580978572368622,
 -3.054098650813103]

time: 0 ns (started: 2022-11-25 16:16:40 -06:00)


In [185]:
import plotly.graph_objects as go

fig = go.Figure(go.Waterfall(
    name = "20", orientation = "v",
    measure = ["relative", "relative", "relative", "relative", "relative","relative","relative", "total"],
    x = feature_list,
    textposition = "outside",
    text=feature_values,
    y = feature_contributions,
    connector = {"line":{"color":"rgb(63, 63, 63)"}},
))

fig.update_layout(
        title = "Feature Contribution",
        showlegend = True
)

fig.show()

time: 16 ms (started: 2022-11-25 16:16:41 -06:00)


In [186]:
import plotly.graph_objects as go

fig = go.Figure(go.Waterfall(
    name = "2018", orientation = "h", measure = ["relative", "relative", "relative", "relative", "relative","relative","relative", "total"],
    y = feature_list,
    x = feature_contributions,
    text=feature_values,
    textposition = "outside",
    connector = {"mode":"between", "line":{"width":4, "color":"rgb(0, 0, 0)", "dash":"solid"}}
))

fig.update_layout(title = "Profit and loss statement 2018")


time: 0 ns (started: 2022-11-25 16:16:44 -06:00)


In [51]:
get_all_prediction_explanations_with_graphs(test_instances=s)

Instance #1:
Feature name        Feature value      Attribution value
------------------  ---------------  -------------------
mother_age          25.0                       0.0131583
plurality           Single(1)                  0
cigarette_use       true                      -0.150558
alcohol_use         true                      -0.195903
is_male             false                     -0.258098
gestation_weeks     30.0                      -3.0541
* Baseline_Score    --                         7.80586
* Final_Prediction  --                         4.16036


Prediction(predictions=[{'lower_bound': 1.637687683105469, 'value': 4.160363674163818, 'upper_bound': 3.925392627716064}], deployed_model_id='3368705715412664320', model_version_id=None, model_resource_name=None, explanations=[attributions {
  baseline_output_value: 7.8058624267578125
  instance_output_value: 4.160363674163818
  feature_attributions {
    struct_value {
      fields {
        key: "alcohol_use"
        value {
          number_value: -0.1959028542041779
        }
      }
      fields {
        key: "cigarette_use"
        value {
          number_value: -0.1505577117204666
        }
      }
      fields {
        key: "gestation_weeks"
        value {
          number_value: -3.054098650813103
        }
      }
      fields {
        key: "is_male"
        value {
          number_value: -0.2580978572368622
        }
      }
      fields {
        key: "mother_age"
        value {
          number_value: 0.01315832138061523
        }
      }
      fields {
        key:

time: 406 ms (started: 2022-11-25 15:05:37 -06:00)


In [19]:
explain_dict=explain.predictions[0]

time: 0 ns (started: 2022-11-25 14:38:54 -06:00)


In [20]:
explain_dict

{'upper_bound': 3.925392627716064,
 'value': 4.160363674163818,
 'lower_bound': 1.637687683105469}

time: 0 ns (started: 2022-11-25 14:38:54 -06:00)


In [21]:
names = list(explain_dict.keys())
values = list(explain_dict.values())

time: 0 ns (started: 2022-11-25 14:38:55 -06:00)


In [22]:
names

['upper_bound', 'value', 'lower_bound']

time: 0 ns (started: 2022-11-25 14:38:55 -06:00)


In [23]:
values

[3.925392627716064, 4.160363674163818, 1.637687683105469]

time: 0 ns (started: 2022-11-25 14:38:55 -06:00)


time: 16 ms (started: 2022-11-25 14:38:55 -06:00)


time: 31 ms (started: 2022-11-25 14:38:55 -06:00)


time: 47 ms (started: 2022-11-25 14:38:55 -06:00)


time: 63 ms (started: 2022-11-25 14:38:55 -06:00)


time: 78 ms (started: 2022-11-25 14:38:55 -06:00)


time: 94 ms (started: 2022-11-25 14:38:55 -06:00)


In [24]:
baseline=endpoint.explain(s).explanations[0].attributions[0].baseline_output_value
baseline

7.8058624267578125

time: 375 ms (started: 2022-11-25 14:38:55 -06:00)


In [25]:
instance=endpoint.explain(s).explanations[0].attributions[0].instance_output_value
instance

4.160363674163818

time: 375 ms (started: 2022-11-25 14:38:55 -06:00)


### 2.2. Multiple samples

In [26]:
#bqclient = bigquery.Client(project=PROJECT)
#
#BQ_TABLE_ID = "hospital22.surgery.surgical-duration-prediction22_prepped_20000"
#query = f"""
#SELECT *
#FROM `{BQ_TABLE_ID}`
#WHERE splits='TEST'
#LIMIT 3
#"""
#
#df_test_3 = bqclient.query(query = query).to_dataframe()
#df_test_3.head()

time: 0 ns (started: 2022-11-25 14:38:55 -06:00)


In [27]:
#test_instances = df_test_3.drop(columns=['case_min', 'splits']).astype(str).to_dict(orient='records')
#test_instances

time: 0 ns (started: 2022-11-25 14:38:55 -06:00)


#### Get the predictions

In [28]:
#predicted_values =[]
#for i in range(len(test_instances)):
#    predicted_values.append(endpoint.predict(test_instances).predictions[i]['value'])
#
#predicted_values

time: 0 ns (started: 2022-11-25 14:38:55 -06:00)


time: 16 ms (started: 2022-11-25 14:38:55 -06:00)


time: 31 ms (started: 2022-11-25 14:38:55 -06:00)


time: 47 ms (started: 2022-11-25 14:38:55 -06:00)


time: 63 ms (started: 2022-11-25 14:38:55 -06:00)


time: 78 ms (started: 2022-11-25 14:38:55 -06:00)


time: 94 ms (started: 2022-11-25 14:38:55 -06:00)


time: 109 ms (started: 2022-11-25 14:38:55 -06:00)


time: 125 ms (started: 2022-11-25 14:38:55 -06:00)


time: 141 ms (started: 2022-11-25 14:38:55 -06:00)


time: 156 ms (started: 2022-11-25 14:38:55 -06:00)


time: 172 ms (started: 2022-11-25 14:38:55 -06:00)


time: 188 ms (started: 2022-11-25 14:38:55 -06:00)


time: 203 ms (started: 2022-11-25 14:38:55 -06:00)


time: 219 ms (started: 2022-11-25 14:38:55 -06:00)


time: 234 ms (started: 2022-11-25 14:38:55 -06:00)


time: 250 ms (started: 2022-11-25 14:38:55 -06:00)


time: 266 ms (started: 2022-11-25 14:38:55 -06:00)


In [29]:
prediction_expl = get_all_prediction_explanations_with_graphs(test_instances)

NameError: name 'get_all_prediction_explanations_with_graphs' is not defined

time: 422 ms (started: 2022-11-25 14:38:56 -06:00)


#### NOTE: The following is another short way to collect the feature atribution values but it does not show the Baseline Score.

In [None]:
import pandas as pd

explanations = endpoint.explain(test_instances)
df = pd.DataFrame.from_dict(explanations.explanations[2].attributions[0].feature_attributions, orient='index')
df.columns = ["Attribution value"]
df.sort_values(by='Attribution value').plot(kind='barh')
plt.show()

## References:

- https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/quests/vertex-ai/vertex-ai-qwikstart/lab_exercise.ipynb