# 03_online_predictions_explanations

This notebook retrieves the Endpoint deployed by the pipeline and use it to query the model for online predictions and explanations, including the following steps:
 
1. Retrieve the Endpoint
2. Make Online Predictions with Explanations


---
## Setup

### Import libraries

In [1]:
#!pip install ipython-autotime
%load_ext autotime

time: 409 µs (started: 2022-11-25 11:14:53 -06:00)


In [2]:
#!pip install google-cloud-aiplatform
#!pip install --upgrade google-cloud-bigquery

time: 370 µs (started: 2022-11-25 11:14:59 -06:00)


In [3]:
import os
from tabulate import tabulate
import numpy as np
from google.cloud import aiplatform as aip
from google.cloud import bigquery

time: 7.65 s (started: 2022-11-25 11:15:00 -06:00)


### Set configurations and constants

In [4]:
NOTEBOOK ='Vertex_AI_Streamlit'
REGION = "us-central1"
PROJECT = 'babyweight-prediction'
BUCKET = 'b_w_bucket'
BQ_DATASET = "bw_dataset"
APPNAME = "bw-prediction"

GOOGLE_APPLICATION_CREDENTIALS = 'key/babyweight-prediction-ff79f406c099.json'

os.environ["REGION"] = REGION
os.environ["PROJECT"] = PROJECT
os.environ["BUCKET"] = BUCKET

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = GOOGLE_APPLICATION_CREDENTIALS

time: 1.21 ms (started: 2022-11-25 11:15:07 -06:00)


In [5]:
GCS_BUCKET=f"gs://{BUCKET}"

time: 346 µs (started: 2022-11-25 11:15:07 -06:00)


In [6]:
SERVICE_ACCOUNT = "69318036822-compute@developer.gserviceaccount.com"

time: 346 µs (started: 2022-11-25 11:15:07 -06:00)


---
## 1. Retrieve the Endpoint

In [7]:
aip.init(
    project=PROJECT, 
    location=REGION, 
    staging_bucket=GCS_BUCKET)

time: 398 µs (started: 2022-11-25 11:15:07 -06:00)


In [8]:
#https://us-central1-aiplatform.googleapis.com/v1/projects/69318036822/locations/us-central1/endpoints/4074389870305345536

time: 340 µs (started: 2022-11-25 11:15:07 -06:00)


In [9]:
ENDPOINT_NAME = 'projects/69318036822/locations/us-central1/endpoints/4074389870305345536'

time: 342 µs (started: 2022-11-25 11:15:07 -06:00)


In [10]:
endpoint = aip.Endpoint(
    project=PROJECT,
    location=REGION,
    endpoint_name=ENDPOINT_NAME
)

time: 26.4 ms (started: 2022-11-25 11:15:07 -06:00)


## 2. Make Online Predictions

### 2.1. One sample

In [38]:
s= [
    {'is_male': 'false',
     'mother_age': '25.0',
     'plurality': 'Single(1)',
     'gestation_weeks': '30.0',
     'cigarette_use': 'true',
     'alcohol_use': 'true',
    },
]

time: 737 µs (started: 2022-11-25 11:40:47 -06:00)


#### Get prediction

In [39]:
endpoint.predict(s)

Prediction(predictions=[{'lower_bound': 1.637687683105469, 'value': 4.160363674163818, 'upper_bound': 3.925392627716064}], deployed_model_id='3368705715412664320', model_version_id='1', model_resource_name='projects/69318036822/locations/us-central1/models/697490594242494464', explanations=None)

time: 213 ms (started: 2022-11-25 11:40:49 -06:00)


In [40]:
predicted_value = endpoint.predict(s).predictions[0]['value']
predicted_value

4.160363674163818

time: 167 ms (started: 2022-11-25 11:40:51 -06:00)


#### Get explanation

In [41]:
explain=endpoint.explain(s)

time: 375 ms (started: 2022-11-25 11:40:53 -06:00)


In [42]:
explain

Prediction(predictions=[{'lower_bound': 1.637687683105469, 'upper_bound': 3.925392627716064, 'value': 4.160363674163818}], deployed_model_id='3368705715412664320', model_version_id=None, model_resource_name=None, explanations=[attributions {
  baseline_output_value: 7.8058624267578125
  instance_output_value: 4.160363674163818
  feature_attributions {
    struct_value {
      fields {
        key: "alcohol_use"
        value {
          number_value: -0.1959028542041779
        }
      }
      fields {
        key: "cigarette_use"
        value {
          number_value: -0.1505577117204666
        }
      }
      fields {
        key: "gestation_weeks"
        value {
          number_value: -3.054098650813103
        }
      }
      fields {
        key: "is_male"
        value {
          number_value: -0.2580978572368622
        }
      }
      fields {
        key: "mother_age"
        value {
          number_value: 0.01315832138061523
        }
      }
      fields {
        key:

time: 4.28 ms (started: 2022-11-25 11:40:55 -06:00)


In [72]:
baseline_output_value=explain.explanations[0].attributions[0].baseline_output_value
baseline_output_value

7.8058624267578125

time: 3.25 ms (started: 2022-11-25 12:32:43 -06:00)


In [73]:
instance_output_value=explain.explanations[0].attributions[0].instance_output_value
instance_output_value

4.160363674163818

time: 2.4 ms (started: 2022-11-25 12:32:47 -06:00)


In [105]:
explain.explanations[0].attributions[0]

baseline_output_value: 7.8058624267578125
instance_output_value: 4.160363674163818
feature_attributions {
  struct_value {
    fields {
      key: "alcohol_use"
      value {
        number_value: -0.1959028542041779
      }
    }
    fields {
      key: "cigarette_use"
      value {
        number_value: -0.1505577117204666
      }
    }
    fields {
      key: "gestation_weeks"
      value {
        number_value: -3.054098650813103
      }
    }
    fields {
      key: "is_male"
      value {
        number_value: -0.2580978572368622
      }
    }
    fields {
      key: "mother_age"
      value {
        number_value: 0.01315832138061523
      }
    }
    fields {
      key: "plurality"
      value {
        number_value: 0.0
      }
    }
  }
}
output_index: -1
approximation_error: 0.0009752713450417257
output_name: "value"

time: 3.16 ms (started: 2022-11-25 12:46:41 -06:00)


In [34]:
explain_dict=explain.predictions[0]

time: 313 µs (started: 2022-11-25 11:34:48 -06:00)


In [64]:
explain_dict

{'upper_bound': 5.372188568115234,
 'lower_bound': 4.707945346832275,
 'value': 7.805862426757812}

time: 4.16 ms (started: 2022-11-25 12:30:30 -06:00)


In [35]:
names = list(explain_dict.keys())
values = list(explain_dict.values())

time: 581 µs (started: 2022-11-25 11:35:17 -06:00)


In [36]:
names

['upper_bound', 'lower_bound', 'value']

time: 2.44 ms (started: 2022-11-25 11:36:33 -06:00)


In [37]:
values

[5.372188568115234, 4.707945346832275, 7.805862426757812]

time: 2.56 ms (started: 2022-11-25 11:36:37 -06:00)


In [14]:
baseline=endpoint.explain(s).explanations[0].attributions[0].baseline_output_value
baseline

7.8058624267578125

time: 478 ms (started: 2022-11-25 11:15:26 -06:00)


In [15]:
instance=endpoint.explain(s).explanations[0].attributions[0].instance_output_value
instance

7.8058624267578125

time: 398 ms (started: 2022-11-25 11:15:28 -06:00)


### 2.2. Multiple samples

In [17]:
#bqclient = bigquery.Client(project=PROJECT)
#
#BQ_TABLE_ID = "hospital22.surgery.surgical-duration-prediction22_prepped_20000"
#query = f"""
#SELECT *
#FROM `{BQ_TABLE_ID}`
#WHERE splits='TEST'
#LIMIT 3
#"""
#
#df_test_3 = bqclient.query(query = query).to_dataframe()
#df_test_3.head()

time: 365 µs (started: 2022-11-25 11:16:22 -06:00)


In [18]:
#test_instances = df_test_3.drop(columns=['case_min', 'splits']).astype(str).to_dict(orient='records')
#test_instances

time: 703 µs (started: 2022-11-25 11:16:27 -06:00)


#### Get the predictions

In [19]:
#predicted_values =[]
#for i in range(len(test_instances)):
#    predicted_values.append(endpoint.predict(test_instances).predictions[i]['value'])
#
#predicted_values

time: 336 µs (started: 2022-11-25 11:16:42 -06:00)


In [40]:
prediction_expl = get_all_prediction_explanations_with_graphs(test_instances)

NameError: name 'get_all_prediction_explanations_with_graphs' is not defined

time: 25.3 ms (started: 2022-11-16 21:29:09 +00:00)


#### NOTE: The following is another short way to collect the feature atribution values but it does not show the Baseline Score.

In [None]:
import pandas as pd

explanations = endpoint.explain(test_instances)
df = pd.DataFrame.from_dict(explanations.explanations[2].attributions[0].feature_attributions, orient='index')
df.columns = ["Attribution value"]
df.sort_values(by='Attribution value').plot(kind='barh')
plt.show()

## References:

- https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/quests/vertex-ai/vertex-ai-qwikstart/lab_exercise.ipynb