# 03_online_predictions_explanations

This notebook retrieves the Endpoint deployed by the pipeline and use it to query the model for online predictions and explanations, including the following steps:
 
1. Retrieve the Endpoint
2. Make Online Predictions with Explanations


---
## Setup

### Import libraries

In [2]:
#!pip install ipython-autotime
%load_ext autotime

time: 284 µs (started: 2022-11-24 15:48:04 -06:00)


In [2]:
#!pip install google-cloud-aiplatform
#!pip install --upgrade google-cloud-bigquery

time: 286 µs (started: 2022-11-23 08:55:08 -06:00)


In [4]:
import os
from tabulate import tabulate
import numpy as np
from google.cloud import aiplatform as aip
from google.cloud import bigquery

time: 31.1 s (started: 2022-11-24 15:49:20 -06:00)


### Set configurations and constants

In [5]:
NOTEBOOK ='Vertex_AI_Streamlit'
REGION = "us-central1"
PROJECT = 'babyweight-prediction'
BUCKET = 'b_w_bucket'
BQ_DATASET = "bw_dataset"
APPNAME = "bw-prediction"

GOOGLE_APPLICATION_CREDENTIALS = 'key/babyweight-prediction-ff79f406c099.json'

os.environ["REGION"] = REGION
os.environ["PROJECT"] = PROJECT
os.environ["BUCKET"] = BUCKET

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = GOOGLE_APPLICATION_CREDENTIALS

time: 843 µs (started: 2022-11-24 15:49:53 -06:00)


In [6]:
GCS_BUCKET=f"gs://{BUCKET}"

time: 311 µs (started: 2022-11-24 15:49:56 -06:00)


In [7]:
SERVICE_ACCOUNT = "69318036822-compute@developer.gserviceaccount.com"

time: 299 µs (started: 2022-11-24 15:49:57 -06:00)


---
## 1. Retrieve the Endpoint

In [8]:
aip.init(
    project=PROJECT, 
    location=REGION, 
    staging_bucket=GCS_BUCKET)

time: 383 µs (started: 2022-11-24 15:49:59 -06:00)


In [9]:
#https://us-central1-aiplatform.googleapis.com/v1/projects/69318036822/locations/us-central1/endpoints/4074389870305345536

time: 266 µs (started: 2022-11-24 15:50:02 -06:00)


In [10]:
ENDPOINT_NAME = 'projects/69318036822/locations/us-central1/endpoints/4074389870305345536'

time: 414 µs (started: 2022-11-24 15:50:04 -06:00)


In [12]:
endpoint = aip.Endpoint(
    project=PROJECT,
    location=REGION,
    endpoint_name=ENDPOINT_NAME
)

time: 351 ms (started: 2022-11-24 15:50:44 -06:00)


## 2. Make Online Predictions

### 2.1. One sample

In [13]:
s= [
    {'is_male': 'true',
     'mother_age': '42.0',
     'plurality': 'Single(1)',
     'gestation_weeks': '39.0',
     'cigarette_use': 'false',
     'alcohol_use': 'false',
    },
]

time: 458 µs (started: 2022-11-24 15:50:47 -06:00)


#### Get prediction

In [14]:
predicted_value = endpoint.predict(s).predictions[0]['value']
predicted_value

7.805862426757812

time: 250 ms (started: 2022-11-24 15:50:49 -06:00)


#### Get explanation

In [15]:
endpoint.explain(s)

Prediction(predictions=[{'upper_bound': 270.0006713867188, 'value': 70.01299285888672, 'lower_bound': 48.0077018737793}], deployed_model_id='6073090903369580544', model_version_id=None, model_resource_name=None, explanations=[attributions {
  baseline_output_value: 61.526493072509766
  instance_output_value: 70.01300048828125
  feature_attributions {
    struct_value {
      fields {
        key: "diagnosis_1"
        value {
          number_value: -21.24934577941895
        }
      }
      fields {
        key: "hosp_health_ministry"
        value {
          number_value: 0.0
        }
      }
      fields {
        key: "normalized_surgeon_specialty_name"
        value {
          number_value: 52.86612975597382
        }
      }
      fields {
        key: "num_diag_codes"
        value {
          number_value: 2.053956270217896
        }
      }
      fields {
        key: "num_proc_codes"
        value {
          number_value: 0.0
        }
      }
      fields {
        key: 

time: 620 ms (started: 2022-11-22 18:59:11 +00:00)


In [16]:
baseline=endpoint.explain(s).explanations[0].attributions[0].baseline_output_value
baseline

61.526493072509766

time: 426 ms (started: 2022-11-22 18:59:15 +00:00)


In [17]:
instance=endpoint.explain(s).explanations[0].attributions[0].instance_output_value
instance

70.01300048828125

time: 385 ms (started: 2022-11-22 18:59:16 +00:00)


### 2.2. Multiple samples

In [19]:
bqclient = bigquery.Client(project=PROJECT)

BQ_TABLE_ID = "hospital22.surgery.surgical-duration-prediction22_prepped_20000"
query = f"""
SELECT * 
FROM `{BQ_TABLE_ID}`
WHERE splits='TEST'
LIMIT 3
"""

df_test_3 = bqclient.query(query = query).to_dataframe()
df_test_3.head()

Unnamed: 0,case_min,normalized_surgeon_specialty_name,primary_procedure_code,diagnosis_1,num_proc_codes,hosp_health_ministry,patient_type_group,num_diag_codes,patient_gender,patient_age_yrs_group,splits
0,23,Urology,55700,C61,1,MIGRA,OUTPATIENT,1,M,between_45_and_65_years_old,TEST
1,20,Urology,55700,C61,1,MIGRA,OUTPATIENT,1,M,over_65_years_old,TEST
2,21,Urology,55700,C61,1,MIGRA,OUTPATIENT,1,M,over_65_years_old,TEST


time: 1.63 s (started: 2022-11-22 19:00:15 +00:00)


In [20]:
test_instances = df_test_3.drop(columns=['case_min', 'splits']).astype(str).to_dict(orient='records')
test_instances

[{'normalized_surgeon_specialty_name': 'Urology',
  'primary_procedure_code': '55700',
  'diagnosis_1': 'C61',
  'num_proc_codes': '1',
  'hosp_health_ministry': 'MIGRA',
  'patient_type_group': 'OUTPATIENT',
  'num_diag_codes': '1',
  'patient_gender': 'M',
  'patient_age_yrs_group': 'between_45_and_65_years_old'},
 {'normalized_surgeon_specialty_name': 'Urology',
  'primary_procedure_code': '55700',
  'diagnosis_1': 'C61',
  'num_proc_codes': '1',
  'hosp_health_ministry': 'MIGRA',
  'patient_type_group': 'OUTPATIENT',
  'num_diag_codes': '1',
  'patient_gender': 'M',
  'patient_age_yrs_group': 'over_65_years_old'},
 {'normalized_surgeon_specialty_name': 'Urology',
  'primary_procedure_code': '55700',
  'diagnosis_1': 'C61',
  'num_proc_codes': '1',
  'hosp_health_ministry': 'MIGRA',
  'patient_type_group': 'OUTPATIENT',
  'num_diag_codes': '1',
  'patient_gender': 'M',
  'patient_age_yrs_group': 'over_65_years_old'}]

time: 8.15 ms (started: 2022-11-22 19:00:19 +00:00)


#### Get the predictions

In [21]:
predicted_values =[]
for i in range(len(test_instances)):
    predicted_values.append(endpoint.predict(test_instances).predictions[i]['value'])
    
predicted_values

[24.87191390991211, 24.61504364013672, 24.61504364013672]

time: 174 ms (started: 2022-11-22 19:00:21 +00:00)


In [40]:
prediction_expl = get_all_prediction_explanations_with_graphs(test_instances)

NameError: name 'get_all_prediction_explanations_with_graphs' is not defined

time: 25.3 ms (started: 2022-11-16 21:29:09 +00:00)


#### NOTE: The following is another short way to collect the feature atribution values but it does not show the Baseline Score.

In [None]:
import pandas as pd

explanations = endpoint.explain(test_instances)
df = pd.DataFrame.from_dict(explanations.explanations[2].attributions[0].feature_attributions, orient='index')
df.columns = ["Attribution value"]
df.sort_values(by='Attribution value').plot(kind='barh')
plt.show()

## References:

- https://github.com/GoogleCloudPlatform/training-data-analyst/blob/master/quests/vertex-ai/vertex-ai-qwikstart/lab_exercise.ipynb