In [1]:
# IMPORT THE REQUIRED LIBRARIES

from kfp.v2 import dsl
from kfp.v2.dsl import (Artifact,
                        Dataset,
                        Input,
                        Output,
                        Model,
                        Metrics,
                        Markdown,
                        HTML,
                        component, 
                        OutputPath, 
                        InputPath)

from kfp.v2 import compiler
from google.cloud import aiplatform as vertex_ai
from google.cloud.aiplatform import pipeline_jobs

from datetime import datetime
import pandas as pd

In [2]:
# Project-wide settings shared by every cell below.
PROJECT_ID = "gb-lab-bm-risk-sel-41"
REGION = "europe-west2"

# Bucket used by the pipeline (alternative suffix: "-houseprice").
BUCKET_NAME = f"gs://{PROJECT_ID}-test"

# Root folder for pipeline artifacts (alternative folder: "pipeline_root_test/").
PIPELINE_ROOT = BUCKET_NAME + "/pipeline_root_houseprice/"

In [3]:
# Custom Docker base image that every pipeline component runs in.
# The image lives in the "houseprice" Artifact Registry repository
# (alternative repository: "test").
IMAGE_NAME = "training"
BASE_IMAGE = "/".join(
    [f"{REGION}-docker.pkg.dev", PROJECT_ID, "houseprice", IMAGE_NAME]
)

<b>TO TEST THE PIPELINE WE ARE USING A LEARNING RATE OF 1 FOR BOTH RFE AND MODEL TRAINING, CHANGE IT LATER.</b>

## Read the Dataset

In [4]:
@component(
    base_image=BASE_IMAGE,
    output_component_file="get_data.yaml"
)
def get_houseprice_data(
    filepath: str,
    dataset_train: Output[Dataset],
):
    """Copy the raw training CSV into a pipeline dataset artifact.

    Args:
        filepath: Folder that contains ``train.csv``.
        dataset_train: Output artifact the CSV is written to (without the index).
    """
    import pandas as pd

    source_csv = f"{filepath}/train.csv"
    pd.read_csv(source_csv).to_csv(dataset_train.path, index=False)

## Data Preprocessing

In [5]:
@component(
    base_image=BASE_IMAGE,
    output_component_file="preprocessing.yaml"
)
def preprocess_houseprice_data(
    train_df: Input[Dataset],
    dataset_train_preprocessed: Output[Dataset],
):
    """Run the project preprocessing pipeline on the raw training data.

    Args:
        train_df: Input dataset artifact holding the raw training CSV.
        dataset_train_preprocessed: Output artifact receiving the
            preprocessed CSV (written without the index).
    """
    import pandas as pd
    from src.data_preprocessing.preprocessing import data_preprocessing_pipeline

    raw_frame = pd.read_csv(train_df.path)

    # Per the original author's note, data_preprocessing_pipeline copies the
    # frame, removes the id column, converts dtypes, subtracts YearSold from
    # temporal features and cosine-transforms cyclic features.
    preprocessed_frame = data_preprocessing_pipeline(raw_frame)

    preprocessed_frame.to_csv(dataset_train_preprocessed.path, index=False)

## Train Test Split

In [6]:
@component(
    base_image=BASE_IMAGE,
    output_component_file="train_test_split.yaml",
)
def train_test_split(dataset_in: Input[Dataset],
                     dataset_train: Output[Dataset],
                     dataset_test: Output[Dataset],
                     test_size: float = 0.2):
    """Split a dataset artifact into train and test CSV artifacts.

    Args:
        dataset_in: Input dataset artifact to split.
        dataset_train: Output artifact receiving the training partition.
        dataset_test: Output artifact receiving the test partition.
        test_size: Value forwarded to scikit-learn's ``train_test_split``.
    """
    import pandas as pd
    # Aliased so the scikit-learn helper is not confused with this component,
    # which carries the same name.
    from sklearn.model_selection import train_test_split as split_frame

    full_frame = pd.read_csv(dataset_in.path)
    partitions = split_frame(full_frame, test_size=test_size, random_state=42)

    # split_frame returns (train, test); write each to its matching artifact.
    for partition, artifact in zip(partitions, (dataset_train, dataset_test)):
        partition.to_csv(artifact.path, index=False)

## Feature Engineering and Feature Selection

## Train the Model

In [7]:
@component(
    base_image=BASE_IMAGE,
    output_component_file="model_training.yaml"
)
def train_houseprice(
    dataset_train: Input[Dataset],
    dataset_test: Input[Dataset],
    best_params: Output[Markdown],
    shap_summary_plot: Output[HTML],
    model: Output[Model],
    n_kfold_splits: int = 2,
    n_trials: int = 1,
):
    """Fit the house-price model and persist its artifacts.

    The tuning effort used to be hard-coded to quick smoke-test values; it is
    now exposed as component parameters whose defaults keep those values, so
    the pipeline can request a full run without a second copy of this component.

    NOTE(review): a later cell redefines ``train_houseprice`` with
    ``n_kfold_splits=3`` and ``n_trials=100``; with these parameters that
    duplicate definition is no longer needed.

    Args:
        dataset_train: Training partition (CSV artifact).
        dataset_test: Test partition (CSV artifact); a copy is handed to
            ``HousePriceModel`` at construction time.
        best_params: Output artifact receiving ``str(best_params)``.
        shap_summary_plot: Output HTML artifact embedding the SHAP summary plot.
        model: Output artifact receiving a pickle with the keys
            ``"pipeline"``, ``"target"`` and ``"scores_dict"``.
        n_kfold_splits: Forwarded to ``HousePriceModel(n_kfold_splits=...)``.
        n_trials: Forwarded to ``HousePriceModel(n_trials=...)``.
    """
    import pandas as pd
    import pickle
    import shap
    from src.modelling.train import HousePriceModel
    from src.utils.utils import get_image_data

    TARGET = 'SalePrice'

    # Read train and test data
    train_data = pd.read_csv(dataset_train.path)
    test_data = pd.read_csv(dataset_test.path)

    # Instantiate the model class
    house_price_model = HousePriceModel(test_data.copy(),
                                        target=TARGET,
                                        n_kfold_splits=n_kfold_splits,
                                        n_trials=n_trials,
                                        random_state=42)
    # Create X_train and y_train
    X_train = train_data.drop(TARGET, axis=1)
    y_train = train_data[TARGET]

    # Fit the model
    house_price_model.fit(X_train, y_train)

    # Save the best hyperparameters as an artifact
    with open(best_params.path, "w") as f:
        f.write(str(house_price_model.best_params))

    # The plot must be drawn before get_image_data() is called.
    # NOTE(review): get_image_data presumably captures the current matplotlib
    # figure as a data URL - confirm against src.utils.utils.
    shap.summary_plot(house_price_model.shap_values, house_price_model.X_test_transformed, max_display=20)
    shap_plot_dataurl = get_image_data()
    html_content = f'<html><head></head><body><h1>Shap Summary Plot</h1>\n<img src={shap_plot_dataurl} width="97%"></body></html>'
    # Save shap summary plot as an html artifact
    with open(shap_summary_plot.path, "w") as f:
        f.write(html_content)

    model.metadata["framework"] = "xgboost"
    # Save the fitted pipeline together with its target name and scores
    with open(model.path, 'wb') as f:
        pickle.dump({
            "pipeline": house_price_model.model_pipeline,
            "target": house_price_model.target,
            "scores_dict": house_price_model.scores}, f)

In [None]:
@component(
    base_image=BASE_IMAGE,
    output_component_file="model_training.yaml"
)
def train_houseprice(
    dataset_train: Input[Dataset],
    dataset_test: Input[Dataset],
    best_params: Output[Markdown],
    shap_summary_plot: Output[HTML],
    model: Output[Model],
):
    """Fit the house-price model with the full tuning budget and save artifacts.

    NOTE(review): this cell redefines ``train_houseprice`` from the previous
    cell (same name, same ``model_training.yaml``); whichever cell runs last
    is the definition the pipeline picks up.

    Args:
        dataset_train: Training partition (CSV artifact).
        dataset_test: Test partition (CSV artifact).
        best_params: Output artifact receiving ``str(best_params)``.
        shap_summary_plot: Output HTML artifact embedding the SHAP summary plot.
        model: Output artifact receiving a pickle with the keys
            ``"pipeline"``, ``"target"`` and ``"scores_dict"``.
    """
    import pandas as pd
    import pickle
    import shap
    from src.modelling.train import HousePriceModel
    from src.utils.utils import get_image_data

    TARGET = 'SalePrice'

    # Load both partitions produced by the split component.
    train_frame = pd.read_csv(dataset_train.path)
    test_frame = pd.read_csv(dataset_test.path)

    # Separate features from the target for fitting.
    features = train_frame.drop(columns=[TARGET])
    target_values = train_frame[TARGET]

    # Author's note: hyperparameters are tuned on the train data, evaluated on
    # the test data, and the final model is trained on the entire
    # (train + test) dataset.
    estimator = HousePriceModel(
        test_frame.copy(),
        target=TARGET,
        n_kfold_splits=3,
        n_trials=100,
        random_state=42,
    )

    # The training pipeline consists of feature engineering, feature selection
    # and training an xgboost model.
    estimator.fit(features, target_values)

    # Artifact 1: best hyperparameters.
    with open(best_params.path, "w") as params_file:
        params_file.write(str(estimator.best_params))

    # Artifact 2: SHAP summary plot wrapped in HTML. The plot has to be drawn
    # before get_image_data() is asked for the image data.
    shap.summary_plot(estimator.shap_values, estimator.X_test_transformed, max_display=20)
    shap_plot_dataurl = get_image_data()
    html_content = f'<html><head></head><body><h1>Shap Summary Plot</h1>\n<img src={shap_plot_dataurl} width="97%"></body></html>'
    with open(shap_summary_plot.path, "w") as html_file:
        html_file.write(html_content)

    # Artifact 3: the fitted pipeline with its target name and scores.
    model.metadata["framework"] = "xgboost"
    payload = {
        "pipeline": estimator.model_pipeline,
        "target": estimator.target,
        "scores_dict": estimator.scores,
    }
    with open(model.path, 'wb') as model_file:
        pickle.dump(payload, model_file)

## Evaluate the Model

In [8]:
@component(
    base_image=BASE_IMAGE,
    output_component_file="model_evaluation.yaml"
)
def evaluate_houseprice(
    houseprice_model: Input[Model],
    cv_metrics_plot: Output[HTML],
    lorenz_plot: Output[HTML],
    metrics_test: Output[Metrics]):
    """Render cross-validation plots and log the averaged test metrics.

    The ``metrics_train`` output was removed from the signature, but the body
    still logged to it, which raised ``NameError`` on the first train metric.
    Only test metrics are logged now, matching the declared outputs.

    Args:
        houseprice_model: Pickled model artifact produced by the training step.
        cv_metrics_plot: Output HTML artifact for the cross-validated metrics plot.
        lorenz_plot: Output HTML artifact for the Lorenz curves plot.
        metrics_test: Output metrics artifact receiving the mean of every
            metric whose name contains ``"test"``.

    Raises:
        KeyError: If the pickle has no ``"results_dict"`` entry.
    """
    import pickle
    import numpy as np
    from src.modelling.model_validation import plot_cross_validated_metrics, plot_lorenz_curves
    from src.utils.utils import get_image_data

    # pickle.load executes arbitrary code; only load artifacts written by this
    # pipeline's own training step.
    with open(houseprice_model.path, 'rb') as file:
        model_data = pickle.load(file)

    # NOTE(review): the train_houseprice components in this notebook pickle the
    # keys "pipeline", "target" and "scores_dict" - not "results_dict". Confirm
    # which training step this evaluator is meant to be paired with.
    cv_results_dict = model_data["results_dict"]

    # Plot cv metrics and save as html artifact
    plot_cross_validated_metrics(cv_results_dict)
    cv_metrics_plot_dataurl = get_image_data()  # image data for the plot drawn above
    html_content = f'<html><head></head><body><h1>Cross Validated Metrics Plot</h1>\n<img src={cv_metrics_plot_dataurl} width="97%"></body></html>'
    with open(cv_metrics_plot.path, "w") as f:
        f.write(html_content)

    # Plot lorenz curves and save as html artifact
    plot_lorenz_curves(cv_results_dict)
    lorenz_plot_dataurl = get_image_data()
    html_content = f'<html><head></head><body><h1>Lorenz Plot</h1>\n<img src={lorenz_plot_dataurl} width="97%"></body></html>'
    with open(lorenz_plot.path, "w") as f:
        f.write(html_content)

    # Log the mean of every test metric; train metrics have no output artifact
    # in this component and are skipped.
    for metric_name, val in cv_results_dict.items():
        if 'train' in metric_name:
            continue
        if 'test' in metric_name:
            metrics_test.log_metric(metric_name, float(np.mean(val)))
        
   
@component(
    base_image=BASE_IMAGE,
    output_component_file="model_evaluation.yaml"
)
def evaluate_houseprice(
    houseprice_model: Input[Model],
    dataset_train: Input[Dataset],
    dataset_test: Input[Dataset],
    metrics_train: Output[Metrics],
    metrics_test: Output[Metrics]):
    """Score the fitted pipeline on the train and test partitions.

    Predictions are computed once per dataset and shared by all three metrics
    (the previous version called ``predict`` separately for each metric).

    Args:
        houseprice_model: Pickled model artifact with a ``"pipeline"`` entry.
        dataset_train: Training partition (CSV artifact).
        dataset_test: Test partition (CSV artifact).
        metrics_train: Output metrics artifact for the training partition.
        metrics_test: Output metrics artifact for the test partition.
    """
    import pandas as pd
    import pickle

    from sklearn.metrics import mean_gamma_deviance as gamma_deviance
    from sklearn.metrics import mean_squared_error as mse
    from sklearn.metrics import r2_score

    TARGET = 'SalePrice'

    train_df = pd.read_csv(dataset_train.path)
    test_df = pd.read_csv(dataset_test.path)

    # pickle.load executes arbitrary code; only load artifacts written by this
    # pipeline's own training step.
    with open(houseprice_model.path, 'rb') as file:
        model_data = pickle.load(file)

    model_pipeline = model_data["pipeline"]

    def eval_dataset(dataset, metric):
        """Log gamma deviance, MSE and R2 of the pipeline on ``dataset``."""
        y_true = dataset[TARGET]
        y_pred = model_pipeline.predict(dataset.drop(TARGET, axis=1))
        scores = {"gamma_deviance": gamma_deviance(y_true, y_pred),
                  "mse": mse(y_true, y_pred),
                  "r2": r2_score(y_true, y_pred)
                 }
        for metric_name, val in scores.items():
            metric.log_metric(metric_name, float(val))

    eval_dataset(train_df, metrics_train)
    eval_dataset(test_df, metrics_test)

In [8]:
@component(
    base_image=BASE_IMAGE,
    output_component_file="model_evaluation.yaml"
)
def evaluate_houseprice(
    houseprice_model: Input[Model],
    metrics_baseline: Output[Metrics],
    metrics_train: Output[Metrics],
    metrics_test: Output[Metrics]):
    """Log the scores stored alongside the model as pipeline metrics.

    Args:
        houseprice_model: Pickled model artifact whose ``"scores_dict"`` entry
            holds ``"baseline_scores"``, ``"train_scores"`` and
            ``"test_scores"`` mappings of metric name to value.
        metrics_baseline: Output metrics artifact for the baseline scores.
        metrics_train: Output metrics artifact for the train scores.
        metrics_test: Output metrics artifact for the test scores.
    """
    import pickle

    with open(houseprice_model.path, 'rb') as handle:
        stored = pickle.load(handle)

    scores = stored["scores_dict"]

    # Pair each score group with the artifact it is reported to, in the same
    # order as before: baseline, train, test.
    destinations = (
        ("baseline_scores", metrics_baseline),
        ("train_scores", metrics_train),
        ("test_scores", metrics_test),
    )
    for group_key, artifact in destinations:
        for metric_name, val in scores[group_key].items():
            artifact.log_metric(metric_name, float(val))

## Deploy the Model

In [9]:
@component(
    base_image=BASE_IMAGE,
    install_kfp_package=False,
    output_component_file="model_deployment.yaml",
)
def deploy_houseprice(
        serving_container_image_uri: str,
        display_name: str,
        model_endpoint: str,
        gcp_project: str,
        gcp_region: str,
        model: Input[Model],
        vertex_model: Output[Model],
        vertex_endpoint: Output[Model]
):
    """Upload the trained model to Vertex AI and deploy it to an endpoint.

    Args:
        serving_container_image_uri: Custom serving container image.
        display_name: Display name of the Vertex AI model (also used as the
            deployed model display name).
        model_endpoint: Display name of the endpoint to reuse or create.
        gcp_project: Project the endpoint and model live in.
        gcp_region: Region the endpoint and model live in.
        model: Trained model artifact; its parent folder is uploaded.
        vertex_model: Output artifact whose ``uri`` is set to the uploaded
            model's resource name.
        vertex_endpoint: Output artifact whose ``uri`` is set to the resource
            name returned by the deploy call.
    """
    from google.cloud import aiplatform as vertex_ai
    from pathlib import Path

    # Reuses the most recently created endpoint with this display name, or
    # creates one if none exists.
    def create_endpoint():
        endpoints = vertex_ai.Endpoint.list(
            filter='display_name="{}"'.format(model_endpoint),
            order_by='create_time desc',
            project=gcp_project,
            location=gcp_region,
        )
        if endpoints:
            return endpoints[0]  # most recently created
        return vertex_ai.Endpoint.create(
            display_name=model_endpoint,
            project=gcp_project,
            location=gcp_region,
        )

    endpoint = create_endpoint()

    # Uploads the trained model to the Vertex AI Model Registry, as a new
    # version of an existing model when one with the same display name exists.
    def upload_model():
        existing_models = vertex_ai.Model.list(
            filter='display_name="{}"'.format(display_name),
            # Without an explicit ordering, [0] is not guaranteed to be the
            # most recently created model.
            order_by='create_time desc',
            project=gcp_project,
            location=gcp_region,
        )
        upload_kwargs = dict(
            display_name=display_name,
            artifact_uri=str(Path(model.path).parent),
            serving_container_image_uri=serving_container_image_uri,
            # Passed explicitly, like every other Vertex AI call here, so the
            # upload does not depend on the ambient default project.
            project=gcp_project,
            location=gcp_region,
            serving_container_predict_route="/predict",
            serving_container_health_route="/health",
        )
        if existing_models:
            upload_kwargs["parent_model"] = existing_models[0].resource_name
        return vertex_ai.Model.upload(**upload_kwargs)

    uploaded_model = upload_model()

    # Save data to the output params
    vertex_model.uri = uploaded_model.resource_name

    # Deploys the uploaded model to the endpoint; the "0" key of traffic_split
    # refers to the model being deployed by this call, so it receives all traffic.
    model_deploy = uploaded_model.deploy(
        machine_type='n1-standard-4',
        endpoint=endpoint,
        traffic_split={"0": 100},
        deployed_model_display_name=display_name,
    )

    # Save data to the output params
    vertex_endpoint.uri = model_deploy.resource_name

## Create the Pipeline

In [10]:
# A timestamp suffix (YYYYmmddHHMMSS) keeps every pipeline display name unique.
# Alternative prefix for test runs: "pipeline-test-job".
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")
DISPLAY_NAME = f"pipeline-houseprice-job{TIMESTAMP}"

In [11]:
@dsl.pipeline(
    pipeline_root=PIPELINE_ROOT,
    # A name for the pipeline. Used to determine the pipeline Context.
    # (Alternative name for test runs: "pipeline-test".)
    name="pipeline-houseprice"
)
def pipeline(
    data_filepath: str = f"{BUCKET_NAME}/data",
    project: str = PROJECT_ID,
    region: str = REGION,
    display_name: str = DISPLAY_NAME,
    # Custom serving container image (a ".../test/serving_image:latest" image
    # was used for test runs).
    serving_container_image_uri: str = "europe-west2-docker.pkg.dev/gb-lab-bm-risk-sel-41/houseprice/serving_image:latest"
):
    """Wire the components: load, preprocess, split, train, then evaluate and deploy.

    Args:
        data_filepath: Folder containing ``train.csv``.
        project: Project passed to the deployment step.
        region: Region passed to the deployment step.
        display_name: Accepted as a pipeline parameter; not consumed by any step.
        serving_container_image_uri: Serving image passed to the deployment step.
    """
    load_step = get_houseprice_data(filepath=data_filepath)

    preprocess_step = preprocess_houseprice_data(
        train_df=load_step.outputs["dataset_train"],
    )

    split_step = train_test_split(
        dataset_in=preprocess_step.outputs["dataset_train_preprocessed"],
    )

    training_step = train_houseprice(
        dataset_train=split_step.outputs["dataset_train"],
        dataset_test=split_step.outputs["dataset_test"],
    )

    # Evaluation and deployment both consume the trained model artifact.
    evaluation_step = evaluate_houseprice(
        houseprice_model=training_step.outputs["model"],
    )

    deployment_step = deploy_houseprice(
        model=training_step.outputs['model'],
        gcp_project=project,
        gcp_region=region,
        serving_container_image_uri=serving_container_image_uri,
        # Test runs used display_name="test" / model_endpoint="test_endpoint".
        display_name="houseprice",
        model_endpoint="houseprice_endpoint",
    )

## Compile and Run the Pipeline

In [12]:
# Compile the pipeline function into the job spec file that the run and
# schedule cells below load.
compiler.Compiler().compile(
    pipeline_func=pipeline,
    package_path='ml_houseprice.json',
)



In [14]:
# Create a run from the compiled job spec file.
# The project is passed explicitly, like the location, so the job does not
# depend on whatever default project the environment happens to have
# (this notebook never calls vertex_ai.init).
start_pipeline = pipeline_jobs.PipelineJob(
    display_name="houseprice-pipeline",  # "test-pipeline" was used for test runs
    template_path="ml_houseprice.json",
    enable_caching=False,
    project=PROJECT_ID,
    location=REGION,
)

In [15]:
# RUN THE PIPELINE
# Submits the job built in the previous cell. The recorded output below shows
# the call repeatedly reporting the job state, so expect this cell to block.

start_pipeline.run()

Creating PipelineJob
PipelineJob created. Resource name: projects/737041112644/locations/europe-west2/pipelineJobs/pipeline-houseprice-20230215125152
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/737041112644/locations/europe-west2/pipelineJobs/pipeline-houseprice-20230215125152')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/europe-west2/pipelines/runs/pipeline-houseprice-20230215125152?project=737041112644
PipelineJob projects/737041112644/locations/europe-west2/pipelineJobs/pipeline-houseprice-20230215125152 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/737041112644/locations/europe-west2/pipelineJobs/pipeline-houseprice-20230215125152 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/737041112644/locations/europe-west2/pipelineJobs/pipeline-houseprice-20230215125152 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/737041112644/locatio

## Make Predictions Using Vertex AI Endpoint

In [79]:
# NOTE(review): hard-coded resource name of one specific endpoint; it must be
# updated by hand whenever the endpoint is recreated.
endpoint_name = "projects/737041112644/locations/europe-west2/endpoints/6824026558815010816"
endpoint = vertex_ai.Endpoint(endpoint_name)

In [80]:
# Display the endpoint handle; the repr shows the resource name it points at.
endpoint

<google.cloud.aiplatform.models.Endpoint object at 0x7fdd6285d950> 
resource name: projects/737041112644/locations/europe-west2/endpoints/6824026558815010816

In [134]:
# NOTE(review): test_df is not assigned in any visible cell of this notebook,
# so this fails on a fresh kernel - add a cell that loads it first.
test_df.shape

(1459, 80)

In [154]:
# Keep the first 10 rows (all columns) as a small prediction sample.
test = test_df.iloc[:10,:]

In [155]:
# Serialise the sample as JSON Lines: one JSON object per row, one row per line.
request = test.to_json(orient='records', lines=True)

In [156]:
#print(request)
# Show the request as a list with one JSON string per row - the same list
# that is sent to the endpoint in the next cell.
print(request.splitlines())

['{"Id":1461,"MSSubClass":20,"MSZoning":"RH","LotFrontage":80.0,"LotArea":11622,"Street":"Pave","Alley":null,"LotShape":"Reg","LandContour":"Lvl","Utilities":"AllPub","LotConfig":"Inside","LandSlope":"Gtl","Neighborhood":"NAmes","Condition1":"Feedr","Condition2":"Norm","BldgType":"1Fam","HouseStyle":"1Story","OverallQual":5,"OverallCond":6,"YearBuilt":1961,"YearRemodAdd":1961,"RoofStyle":"Gable","RoofMatl":"CompShg","Exterior1st":"VinylSd","Exterior2nd":"VinylSd","MasVnrType":"None","MasVnrArea":0.0,"ExterQual":"TA","ExterCond":"TA","Foundation":"CBlock","BsmtQual":"TA","BsmtCond":"TA","BsmtExposure":"No","BsmtFinType1":"Rec","BsmtFinSF1":468.0,"BsmtFinType2":"LwQ","BsmtFinSF2":144.0,"BsmtUnfSF":270.0,"TotalBsmtSF":882.0,"Heating":"GasA","HeatingQC":"TA","CentralAir":"Y","Electrical":"SBrkr","1stFlrSF":896,"2ndFlrSF":0,"LowQualFinSF":0,"GrLivArea":896,"BsmtFullBath":0.0,"BsmtHalfBath":0.0,"FullBath":1,"HalfBath":0,"BedroomAbvGr":2,"KitchenAbvGr":1,"KitchenQual":"TA","TotRmsAbvGrd":5,"F

In [152]:
# Each instance sent to the endpoint is the JSON string of one row.
# NOTE(review): the execution counts show this cell (In [152]) last ran before
# the 10-row slice above (In [154]), so the recorded output may not correspond
# to the current `request` - re-run top to bottom to refresh it.
endpoint.predict(instances=request.splitlines())

Prediction(predictions=[117559.0390625, 155208.421875, 179901.609375, 178279.078125, 204086.515625, 178206.5, 172657.609375, 171097.8125, 175429.265625, 114578.828125, 216522.75, 97140.9375, 94648.1875, 153068.328125, 164014.5625, 405270.71875, 250115.234375, 331740.90625, 294255.34375, 513644.78125, 338938.15625, 216631.40625, 182557.84375, 172815.109375, 167922.46875, 197518.640625, 430723.5625, 238339.78125, 179129.15625, 187817.078125, 199417.046875, 102560.9765625, 160195.609375, 297545.375, 251044.828125, 242561.296875, 170523.4375, 160668.828125, 160185.828125, 140382.765625, 172993.03125, 148131.984375, 294549.5625, 230164.15625, 223225.5625, 199261.15625, 230001.78125, 210165.6875, 178455.140625, 151753.640625, 153670.34375, 174244.4375, 176971.515625, 135538.09375, 212039.390625, 169700.59375, 148034.546875, 154145.765625, 209872.453125, 147771.90625, 141599.890625, 179412.796875, 114927.1640625, 120221.953125, 122353.5625, 111293.0078125, 99411.859375, 127387.59375, 156061.4

## List All Models

In [30]:
#DISPLAY_NAME = "houseprice"
DISPLAY_NAME = "test"
! gcloud ai models list --region={REGION} --filter={DISPLAY_NAME}

Using endpoint [https://europe-west2-aiplatform.googleapis.com/]
MODEL_ID             DISPLAY_NAME
8345117332959395840  test
7775411980097028096  test


## Trigger the Pipeline

### Schedule the Pipeline

Scheduled pipelines are implemented with Cloud Scheduler and Cloud Functions. Check that the Cloud Scheduler and Cloud Functions APIs are enabled.

### Run Recurrent Pipeline

In [None]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so dependencies are visible in one place.
from kfp.v2.google.client import AIPlatformClient

# Client bound to the same project and region as the rest of the notebook.
api_client = AIPlatformClient(
                project_id=PROJECT_ID,
                region=REGION,
                )

# NOTE(review): SERVICE_ACCOUNT is defined but never used, because the
# service_account argument below is commented out.
SERVICE_ACCOUNT = (
    "XXXXXX@developer.gserviceaccount.com" # Replace the Xs with your generated service-account.
)

# Schedule the compiled job spec. The pipeline's display_name parameter is
# overridden with whatever DISPLAY_NAME holds when this cell runs.
response = api_client.create_schedule_from_job_spec(
    enable_caching=True,
    job_spec_path="ml_houseprice.json",
    schedule="0 0 * * 1", # once per week on Monday (use schedule parameter to mention the recurrence interval)
    time_zone="Europe/London",  # change this as necessary
    parameter_values={"display_name": DISPLAY_NAME},
    pipeline_root=PIPELINE_ROOT,  # this argument is necessary if you did not specify PIPELINE_ROOT as part of the pipeline definition.
    #service_account=SERVICE_ACCOUNT,  
)

## Simulate Batch Predictions and Real-Time Predictions

### Test the Batch Prediction

In [None]:
# Define variables 
job_display_name = "test-houseprice-batch-prediction-job"
MODEL_NAME="houseprice"
ENDPOINT_NAME="houseprice_endpoint"  # endpoint where the model is deployed
# NOTE(review): BUCKET_URI is a placeholder ("your-bucket", "332188XXXX");
# replace it with the real folder before running this cell.
BUCKET_URI="gs://your-bucket-houseprice/pipeline_root_houseprice/332188XXXX/pipeline-houseprice-20211227155508/get-houseprice-data_8271177375014715392" # bucket containing batch instances ready for prediction
input_file_name="test.csv"

# Get model id
# The shell capture returns the command's output as a list of lines, so
# MODEL_ID is first a list and is then rebound to a single id string.
MODEL_ID=!(gcloud ai models list --region=$REGION \
           --filter=display_name=$MODEL_NAME)  # ids of the prediction/uploaded models
# NOTE(review): index 2 presumably skips a status line and the table header,
# as seen in the "List All Models" output - confirm, and confirm that the
# first listed model really is the latest one.
MODEL_ID=MODEL_ID[2].split(" ")[0]  # we use the latest uploaded model

model_resource_name = f'projects/{PROJECT_ID}/locations/{REGION}/models/{MODEL_ID}'
# gcs_source is a one-element list of input file URIs.
gcs_source= [f"{BUCKET_URI}/{input_file_name}"]
gcs_destination_prefix=f"{BUCKET_URI}/output" # google storage destination where we save the prediction output

def batch_prediction_job(
    project: str,
    location: str,
    model_resource_name: str,
    job_display_name: str,
    gcs_source: "str | list[str]",
    gcs_destination_prefix: str,
    machine_type: str,
    starting_replica_count: int = 1, # The number of nodes for this batch prediction job. 
    max_replica_count: int = 1,    
):   
    """Run a Vertex AI batch prediction job and wait for it to finish.

    Fixes over the previous version: the SDK is referenced through the
    ``vertex_ai`` alias this notebook imports (``aiplatform`` was never
    imported), and the source, destination and replica-count arguments are
    honoured instead of being ignored in favour of notebook globals.

    Args:
        project: Project used to initialise the SDK.
        location: Region used to initialise the SDK.
        model_resource_name: Full resource name of the model to predict with.
        job_display_name: Display name of the batch prediction job.
        gcs_source: Input file URI, or a list of URIs.
        gcs_destination_prefix: Storage prefix the predictions are written under.
        machine_type: Machine type for the job.
        starting_replica_count: Replica count the job starts with.
        max_replica_count: Upper bound on the replica count.

    Returns:
        The job object returned by ``Model.batch_predict``.
    """
    vertex_ai.init(project=project, location=location)

    model = vertex_ai.Model(model_resource_name)

    # Named `job` so the local no longer shadows this function's own name.
    job = model.batch_predict(
        job_display_name=job_display_name,
        instances_format='csv', #json
        gcs_source=gcs_source,
        gcs_destination_prefix=gcs_destination_prefix,
        machine_type=machine_type, # must be present      
        starting_replica_count=starting_replica_count,
        max_replica_count=max_replica_count,
    )
    job.wait()
    print(job.display_name)
    print(job.state)
    return job

# Launch the batch prediction job with the values defined above; the function
# waits for the job before returning it.
batch_prediction_job(PROJECT_ID, REGION, model_resource_name, job_display_name, gcs_source, gcs_destination_prefix, machine_type="n1-standard-2")

### Send an Online Prediction Request

Each prediction request must be at most 1.5 MB in size.

In [None]:
ENDPOINT_NAME="houseprice_endpoint"
# NOTE(review): placeholder instance; the endpoint request shown earlier in
# this notebook sends one JSON string per row instead - confirm the format the
# serving container expects.
instance = [[1,2,3,2,1,2,3,6,7,10]]  # input data to make prediction on
# The shell capture returns the command's output as a list of lines.
ENDPOINT_ID = !(gcloud ai endpoints list --region=$REGION \
              --format='value(ENDPOINT_ID)'\
              --filter=display_name=$ENDPOINT_NAME \
              --sort-by=creationTimeStamp | tail -1)
# NOTE(review): index 1 presumably skips a leading status line in the captured
# output - confirm against the actual output.
ENDPOINT_ID = ENDPOINT_ID[1]

def endpoint_predict(
    project: str, location: str, instances: list, endpoint: str
):
    """Send an online prediction request to a Vertex AI endpoint.

    The SDK is referenced through the ``vertex_ai`` alias this notebook
    imports; the previous ``aiplatform`` name was never imported and raised
    ``NameError``.

    Args:
        project: Project used to initialise the SDK.
        location: Region used to initialise the SDK.
        instances: Instances to predict on.
        endpoint: Endpoint id or resource name.

    Returns:
        Whatever ``Endpoint.predict`` returns for the given instances.
    """
    vertex_ai.init(project=project, location=location)

    # Separate name so the `endpoint` argument is not rebound to another type.
    endpoint_client = vertex_ai.Endpoint(endpoint)

    return endpoint_client.predict(instances=instances)

# Request a prediction for the sample instance from the endpoint found above.
endpoint_predict(PROJECT_ID, REGION, instance, ENDPOINT_ID)