# <center>New Unseen Data Prediction Regression</center>
---

<center>Muhammad Difagama Ivanka</center>

In [18]:
# run this code to deploy your model, let the code run as default 
def deploy_model(
    project: str,
    location: str,
    display_name: str,
    serving_container_image_uri: str,
    artifact_uri: str,
    endpoint_name: str,
    min_replica_count: int = 1,
    max_replica_count: int = 1,
    machine_type: str = 'e2-standard-2',
) -> "aiplatform.Model":
    """Upload a model to Vertex AI and deploy it to a newly created endpoint.

    Args:
        project: Google Cloud project that owns the model and the endpoint.
        location: Region used for the SDK session and for the endpoint.
        display_name: Display name of the uploaded model; also used as the
            display name of the deployed model on the endpoint.
        serving_container_image_uri: Container image that serves predictions.
        artifact_uri: Cloud Storage folder that holds the model artifacts.
        endpoint_name: Display name given to the new endpoint.
        min_replica_count: Minimum number of serving replicas.
        max_replica_count: Maximum number of serving replicas.
        machine_type: Machine type of the serving nodes. Defaults to the
            value that used to be hard-coded, so existing calls are unchanged.

    Returns:
        The uploaded ``aiplatform.Model``.

    Note:
        ``Endpoint.create`` is called unconditionally, so every call creates
        another endpoint even when ``endpoint_name`` was used before.
    """
    # Google credentials should be assigned to a service account
    # Also note that resources should be configured to use the proper, restricted service account

    aiplatform.init(project=project, location=location)

    model = aiplatform.Model.upload(
        display_name=display_name,
        artifact_uri=artifact_uri,
        serving_container_image_uri=serving_container_image_uri,
    )

    # Make sure the upload has finished before the model is deployed.
    model.wait()

    # Create an Endpoint
    endpoint = aiplatform.Endpoint.create(
        display_name=endpoint_name,
        project=project,
        location=location,
    )

    # Deploy the Model to the Endpoint (sync=True: return only once deployed)
    endpoint.deploy(
        model=model,
        deployed_model_display_name=display_name,
        machine_type=machine_type,
        min_replica_count=min_replica_count,
        max_replica_count=max_replica_count,
        sync=True,
    )

    print(model.display_name)
    print(model.resource_name)
    return model

    ## model deployment
# Settings for this deployment run; edit the values here, then run the cell.
deployment_settings = dict(
    project=project_id,  # Add your project name
    location='us-west1',  # Add your project location
    display_name='tes_model03',  # Add display name
    serving_container_image_uri="us-docker.pkg.dev/vertex-ai/prediction/sklearn-cpu.1-3:latest",
    artifact_uri="gs://difgam_gcs_035/model_clv_car/",  # Add directory/folder of model location
    endpoint_name="endpoint_tes03",  # Add endpoint display name
)
deploy_model(**deployment_settings)



I0000 00:00:1721314988.499647    2752 config.cc:230] gRPC experiments enabled: call_status_override_on_cancellation, event_engine_dns, event_engine_listener, http2_stats_fix, monitoring_experiment, pick_first_new, trace_record_callops, work_serializer_clears_time_cache


Creating Model
Create Model backing LRO: projects/688900009984/locations/us-west1/models/5812660579214557184/operations/8042426729634988032
Model created. Resource name: projects/688900009984/locations/us-west1/models/5812660579214557184@1
To use this Model in another session:
model = aiplatform.Model('projects/688900009984/locations/us-west1/models/5812660579214557184@1')
Creating Endpoint
Create Endpoint backing LRO: projects/688900009984/locations/us-west1/endpoints/3395612963967598592/operations/6670517693147250688
Endpoint created. Resource name: projects/688900009984/locations/us-west1/endpoints/3395612963967598592
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/688900009984/locations/us-west1/endpoints/3395612963967598592')
Deploying Model projects/688900009984/locations/us-west1/models/5812660579214557184 to Endpoint : projects/688900009984/locations/us-west1/endpoints/3395612963967598592
Deploy Endpoint model backing LRO: projects/688900009984

<google.cloud.aiplatform.models.Model object at 0x7e0a7f70c190> 
resource name: projects/688900009984/locations/us-west1/models/5812660579214557184

In [33]:
# ## predict your data with online prediction here 
# PROJECT_ID = project_id
# # ENDPOINT_ID = "your_endpoint_id"
# ENDPOINT_ID = '3395612963967598592'
# REGION = 'us-west1'

# aiplatform.init(project=PROJECT_ID, location=REGION)
# endpoint = aiplatform.Endpoint(ENDPOINT_ID)
# prediction = endpoint.predict(instances=abc)

# print("PREDICTION:", prediction)

In [34]:
# ## in, deploy and test, predict with this template
# {
#     "instances" : [['Luxury SUV','Premium','Offer3','Retired','Divorced','Master',5.0,10.0,10000.0,50000.0],['Luxury Car','Premium','Offer1','Employed','Single','College',2.0,320.0,2000.0,13000.0]]
# }

In [3]:
# Run locally & on the cloud
import pandas as pd
import numpy as np
import pickle
from sklearn.ensemble import (GradientBoostingRegressor)
# from xgboost import XGBRegressor

In [4]:
# Two hand-built customers used as a quick smoke test for the model.
# Written row by row: each inner list is one customer, in `feature_names` order.
feature_names = [
    'Vehicle Class',
    'Coverage',
    'Renew Offer Type',
    'EmploymentStatus',
    'Marital Status',
    'Education',
    'Number of Policies',
    'Monthly Premium Auto',
    'Total Claim Amount',
    'Income',
]
customer_rows = [
    ['Luxury SUV', 'Premium', 'Offer3', 'Retired', 'Divorced', 'Master', 5.0, 10.0, 10_000.0, 50_000.00],
    ['Luxury Car', 'Premium', 'Offer1', 'Employed', 'Single', 'College', 2.0, 320.0, 2_000.0, 13_000.0],
]
new_test_data = pd.DataFrame(customer_rows, columns=feature_names)

new_test_data

Unnamed: 0,Vehicle Class,Coverage,Renew Offer Type,EmploymentStatus,Marital Status,Education,Number of Policies,Monthly Premium Auto,Total Claim Amount,Income
0,Luxury SUV,Premium,Offer3,Retired,Divorced,Master,5.0,10.0,10000.0,50000.0
1,Luxury Car,Premium,Offer1,Employed,Single,College,2.0,320.0,2000.0,13000.0


# Variables Needed for Google Cloud (or you could just upload all of them to GitHub)

In [2]:
#import google cloud library
from google.cloud import bigquery
from google.cloud import storage
from google.cloud import aiplatform

In [7]:
## Set up authentication with a service account and define the shared settings
import os
# Point the Google client libraries at the service-account key file.
# The path is relative, so it depends on the directory the kernel runs in.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "../../vertex-ai-ml/sa-development.json"
# os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "../vertex-ai-ml/dti-ds-31329ac0651d.json"

# Names read by the Cloud Storage and BigQuery cells of this notebook.
project_id = 'dti-ds'
dataset_id = 'difgam_dataset_035'
table_id = 'test2_clv_manual'  # table queried by the BigQuery load cell
region = 'us-central1'  # NOTE(review): no cell in this notebook reads `region` — confirm it is still needed
bucket_name = 'difgam_gcs_035'
blob_name = 'data/car_insurance_real.csv'  # only referenced by the commented-out dataset download

# File name of the pickled model; used both for the local path and the bucket object name.
model_name = 'best_model_clv_gb_20240718225701.sav'

## Google Cloud Storage

### Upload to Google Cloud Storage - skipped

In [None]:
# Upload the model to Google Cloud Storage
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)  # Add bucket name
    blob_model = bucket.blob(f'model/{model_name}')
    blob_model.upload_from_filename(model_name)

    print("Uploading model succeeded")
except Exception as exc:
    # Catch `Exception` (not a bare `except:`) so Ctrl-C / kernel interrupts
    # still propagate, and chain the original error so the real cause
    # (missing file, permissions, unknown bucket, ...) stays in the traceback.
    raise RuntimeError(
        f"Uploading '{model_name}' to gs://{bucket_name}/model/ failed"
    ) from exc

### Retrieve from Google Cloud Storage

In [10]:
# Retrieve the model from Google Cloud Storage
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)  # Add bucket name
    blob_model = bucket.blob(f'model/{model_name}')
    # The cloud prediction cell opens this exact local file name.
    blob_model.download_to_filename('final_model_xgb3.sav')

    print("Read model succeeded")
except Exception as exc:
    # Catch `Exception` (not a bare `except:`) so Ctrl-C / kernel interrupts
    # still propagate, and chain the original error so the real cause
    # (missing object, permissions, unknown bucket, ...) stays in the traceback.
    raise RuntimeError(
        f"Downloading gs://{bucket_name}/model/{model_name} failed"
    ) from exc

Read model succeeded


In [None]:
# # Retrieve the dataset from Google Cloud Storage
# try : 
#     storage_client = storage.Client(project=project_id)
#     bucket = storage_client.get_bucket(bucket_name) # Add bucket name
#     blob_model = bucket.blob(blob_name)
#     blob_model.download_to_filename('car_insurance_real.csv')

#     print ("Read Data succeeded")
# except:
#     raise TypeError("An exception occurred")

## BigQuery

### Send to BigQuery (Dataset Creation) - skipped; the predictions are sent back with an auto-detected schema

In [27]:
from google.cloud import bigquery

# Destination table for the predictions. It gets its own variable so the
# source `table_id` from the config cell is not overwritten: the BigQuery load
# cell interpolates `table_id` into its query and must keep reading the source.
predicted_table_id = 'unseen_car_clv_predicted'

# Construct a BigQuery client object (explicit project, same as the load cell).
client = bigquery.Client(project=project_id)

# Define the full table ID
table_full_id = f"{client.project}.{dataset_id}.{predicted_table_id}"

# Optional explicit schema (you can skip this part and let the load job infer
# it). The field names must match the underscore-separated column names
# produced below.
# schema = [
#     bigquery.SchemaField("Vehicle_Class", "STRING"),
#     bigquery.SchemaField("Coverage", "STRING"),
#     bigquery.SchemaField("Renew_Offer_Type", "STRING"),
#     bigquery.SchemaField("EmploymentStatus", "STRING"),
#     bigquery.SchemaField("Marital_Status", "STRING"),
#     bigquery.SchemaField("Education", "STRING"),
#     bigquery.SchemaField("Number_of_Policies", "FLOAT"),
#     bigquery.SchemaField("Monthly_Premium_Auto", "FLOAT"),
#     bigquery.SchemaField("Total_Claim_Amount", "FLOAT"),
#     bigquery.SchemaField("Income", "FLOAT"),
#     bigquery.SchemaField("CLV_Prediction", "FLOAT")
# ]

# Replace spaces in the column names with underscores on a renamed copy, so
# `auto_cloud` keeps its original column names and this cell can be re-run.
predicted_upload = auto_cloud.rename(columns=lambda name: name.replace(' ', '_'))

# Create the table
# table = bigquery.Table(table_full_id)
# table = client.create_table(table, exists_ok=True)
# print(f"Created table {table.project}.{table.dataset_id}.{table.table_id}")

# Load the DataFrame into the BigQuery table.
# NOTE(review): no write disposition is configured, so re-running this cell
# presumably appends the same rows again — confirm before re-running.
job = client.load_table_from_dataframe(predicted_upload, table_full_id)

# Wait for the job to complete
job.result()
print(f"Loaded {job.output_rows} rows into {table_full_id}")

Loaded 946 rows into dti-ds.difgam_dataset_035.unseen_car_clv_predicted


In [7]:
# Print the column dtypes and non-null counts of `auto_cloud`.
# `auto_cloud` must already exist in the kernel, i.e. the cell that queries BigQuery has to run first.
auto_cloud.info()

NameError: name 'auto_cloud' is not defined

### Load from BigQuery

In [6]:
# Pull every row of the source table from BigQuery into a DataFrame.
from google.cloud import bigquery

client = bigquery.Client(project=project_id)

select_all_sql = f"""select * from {dataset_id}.{table_id}"""
query_job = client.query(select_all_sql)
query_rows = query_job.result()  # blocks until the query has finished
auto_cloud = query_rows.to_dataframe()

# Relabel the columns positionally with the names used by `new_test_data`.
# NOTE(review): this assumes the table's column order matches `new_test_data` — confirm.
auto_cloud.columns = new_test_data.columns
auto_cloud



Unnamed: 0,Vehicle Class,Coverage,Renew Offer Type,EmploymentStatus,Marital Status,Education,Number of Policies,Monthly Premium Auto,Total Claim Amount,Income
0,Four-Door Car,Basic,Offer1,Retired,Divorced,Bachelor,3.0,65.0,312.000000,17622.0
1,Two-Door Car,Basic,Offer1,Retired,Single,College,3.0,65.0,468.000000,28215.0
2,Four-Door Car,Basic,Offer1,Retired,Divorced,High School or Below,1.0,65.0,572.072106,22283.0
3,Four-Door Car,Extended,Offer3,Retired,Married,Master,9.0,93.0,498.452698,16224.0
4,SUV,Basic,Offer1,Retired,Single,High School or Below,1.0,101.0,694.598288,25147.0
...,...,...,...,...,...,...,...,...,...,...
941,Four-Door Car,Extended,Offer3,Medical Leave,Married,College,5.0,79.0,344.693511,23843.0
942,Four-Door Car,Extended,Offer1,Medical Leave,Married,High School or Below,7.0,83.0,398.400000,19831.0
943,Two-Door Car,Premium,Offer2,Medical Leave,Married,Master,2.0,107.0,333.886816,15818.0
944,Two-Door Car,Premium,Offer1,Medical Leave,Married,College,1.0,115.0,508.614758,20394.0


# Model Prediction

## Local Prediction (Please do not run this part on the cloud!!)

In [14]:
# Local test: unpickle the model stored under ../models/.
# pickle.load executes code embedded in the file, so only open model files you trust.
local_model_path = f'../models/{model_name}'
with open(local_model_path, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
loaded_model

In [22]:
# Score the hand-built rows in `new_test_data` with the currently loaded model.
y_pred_local_new_data = loaded_model.predict(new_test_data)
y_pred_local_new_data

array([ 5625.1167, 19218.45  ], dtype=float32)

In [26]:
# New frame: the hand-built rows plus their local predictions (`new_test_data` itself is left unchanged).
new_test_local = new_test_data.assign(**{'CLV Prediction': y_pred_local_new_data})
new_test_local

Unnamed: 0,Vehicle Class,Coverage,Renew Offer Type,EmploymentStatus,Marital Status,Education,Number of Policies,Monthly Premium Auto,Total Claim Amount,Income,CLV Prediction
0,Luxury SUV,Premium,Offer3,Retired,Divorced,Master,5.0,10.0,10000.0,50000.0,5625.116699
1,Luxury Car,Premium,Offer1,Employed,Single,College,2.0,320.0,2000.0,13000.0,19218.449219


In [20]:
# Load the new, unseen data (no target column) from the local CSV file

auto = pd.read_csv('../car_insurance_real.csv')
auto

Unnamed: 0,Vehicle Class,Coverage,Renew Offer Type,EmploymentStatus,Marital Status,Education,Number of Policies,Monthly Premium Auto,Total Claim Amount,Income
0,Two-Door Car,Basic,Offer1,Unemployed,Single,Bachelor,2.0,68.0,489.600000,0.0
1,Four-Door Car,Basic,Offer1,Employed,Married,College,1.0,73.0,59.861963,67763.0
2,Two-Door Car,Basic,Offer3,Unemployed,Single,College,1.0,65.0,468.000000,0.0
3,Four-Door Car,Basic,Offer1,Unemployed,Single,High School or Below,3.0,70.0,336.000000,0.0
4,Four-Door Car,Basic,Offer3,Unemployed,Married,Doctor,9.0,63.0,272.843439,0.0
...,...,...,...,...,...,...,...,...,...,...
941,Four-Door Car,Basic,Offer1,Unemployed,Single,Bachelor,1.0,70.0,522.413140,0.0
942,Four-Door Car,Basic,Offer3,Medical Leave,Single,High School or Below,1.0,62.0,219.288706,16618.0
943,Luxury SUV,Extended,Offer1,Unemployed,Single,Bachelor,1.0,245.0,2345.413441,0.0
944,Sports Car,Basic,Offer2,Employed,Married,College,1.0,100.0,121.928014,56586.0


In [29]:
# Predict for every row of the local CSV frame; only the first 13 predictions are displayed.
y_pred_file_local = loaded_model.predict(auto)
y_pred_file_local[:13]

array([ 9868.214 ,  2943.0862,  2085.693 ,  5032.478 ,  4819.431 ,
        4983.1313,  5515.4155, 17676.86  ,  4225.032 ,  2922.464 ],
      dtype=float32)

In [28]:
# New frame: the CSV rows plus their local predictions (`auto` itself is left unchanged).
auto_local = auto.assign(**{'CLV Prediction': y_pred_file_local})
auto_local

Unnamed: 0,Vehicle Class,Coverage,Renew Offer Type,EmploymentStatus,Marital Status,Education,Number of Policies,Monthly Premium Auto,Total Claim Amount,Income,CLV Prediction
0,Two-Door Car,Basic,Offer1,Unemployed,Single,Bachelor,2.0,68.0,489.600000,0.0,9868.213867
1,Four-Door Car,Basic,Offer1,Employed,Married,College,1.0,73.0,59.861963,67763.0,2943.086182
2,Two-Door Car,Basic,Offer3,Unemployed,Single,College,1.0,65.0,468.000000,0.0,2085.693115
3,Four-Door Car,Basic,Offer1,Unemployed,Single,High School or Below,3.0,70.0,336.000000,0.0,5032.478027
4,Four-Door Car,Basic,Offer3,Unemployed,Married,Doctor,9.0,63.0,272.843439,0.0,4819.431152
...,...,...,...,...,...,...,...,...,...,...,...
941,Four-Door Car,Basic,Offer1,Unemployed,Single,Bachelor,1.0,70.0,522.413140,0.0,2485.338379
942,Four-Door Car,Basic,Offer3,Medical Leave,Single,High School or Below,1.0,62.0,219.288706,16618.0,2529.714111
943,Luxury SUV,Extended,Offer1,Unemployed,Single,Bachelor,1.0,245.0,2345.413441,0.0,8716.571289
944,Sports Car,Basic,Offer2,Employed,Married,College,1.0,100.0,121.928014,56586.0,4181.984375


## Cloud Data Prediction

In [14]:
# Load the model file written by the Cloud Storage download cell, then score the hand-built rows.
# pickle.load executes code embedded in the file, so only open model files you trust.
cloud_model_path = 'final_model_xgb3.sav'
with open(cloud_model_path, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

y_pred_cloud_new_data = loaded_model.predict(new_test_data)
y_pred_cloud_new_data

array([ 5625.1167, 19218.45  ], dtype=float32)

In [12]:
# New frame: the hand-built rows plus the cloud-model predictions as a new column.
new_test_cloud = new_test_data.assign(**{'CLV Prediction': y_pred_cloud_new_data})
new_test_cloud

Unnamed: 0,Vehicle Class,Coverage,Renew Offer Type,EmploymentStatus,Marital Status,Education,Number of Policies,Monthly Premium Auto,Total Claim Amount,Income,CLV Prediction
0,Luxury SUV,Premium,Offer3,Retired,Divorced,Master,5.0,10.0,10000.0,50000.0,5625.116699
1,Luxury Car,Premium,Offer1,Employed,Single,College,2.0,320.0,2000.0,13000.0,19218.449219


In [11]:
# Display the currently loaded model object.
loaded_model

In [13]:
# loaded_model.predict(X_test.iloc[3:13])
# Predict for every row of `auto_cloud`; only the first 13 predictions are displayed.
y_pred_file_cloud = loaded_model.predict(auto_cloud)
y_pred_file_cloud[:13]

array([ 4852.403 ,  4843.5234,  2387.613 ,  7162.051 ,  3647.7017,
       11925.303 ,  3907.652 ,  9733.927 ,  9751.483 ,  6317.7954,
        4756.5107,  5036.245 ,  2378.7725], dtype=float32)

In [14]:
# Adds the prediction column to `auto_cloud` itself (no copy is made), so the
# frame that `loaded_model.predict(auto_cloud)` receives changes once this runs.
# NOTE(review): the BigQuery upload cell sends this frame and presumably relies
# on the column being present — confirm before switching to a copy.
auto_cloud['CLV Prediction'] = y_pred_file_cloud # new column
auto_cloud

Unnamed: 0,Vehicle Class,Coverage,Renew Offer Type,EmploymentStatus,Marital Status,Education,Number of Policies,Monthly Premium Auto,Total Claim Amount,Income,CLV Prediction
0,Four-Door Car,Basic,Offer1,Retired,Divorced,Bachelor,3.0,65.0,312.000000,17622.0,4852.402832
1,Two-Door Car,Basic,Offer1,Retired,Single,College,3.0,65.0,468.000000,28215.0,4843.523438
2,Four-Door Car,Basic,Offer1,Retired,Divorced,High School or Below,1.0,65.0,572.072106,22283.0,2387.613037
3,Four-Door Car,Extended,Offer3,Retired,Married,Master,9.0,93.0,498.452698,16224.0,7162.050781
4,SUV,Basic,Offer1,Retired,Single,High School or Below,1.0,101.0,694.598288,25147.0,3647.701660
...,...,...,...,...,...,...,...,...,...,...,...
941,Four-Door Car,Extended,Offer3,Medical Leave,Married,College,5.0,79.0,344.693511,23843.0,5851.253906
942,Four-Door Car,Extended,Offer1,Medical Leave,Married,High School or Below,7.0,83.0,398.400000,19831.0,6483.369629
943,Two-Door Car,Premium,Offer2,Medical Leave,Married,Master,2.0,107.0,333.886816,15818.0,13298.038086
944,Two-Door Car,Premium,Offer1,Medical Leave,Married,College,1.0,115.0,508.614758,20394.0,4441.895020


---
<center>Thank You</center>

---