# <center>New Unseen Data Prediction Regression</center>
---

<center>Muhammad Difagama Ivanka</center>

In [23]:
# Run locally & on the cloud
import pandas as pd
import numpy as np
import pickle
from xgboost import XGBRegressor

In [24]:
# Two hand-made customer profiles used as brand-new input for the model.
# Each record is one customer; the keys become the DataFrame columns.
customer_records = [
    {
        'Vehicle Class': 'Luxury SUV',
        'Coverage': 'Premium',
        'Renew Offer Type': 'Offer3',
        'EmploymentStatus': 'Retired',
        'Marital Status': 'Divorced',
        'Education': 'Master',
        'Number of Policies': 5.0,
        'Monthly Premium Auto': 10.0,
        'Total Claim Amount': 10_000.0,
        'Income': 50_000.00,
    },
    {
        'Vehicle Class': 'Luxury Car',
        'Coverage': 'Premium',
        'Renew Offer Type': 'Offer1',
        'EmploymentStatus': 'Employed',
        'Marital Status': 'Single',
        'Education': 'College',
        'Number of Policies': 2.0,
        'Monthly Premium Auto': 320.0,
        'Total Claim Amount': 2_000.0,
        'Income': 13_000.0,
    },
]
new_test_data = pd.DataFrame(customer_records)

new_test_data

Unnamed: 0,Vehicle Class,Coverage,Renew Offer Type,EmploymentStatus,Marital Status,Education,Number of Policies,Monthly Premium Auto,Total Claim Amount,Income
0,Luxury SUV,Premium,Offer3,Retired,Divorced,Master,5.0,10.0,10000.0,50000.0
1,Luxury Car,Premium,Offer1,Employed,Single,College,2.0,320.0,2000.0,13000.0


# Variables Needed for Google Cloud (or you could just upload all of them to GitHub)

In [None]:
#import google cloud library
from google.cloud import bigquery
from google.cloud import storage
from google.cloud import aiplatform

In [12]:
## Set up authentication using a service account
import os

# Relative path to the service-account key file used for local runs.
# (Alternative key file previously used: "../vertex-ai-ml/dti-ds-31329ac0651d.json")
service_account_path = "../vertex-ai-ml/sa-development.json"

# Only point the Google client libraries at the key file when it actually exists.
# If GOOGLE_APPLICATION_CREDENTIALS names a missing file, credential lookup fails,
# which would break runs in environments where the key file is not present and
# the environment's own default credentials should be used instead.
if os.path.isfile(service_account_path):
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = service_account_path
else:
    print(f"Service account key not found at {service_account_path}; "
          "falling back to the environment's default credentials")

# Google Cloud resource identifiers used by the cells below
project_id = 'dti-ds'                      # project passed to the storage / BigQuery clients
dataset_id = 'difgam_dataset_035'          # BigQuery dataset
table_id = 'carclv_dataset_difgam'         # BigQuery table queried for prediction input
region = 'us-central1'                     # location used when creating the dataset
bucket_name = 'difgam_gcs_035'             # Cloud Storage bucket
blob_name = 'data/car_insurance_real.csv'  # object path of the CSV data inside the bucket

# File name of the pickled model (stored under model/ in the bucket, ../models/ locally)
model_name = 'best_model_clv_xgb_20240715123616.sav'

## Google Cloud Storage

### Upload to Google Cloud Storage - skipped

In [None]:
# Upload the model to Google Cloud Storage
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    # Destination object inside the bucket: model/<model_name>
    blob_model = bucket.blob(f'model/{model_name}')
    # NOTE(review): the source file is read from the current working directory, while the
    # local-prediction cell loads the model from ../models/ — confirm which location is intended.
    blob_model.upload_from_filename(model_name)

    print("Uploading model succeeded")
except Exception as exc:
    # Catch Exception rather than using a bare except so that KeyboardInterrupt /
    # SystemExit are not turned into a TypeError. The TypeError type is kept for
    # compatibility, but the real cause is now chained and shown in the message.
    raise TypeError(f"An exception occurred: {exc}") from exc

### Retrieve from Google Cloud Storage

In [None]:
# Retrieve the model from Google Cloud Storage
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    # Source object inside the bucket: model/<model_name>
    blob_model = bucket.blob(f'model/{model_name}')
    # Saved into the current working directory under a fixed local file name
    blob_model.download_to_filename('final_model_xgb.sav')

    print("Read model succeeded")
except Exception as exc:
    # Catch Exception rather than using a bare except so that KeyboardInterrupt /
    # SystemExit are not turned into a TypeError. The TypeError type is kept for
    # compatibility, but the real cause is now chained and shown in the message.
    raise TypeError(f"An exception occurred: {exc}") from exc

In [None]:
# Retrieve the dataset from Google Cloud Storage
try:
    storage_client = storage.Client(project=project_id)
    bucket = storage_client.get_bucket(bucket_name)
    # blob_name is the object path of the CSV data inside the bucket
    blob_data = bucket.blob(blob_name)
    # Saved into the current working directory
    blob_data.download_to_filename('car_insurance_real.csv')

    print("Read Data succeeded")
except Exception as exc:
    # Catch Exception rather than using a bare except so that KeyboardInterrupt /
    # SystemExit are not turned into a TypeError. The TypeError type is kept for
    # compatibility, but the real cause is now chained and shown in the message.
    raise TypeError(f"An exception occurred: {exc}") from exc

## BigQuery

### Send to BigQuery (Dataset Creation) - Skipped: uses auto creation from the upload to the data folder on GCS

In [None]:
from google.cloud import bigquery

# Construct a BigQuery client object bound to the configured project
# (same construction as the "Load from BigQuery" cell).
client = bigquery.Client(project=project_id)

# bigquery.Dataset requires a fully-qualified "project.dataset" ID when given a string;
# passing the bare dataset_id raises ValueError, so the project is prefixed here.
dataset = bigquery.Dataset(f"{client.project}.{dataset_id}")

# Geographic location where the dataset should reside.
dataset.location = region

# Send the dataset to the API for creation, with an explicit timeout.
# Raises google.api_core.exceptions.Conflict if the Dataset already
# exists within the project.
dataset = client.create_dataset(dataset, timeout=30)  # Make an API request.
print("Created dataset {}.{}".format(client.project, dataset.dataset_id))

### Load from BigQuery

In [None]:
# Load the prediction input from BigQuery
from google.cloud import bigquery

# BigQuery client bound to the configured project
client = bigquery.Client(project=project_id)

# Fetch every row and column of the table into a DataFrame
sql = f"""select * from {dataset_id}.{table_id}"""
query_job = client.query(sql)
auto_cloud = query_job.result().to_dataframe()

# Rename the columns positionally to the names used in new_test_data.
# NOTE(review): this assumes the table's column order matches new_test_data — confirm.
auto_cloud.columns = new_test_data.columns
auto_cloud

# Model Prediction

## Local Prediction (Please do not run this part on the cloud!!)

In [14]:
# Local test: unpickle the trained model from the local models folder.
# NOTE: pickle can execute arbitrary code on load — only open trusted model files.
local_model_path = f'../models/{model_name}'
with open(local_model_path, 'rb') as model_file:
    loaded_model = pickle.load(model_file)
loaded_model

In [22]:
# Predict with the locally loaded model on the hand-made sample rows
y_pred_local_new_data = loaded_model.predict(new_test_data)
y_pred_local_new_data

array([ 5625.1167, 19218.45  ], dtype=float32)

In [26]:
# New frame = sample rows plus the local predictions; new_test_data itself is left untouched
new_test_local = new_test_data.assign(**{'CLV Prediction': y_pred_local_new_data})
new_test_local

Unnamed: 0,Vehicle Class,Coverage,Renew Offer Type,EmploymentStatus,Marital Status,Education,Number of Policies,Monthly Premium Auto,Total Claim Amount,Income,CLV Prediction
0,Luxury SUV,Premium,Offer3,Retired,Divorced,Master,5.0,10.0,10000.0,50000.0,5625.116699
1,Luxury Car,Premium,Offer1,Employed,Single,College,2.0,320.0,2000.0,13000.0,19218.449219


In [20]:
# Load the new unseen data (without the target feature) from the local CSV file
csv_path = '../car_insurance_real.csv'
auto = pd.read_csv(csv_path)
auto

Unnamed: 0,Vehicle Class,Coverage,Renew Offer Type,EmploymentStatus,Marital Status,Education,Number of Policies,Monthly Premium Auto,Total Claim Amount,Income
0,Two-Door Car,Basic,Offer1,Unemployed,Single,Bachelor,2.0,68.0,489.600000,0.0
1,Four-Door Car,Basic,Offer1,Employed,Married,College,1.0,73.0,59.861963,67763.0
2,Two-Door Car,Basic,Offer3,Unemployed,Single,College,1.0,65.0,468.000000,0.0
3,Four-Door Car,Basic,Offer1,Unemployed,Single,High School or Below,3.0,70.0,336.000000,0.0
4,Four-Door Car,Basic,Offer3,Unemployed,Married,Doctor,9.0,63.0,272.843439,0.0
...,...,...,...,...,...,...,...,...,...,...
941,Four-Door Car,Basic,Offer1,Unemployed,Single,Bachelor,1.0,70.0,522.413140,0.0
942,Four-Door Car,Basic,Offer3,Medical Leave,Single,High School or Below,1.0,62.0,219.288706,16618.0
943,Luxury SUV,Extended,Offer1,Unemployed,Single,Bachelor,1.0,245.0,2345.413441,0.0
944,Sports Car,Basic,Offer2,Employed,Married,College,1.0,100.0,121.928014,56586.0


In [29]:
# Predict for every row of the CSV data, then preview the first entries of the result
y_pred_file_local = loaded_model.predict(auto)
y_pred_file_local[:13]

array([ 9868.214 ,  2943.0862,  2085.693 ,  5032.478 ,  4819.431 ,
        4983.1313,  5515.4155, 17676.86  ,  4225.032 ,  2922.464 ],
      dtype=float32)

In [28]:
# New frame = CSV data plus the local predictions; the original `auto` frame is left untouched
auto_local = auto.assign(**{'CLV Prediction': y_pred_file_local})
auto_local

Unnamed: 0,Vehicle Class,Coverage,Renew Offer Type,EmploymentStatus,Marital Status,Education,Number of Policies,Monthly Premium Auto,Total Claim Amount,Income,CLV Prediction
0,Two-Door Car,Basic,Offer1,Unemployed,Single,Bachelor,2.0,68.0,489.600000,0.0,9868.213867
1,Four-Door Car,Basic,Offer1,Employed,Married,College,1.0,73.0,59.861963,67763.0,2943.086182
2,Two-Door Car,Basic,Offer3,Unemployed,Single,College,1.0,65.0,468.000000,0.0,2085.693115
3,Four-Door Car,Basic,Offer1,Unemployed,Single,High School or Below,3.0,70.0,336.000000,0.0,5032.478027
4,Four-Door Car,Basic,Offer3,Unemployed,Married,Doctor,9.0,63.0,272.843439,0.0,4819.431152
...,...,...,...,...,...,...,...,...,...,...,...
941,Four-Door Car,Basic,Offer1,Unemployed,Single,Bachelor,1.0,70.0,522.413140,0.0,2485.338379
942,Four-Door Car,Basic,Offer3,Medical Leave,Single,High School or Below,1.0,62.0,219.288706,16618.0,2529.714111
943,Luxury SUV,Extended,Offer1,Unemployed,Single,Bachelor,1.0,245.0,2345.413441,0.0,8716.571289
944,Sports Car,Basic,Offer2,Employed,Married,College,1.0,100.0,121.928014,56586.0,4181.984375


## Cloud Data Prediction

In [None]:
# Unpickle the model file that was downloaded from Google Cloud Storage.
# NOTE: pickle can execute arbitrary code on load — only open trusted model files.
with open('final_model_xgb.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Predict on the hand-made sample rows with the downloaded model
y_pred_cloud_new_data = loaded_model.predict(new_test_data)
y_pred_cloud_new_data

In [None]:
# New frame = sample rows plus the cloud-model predictions; new_test_data is left untouched
new_test_cloud = new_test_data.assign(**{'CLV Prediction': y_pred_cloud_new_data})
new_test_cloud

In [26]:
# Predict for the data loaded from BigQuery, then preview the first entries of the result
y_pred_file_cloud = loaded_model.predict(auto_cloud)
y_pred_file_cloud[:13]

array([ 5625.1167, 19218.45  ], dtype=float32)

In [None]:
# Attach the predictions to a copy instead of mutating auto_cloud in place.
# auto_cloud is the model input: adding the prediction column to it would make a
# re-run of the predict cell pass an extra 'CLV Prediction' column to the model.
# Working on a copy also matches how the other prediction cells build their results.
auto_cloud_pred = auto_cloud.copy()
auto_cloud_pred['CLV Prediction'] = y_pred_file_cloud
auto_cloud_pred

---
<center>Thank You</center>

---