In [4]:
from __future__ import annotations
import bentoml
from datetime import datetime
from pydantic import BaseModel, PositiveInt
from typing import Optional

In [14]:
import joblib
import pandas as pd
from sklearn.pipeline import Pipeline
from xgboost import XGBRegressor

In [6]:
class Building(BaseModel):
    """Validated description of a single building record.

    Pydantic coerces compatible input values to the declared types (for
    example the string ``"7"`` becomes the integer ``7`` for
    ``CouncilDistrictCode``) and raises ``ValidationError`` otherwise.

    ``DataYear`` defaults to 2016. The second and third largest property use
    types may be omitted or passed as ``None``; every other field is required.
    """

    DataYear: int = 2016
    BuildingType: str
    PrimaryPropertyType: str
    # Optional[...] alone only allows None as a value; the explicit default is
    # what lets callers omit the key entirely.
    SecondLargestPropertyUseType: Optional[str] = None
    ThirdLargestPropertyUseType: Optional[str] = None
    ZipCode: str
    CouncilDistrictCode: int
    Neighborhood: str
    YearBuilt: int
    NumberofBuildings: int
    NumberofFloors: int
    PropertyGFATotal: float
    PropertyGFAParking: float
    ListOfAllPropertyUseTypes: str
    LargestPropertyUseType: str
    SteamUsekBtu: float
    NaturalGastherms: float
    DefaultData: bool
    ComplianceStatus: str

In [10]:
# Raw, unvalidated payload for one building. CouncilDistrictCode is
# deliberately given as a string here; the other values already have the
# Python type they are meant to carry.
external_data = dict(
    DataYear=2016,
    BuildingType="NonResidential",
    PrimaryPropertyType="Other",
    SecondLargestPropertyUseType=None,
    ThirdLargestPropertyUseType=None,
    ZipCode="98101",
    CouncilDistrictCode="7",
    Neighborhood="DOWNTOWN",
    YearBuilt=2004,
    NumberofBuildings=1,
    NumberofFloors=11,
    PropertyGFATotal=299070,
    PropertyGFAParking=68432,
    ListOfAllPropertyUseTypes="Other",
    LargestPropertyUseType="Other",
    SteamUsekBtu=0,
    NaturalGastherms=346853.3125,
    DefaultData=False,
    ComplianceStatus="Compliant",
)

# Validate the raw payload and coerce its values to the declared field types.
building = Building.model_validate(external_data)

In [13]:
# Display the validated model as a plain dict of field name -> value.
building.model_dump()

{'DataYear': 2016,
 'BuildingType': 'NonResidential',
 'PrimaryPropertyType': 'Other',
 'SecondLargestPropertyUseType': None,
 'ThirdLargestPropertyUseType': None,
 'ZipCode': '98101',
 'CouncilDistrictCode': 7,
 'Neighborhood': 'DOWNTOWN',
 'YearBuilt': 2004,
 'NumberofBuildings': 1,
 'NumberofFloors': 11,
 'PropertyGFATotal': 299070.0,
 'PropertyGFAParking': 68432.0,
 'ListOfAllPropertyUseTypes': 'Other',
 'LargestPropertyUseType': 'Other',
 'SteamUsekBtu': 0.0,
 'NaturalGastherms': 346853.3125,
 'DefaultData': False,
 'ComplianceStatus': 'Compliant'}

In [None]:

# Attribute access on the validated model returns the coerced value:
# CouncilDistrictCode was supplied as the string "7" and is stored as an int.
print(building.CouncilDistrictCode)
#> 7
print(building.model_dump())

In [16]:
# Example request payload used as the default argument of the prediction
# endpoint. Values use the same Python types the Building schema declares
# (CouncilDistrictCode is an int, ZipCode stays a string).
EXAMPLE_INPUT = {
    "DataYear": 2016,
    "BuildingType": "NonResidential",
    "PrimaryPropertyType": "Other",
    "SecondLargestPropertyUseType": None,
    "ThirdLargestPropertyUseType": None,
    "ZipCode": "98101",
    "CouncilDistrictCode": 7,
    "Neighborhood": "DOWNTOWN",
    "YearBuilt": 2004,
    "NumberofBuildings": 1,
    "NumberofFloors": 11,
    "PropertyGFATotal": 299070,
    "PropertyGFAParking": 68432,
    "ListOfAllPropertyUseTypes": "Other",
    "LargestPropertyUseType": "Other",
    "SteamUsekBtu": 0,
    "NaturalGastherms": 346853.3125,
    "DefaultData": False,
    "ComplianceStatus": "Compliant",
}


@bentoml.service
class Prediction:
    """BentoML service that returns a single numeric prediction for a building.

    On construction the service loads three joblib artifacts from paths
    relative to the working directory: the model, the preprocessing pipeline
    and a target scaler.
    """

    def __init__(self) -> None:
        # NOTE: joblib.load unpickles arbitrary objects; only load artifacts
        # produced by a trusted training run.
        self.model = joblib.load("model/xgb.model")
        self.pipeline = joblib.load("preprocessing.pipeline")
        # NOTE(review): this scaler is loaded but never applied in this
        # service. If the model was trained on a scaled target, predictions
        # would need scaler_y.inverse_transform(...) - confirm with training.
        self.scaler_y = joblib.load("y.scaler")

    @bentoml.api
    def summarize(self, building: Building = EXAMPLE_INPUT) -> float:
        """Predict the target value for one building.

        Args:
            building: The building to score. A plain mapping with the same
                keys (such as the ``EXAMPLE_INPUT`` default) is accepted and
                validated into a ``Building`` first.

        Returns:
            The model's prediction for this building as a Python ``float``.

        Raises:
            pydantic.ValidationError: If ``building`` is a mapping that does
                not satisfy the ``Building`` schema.
        """
        # The declared default is a raw dict, so normalise before using
        # model attributes; an existing Building instance passes through.
        record = Building.model_validate(building)

        # assumes the pipeline was fitted on a DataFrame whose columns are
        # named like the Building fields - TODO confirm against training code
        features = pd.DataFrame([record.model_dump()])

        # A scikit-learn Pipeline is not callable; apply it via transform().
        transformed_data = self.pipeline.transform(features)
        result = self.model.predict(transformed_data)

        # predict() returns one value per input row; unwrap the single row so
        # the return value matches the annotated float.
        return float(result[0])