In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from lightgbm import LGBMRegressor

In [3]:
# Read the insurance dataset (path is relative to the current working directory)
# and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='insurance.csv')
df.head(n=5)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [4]:
# Summarise the frame: index range, per-column non-null counts and dtypes, memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1338 entries, 0 to 1337
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       1338 non-null   int64  
 1   sex       1338 non-null   object 
 2   bmi       1338 non-null   float64
 3   children  1338 non-null   int64  
 4   smoker    1338 non-null   object 
 5   region    1338 non-null   object 
 6   charges   1338 non-null   float64
dtypes: float64(2), int64(2), object(3)
memory usage: 73.3+ KB


In [6]:
# Separate the feature columns from the regression target ('charges').
X = df.drop('charges', axis=1)
y = pd.DataFrame(df['charges'])

# Names of the object-dtype (string-valued) feature columns.
cat_cols = list(X.dtypes[X.dtypes == 'object'].keys())

# Cast those columns to 'category' once, on a copy of the full feature frame and
# BEFORE splitting. Both splits then share one category set per column, and
# nothing is assigned into the frames returned by train_test_split (which can
# raise SettingWithCopyWarning on some pandas / scikit-learn versions).
# X itself is left untouched so the raw values stay available below.
X_categorical = X.astype({col: 'category' for col in cat_cols})

# 70/30 hold-out split; the fixed random_state keeps the split reproducible.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_categorical, y, test_size=0.3, random_state=0
)

# The row labelled 0 as a {column name: value} dict, kept as an example input.
sample_row = X.loc[0].to_dict()

In [7]:
# Fit a LightGBM regressor (constructor defaults) on the training split;
# fit() returns the estimator itself, so the call can be chained.
model = LGBMRegressor().fit(X_train, y_train)

# Predict on the hold-out split and report squared / absolute error.
preds = model.predict(X_valid)
print(
    f'mse: {mean_squared_error(y_valid, preds)}',
    f'mae: {mean_absolute_error(y_valid, preds)}',
    sep='\n',
)

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001210 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 320
[LightGBM] [Info] Number of data points in the train set: 936, number of used features: 6
[LightGBM] [Info] Start training from score 13232.916456
mse: 21144689.620558165
mae: 2867.89459889866


In [10]:
from pydantic import BaseModel, ValidationError

class Customer(BaseModel):
    """Request schema for one prediction: a single customer's feature values.

    The fields correspond to the feature columns of the training frame
    (every column except ``charges``).

    The three categorical fields are declared as ``str``. They were previously
    annotated ``object``, which pydantic treats like ``Any``: nothing was
    validated, so lists, dicts or ``None`` passed through and only failed
    later during the DataFrame / category conversion.
    """

    age: int
    # NOTE(review): any string is accepted for the categorical fields; values
    # not seen during training are not rejected here. Consider Literal[...]
    # once the full set of allowed values is confirmed.
    sex: str
    bmi: float
    children: int
    smoker: str
    region: str

# Smoke-test the schema and the fitted model on the saved example record.
print(sample_row)
customer = Customer(**sample_row)

# Single-row frame whose categorical columns are cast to 'category',
# the same dtype the model was trained with.
inputs = pd.DataFrame([customer.model_dump()]).astype(
    {name: 'category' for name in cat_cols}
)

# predict() returns an array; take its only element and show it as a plain float.
pred = model.predict(inputs)[0]
float(pred)


{'age': 19, 'sex': 'female', 'bmi': 27.9, 'children': 0, 'smoker': 'yes', 'region': 'southwest'}


17944.970402439205

In [None]:
from flask import Flask, jsonify, request

# Flask application object; the /predict route below is registered on it.
app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    """Return a charges prediction for the customer described in the JSON body.

    Request:  POST with a JSON object carrying the ``Customer`` fields.
    Response: ``{"predict": <float>}`` on success, or HTTP 400 with the list
              of pydantic validation errors when the body does not match the
              ``Customer`` schema.
    """
    try:
        # model_validate (rather than Customer(**request.json)) also handles a
        # JSON body that is valid but not an object (e.g. [], null, 5): it
        # raises ValidationError, whereas ** unpacking raised an uncaught
        # TypeError and produced an HTTP 500.
        customer = Customer.model_validate(request.json)
    except ValidationError as e:
        return jsonify(e.errors()), 400

    # Single-row frame with the categorical columns cast to 'category',
    # the same dtype the model was trained with.
    inputs = pd.DataFrame([customer.model_dump()])
    inputs[cat_cols] = inputs[cat_cols].apply(lambda col: col.astype('category'))

    # predict() returns an array; its only element is converted to a plain
    # float so it can be JSON-serialised.
    pred = model.predict(inputs)[0]
    response = {
        'predict': float(pred)
    }

    return jsonify(response)

# Start Flask's built-in server when this code is executed as the main module.
# NOTE(review): inside a notebook kernel __name__ is typically '__main__' too,
# so running this cell would block on app.run() until interrupted — confirm
# that is intended.
if __name__=='__main__':
    app.run()