# Example of Model Development and Production

## Generating the Model

In [1]:
# Imports
import numpy as np
import pandas as pd

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

from sklearn.ensemble import RandomForestClassifier

In [2]:
# Load the Titanic passenger table from the working directory
titanic_df = pd.read_csv("titanic.csv")

# Drop the columns that are not used as model features
unused_cols = ['PassengerId', 'Name', 'Ticket', 'Cabin']
titanic_df = titanic_df.drop(columns=unused_cols)

# Column groups consumed by the preprocessing step
categorical_cols = ['Sex', 'Embarked']
numerical_cols = ['Pclass', 'Age', 'SibSp', 'Parch', 'Fare']

# Separate the target ('Survived') from the feature columns
y = titanic_df['Survived']
X = titanic_df.drop(columns='Survived')

# Hold out 20% of the rows for evaluation; the seed is fixed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [3]:
# Preprocessing setup: one sub-pipeline per column group

# Categorical columns: fill gaps with the most frequent value, then one-hot
# encode; categories not seen during fit are ignored instead of raising
categorical_steps = [
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
]
categorical_transformer = Pipeline(steps=categorical_steps)

# Numerical columns: fill gaps with the column mean
numerical_steps = [('imputer', SimpleImputer(strategy='mean'))]
numerical_transformer = Pipeline(steps=numerical_steps)

# Route each column group through its own transformer
preprocessor = ColumnTransformer(transformers=[
    ('cat', categorical_transformer, categorical_cols),
    ('num', numerical_transformer, numerical_cols),
])

In [4]:
# Preview the first two rows of the training features
X_train.head(n=2)

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
331,1,male,45.5,0,0,28.5,S
733,2,male,23.0,0,0,13.0,S


In [5]:
# Full model: the preprocessor followed by a random forest classifier
# (seeded so repeated fits give the same forest)
rf_classifier = RandomForestClassifier(random_state=1)
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', rf_classifier),
])
pipeline.fit(X_train, y_train)

# Score the fitted pipeline on the held-out split
test_accuracy = pipeline.score(X_test, y_test)
print("Test set accuracy:", test_accuracy)

Test set accuracy: 0.8044692737430168


## Saving the Model

In [6]:
# joblib is used to serialize the trained pipeline to disk
import joblib

In [7]:
# Serialize the fitted pipeline (preprocessor + classifier) to a file
joblib.dump(pipeline, filename="RandomForestClassifier.pkl")

['RandomForestClassifier.pkl']

In [8]:
# Report the installed scikit-learn version.
# The lookup goes through package metadata inside the kernel rather than a
# shell pipeline, so the cell runs on any operating system and always
# describes the environment the kernel itself is using.
# NOTE(review): presumably recorded so the saved pipeline can later be loaded
# under the same scikit-learn version - confirm against the deployment setup.
from importlib.metadata import version

print("scikit-learn", version("scikit-learn"))

scikit-learn                  1.2.1
scikit-learn-intelex          20230228.214818


