**The Factory Pattern significantly simplifies the management of different Machine Learning models**

1. Centralized Creation Logic
    - All the logic for creating different types of models is centralized in the `ModelFactory` class, making it easier to manage and update
2. Extensibility
    - Adding new types of models becomes straightforward
        - Create a new concrete class that implements the `MLModel` interface.
        - Update the factory method to include the new model type.
3. Maintainability
    - When the instantiation of a model changes, you only need to update the corresponding concrete class, leaving the rest of your code untouched. This reduces the risk of introducing bugs
4. Readability and Clean Code
    - Abstracting complex instantiation logic away from the client code
    - Making the client code simpler and more readable, as it no longer needs to handle the specifics of the model creation
5. Flexibility
    - Allowing different models to be created based on runtime information (e.g., user input, configuration files)
    - Making it easy to switch between different models without changing the client code.


In [9]:
import pandas as pd
import seaborn as sns
from abc import ABC, abstractmethod  # abstract base class
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [10]:
# Load seaborn's 'titanic' example dataset.
titanic = sns.load_dataset('titanic')

# Discard, in place, every row that has a missing value in one of these columns.
titanic.dropna(subset=['age', 'embarked', 'fare'], inplace=True)

# Replace the text categories with integer codes.
# NOTE: a single encoder is refit for each column in turn, so once the loop
# finishes it only remembers the classes of the last column ('embarked').
label_encoder = LabelEncoder()
for categorical_column in ('sex', 'embarked'):
    titanic[categorical_column] = label_encoder.fit_transform(
        titanic[categorical_column]
    )

# Feature matrix and target vector.
X = titanic[
    ['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare', 'embarked']
]
y = titanic['survived']

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [13]:
# Model interface
class MLModel(ABC):
    """Abstract interface shared by every model wrapper.

    Both methods are abstract, so a subclass can only be instantiated once it
    overrides ``train`` and ``predict``.
    """

    @abstractmethod
    def train(self, X, y):
        """Fit the model on features ``X`` and targets ``y`` (subclass must override)."""

    @abstractmethod
    def predict(self, X):
        """Return predictions for features ``X`` (subclass must override)."""

In [14]:
class LogisticRegressionModel(MLModel):
    """``MLModel`` implementation backed by scikit-learn's ``LogisticRegression``."""

    def __init__(self):
        # The estimator is built with its default constructor arguments.
        self.model = LogisticRegression()

    def train(self, X, y):
        """Fit the wrapped estimator on features ``X`` and targets ``y``."""
        estimator = self.model
        estimator.fit(X, y)

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``."""
        predictions = self.model.predict(X)
        return predictions

    def get_coefficients(self):
        """Return the fitted estimator's ``coef_`` attribute.

        This method is not part of the ``MLModel`` interface; it shows that a
        subclass is free to expose additional methods.
        """
        coefficients = self.model.coef_
        return coefficients
    

class RandomForestModel(MLModel):
    """``MLModel`` implementation backed by scikit-learn's ``RandomForestClassifier``."""

    def __init__(self):
        # The estimator is built with its default constructor arguments.
        self.model = RandomForestClassifier()

    def train(self, X, y):
        """Fit the wrapped classifier on features ``X`` and targets ``y``."""
        classifier = self.model
        classifier.fit(X, y)

    def predict(self, X):
        """Return the wrapped classifier's predictions for ``X``."""
        predicted = self.model.predict(X)
        return predicted
    

class SVMModel(MLModel):
    """``MLModel`` implementation backed by scikit-learn's ``SVC``."""

    def __init__(self):
        # The estimator is built with its default constructor arguments.
        self.model = SVC()

    def train(self, X, y):
        """Fit the wrapped support-vector classifier on ``X`` and ``y``."""
        svc = self.model
        svc.fit(X, y)

    def predict(self, X):
        """Return the wrapped classifier's predictions for ``X``."""
        labels = self.model.predict(X)
        return labels

In [6]:
class ModelFactory:
    """Factory that builds an ``MLModel`` wrapper from a string identifier."""

    # Registry mapping each supported identifier to a zero-argument builder.
    # Supporting a new model only requires a new entry here. The lambdas defer
    # the lookup of the concrete class until that model is actually requested.
    _BUILDERS = {
        'logistic_regression': lambda: LogisticRegressionModel(),
        'random_forest': lambda: RandomForestModel(),
        'svm': lambda: SVMModel(),
    }

    @staticmethod
    def create(model_type):
        """Create a new model wrapper for ``model_type``.

        Args:
            model_type: One of ``'logistic_regression'``, ``'random_forest'``
                or ``'svm'``.

        Returns:
            A freshly constructed instance of the matching model class.

        Raises:
            ValueError: If ``model_type`` is not a supported identifier. The
                message names the rejected value and lists the valid ones.
        """
        try:
            builder = ModelFactory._BUILDERS[model_type]
        except (KeyError, TypeError):
            # TypeError covers unhashable arguments (e.g. a list), which must
            # be reported as an invalid type rather than leak a lookup error.
            supported = ', '.join(repr(name) for name in ModelFactory._BUILDERS)
            raise ValueError(
                f'Invalid model type: {model_type!r}. Supported types: {supported}'
            ) from None
        return builder()

In [11]:
# Pick the model by name; swapping models only requires changing this string.
model_type = 'logistic_regression'

# Let the factory build the matching wrapper.
model = ModelFactory.create(model_type=model_type)

# Fit on the training split, then predict labels for the held-out split.
model.train(X=X_train, y=y_train)
y_pred = model.predict(X=X_test)