# Machine Learning Finding Optimal Model and Hyperparameters

In [1]:
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.tree import DecisionTreeClassifier

In [2]:
from sklearn import datasets
digits = datasets.load_digits()

In [5]:
X = digits.data
y = digits.target

In [8]:
X.shape

(1797, 64)

In [9]:
y.shape

(1797,)

In [10]:
from sklearn.model_selection import cross_val_score

# Define the hyper paramters.

In [11]:
model_params = {
    'svm': {
        'model': svm.SVC(gamma='auto'),
        'params' : {
            'C': [1,10,20],
            'kernel': ['rbf','linear']
        }  
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params' : {
            'n_estimators': [1,5,10]
        }
    },
    'logistic_regression' : {
        'model': LogisticRegression(solver='liblinear',multi_class='auto'),
        'params': {
            'C': [1,5,10]
        }
    },
    'naive_bayes_gaussian': {
        'model': GaussianNB(),
        'params': {}
    },
    'naive_bayes_multinomial': {
        'model': MultinomialNB(),
        'params': {}
    },
    'decision_tree': {
        'model': DecisionTreeClassifier(),
        'params': {
            'criterion': ['gini','entropy'],
            
        }
    }     
}

In [13]:
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
import pandas as pd
scores = []

In [20]:
import time

# Start the timer
start_time = time.time()


for mn,mp in model_params.items():
    clf = GridSearchCV(mp["model"],mp['params'],cv=5, return_train_score=False)
    clf.fit(X,y)
    scores.append({
        "model":mn,
        "best_score":clf.best_score_,
        "best_params":clf.best_params_,
        "best_estimators":clf.best_estimator_
    })


# End the timer
end_time = time.time()

# Calculate the elapsed time
elapsed_time = end_time - start_time
print("Time taken:", elapsed_time, "seconds")

Time taken: 5.065934896469116 seconds


In [21]:
df = pd.DataFrame(scores,columns=['model','best_score','best_params'])
df


Unnamed: 0,model,best_score,best_params
0,svm,0.947697,"{'C': 1, 'kernel': 'linear'}"
1,random_forest,0.899873,{'n_estimators': 10}
2,logistic_regression,0.922114,{'C': 1}
3,naive_bayes_gaussian,0.806928,{}
4,naive_bayes_multinomial,0.87035,{}
5,decision_tree,0.803589,{'criterion': 'entropy'}
6,svm,0.947697,"{'C': 1, 'kernel': 'linear'}"
7,random_forest,0.8982,{'n_estimators': 10}
8,logistic_regression,0.922114,{'C': 1}
9,naive_bayes_gaussian,0.806928,{}


In [22]:
import time

# Start the timer
start_time = time.time()


for mn,mp in model_params.items():
    clf = RandomizedSearchCV(mp["model"],mp['params'],cv=5, return_train_score=False)
    clf.fit(X,y)
    scores.append({
        "model":mn,
        "best_score":clf.best_score_,
        "best_params":clf.best_params_,
        "best_estimators":clf.best_estimator_
    })

# End the timer
end_time = time.time()

# Calculate the elapsed time
elapsed_time = end_time - start_time
print("Time taken:", elapsed_time, "seconds")



Time taken: 5.1617231369018555 seconds


In [23]:
df2 = pd.DataFrame(scores,columns=['model','best_score','best_params'])
df2



Unnamed: 0,model,best_score,best_params
0,svm,0.947697,"{'C': 1, 'kernel': 'linear'}"
1,random_forest,0.899873,{'n_estimators': 10}
2,logistic_regression,0.922114,{'C': 1}
3,naive_bayes_gaussian,0.806928,{}
4,naive_bayes_multinomial,0.87035,{}
5,decision_tree,0.803589,{'criterion': 'entropy'}
6,svm,0.947697,"{'C': 1, 'kernel': 'linear'}"
7,random_forest,0.8982,{'n_estimators': 10}
8,logistic_regression,0.922114,{'C': 1}
9,naive_bayes_gaussian,0.806928,{}
