In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier



In [2]:
# Show the hyperparameters of a LinearRegression built with no arguments.
model = LinearRegression()
print(model.get_params(deep=True))

{'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'positive': False}


In [3]:
# Show the hyperparameters of a RandomForestClassifier built with no arguments.
model = RandomForestClassifier()
print(model.get_params(deep=True))

{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}


In [4]:
# Show the hyperparameters of a KNeighborsClassifier built with no arguments.
model = KNeighborsClassifier()
print(model.get_params(deep=True))

{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}


In [5]:
# Show the hyperparameters of a DecisionTreeClassifier built with no arguments.
model = DecisionTreeClassifier()
print(model.get_params(deep=True))

{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'random_state': None, 'splitter': 'best'}


In [6]:
# Show the hyperparameters of a GaussianNB built with no arguments.
model = GaussianNB()
print(model.get_params(deep=True))

{'priors': None, 'var_smoothing': 1e-09}


# **Linear Regression Hyperparameters**

In [7]:
# Fetch the "titanic" example dataset through seaborn and display the frame.
df = sns.load_dataset(name="titanic")
df

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [16]:
# Replace missing ages with the mean of the "age" column.
df["age"] = df["age"].fillna(df["age"].mean())

In [17]:
# Feature matrix (the "age" column only, kept two-dimensional) and target ("fare").
X = df.loc[:, ["age"]]
y = df.loc[:, "fare"]

In [30]:
# Tune LinearRegression on the age -> fare problem with a 5-fold grid search.
#
# The features and target are selected explicitly here rather than taken from
# the notebook-wide X / y, because the KNN and decision-tree cells rebind those
# names to the classification data.
# NOTE(review): this cell's execution count (30) is higher than those of the
# KNN (26) and decision-tree (29) cells, so the recorded score may have been
# computed on their X / y -- re-run the notebook top to bottom to confirm.
X_reg = df[["age"]]
y_reg = df["fare"]

model = LinearRegression()

# Only one hyperparameter is searched: whether to fit an intercept term.
param_grid = {'fit_intercept': [True, False]}

# Cross-validated search over the grid, scored by R^2.
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='r2')

# Run the search on the regression data.
grid_search.fit(X_reg, y_reg)

# Report the best setting found and its cross-validated score.
print("Best Parameters: ", grid_search.best_params_)
print("Best Score: ", grid_search.best_score_)

Best Parameters:  {'fit_intercept': True}
Best Score:  0.3700743582890526


# **KNN Hyperparameters**

In [19]:
# Encode the two binary string columns as integers for scikit-learn.
#
# One dict-based replace per column leaves already-encoded values untouched,
# so the cell can be re-run without changing the data. The explicit int64 cast
# avoids relying on pandas' implicit object -> int downcast inside replace()
# (deprecated in pandas 2.2 -- confirm against the installed version) and
# fails loudly if a column holds any value other than the two mapped labels.
df["alive"] = df["alive"].replace({"yes": 1, "no": 0}).astype("int64")
df["sex"] = df["sex"].replace({"male": 1, "female": 0}).astype("int64")

In [26]:
# Classification setup: six passenger columns as features, "alive" as target.
X = df[["pclass", "sex", "age", "sibsp", "parch", "fare"]]
y = df["alive"]

model = KNeighborsClassifier()

# Candidates: every neighbour count from 1 to 199, under both weighting schemes.
param_grid = dict(
    n_neighbors=np.arange(1, 200),
    weights=['uniform', 'distance'],
)

# 5-fold cross-validated grid search, scored by F1.
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='f1')

# Run the search.
grid_search.fit(X, y)

# Report the best setting found and its cross-validated score.
print("Best Parameters: ", grid_search.best_params_)
print("Best Score: ", grid_search.best_score_)

Best Parameters:  {'n_neighbors': 8, 'weights': 'distance'}
Best Score:  0.6275516198433728


# **Decision Tree and Random Forest Hyperparameters**

In [29]:
# Tune a decision tree on the survival classification problem.
X = df[["pclass", "sex", "age", "sibsp", "parch", "fare"]]
y = df["alive"]

# Fixed seed for reproducibility: per the scikit-learn documentation the tree
# builder permutes the features at every split, so equally good splits can
# otherwise be resolved differently from one run to the next.
model = DecisionTreeClassifier(random_state=42)

# Candidates: tree depth limit (None = unlimited) and minimum samples to split a node.
param_grid = {'max_depth': [3, 5, 7, None], 'min_samples_split': [2, 3, 4]}

# 5-fold cross-validated grid search, scored by precision.
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='precision')

# Run the search.
grid_search.fit(X, y)

# Report the best setting found and its cross-validated score.
print("Best Parameters: ", grid_search.best_params_)
print("Best Score: ", grid_search.best_score_)

Best Parameters:  {'max_depth': 5, 'min_samples_split': 4}
Best Score:  0.8162011928728926


In [34]:

# Tune a random forest on the same X / y that the decision-tree cell above defines.
#
# Fixed seed for reproducibility: a random forest draws bootstrap samples and
# feature subsets at random, so without it the best parameters and score can
# change every time the cell is run.
model = RandomForestClassifier(random_state=42)

# Candidates: number of trees, tree depth limit (None = unlimited), and the
# rule for how many features are considered at each split.
param_grid = {
    'n_estimators' : [10, 50, 100],
    'max_depth' : [3, 5, 7, None],
    'max_features': ['sqrt', 'log2']
}

# 5-fold cross-validated grid search, scored by precision.
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='precision')

# Run the search.
grid_search.fit(X, y)

# Report the best setting found and its cross-validated score.
print("Best Parameters: ", grid_search.best_params_)
print("Best Score: ", grid_search.best_score_)

Best Parameters:  {'max_depth': 3, 'max_features': 'log2', 'n_estimators': 50}
Best Score:  0.8413302816138506
