# Parameters

## Linear regression

In [1]:
from sklearn.linear_model import LinearRegression
model = LinearRegression()
print(model.get_params())

{'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'positive': False}


copy_X:

This parameter determines whether the algorithm will make a copy of the input features (X) before performing the regression calculations.\
True: It makes a copy of X, ensuring that the original data is not modified during the fitting process.\
False: It performs the calculations in place, potentially modifying the original data. However, setting copy_X to False may lead to unexpected behavior if the original data is modified elsewhere in your code.

fit_intercept:

This parameter determines whether to calculate the intercept for the linear regression model.\
True: It calculates the intercept, which represents the value of the dependent variable when all independent variables are zero.\
False: It forces the model to pass through the origin (0,0), assuming that the dependent variable is zero when all independent variables are zero. This is useful when you know that the relationship between variables should include the origin.

n_jobs:

This parameter specifies the number of parallel jobs to run during the model fitting process. It is useful for speeding up the computation, especially when dealing with large datasets.\
None: It uses a single core to perform the computation.\
-1: It uses all available cores on your machine to perform the computation in parallel.

positive:

This parameter constrains the coefficients of the regression model to be positive. It is useful when you know that the relationship between variables should only be positive.\
True: It enforces positivity constraints on the coefficients.\
False: It allows both positive and negative coefficients.

## Random Forest

In [6]:
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier()
print(model.get_params())

{'bootstrap': True, 'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': 'sqrt', 'max_leaf_nodes': None, 'max_samples': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 100, 'n_jobs': None, 'oob_score': False, 'random_state': None, 'verbose': 0, 'warm_start': False}


1. **Gini Impurity**: Use when dealing with categorical features and aiming for simpler splits, or when computational resources are limited.

2. **Entropy**: Prefer when working with numerical features or categorical features with multiple levels, or when reducing uncertainty is crucial.

3. **Log Loss**: Opt for problems where accurate probability estimation is vital, such as in binary classification tasks like churn prediction.


## SVM

In [7]:
from sklearn.svm import SVC
model = LinearRegression()
print(model.get_params())

{'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'positive': False}


## Logistic regression

In [8]:
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
print(model.get_params())

{'C': 1.0, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 100, 'multi_class': 'auto', 'n_jobs': None, 'penalty': 'l2', 'random_state': None, 'solver': 'lbfgs', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}


## Decision  Tree regression

In [9]:
from sklearn.tree import DecisionTreeRegressor
model = DecisionTreeRegressor()
print(model.get_params())

{'ccp_alpha': 0.0, 'criterion': 'squared_error', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'random_state': None, 'splitter': 'best'}


## Decision  Tree Classification

In [10]:
from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier()
print(model.get_params())

{'ccp_alpha': 0.0, 'class_weight': None, 'criterion': 'gini', 'max_depth': None, 'max_features': None, 'max_leaf_nodes': None, 'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split': 2, 'min_weight_fraction_leaf': 0.0, 'random_state': None, 'splitter': 'best'}


## KNN

In [12]:
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier()
print(model.get_params())

{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}


# Grid Search CV

## Linear Regression

In [7]:
import seaborn as sns
df= sns.load_dataset('titanic')

# Filling nan

df['age'].fillna(df['age'].mean(), inplace=True)

x = df[['age']]
y= df['fare']

from sklearn.linear_model import LinearRegression
model = LinearRegression()

from sklearn.model_selection import GridSearchCV

grid_params= { 'fit_intercept': [ True, False],
               'copy_X':[ True,False],
               'positive':[ False, True]
            }
grid_search = GridSearchCV(model, grid_params, cv=5, scoring='r2')

grid_search.fit(x,y)

print(grid_search.best_params_)
print(grid_search.best_score_)

{'copy_X': True, 'fit_intercept': True, 'positive': True}
-0.0033709951196183364


## KNN

In [14]:
import pandas as pd
import seaborn as sns
df= sns.load_dataset('titanic')

# Filling nan

df['age'].fillna(df['age'].mean(), inplace=True)

x = df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
y = df['survived']

x= pd.get_dummies(x, columns= ['sex'])

from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier()

from sklearn.model_selection import GridSearchCV
import numpy as np

# np.arange(1,50,2)= 1st= start, 2nd = end, 3rd= step/difference

grid_params= { 'n_neighbors': np.arange(1,30,2), 
                    'weights': ['uniform', 'distance'],
                    'algorithm':['auto', 'ball_tree', 'kd_tree', 'brute'],
                    'leaf_size': np.arange(10,50)
                }   
grid_search = GridSearchCV(model, grid_params, cv=5, scoring='accuracy')

grid_search.fit(x,y)

print(grid_search.best_params_)
print(grid_search.best_score_)

{'algorithm': 'auto', 'n_neighbors': 29, 'weights': 'distance'}
0.7206390057121336


In [39]:
import pandas as pd
import seaborn as sns
df= sns.load_dataset('titanic')

# Filling nan

df['age'].fillna(df['age'].mean(), inplace=True)

x = df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
y = df['survived']

x= pd.get_dummies(x, columns= ['sex'])

from sklearn.tree import DecisionTreeClassifier
model = DecisionTreeClassifier()

from sklearn.model_selection import GridSearchCV
import numpy as np

# np.arange(1,50,2)= 1st= start, 2nd = end, 3rd= step/difference

grid_params= {  'criterion': 'gini'}   
grid_search = GridSearchCV(model, grid_params, cv=5, scoring='precision')

grid_search.fit(x,y)

print(grid_search.best_params_)
print(grid_search.best_score_)

TypeError: Parameter grid for parameter 'criterion' needs to be a list or a numpy array, but got 'gini' (of type str) instead. Single values need to be wrapped in a list with one element.