In [None]:
import pandas as pd

# Read the wine table from the hosted CSV and preview its first rows.
wine = pd.read_csv('https://bit.ly/wine-date')
wine.head()

In [None]:
# Three feature columns become the input matrix; the 'class' column is the target.
data = wine[['alcohol', 'sugar', 'pH']].to_numpy()
target = wine['class'].to_numpy()
data

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; the seed keeps the split reproducible.
train_input, test_input, train_target, test_target = train_test_split(
    data,
    target,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same transform to both splits.
ss = StandardScaler().fit(train_input)
train_scaled = ss.transform(train_input)
test_scaled = ss.transform(test_input)
test_scaled

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression on the scaled features; report train score, then test score.
lr = LogisticRegression().fit(train_scaled, train_target)
print(lr.score(train_scaled, train_target))
print(lr.score(test_scaled, test_target))

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree with no growth limits set, trained on the scaled features.
dt = DecisionTreeClassifier(random_state=42).fit(train_scaled, train_target)
print(dt.score(train_scaled, train_target))
print(dt.score(test_scaled, test_target))

In [None]:
import matplotlib.pyplot as plt
from sklearn.tree import plot_tree

# Draw the whole fitted tree on a 10x7 figure.
plt.figure(figsize=(10, 7))
plot_tree(dt)
plt.show()

In [None]:
# Draw only the top of the tree (max_depth=1), with filled nodes and named features.
plot_tree(
    dt,
    max_depth=1,
    filled=True,
    feature_names=['alcohol', 'sugar', 'pH'],
)
plt.show()

In [None]:
# Refit with the depth capped at 3, still on the scaled features.
dt = DecisionTreeClassifier(max_depth=3, random_state=42).fit(train_scaled, train_target)
print(dt.score(train_scaled, train_target))
print(dt.score(test_scaled, test_target))

In [None]:
# Large canvas for the depth-3 tree trained on scaled inputs.
plt.figure(figsize=(20, 15))
plot_tree(
    dt,
    filled=True,
    feature_names=['alcohol', 'sugar', 'pH'],
)
plt.show()

In [None]:
# Same depth-3 tree, but trained and scored on the unscaled inputs.
dt = DecisionTreeClassifier(max_depth=3, random_state=42).fit(train_input, train_target)
print(dt.score(train_input, train_target))
print(dt.score(test_input, test_target))

In [None]:
# Plot the depth-3 tree trained on unscaled inputs; thresholds are in the original feature units.
plt.figure(figsize=(20, 15))
plot_tree(
    dt,
    filled=True,
    feature_names=['alcohol', 'sugar', 'pH'],
)
plt.show()

In [None]:
# Limit growth via min_impurity_decrease instead of a depth cap.
dt = DecisionTreeClassifier(
    min_impurity_decrease=0.0005,
    random_state=42,
).fit(train_input, train_target)
print(dt.score(train_input, train_target))
print(dt.score(test_input, test_target))

In [None]:
# High-resolution (dpi=300) rendering of the min_impurity_decrease tree.
plt.figure(figsize=(20, 15), dpi=300)
plot_tree(
    dt,
    filled=True,
    feature_names=['alcohol', 'sugar', 'pH'],
)
plt.show()

In [None]:
# Split the training set again: 80% stays for fitting (sub_*), 20% becomes the validation set (val_*).
sub_input, val_input, sub_target, val_target = train_test_split(
    train_input,
    train_target,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Fit on the sub-training split and compare its score against the validation split.
dt = DecisionTreeClassifier(random_state=42).fit(sub_input, sub_target)
print(dt.score(sub_input, sub_target))
print(dt.score(val_input, val_target))


In [None]:
from sklearn.model_selection import cross_validate

# Cross-validate the tree on the full training set using cross_validate's default splitting.
scores = cross_validate(dt, train_input, train_target)
scores

In [None]:
import numpy as np
# Average the per-fold 'test_score' values from the preceding cross_validate call.
np.mean(scores['test_score'])

In [None]:
from sklearn.model_selection import StratifiedKFold

# Use an explicit shuffled, stratified 10-fold splitter instead of the default.
splitter = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=42,
)
scores = cross_validate(dt, train_input, train_target, cv=splitter)
np.mean(scores['test_score'])

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate thresholds, evenly spaced from 0.0001 to 0.0005.
# The third value was 0.00003, which broke the sequence and skipped 0.0003.
params = {'min_impurity_decrease': [0.0001, 0.0002, 0.0003, 0.0004, 0.0005]}

# Search the grid with cross-validation, using all available cores (n_jobs=-1).
gs = GridSearchCV(DecisionTreeClassifier(random_state=42), params, n_jobs=-1)
gs.fit(train_input, train_target)

# Keep the estimator selected by the search and show its score on the training data.
dt = gs.best_estimator_
dt.score(train_input, train_target)

In [None]:
# Show the parameter combination that the search selected.
gs.best_params_

In [None]:
# A cell only displays its last bare expression, so print the mean scores
# explicitly; otherwise they are computed and silently dropped.
print(gs.cv_results_['mean_test_score'])
# The parameter dicts, in the same order as the scores printed above.
gs.cv_results_['params']

In [None]:
# Locate the highest mean test score by hand and look up the parameters at that index.
best_idx = np.argmax(gs.cv_results_['mean_test_score'])
gs.cv_results_['params'][best_idx]

In [None]:
# Wider grid over three tree hyperparameters; every combination will be tried.
params = {
    'min_impurity_decrease': np.arange(0.0001, 0.001, 0.0001),
    'max_depth': range(5, 20),
    'min_samples_split': range(2, 100, 10),
}

In [None]:
# Exhaustive search over the wider grid, parallelised across all cores.
gs = GridSearchCV(
    DecisionTreeClassifier(random_state=42),
    params,
    n_jobs=-1,
)
gs.fit(train_input, train_target)

In [None]:
# Parameter combination selected by the most recent search stored in `gs`.
gs.best_params_

In [None]:
# Highest mean cross-validation score among all candidates tried by `gs`.
np.max(gs.cv_results_['mean_test_score'])

In [None]:
from scipy.stats import randint, uniform
from sklearn.model_selection import RandomizedSearchCV

# Sample hyperparameters from distributions instead of enumerating a grid.
# scipy's uniform takes (loc, scale), so this one spans [0.0001, 0.0011].
params = {
    'min_impurity_decrease': uniform(0.0001, 0.001),
    'max_depth': randint(20, 50),
    'min_samples_split': randint(2, 25),
    'min_samples_leaf': randint(1, 25),
}

# 100 sampled candidates, evaluated on trees that use the random splitter.
gs = RandomizedSearchCV(
    DecisionTreeClassifier(random_state=42, splitter='random'),
    params,
    n_iter=100,
    n_jobs=-1,
    random_state=42,
)
gs.fit(train_input, train_target)
gs.best_params_

In [None]:
import numpy as np
# Highest mean cross-validation score among the candidates evaluated by `gs`.
np.max(gs.cv_results_['mean_test_score'])

In [None]:
# Take the estimator selected by the search and score it on the held-out test split.
dt=gs.best_estimator_
dt.score(test_input,test_target)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate

# Cross-validate a random forest, keeping train scores to compare against the validation folds.
rf = RandomForestClassifier(n_jobs=-1, random_state=42)
scores = cross_validate(
    rf,
    train_input,
    train_target,
    return_train_score=True,
    n_jobs=-1,
)
print(
    np.mean(scores['train_score']),
    np.mean(scores['test_score']),
)

In [None]:
# Fit the forest on the full training split, then show its feature importances
# (one value per input column, in the order alcohol, sugar, pH used to build `data`).
rf.fit(train_input,train_target)
rf.feature_importances_

In [None]:
# Refit the forest with out-of-bag scoring enabled and show the resulting OOB score.
rf = RandomForestClassifier(
    oob_score=True,
    n_jobs=-1,
    random_state=42,
).fit(train_input, train_target)
rf.oob_score_

In [None]:
from sklearn.ensemble import ExtraTreesClassifier

# Same cross-validation protocol as the random forest, applied to extra trees.
et = ExtraTreesClassifier(n_jobs=-1, random_state=42)
scores = cross_validate(
    et,
    train_input,
    train_target,
    return_train_score=True,
    n_jobs=-1,
)
print(
    np.mean(scores['train_score']),
    np.mean(scores['test_score']),
)

In [None]:
# Fit the extra-trees model on the full training split and show its feature importances.
et.fit(train_input,train_target)
et.feature_importances_

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting with default settings (only the seed is fixed).
gb = GradientBoostingClassifier(random_state=42)
scores = cross_validate(
    gb,
    train_input,
    train_target,
    return_train_score=True,
    n_jobs=-1,
)
print(
    np.mean(scores['train_score']),
    np.mean(scores['test_score']),
)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Gradient boosting again, now with 500 estimators and a learning rate of 0.2.
gb = GradientBoostingClassifier(
    n_estimators=500,
    learning_rate=0.2,
    random_state=42,
)
scores = cross_validate(
    gb,
    train_input,
    train_target,
    return_train_score=True,
    n_jobs=-1,
)
print(
    np.mean(scores['train_score']),
    np.mean(scores['test_score']),
)

In [None]:
# Fit the gradient-boosting model on the full training split and show its feature importances.
gb.fit(train_input,train_target)
gb.feature_importances_

In [None]:
from sklearn.ensemble import HistGradientBoostingClassifier

# Histogram-based gradient boosting, cross-validated with train scores retained.
hgb = HistGradientBoostingClassifier(random_state=42)
scores = cross_validate(
    hgb,
    train_input,
    train_target,
    return_train_score=True,
)
print(
    np.mean(scores['train_score']),
    np.mean(scores['test_score']),
)

In [None]:
from sklearn.inspection import permutation_importance

# Fit the histogram-based model on the full training split.
hgb.fit(train_input, train_target)

# The cell used to display rf.feature_importances_, i.e. the random forest's
# importances rather than this model's. HistGradientBoostingClassifier has no
# feature_importances_ attribute, so estimate importances by permuting each
# feature and measuring the drop in score.
result = permutation_importance(
    hgb,
    train_input,
    train_target,
    n_repeats=10,
    random_state=42,
    n_jobs=-1,
)
# Mean importance per feature, in input column order (alcohol, sugar, pH).
result.importances_mean

In [None]:
# Score the fitted histogram-based model on the held-out test split.
hgb.score(test_input,test_target)

In [None]:
from xgboost import XGBClassifier

# XGBoost with the histogram tree method, under the same cross-validation protocol.
xgb = XGBClassifier(tree_method="hist", random_state=42)
scores = cross_validate(
    xgb,
    train_input,
    train_target,
    return_train_score=True,
)
print(
    np.mean(scores['train_score']),
    np.mean(scores['test_score']),
)

In [None]:
from lightgbm import LGBMClassifier

# LightGBM classifier, cross-validated in parallel with train scores retained.
lgb = LGBMClassifier(random_state=42)
scores = cross_validate(
    lgb,
    train_input,
    train_target,
    return_train_score=True,
    n_jobs=-1,
)
print(
    np.mean(scores['train_score']),
    np.mean(scores['test_score']),
)