# Task 1

In [4]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import pandas as pd

In [2]:
data = load_breast_cancer(as_frame=True)

In [8]:
X, y = data['data'], data['target']

In [9]:
from sklearn.ensemble import BaggingClassifier, GradientBoostingClassifier

In [10]:
clf_1 = BaggingClassifier(n_estimators=15, n_jobs=-1)
clf_2 = GradientBoostingClassifier(n_estimators=100, learning_rate=0.05)

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7)

In [12]:
from sklearn.metrics import f1_score, classification_report

In [13]:
%%time
clf_1.fit(X_train, y_train)
clf_2.fit(X_train, y_train)

Wall time: 1.97 s


GradientBoostingClassifier(learning_rate=0.05)

In [15]:
print(f"""
Classification report for Bagging
{classification_report(y_test, clf_1.predict(X_test))}
F1-score {f1_score(y_test, clf_1.predict(X_test))}
""")


Classification report for Bagging
              precision    recall  f1-score   support

           0       0.93      0.94      0.93        66
           1       0.96      0.95      0.96       105

    accuracy                           0.95       171
   macro avg       0.94      0.95      0.94       171
weighted avg       0.95      0.95      0.95       171

F1-score 0.9569377990430622



In [16]:
print(f"""
Classification report for Boosting
{classification_report(y_test, clf_2.predict(X_test))}
F1-score {f1_score(y_test, clf_2.predict(X_test))}
""")


Classification report for Boosting
              precision    recall  f1-score   support

           0       0.98      0.94      0.96        66
           1       0.96      0.99      0.98       105

    accuracy                           0.97       171
   macro avg       0.97      0.96      0.97       171
weighted avg       0.97      0.97      0.97       171

F1-score 0.9765258215962442



# Task 2 

In [18]:
! pip install --user mlxtend

Collecting mlxtend
  Downloading mlxtend-0.17.3-py2.py3-none-any.whl (1.3 MB)
Installing collected packages: mlxtend
Successfully installed mlxtend-0.17.3


In [19]:
#wrapper
from mlxtend.feature_selection import ExhaustiveFeatureSelector
#filter
from sklearn.feature_selection import SelectKBest, chi2

In [26]:
from sklearn.pipeline import make_pipeline
from sklearn.tree import ExtraTreeClassifier

In [27]:
pipe_wrapper = make_pipeline(ExhaustiveFeatureSelector(ExtraTreeClassifier(),
           min_features=2,
           max_features=3,
           scoring='roc_auc',
           print_progress=True,
           cv=2),
            GradientBoostingClassifier(n_estimators=100, learning_rate=0.05))
pipe_filter = make_pipeline(SelectKBest(score_func=chi2, k=3),
            GradientBoostingClassifier(n_estimators=100, learning_rate=0.05))

In [28]:
%%time
pipe_wrapper.fit(X_train, y_train)
pipe_filter.fit(X_train, y_train)

Features: 3000/4495IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

Features: 4495/4495

Wall time: 23.6 s


Pipeline(steps=[('selectkbest',
                 SelectKBest(k=3,
                             score_func=<function chi2 at 0x000001A706703160>)),
                ('gradientboostingclassifier',
                 GradientBoostingClassifier(learning_rate=0.05))])

In [29]:
print(f"""
Classification report for Boosting with EFS
{classification_report(y_test, pipe_wrapper.predict(X_test))}
F1-score {f1_score(y_test, pipe_wrapper.predict(X_test))}
""")


Classification report for Boosting with EFS
              precision    recall  f1-score   support

           0       0.94      0.92      0.93        66
           1       0.95      0.96      0.96       105

    accuracy                           0.95       171
   macro avg       0.95      0.94      0.94       171
weighted avg       0.95      0.95      0.95       171

F1-score 0.957345971563981



In [30]:
print(f"""
Classification report for Boosting with SelectKBest
{classification_report(y_test, pipe_filter.predict(X_test))}
F1-score {f1_score(y_test, pipe_filter.predict(X_test))}
""")


Classification report for Boosting with SelectKBest
              precision    recall  f1-score   support

           0       0.98      0.89      0.94        66
           1       0.94      0.99      0.96       105

    accuracy                           0.95       171
   macro avg       0.96      0.94      0.95       171
weighted avg       0.95      0.95      0.95       171

F1-score 0.962962962962963

