## Using the Scoring Parameter

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
heart_disease = pd.read_csv("resources/heart-disease.csv")
x = heart_disease.drop("target", axis=1)
y = heart_disease["target"]

In [3]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

rfr = RandomForestClassifier(n_estimators=100)

In [4]:
acc = cross_val_score(rfr, x, y, cv=5)
acc

array([0.85245902, 0.90163934, 0.81967213, 0.81666667, 0.78333333])

In [5]:
np.mean(acc)

0.8347540983606556

By the default `cross_val_score` uses the estimator's `.score()`method as the default metric, but we can use the `scoring` parameter to define a different score

In [6]:
prec = cross_val_score(rfr,x,y, cv = 5, scoring = 'precision')
np.mean(prec)

0.842359022556391

In [7]:
recall = cross_val_score(rfr,x,y, cv = 5, scoring = 'recall')
np.mean(recall)

0.8484848484848484

In [8]:
f1 = cross_val_score(rfr,x,y, cv = 5, scoring = 'f1')
np.mean(f1)

0.8320025307923349

In [9]:
# We can use make_scorer and metric functions from sklearn.metrics use those functions as scorers
from sklearn.metrics import roc_auc_score, make_scorer
auc = cross_val_score(rfr,x,y, cv = 5, scoring = make_scorer(roc_auc_score))
np.mean(auc)

0.80494227994228