# Reading Through Scikit-Learn Documentation

## Getting Started

**Fitting and predicting: estimator basics**

In [21]:
from sklearn.ensemble import RandomForestClassifier

# Toy training set: two samples with three features each, plus one class
# label per sample.
X = [[1, 2, 3], [11, 12, 13]]
Y = [0, 1]

# Build the classifier with a fixed random_state and train it in one
# statement; fit() hands back the fitted estimator, which is bound to clf.
clf = RandomForestClassifier(random_state=0).fit(X, Y)

# Predict the classes of two new samples. As the last expression of the
# cell, the resulting array is what the notebook displays.
clf.predict([[4, 5, 6], [14, 15, 16]])

array([0, 1])

**Transformers and pre-processors**

In [22]:
from sklearn.preprocessing import StandardScaler

# Two samples whose two features sit on visibly different scales.
X = [[0, 15], [1, -10]]

scaler = StandardScaler()

# Step 1: learn the scaling parameters from X.
scaler.fit(X)

# Step 2: apply the learned scaling to the same data; the transformed
# array is the cell's displayed output.
scaler.transform(X)

array([[-1.,  1.],
       [ 1., -1.]])

**Pipelines: chaining pre-processors and estimators**

In [35]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Chain the scaler and the classifier into one estimator, so that fitting
# and predicting run both steps in order with a single call.
pipe = make_pipeline(
    StandardScaler(),
    LogisticRegression(),
)

X, y = load_iris(return_X_y=True)

# Seed the split: without random_state each run draws a different
# train/test partition, so the accuracy reported below could not be
# reproduced on a fresh kernel.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

pipe.fit(X_train, y_train)

# accuracy_score takes (y_true, y_pred): ground truth first, predictions
# second. Accuracy is symmetric, but keeping the documented order avoids
# silently wrong results if the metric is later swapped for an asymmetric
# one (precision, recall, ...).
accuracy_score(y_test, pipe.predict(X_test))

0.9473684210526315

**Model evaluation**

In [46]:
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate

# Synthetic regression data; random_state makes the generated samples
# identical on every run.
X, y = make_regression(n_samples=1000, random_state=0)

lr = LinearRegression()

# Cross-validate the estimator with the default splitting strategy and
# default scorer, then show only the per-fold test scores.
result = cross_validate(lr, X, y)
result['test_score']

array([1., 1., 1., 1., 1.])

**Automatic parameter searches**

In [14]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from scipy.stats import randint
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor

X, y = fetch_california_housing(return_X_y=True)

# Seed the split as well: the estimator and the search below are already
# seeded, but an unseeded train/test partition would still make the best
# parameters and the test score vary from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Search space: each hyper-parameter is drawn from a discrete uniform
# distribution (scipy's randint excludes the upper bound).
param_distribution = {
    'n_estimators': randint(1, 5),
    'max_depth': randint(5, 10),
}

# Randomized search: n_iter parameter settings are sampled from the
# distributions above; random_state fixes which settings are drawn.
search = RandomizedSearchCV(
    estimator=RandomForestRegressor(random_state=0),
    n_iter=5,
    param_distributions=param_distribution,
    random_state=0,
)

search.fit(X_train, y_train)

# Report the winning parameter combination and the fitted search's score
# on the held-out test split.
print(search.best_params_)
print(search.score(X_test, y_test))


{'max_depth': 9, 'n_estimators': 4}
0.7450810565794592
