# Training and Testing a Model

![](images/supervised-classification.png)
<div style="text-align: center;">
<strong>Credit:</strong> http://www.nltk.org/book/ch06.html
</div>

### Procedure 1: Train and test on the (same) entire dataset

In [None]:
from sklearn import metrics
from sklearn.datasets import load_iris

# Fetch the iris dataset bundled with scikit-learn.
iris = load_iris()

# X holds the feature values, y the class labels to be predicted.
X, y = iris.data, iris.target

### Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

# Create a logistic regression classifier with default settings and train it
# on every available sample (fit() hands back the fitted estimator itself).
logreg = LogisticRegression().fit(X, y)

# Ask the model to label the very same samples it was trained on.
y_pred = logreg.predict(X)

# Fraction of samples whose predicted label matches the true label.
metrics.accuracy_score(y_true=y, y_pred=y_pred)

### KNN (K = 5)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# K-nearest-neighbours classifier voting over the 5 closest samples,
# trained on the complete dataset.
knn = KNeighborsClassifier(n_neighbors=5).fit(X, y)

# Predictions for the same samples that were used for training.
y_pred = knn.predict(X)

# Accuracy measured on the training data itself.
metrics.accuracy_score(y_true=y, y_pred=y_pred)

### KNN (K = 1)

In [None]:
# Same experiment with a single neighbour: build and train in one step.
knn = KNeighborsClassifier(n_neighbors=1).fit(X, y)

# Label the samples the model has already seen during training.
y_pred = knn.predict(X)

# Accuracy measured on the training data itself.
metrics.accuracy_score(y_true=y, y_pred=y_pred)

### Procedure 2: Train and test split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out part of the data for testing (test_size=0.2); the fixed
# random_state makes the split repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=4,
)

In [None]:
# Shapes of the training and testing feature arrays, one per line.
print(X_train.shape, X_test.shape, sep='\n')

In [None]:
# Shapes of the training and testing label arrays, one per line.
print(y_train.shape, y_test.shape, sep='\n')

### Logistic Regression

In [None]:
# Train a default logistic regression model on the training portion only.
logreg = LogisticRegression().fit(X_train, y_train)

# Predict labels for the held-out samples.
y_pred = logreg.predict(X_test)

# Testing accuracy: compare the predictions with the held-out labels.
metrics.accuracy_score(y_true=y_test, y_pred=y_pred)

### KNN (K = 5)

In [None]:
# 5-nearest-neighbours classifier trained on the training portion only.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

# Predict labels for the held-out samples.
y_pred = knn.predict(X_test)

# Testing accuracy: compare the predictions with the held-out labels.
metrics.accuracy_score(y_true=y_test, y_pred=y_pred)

### KNN (K = 1)

In [None]:
# 1-nearest-neighbour classifier trained on the training portion only.
knn = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

# Predict labels for the held-out samples.
y_pred = knn.predict(X_test)

# Testing accuracy: compare the predictions with the held-out labels.
metrics.accuracy_score(y_true=y_test, y_pred=y_pred)

### Find a better value for K

In [None]:
# Candidate neighbour counts: every integer from 1 through 25.
k_range = range(1, 26)

# scores[i] ends up holding the testing accuracy for the i-th value in k_range.
scores = []
for k in k_range:
    # Train on the training portion with the current K ...
    knn = KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
    # ... then score the predictions made for the held-out samples.
    y_pred = knn.predict(X_test)
    scores.append(metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
# Display the collected accuracy values as this cell's output.
scores

In [None]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

import matplotlib.pyplot as plt

# Line plot of the values in `scores` (y-axis) against the K values in
# `k_range` (x-axis), with both axes labelled.
plt.plot(k_range, scores)
plt.xlabel('Value of K for KNN')
plt.ylabel('Testing Accuracy')

### Select the best value for K

In [None]:
# Build the final classifier with K = 11 and train it on ALL samples
# (not just the training portion) before making real predictions.
knn = KNeighborsClassifier(n_neighbors=11).fit(X, y)

# One new observation, given as a list containing a single 4-value row.
X_new = [[3, 5, 4, 2]]

# Predicted class label for the new observation.
knn.predict(X_new)