#  IRIS Classification

### Classification of IRIS dataset using Machine Learning

In [None]:
#import libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import linear_model, datasets,metrics
#draw plots in the browser
%matplotlib inline 

### IRIS data set
#### https://en.wikipedia.org/wiki/Iris_flower_data_set

In [None]:
from IPython.display import Image
from IPython.core.display import HTML
# Display the picture found at the URL below (an Iris versicolor photo,
# going by its file name) as this cell's output.
Image(
    url="https://upload.wikimedia.org/wikipedia/commons/4/41/Iris_versicolor_3.jpg",
    width=200,
    height=200,
)

In [None]:
# Load the Iris dataset that ships with scikit-learn's `datasets` module
iris = datasets.load_iris()

# Summarise what was loaded: array shape, class names and feature names.
# (The second label previously repeated "Shape of the data" by mistake.)
print ("Shape of the data ", iris.data.shape)
print ("Target names ", iris.target_names)
print ("Attributes ", iris.feature_names)

# Preview the first 5 samples and their class labels
print (iris.data[:5])
print (iris.target[:5])

In [None]:
# Show the data as a table.
# The feature names are passed directly as `columns` so the frame gets a
# flat column index. Assigning a list wrapped in another list
# (`df.columns = [names]`) makes pandas build a MultiIndex instead, which
# turns simple column selection into a DataFrame rather than a Series.
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['Class'] = iris.target                      # class label of each sample
df['Name'] = iris.target_names[iris.target]    # label looked up in target_names
df.head()

In [None]:
# Targets are the class labels; the inputs are limited to the first two
# feature columns, which the decision-boundary plot later uses as its axes.
Y = iris.target
X = iris.data[:, 0:2]

### Generate Model

In [None]:
# Logistic Regression classifier with C=1e5.
# fit() returns the estimator itself, so construction and training on the
# complete data set (X, Y) are chained into one statement.
logreg = linear_model.LogisticRegression(C=1e5).fit(X, Y)

# Predict on the very same samples the model was trained on
predicted = logreg.predict(X)

### Predict

In [None]:
# Append the model output to the table: the predicted label and, next to it,
# the class name that label maps to in iris.target_names.
df['Predicted'] = predicted
df['Predicted Name'] = iris.target_names[predicted]
# df.head() would show the first rows instead
df.tail() # last rows of the table

### Plot decision boundaries

In [None]:
# Draw the decision regions: classify every node of a fine grid covering
# [x_min, x_max] x [y_min, y_max] and colour the grid by predicted class.

h = 0.02  # spacing between neighbouring grid nodes
x_min = X[:, 0].min() - 0.5
x_max = X[:, 0].max() + 0.5
y_min = X[:, 1].min() - 0.5
y_max = X[:, 1].max() + 0.5
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, h),
    np.arange(y_min, y_max, h),
)

# One prediction per grid node, folded back into the grid's 2-D shape
Z = logreg.predict(np.column_stack((xx.ravel(), yy.ravel()))).reshape(xx.shape)

plt.figure(1, figsize=(7, 5))
plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

# Overlay the training samples, coloured by their true class
plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k', cmap=plt.cm.Paired)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')

# Limit the view to the grid extent and hide the tick marks
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.xticks(())
plt.yticks(())

plt.show()

### Plot confusion matrix

In [None]:
# Cross-tabulate the true labels (rows) against the predicted labels (columns)
confusion_matrix = pd.crosstab(
    index=iris.target,
    columns=predicted.ravel(),
    rownames=['Expected'],
    colnames=['Predicted'],
)

# Render the counts as an annotated heatmap without a colour bar
sns.heatmap(
    confusion_matrix,
    annot=True,
    square=False,
    fmt='',
    cbar=False,
)
plt.title("Classification Matrix", fontsize=15)
plt.show()

### Classification Report

In [None]:
# Print scikit-learn's text report comparing the true labels with the predictions
print(metrics.classification_report(y_true=iris.target, y_pred=predicted))

## <span style="color:cornflowerblue">Exercise:</span>

1. Use the code below to split the data into a training set (70%) and a test set (30%). Then plot the classification matrix for any two other classifiers and compare their performance with the Logistic Regression classifier. Here is a list of available classifiers: http://scikit-learn.org/stable/supervised_learning.html#supervised-learning 

2. Write a reusable function that takes in any number of classifiers and produces a confusion matrix, an accuracy score, and other useful metrics about the classification.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing; random_state is pinned to 0 so the
# split is seeded with the same value on every run.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.3,
    random_state=0,
)

# Sanity check: shapes of the training and test portions
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

In [None]:
# Logistic Regression, this time trained on the training split only.
# NOTE: this rebinds `logreg` and `predicted`, which the earlier cells
# (decision-boundary plot, confusion matrix, classification report) also read.
logreg = linear_model.LogisticRegression(C=1e5).fit(X_train, y_train)

# Predict labels for the held-out test samples
predicted = logreg.predict(X_test)

In [None]:
# Labels the model predicted for the test samples
predicted

In [None]:
# True labels of the test samples, for comparison with the predictions
y_test