#Project: Multiclass Classification Of Flower Species

### Step 1: Import Classes and Functions

In [1]:
import numpy
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

### Step 2: Initialize Random Number Generator to a constant value
* to ensure that the results we achieve from this model can be achieved again precisely. 
* ensures that the stochastic process of training a neural network model can be reproduced.

In [2]:
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

### Step 3: Load The Dataset
split the attributes (columns) into input variables (X) and output variables (Y ).

In [3]:
# load dataset
dataframe = read_csv("iris.csv", header=None)
dataset = dataframe.values
X = dataset[:,0:4].astype(float)
Y = dataset[:,4]

### Step 4: Encode the Output Variable
Output = Iris-setosa, Iris-versicolor, Iris-virginica (strings)

**one hot encoding** or creating dummy variables from a categorical variable = reshape the output attribute from a vector that contains values for each class value to be a matrix with a boolean for each class value and whether or not a given instance has that class value or not.

**LabelEncoder** = encode the strings consistently to integers
**to_categorical** = convert the vector of integers to a one hot encoding

In [4]:
# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)
# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)

### Step 5: Define the Neural Network Model
KerasClassifier = returns the constructed neural network model

4 inputs -> [8 hidden nodes] -> 3 outputs

Hidden layers uses rectifier activation function (relu)

3 outputs = one for each class (because of one hot encoding) 
The output value with the largest value will be taken as the class predicted by the model.


In [5]:
# define baseline model
def baseline_model():
  # create model
  model = Sequential()
  model.add(Dense(8, input_dim=4, activation= 'relu' ))
  model.add(Dense(3, activation= 'softmax' ))
  # Compile model
  model.compile(loss= 'categorical_crossentropy' , optimizer= 'adam' , metrics=[ 'accuracy' ])
  return model

# create KerasClassifier 
estimator = KerasClassifier(build_fn=baseline_model, epochs=200, batch_size=5, verbose=0)

  if sys.path[0] == '':


### Step 6: Evaluate Model with k-Fold Cross Validation
set the number of folds to be 10 (an excellent default) and to
shuffle the data before partitioning it.

In [6]:
# Prepare Cross-Validation
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)

# Evaluate the NN Model
results = cross_val_score(estimator, X, dummy_y, cv=kfold)
print("Accuracy: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Accuracy: nan% (nan%)


10 fits failed out of a total of 10.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
10 fits failed with the following error:
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/usr/local/lib/python3.7/dist-packages/keras/wrappers/scikit_learn.py", line 236, in fit
    return super(KerasClassifier, self).fit(x, y, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/keras/wrappers/scikit_learn.py", line 155, in fit
    self.model = self.build_fn(**self.filter_sk_params(self.build_fn))
  File "<ipython-input-5-8f91117c8b13>", line 5, in baseline_model
    model.add(Dense(8, input

##Full Code

In [7]:
# Multiclass Classification with the Iris Flowers Dataset
import numpy
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder

# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

# load dataset
dataframe = read_csv("iris.csv", header=None)
dataset = dataframe.values
X = dataset[:,0:4].astype(float)
Y = dataset[:,4]

# encode class values as integers
encoder = LabelEncoder()
encoder.fit(Y)
encoded_Y = encoder.transform(Y)

# convert integers to dummy variables (i.e. one hot encoded)
dummy_y = np_utils.to_categorical(encoded_Y)

# define baseline model
def baseline_model():
  # create model
  model = Sequential()
  model.add(Dense(8, input_dim=4, activation= 'relu' ))
  model.add(Dense(3, activation= 'softmax' ))

  # Compile model
  model.compile(loss= 'categorical_crossentropy' , optimizer= 'adam' , metrics=[ 'accuracy' ])
  return model

estimator = KerasClassifier(build_fn=baseline_model, epochs=200, batch_size=5, verbose=0)
kfold = KFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, dummy_y, cv=kfold)
print("Accuracy: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))



Accuracy: 96.00% (4.42%)
