<a href="https://colab.research.google.com/github/urness/CS167Fall22Code/blob/main/Day21_Notes_ANNs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Day 21 Code: Artificial Neural Networks

We're going to start off by using scikit-learn's MLP classes to implement a multilayer perceptron, and then we're going to use a deep learning framework, TensorFlow with Keras, to build a neural network.

In [None]:
# Colab setup: pandas for the CSV work below, plus the Drive helper so the
# course datasets stored in Google Drive can be read from this runtime.
import pandas
from google.colab import drive

# Make Google Drive available under /content/drive.
drive.mount('/content/drive')

In [None]:
# Load the iris dataset from the mounted Drive folder and preview the first rows.
iris_csv_path = '/content/drive/MyDrive/CS167Fall22/Datasets/irisData.csv'
data = pandas.read_csv(iris_csv_path)
data.head()

In [None]:
import pandas
import numpy
from sklearn.model_selection import train_test_split

# Split the dataset: every column except the label column is a predictor.
target = "species"
predictors = data.columns.drop(target)
train_data, test_data, train_sln, test_sln = train_test_split(
    data[predictors],
    data[target],
    test_size=0.2,
    random_state=41,
)

# Normalize the data. The scaler is fitted on the training split only and the
# same fitted scaler is then applied to the test split.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
train_data_norm = scaler.fit_transform(train_data)
test_data_norm = scaler.transform(test_data)


## Build out a Multilayer Perceptron using Scikit-Learn:
Here are the links to the documentation: 
- [sklearn.neural_network.MLPRegressor](https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html)
- [sklearn.neural_network.MLPClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPClassifier.html)


In [None]:
# Set up MLP
from sklearn.neural_network import MLPClassifier
from sklearn import metrics
from sklearn.metrics import confusion_matrix

# MLP with a single hidden layer of 100 units, max_iter=800, and a fixed
# random_state.
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=800, random_state=0)
mlp.fit(train_data_norm, train_sln)
predictions = mlp.predict(test_data_norm)

print("Accuracy: ", metrics.accuracy_score(test_sln, predictions))

# Confusion matrix, labelled with "True <species>" rows and "Pre <species>"
# columns.
vals = data[target].unique()  # possible classification values (species)
conf_mat = confusion_matrix(test_sln, predictions, labels=vals)
conf_table = pandas.DataFrame(conf_mat, index="True " + vals, columns="Pre " + vals)
print(conf_table)

## In-Class Exercise:

1. Read in the Boston Housing dataset
2. Normalize your data
3. Use a [MLPRegressor](https://scikit-learn.org/stable/modules/generated/sklearn.neural_network.MLPRegressor.html) to predict the price of a house 'MEDV'
4. Play around with changing the parameters, and see what the best R2 score you can get is.


In [None]:
# Your code goes here for the In-Class Exercise
# 1. Read in the Boston Housing dataset
import pandas
housing_data = pandas.read_csv('/content/drive/MyDrive/CS167Fall22/Datasets/HousingData.csv')

# Clean the data: fill any missing values in these columns with the column mean.
# The filled columns are assigned back to the frame rather than calling
# fillna(inplace=True) on a single-column selection. That chained in-place form
# raises a FutureWarning in recent pandas and no longer updates the frame once
# Copy-on-Write is enabled.
columns_to_fill = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'AGE', 'LSTAT']
housing_data[columns_to_fill] = housing_data[columns_to_fill].fillna(
    housing_data[columns_to_fill].mean()
)

# Split the dataset: every column except MEDV is a predictor.
predictors = housing_data.columns.drop('MEDV')
target = "MEDV"
train_data, test_data, train_sln, test_sln = train_test_split(
    housing_data[predictors],
    housing_data[target],
    test_size=0.2,
    random_state=0,
)



In [None]:
#2. Normalize the data
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply it to both splits.
scaler = StandardScaler().fit(train_data)
train_data_normalized = scaler.transform(train_data)
test_data_normalized = scaler.transform(test_data)

In [None]:
#3. Use a MLPRegressor to predict the price of a house 'MEDV'
from sklearn.neural_network import MLPRegressor
from sklearn import metrics

# Regressor with library-default settings apart from a fixed random_state.
mlp = MLPRegressor(random_state=0).fit(train_data_normalized, train_sln)
predictions = mlp.predict(test_data_normalized)

# R2 of the predictions against the held-out test targets.
housing_r2 = metrics.r2_score(test_sln, predictions)
print("MLP Regression R2:", housing_r2)


MLP Regression R2: 0.49660446847147843




In [None]:
#4. Play around with the parameters


# Introducing Deep Learning Frameworks

Go up to 'Runtime', select 'Change runtime type' from the dropdown list, and then select 'GPU'. If you complete this step correctly, the following code should say `Found GPU at: /device:GPU:0` or something similar.

In [None]:
import tensorflow as tf

# Ask TensorFlow for its GPU device name and stop with an error unless it is
# the expected '/device:GPU:0'.
device_name = tf.test.gpu_device_name()
if not device_name == '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

# Iris Dataset with Keras

In [None]:
import warnings
# Silence FutureWarning and DeprecationWarning before the imports below run.
warnings.simplefilter('ignore', FutureWarning)
warnings.simplefilter('ignore', DeprecationWarning)

from keras.models import Sequential
from keras.layers import Dense

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import numpy

# This time the iris dataset is loaded from sklearn.
iris = load_iris()
X, y = iris['data'], iris['target']
names, feature_names = iris['target_names'], iris['feature_names']

# One-hot encode the labels. The label vector is reshaped into a single column
# before encoding, and the encoder's sparse result is converted to a dense array.
enc = OneHotEncoder()
Y = enc.fit_transform(y.reshape(-1, 1)).toarray()

# Split the data set into training and testing (test_size=0.2).
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=2
)

# Normalize the data: fit the scaler on the training features only, then apply
# the same fitted scaler to the test features.
scaler = StandardScaler()
X_train_norm = scaler.fit_transform(X_train)
X_test_norm = scaler.transform(X_test)



In [None]:
# Show the first training example: its normalized features, then its one-hot label.
print(X_train_norm[0])
print(Y_train[0])

[ 0.37346331 -0.58519388  0.54075378  0.74234434]
[0. 0. 1.]


In [None]:
# Build our neural network model
n_features = X.shape[1]  # X generally stands for our predictors
n_classes = Y.shape[1]   # Y generally stands for our target

# Two hidden layers of 2 ReLU units each, followed by a softmax output layer
# with one unit per class.
model = Sequential(
    [
        Dense(2, input_dim=n_features, activation='relu'),
        Dense(2, activation='relu'),
        Dense(n_classes, activation='softmax'),
    ],
    name='iris_1',
)

from sklearn.metrics import r2_score

# Compile model: SGD optimizer, mean squared error as the loss, and accuracy
# reported alongside it. (categorical_crossentropy is the more common loss for
# one-hot softmax outputs; MSE is what this notebook uses.)
model.compile(optimizer='sgd', loss='mean_squared_error', metrics=['accuracy'])
model.summary()

[Keras Model Training APIs](https://keras.io/api/models/model_training_apis/)


In [None]:
# Train the model (add verbose=0 to make the output minimal)
model.fit(x=X_train_norm, y=Y_train, epochs=5, batch_size=15)

# Evaluate on the held-out test split; the two returned values are unpacked as
# the loss (mean squared error) and the accuracy.
print("----"*30)
mse, acc = model.evaluate(x=X_test_norm, y=Y_test)
print('Mean Squared Error:',mse)
print('Test accuracy:', acc)



# Boston Housing Dataset with Keras


In [None]:
from keras.datasets import boston_housing

# This time the Boston housing dataset is loaded from Keras, which returns it
# already divided into a training part and a test part (test_split=0.2).
housing_data = boston_housing.load_data(test_split=0.2)
train_part, test_part = housing_data
X_train, Y_train = train_part
X_test, Y_test = test_part


In [None]:
# Normalize the data: fit the scaler on the training features, then apply the
# same fitted scaler to both the training and the test features.
scaler = StandardScaler().fit(X_train)
X_train_norm = scaler.transform(X_train)
X_test_norm = scaler.transform(X_test)

In [None]:
### need to execute this to get access to RSquare function
!pip install tensorflow_addons

In [None]:
from keras import models
from keras import layers

from tensorflow_addons.metrics import RSquare

# Build our model
n_features = X_train.shape[1]  # number of input values for the input layer

# Initialize the model. models.Sequential comes from the import at the top of
# this cell, so the cell does not rely on the `Sequential` name imported in the
# earlier iris section.
model = models.Sequential(name='boston_housing1')

# Add some layers. Dense is a fully connected layer.
model.add(layers.Dense(64, input_dim=n_features, activation='relu'))
model.add(layers.Dense(64, activation='relu'))

# Since we are doing a regression, we only want one value as output, so the
# last layer is a Dense layer with 1 neuron.
model.add(layers.Dense(1))  # default activation function is "linear"

from sklearn.metrics import r2_score
# Compile model: mean squared error loss, SGD optimizer, and R-squared as the
# reported metric. Metrics are passed as a list, the form compile() documents.
model.compile(loss='mean_squared_error',
              optimizer='sgd',
              metrics=[RSquare()])
model.summary()

In [None]:
# Train the model
model.fit(x=X_train_norm, y=Y_train, batch_size=15, epochs=100, verbose=1)

print("----"*30)

# Evaluate on the test split; the two returned values are unpacked as the loss
# (MSE) and the R-squared metric.
test_mse_score, test_r2_score = model.evaluate(x=X_test_norm, y=Y_test)
print('MSE:', test_mse_score)
print('r2:', test_r2_score)

# In Class Exercise #2
What parameters from the models above do you think you can/should change? 


Try these
- Change the number of neurons in each layer.  
- Add a layer to the model.
- Change the activation function of the model, [here is the documentation](https://keras.io/api/layers/activations/)
- Change the optimizer, [here is the documentation](https://keras.io/api/optimizers/) with a list of options
- Look at the [metrics](https://keras.io/api/metrics/) documentation and try adding another metric.
