## Fundamentals of TensorFlow and learning algorithms
- [Tutorial Video](https://www.youtube.com/watch?v=tPYj3fFJGjk&t=1958s)
- [TensorFlow Introduction Module](https://colab.research.google.com/drive/1F_EWVKa8rbMXi3_fG0w7AtcscFq7Hi7B#forceEdit=true&sandboxMode=true&scrollTo=UU4MMhB_rxvz)
- [Learning Algorithms Module](https://colab.research.google.com/drive/15Cyy2H7nT40sGR7TBN5wBvgTd57mVKay#forceEdit=true&sandboxMode=true&scrollTo=qcII_xj9Ntyo)

### Install and import packages 

In [1]:
# pip install tensorflow
# pip install scikit-learn

from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib

import tensorflow.compat.v2.feature_column as fc
import tensorflow as tf  
from tensorflow.keras import layers, models
print(tf.version) # Should be >= 2

<module 'tensorflow._api.v2.version' from 'c:\\Users\\chris\\OneDrive\\COMP 560\\Midterm Project\\comp-560-project\\Virtual_Environment\\Lib\\site-packages\\tensorflow\\_api\\v2\\version\\__init__.py'>


### Practice data about the Titanic

In [2]:
# Load Titanic deaths dataset - try to find linear pattern correlating to likelihood of death
dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv') # training data
dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv') # testing data
y_train = dftrain.pop('survived')
y_eval = dfeval.pop('survived')
print(y_train, '\n', y_eval)

clear_output()
dftrain.head() # Returns the first 5 entries of the dataframe

Unnamed: 0,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone
0,male,22.0,1,0,7.25,Third,unknown,Southampton,n
1,female,38.0,1,0,71.2833,First,C,Cherbourg,n
2,female,26.0,0,0,7.925,Third,unknown,Southampton,y
3,female,35.0,1,0,53.1,First,C,Southampton,n
4,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y


### Practice data about the Titanic

In [3]:
# Put the feature columns into a format it can handle
# Define input layers
inputs = {
    'sex': tf.keras.Input(shape=(1,), dtype=tf.string),
    'n_siblings_spouses': tf.keras.Input(shape=(1,), dtype=tf.int64),
    'parch': tf.keras.Input(shape=(1,), dtype=tf.int64),
    'class': tf.keras.Input(shape=(1,), dtype=tf.string),
    'deck': tf.keras.Input(shape=(1,), dtype=tf.string),
    'embark_town': tf.keras.Input(shape=(1,), dtype=tf.string),
    'alone': tf.keras.Input(shape=(1,), dtype=tf.string),
    'age': tf.keras.Input(shape=(1,), dtype=tf.float32),
    'fare': tf.keras.Input(shape=(1,), dtype=tf.float32),
}
# Define categorical processing
sex_lookup = tf.keras.layers.StringLookup(vocabulary=['male', 'female'], output_mode='one_hot')
class_lookup = tf.keras.layers.StringLookup(vocabulary=['Third', 'First', 'Second'], output_mode='one_hot')
deck_lookup = tf.keras.layers.StringLookup(vocabulary=['unknown', 'C', 'G', 'A', 'B', 'D', 'F', 'E'], output_mode='one_hot')
embark_lookup = tf.keras.layers.StringLookup(vocabulary=['Southampton', 'Cherbourg', 'Queenstown', 'unknown'], output_mode='one_hot')
alone_lookup = tf.keras.layers.StringLookup(vocabulary=['n', 'y'], output_mode='one_hot')

# Apply lookup layers to categorical inputs
encoded_features = [
    sex_lookup(inputs['sex']),
    class_lookup(inputs['class']),
    deck_lookup(inputs['deck']),
    embark_lookup(inputs['embark_town']),
    alone_lookup(inputs['alone']),
]

# Define normalizer layers for numerical features
age_normalizer = tf.keras.layers.Normalization()
fare_normalizer = tf.keras.layers.Normalization()

# Normalize numerical features
encoded_features.append(age_normalizer(inputs['age']))
encoded_features.append(fare_normalizer(inputs['fare']))

x = tf.keras.layers.concatenate(encoded_features)
# Define dense layers
x = tf.keras.layers.Dense(128, activation='relu')(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
output = tf.keras.layers.Dense(1, activation='sigmoid')(x)

# Create and compile the model
model = tf.keras.Model(inputs=inputs, outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()



In [37]:
# Train the model
# Prepare training dataset
train_dataset = tf.data.Dataset.from_tensor_slices((
    {col: np.array(dftrain[col]) for col in dftrain.columns},  # Feature dictionary
    y_train  # Labels
)).batch(32)  # Set batch size

# Prepare evaluation dataset
eval_dataset = tf.data.Dataset.from_tensor_slices((
    {col: np.array(dfeval[col]) for col in dfeval.columns},
    y_eval
)).batch(32)

# Fit the model
model.fit(train_dataset, epochs=10, validation_data=eval_dataset)

Epoch 1/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8161 - loss: 0.4344 - val_accuracy: 0.7727 - val_loss: 0.4785
Epoch 2/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8148 - loss: 0.4276 - val_accuracy: 0.7765 - val_loss: 0.4787
Epoch 3/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8123 - loss: 0.4276 - val_accuracy: 0.7727 - val_loss: 0.4789
Epoch 4/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8188 - loss: 0.4229 - val_accuracy: 0.7803 - val_loss: 0.4789
Epoch 5/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8109 - loss: 0.4252 - val_accuracy: 0.7727 - val_loss: 0.4791
Epoch 6/10
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8155 - loss: 0.4208 - val_accuracy: 0.7765 - val_loss: 0.4795
Epoch 7/10
[1m20/20[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x19b661f8ec0>

### Predict chance of survival for a single person

In [24]:
# Convert eval_dataset to a NumPy-friendly format
# def dataset_to_numpy(dataset):
#     return np.array([list(row.values()) for row in dataset])
# Ensure proper input format
input_dict = {col: np.array(dfeval[col]) for col in dfeval.columns}

# Make predictions
# predictions = model.predict(dataset_to_numpy(dfeval))
predictions = model.predict(input_dict)


# Old code below
# predictions = list(model.predict(eval_dataset))
person = np.random.randint(0, len(dfeval))
clear_output()
print(dfeval.loc[person])
print(f"\nPredicted chance of survival: {round(predictions[person][0]*100, 2)}%")
survival = "yes" if y_eval[person] else "no"
print(f"Did they survive: {survival}")

sex                          male
age                          54.0
n_siblings_spouses              1
parch                           0
fare                         26.0
class                      Second
deck                      unknown
embark_town           Southampton
alone                           n
Name: 67, dtype: object

Predicted chance of survival: 15.95%
Did they survive: no


In [36]:
# type(dfeval)
# Ensure proper input format
# input_dict = {col: np.array(dfeval[col]) for col in dfeval.columns}
# input_dict
# predictions
loss, accuracy = model.evaluate(eval_dataset) # Evaluate the model

clear_output()
print('\nAccuracy tested against evaluation dataset:', round(accuracy*100, 2), "%")


Accuracy tested against evaluation dataset: 78.03 %
