#  Extrapolation of CNN Trained on Mars Satellite Data to Classify Lunar Features
### by Aniruddha Prasad and Andrew Hartnett

The following notebook will train a Convolutional Neural Network (CNN) on satellite images taken of Mars from the HiRISE dataset. Afterwards, it will conduct transfer learning onto similar satellite images of Earth's Moon. The model is tested on its ability to classify the **geography(?)** of the Moon having trained on mostly if not entirely on images of Mars.

## Prepare the training data

In [2]:
# Import all required libraries and functions:
import numpy as np
from PIL import Image
import tensorflow as tf
import os

import cv2
import csv
from tensorflow import keras
from tensorflow.keras import utils, layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, Dense, Flatten, Dropout, BatchNormalization
import matplotlib.pyplot as plt
%matplotlib inline

import pathlib
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, array_to_img, load_img
from tensorflow.keras.utils import to_categorical

import pandas as pd

## The way Neihusst Preprocesses their data:

In [None]:
data_images = []
data_labels = []
rel_img_path = 'map-proj/' # add path of folder to image name for later loading

# open up the labeled data file
with open('labels-map-proj.txt') as labels:
  for line in labels:
    file_name, label = line.split(' ')
    data_images.append(rel_img_path + file_name)
    data_labels.append(int(label))

# divide data into testing and training (total len 3820)
train_images, test_images, train_labels, test_labels = train_test_split(
    data_images, data_labels, test_size=0.15, random_state=666)
test_len = len(test_images)   # 573
train_len = len(train_images) # 3247

# label translations
class_labels = ['other','crater','dark_dune','streak',
                'bright_dune','impact','edge']


### Data Preprocessing

In [None]:
#convert image paths into numpy matrices
def parse_image(filename):
  img_obj = Image.open(filename)
  img = np.asarray(img_obj).astype(np.float32)
  #normalize image to 0-1 range
  img /= 255.0
  return img

train_images = np.array(list(map(parse_image, train_images)))
test_images = np.array(list(map(parse_image, test_images)))

### Convert labels to one-hot encoding

In [None]:
def to_one_hot(label):
  encoding = [0 for _ in range(len(class_labels))]
  encoding[label] = 1
  return np.array(encoding).astype(np.float32)

train_labels = np.array(list(map(to_one_hot, train_labels)))
test_labels = np.array(list(map(to_one_hot, test_labels)))

In [1]:
# Path to find HiRISE images at
base_path='"C:\Users\Andrew\Documents\Final-Project---ECE-697ML"'

# Width/height of the images
size = 120

# Load the hirise-map-proj-v3 folder into variables
hirise_images = load_images(base_path + 'hirise-map-proj-v3/', size)
images = np.array(hirise_images)

# Need to store the images as 227x227 matrices in a larger array

# If you want to load one image at a time, you can use the PIL libraries image
# function and the np assaray function.

# Below code is used to load images into a format for machine learning:


# os.chdir('pictures')

# files = tf.data.Dataset.list_files('*jpg') # Change to png if needed

# def load_images(path):
#     image = tf.io.read_file(path)
#     image = tf.io.decode_jpeg(image)
#     image = tf.image.convert_image_dtype(image, tf.float32) # optional
#     image = tf.image.resize(image, (size, size))              # optional
#     return image 

# ds = files.map(load_images(path)).batch(1)


# Now you can store the array into cells using a for loop 
# img = Image.open('sample.png')
# numpydata = asarray(img)

# Translate the y values from label-map-proj-v3.txt using landmarks_map-proj-v3_classmap.csv


# Print the shapes of each to check that we have 1 image for 1 classification




NameError: name 'load_images' is not defined

In [None]:
# Split the images into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2, random_state=42, shuffle=True)

## Define and train the CNN

### An example of Performing image classifaction with the keras library:

*https://www.tensorflow.org/tutorials/images/classification*

The way the data is arranged in this example is that in a directory of all images, each class gets its own folder. This is how they are effectively labeled. This could be a way we could do it as well.

In [None]:
# Define the neural network, would be nice to show the use of GridSearchCV and a param_grid for optimization
num_classes = len(class_names)

model = Sequential([
  layers.Rescaling(1./255, input_shape=(img_height, img_width, 3)),
  layers.Conv2D(16, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(32, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Conv2D(64, 3, padding='same', activation='relu'),
  layers.MaxPooling2D(),
  layers.Flatten(),
  layers.Dense(128, activation='relu'),
  layers.Dense(num_classes)
])

# Compile the model:

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])


# Train the model:

epochs = 10
history = model.fit(train_ds, validation_data = val_ds, epochs=epochs)

#Train_ds and val_ds is how the image data is stored. We need to store the data in a similar fashion

In [None]:
# Print the best_params_ for GridSearchCV


In [None]:
# Evaluate the CNN on validation set

# Make a plot for the clout


## Evaluate the CNN on remaining Mars images

In [None]:
# Evaluate the CNN on testing set

# Make a plot for the clout


#### Short response to our findings:
Was the output expected? what did we do for optimizations? is it overfit/underfit?

## Evaluate the CNN on Lunar images

In [None]:
# Load Lunar images into a test set


In [None]:
# Evaluate the CNN on Lunar set - we could use data augmentation to increase size of labeled images

# Make a plot for the clout


#### Short response to our findings:
Was the output expected? what did we do for optimizations? is it overfit/underfit?

## Perform transfer learning using a small set of Lunar images

In [None]:
# Learn how to do transfer learning and do that here

# Probably split the lunar images into training and test

# Retrain CNN with Mars and some Lunar images


## Evaluate the transfer learning CNN on Lunar images

In [None]:
# Evaluate the CNN on remaining Lunar set

# Make a plot for the clout


## Compare the results of before/after transfer learning for Lunar images

This could probably be done in text, although a cool double-plot comparison could look good. Something like X: epochs, Y: accuracy with both before/after transfer learning models on the same plot.

## Do something similar with Random Forest Classifier as a base, compare the two approaches based on their test set evaluation

In [None]:
# Same thing, just different model
