In [1]:
import pandas as pd
import numpy as np
import os
import pickle
import random
from glob import glob

from skimage.io import imread, imsave
from skimage.transform import resize
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
import math
from sklearn.metrics import mean_squared_error
from tensorflow.python.util.nest import flatten
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from tensorflow.keras.models import Sequential


# Seed every random number generator this notebook can draw from, so that a
# Restart & Run All is repeatable. NumPy alone is not enough: Keras weight
# initialisation and batch shuffling draw from TensorFlow's generator.
np.random.seed(0)
random.seed(0)
tf.random.set_seed(0)

In [2]:
# Directory that holds the image-sequence frames (*.jpg).
# The original absolute path stays as the default; set the IMAGE_SEQ_DIR
# environment variable to run the notebook on another machine.
data_path_base = os.environ.get(
    "IMAGE_SEQ_DIR",
    "/Users/xiaotongsun/Documents/ML for ME/HW7/Image Sequences",
)

In [3]:
# glob() returns matches in arbitrary, filesystem-dependent order. The frames
# are later cut into consecutive windows, so sort them to get a deterministic
# sequence.
# NOTE(review): lexicographic order equals temporal order only if the frame
# numbers in the file names are zero-padded — confirm against the dataset.
image_dirs = sorted(glob(f"{data_path_base}/*.jpg"))

In [4]:
# Store the list of frame paths as a one-column CSV (without the index column)
# so the loading cell can read it back from disk.
df = pd.DataFrame(data=image_dirs)
df.to_csv(path_or_buf="dataset.csv", index=False)

In [None]:
# Preview only the first rows with the rich notebook display instead of
# printing the whole frame as plain text.
df.head()

In [5]:
# Target resolution of every frame after resizing.
image_size = (128, 128)

# Frame paths are read back from the CSV written earlier (first column).
dataset = pd.read_csv("dataset.csv")
n_samples = len(dataset)
n_pixels = image_size[0] * image_size[1]

# One row per frame, one column per pixel of the resized image.
# NOTE(review): the row assignment only works if each resized frame flattens
# to exactly 128*128 values, i.e. presumably single-channel images — confirm.
data = np.empty((n_samples, n_pixels))
for row, path in enumerate(dataset.iloc[:, 0]):
    frame = np.float32(imread(path)) / 255.
    data[row] = resize(frame, image_size, anti_aliasing=True).ravel()

# Cache the pixel matrix on disk.
with open("raw.npy", "wb") as f:
    np.save(f, data)

In [None]:
# Show the dimensions of the pixel matrix.
np.shape(data)

In [6]:
# Reload the cached pixel matrix from disk.
data = np.load("raw.npy")

In [7]:
# Number of principal components kept per frame.
n_components = 100

# Fit PCA on the pixel matrix and project every frame onto the components.
# random_state is fixed because, depending on the input size, scikit-learn may
# select the randomized SVD solver, whose result otherwise changes between runs.
pca = PCA(n_components=n_components, random_state=0)
principalComponents = pca.fit_transform(data)

In [8]:
# Serialise the fitted PCA object so it can be reused without refitting.
with open("pca.pickle", "wb") as f:
    pickle.Pickler(f).dump(pca)

In [9]:
# Cache the projected frames (principal-component scores) on disk.
np.save("pcs.npy", principalComponents)

In [10]:
def sample_image_seq(seq_len, data, stride=1):
    """Cut an array into sliding windows along its first axis.

    Parameters
    ----------
    seq_len : int
        Number of consecutive entries in each window.
    data : numpy.ndarray
        Array whose first axis is the one being windowed.
    stride : int, optional
        Offset between the starts of two consecutive windows.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_windows, seq_len) + data.shape[1:]`` where
        ``num_windows = (len(data) - seq_len) // stride + 1``.
    """
    n_frames = data.shape[0]
    n_windows = (n_frames - seq_len) // stride + 1
    # Start index of every window, as a column so it broadcasts against the
    # within-window offsets below.
    window_starts = np.arange(0, n_windows * stride, stride)[:, np.newaxis]
    offsets = np.arange(seq_len)[np.newaxis, :]
    frame_idx = window_starts + offsets
    return data[frame_idx, ...]

In [11]:
def getXY_pcs(seq_len, data, stride=1):
    """Build aligned input/target window pairs from a sequence.

    Input windows are taken from ``data`` with its last ``seq_len`` entries
    removed; target windows are taken from ``data`` with its first ``seq_len``
    entries removed. Window ``k`` of the targets therefore starts ``seq_len``
    entries after window ``k`` of the inputs.

    Parameters
    ----------
    seq_len : int
        Length of every input window and every target window.
    data : numpy.ndarray
        Sequence to window along its first axis.
    stride : int, optional
        Offset between the starts of consecutive windows. It is forwarded to
        ``sample_image_seq`` for both inputs and targets (it was previously
        accepted but silently ignored).

    Returns
    -------
    tuple of numpy.ndarray
        ``(X_pcs, Y_pcs)``, both with the same number of windows.
    """
    total_len = data.shape[0]
    # Both slices have the same length, so the same stride yields the same
    # number of windows and keeps inputs and targets paired.
    X_pcs = sample_image_seq(seq_len, data[:total_len - seq_len, ...], stride=stride)
    Y_pcs = sample_image_seq(seq_len, data[seq_len:, ...], stride=stride)
    return X_pcs, Y_pcs

In [42]:
# Number of frames per input window and per target window.
seq_len = 10

# Input and target windows in principal-component space.
X_pcs, y_pcs = getXY_pcs(seq_len=seq_len, data=principalComponents)

# Pixel-space counterpart of the target windows, built with the same slicing
# that getXY_pcs applies to its targets.
# NOTE(review): this materialises every target window in pixel space although
# only the test rows are used afterwards — consider windowing only those.
future_frames = data[seq_len:]
y_true = sample_image_seq(seq_len, future_frames)

In [43]:
# Shapes of the input windows, target windows and pixel-space targets.
tuple(arr.shape for arr in (X_pcs, y_pcs, y_true))

((9982, 10, 100), (9982, 10, 100), (9982, 10, 16384))

In [44]:
total_num = len(X_pcs)
split_ratio = 0.9  # fraction of windows used for training
train_num = int(total_num * split_ratio)
test_num = total_num - train_num

# Draw the permutation from a dedicated, seeded generator rather than the
# global NumPy state: the split is then identical every time this cell runs,
# no matter how many random numbers earlier cells consumed.
indices = np.random.RandomState(0).permutation(total_num)

# NOTE(review): the windows are built with stride 1, so neighbouring windows
# share most of their frames. A random split therefore puts near-duplicate
# windows in both sets — consider a chronological split with a gap.
train_indices = indices[:train_num]
test_indices = indices[train_num:]

In [45]:
# Number of training and test windows.
tuple(map(len, (train_indices, test_indices)))

(8983, 999)

In [46]:
# Training inputs / targets in principal-component space.
train_X, train_y = X_pcs[train_indices], y_pcs[train_indices]

# Held-out inputs / targets in principal-component space.
test_X, test_y = X_pcs[test_indices], y_pcs[test_indices]

# Pixel-space targets for the held-out windows, used for visual comparison.
test_y_true = y_true[test_indices]

In [47]:
def create_RNN(hidden_units=100, time_steps=10, n_features=100):
    """Build and compile an LSTM encoder-decoder for sequence regression.

    The first LSTM reduces an input of shape ``(time_steps, n_features)`` to a
    single vector, ``RepeatVector`` copies that vector ``time_steps`` times,
    and a second LSTM followed by a per-time-step ``Dense`` layer produces an
    output of shape ``(time_steps, n_features)``.

    Parameters
    ----------
    hidden_units : int, optional
        Number of units in each LSTM layer.
    time_steps : int, optional
        Length of the input and output sequences.
    n_features : int, optional
        Size of the feature vector at each time step, for both input and
        output. Defaults to 100, the value that was previously hard-coded.

    Returns
    -------
    keras.Sequential
        Model compiled with mean-squared-error loss and the Adam optimizer.
    """
    model = keras.Sequential()
    # Encoder: returns only the final state, summarising the input sequence.
    model.add(layers.LSTM(hidden_units, activation='relu', input_shape=(time_steps, n_features)))
    # Feed the summary vector to the decoder at every output time step.
    model.add(layers.RepeatVector(time_steps))
    # Decoder: emits one hidden vector per output time step.
    model.add(layers.LSTM(hidden_units, activation='relu', return_sequences=True))
    # Map each decoder output back to the feature dimension.
    model.add(layers.TimeDistributed(layers.Dense(n_features)))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

In [48]:
# File that receives the weights of the best model seen during training.
checkpoint_filepath = '/Users/xiaotongsun/Documents/ML for ME/HW7/best_model7.hdf5'
model = create_RNN(hidden_units=128, time_steps=10)

# Stop once val_loss has not improved for 5 epochs and roll back to the best
# weights seen so far.
monitor = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto', restore_best_weights=True)

# Write the weights to disk whenever val_loss reaches a new minimum.
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)

# A fraction 0.2 of the training windows is held out for validation.
history = model.fit(train_X, train_y, batch_size=32, validation_split=0.2, shuffle=True, callbacks=[monitor, model_checkpoint_callback], verbose=2, epochs=100)

# summary() prints the table itself and returns None; wrapping it in print()
# added a stray "None" line to the output.
model.summary()

Epoch 1/100
225/225 - 4s - loss: 2.0794 - val_loss: 2.0687
Epoch 2/100
225/225 - 2s - loss: 2.0482 - val_loss: 2.0417
Epoch 3/100
225/225 - 2s - loss: 2.0202 - val_loss: 2.0227
Epoch 4/100
225/225 - 2s - loss: 2.0007 - val_loss: 2.0097
Epoch 5/100
225/225 - 2s - loss: 1.9846 - val_loss: 2.0000
Epoch 6/100
225/225 - 2s - loss: 1.9694 - val_loss: 1.9914
Epoch 7/100
225/225 - 2s - loss: 1.9543 - val_loss: 1.9810
Epoch 8/100
225/225 - 2s - loss: 1.9403 - val_loss: 1.9728
Epoch 9/100
225/225 - 2s - loss: 1.9292 - val_loss: 1.9674
Epoch 10/100
225/225 - 2s - loss: 1.9170 - val_loss: 1.9619
Epoch 11/100
225/225 - 2s - loss: 1.9052 - val_loss: 1.9576
Epoch 12/100
225/225 - 3s - loss: 1.8952 - val_loss: 1.9515
Epoch 13/100
225/225 - 2s - loss: 1.8847 - val_loss: 1.9496
Epoch 14/100
225/225 - 2s - loss: 1.8766 - val_loss: 1.9478
Epoch 15/100
225/225 - 2s - loss: 1.8658 - val_loss: 1.9443
Epoch 16/100
225/225 - 3s - loss: 1.8590 - val_loss: 1.9405
Epoch 17/100
225/225 - 3s - loss: 1.8495 - val_lo

In [49]:
# Predict the target windows in principal-component space.
y_pred_pcs = model.predict(test_X)

# PCA.inverse_transform is documented for 2-D input (n_samples, n_components).
# Flatten the (window, time step) axes before the call and restore them
# afterwards instead of relying on undocumented N-D behaviour.
_flat_pred_pcs = y_pred_pcs.reshape(-1, y_pred_pcs.shape[-1])
y_pred_image = pca.inverse_transform(_flat_pred_pcs).reshape(y_pred_pcs.shape[:-1] + (-1,))

In [50]:
# Dimensions of the reconstructed predictions.
print(np.shape(y_pred_image))

(999, 10, 16384)


In [51]:
# Test window 1: reconstructed predictions (top row) against the pixel-space
# targets (bottom row) for the first three time steps.
sample = 1
plt.figure()
for col in range(3):
    predicted = y_pred_image[sample, col].reshape(128, 128)
    actual = test_y_true[sample, col].reshape(128, 128)
    plt.subplot(2, 3, col + 1)
    plt.imshow(predicted, cmap="gray")
    plt.subplot(2, 3, col + 4)
    plt.imshow(actual, cmap="gray")
plt.savefig("/Users/xiaotongsun/Documents/ML for ME/HW7/predict_vs_true7.jpg")
plt.close()

In [52]:
# Reconstruct the held-out targets from their principal-component scores.
# PCA.inverse_transform is documented for 2-D input, so flatten the
# (window, time step) axes for the call and restore them afterwards.
_flat_test_y = test_y.reshape(-1, test_y.shape[-1])
y_true_projected = pca.inverse_transform(_flat_test_y).reshape(test_y.shape[:-1] + (-1,))

# Test window 1: PCA reconstruction of the targets (top row) against the
# original pixel-space targets (bottom row) for the first three time steps.
plt.figure()
for i in range(1, 4, 1):
    plt.subplot(2, 3, i)
    plt.imshow(y_true_projected[1, i-1, ...].reshape(128, 128), cmap="gray")
    plt.subplot(2, 3, 3 + i)
    plt.imshow(test_y_true[1, i-1, ...].reshape(128, 128), cmap="gray")
plt.savefig("/Users/xiaotongsun/Documents/ML for ME/HW7/true7.jpg")
plt.close()

In [53]:
# Test window 500: reconstructed predictions (top row) against the pixel-space
# targets (bottom row) for the first three time steps.
sample = 500
plt.figure()
for col in range(3):
    predicted = y_pred_image[sample, col].reshape(128, 128)
    actual = test_y_true[sample, col].reshape(128, 128)
    plt.subplot(2, 3, col + 1)
    plt.imshow(predicted, cmap="gray")
    plt.subplot(2, 3, col + 4)
    plt.imshow(actual, cmap="gray")
plt.savefig("/Users/xiaotongsun/Documents/ML for ME/HW7/predict_vs_true7_2.jpg")
plt.close()