## Important parameters
<br>xt_ocean: longitude, length 3600
<br>yt_ocean: latitude, length 1500
<br> [mind map](https://miro.com/app/board/o9J_lM4N1Pg=/?fromRedirect=1)

In [1]:
import netCDF4 as nc4
from tensorflow import keras
import xarray,numpy as np
import math
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from tensorflow.keras import layers
import tensorflow as tf

In [2]:
import os
# Hide all CUDA devices from this process; the intent appears to be CPU-only
# execution (the saved model name later in the notebook ends in "-CPU").
# NOTE(review): tensorflow is already imported in the first cell. This
# presumably still takes effect because no GPU has been initialised yet --
# confirm, or move this cell above the tensorflow import.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# Load data

In [3]:
import glob

# All samples from every dataset folder, accumulated in load order.
input_data_split = []

# Folders dataset_184 .. dataset_194 (range end is exclusive).
for index in range(184, 195):
    data_path = f'C:/Users/Yuxuan/Desktop/data/dataset_{index}/*.np[yz]'
    # glob.glob() returns matches in arbitrary (filesystem-dependent) order.
    # Sorting makes the sample order -- and therefore the seeded train/test
    # split and any fixed sample index used later -- reproducible across runs.
    # NOTE(review): the pattern also matches .npz files, for which np.load
    # returns an archive object rather than an array -- confirm that only
    # .npy files are present in these folders.
    for np_name in sorted(glob.glob(data_path)):
        input_data_split.append(np.load(np_name))
    # Running total of loaded samples after each folder.
    print(len(input_data_split))

# Stack the per-file arrays into a single array and display its shape.
input_data_split = np.array(input_data_split)
input_data_split.shape

8279
16558
24837
33116
41395
49674
57953
66232
74511
82790
91069


(91069, 128, 128, 1)

# Fix Missing Value

In [4]:
def fix_missing_value(input_data):
    """
    Replace every NaN in ``input_data`` with 0, one sample at a time.

    ``input_data`` is a sequence of NumPy arrays (for example an array whose
    first axis indexes the samples). Each sample is modified in place and the
    very same ``input_data`` object is returned.
    """
    for sample in input_data:
        nan_mask = np.isnan(sample)
        sample[nan_mask] = 0
    return input_data

# Min Max Scaling

In [5]:
def min_max_scale(input_data, max_value, min_value):
    """
    Rescale every sample to ``(sample - min_value) / (max_value - min_value)``.

    Each scaled sample is written back into ``input_data`` at its own index,
    so the argument itself is modified and the same object is returned.
    Pass a copy if the unscaled values are still needed afterwards.
    """
    value_range = max_value - min_value
    for idx, sample in enumerate(input_data):
        input_data[idx] = (sample - min_value) / value_range
    return input_data

# Preprocessing

In [6]:
# import random
# n = 3000 # sample size
# random.seed(7)
# input_data_split = np.array(random.sample(input_data_split.tolist(),n))
# input_data_split.shape

In [7]:
# Replace NaNs with 0 (in place) before computing the dataset-wide extremes.
input_data_split = fix_missing_value(input_data_split)
max_value = np.amax(input_data_split)
min_value = np.amin(input_data_split)
# min_max_scale writes into its argument and returns that same object.
# Scaling a copy keeps input_data_split (and the test_set later split from it)
# in raw units; otherwise the inference cells, which apply
# (x - min_value) / (max_value - min_value) themselves, would scale the data
# a second time.
input_data_split_scaled = min_max_scale(input_data_split.copy(), max_value, min_value)

In [8]:
# Show the dataset-wide maximum and minimum used for min-max scaling,
# one value per line.
print(max_value, min_value, sep='\n')

1.0681478
-0.18311106


In [9]:
# Display the shape of the scaled dataset (the notebook echoes the value of
# the cell's last expression).
input_data_split_scaled.shape

(91069, 128, 128, 1)

# Split train set & validation set

In [10]:
# Split the raw and the scaled arrays in a single call so both receive exactly
# the same shuffled indices: sample i of test_set corresponds to sample i of
# test_set_scaled by construction rather than by repeating random_state.
train_set, test_set, train_set_scaled, test_set_scaled = train_test_split(
    input_data_split,
    input_data_split_scaled,
    test_size=0.3333333,
    random_state=26,
)

# The split already yields arrays, so read .shape directly; wrapping the
# training set in np.array() would copy all of it just to display its shape.
train_set_scaled.shape

(60712, 128, 128, 1)

# Autoencoder

In [11]:
lat = 128  # chunk size
long = 128  # chunk size

# Single-channel lat x long input chunk.
input_img = keras.Input(shape=(lat, long, 1))

# Encoder: 4x4 'same' convolutions with shrinking filter counts. Only the
# first two stages are followed by 2x2 max pooling, so the spatial size is
# halved twice in total.
x = input_img
for n_filters, downsample in ((64, True), (32, True), (16, False), (8, False)):
    x = layers.Conv2D(n_filters, (4, 4), activation='relu', padding='same')(x)
    if downsample:
        x = layers.MaxPooling2D((2, 2), padding='same')(x)
# Bottleneck: 4 feature maps at the pooled resolution.
encoded = layers.Conv2D(4, (4, 4), activation='relu', strides=(1, 1), padding='same')(x)

# Decoder: transposed convolutions; the two stride-2 stages undo the two
# pooling steps of the encoder.
x = encoded
for n_filters, stride in ((8, 1), (16, 2), (32, 2), (64, 1), (64, 1)):
    x = layers.Conv2DTranspose(
        n_filters, (4, 4), strides=(stride, stride), activation='relu', padding='same'
    )(x)
# Sigmoid output keeps reconstructions in [0, 1], the range of min-max scaled data.
decoded = layers.Conv2DTranspose(1, (3, 3), activation='sigmoid', padding='same')(x)

# Train the network to reproduce its own input under mean squared error.
autoencoder = keras.Model(input_img, decoded)
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 128, 128, 1)]     0         
_________________________________________________________________
conv2d (Conv2D)              (None, 128, 128, 64)      1088      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 64, 64, 64)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 64, 64, 32)        32800     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 32, 32, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 32, 32, 16)        8208      
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 32, 32, 8)         2056  

In [None]:
# Stop once val_loss has failed to improve by at least min_delta for
# `patience` consecutive epochs.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.000001,
    patience=5,
)

# Autoencoder training: the input doubles as the target, for both the
# training data and the validation data.
history = autoencoder.fit(
    train_set_scaled,
    train_set_scaled,
    batch_size=512,
    epochs=1000,
    callbacks=[callback],
    validation_data=(test_set_scaled, test_set_scaled),
)

Epoch 1/1000


In [None]:
# Save the trained autoencoder to disk under a descriptive name.
autoencoder.save("autoencoder(32,32,4) layer(7-6) 11 regions all data-CPU")

In [None]:
import matplotlib.pyplot as plt

# Per-epoch training loss followed by validation loss, drawn on one axis.
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])

plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
# Legend entries follow the plotting order above.
plt.legend(['train', 'validation'], loc='upper right')
plt.show()

In [None]:
test_sample_index = 122

# Pick one sample from the hold-out split.
# NOTE(review): this cell applies min-max scaling itself, so test_set is
# expected to hold unscaled values -- confirm that it does, otherwise the
# sample is scaled twice.
original_data = test_set[test_sample_index]

# Same min-max scaling that was applied to the training data.
value_range = max_value - min_value
test_data = (original_data - min_value) / value_range

# predict() works on batches, so add a leading batch axis of length 1.
decoded_data = autoencoder.predict(test_data[np.newaxis, ...])
# Map the reconstruction back to the original value range.
decoded_data = decoded_data * value_range + min_value

In [None]:
# Side-by-side view: original sample on the left, reconstruction on the right.
fig = plt.figure(figsize=(16, 32))
ax1, ax2 = (fig.add_subplot(1, 2, position) for position in (1, 2))
ax1.imshow(original_data)
# The prediction carries batch and channel axes; drop them for display.
ax2.imshow(decoded_data.reshape(128, 128))

In [None]:
# Display the first row of the selected original sample.
original_data[0]

In [None]:
# Largest value in the selected original sample.
np.amax(original_data)

In [None]:
# Smallest value in the selected original sample.
np.amin(original_data)

In [None]:
# Largest value in the rescaled reconstruction, for comparison with the original.
np.amax(decoded_data)

In [None]:
# Smallest value in the rescaled reconstruction, for comparison with the original.
np.amin(decoded_data)

In [None]:
# Heat map of the per-pixel reconstruction error (original minus decoded).
# No earlier cell creates ax3, so build a dedicated figure and axis here
# instead of failing with a NameError.
fig = plt.figure(figsize=(16, 16))
ax3 = fig.add_subplot(1, 1, 1)
ax3.imshow((original_data - decoded_data).reshape(128, 128), cmap='hot')

In [None]:
test_sample_index = 6546

# Pick one sample from the hold-out split.
original_data = test_set[test_sample_index]

# Same min-max scaling that was applied to the training data.
value_range = max_value - min_value
test_data = (original_data - min_value) / value_range

# predict() works on batches, so add a leading batch axis of length 1,
# then map the reconstruction back to the original value range.
decoded_data = autoencoder.predict(test_data[np.newaxis, ...])
decoded_data = decoded_data * value_range + min_value

# Original on the left, reconstruction on the right.
fig = plt.figure(figsize=(16, 32))
ax1, ax2 = (fig.add_subplot(1, 2, position) for position in (1, 2))
ax1.imshow(original_data)
ax2.imshow(decoded_data.reshape(128, 128))

# Mean squared reconstruction error for this sample.
squared_error = np.square(original_data - decoded_data)
print('mse: ', squared_error.mean())

In [None]:
test_sample_index = 110

# Pick one sample from the hold-out split.
original_data = test_set[test_sample_index]

# Same min-max scaling that was applied to the training data.
value_range = max_value - min_value
test_data = (original_data - min_value) / value_range

# predict() works on batches, so add a leading batch axis of length 1,
# then map the reconstruction back to the original value range.
decoded_data = autoencoder.predict(test_data[np.newaxis, ...])
decoded_data = decoded_data * value_range + min_value

# Original on the left, reconstruction on the right, both with the 'hot' colormap.
fig = plt.figure(figsize=(16, 32))
ax1, ax2 = (fig.add_subplot(1, 2, position) for position in (1, 2))
ax1.imshow(original_data, cmap='hot')
ax2.imshow(decoded_data.reshape(128, 128), cmap='hot')

# Mean squared reconstruction error for this sample.
squared_error = np.square(original_data - decoded_data)
print('mse: ', squared_error.mean())

In [None]:
# The earlier cells only print the error and never bind it to a name, so
# compute the mean squared error of the currently selected sample here
# before displaying it (a bare `mse` would raise NameError).
mse = np.square(original_data - decoded_data).mean()
mse

In [None]:
# autoencoder.save("autoencoder(32,32,4) layer(7-6) 9 regions all data")