In [34]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv3D, UpSampling3D, Add
from tensorflow.keras.models import Model


# Clear tensor sessions to free up memory

tf.keras.backend.clear_session()


In [35]:
import netCDF4 as nc
from netCDF4 import num2date
import numpy as np

# Load the data for both years
#data_2020 = nc.Dataset('/Users/heyj/Desktop/sql project/2020data.nc')
#data_2021 = nc.Dataset('/Users/heyj/Desktop/sql project/2020data.nc')
dataset = nc.Dataset('/Users/heyj/Desktop/ARM/Analogue-Nowcasting-Model/ResNet_V3/Data20_21.nc') #(choose a specific timestamp: 00UTC for each day)


times_dataset = dataset['time'][:]
dates_dataset = nc.num2date(times_dataset, units=dataset['time'].units)
all_dates = dates_dataset


In [36]:
print(dataset.variables.keys())


dict_keys(['longitude', 'latitude', 'level', 'time', 'z', 'r', 'u', 'v'])


In [37]:
# 700r, 850z, 850u, 850v, 925u, 925v
r700 = dataset['r'][:,0,:,:]
z850 = dataset['z'][:,1,:,:]
u850 = dataset['u'][:,1,:,:]
v850 = dataset['v'][:,1,:,:]
u925 = dataset['u'][:,2,:,:]
v925 = dataset['u'][:,2,:,:]

In [32]:
np.info(r700)

class:  MaskedArray
shape:  (731, 29, 61)
strides:  (14152, 488, 8)
itemsize:  8
aligned:  True
contiguous:  True
fortran:  False
data pointer: 0x7f8865146000
byteorder:  little
byteswap:  False
type: float64


In [38]:
range_r700 = calculate_range(r700)
range_z850 = calculate_range(z850)
range_u850 = calculate_range(u850)
range_v850 = calculate_range(v850)
range_u925 = calculate_range(u925)
range_v925 = calculate_range(v925)

# Printing the ranges
print("Range of r700:", range_r700)
print("Range of z850:", range_z850)
print("Range of u850:", range_u850)
print("Range of v850:", range_v850)
print("Range of u925:", range_u925)
print("Range of v925:", range_v925)

Range of r700: (0.3653841018676758, 112.01492309570312)
Range of z850: (14406.385834474615, 15139.173973656401)
Range of u850: (-16.829574584960938, 25.113705800722833)
Range of v850: (-17.32435415839366, 16.448002542807707)
Range of u925: (-16.304343371265926, 22.2310414650944)
Range of v925: (-16.304343371265926, 22.2310414650944)


In [24]:
# Preprocessing the data

# Modified code using StandardScaler for Z-score normalization
from sklearn.preprocessing import StandardScaler


scaler = StandardScaler()

u925_normalized = scaler.fit_transform(u925.reshape(-1, 1)).reshape(u925.shape)
v925_normalized = scaler.fit_transform(v925.reshape(-1, 1)).reshape(v925.shape)

u850_normalized = scaler.fit_transform(u850.reshape(-1, 1)).reshape(u850.shape)
v850_normalized = scaler.fit_transform(v850.reshape(-1, 1)).reshape(v850.shape)
z850_normalized = scaler.fit_transform(z850.reshape(-1, 1)).reshape(z850.shape)

r700_normalized = scaler.fit_transform(r700.reshape(-1, 1)).reshape(r700.shape)


# Combine the parameters to form a single dataset
data_combined = np.stack((u925_normalized, v925_normalized, u850_normalized, v850_normalized,z850_normalized,r700_normalized), axis=-1)

# Pad the data to get even dimension
padded_data = np.pad(data_combined, ((0, 0), (0, 1), (0, 1), (0, 0)), mode='constant')


from sklearn.model_selection import train_test_split

# Split the data into training and validation sets
X_train_padded, X_val_padded = train_test_split(padded_data, test_size=0.3, shuffle=False)


In [25]:
print(np.shape(u925))



(731, 29, 61)


In [26]:
print(u925_normalized)

[[[0.39164526 0.37893297 0.37249341 ... 0.20493143 0.21025324 0.21227186]
  [0.40467452 0.39221247 0.37264356 ... 0.21070368 0.21861132 0.22408328]
  [0.3917954  0.39428114 0.38935971 ... 0.21522472 0.22276534 0.23072303]
  ...
  [0.36635414 0.36924027 0.38246972 ... 0.42576157 0.4285476  0.436205  ]
  [0.38288679 0.38637349 0.40077075 ... 0.43421975 0.43650529 0.44511361]
  [0.40022021 0.3994528  0.41181475 ... 0.4484168  0.44796637 0.45550699]]

 [[0.33936138 0.33707584 0.34948784 ... 0.15531681 0.15379867 0.15233059]
  [0.34067932 0.33849388 0.33966167 ... 0.16505956 0.15922058 0.15643455]
  [0.35527677 0.34933769 0.36181642 ... 0.17098195 0.16312435 0.15998799]
  ...
  [0.38111841 0.38233626 0.38497214 ... 0.4670348  0.46641754 0.46875313]
  [0.38618998 0.3881085  0.3923793  ... 0.48026426 0.47646058 0.47822895]
  [0.39049414 0.39318007 0.39835174 ... 0.49739748 0.49247606 0.49257616]]

 [[0.37449535 0.37277702 0.36984085 ... 0.14642488 0.14403924 0.14322178]
  [0.38807514 0.384388

In [7]:
X_train_padded.shape

(511, 30, 62, 6)

In [27]:
print(X_train_padded[0].shape)
input = Input(shape=X_train_padded[0].shape)

(30, 62, 6)


In [28]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, UpSampling2D, Add
from tensorflow.keras.models import Model, load_model

# Define ResNet block
def resnet_block(input_tensor, filters, kernel_size=(3, 3), strides=(1, 1)):
    x = Conv2D(filters, kernel_size, strides=strides, padding='same')(input_tensor)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.Activation('relu')(x)
    
    x = Conv2D(filters, kernel_size, padding='same')(x)
    x = tf.keras.layers.BatchNormalization()(x)
    
    # Adjust the shortcut connection
    if strides != (1, 1) or input_tensor.shape[-1] != filters:
        shortcut = Conv2D(filters, (1, 1), strides=strides, padding='same')(input_tensor)
    else:
        shortcut = input_tensor
    
    x = Add()([x, shortcut])
    x = tf.keras.layers.Activation('relu')(x)
    return x

In [29]:
# Encoder
x = resnet_block(input, 32)
x = resnet_block(x, 64)
x = resnet_block(x, 128)
x = resnet_block(x, 256)
encoded = resnet_block(x, 256, strides=(2, 2))

# Decoder
x = UpSampling2D((2, 2))(encoded)
x = resnet_block(x, 256)
x = UpSampling2D((2, 2))(encoded)
x = resnet_block(x, 128)
x = UpSampling2D((2, 2))(encoded)
x = resnet_block(x, 64)
x = UpSampling2D((1, 1))(x)
x = resnet_block(x, 32)
decoded = Conv2D(6, (3, 3), activation='linear', padding='same')(x)

# Compile the autoencoder
autoencoder = Model(input, decoded)
autoencoder.compile(optimizer='adam', loss='mse', metrics=['accuracy'])

In [30]:
autoencoder.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 30, 62, 6)]          0         []                            
                                                                                                  
 conv2d_54 (Conv2D)          (None, 30, 62, 32)           1760      ['input_2[0][0]']             
                                                                                                  
 batch_normalization_36 (Ba  (None, 30, 62, 32)           128       ['conv2d_54[0][0]']           
 tchNormalization)                                                                                
                                                                                                  
 activation_36 (Activation)  (None, 30, 62, 32)           0         ['batch_normalization_36

 add_21 (Add)                (None, 30, 62, 256)          0         ['batch_normalization_43[0][0]
                                                                    ',                            
                                                                     'conv2d_65[0][0]']           
                                                                                                  
 activation_43 (Activation)  (None, 30, 62, 256)          0         ['add_21[0][0]']              
                                                                                                  
 conv2d_66 (Conv2D)          (None, 15, 31, 256)          590080    ['activation_43[0][0]']       
                                                                                                  
 batch_normalization_44 (Ba  (None, 15, 31, 256)          1024      ['conv2d_66[0][0]']           
 tchNormalization)                                                                                
          

In [31]:
# Train the model 
history = autoencoder.fit(X_train_padded, X_train_padded, epochs=20, batch_size=32, validation_data=(X_val_padded, X_val_padded))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20


KeyboardInterrupt: 

In [14]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

decoded_val = autoencoder.predict(X_val_padded)

# Mean Squared Error (MSE)
mse = mean_squared_error(X_val_padded.flatten(), decoded_val.flatten())

# Mean Absolute Error (MAE)
mae = mean_absolute_error(X_val_padded.flatten(), decoded_val.flatten())

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)

Mean Squared Error: 0.4601659701685599
Mean Absolute Error: 0.4382819244608609
