In [1]:
import math
import os

import netCDF4 as nc
import numpy as np
from tqdm import tqdm

In [2]:
def fix_missing_value(input_data, missing_value=-32768):
    """
    Replace missing-value sentinels with 0, in place.

    Args:
        input_data: a numpy array of any dimensionality, or a sequence
            (e.g. a list) of numpy arrays. It is modified in place.
        missing_value: the value that marks a missing entry. Defaults to
            -32768, the sentinel this function has always replaced.

    Returns:
        None. The result is written into ``input_data`` itself.
    """
    if isinstance(input_data, np.ndarray):
        # One vectorised pass over the whole array. Unlike a row-by-row loop
        # this also works for 1-D input, where each "row" would be a scalar
        # that does not support item assignment.
        input_data[input_data == missing_value] = 0
        return
    # Generic sequence of arrays: patch each member in place.
    for arr in input_data:
        arr[arr == missing_value] = 0

In [3]:
def normalize_matrix(matrix):
    """
    Scale an array so that its 2-norm (Frobenius norm for a matrix) is 1.

    The input is NOT modified: a new array is returned, so callers must use
    the return value.

    Args:
        matrix: array-like of numbers.

    Returns:
        A new array equal to ``matrix / norm(matrix)``. If the norm is 0
        (every entry is zero) an all-zero float array of the same shape is
        returned instead, because dividing 0 by 0 would fill the result
        with NaN.
    """
    norm = np.linalg.norm(matrix)
    if norm == 0:
        # Nothing to scale; avoid 0/0 -> NaN.
        return np.zeros_like(matrix, dtype=float)
    return matrix / norm

In [4]:
lat = 128 # chunk height in rows; passed to split_array as `lat`
long = 128 # chunk width in columns; passed to split_array as `long`
def split_array(input_data, lat, long,region=100):
    """
    Cut a 2-D field into lat x long chunks and return the selected one.

    The field is zero-padded at the bottom and on the right so that both of
    its dimensions become multiples of the chunk size. Chunks are numbered
    row-major: index 0 is the top-left chunk, the index increases from left
    to right along a row of chunks and then continues with the next row of
    chunks below.

    The field shape is taken from ``input_data`` itself (it used to be
    hard-coded as 1500 rows by 3600 columns), so any 2-D shape is accepted.

    Args:
        input_data: 2-D array-like (rows, columns), e.g. data of a single
            day from an eta_t instance.
        lat: chunk height, in rows.
        long: chunk width, in columns.
        region: index of the chunk to return. Anything numpy accepts as a
            first-axis index works (negative ints, slices, ...).

    Returns:
        For an integer ``region``, an array of shape (lat, long).

    Raises:
        ValueError: if ``input_data`` is not 2-D.
        IndexError: if ``region`` is outside the range of available chunks.
    """
    input_data = np.asarray(input_data)
    if input_data.ndim != 2:
        raise ValueError(
            "split_array expects a 2-D array, got %d dimension(s)" % input_data.ndim
        )

    n_rows, n_cols = input_data.shape
    # `-n % size` is the distance from n up to the next multiple of size
    # (0 when n is already a multiple).
    pad_rows = -n_rows % lat   # zero rows appended at the bottom
    pad_cols = -n_cols % long  # zero columns appended on the right

    padded = np.pad(input_data, ((0, pad_rows), (0, pad_cols)),
                    'constant', constant_values=(0, 0))

    row_chunks = padded.shape[0] // lat
    col_chunks = padded.shape[1] // long

    # (row_chunks, lat, col_chunks, long) -> (row_chunks, col_chunks, lat, long)
    # -> (row_chunks * col_chunks, lat, long): a row-major stack of chunks,
    # the same order as splitting along rows first and then along columns.
    chunks = (padded
              .reshape(row_chunks, lat, col_chunks, long)
              .swapaxes(1, 2)
              .reshape(row_chunks * col_chunks, lat, long))
    return chunks[region]

In [14]:
# Download the monthly eta_t files year by year, preprocess every daily
# field and save one chunk (`region`) per day as a .npy file in `folder`.
folder = 'dataset1'
num_of_years = 2
region = 100

basic_url = 'http://dapds00.nci.org.au/thredds/dodsC/gb6/BRAN/BRAN_2016/OFAM/ocean_eta_t_'
first_year = 1994

# np.save does not create missing directories, so make sure the target exists.
os.makedirs(folder, exist_ok=True)

for year in range(first_year, first_year + num_of_years):
    y = str(year)
    for month in tqdm(range(1, 13)):
        m = '%02d' % month  # zero-padded month, as used in the remote file names
        print('year: ',year,' month: ',month)
        url = basic_url + y + '_' + m + '.nc'

        # Read the whole month into memory, then close the dataset right away
        # so remote handles do not pile up across the 12 * num_of_years files.
        with nc.Dataset(url) as data:
            eta_t = data.variables['eta_t'][:]

        size = eta_t.shape[0]  # number of daily fields in this file
        for day in range(size):
            eta_t_numpy = np.array(eta_t[day])
            fix_missing_value(eta_t_numpy)  # in place
            # normalize_matrix returns a NEW array; the result must be kept,
            # otherwise the normalisation is silently dropped.
            eta_t_numpy = normalize_matrix(eta_t_numpy)
            inputs = split_array(eta_t_numpy, lat, long, region)
            inputsx = np.expand_dims(inputs, axis=2)  # append a trailing axis of length 1
            # File name layout is kept as-is: 128x128_<year>_<month>_<day>.npy, day is 1-based.
            file_name = './'+folder+'/128x128_'+y+'_'+m+'_'+str(day+1)+'.npy'
            np.save(file_name, inputsx)

  0%|          | 0/12 [00:00<?, ?it/s]

year:  1994  month:  1


  8%|▊         | 1/12 [06:16<1:08:57, 376.16s/it]

year:  1994  month:  2


 17%|█▋        | 2/12 [12:29<1:02:34, 375.45s/it]

year:  1994  month:  3


 25%|██▌       | 3/12 [17:34<53:07, 354.21s/it]  

year:  1994  month:  4


 33%|███▎      | 4/12 [21:38<42:49, 321.23s/it]

year:  1994  month:  5


 42%|████▏     | 5/12 [25:17<33:52, 290.39s/it]

year:  1994  month:  6


 50%|█████     | 6/12 [28:26<26:00, 260.14s/it]

year:  1994  month:  7


 58%|█████▊    | 7/12 [31:54<20:21, 244.31s/it]

year:  1994  month:  8


 67%|██████▋   | 8/12 [35:25<15:37, 234.35s/it]

year:  1994  month:  9


 75%|███████▌  | 9/12 [38:17<10:47, 215.72s/it]

year:  1994  month:  10


 83%|████████▎ | 10/12 [41:04<06:42, 201.17s/it]

year:  1994  month:  11


 92%|█████████▏| 11/12 [43:55<03:11, 191.89s/it]

year:  1994  month:  12


100%|██████████| 12/12 [47:14<00:00, 236.24s/it]
  0%|          | 0/12 [00:00<?, ?it/s]

year:  1995  month:  1


  8%|▊         | 1/12 [03:18<36:19, 198.13s/it]

year:  1995  month:  2


 17%|█▋        | 2/12 [06:06<31:33, 189.31s/it]

year:  1995  month:  3


 25%|██▌       | 3/12 [09:08<28:03, 187.09s/it]

year:  1995  month:  4


 33%|███▎      | 4/12 [12:12<24:48, 186.09s/it]

year:  1995  month:  5


 42%|████▏     | 5/12 [15:41<22:29, 192.85s/it]

year:  1995  month:  6


 50%|█████     | 6/12 [19:16<19:56, 199.50s/it]

year:  1995  month:  7


 58%|█████▊    | 7/12 [23:48<18:26, 221.27s/it]

year:  1995  month:  8


 67%|██████▋   | 8/12 [27:28<14:43, 220.90s/it]

year:  1995  month:  9


 75%|███████▌  | 9/12 [35:26<14:53, 297.98s/it]

year:  1995  month:  10


 83%|████████▎ | 10/12 [41:49<10:46, 323.45s/it]

year:  1995  month:  11


 92%|█████████▏| 11/12 [50:05<06:15, 375.30s/it]

year:  1995  month:  12


100%|██████████| 12/12 [55:22<00:00, 276.91s/it]
