# Library


In [1]:
import os
import numpy as np
import torch

In [None]:
# Device: use the CUDA device when torch reports one, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Read in the original training dataset


In [None]:
# Input / output locations; the output folder is created if it is missing
data_dir = "/data/"
output_dir = "/outputs/"
os.makedirs(output_dir, exist_ok=True)

# Load the full parameter table as float32
filename = os.path.join(data_dir, 'all_params.npy')
params_array = np.load(filename).astype(np.float32)

# Column names of the parameter table, in column order
# (assumes the file has one column per name listed here -- TODO confirm)
all_params = [
    'DC', 'DN', 'DA', 'DB',
    'aC', 'aA', 'aB', 'aT', 'aL',
    'bN',
    'dA', 'dB', 'dT', 'dL',
    'k1', 'k2',
    'KN', 'KP', 'KT', 'KA', 'KB',
    'alpha', 'beta', 'Cmax',
    'a', 'b', 'm', 'n',
    'Kphi', 'l', 'N0',
    'G1', 'G2', 'G3', 'G4', 'G5', 'G6', 'G7', 'G8', 'G9', 'G10',
    'G11', 'G12', 'G13', 'G14', 'G15', 'G16', 'G17', 'G18', 'G19',
    'alpha_p', 'beta_p', 'seeding_v',
]

# Subset of parameters to keep, and their column positions in the table
sceening_params = [
    'DC',
    'aC', 'aA', 'aT', 'aL',
    'dA', 'dT', 'dL',
    'alpha', 'beta',
    'Kphi', 'N0',
]
selected_param_idx = [all_params.index(name) for name in sceening_params]
params_array = params_array[:, selected_param_idx]

# Pattern types: only column 1 of the stored table is used
pattern_types_array = np.load(os.path.join(data_dir, 'all_types.npy'))[:, 1]

# Report the shapes of both arrays
print(f"PDE Parameters: {params_array.shape}")
print(f"Pattern types:  {pattern_types_array.shape}")

# Select data to augment

In [27]:
# Order samples by pattern type, largest first: argsort is ascending,
# so the index vector is reversed.
sorted_indices = np.argsort(pattern_types_array)[::-1]
sorted_pattern_types_array, sorted_params_array = (
    pattern_types_array[sorted_indices],
    params_array[sorted_indices],
)

# Keep 3+ ring patterns (pattern type 5 and above)
filter_mask = 5 <= sorted_pattern_types_array
filtered_pattern_types_array, filtered_params_array = (
    sorted_pattern_types_array[filter_mask],
    sorted_params_array[filter_mask],
)

In [None]:
# Save the filtered (not yet duplicated) parameters and pattern types.
# All paths are built with os.path.join so they stay correct even when
# output_dir is given without a trailing path separator.
# NOTE(review): a later cell in this notebook writes to the same three file
# names, so these files are overwritten once that cell runs -- confirm that
# this is intended.
filename = os.path.join(output_dir, 'augment_params.npy')
np.save(filename, filtered_params_array)
filename = os.path.join(output_dir, 'augment_types.npy')
np.save(filename, filtered_pattern_types_array)

# Save params as txt: comma-separated, 8 decimal places per value
filename = os.path.join(output_dir, 'augment_params.txt')
np.savetxt(filename, filtered_params_array, delimiter=',', fmt='%0.8f')
print(filename)

# Duplicate the data points selected for augmentation

In [33]:
def expand_to_num(types_array, params_array, total_num=10000):
    """Oversample every pattern type by repeating its samples.

    For each distinct value in ``types_array`` the matching entries, and the
    matching rows of ``params_array``, are repeated
    ``round(total_num / count)`` times, where ``count`` is how often that
    value occurs. The repeated entries are appended after the untouched
    originals, so a type ends up with ``count * (repeats + 1)`` entries in
    the returned arrays. Progress information is printed along the way.

    Args:
        types_array: 1-D array-like of pattern-type labels, one per sample.
        params_array: 2-D array-like whose row ``i`` holds the parameters of
            sample ``i``.
        total_num: Target number of repeated entries to add per type.
            Defaults to 10000. ``round`` rounds halves to the nearest even
            integer, and a type occurring more than ``2 * total_num`` times
            gets zero repeats.

    Returns:
        Tuple ``(types, params)``: the original entries followed by the
        repeated ones, grouped by type in ascending order of type value.
        Empty inputs are returned unchanged (as arrays).

    Raises:
        ValueError: If the two inputs differ in length or ``total_num`` is
            negative.
    """
    types_array = np.asarray(types_array)
    params_array = np.asarray(params_array)

    if total_num < 0:
        raise ValueError(f"total_num must be non-negative, got {total_num}")
    if len(types_array) != len(params_array):
        raise ValueError(
            "types_array and params_array must have the same length, got "
            f"{len(types_array)} and {len(params_array)}"
        )
    # Nothing to repeat; stacking an empty list of pieces would raise.
    if len(types_array) == 0:
        return types_array, params_array

    unique_values, counts = np.unique(types_array, return_counts=True)
    repeated_types = []
    repeated_params = []
    for value, count in zip(unique_values, counts):
        # int(...) guarantees an integer repeat count for np.repeat
        repeat_times = int(round(total_num / count))
        selected = np.where(types_array == value)[0]
        print('type to augment: ', value)

        print('final #', len(selected) * repeat_times)

        # Each selected entry / row is repeated in place, repeat_times times
        repeated_types.append(
            np.repeat(types_array[selected], repeat_times, axis=0)
        )
        repeated_params.append(
            np.repeat(params_array[selected, :], repeat_times, axis=0)
        )

    # type
    print(' ----------- Types ---------- ')
    expanded_array = np.hstack(repeated_types)
    print('ori type:', types_array.shape)
    print('extend type:', expanded_array.shape)
    types_array = np.concatenate((types_array, expanded_array), axis=0)
    print('total type:', types_array.shape)

    # param
    print(' ----------- params ---------- ')
    expanded_params_array = np.vstack(repeated_params)
    print('ori param:', params_array.shape)
    print('extend param:', expanded_params_array.shape)
    params_array = np.concatenate((params_array, expanded_params_array), axis=0)
    print('total param:', params_array.shape)
    return types_array, params_array


In [None]:
# Duplicate the selected samples: make sure the types are an ndarray, then
# let expand_to_num append the repeated entries to both arrays.
filtered_pattern_types_array = np.array(filtered_pattern_types_array)
(
    expanded_pattern_types_array,
    expanded_params_array,
) = expand_to_num(
    types_array=filtered_pattern_types_array,
    params_array=filtered_params_array,
)

In [35]:
# Save the expanded (original + repeated) parameters and pattern types.
# All paths are built with os.path.join so they stay correct even when
# output_dir is given without a trailing path separator.
filename = os.path.join(output_dir, 'augment_params.npy')
np.save(filename, expanded_params_array)
filename = os.path.join(output_dir, 'augment_types.npy')
np.save(filename, expanded_pattern_types_array)

# Save params as txt: comma-separated, 8 decimal places per value
filename = os.path.join(output_dir, 'augment_params.txt')
np.savetxt(filename, expanded_params_array, delimiter=',', fmt='%0.8f')