# Library

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os

# Load datasets

## Dataset 1

In [None]:
# Read in training data (dataset 1)
data_directory = "/data/"

# RFP profiles, reshaped to (n_samples, 3, 201)
data_array = np.load(os.path.join(data_directory, 'all_outputs.npy'))
data_array = data_array.reshape([-1, 3, 201])
# Scale each profile by its own maximum along the last axis.
# NOTE(review): a profile whose maximum is 0 would produce NaN/inf here --
# confirm the data never contains all-zero profiles.
norm_data_array = data_array / data_array.max(axis=2, keepdims=True)
# Report the normalized array (the original printed data_array under this label).
print(f"Normalized profiles: {norm_data_array.shape}")

# Parameters (original scale)
filename = os.path.join(data_directory, 'all_params.npy')
params_array = np.load(filename)
# [lower, upper] bound for each screened parameter.
scaling_ranges = {
    'DC': [0.5e-3, 12.5e-2],
    'aC': [0.1, 1],
    'aA': [100, 100000],
    'aT': [10, 8000],
    'aL': [5, 500],
    'dA': [0.001, 0.1],
    'dT': [3, 300],
    'dL': [0.144, 14.4],
    'alpha': [1, 5],
    'beta':  [2, 2000],
    'Kphi':  [1, 10],
    'N0':  [200000, 5000000]
}
# One scaling mode per screened parameter.
# NOTE(review): presumably listed in the same order as `sceening_params` -- confirm.
scaling_options = ['exp','linear','exp','exp','exp',
                   'exp','exp','exp','linear','exp',
                   'linear','linear']
# Column names of the full parameter table; positions in this list are used
# below as column indices into params_array.
all_params = ['DC', 'DN', 'DA', 'DB', 'aC','aA', 'aB', 'aT', 'aL', 'bN','dA', 'dB', 'dT', 'dL', 'k1', 
              'k2', 'KN', 'KP', 'KT', 'KA', 'KB', 'alpha','beta', 'Cmax', 'a', 'b', 'm', 'n', 'Kphi', 'l', 
              'N0', 'G1','G2','G3','G4','G5','G6','G7','G8','G9','G10','G11','G12', 'G13','G14',
             'G15','G16','G17','G18', 'G19', 'alpha_p','beta_p', 'seeding_v']
# Subset of parameters to keep (variable name kept as-is for compatibility).
sceening_params = ['DC',  'aC', 'aA', 'aT', 'aL', 'dA','dT', 'dL', 'alpha','beta','Kphi', 'N0']
selected_param_idx = [all_params.index(param) for param in sceening_params]
# Keep only the screened columns; selected_param_idx is reused for the augmented sets.
params_array = params_array[:, selected_param_idx]

# Pattern types
pattern_types_array = np.load(os.path.join(data_directory, 'all_types.npy'))

print('---------------------------------------------')
print(f"RFP profiles: {norm_data_array.shape}")
print(f"Parameters: {params_array.shape}")
print(f"Pattern types:  {pattern_types_array.shape}")


# --------------------------------------------------------------------
# Plot -- Create 100 panels (10x10), each showing a random data series
rows = 10
cols = 10
num_panels = rows * cols
fig, axs = plt.subplots(rows, cols, figsize=(20, 20))
fig.suptitle('Random RFP Data Series', fontsize=16)
# axs.flat walks the grid row by row, the same order as nested i/j loops.
# Sampling is unseeded, so the panels shown differ from run to run.
for ax in axs.flat:
    random_index = np.random.randint(norm_data_array.shape[0])
    # Plot the profile at index 1 of the 3 stored per sample.
    ax.plot(norm_data_array[random_index, 1, :])
    ax.set_ylim([0, 1])
    ax.axis('off')
plt.tight_layout()
plt.subplots_adjust(top=0.95)  # leave room for the suptitle
plt.show()

# Distribution of pattern-type labels in dataset 1
plt.hist(pattern_types_array)

## Dataset 2

In [None]:
# Load the first augmented dataset (stored under augment_1/)
data_directory_2 = f"{data_directory}augment_1/"
output_dir = data_directory_2

# RFP profiles: reshape to (n_samples, 9, 201), then keep only the first 3 rows
data_array_2 = np.load(
    os.path.join(data_directory_2, 'filtered_PDE_outputs.npy')
).reshape([-1, 9, 201])[:, 0:3, :]
# Per-profile maximum along the last axis, used as the normalization factor
norm_data_array_2 = data_array_2 / data_array_2.max(axis=2, keepdims=True)
print(f"Normalized profiles: {norm_data_array_2.shape}")

# Parameters (original scale), restricted to the screened columns
filename = f"{data_directory_2}filtered_PDE_params.npy"
params_array_2 = np.load(filename)[:, selected_param_idx]

# Pattern types
pattern_types_array_2 = np.load(os.path.join(data_directory_2, 'filtered_PDE_types.npy'))

print('---------------------------------------------')
print(f"RFP profiles: {norm_data_array_2.shape}")
print(f"Parameters: {params_array_2.shape}")
print(f"Pattern types:  {pattern_types_array_2.shape}")

# ----------------------------------
# 10x10 grid of panels, each drawing one randomly chosen sample
num_panels = 100
rows = cols = 10

fig, axs = plt.subplots(rows, cols, figsize=(20, 20))
fig.suptitle('Random RFP Data Series', fontsize=16)

n_samples_2 = norm_data_array_2.shape[0]
# ravel() visits the panels row by row
for panel in axs.ravel():
    random_index = np.random.randint(n_samples_2)
    panel.plot(norm_data_array_2[random_index, 1, :])
    panel.set_ylim([0, 1])
    panel.axis('off')

plt.tight_layout()
plt.subplots_adjust(top=0.95)
plt.show()

# Histogram of the pattern-type labels, followed by the distinct values present
plt.hist(pattern_types_array_2)
print(np.unique(pattern_types_array_2))


## Dataset 3

In [None]:
# Load the second augmented dataset (stored under augment_2/)
data_directory_3 = f"{data_directory}augment_2/"

# RFP profiles: reshape to (n_samples, 9, 201), then keep only the first 3 rows
data_array_3 = np.load(
    os.path.join(data_directory_3, 'filtered_PDE_outputs.npy')
).reshape([-1, 9, 201])[:, 0:3, :]
# Per-profile maximum along the last axis, used as the normalization factor
norm_data_array_3 = data_array_3 / data_array_3.max(axis=2, keepdims=True)
print(f"Normalized profiles: {norm_data_array_3.shape}")

# Parameters (original scale), restricted to the screened columns
filename = f"{data_directory_3}filtered_PDE_params.npy"
params_array_3 = np.load(filename)[:, selected_param_idx]

# Pattern types
pattern_types_array_3 = np.load(os.path.join(data_directory_3, 'filtered_PDE_types.npy'))

print('---------------------------------------------')
print(f"RFP profiles: {norm_data_array_3.shape}")
print(f"Parameters: {params_array_3.shape}")
print(f"Pattern types:  {pattern_types_array_3.shape}")

# ----------------------------------
# 10x10 grid of panels, each drawing one randomly chosen sample
num_panels = 100
rows = cols = 10

fig, axs = plt.subplots(rows, cols, figsize=(20, 20))
fig.suptitle('Random RFP Data Series', fontsize=16)

n_samples_3 = norm_data_array_3.shape[0]
# ravel() visits the panels row by row
for panel in axs.ravel():
    random_index = np.random.randint(n_samples_3)
    panel.plot(norm_data_array_3[random_index, 1, :])
    panel.set_ylim([0, 1])
    panel.axis('off')

plt.tight_layout()
plt.subplots_adjust(top=0.95)
plt.show()

# Histogram of the pattern-type labels, followed by the distinct values present
plt.hist(pattern_types_array_3)
print(np.unique(pattern_types_array_3))

# Concatenate

In [None]:
# Concatenate dataset 1 with the two augmented datasets and save the results
output_dir = "./outputs/"
# np.save / np.savetxt do not create missing directories, so on a fresh
# checkout the writes below would fail; make sure the target exists first.
os.makedirs(output_dir, exist_ok=True)

# outputs (the un-normalized profile arrays are what gets stored)
data_array_cont = np.concatenate((data_array,
                                  data_array_2,
                                  data_array_3,
                                 ), axis=0)
print(data_array_cont.shape)
filename = os.path.join(output_dir, 'all_outputs_cont.npy')
# np.concatenate already returns an ndarray; no extra np.array() copy needed.
np.save(filename, data_array_cont)
print(filename)

# params
params_array_cont = np.concatenate((params_array,
                                    params_array_2,
                                    params_array_3,
                                   ), axis=0)
print(params_array_cont.shape)
filename = os.path.join(output_dir, 'all_params_cont.npy')
np.save(filename, params_array_cont)
print(filename)

# save params as txt (comma-separated, 8 decimal places)
filename = os.path.join(output_dir, 'all_params_cont.txt')
np.savetxt(filename, params_array_cont, delimiter=',', fmt='%0.8f')
print(filename)

# Types
pattern_types_array_cont = np.concatenate((pattern_types_array,
                                           pattern_types_array_2,
                                           pattern_types_array_3,
                                          ), axis=0)
print(pattern_types_array_cont.shape)
filename = os.path.join(output_dir, 'all_types_cont.npy')
np.save(filename, pattern_types_array_cont)
print(filename)

In [None]:
# Histogram of column 1 of the concatenated pattern-type array;
# the per-bin counts and the bin edges are printed on separate lines.
type_column = pattern_types_array_cont[:, 1]
counts, bin_edges, _ = plt.hist(type_column)
print(counts, bin_edges, sep="\n")

In [None]:
# Rows in the concatenated set beyond dataset 1 come from the augmented sets.
final_dataset_size = len(pattern_types_array_cont)
num_augmented = final_dataset_size - len(params_array)
print('augmented data #: ', num_augmented)
print('final dataset size: ', final_dataset_size)