In [26]:
import sys
sys.path.append("../../src")
import os
import datetime
import pandas as pd
import numpy as np
from sindy_utils import library_size
import tensorflow as tf
from training import train_network
from preprocess_utils import split_data, build_network_layers
import pickle

In [27]:
# Initialize the parameter dictionary that later cells fill in
params = dict()

In [37]:
# Parameters used for defining the preprocessing over the dataset
params.update({
    # working directory at the moment this cell runs, with a trailing slash
    'data_path': os.getcwd() + '/',
    # the window length of averaging
    'window_size': 4,
    'stride': 2,
})

In [38]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    The file is opened in binary mode and closed again before returning.

    NOTE(review): ``pickle.load`` can execute arbitrary code while
    deserializing, so only point this at files produced by a trusted
    pipeline.
    """
    with open(path, 'rb') as pkl_file:
        return pickle.load(pkl_file)


# All three files are looked up relative to the current working directory.
name_genes = _load_pickle("gene_names.pkl")
training_dict = _load_pickle("time_series.pkl")
preprocess_params = _load_pickle("preprocess_params.pkl")

In [None]:
# Copy every stored preprocessing setting into params. A key that is already
# present in params is overwritten by the value from preprocess_params.
params.update(preprocess_params)

In [39]:
# Split the data into training and validation sets
# (validation_ratio=0.1 is handed to split_data as-is).
training_data, validation_data = split_data(
    training_dict,
    validation_ratio=0.1,
)


In [None]:
# Verify the shapes of the 'x', 'dx' and 'classes' entries:
# first line is the training data, second line is the validation data.
for split in (training_data, validation_data):
    print(*(split[field].shape for field in ('x', 'dx', 'classes')))

In [None]:
# Per-class element counts for the training set.
check = training_data['classes']
# Wrap the labels in a DataFrame unless they already are one.
classes_df = check if isinstance(check, pd.DataFrame) else pd.DataFrame(check)

# Column-wise sum (axis=0) gives the count of each class
class_counts = classes_df.sum(axis=0)

# Dictionary form for easy access: {column label: count}
class_counts_dict = class_counts.to_dict()

print("Number of elements of each class:", class_counts_dict, sep="\n")

In [None]:
# Per-class element counts for the validation set.
check = validation_data['classes']
# Wrap the labels in a DataFrame unless they already are one.
classes_df = check if isinstance(check, pd.DataFrame) else pd.DataFrame(check)

# Column-wise sum (axis=0) gives the count of each class
class_counts = classes_df.sum(axis=0)

# Dictionary form for easy access: {column label: count}
class_counts_dict = class_counts.to_dict()

print("Number of elements of each class:", class_counts_dict, sep="\n")


In [None]:
# Target folder name to track the experiments
params['folder'] = 'test'

# Single source of truth for the latent dimension: the value stored in params
# is taken from this variable so the two can never drift apart.
latent_dim = 6
params['model'] = 'Bone Marrows'
# NOTE(review): presumably has to match the feature dimension of
# training_data['x'] -- confirm before changing the dataset.
params['input_dim'] = 2000
params['latent_dim'] = latent_dim
params['model_order'] = 1
params['poly_order'] = 2
params['include_sine'] = True
params['include_constant'] = True

# The include_* switches below have only been simulated for model_order 1 for now.
params['include_tan'] = False
# Kept False for now; this can be revisited once the domain restrictions of
# log are handled so that they do not become a problem.
params['include_log'] = False
params['include_exp'] = True
# The reciprocal terms are the only switch that depends on the model order:
# enabled for model_order 1, disabled otherwise.
params['include_reciprocal_func'] = params['model_order'] == 1

# Number of candidate library terms implied by the switches above.
params['library_dim'] = library_size(
    n=params['latent_dim'],
    poly_order=params['poly_order'],
    use_sine=params['include_sine'],
    include_constant=params['include_constant'],
    use_tan=params['include_tan'],
    use_log=params['include_log'],
    use_exp=params['include_exp'],
    use_reciprocal=params['include_reciprocal_func'],
)

# sequential thresholding parameters
params.update({
    'sequential_thresholding': True,
    'coefficient_threshold': 0.1,
    'threshold_frequency': 200,
    # all-ones array of shape (library_dim, latent_dim)
    'coefficient_mask': np.ones((params['library_dim'], params['latent_dim'])),
})

# loss function weighting
params.update({
    'loss_weight_decoder': 10,
    'loss_weight_sindy_z': 1e-2,
    'loss_weight_sindy_x': 1e-1,
    'loss_weight_sindy_regularization': 1e-2,
    'autoencoder_regularization': 1e-6,
})

# Flat list of the values above plus latent_dim and the coefficient
# threshold, collected in this fixed order.
params['weights'] = [
    params[name]
    for name in (
        'loss_weight_decoder',
        'loss_weight_sindy_z',
        'loss_weight_sindy_x',
        'loss_weight_sindy_regularization',
        'autoencoder_regularization',
        'latent_dim',
        'coefficient_threshold',
    )
]

params['activation'] = 'relu'
params['widths'] = [512, 32]

# training parameters
# Epoch size and batch size are both set to the length of the first axis of
# training_data['x'], i.e. one batch spans the whole training array.
n_train_samples = training_data['x'].shape[0]
params['epoch_size'] = n_train_samples
params['batch_size'] = n_train_samples
params['learning_rate'] = 1e-3
params['print_progress'] = True
params['print_frequency'] = 50

params['num_classes'] = 8
# training time cutoffs
params['max_epochs'] = 1
params['refinement_epochs'] = 1
# Set to None if training should run for all the epochs defined above. If the
# number of active terms goes below this value, the training stops.
# Derived from params['latent_dim'] so it always follows the configured value.
params['terms'] = 4 * params['latent_dim'] + 6


# classifier parameters
params['classify'] = True
params['classifier_widths'] = [8]
# Extend the summary list with the classifier widths (appended as one nested list).
params['weights'] = params['weights'] + [params['classifier_widths']]
params['loss_class'] = 1
# Number of independent experiments to run.
num_instance = 4

# One record per experiment: the training results merged with the parameters
# used for that run (params wins on duplicate keys). Records are collected in
# a list and turned into a DataFrame once, instead of growing a DataFrame
# row by row with DataFrame.append (quadratic, and removed in pandas 2.0).
experiment_records = []
try:
    for i in range(num_instance):
        print('EXPERIMENT %d' % i)
        # Rebuild the layers and reset the coefficient mask for every experiment.
        params['encoder_weights'] = build_network_layers(params['input_dim'], params['latent_dim'], params['widths'], 'encoder')
        params['decoder_weights'] = build_network_layers(params['latent_dim'], params['input_dim'], params['widths'][::-1], 'decoder')
        # NOTE(review): the classifier layers are built with the name 'encoder',
        # same as the encoder above -- confirm this is intended.
        params['classifier_weights'] = build_network_layers(params['latent_dim'], params['num_classes'], params['classifier_widths'], 'encoder')
        params['coefficient_mask'] = np.ones((params['library_dim'], params['latent_dim']))

        # This can be any relevant format
        # params['folder'] = params['model']+str(params['widths'])+str(params['weights'])+str(params['learning_rate'])
        # Full timestamp (hour, minute, second, microsecond) so that names are
        # unique per run and sort chronologically.
        params['save_name'] = datetime.datetime.now().strftime("%Y_%m_%d_%H%M%S_%f")
        params['coefficient_initialization'] = 'specified'

        # Random +/-1 starting coefficients, one per (library term, latent variable).
        params['init_coefficients'] = np.asarray(np.random.choice([-1, 1], size=(params['library_dim'], params['latent_dim'])), dtype=np.float32)

        # Create a save folder if it doesn't exist
        save_folder = os.path.join(params['data_path'], params['folder'])
        # Joining with '' appends the platform's own path separator
        # (a backslash on Windows, a forward slash elsewhere).
        params['save_folder'] = os.path.join(save_folder, '')
        os.makedirs(save_folder, exist_ok=True)
        os.chdir(save_folder)
        tf.reset_default_graph()

        # Train the network and obtain results
        results_dict = train_network(training_data, validation_data, params)
        experiment_records.append({**results_dict, **params})
finally:
    # Change back to the original data path, even if an experiment failed,
    # so that a re-run of the notebook does not start inside the save folder.
    os.chdir(params['data_path'])

# Save the DataFrame to a pickle file named after the experiment folder
df = pd.DataFrame(experiment_records)
df.to_pickle(f'experiment_results_{params["folder"]}.pkl')