### SQL2Circuits development notebook

In [None]:
import os
import json
import numpy as np
from data_preparation.queries import QueryGenerator
from data_preparation.prepare import DataPreparation
from data_preparation.database import Database
from circuit_preparation.circuits import Circuits
from training.train import SQL2CircuitsEstimator
from training.sample_feature_preparation import SampleFeaturePreparator
from skopt import BayesSearchCV
from skopt.space import Real
import jax
jax.config.update("jax_enable_x64", True)

# Absolute path of the directory the notebook is executed from.
this_folder = os.path.abspath(os.getcwd())

# Query seed files, index-aligned with `workload_types` below.
seed_paths = ["data_preparation//query_seeds//JOB_query_seed_execution_time.json",
              "data_preparation//query_seeds//JOB_query_seed_cardinality.json"]
workload_types = ["execution_time", "cardinality"]

run_id = 1
# `ty` selects both the workload type and the matching seed file.
ty = 1
workload_type = workload_types[ty]

database = Database("IMDB")
# Pass the selected `workload_type` instead of a hard-coded literal so that
# changing `ty` keeps the generator, its seed file and the data preparation
# step in agreement with each other.
generator = QueryGenerator(run_id,
                           workload_type=workload_type,
                           database="IMDB",
                           query_seed_file_path=seed_paths[ty])
query_file = generator.get_query_file()
data_preparator = DataPreparation(run_id,
                                  query_file,
                                  database=database,
                                  workload_type=workload_type,
                                  classification=2)


In [None]:
# Folder that receives the circuit files produced for this run.
this_folder = os.path.abspath(os.getcwd())
output_folder = "".join([this_folder,
                         "//circuit_preparation//data//circuits//",
                         str(run_id),
                         "//"])

# Create the run-specific folder on first use and report that it was created.
if not os.path.exists(output_folder):
    os.makedirs(output_folder)
    print("The new directory: ", output_folder, " is created.")

# Build the circuits from the query file, writing the CFG, pregroup and PNG
# diagram outputs as requested by the keyword flags.
circuits = Circuits(
    run_id,
    query_file,
    output_folder,
    write_cfg_to_file=True,
    write_pregroup_to_file=True,
    generate_circuit_png_diagrams=True,
)
circuits.execute_full_transformation()

In [None]:
# Prepare the training/validation features and the labels from all samples.
sf = SampleFeaturePreparator(run_id, data_preparator, circuits, "all")
X_train, X_valid, y = sf.get_X_train(), sf.get_X_valid(), sf.get_y()

# Estimator whose SPSA hyperparameters `a` and `c` are tuned below.
estimator = SQL2CircuitsEstimator(
    run_id,
    workload="cardinality",
    classification=2,
    optimization_method="SPSA",
    epochs=500,
)

# Both hyperparameters are searched uniformly over [1e-5, 10].
search_space = {
    'a': Real(1e-5, 10, 'uniform'),
    'c': Real(1e-5, 10, 'uniform'),
}

# Bayesian search: 5 iterations, 3 candidate points per iteration, 3 jobs.
opt = BayesSearchCV(estimator, search_space, n_iter=5, n_jobs=3, n_points=3)

# The validation features are forwarded to the estimator as a fit parameter.
opt.fit(X_train, y, X_valid=X_valid)

In [7]:
# Persist the outcome of the hyperparameter search as JSON.
# NumPy arrays are not JSON serializable, so they are converted to lists.
results = {
    key: value.tolist() if isinstance(value, np.ndarray) else value
    for key, value in dict(opt.cv_results_).items()
}
best_params = {
    key: value.tolist() if isinstance(value, np.ndarray) else value
    for key, value in dict(opt.best_params_).items()
}
results["best_params"] = best_params

# Make sure the run-specific results folder exists; opening the file for
# writing would otherwise fail with FileNotFoundError on a fresh run.
results_folder = "training//results//" + str(run_id) + "//"
os.makedirs(results_folder, exist_ok=True)
with open(results_folder + str(run_id) + "_cv_results_.json", "w") as f:
    json.dump(results, f)

# Incremental training: start from `initial_number_of_circuits` and raise the
# value handed to SampleFeaturePreparator by `number_of_circuits_to_add` per
# round, stopping before the number of training labels is reached.
initial_number_of_circuits = 20
number_of_circuits_to_add = 20
total_number_of_circuits = len(data_preparator.get_training_data_labels())

trainer = SQL2CircuitsEstimator(run_id,
                                workload="cardinality",
                                a=0.001,
                                c=0.001,
                                classification=2,
                                optimization_method="SPSA")

for i in range(initial_number_of_circuits, total_number_of_circuits, number_of_circuits_to_add):
    # NOTE(review): `run_id` is passed first so this call agrees with the
    # SampleFeaturePreparator(run_id, data_preparator, circuits, "all") call
    # in the hyperparameter-search cell; confirm against the class signature.
    # The last argument is presumably the number of samples to use.
    sf = SampleFeaturePreparator(run_id, data_preparator, circuits, i)
    X_train = sf.get_X_train()
    X_valid = sf.get_X_valid()
    y = sf.get_y()

    trainer.fit_with_lambeq_noisyopt(X_train, y, X_valid=X_valid, save_parameters=True)

# Final round on the complete sample set.
sf = SampleFeaturePreparator(run_id, data_preparator, circuits, "all")
X_train = sf.get_X_train()
X_valid = sf.get_X_valid()
y = sf.get_y()

trainer.fit(X_train, y, X_valid=X_valid, save_parameters=True)
