### SQL2Circuits development notebook

In [1]:
import os
from data_preparation.queries import QueryGenerator
from data_preparation.prepare import DataPreparation
from data_preparation.database import Database
from circuit_preparation.circuits import Circuits
from training.train import SQL2CircuitsEstimator
from training.sample_feature_preparation import SampleFeaturePreparator
from skopt import BayesSearchCV
from skopt.space import Real
import jax
jax.config.update("jax_enable_x64", True)

# --- Run configuration --------------------------------------------------------
# Directory the notebook kernel was started from.
this_folder = os.path.abspath(os.getcwd())

# Query-seed files and workload names are index-aligned: `ty` selects the same
# position in both lists.
seed_paths = ["data_preparation//query_seeds//JOB_query_seed_execution_time.json",
              "data_preparation//query_seeds//JOB_query_seed_cardinality.json"]
workload_types = ["execution_time", "cardinality"]
assert len(seed_paths) == len(workload_types), "seed_paths and workload_types must stay index-aligned"

run_id = 1
ty = 1  # index into `workload_types` / `seed_paths`; 1 selects "cardinality"
workload_type = workload_types[ty]

# --- Data preparation ---------------------------------------------------------
database = Database("IMDB")

# Pass the selected `workload_type` (not a literal) so that the generator, the seed
# file and DataPreparation all follow `ty` and cannot drift apart.
# Note: QueryGenerator receives the database *name*, DataPreparation the Database object.
generator = QueryGenerator(run_id, workload_type = workload_type, database = "IMDB", query_seed_file_path = seed_paths[ty])
query_file = generator.get_query_file()
data_preparator = DataPreparation(run_id, query_file, database = database, workload_type = workload_type, classification = 2)


Number of training queries is  398
Number of test queries is  140
Number of validation queries is  135
cardinality
Error while fetching data from PostgreSQL connection to server at "localhost" (127.0.0.1), port 5432 failed: Connection refused
	Is the server running on that host and accepting TCP/IP connections?



In [2]:
# Per-run output directory for the generated circuits. The trailing separator is
# kept as part of the string that is handed to Circuits.
this_folder = os.path.abspath(os.getcwd())
output_folder = "".join([this_folder, "//circuit_preparation//data//circuits//", str(run_id), "//"])

# Create the directory on first use and report it.
folder_missing = not os.path.exists(output_folder)
if folder_missing:
    os.makedirs(output_folder)
    print("The new directory: ", output_folder, " is created.")

# Build the circuits for the generated query file, writing the CFG, the pregroup
# output and the PNG diagrams as requested by the flags below.
circuits = Circuits(
    run_id,
    query_file,
    output_folder,
    write_cfg_to_file=True,
    write_pregroup_to_file=True,
    generate_circuit_png_diagrams=True,
)
circuits.execute_full_transformation()

In [3]:
# Features and labels built from every available circuit ("all").
sf = SampleFeaturePreparator(data_preparator, circuits, "all")
X_train = sf.get_X_train()
X_valid = sf.get_X_valid()
y = sf.get_y()

# Hyperparameters tuned by the search, each sampled uniformly from [1e-5, 10].
# NOTE(review): `a` and `c` are presumably the SPSA gain parameters -- confirm
# against SQL2CircuitsEstimator.
search_spaces = {
    'a': Real(1e-5, 10, 'uniform'),
    'c': Real(1e-5, 10, 'uniform'),
}

# `workload` follows the `workload_type` selected in the configuration cell instead
# of a hard-coded literal, so the estimator matches the prepared data.
estimator = SQL2CircuitsEstimator(run_id,
                                  workload = workload_type,
                                  classification = 2,
                                  optimization_method = "SPSA",
                                  epochs = 500)

# `random_state` makes the sequence of proposed (a, c) candidates repeatable
# across runs. NOTE(review): randomness inside the estimator itself is not
# controlled by this seed.
opt = BayesSearchCV(estimator,
                    search_spaces,
                    n_iter = 32,
                    cv = 3,
                    random_state = 0)

# Extra keyword arguments to `fit` are forwarded to the estimator's own `fit`,
# which is how the validation features reach SQL2CircuitsEstimator.
opt.fit(X_train, y, X_valid = X_valid)

In [4]:
# Incremental training schedule: start with `initial_number_of_circuits` and grow
# the training set by `number_of_circuits_to_add` per round.
initial_number_of_circuits = 20
number_of_circuits_to_add = 20
total_number_of_circuits = len(data_preparator.get_training_data_labels())

# `workload` follows the `workload_type` selected in the configuration cell instead
# of a hard-coded literal, so the estimator matches the prepared data.
# NOTE(review): the output stored with this cell reports `classification: 4` and
# `epochs: 1000`, which do not match the arguments below -- the stored output may be
# stale, or the estimator may read its settings from elsewhere. Confirm.
trainer = SQL2CircuitsEstimator(run_id,
                              workload = workload_type,
                              a = 0.001,
                              c = 0.001,
                              classification = 2,
                              optimization_method = "SPSA")

# Train on progressively larger subsets. The range stops before
# `total_number_of_circuits`; the complete set is handled by the "all" pass below.
for i in range(initial_number_of_circuits, total_number_of_circuits, number_of_circuits_to_add):
    sf = SampleFeaturePreparator(data_preparator, circuits, i)
    X_train = sf.get_X_train()
    X_valid = sf.get_X_valid()
    y = sf.get_y()

    # NOTE(review): the recorded run aborts right after
    # "Storing parameters in file training//checkpoints//1.npz" with
    # "ValueError: Object arrays cannot be loaded when allow_pickle=False".
    # The checkpoint is presumably saved as an object array and reloaded without
    # `allow_pickle=True`; the fix belongs in the estimator's save/load code.
    trainer.fit_with_lambeq_noisyopt(X_train, y, X_valid = X_valid, save_parameters = True)

# Final pass over every available circuit.
sf = SampleFeaturePreparator(data_preparator, circuits, "all")
X_train = sf.get_X_train()
X_valid = sf.get_X_valid()
y = sf.get_y()

# NOTE(review): this call uses `fit` while the loop above uses
# `fit_with_lambeq_noisyopt` -- confirm that the difference is intentional.
trainer.fit(X_train, y, X_valid = X_valid, save_parameters = True)


id: 1
a: 0.001
c: 0.001
epochs: 1000
classification: 4
workload: cardinality
optimization_medthod: SPSA



Initializing new parameters
20 20


iters: 20
train/loss: 7.0069
train/acc: 0.15
valid/loss: 8.4978
valid/acc: 0.1

iters: 40
train/loss: 6.5343
train/acc: 0.2
valid/loss: 8.2938
valid/acc: 0.0

iters: 60
train/loss: 6.107
train/acc: 0.25
valid/loss: 8.2133
valid/acc: 0.05

iters: 80
train/loss: 5.8728
train/acc: 0.45
valid/loss: 8.1255
valid/acc: 0.05

iters: 100
train/loss: 5.6931
train/acc: 0.5
valid/loss: 8.0556
valid/acc: 0.05

iters: 120
train/loss: 5.5004
train/acc: 0.55
valid/loss: 7.8126
valid/acc: 0.05

iters: 140
train/loss: 5.3903
train/acc: 0.55
valid/loss: 7.7216
valid/acc: 0.1

iters: 160
train/loss: 5.266
train/acc: 0.55
valid/loss: 7.6111
valid/acc: 0.1

iters: 180
train/loss: 5.1388
train/acc: 0.55
valid/loss: 7.5626
valid/acc: 0.1

iters: 200
train/loss: 5.0633
train/acc: 0.55
valid/loss: 7.4837
valid/acc: 0.15

iters: 220
train/loss: 4.9855
train/acc: 0.55
valid/loss: 7.4382
valid/acc: 0.15

iters: 240
train/loss: 4.9229
train/acc: 0.55
valid/loss: 7.3853
valid/acc: 0.15

iters: 260
train/loss: 4.8539

Store parameters:  138 138
Storing parameters in file training//checkpoints//1.npz


ValueError: Object arrays cannot be loaded when allow_pickle=False