### Obtain cardinality estimations for the queries on PostgreSQL, MySQL and SQL Server

In [1]:
import os
from data_preparation.queries import QueryGenerator
from data_preparation.prepare import DataPreparation
from data_preparation.database import Database
from data_preparation.mysql_database import MySQLDatabase
from data_preparation.sql_server import SQLServerDatabase
from evaluation.classical_algorithms.classical_estimator import ClassicalEstimator

# Directory the kernel is running in (not referenced again in this cell).
this_folder = os.path.abspath(os.getcwd())

# Experiment settings.
run_id = 1
classification = 3

# The two lists below are parallel: seed_paths[i] is the query seed file
# that belongs to workload_types[i].
workload_types = ["execution_time", "cardinality"]
seed_paths = [
    "data_preparation//query_seeds//JOB_query_seed_execution_time.json",
    "data_preparation//query_seeds//JOB_query_seed_cardinality.json",
]
workload_type = workload_types[1]
# Pick the seed file through the workload type so the two cannot drift apart.
seed_file = seed_paths[workload_types.index(workload_type)]

# One handle per database engine, each constructed with the name "IMDB".
postgres_database = Database("IMDB")
mysql_database = MySQLDatabase("IMDB")
sql_server_database = SQLServerDatabase("IMDB")

# Obtain the query file for this run, then prepare the data using the
# PostgreSQL handle.
generator = QueryGenerator(
    run_id,
    workload_type=workload_type,
    database="IMDB",
    query_seed_file_path=seed_file,
)
query_file = generator.get_query_file()
data_preparator = DataPreparation(
    run_id,
    query_file,
    database=postgres_database,
    workload_type=workload_type,
    classification=classification,
)

classes = data_preparator.get_classes()
# NOTE(review): presumably the ground-truth values the estimators are scored
# against -- confirm against DataPreparation.get_data().
true_cardinalities = data_preparator.get_data()
print("Classes: ", classes)
# Debug aid, disabled by default:
# print("True cardinalities: ", true_cardinalities)

# For every database engine, build one estimator and run each kind of
# estimate that the engine reports support for, printing the results
# after each evaluated kind.
for db in (postgres_database, mysql_database, sql_server_database):
    estimator = ClassicalEstimator(query_file, true_cardinalities, db, classes)
    for estimate_kind in ("cardinality", "latency", "cost"):
        # Capability checks follow the naming scheme supports_<kind>_estimation().
        is_supported = getattr(db, f"supports_{estimate_kind}_estimation")()
        if not is_supported:
            continue
        estimator.create_estimates(estimate_kind)
        estimates = estimator.evaluate_accuracy_of_estimations(estimate_kind)
        print(estimator.get_results())

Query file already exists.
Number of training queries is  398
Number of test queries is  140
Number of validation queries is  135
cardinality
Data file is  c:\Users\valte\OneDrive - University of Helsinki\Desktop\quantum-computing-for-database-query-languages\sql2circuits//data_preparation//data//cardinality//1_data.json
Classes:  [(0, 0), (0, 0), (0, 4829), (5536, 29757), (33181, 143849), (150166, 577460), (635188, 1572370), (1626450, 102555254)]
{'cardinality': {'training': 0.4418604651162791, 'test': 0.45985401459854014, 'validation': 0.556390977443609, 'total': 0.48603515238614275}}
{'cardinality': {'training': 0.12919896640826872, 'test': 0.1386861313868613, 'validation': 0.14285714285714285, 'total': 0.1369140802174243}}
{'cardinality': {'training': 0.4780361757105943, 'test': 0.45255474452554745, 'validation': 0.556390977443609, 'total': 0.4956606325599169}}
