### Obtain cardinality estimations for the queries on SQL Server (data prepared with PostgreSQL)

In [1]:
import os
from data_preparation.queries import QueryGenerator
from data_preparation.prepare import DataPreparation
from data_preparation.database import Database
from data_preparation.mysql_database import MySQLDatabase
from data_preparation.sql_server import SQLServerDatabase
from evaluation.classical_algorithms.classical_estimator import ClassicalEstimator

# Absolute path of the directory the notebook kernel is running in.
this_folder = os.path.abspath(os.getcwd())

# Available query-seed files and workload types; index 1 selects the
# "cardinality" entry of each list.
seed_paths = [
    "data_preparation//query_seeds//JOB_query_seed_execution_time.json",
    "data_preparation//query_seeds//JOB_query_seed_cardinality.json",
]
workload_types = ["execution_time", "cardinality"]
seed_file = seed_paths[1]
workload_type = workload_types[1]

# Run configuration passed to the generator and the data preparation below.
run_id = 1
classification = 1

# One handle per database backend, each constructed with the argument "IMDB".
postgres_database = Database("IMDB")
mysql_database = MySQLDatabase("IMDB")
sql_server_database = SQLServerDatabase("IMDB")

# Build (or locate) the query file, then prepare the data using the
# PostgreSQL handle.
generator = QueryGenerator(
    run_id,
    workload_type=workload_type,
    database="IMDB",
    query_seed_file_path=seed_file,
)
query_file = generator.get_query_file()
data_preparator = DataPreparation(
    run_id,
    query_file,
    database=postgres_database,
    workload_type=workload_type,
    classification=classification,
)

classes = data_preparator.get_classes()
true_cardinalities = data_preparator.get_data()
print("Classes: ", classes)
# Debug aid, left disabled:
# print("True cardinalities: ", true_cardinalities)

# Estimate kinds paired with the name of the database capability check that
# gates them. Order matters: cardinality, then latency, then cost.
estimate_kinds = (
    ("cardinality", "supports_cardinality_estimation"),
    ("latency", "supports_latency_estimation"),
    ("cost", "supports_cost_estimation"),
)

for db in [sql_server_database]:
    estimator = ClassicalEstimator(query_file, true_cardinalities, db, classes)
    for kind, capability_check in estimate_kinds:
        # Look up and call the check lazily so each one runs only after the
        # previous kind has been fully processed.
        if not getattr(db, capability_check)():
            continue
        estimator.create_estimates(kind)
        estimates = estimator.evaluate_accuracy_of_estimations(kind)
        print(estimator.get_results())

Query file already exists.
Number of training queries is  398
Number of test queries is  140
Number of validation queries is  135
cardinality
Data file is  c:\Users\valte\OneDrive - University of Helsinki\Desktop\quantum-computing-for-database-query-languages\sql2circuits//data_preparation//data//cardinality//1_data.json
Classes:  [(0, 29757), (29757, 102555254)]
1360780
2535010
74393400
203880000
25717200
5237080
1254590
2563970
34202
1089170
18368
12678
23618
693106
48792
238879
1693430
1592940
1190290
14642700
72550
78727
2310350
2734610
798672
162642
79626
389171
2347770
1367570
74764500
204897000
25845500
1260850
2610520
1520620
2832770
83131700
227828000
98397500
28738000
5852230
2865130
1204370
34202
19922
1089170
76674
375381
37538
1899500
239602
238879
797868
750521
560809
11394
6898950
42260
78727
6331680
798672
796264
79626
183359
936377
1744380
140293000
60591800
17696500
17643100
1764310
1
1
1
1
1
1
1
1
1
567442
66010
22819
22750
2275
11681
10988
14346
8210
2593
166
101005