In [1]:
import time
import psycopg2
import asyncio
import os
import sys
sys.path.append("../")
os.environ["PGCLIENTENCODING"] = "utf-8"
from typing import Optional, Tuple, Union, MutableMapping, List


In [2]:
import psycopg
import asyncio
import time
import numpy as np
from typing import Optional, Tuple, Union, MutableMapping, List, Mapping
from utils.load_brad_trace import (
    load_trace,
    create_concurrency_dataset,
)
from scheduler.base_scheduler import BaseScheduler
from simulator.simulator import QueryBank


async def submit_query_and_wait_for_result(connection: psycopg.AsyncConnection,
                                           query_rep: Union[int, str],
                                           sql: str,
                                           ) -> Tuple[Union[int, str], float, float, bool, bool]:
    error = False
    timeout = False
    async with connection.cursor() as cur:
        t = time.time()
        try:
            await cur.execute(sql)
            await cur.fetchall()
        except psycopg.errors.QueryCanceled as e:
            # this occurs in timeout
            timeout = True
        except:
            error = True
        runtime = time.time() - t
    return query_rep, t, runtime, timeout, error


class Executor:
    # this is the thin execution layer all users connect to instead of directly connect to the DB instance
    def __init__(
        self,
        scheduler: BaseScheduler,
        database_kwargs: Mapping[str, Union[str, int]],
        timeout: int,
        database: str,
        query_bank: Optional[QueryBank] = None,
        pause_wait_s: float = 5.0,
    ):
        self.scheduler = scheduler
        self.database_kwargs = database_kwargs
        self.database = database
        self.db_conn = None
        self.timeout = timeout
        asyncio.run(self.get_connection())
        self.query_bank = query_bank
        self.pause_wait_s = pause_wait_s
        self.pending_jobs = []

    async def get_connection(self):
        self.db_conn = await psycopg.AsyncConnection.connect(**self.database_kwargs)
        acur = self.db_conn.cursor()
        timeout_ms = int(self.timeout * 1000)
        await acur.execute(f"set statement_timeout = {timeout_ms};")
        await self.db_conn.commit()
        if self.database == "Redshift":
            await acur.execute("SET enable_result_cache_for_session = OFF;")
            await self.db_conn.commit()

def check_query_finished(pending_jobs,
                         current_time: float,
                         ) -> bool:
    has_finished_queries = False
    if len(pending_jobs) != 0:
        # check if existing jobs are finished
        for task in pending_jobs:
            if task.done():
                has_finished_queries = True
                query_rep, runtime, timeout, error = task.result()
                print(query_rep, runtime, timeout, error)
                pending_jobs.remove(task)
    return has_finished_queries

In [5]:
with open("/Users/ziniuw/Desktop/research/Data/AWS_trace/mixed_aurora/aurora_mixed.sql", 'r') as f:
    queries = f.readlines()

In [3]:
database_kwargs = {
    'host': "brad-redshift-cluster.cmdzoy6ck5ua.us-east-1.redshift.amazonaws.com",
    'dbname': "imdb_100g",
    'port': 5439,
    'user': "awsuser",
    'password': "Giftedcoconut!#4"
}
db_conn = await psycopg.AsyncConnection.connect(**database_kwargs)
acur = db_conn.cursor()
timeout_ms = int(20000)
await acur.execute(f"set statement_timeout = {timeout_ms};")
await db_conn.commit()
await acur.execute("SET enable_result_cache_for_session = OFF;")
await db_conn.commit()

In [6]:
pending_jobs = []
all_start_time = [1, 4, 9, 10, 14]
function_start_time = time.time()
for i in range(len(all_start_time)):
    current_time = time.time() - function_start_time
    current_query_start_time = all_start_time[i]
    while current_time < current_query_start_time - 0.5:
        print(i, current_time, len(pending_jobs))
        check_query_finished(pending_jobs, current_time)
        time.sleep(0.5)
        current_time = time.time() - function_start_time
    future = asyncio.ensure_future(submit_query_and_wait_for_result(db_conn, i+50, queries[i+50]))
    pending_jobs.append(future)
    print([p.done() for p in pending_jobs])
print([p.done() for p in pending_jobs])

0 0.00011229515075683594 0
[False]
1 0.5039982795715332 1
1 1.0064961910247803 1
1 1.5104701519012451 1
1 2.015347957611084 1
1 2.5197231769561768 1
1 3.021318197250366 1
[False, False]
2 3.5268852710723877 2
2 4.030250072479248 2
2 4.531398296356201 2
2 5.036517143249512 2
2 5.537211179733276 2
2 6.038813352584839 2
2 6.5408971309661865 2
2 7.042265176773071 2
2 7.5455780029296875 2
2 8.050384283065796 2
[False, False, False]
3 8.553382158279419 3
3 9.058443069458008 3
[False, False, False, False]
4 9.564241170883179 4
4 10.064687252044678 4
4 10.565054416656494 4
4 11.066465139389038 4
4 11.56852102279663 4
4 12.073767185211182 4
4 12.574305057525635 4
4 13.07953929901123 4
[False, False, False, False, False]
[False, False, False, False, False]


In [232]:
print([p.done() for p in pending_jobs])
print([p.result() for p in pending_jobs])

[True, True, True, True, True]
[(50, 1715888053.706999, 20.102179050445557, True, False), (51, 1715888053.707516, 20.11885404586792, False, True), (52, 1715888053.707541, 20.144122838974, False, True), (53, 1715888053.707555, 20.162642002105713, False, True), (54, 1715888053.7075672, 20.181647777557373, False, True)]


In [233]:
function_start_time

1715888040.107349

In [134]:
ENDPOINT="brad-redshift-cluster.cmdzoy6ck5ua.us-east-1.redshift.amazonaws.com"
PORT="5439"
USER="awsuser"
REGION="us-east-1"
DBNAME="imdb_100g"
token="Giftedcoconut!#4"
conn = psycopg2.connect(host=ENDPOINT, port=PORT, dbname=DBNAME, user=USER, password=token)
conn.autocommit = True
cur = conn.cursor()

In [140]:
cur.execute(f"set statement_timeout = {100};")
conn.commit()
cur.execute("SET enable_result_cache_for_session = OFF;")
conn.commit()

In [136]:
cur.execute("SELECT COUNT(*) FROM title;")
cur.fetchall()

[(68264424,)]

In [137]:
cur.execute(queries[0])

In [156]:
aconn = await psycopg.AsyncConnection.connect(host=ENDPOINT, port=PORT, dbname=DBNAME, user=USER, password=token)

In [157]:
acur = aconn.cursor()
await acur.execute(f"set statement_timeout = {100};")
await aconn.commit()
await acur.execute("SET enable_result_cache_for_session = OFF;")
await aconn.commit()

In [158]:
await acur.execute(queries[0])
await acur.fetchall()

QueryCanceled: Query (5814805) cancelled on user's request

In [180]:
async def submit_query_and_wait_for_result(connection: psycopg.AsyncConnection,
                                           query_rep: Union[int, str],
                                           sql: str,
                                           ) -> Tuple[Union[int, str], float, bool, bool]:
    error = False
    timeout = False
    async with connection.cursor() as cur:
        t = time.time()
        try:
            await cur.execute(sql)
            await cur.fetchall()
        except psycopg.errors.QueryCanceled as e:
            # this occurs in timeout
            timeout = True
        except:
            error = True
        runtime = time.time() - t
    return query_rep, runtime, timeout, error

In [181]:
aconn = await psycopg.AsyncConnection.connect(host=ENDPOINT, port=PORT, dbname=DBNAME, user=USER, password=token)
acur = aconn.cursor()
await acur.execute(f"set statement_timeout = {10000};")
await aconn.commit()
await acur.execute("SET enable_result_cache_for_session = OFF;")
await aconn.commit()

In [182]:
future = asyncio.ensure_future(submit_query_and_wait_for_result(aconn, 0, queries[113]))
print(future.done())
pending = [future]
done, pending = await asyncio.wait(
                pending, return_when=asyncio.FIRST_COMPLETED
            )
print(future.done())
print(future.result())

False
True
(0, 1.7526252269744873, False, False)


In [183]:
future = asyncio.ensure_future(submit_query_and_wait_for_result(aconn, 0, queries[0]))
print(future.done())
pending = [future]
done, pending = await asyncio.wait(
                pending, return_when=asyncio.FIRST_COMPLETED
            )
print(future.done())
print(future.result())

False
True
(0, 5.712521076202393, False, False)


In [96]:
t = time.time()
future = asyncio.ensure_future(acur.execute(queries[-2]))
print(time.time() - t)
t = time.time()
#await acur.execute(queries[1])
#print(time.time() - t)
print(await acur.fetchall())


0.0001609325408935547
[(None, 5, 1992)]


In [103]:
future.result()

<psycopg.AsyncCursor [TUPLES_OK] [INTRANS] (host=brad-redshift-cluster.cmdzoy6ck5ua.us-east-1.redshift.amazonaws.com port=5439 user=awsuser database=imdb_100g) at 0x1155eecf0>

In [97]:
pending = [future]
t = time.time()
done, pending = await asyncio.wait(
                pending, return_when=asyncio.FIRST_COMPLETED
            )
print(time.time() - t)

5.563902854919434


In [93]:
done

{<Task finished name='Task-607' coro=<AsyncCursor.execute() done, defined at /Users/ziniuw/miniconda3/envs/exploration/lib/python3.11/site-packages/psycopg/cursor_async.py:75> result=<psycopg.Asyn...t 0x1155eecf0>>}

In [86]:
print(await acur.fetchall())

[(1,)]


In [70]:
async with aconn.cursor() as acur:
    await acur.execute(sql)
    print("asd")
    await acur.execute("SELECT COUNT(*) FROM title;")
    rows = await acur.fetchall()

asd


In [71]:
rows

[(68264424,)]

In [66]:
print(await acur.fetchone())

InterfaceError: the cursor is closed