In [1]:
import itertools
import json
import time
import typing

import numpy as np
import requests

def send(
    obj: dict[str, typing.Any],
    s: requests.Session,
    port: int,
    *,
    timeout: float | None = None,
) -> list[float] | None:
    """Request the vector for one record from a service on localhost.

    POSTs ``{"text": obj["raw"]}`` as JSON to
    ``http://localhost:{port}/vectors`` and returns the ``"vector"`` field
    of the JSON reply.

    Args:
        obj: Record to send. ``obj["raw"]`` is the text posted to the
            service; ``obj["id"]`` is read only to build the failure message.
        s: Session used to issue the request.
        port: Localhost port the service is reached on.
        timeout: Seconds to wait for the service, forwarded unchanged to
            ``Session.post``. The default ``None`` applies no limit, which
            is what the call did before this parameter existed.

    Returns:
        The ``"vector"`` value from the response body, or ``None`` when the
        service answers with any status other than 200 (the failure and the
        response text are printed).

    Raises:
        KeyError: If ``obj`` has no ``"raw"`` key, or a 200 response has no
            ``"vector"`` key.

    Nothing raised by ``s.post`` or ``res.json()`` is caught here, so those
    exceptions reach the caller.
    """
    res = s.post(
        f"http://localhost:{port}/vectors",
        json={"text": obj["raw"]},
        timeout=timeout,
    )
    if res.status_code != 200:
        print(f"Failed to retrieve vector for {obj['id']} with text {obj['raw']}")
        print(res.text)
        return None
    return res.json()["vector"]

def run() -> None:
    """Print how closely two local services agree on the same records.

    Reads at most the first 10000 lines of ``data/sphere.1M.jsonl``, parses
    each line as JSON, asks the services on localhost ports 3000 and 8080
    for a vector via ``send``, and prints the cosine similarity of the two
    vectors. A record is skipped when ``send`` returns ``None`` for either
    service.
    """
    # Read the sample up front so the file is closed before any request is
    # made. The encoding is pinned to UTF-8 (the JSON text encoding) instead
    # of relying on the platform default.
    with open("data/sphere.1M.jsonl", "r", encoding="utf-8") as file:
        lines = list(itertools.islice(file, 10000))

    # The context manager closes the session when the loop finishes or raises;
    # previously the session was never closed.
    with requests.Session() as session:
        for line in lines:
            record = json.loads(line)  # parse once, reuse for both services
            rs = send(record, session, 3000)
            py = send(record, session, 8080)
            if rs is None or py is None:
                continue
            # Convert once so dot() and norm() don't each re-convert the lists.
            rs_vec = np.asarray(rs)
            py_vec = np.asarray(py)
            print(
                np.dot(rs_vec, py_vec)
                / (np.linalg.norm(rs_vec) * np.linalg.norm(py_vec))
            )

In [None]:
# Run the comparison. Needs data/sphere.1M.jsonl on disk and the vector
# services reachable on localhost ports 3000 and 8080.
run()