In [1]:
import vastai
import asyncio
import random
import logging

# Detach the SDK loggers from the root logger. Both the class-style name
# ("Serverless") and a package-style name ("vastai") are covered because the
# exact logger name used by the library is not known here.
for _logger_name in ("Serverless", "vastai"):
    logging.getLogger(_logger_name).propagate = False
del _logger_name


In [None]:
import asyncio
import random
import vastai

async def start_session():
    """Send image-generation requests through two sessions until interrupted.

    Opens a Serverless client, creates two sessions on the
    "my-comfy-endpoint" endpoint and then loops forever, issuing one
    "/generate/sync" request per session on each iteration and printing the
    ``local_path`` of the first output of every response.

    The loop has no exit condition; it ends only when an exception is raised
    or the task is cancelled (for example by interrupting the notebook cell).

    Raises:
        asyncio.CancelledError: re-raised after logging when the task is
            cancelled.
        Exception: anything raised by the SDK calls, or by one of the
            ``close()`` calls during cleanup.
    """
    client = vastai.Serverless()
    session_a = None
    session_b = None

    try:
        endpoint = await client.get_endpoint("my-comfy-endpoint")

        # NOTE(review): the seed is drawn once, so every request sent by the
        # loop below reuses the exact same payload — confirm this is intended.
        payload = {
            "input": {
                "modifier": "Text2Image",
                "modifications": {
                    "prompt": "Generate a page from a peanuts comic strip.",
                    "width": 512,
                    "height": 512,
                    "steps": 10,
                    "seed": random.randint(1, 1000)
                }
            }
        }

        session_a = await endpoint.session()
        session_b = await endpoint.session()

        def print_response(response):
            print(response["response"]["output"][0]["local_path"])

        while True:
            await asyncio.gather(
                session_a.request("/generate/sync", payload).then(print_response),
                session_b.request("/generate/sync", payload).then(print_response),
            )

    except asyncio.CancelledError:
        print("Notebook cell cancelled.")
        raise

    finally:
        # Nested try/finally: each close() is attempted even when an earlier
        # one raises, so a failing session cannot leak the other session or
        # the client. The last exception raised still propagates.
        try:
            if session_a is not None:
                await session_a.close()
        finally:
            try:
                if session_b is not None:
                    await session_b.close()
            finally:
                await client.close()
        # Only reached when every close() succeeded.
        print("Sessions closed.")


In [None]:
# Top-level await. start_session() loops forever, so this cell keeps running
# until it is interrupted; cleanup happens in the coroutine's finally block.
await start_session()

In [None]:
import asyncio
import random
import vastai
def status_printout(status):
    """Print a one-line training progress summary from a "/status" response.

    Args:
        status: mapping that holds the task status under
            ``status["response"]["status"]``. The fields read from it are
            ``message``, ``train_loss`` and ``val_acc`` (a fraction that is
            printed as a percentage).

    Metrics that are missing or ``None`` are printed as ``n/a`` instead of
    raising, so a status without numbers does not abort the caller's polling
    loop. For fully populated statuses the output is unchanged.

    Raises:
        KeyError: if ``status`` has no ``["response"]["status"]`` entry.
    """
    status_obj = status["response"]["status"]

    message = status_obj.get("message")
    train_loss = status_obj.get("train_loss")
    val_acc = status_obj.get("val_acc")

    label = "" if message is None else message
    # Widths match the numeric formats (8 chars for the loss, 6 + "%" for accuracy).
    loss_text = f"{'n/a':>8}" if train_loss is None else f"{train_loss:>8.4f}"
    acc_text = f"{'n/a':>7}" if val_acc is None else f"{val_acc * 100:>6.2f}%"

    print(
        f"{label:<30} | "
        f"Train loss: {loss_text} | "
        f"Accuracy: {acc_text}"
    )


async def start_training_session(
    endpoint_name="my-pytorch-endpoint-2",
    max_polls=15,
    poll_interval=1,
):
    """Start a training task in a session, poll its status, then cancel it.

    Opens a session on ``endpoint_name``, posts "/start_task", and polls
    "/status" every ``poll_interval`` seconds, printing each status with
    ``status_printout``. Once the poll counter exceeds ``max_polls`` while the
    task is still running, a single "/cancel_task" request is sent; polling
    then continues until the reported state is no longer "running".

    Args:
        endpoint_name: name passed to ``client.get_endpoint``.
        max_polls: poll-counter threshold after which the task is cancelled.
        poll_interval: seconds to sleep before each status request.

    Returns:
        None.
    """
    async with vastai.Serverless() as client:
        endpoint = await client.get_endpoint(endpoint_name)

        payload = {'max_train_batches_per_epoch': 100, "epochs": 200}

        async with await endpoint.session() as session:

            async def on_close(session):
                await session.request(route="/cancel_task", payload={})

            # Registered with the session; presumably the SDK invokes it when
            # the session closes — confirm against the SDK documentation.
            session.set_on_close(on_close)
            await session.request(route="/start_task", payload=payload)

            cancel_requested = False
            count = 0
            while True:
                await asyncio.sleep(poll_interval)
                status = await session.request(route="/status", payload={})
                status_printout(status)
                if status["response"]["status"]["state"] != "running":
                    break
                # Send the cancel request once; previously it was re-sent on
                # every poll until the state changed.
                if count > max_polls and not cancel_requested:
                    await session.request(route="/cancel_task", payload={})
                    cancel_requested = True
                count += 1

        # Do not report a forced cancel as a completed run.
        print("Training cancelled" if cancel_requested else "Training complete")

    print("Sessions closed.")


In [30]:
# gather() around a single coroutine only wraps its return value in a list,
# which is why the cell output ends with [None].
await asyncio.gather(start_training_session())

Training epoch 2/200 batch 40/100 | Train loss:   0.2025 | Accuracy:  90.97%
Training epoch 3/200 batch 100/100 | Train loss:   0.1240 | Accuracy:  95.16%
Training epoch 5/200 batch 40/100 | Train loss:   0.0951 | Accuracy:  96.56%
Training epoch 6/200 batch 100/100 | Train loss:   0.0836 | Accuracy:  97.03%
Training epoch 8/200 batch 50/100 | Train loss:   0.0810 | Accuracy:  97.12%
Training epoch 9/200 batch 100/100 | Train loss:   0.0614 | Accuracy:  97.62%
Training epoch 11/200 batch 60/100 | Train loss:   0.0569 | Accuracy:  97.62%
Validation epoch 12/200 complete | Train loss:   0.0451 | Accuracy:  97.59%
Training epoch 14/200 batch 80/100 | Train loss:   0.0406 | Accuracy:  97.81%
Training epoch 16/200 batch 10/100 | Train loss:   0.0596 | Accuracy:  97.81%
Training epoch 17/200 batch 100/100 | Train loss:   0.0465 | Accuracy:  98.00%
Training epoch 19/200 batch 40/100 | Train loss:   0.0386 | Accuracy:  98.47%
Training epoch 20/200 batch 100/100 | Train loss:   0.0296 | Accurac

[None]

In [31]:
# Run two independent training sessions concurrently; their status lines are
# interleaved in the cell output.
await asyncio.gather(start_training_session(), start_training_session())

Training epoch 2/200 batch 40/100 | Train loss:   0.2025 | Accuracy:  90.97%
Training epoch 3/200 batch 100/100 | Train loss:   0.1239 | Accuracy:  95.12%
Training epoch 5/200 batch 60/100 | Train loss:   0.0940 | Accuracy:  96.62%
Training epoch 6/200 batch 100/100 | Train loss:   0.0830 | Accuracy:  97.03%
Training epoch 8/200 batch 90/100 | Train loss:   0.0770 | Accuracy:  97.12%
Training epoch 10/200 batch 20/100 | Train loss:   0.0502 | Accuracy:  97.72%
Training epoch 11/200 batch 100/100 | Train loss:   0.0556 | Accuracy:  97.72%
Training epoch 13/200 batch 70/100 | Train loss:   0.0465 | Accuracy:  97.47%
Training epoch 15/200 batch 10/100 | Train loss:   0.0418 | Accuracy:  97.56%
Training epoch 16/200 batch 100/100 | Train loss:   0.0507 | Accuracy:  97.75%
Training epoch 18/200 batch 30/100 | Train loss:   0.0462 | Accuracy:  97.84%
Training epoch 19/200 batch 100/100 | Train loss:   0.0372 | Accuracy:  98.53%
Training epoch 21/200 batch 50/100 | Train loss:   0.0316 | Accu

CancelledError: 

In [19]:
import asyncio
import random
import vastai
def status_printout(status):
    """Print a one-line training progress summary from a "/status" response.

    NOTE(review): a function with this name is also defined in an earlier
    cell of this notebook; whichever cell ran last wins. Consider keeping a
    single definition.

    Args:
        status: mapping that holds the task status under
            ``status["response"]["status"]``. The fields read from it are
            ``message``, ``train_loss`` and ``val_acc`` (a fraction that is
            printed as a percentage).

    Metrics that are missing or ``None`` are printed as ``n/a`` instead of
    raising, so a status without numbers does not abort the caller's polling
    loop. For fully populated statuses the output is unchanged.

    Raises:
        KeyError: if ``status`` has no ``["response"]["status"]`` entry.
    """
    status_obj = status["response"]["status"]

    message = status_obj.get("message")
    train_loss = status_obj.get("train_loss")
    val_acc = status_obj.get("val_acc")

    label = "" if message is None else message
    # Widths match the numeric formats (8 chars for the loss, 6 + "%" for accuracy).
    loss_text = f"{'n/a':>8}" if train_loss is None else f"{train_loss:>8.4f}"
    acc_text = f"{'n/a':>7}" if val_acc is None else f"{val_acc * 100:>6.2f}%"

    print(
        f"{label:<30} | "
        f"Train loss: {loss_text} | "
        f"Accuracy: {acc_text}"
    )


async def start_full_training_session(poll_interval=1):
    """Run a training task in a fresh session and poll it until it stops.

    Opens a session on "my-pytorch-endpoint", posts "/start_task" with the
    session id in the payload, then requests "/status" (with ``retry=False``)
    every ``poll_interval`` seconds and prints each raw response. Polling
    stops as soon as the reported state is no longer "running".

    The session is used as an async context manager so it is closed on normal
    completion, on errors and on cancellation. Previously it was never closed
    and the loop had no exit, which made the final messages unreachable.

    Args:
        poll_interval: seconds to sleep before each status request.

    Returns:
        None.
    """
    async with vastai.Serverless() as client:
        endpoint = await client.get_endpoint("my-pytorch-endpoint")

        async with await endpoint.session() as session:
            payload = {'max_train_batches_per_epoch': 10, "epochs": 20, "session_id": session.session_id}
            await session.request(route="/start_task", payload=payload)
            while True:
                await asyncio.sleep(poll_interval)
                status = await session.request(route="/status", payload={}, retry=False)
                print(status)
                if status["response"]["status"]["state"] != "running":
                    break
        print("Training complete")

    print("Sessions closed.")


In [20]:
# Top-level await; gather() with a single coroutine returns a one-element list.
await asyncio.gather(start_full_training_session())

{'response': {'ok': True, 'status': {'task_id': '5VhxuE6AOMZXE', 'state': 'running', 'message': 'Validation epoch 2/20 complete', 'created_at': 1766011801.3936226, 'started_at': 1766011801.393623, 'finished_at': None, 'epoch': 2, 'step': 20, 'total_steps': 200, 'train_loss': 1.234742820262909, 'train_acc': 0.6984375, 'val_loss': 0.8508174633979797, 'val_acc': 0.79, 'last_update_at': 1766011802.5518765, 'config': {'epochs': 20, 'batch_size': 64, 'lr': 0.001, 'max_train_batches_per_epoch': 10, 'max_val_batches': 50, 'seed': 1337, 'data_dir': './data', 'num_workers': 2, 'device': 'auto', 'pin_memory': True, 'task_id': '5VhxuE6AOMZXE'}, 'error_type': None, 'error': None}}, 'latency': 0.18895339965820312, 'url': 'https://145.236.166.111:41798', 'reuqest_idx': 0, 'auth_data': {'__request_id': '24851e66-6b4a-45f9-8859-46cf7fc9ecd2', 'cost': 100.0, 'endpoint': 'my-pytorch-endpoint', 'reqnum': 0, 'request_idx': 9, 'signature': 'mKo2taDu8NBrOgLLqOVljA8K8e+5FwUiA2ISc86dqZ53fWBbeYZIBj+fw8vrMjBpK43

CancelledError: 

In [1]:
import vastai
import asyncio

In [2]:
# Shared notebook state: the list of saved session descriptors, and the lock
# that guards appends to it from concurrent coroutines.
my_data, my_data_lock = [], asyncio.Lock()

In [1]:
# Restores a previously captured session descriptor so later cells can
# reattach to it with endpoint.get_session().
# SECURITY NOTE(review): this literal hard-codes a session id, a signed auth
# payload ('signature') and a worker URL. Anyone with a copy of the notebook
# can read them — consider loading the descriptor from an untracked local
# file instead, and scrub it before sharing or committing.
# NOTE(review): this rebinds my_data, discarding whatever an earlier cell
# stored in it.
my_data = [{'session_id': 'OJ2Oz7unDo3ed', 'session_auth': {'__request_id': 'b8bcc36d-b8b3-4d2d-a0fe-fa639f37000c', 'cost': 100.0, 'endpoint': 'my-pytorch-endpoint', 'reqnum': 0, 'request_idx': 12, 'signature': 'hGhgJH1FRNUf8ZWypSM3abPkc89Ze86PYHp9d3U399I3irZ5tdGOigBAqT5ZrHj0ziBci57mv6+6NcwxD8qfsnOi8f6n2DKIbNSyx39hnvLWziiOoGH4BZhhhc3PLj2vzapgmbE4eQ5l5xWam0pphIj0cqZwZDGxk3dl3a59k/fzEL1A4Suc+ZvI1a8T+OMYTgKsZZQNNPOHz1IqZ4HeeSjowFj31hLA9cQPG2gp+i0/I51nNgc1tOvkivEUS6EemQy2DKOop5iY7iecjEjrCZmq+gJ6BkerENQoZc29bcfN/7j9q2CX0EAZPDGrzd12IxwAFbeb1i26qb31HaHgXg==', 'url': 'https://145.236.166.111:41101'}, 'session_url': 'https://145.236.166.111:41101'}]


In [None]:



async def create_session(client):
    """Open a session on "my-pytorch-endpoint2" and record its descriptor.

    Args:
        client: Serverless client used to look up the endpoint.

    Returns:
        dict with the new session's ``session_id`` and ``session_auth``
        (taken from ``session.auth_data``). The same dict is appended to the
        module-level ``my_data`` list while holding ``my_data_lock``.

    The session is not closed here; only its descriptor is kept.
    """
    endpoint = await client.get_endpoint("my-pytorch-endpoint2")
    new_session = await endpoint.session(cost=10.0)

    descriptor = dict(
        session_id=new_session.session_id,
        session_auth=new_session.auth_data,
    )

    async with my_data_lock:
        my_data.append(descriptor)

    return descriptor



# Number of sessions that main() opens concurrently.
NUM_SESSIONS = 3


async def main():
    """Create NUM_SESSIONS sessions concurrently with one shared client.

    Each create_session() call appends its descriptor to ``my_data``; the
    values gathered here are not used further and nothing is returned.
    """
    async with vastai.Serverless(debug=False) as client:
        await asyncio.gather(
            *(create_session(client) for _ in range(NUM_SESSIONS))
        )




# Top-level await: runs the main() coroutine defined in this cell.
await main()


Did not get request_idx from initial route
Did not get request_idx from initial route
Did not get request_idx from initial route
Did not get request_idx from initial route
Did not get request_idx from initial route
Did not get request_idx from initial route
Did not get request_idx from initial route
Did not get request_idx from initial route
Did not get request_idx from initial route
Did not get request_idx from initial route


CancelledError: 

In [4]:
async def main():
    """Reattach to the first saved session, start a task and poll its status.

    Reads the first descriptor in the module-level ``my_data`` list,
    reattaches with ``endpoint.get_session`` on "my-pytorch-endpoint", posts
    "/start_task", and prints the raw "/status" response once per second
    while ``session.is_open()`` reports the session as open.

    Returns early, after printing a message, when ``my_data`` is empty or
    when ``get_session`` returns ``None`` (the session no longer exists).
    A failing status request is treated as the session having been closed.
    """
    # Check before opening a client: indexing an empty list would raise
    # IndexError after the connection had already been made.
    if not my_data:
        print("no saved sessions in my_data")
        return

    session_data = my_data[0]

    async with vastai.Serverless(debug=True) as client:
        endpoint = await client.get_endpoint("my-pytorch-endpoint")

        session = await endpoint.get_session(session_data["session_id"], session_auth=session_data["session_auth"])
        if session is None:
            print("session is closed")
            return
        payload = {'max_train_batches_per_epoch': 10, "epochs": 20, "session_id": session.session_id}
        await session.request(route="/start_task", payload=payload)
        while await session.is_open():
            await asyncio.sleep(1)
            try:
                status = await session.request(route="/status", payload={}, retry=False)
            except Exception:
                # `except Exception` rather than a bare `except`: cancelling
                # the cell (CancelledError / KeyboardInterrupt) must propagate
                # instead of being reported as a closed session.
                print('session was closed')
                break
            print(status)
# Top-level await: runs the main() coroutine defined in this cell.
await main()

[2025-12-17 18:35:51,078] Serverless - INFO - Started aiohttp ClientSession
[2025-12-17 18:35:51,375] Serverless - INFO - Loaded Vast.ai SSL certificate
[2025-12-17 18:35:51,670] Serverless - INFO - Found 2 endpoints
[2025-12-17 18:35:51,848] Serverless - INFO - Queued endpoint request
[2025-12-17 18:35:51,848] Serverless - DEBUG - Found worker machine, starting work
[2025-12-17 18:35:51,918] Serverless - INFO - Endpoint request task completed
[2025-12-17 18:35:52,979] Serverless - INFO - Queued endpoint request
[2025-12-17 18:35:52,979] Serverless - DEBUG - Found worker machine, starting work
[2025-12-17 18:35:53,046] Serverless - INFO - Endpoint request task completed


{'response': {'ok': True, 'status': {'task_id': 'J5iUIUNozbfoQ', 'state': 'running', 'message': 'Validation epoch 2/20 complete', 'created_at': 1766025351.8858962, 'started_at': 1766025351.8858967, 'finished_at': None, 'epoch': 2, 'step': 20, 'total_steps': 200, 'train_loss': 1.23474280834198, 'train_acc': 0.6984375, 'val_loss': 0.8508174741268157, 'val_acc': 0.79, 'last_update_at': 1766025352.9558358, 'config': {'epochs': 20, 'batch_size': 64, 'lr': 0.001, 'max_train_batches_per_epoch': 10, 'max_val_batches': 50, 'seed': 1337, 'data_dir': './data', 'num_workers': 2, 'device': 'auto', 'pin_memory': True, 'task_id': 'J5iUIUNozbfoQ'}, 'error_type': None, 'error': None}}, 'latency': 0.06621670722961426, 'url': 'https://136.59.129.136:34969', 'reuqest_idx': 0, 'auth_data': {'__request_id': 'f28da04c-dbcf-4e9b-8279-f7f671dc5408', 'cost': 10.0, 'endpoint': 'my-pytorch-endpoint', 'reqnum': 0, 'request_idx': 109, 'signature': 'Szq3GoPNdlXjBdZhFzlqCUtwUK/j4HlX++TJ6LUSDozaiQLXn4B2QMMCTHN2tA8i5NS

[2025-12-17 18:35:54,106] Serverless - INFO - Queued endpoint request
[2025-12-17 18:35:54,107] Serverless - DEBUG - Found worker machine, starting work
[2025-12-17 18:35:54,171] Serverless - INFO - Endpoint request task completed


{'response': {'ok': True, 'status': {'task_id': 'J5iUIUNozbfoQ', 'state': 'running', 'message': 'Training epoch 6/20 batch 10/10', 'created_at': 1766025351.8858962, 'started_at': 1766025351.8858967, 'finished_at': None, 'epoch': 6, 'step': 60, 'total_steps': 200, 'train_loss': 0.3942483693361282, 'train_acc': 0.8890625, 'val_loss': 0.44132314026355746, 'val_acc': 0.8684375, 'last_update_at': 1766025354.0985534, 'config': {'epochs': 20, 'batch_size': 64, 'lr': 0.001, 'max_train_batches_per_epoch': 10, 'max_val_batches': 50, 'seed': 1337, 'data_dir': './data', 'num_workers': 2, 'device': 'auto', 'pin_memory': True, 'task_id': 'J5iUIUNozbfoQ'}, 'error_type': None, 'error': None}}, 'latency': 0.0638275146484375, 'url': 'https://136.59.129.136:34969', 'reuqest_idx': 0, 'auth_data': {'__request_id': 'f28da04c-dbcf-4e9b-8279-f7f671dc5408', 'cost': 10.0, 'endpoint': 'my-pytorch-endpoint', 'reqnum': 0, 'request_idx': 109, 'signature': 'Szq3GoPNdlXjBdZhFzlqCUtwUK/j4HlX++TJ6LUSDozaiQLXn4B2QMMCTHN

[2025-12-17 18:35:55,231] Serverless - INFO - Queued endpoint request
[2025-12-17 18:35:55,232] Serverless - DEBUG - Found worker machine, starting work
[2025-12-17 18:35:55,295] Serverless - INFO - Endpoint request task completed


{'response': {'ok': True, 'status': {'task_id': 'J5iUIUNozbfoQ', 'state': 'running', 'message': 'Training epoch 9/20 batch 10/10', 'created_at': 1766025351.8858962, 'started_at': 1766025351.8858967, 'finished_at': None, 'epoch': 9, 'step': 90, 'total_steps': 200, 'train_loss': 0.3405292421579361, 'train_acc': 0.90625, 'val_loss': 0.367501477599144, 'val_acc': 0.8921875, 'last_update_at': 1766025355.2094455, 'config': {'epochs': 20, 'batch_size': 64, 'lr': 0.001, 'max_train_batches_per_epoch': 10, 'max_val_batches': 50, 'seed': 1337, 'data_dir': './data', 'num_workers': 2, 'device': 'auto', 'pin_memory': True, 'task_id': 'J5iUIUNozbfoQ'}, 'error_type': None, 'error': None}}, 'latency': 0.06324076652526855, 'url': 'https://136.59.129.136:34969', 'reuqest_idx': 0, 'auth_data': {'__request_id': 'f28da04c-dbcf-4e9b-8279-f7f671dc5408', 'cost': 10.0, 'endpoint': 'my-pytorch-endpoint', 'reqnum': 0, 'request_idx': 109, 'signature': 'Szq3GoPNdlXjBdZhFzlqCUtwUK/j4HlX++TJ6LUSDozaiQLXn4B2QMMCTHN2tA

[2025-12-17 18:35:56,356] Serverless - INFO - Queued endpoint request
[2025-12-17 18:35:56,356] Serverless - DEBUG - Found worker machine, starting work
[2025-12-17 18:35:56,420] Serverless - INFO - Endpoint request task completed


{'response': {'ok': True, 'status': {'task_id': 'J5iUIUNozbfoQ', 'state': 'running', 'message': 'Training epoch 12/20 batch 10/10', 'created_at': 1766025351.8858962, 'started_at': 1766025351.8858967, 'finished_at': None, 'epoch': 12, 'step': 120, 'total_steps': 200, 'train_loss': 0.24009961485862732, 'train_acc': 0.925, 'val_loss': 0.27664068818092347, 'val_acc': 0.918125, 'last_update_at': 1766025356.317134, 'config': {'epochs': 20, 'batch_size': 64, 'lr': 0.001, 'max_train_batches_per_epoch': 10, 'max_val_batches': 50, 'seed': 1337, 'data_dir': './data', 'num_workers': 2, 'device': 'auto', 'pin_memory': True, 'task_id': 'J5iUIUNozbfoQ'}, 'error_type': None, 'error': None}}, 'latency': 0.06323432922363281, 'url': 'https://136.59.129.136:34969', 'reuqest_idx': 0, 'auth_data': {'__request_id': 'f28da04c-dbcf-4e9b-8279-f7f671dc5408', 'cost': 10.0, 'endpoint': 'my-pytorch-endpoint', 'reqnum': 0, 'request_idx': 109, 'signature': 'Szq3GoPNdlXjBdZhFzlqCUtwUK/j4HlX++TJ6LUSDozaiQLXn4B2QMMCTHN2

[2025-12-17 18:35:57,481] Serverless - INFO - Queued endpoint request
[2025-12-17 18:35:57,481] Serverless - DEBUG - Found worker machine, starting work
[2025-12-17 18:35:57,545] Serverless - INFO - Endpoint request task completed


{'response': {'ok': True, 'status': {'task_id': 'J5iUIUNozbfoQ', 'state': 'running', 'message': 'Training epoch 15/20 batch 10/10', 'created_at': 1766025351.8858962, 'started_at': 1766025351.8858967, 'finished_at': None, 'epoch': 15, 'step': 150, 'total_steps': 200, 'train_loss': 0.17485131174325944, 'train_acc': 0.9484375, 'val_loss': 0.25134636610746386, 'val_acc': 0.925625, 'last_update_at': 1766025357.4609878, 'config': {'epochs': 20, 'batch_size': 64, 'lr': 0.001, 'max_train_batches_per_epoch': 10, 'max_val_batches': 50, 'seed': 1337, 'data_dir': './data', 'num_workers': 2, 'device': 'auto', 'pin_memory': True, 'task_id': 'J5iUIUNozbfoQ'}, 'error_type': None, 'error': None}}, 'latency': 0.06371116638183594, 'url': 'https://136.59.129.136:34969', 'reuqest_idx': 0, 'auth_data': {'__request_id': 'f28da04c-dbcf-4e9b-8279-f7f671dc5408', 'cost': 10.0, 'endpoint': 'my-pytorch-endpoint', 'reqnum': 0, 'request_idx': 109, 'signature': 'Szq3GoPNdlXjBdZhFzlqCUtwUK/j4HlX++TJ6LUSDozaiQLXn4B2QMM

[2025-12-17 18:35:58,606] Serverless - INFO - Queued endpoint request
[2025-12-17 18:35:58,607] Serverless - DEBUG - Found worker machine, starting work
[2025-12-17 18:35:58,684] Serverless - INFO - Endpoint request task completed


{'response': {'ok': True, 'status': {'task_id': 'J5iUIUNozbfoQ', 'state': 'running', 'message': 'Training epoch 18/20 batch 10/10', 'created_at': 1766025351.8858962, 'started_at': 1766025351.8858967, 'finished_at': None, 'epoch': 18, 'step': 180, 'total_steps': 200, 'train_loss': 0.17782035544514657, 'train_acc': 0.9515625, 'val_loss': 0.2290162818133831, 'val_acc': 0.9253125, 'last_update_at': 1766025358.5820968, 'config': {'epochs': 20, 'batch_size': 64, 'lr': 0.001, 'max_train_batches_per_epoch': 10, 'max_val_batches': 50, 'seed': 1337, 'data_dir': './data', 'num_workers': 2, 'device': 'auto', 'pin_memory': True, 'task_id': 'J5iUIUNozbfoQ'}, 'error_type': None, 'error': None}}, 'latency': 0.07689785957336426, 'url': 'https://136.59.129.136:34969', 'reuqest_idx': 0, 'auth_data': {'__request_id': 'f28da04c-dbcf-4e9b-8279-f7f671dc5408', 'cost': 10.0, 'endpoint': 'my-pytorch-endpoint', 'reqnum': 0, 'request_idx': 109, 'signature': 'Szq3GoPNdlXjBdZhFzlqCUtwUK/j4HlX++TJ6LUSDozaiQLXn4B2QMM

[2025-12-17 18:35:59,745] Serverless - INFO - Queued endpoint request
[2025-12-17 18:35:59,746] Serverless - DEBUG - Found worker machine, starting work
[2025-12-17 18:35:59,807] Serverless - ERROR - Attempt 1 failed: HTTP 410 from https://136.59.129.136:34969/status: {"error": "invalid session"}
[2025-12-17 18:35:59,808] Serverless - ERROR - Request errored: Too many retries for https://136.59.129.136:34969/status (last_status=410, last_text='{"error": "invalid session"}')
[2025-12-17 18:35:59,808] Serverless - INFO - Closed aiohttp ClientSession


session was closed


In [5]:
# Sanity check: how many saved session descriptors my_data currently holds.
print(len(my_data))

0


In [6]:
async def run_session(endpoint, session_data):
    """Reattach to one saved session, start a task and print its status.

    Args:
        endpoint: endpoint object providing ``get_session``.
        session_data: mapping with ``session_id`` and ``session_auth`` keys.

    When ``get_session`` returns ``None`` the session is reported as closed
    and nothing else happens. Otherwise "/start_task" is posted and, while
    ``session.is_open()`` is true, "/status" is requested once per second
    (``retry=False``) and printed. Any exception from the status request is
    reported as the session having been closed and ends the loop.
    """
    saved_id = session_data["session_id"]
    session = await endpoint.get_session(
        saved_id,
        session_auth=session_data["session_auth"],
    )

    if session is None:
        print(f"Session {saved_id} is closed")
        return

    task_config = dict(
        max_train_batches_per_epoch=10,
        epochs=20,
        session_id=session.session_id,
    )
    await session.request(route="/start_task", payload=task_config)

    while True:
        if not await session.is_open():
            break
        await asyncio.sleep(1)
        try:
            latest = await session.request(route="/status", payload={}, retry=False)
            print(f"Session {session.session_id} status:", latest)
        except Exception:
            print(f"Session {session.session_id} was closed")
            break


async def main():
    """Run run_session() concurrently for every descriptor in ``my_data``.

    A single client and a single "my-pytorch-endpoint" endpoint object are
    shared by all sessions. Returns None.
    """
    async with vastai.Serverless(debug=False) as client:
        endpoint = await client.get_endpoint("my-pytorch-endpoint")
        await asyncio.gather(
            *(run_session(endpoint, saved) for saved in my_data)
        )


# Top-level await: runs the main() coroutine defined in this cell.
await main()


Attempt 1 failed: HTTP 410 from https://45.29.62.113:20163/session/get: {"error": "session does not exist"}


Session mk0ZQiQFxlmPw is closed


Attempt 1 failed: HTTP 410 from https://74.48.140.178:27356/session/get: {"error": "session does not exist"}
Attempt 1 failed: HTTP 410 from https://58.224.7.136:30566/session/get: {"error": "session does not exist"}
Attempt 1 failed: HTTP 410 from https://85.195.201.222:40167/session/get: {"error": "session does not exist"}
Attempt 1 failed: HTTP 410 from https://79.112.58.103:30064/session/get: {"error": "session does not exist"}
Attempt 1 failed: HTTP 410 from https://109.228.173.28:30362/session/get: {"error": "session does not exist"}
Attempt 1 failed: HTTP 410 from https://87.205.21.33:42952/session/get: {"error": "session does not exist"}
Attempt 1 failed: HTTP 410 from https://90.224.159.6:40969/session/get: {"error": "session does not exist"}


Session srAEUAm1UFjk4 is closed
Session khVPvAwE4kAeR is closed
Session ALRX5IqHWHMSF is closed
Session Z9fbPfOhuE2VM is closed
Session ltkV64Lr155NS is closed
Session Atxr07eHrzQd1 is closed
Session 0z6SlB08J3sjB is closed
Session bEFgxsaBQBXjQ status: {'response': {'ok': True, 'status': {'task_id': 'bEFgxsaBQBXjQ', 'state': 'running', 'message': 'Training epoch 3/20 batch 10/10', 'created_at': 1766031109.839975, 'started_at': 1766031109.8399754, 'finished_at': None, 'epoch': 3, 'step': 30, 'total_steps': 200, 'train_loss': 0.717763465642929, 'train_acc': 0.7828125, 'val_loss': 0.8508174586296081, 'val_acc': 0.79, 'last_update_at': 1766031110.81273, 'config': {'epochs': 20, 'batch_size': 64, 'lr': 0.001, 'max_train_batches_per_epoch': 10, 'max_val_batches': 50, 'seed': 1337, 'data_dir': './data', 'num_workers': 2, 'device': 'auto', 'pin_memory': True, 'task_id': 'bEFgxsaBQBXjQ'}, 'error_type': None, 'error': None}}, 'latency': 0.025380849838256836, 'url': 'https://45.29.62.113:20163',

Attempt 1 failed: HTTP 410 from https://58.224.7.136:30566/status: {"error": "invalid session"}
Request errored: Too many retries for https://58.224.7.136:30566/status (last_status=410, last_text='{"error": "invalid session"}')


Session Ib87ZYptqvmpB was closed
Session aUPuup8p2np98 status: {'response': {'ok': True, 'status': {'task_id': 'aUPuup8p2np98', 'state': 'running', 'message': 'Training epoch 10/20 batch 10/10', 'created_at': 1766031110.316808, 'started_at': 1766031110.3168085, 'finished_at': None, 'epoch': 10, 'step': 100, 'total_steps': 200, 'train_loss': 0.2395697206258774, 'train_acc': 0.9328125, 'val_loss': 0.30364997178316117, 'val_acc': 0.9075, 'last_update_at': 1766031114.0849411, 'config': {'epochs': 20, 'batch_size': 64, 'lr': 0.001, 'max_train_batches_per_epoch': 10, 'max_val_batches': 50, 'seed': 1337, 'data_dir': './data', 'num_workers': 2, 'device': 'auto', 'pin_memory': True, 'task_id': 'aUPuup8p2np98'}, 'error_type': None, 'error': None}}, 'latency': 0.15977096557617188, 'url': 'https://85.195.201.222:40167', 'request_idx': 0, 'auth_data': {'__request_id': '030f0420-630e-47c2-962b-d9c05d76dc54', 'cost': 10.0, 'endpoint': 'my-pytorch-endpoint', 'reqnum': 2, 'request_idx': 154, 'signature

Attempt 1 failed: HTTP 410 from https://87.205.21.33:42952/status: {"error": "invalid session"}
Request errored: Too many retries for https://87.205.21.33:42952/status (last_status=410, last_text='{"error": "invalid session"}')


Session TH537Kxmq2AJx status: {'response': {'ok': True, 'status': {'task_id': 'TH537Kxmq2AJx', 'state': 'running', 'message': 'Training epoch 17/20 batch 10/10', 'created_at': 1766031110.3662791, 'started_at': 1766031110.3662794, 'finished_at': None, 'epoch': 17, 'step': 170, 'total_steps': 200, 'train_loss': 0.2101828396320343, 'train_acc': 0.9375, 'val_loss': 0.22539275780320167, 'val_acc': 0.929375, 'last_update_at': 1766031114.2302442, 'config': {'epochs': 20, 'batch_size': 64, 'lr': 0.001, 'max_train_batches_per_epoch': 10, 'max_val_batches': 50, 'seed': 1337, 'data_dir': './data', 'num_workers': 2, 'device': 'auto', 'pin_memory': True, 'task_id': 'TH537Kxmq2AJx'}, 'error_type': None, 'error': None}}, 'latency': 0.17328357696533203, 'url': 'https://79.112.58.103:30064', 'request_idx': 0, 'auth_data': {'__request_id': '52185f91-e760-4e21-ae1a-8cfcf9446c2e', 'cost': 10.0, 'endpoint': 'my-pytorch-endpoint', 'reqnum': 1, 'request_idx': 145, 'signature': 'QmXfEoZGEU/8tmSU/r2CPNSPA4oqJu

Attempt 1 failed: HTTP 410 from https://79.112.58.103:30064/status: {"error": "invalid session"}
Request errored: Too many retries for https://79.112.58.103:30064/status (last_status=410, last_text='{"error": "invalid session"}')


Session SPx41OUdNEfG1 status: {'response': {'ok': True, 'status': {'task_id': 'SPx41OUdNEfG1', 'state': 'running', 'message': 'Training epoch 8/20 batch 10/10', 'created_at': 1766031109.980925, 'started_at': 1766031109.9809258, 'finished_at': None, 'epoch': 8, 'step': 80, 'total_steps': 200, 'train_loss': 0.3788644105195999, 'train_acc': 0.9125, 'val_loss': 0.3556075930595398, 'val_acc': 0.890625, 'last_update_at': 1766031115.079302, 'config': {'epochs': 20, 'batch_size': 64, 'lr': 0.001, 'max_train_batches_per_epoch': 10, 'max_val_batches': 50, 'seed': 1337, 'data_dir': './data', 'num_workers': 2, 'device': 'auto', 'pin_memory': True, 'task_id': 'SPx41OUdNEfG1'}, 'error_type': None, 'error': None}}, 'latency': 0.06518912315368652, 'url': 'https://136.59.129.136:34950', 'request_idx': 0, 'auth_data': {'__request_id': '691856bd-8eed-420d-9bf4-871a253830cc', 'cost': 10.0, 'endpoint': 'my-pytorch-endpoint', 'reqnum': 12, 'request_idx': 153, 'signature': 'R+HdUOKiF6jGF2q9O3IPrHvPQq7ts90HS/

Attempt 1 failed: HTTP 410 from https://109.228.173.28:30362/status: {"error": "invalid session"}
Request errored: Too many retries for https://109.228.173.28:30362/status (last_status=410, last_text='{"error": "invalid session"}')


Session x5BpubZZHavY7 was closed
Session bEFgxsaBQBXjQ status: {'response': {'ok': True, 'status': {'task_id': 'bEFgxsaBQBXjQ', 'state': 'running', 'message': 'Training epoch 17/20 batch 10/10', 'created_at': 1766031109.839975, 'started_at': 1766031109.8399754, 'finished_at': None, 'epoch': 17, 'step': 170, 'total_steps': 200, 'train_loss': 0.21088864430785179, 'train_acc': 0.9375, 'val_loss': 0.2259671561419964, 'val_acc': 0.92875, 'last_update_at': 1766031115.9232311, 'config': {'epochs': 20, 'batch_size': 64, 'lr': 0.001, 'max_train_batches_per_epoch': 10, 'max_val_batches': 50, 'seed': 1337, 'data_dir': './data', 'num_workers': 2, 'device': 'auto', 'pin_memory': True, 'task_id': 'bEFgxsaBQBXjQ'}, 'error_type': None, 'error': None}}, 'latency': 0.02259039878845215, 'url': 'https://45.29.62.113:20163', 'request_idx': 0, 'auth_data': {'__request_id': 'ad7f801d-940a-48de-977e-091f31257df2', 'cost': 10.0, 'endpoint': 'my-pytorch-endpoint', 'reqnum': 108, 'request_idx': 143, 'signature':

Attempt 1 failed: HTTP 410 from https://183.89.209.74:55609/status: {"error": "invalid session"}
Request errored: Too many retries for https://183.89.209.74:55609/status (last_status=410, last_text='{"error": "invalid session"}')


Session fcOTw2kBuIsfW status: {'response': {'ok': True, 'status': {'task_id': 'fcOTw2kBuIsfW', 'state': 'running', 'message': 'Training epoch 18/20 batch 10/10', 'created_at': 1766031110.531146, 'started_at': 1766031110.5311463, 'finished_at': None, 'epoch': 18, 'step': 180, 'total_steps': 200, 'train_loss': 0.17774924486875535, 'train_acc': 0.9515625, 'val_loss': 0.22883765205740927, 'val_acc': 0.9253125, 'last_update_at': 1766031117.7004614, 'config': {'epochs': 20, 'batch_size': 64, 'lr': 0.001, 'max_train_batches_per_epoch': 10, 'max_val_batches': 50, 'seed': 1337, 'data_dir': './data', 'num_workers': 2, 'device': 'auto', 'pin_memory': True, 'task_id': 'fcOTw2kBuIsfW'}, 'error_type': None, 'error': None}}, 'latency': 0.2209458351135254, 'url': 'https://74.48.140.178:27356', 'request_idx': 0, 'auth_data': {'__request_id': '587cdf96-93ff-40b7-a7c9-438af6d88f90', 'cost': 10.0, 'endpoint': 'my-pytorch-endpoint', 'reqnum': 2, 'request_idx': 150, 'signature': 'nX/eqRzYCyHbD6/ApVzfqTH8svU

Attempt 1 failed: HTTP 410 from https://45.29.62.113:20163/status: {"error": "invalid session"}
Request errored: Too many retries for https://45.29.62.113:20163/status (last_status=410, last_text='{"error": "invalid session"}')


Session bEFgxsaBQBXjQ was closed
Session aUPuup8p2np98 status: {'response': {'ok': True, 'status': {'task_id': 'aUPuup8p2np98', 'state': 'running', 'message': 'Validation epoch 20/20 complete', 'created_at': 1766031110.316808, 'started_at': 1766031110.3168085, 'finished_at': None, 'epoch': 20, 'step': 200, 'total_steps': 200, 'train_loss': 0.1432774044573307, 'train_acc': 0.9640625, 'val_loss': 0.17572276301681997, 'val_acc': 0.9459375, 'last_update_at': 1766031118.0906467, 'config': {'epochs': 20, 'batch_size': 64, 'lr': 0.001, 'max_train_batches_per_epoch': 10, 'max_val_batches': 50, 'seed': 1337, 'data_dir': './data', 'num_workers': 2, 'device': 'auto', 'pin_memory': True, 'task_id': 'aUPuup8p2np98'}, 'error_type': None, 'error': None}}, 'latency': 0.16130757331848145, 'url': 'https://85.195.201.222:40167', 'request_idx': 0, 'auth_data': {'__request_id': '030f0420-630e-47c2-962b-d9c05d76dc54', 'cost': 10.0, 'endpoint': 'my-pytorch-endpoint', 'reqnum': 2, 'request_idx': 154, 'signatu

Attempt 1 failed: HTTP 410 from https://85.195.201.222:40167/session/get: {"error": "session does not exist"}


Session SPx41OUdNEfG1 status: {'response': {'ok': True, 'status': {'task_id': 'SPx41OUdNEfG1', 'state': 'running', 'message': 'Validation epoch 13/20 complete', 'created_at': 1766031109.980925, 'started_at': 1766031109.9809258, 'finished_at': None, 'epoch': 13, 'step': 130, 'total_steps': 200, 'train_loss': 0.2078607402741909, 'train_acc': 0.946875, 'val_loss': 0.22616989336907864, 'val_acc': 0.9325, 'last_update_at': 1766031118.984889, 'config': {'epochs': 20, 'batch_size': 64, 'lr': 0.001, 'max_train_batches_per_epoch': 10, 'max_val_batches': 50, 'seed': 1337, 'data_dir': './data', 'num_workers': 2, 'device': 'auto', 'pin_memory': True, 'task_id': 'SPx41OUdNEfG1'}, 'error_type': None, 'error': None}}, 'latency': 0.06339406967163086, 'url': 'https://136.59.129.136:34950', 'request_idx': 0, 'auth_data': {'__request_id': '691856bd-8eed-420d-9bf4-871a253830cc', 'cost': 10.0, 'endpoint': 'my-pytorch-endpoint', 'reqnum': 12, 'request_idx': 153, 'signature': 'R+HdUOKiF6jGF2q9O3IPrHvPQq7ts90

Attempt 1 failed: HTTP 410 from https://74.48.140.178:27356/status: {"error": "invalid session"}
Request errored: Too many retries for https://74.48.140.178:27356/status (last_status=410, last_text='{"error": "invalid session"}')


Session fcOTw2kBuIsfW was closed
Session SPx41OUdNEfG1 status: {'response': {'ok': True, 'status': {'task_id': 'SPx41OUdNEfG1', 'state': 'running', 'message': 'Training epoch 15/20 batch 10/10', 'created_at': 1766031109.980925, 'started_at': 1766031109.9809258, 'finished_at': None, 'epoch': 15, 'step': 150, 'total_steps': 200, 'train_loss': 0.17481325566768646, 'train_acc': 0.9484375, 'val_loss': 0.25149013102054596, 'val_acc': 0.925, 'last_update_at': 1766031119.7764854, 'config': {'epochs': 20, 'batch_size': 64, 'lr': 0.001, 'max_train_batches_per_epoch': 10, 'max_val_batches': 50, 'seed': 1337, 'data_dir': './data', 'num_workers': 2, 'device': 'auto', 'pin_memory': True, 'task_id': 'SPx41OUdNEfG1'}, 'error_type': None, 'error': None}}, 'latency': 0.06464004516601562, 'url': 'https://136.59.129.136:34950', 'request_idx': 0, 'auth_data': {'__request_id': '691856bd-8eed-420d-9bf4-871a253830cc', 'cost': 10.0, 'endpoint': 'my-pytorch-endpoint', 'reqnum': 12, 'request_idx': 153, 'signatur

Attempt 1 failed: HTTP 410 from https://136.59.129.136:34950/status: {"error": "invalid session"}
Request errored: Too many retries for https://136.59.129.136:34950/status (last_status=410, last_text='{"error": "invalid session"}')


Session SPx41OUdNEfG1 was closed


In [4]:
import asyncio
import random
import vastai

async def main():
    """Reattach to the first saved session in ``my_data`` and close it.

    Returns early, after printing a message, when ``my_data`` is empty or
    when ``get_session`` returns ``None`` — other cells in this notebook
    check for that result to detect a session that no longer exists.
    Calling ``close()`` on ``None`` would raise AttributeError.
    """
    # Check before opening a client: indexing an empty list raises IndexError.
    if not my_data:
        print("no saved sessions in my_data")
        return

    session_data = my_data[0]

    async with vastai.Serverless(debug=True) as client:
        endpoint = await client.get_endpoint("my-pytorch-endpoint")

        session = await endpoint.get_session(session_data["session_id"], session_auth=session_data["session_auth"])
        if session is None:
            print("session is closed")
            return
        await session.close()
# Top-level await: runs the main() coroutine defined in this cell.
await main()

[2025-12-17 16:17:38,097] Serverless - INFO - Started aiohttp ClientSession
[2025-12-17 16:17:38,097] Serverless - INFO - Started aiohttp ClientSession
[2025-12-17 16:17:38,397] Serverless - INFO - Loaded Vast.ai SSL certificate
[2025-12-17 16:17:38,397] Serverless - INFO - Loaded Vast.ai SSL certificate
[2025-12-17 16:17:38,716] Serverless - INFO - Found 1 endpoints
[2025-12-17 16:17:38,716] Serverless - INFO - Found 1 endpoints
[2025-12-17 16:17:39,264] Serverless - INFO - Queued endpoint request
[2025-12-17 16:17:39,264] Serverless - INFO - Queued endpoint request
[2025-12-17 16:17:39,265] Serverless - DEBUG - Found worker machine, starting work
[2025-12-17 16:17:39,265] Serverless - DEBUG - Found worker machine, starting work
[2025-12-17 16:17:39,447] Serverless - INFO - Endpoint request task completed
[2025-12-17 16:17:39,447] Serverless - INFO - Endpoint request task completed
[2025-12-17 16:17:39,448] Serverless - INFO - Closed aiohttp ClientSession
[2025-12-17 16:17:39,448] Ser