In [None]:
import os

# Never commit a real key in this cell. Export GOOGLE_API_KEY in the shell
# before starting Jupyter; setdefault keeps an already-exported value instead
# of overwriting it with the empty placeholder below.
os.environ.setdefault("GOOGLE_API_KEY", "")

In [2]:
import sys

# Add the `app` directory and its parent (both two levels above the working
# directory) to the import search path, in that order.
sys.path.extend(["../../app/", "../../"])

# Metrics

In [3]:
import numpy as np
from sklearn.metrics import mean_absolute_error


def accuracy_at_threshold(gold_scores, pred_scores, threshold):
    """Return the fraction of predictions within ``threshold`` of the gold score.

    Args:
        gold_scores: Sequence of reference scores.
        pred_scores: Sequence of predicted scores, aligned element-wise with
            ``gold_scores``. ``None`` entries are converted to NaN and
            therefore counted as misses.
        threshold: Maximum absolute error (inclusive) for a prediction to
            count as correct.

    Returns:
        The share of examples whose absolute error is ``<= threshold``.

    Raises:
        ValueError: If the inputs are empty or do not have the same shape.
    """
    # dtype=float turns a missing (None) prediction into NaN instead of
    # producing an object array that fails on subtraction.
    gold = np.asarray(gold_scores, dtype=float)
    pred = np.asarray(pred_scores, dtype=float)

    # Reject mismatched inputs explicitly; otherwise NumPy may broadcast a
    # shorter array and silently return a wrong accuracy.
    if gold.shape != pred.shape:
        raise ValueError(
            f"gold_scores and pred_scores must have the same shape, "
            f"got {gold.shape} and {pred.shape}"
        )
    if gold.size == 0:
        raise ValueError("accuracy_at_threshold requires at least one score")

    absolute_errors = np.abs(gold - pred)
    # NaN <= threshold is False, so missing predictions never count as hits.
    within_threshold_count = np.sum(absolute_errors <= threshold)
    return within_threshold_count / len(gold)


# Evaluate

In [4]:
import pandas as pd

# Read the training split first, then the test split; the test split is the
# frame the evaluation loop iterates over as `val_df`.
train_df, val_df = map(
    pd.read_csv, ("../../data/train.csv", "../../data/test.csv")
)

## Kaggle dataset

In [5]:
import uuid

from langchain.chat_models import init_chat_model
from langgraph.graph import END, START, StateGraph

import app.workflow.node as node
from app.agents.evaluation import EvaluationAgent
from app.agents.jd_extractor import JDExtractor
from app.agents.resume_extractor import ResumeExtractor
from app.workflow.state import State

# One chat model instance is shared by all three agents below.
model = init_chat_model(
    model="gemini-2.5-flash",
    model_provider="google_genai",
    temperature=0,
)
RESUME_EXTRACTOR = ResumeExtractor(llm=model)
EVALUATION_AGENT = EvaluationAgent(llm=model)
JD_EXTRACTOR = JDExtractor(llm=model)

# Declarative description of the workflow nodes: "obj" is the node class and
# "configs" holds the keyword arguments passed to its constructor.
CONFIGS = [
    dict(obj=node.ValidateInputNode, configs=dict(name="VALIDATION_INPUT")),
    dict(
        obj=node.ParseContentNode,
        configs=dict(
            name="CONTENT_EXTRACTION",
            jd_extractor=JD_EXTRACTOR,
            resume_extractor=RESUME_EXTRACTOR,
        ),
    ),
    dict(
        obj=node.EvaluatePairMatchingNode,
        configs=dict(name="EVALUATION", evaluation_agent=EVALUATION_AGENT),
    ),
]

# Instantiate every node in the order listed in CONFIGS.
NODES: list[node.BaseNode] = [spec["obj"](**spec["configs"]) for spec in CONFIGS]


def build_graph():
    """Assemble the uncompiled workflow graph.

    Registers every node in ``NODES`` under its ``name`` and wires them into a
    single linear chain:
    START -> VALIDATION_INPUT -> CONTENT_EXTRACTION -> EVALUATION -> END.

    Returns:
        The ``StateGraph`` builder; the caller is responsible for compiling it.
    """
    workflow = StateGraph(state_schema=State)
    for step in NODES:
        workflow.add_node(step.name, step)

    # Edges are added in pipeline order, one (source, target) pair at a time.
    edges = (
        (START, "VALIDATION_INPUT"),
        ("VALIDATION_INPUT", "CONTENT_EXTRACTION"),
        ("CONTENT_EXTRACTION", "EVALUATION"),
        ("EVALUATION", END),
    )
    for source, target in edges:
        workflow.add_edge(source, target)
    return workflow


  from .autonotebook import tqdm as notebook_tqdm
E0000 00:00:1760458976.410134 25809150 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


In [6]:
import time

from tqdm import tqdm

# Gold and predicted scores are accumulated in lockstep: index k in one list
# always refers to the same example as index k in the other.
gold_scores = []
pred_scores = []
graph = build_graph()
app = graph.compile(checkpointer=None)

print(f"\n--- Starting Evaluation on {len(val_df)} examples ---")
batch_size = 4
# No explicit `total`: tqdm reads it from len(range(...)), which also counts a
# final partial batch (floor division would under-report it).
for i in tqdm(
    range(0, len(val_df), batch_size),
    desc="Evaluating LangGraph in Batches",
):
    batch_df = val_df.iloc[i : i + batch_size]
    batch_gold_scores = batch_df["match_score"].tolist()

    # Run the LangGraph app on the whole batch at once
    state = State(
        inputs=[
            {
                "job_description": example["job_description"],
                "resume": example["resume"],
            }
            for _, example in batch_df.iterrows()
        ]
    )
    try:
        result = app.invoke(state)
        batch_pred_scores = [item.get("score") for item in result["results"]]
        # A result list of the wrong length would silently misalign gold and
        # predicted scores, so treat it as a failed batch.
        if len(batch_pred_scores) != len(batch_df):
            raise ValueError(
                f"expected {len(batch_df)} results, got {len(batch_pred_scores)}"
            )
        # A result without a score gets the same penalty as a failed batch,
        # so pred_scores never contains None.
        batch_pred_scores = [
            0.0 if score is None else score for score in batch_pred_scores
        ]
    except Exception as e:
        print(f"An error occurred during graph execution: {e}")
        batch_pred_scores = [0.0] * len(batch_df)  # Assign a penalty score

    gold_scores.extend(batch_gold_scores)
    pred_scores.extend(batch_pred_scores)

    # To avoid rate limiting
    time.sleep(1)

print("--- Evaluation Complete ---")



--- Starting Evaluation on 3000 examples ---


Evaluating LangGraph in Batches:   0%|          | 0/750 [00:00<?, ?it/s]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   0%|          | 1/750 [00:50<10:32:59, 50.71s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   0%|          | 2/750 [02:08<13:48:23, 66.45s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   0%|          | 3/750 [03:33<15:32:25, 74.89s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   1%|          | 4/750 [04:11<12:31:33, 60.45s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   1%|          | 5/750 [05:40<14:38:23, 70.74s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   1%|          | 6/750 [06:29<13:05:36, 63.36s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   1%|          | 7/750 [08:10<15:35:29, 75.54s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   1%|          | 8/750 [09:00<13:55:39, 67.57s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   1%|          | 9/750 [10:10<14:05:01, 68.42s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   1%|▏         | 10/750 [11:00<12:53:21, 62.71s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   1%|▏         | 11/750 [11:50<12:04:21, 58.81s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   2%|▏         | 12/750 [12:37<11:18:32, 55.17s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   2%|▏         | 13/750 [14:02<13:09:33, 64.28s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   2%|▏         | 14/750 [14:52<12:13:46, 59.82s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   2%|▏         | 15/750 [15:45<11:49:01, 57.88s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   2%|▏         | 16/750 [16:50<12:12:08, 59.85s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   2%|▏         | 17/750 [17:36<11:22:39, 55.88s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   2%|▏         | 18/750 [18:28<11:07:27, 54.71s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   3%|▎         | 19/750 [19:38<12:02:30, 59.30s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   3%|▎         | 20/750 [20:20<10:56:45, 53.98s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   3%|▎         | 21/750 [21:18<11:10:42, 55.20s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   3%|▎         | 22/750 [22:18<11:27:12, 56.64s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   3%|▎         | 23/750 [23:05<10:50:50, 53.71s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   3%|▎         | 24/750 [23:58<10:49:52, 53.71s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   3%|▎         | 25/750 [25:25<12:46:55, 63.47s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   3%|▎         | 26/750 [26:16<12:01:04, 59.76s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   4%|▎         | 27/750 [27:59<14:35:15, 72.64s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   4%|▎         | 28/750 [29:15<14:49:17, 73.90s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   4%|▍         | 29/750 [30:20<14:13:42, 71.04s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   4%|▍         | 30/750 [31:02<12:30:00, 62.50s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   4%|▍         | 31/750 [31:55<11:53:37, 59.55s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   4%|▍         | 32/750 [32:54<11:50:11, 59.35s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   4%|▍         | 33/750 [33:38<10:53:08, 54.66s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   5%|▍         | 34/750 [34:43<11:30:48, 57.89s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   5%|▍         | 35/750 [35:35<11:10:12, 56.24s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   5%|▍         | 36/750 [36:22<10:36:20, 53.47s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   5%|▍         | 37/750 [37:49<12:32:13, 63.30s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   5%|▌         | 38/750 [38:43<11:59:32, 60.64s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   5%|▌         | 39/750 [40:26<14:28:07, 73.26s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   5%|▌         | 40/750 [41:22<13:26:34, 68.16s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   5%|▌         | 41/750 [42:17<12:37:10, 64.08s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   6%|▌         | 42/750 [43:17<12:24:55, 63.13s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   6%|▌         | 43/750 [44:26<12:41:39, 64.64s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   6%|▌         | 44/750 [45:08<11:21:46, 57.94s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   6%|▌         | 45/750 [46:35<13:03:36, 66.69s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   6%|▌         | 46/750 [47:21<11:51:05, 60.60s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   6%|▋         | 47/750 [49:00<14:03:52, 72.02s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   6%|▋         | 48/750 [49:53<12:54:31, 66.20s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   7%|▋         | 49/750 [50:37<11:36:00, 59.57s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   7%|▋         | 50/750 [51:29<11:09:00, 57.34s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   7%|▋         | 51/750 [52:15<10:27:52, 53.90s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   7%|▋         | 52/750 [53:28<11:34:34, 59.71s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   7%|▋         | 53/750 [54:23<11:17:08, 58.29s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   7%|▋         | 54/750 [55:44<12:35:35, 65.14s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   7%|▋         | 55/750 [56:38<11:53:46, 61.62s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   7%|▋         | 56/750 [57:22<10:52:02, 56.37s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   8%|▊         | 57/750 [58:01<9:52:13, 51.27s/it] 

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   8%|▊         | 58/750 [59:40<12:36:11, 65.57s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   8%|▊         | 59/750 [1:00:30<11:41:14, 60.89s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   8%|▊         | 60/750 [1:01:04<10:06:44, 52.76s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   8%|▊         | 61/750 [1:01:36<8:54:57, 46.59s/it] 

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   8%|▊         | 62/750 [1:02:37<9:44:09, 50.94s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   8%|▊         | 63/750 [1:03:06<8:28:38, 44.42s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   9%|▊         | 64/750 [1:04:34<10:56:15, 57.40s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   9%|▊         | 65/750 [1:05:17<10:07:12, 53.19s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   9%|▉         | 66/750 [1:05:57<9:21:21, 49.24s/it] 

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   9%|▉         | 67/750 [1:06:38<8:51:28, 46.69s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   9%|▉         | 68/750 [1:07:16<8:19:52, 43.98s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   9%|▉         | 69/750 [1:08:01<8:23:23, 44.35s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   9%|▉         | 70/750 [1:08:51<8:40:59, 45.97s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   9%|▉         | 71/750 [1:09:31<8:20:36, 44.24s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  10%|▉         | 72/750 [1:10:05<7:45:08, 41.16s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  10%|▉         | 73/750 [1:10:37<7:15:14, 38.57s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  10%|▉         | 74/750 [1:11:34<8:14:44, 43.91s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  10%|█         | 75/750 [1:12:18<8:14:27, 43.95s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  10%|█         | 76/750 [1:13:23<9:23:58, 50.20s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  10%|█         | 77/750 [1:14:10<9:11:55, 49.21s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  10%|█         | 78/750 [1:15:04<9:27:19, 50.65s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  11%|█         | 79/750 [1:15:51<9:14:25, 49.58s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  11%|█         | 80/750 [1:16:25<8:23:27, 45.09s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  11%|█         | 81/750 [1:17:08<8:14:05, 44.31s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  11%|█         | 82/750 [1:17:37<7:23:15, 39.81s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  11%|█         | 83/750 [1:18:27<7:57:00, 42.91s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  11%|█         | 84/750 [1:19:28<8:55:59, 48.29s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  11%|█▏        | 85/750 [1:20:15<8:51:04, 47.92s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  11%|█▏        | 86/750 [1:21:06<9:00:05, 48.80s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  12%|█▏        | 87/750 [1:21:48<8:38:24, 46.92s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  12%|█▏        | 88/750 [1:22:28<8:14:40, 44.83s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  12%|█▏        | 89/750 [1:23:26<8:54:49, 48.55s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  12%|█▏        | 90/750 [1:24:05<8:24:26, 45.86s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  12%|█▏        | 91/750 [1:25:06<9:12:39, 50.32s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  12%|█▏        | 92/750 [1:25:56<9:10:34, 50.20s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  12%|█▏        | 93/750 [1:26:41<8:52:30, 48.63s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  13%|█▎        | 94/750 [1:27:15<8:03:17, 44.20s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  13%|█▎        | 95/750 [1:28:10<8:40:22, 47.67s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  13%|█▎        | 96/750 [1:28:59<8:41:06, 47.81s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  13%|█▎        | 97/750 [1:29:39<8:14:54, 45.47s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  13%|█▎        | 98/750 [1:30:54<9:49:56, 54.29s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  13%|█▎        | 99/750 [1:31:34<9:04:58, 50.23s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  13%|█▎        | 100/750 [1:32:36<9:41:11, 53.65s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  13%|█▎        | 101/750 [1:33:36<10:02:41, 55.72s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  14%|█▎        | 102/750 [1:34:35<10:10:41, 56.55s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  14%|█▎        | 103/750 [1:35:21<9:35:11, 53.34s/it] 

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  14%|█▍        | 104/750 [1:36:19<9:49:20, 54.74s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  14%|█▍        | 105/750 [1:37:14<9:51:08, 54.99s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  14%|█▍        | 106/750 [1:37:56<9:08:49, 51.13s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  14%|█▍        | 107/750 [1:38:54<9:27:40, 52.97s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  14%|█▍        | 108/750 [1:39:39<9:01:14, 50.58s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  15%|█▍        | 109/750 [1:40:56<10:24:13, 58.43s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  15%|█▍        | 110/750 [1:41:33<9:15:57, 52.12s/it] 

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  15%|█▍        | 111/750 [1:42:22<9:05:46, 51.25s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  15%|█▍        | 112/750 [1:43:08<8:46:44, 49.54s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  15%|█▌        | 113/750 [1:43:49<8:18:31, 46.96s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  15%|█▌        | 114/750 [1:44:29<7:55:40, 44.87s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  15%|█▌        | 115/750 [1:45:18<8:09:35, 46.26s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  15%|█▌        | 116/750 [1:46:43<10:10:08, 57.74s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  16%|█▌        | 117/750 [1:48:04<11:24:26, 64.88s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  16%|█▌        | 118/750 [1:48:50<10:24:39, 59.30s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  16%|█▌        | 119/750 [1:49:28<9:14:47, 52.75s/it] 

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  16%|█▌        | 120/750 [1:50:18<9:03:53, 51.80s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  16%|█▌        | 121/750 [1:50:54<8:13:44, 47.10s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  16%|█▋        | 122/750 [1:51:46<8:28:46, 48.61s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  16%|█▋        | 123/750 [1:52:21<7:45:12, 44.52s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  17%|█▋        | 124/750 [1:52:56<7:14:49, 41.68s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  17%|█▋        | 125/750 [1:53:54<8:06:54, 46.74s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  17%|█▋        | 126/750 [1:54:45<8:18:27, 47.93s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  17%|█▋        | 127/750 [1:55:30<8:07:21, 46.94s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  17%|█▋        | 128/750 [1:56:22<8:23:22, 48.56s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  17%|█▋        | 129/750 [1:57:07<8:09:57, 47.34s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  17%|█▋        | 130/750 [1:58:08<8:52:59, 51.58s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  17%|█▋        | 131/750 [1:59:08<9:16:46, 53.97s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  18%|█▊        | 132/750 [2:00:05<9:27:50, 55.13s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  18%|█▊        | 133/750 [2:00:45<8:40:07, 50.58s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  18%|█▊        | 134/750 [2:01:26<8:10:17, 47.75s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  18%|█▊        | 135/750 [2:02:23<8:35:30, 50.29s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  18%|█▊        | 136/750 [2:03:08<8:20:44, 48.93s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  18%|█▊        | 137/750 [2:04:09<8:55:25, 52.41s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  18%|█▊        | 138/750 [2:04:49<8:15:29, 48.58s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  19%|█▊        | 139/750 [2:05:38<8:16:20, 48.74s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  19%|█▊        | 140/750 [2:06:55<9:42:25, 57.29s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  19%|█▉        | 141/750 [2:08:03<10:15:09, 60.61s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  19%|█▉        | 142/750 [2:08:52<9:36:22, 56.88s/it] 

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  19%|█▉        | 143/750 [2:09:41<9:12:25, 54.61s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  19%|█▉        | 144/750 [2:10:18<8:18:35, 49.37s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  19%|█▉        | 145/750 [2:11:16<8:44:57, 52.06s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  19%|█▉        | 146/750 [2:12:23<9:28:37, 56.49s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  20%|█▉        | 147/750 [2:13:11<9:01:53, 53.92s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  20%|█▉        | 148/750 [2:13:50<8:16:50, 49.52s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  20%|█▉        | 149/750 [2:14:29<7:42:44, 46.20s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  20%|██        | 150/750 [2:15:35<8:42:52, 52.29s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  20%|██        | 151/750 [2:16:12<7:55:31, 47.63s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  20%|██        | 152/750 [2:16:56<7:44:22, 46.59s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  20%|██        | 153/750 [2:17:31<7:08:00, 43.02s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  21%|██        | 154/750 [2:18:24<7:36:11, 45.93s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  21%|██        | 155/750 [2:19:17<7:58:14, 48.23s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  21%|██        | 156/750 [2:19:59<7:39:24, 46.41s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  21%|██        | 157/750 [2:20:51<7:55:36, 48.12s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  21%|██        | 158/750 [2:21:33<7:35:01, 46.12s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  21%|██        | 159/750 [2:22:17<7:29:30, 45.64s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  21%|██▏       | 160/750 [2:23:00<7:20:21, 44.78s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  21%|██▏       | 161/750 [2:23:53<7:43:42, 47.24s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  22%|██▏       | 162/750 [2:24:23<6:51:06, 41.95s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  22%|██▏       | 163/750 [2:25:28<7:59:57, 49.06s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  22%|██▏       | 164/750 [2:26:26<8:24:54, 51.70s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  22%|██▏       | 165/750 [2:27:08<7:54:21, 48.65s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  22%|██▏       | 166/750 [2:27:47<7:27:01, 45.93s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  22%|██▏       | 167/750 [2:28:27<7:06:38, 43.91s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  22%|██▏       | 168/750 [2:29:02<6:42:28, 41.49s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  23%|██▎       | 169/750 [2:29:56<7:17:54, 45.22s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  23%|██▎       | 170/750 [2:30:42<7:17:16, 45.24s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  23%|██▎       | 171/750 [2:31:24<7:09:06, 44.47s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  23%|██▎       | 172/750 [2:32:21<7:44:52, 48.26s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  23%|██▎       | 173/750 [2:33:02<7:21:10, 45.88s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  23%|██▎       | 174/750 [2:33:48<7:20:46, 45.91s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  23%|██▎       | 175/750 [2:34:34<7:20:16, 45.94s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  23%|██▎       | 176/750 [2:35:21<7:23:05, 46.32s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  24%|██▎       | 177/750 [2:35:51<6:34:50, 41.34s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  24%|██▎       | 178/750 [2:36:58<7:49:00, 49.20s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  24%|██▍       | 179/750 [2:37:53<8:04:13, 50.88s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  24%|██▍       | 180/750 [2:39:01<8:51:42, 55.97s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  24%|██▍       | 181/750 [2:39:51<8:33:54, 54.19s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  24%|██▍       | 182/750 [2:40:33<7:58:11, 50.51s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  24%|██▍       | 183/750 [2:41:14<7:29:45, 47.59s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  25%|██▍       | 184/750 [2:41:56<7:13:50, 45.99s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  25%|██▍       | 185/750 [2:42:36<6:56:13, 44.20s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  25%|██▍       | 186/750 [2:43:36<7:40:56, 49.04s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  25%|██▍       | 187/750 [2:44:14<7:07:53, 45.60s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  25%|██▌       | 188/750 [2:45:00<7:09:06, 45.81s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  25%|██▌       | 189/750 [2:45:43<7:01:37, 45.09s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  25%|██▌       | 190/750 [2:46:33<7:14:17, 46.53s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  25%|██▌       | 191/750 [2:47:44<8:21:59, 53.88s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  26%|██▌       | 192/750 [2:48:23<7:38:58, 49.35s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  26%|██▌       | 193/750 [2:49:15<7:44:13, 50.01s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  26%|██▌       | 194/750 [2:50:19<8:23:45, 54.36s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  26%|██▌       | 195/750 [2:51:11<8:14:30, 53.46s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  26%|██▌       | 196/750 [2:52:03<8:09:49, 53.05s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  26%|██▋       | 197/750 [2:52:38<7:19:26, 47.68s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  26%|██▋       | 198/750 [2:53:17<6:56:17, 45.25s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  27%|██▋       | 199/750 [2:54:09<7:12:30, 47.10s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  27%|██▋       | 200/750 [2:54:47<6:46:57, 44.39s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  27%|██▋       | 201/750 [2:55:57<7:56:26, 52.07s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  27%|██▋       | 202/750 [2:56:32<7:08:53, 46.96s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  27%|██▋       | 203/750 [2:57:12<6:48:37, 44.82s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  27%|██▋       | 204/750 [2:57:51<6:33:27, 43.24s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  27%|██▋       | 205/750 [2:58:32<6:25:49, 42.48s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  27%|██▋       | 206/750 [2:59:15<6:26:08, 42.59s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  28%|██▊       | 207/750 [3:00:13<7:07:34, 47.25s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  28%|██▊       | 208/750 [3:00:59<7:02:55, 46.82s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  28%|██▊       | 209/750 [3:01:40<6:46:23, 45.07s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  28%|██▊       | 210/750 [3:02:25<6:47:23, 45.26s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  28%|██▊       | 211/750 [3:03:41<8:08:06, 54.34s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  28%|██▊       | 212/750 [3:04:53<8:54:08, 59.57s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  28%|██▊       | 213/750 [3:05:37<8:11:49, 54.95s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  29%|██▊       | 214/750 [3:06:28<7:59:21, 53.66s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  29%|██▊       | 215/750 [3:07:18<7:48:49, 52.58s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  29%|██▉       | 216/750 [3:07:55<7:05:59, 47.86s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  29%|██▉       | 217/750 [3:08:32<6:37:25, 44.74s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  29%|██▉       | 218/750 [3:09:53<8:13:55, 55.71s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  29%|██▉       | 219/750 [3:11:27<9:55:12, 67.26s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  29%|██▉       | 220/750 [3:11:57<8:14:16, 55.96s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  29%|██▉       | 221/750 [3:12:41<7:42:10, 52.42s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  30%|██▉       | 222/750 [3:13:35<7:43:56, 52.72s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  30%|██▉       | 223/750 [3:14:41<8:19:35, 56.88s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  30%|██▉       | 224/750 [3:15:34<8:08:53, 55.77s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  30%|███       | 225/750 [3:16:20<7:40:25, 52.62s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  30%|███       | 226/750 [3:16:58<7:01:43, 48.29s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  30%|███       | 227/750 [3:17:51<7:14:09, 49.81s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  30%|███       | 228/750 [3:18:42<7:16:06, 50.13s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  31%|███       | 229/750 [3:19:20<6:43:43, 46.49s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  31%|███       | 230/750 [3:20:03<6:32:30, 45.29s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  31%|███       | 231/750 [3:20:44<6:20:31, 43.99s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  31%|███       | 232/750 [3:21:27<6:17:30, 43.73s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  31%|███       | 233/750 [3:22:10<6:16:00, 43.64s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  31%|███       | 234/750 [3:22:42<5:46:04, 40.24s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  31%|███▏      | 235/750 [3:23:33<6:12:58, 43.45s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  31%|███▏      | 236/750 [3:24:29<6:44:18, 47.20s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  32%|███▏      | 237/750 [3:25:06<6:16:37, 44.05s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  32%|███▏      | 238/750 [3:25:42<5:55:34, 41.67s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  32%|███▏      | 239/750 [3:26:27<6:04:09, 42.76s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  32%|███▏      | 240/750 [3:27:18<6:22:49, 45.04s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  32%|███▏      | 241/750 [3:28:04<6:24:21, 45.31s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  32%|███▏      | 242/750 [3:28:37<5:52:44, 41.66s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  32%|███▏      | 243/750 [3:29:31<6:23:41, 45.41s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  33%|███▎      | 244/750 [3:30:36<7:12:48, 51.32s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  33%|███▎      | 245/750 [3:31:27<7:11:11, 51.23s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  33%|███▎      | 246/750 [3:32:14<6:58:38, 49.84s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  33%|███▎      | 247/750 [3:32:59<6:45:04, 48.32s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  33%|███▎      | 248/750 [3:33:43<6:34:46, 47.18s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  33%|███▎      | 249/750 [3:35:05<8:00:06, 57.50s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  33%|███▎      | 250/750 [3:35:56<7:43:08, 55.58s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  33%|███▎      | 251/750 [3:37:03<8:10:57, 59.03s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  34%|███▎      | 252/750 [3:37:43<7:22:35, 53.32s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  34%|███▎      | 253/750 [3:38:34<7:17:39, 52.84s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  34%|███▍      | 254/750 [3:39:19<6:54:57, 50.20s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  34%|███▍      | 255/750 [3:40:05<6:44:37, 49.05s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  34%|███▍      | 256/750 [3:40:50<6:34:17, 47.89s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  34%|███▍      | 257/750 [3:41:25<6:02:08, 44.07s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  34%|███▍      | 258/750 [3:42:06<5:54:23, 43.22s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  35%|███▍      | 259/750 [3:42:50<5:55:29, 43.44s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  35%|███▍      | 260/750 [3:43:33<5:52:22, 43.15s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  35%|███▍      | 261/750 [3:44:13<5:44:01, 42.21s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  35%|███▍      | 262/750 [3:44:57<5:46:51, 42.65s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  35%|███▌      | 263/750 [3:45:41<5:49:34, 43.07s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  35%|███▌      | 264/750 [3:46:10<5:15:50, 38.99s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  35%|███▌      | 265/750 [3:46:37<4:46:37, 35.46s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  35%|███▌      | 266/750 [3:47:18<4:57:39, 36.90s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  36%|███▌      | 267/750 [3:48:25<6:11:31, 46.15s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  36%|███▌      | 268/750 [3:49:04<5:51:40, 43.78s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  36%|███▌      | 269/750 [3:49:45<5:44:36, 42.99s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  36%|███▌      | 270/750 [3:50:55<6:49:29, 51.19s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  36%|███▌      | 271/750 [3:51:55<7:10:03, 53.87s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  36%|███▋      | 272/750 [3:52:49<7:10:05, 53.99s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  36%|███▋      | 273/750 [3:53:33<6:44:53, 50.93s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  37%|███▋      | 274/750 [3:54:04<5:55:12, 44.77s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  37%|███▋      | 275/750 [3:54:44<5:43:00, 43.33s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  37%|███▋      | 276/750 [3:55:15<5:13:11, 39.64s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  37%|███▋      | 277/750 [3:55:59<5:23:40, 41.06s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  37%|███▋      | 278/750 [3:56:37<5:15:27, 40.10s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  37%|███▋      | 279/750 [3:57:20<5:21:36, 40.97s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  37%|███▋      | 280/750 [3:58:29<6:26:29, 49.34s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  37%|███▋      | 281/750 [3:59:10<6:06:06, 46.84s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  38%|███▊      | 282/750 [3:59:49<5:46:42, 44.45s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  38%|███▊      | 283/750 [4:00:30<5:39:03, 43.56s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  38%|███▊      | 284/750 [4:01:09<5:27:51, 42.21s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  38%|███▊      | 285/750 [4:01:49<5:22:31, 41.62s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  38%|███▊      | 286/750 [4:02:26<5:10:23, 40.14s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  38%|███▊      | 287/750 [4:03:04<5:04:25, 39.45s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  38%|███▊      | 288/750 [4:03:57<5:35:04, 43.52s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  39%|███▊      | 289/750 [4:04:33<5:17:33, 41.33s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  39%|███▊      | 290/750 [4:05:20<5:29:50, 43.02s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  39%|███▉      | 291/750 [4:06:01<5:23:15, 42.26s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  39%|███▉      | 292/750 [4:06:43<5:22:46, 42.29s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  39%|███▉      | 293/750 [4:07:50<6:19:13, 49.79s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  39%|███▉      | 294/750 [4:08:28<5:51:16, 46.22s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  39%|███▉      | 295/750 [4:09:13<5:48:22, 45.94s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  39%|███▉      | 296/750 [4:10:08<6:06:42, 48.46s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  40%|███▉      | 297/750 [4:10:40<5:28:00, 43.45s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  40%|███▉      | 298/750 [4:11:20<5:21:31, 42.68s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  40%|███▉      | 299/750 [4:11:54<5:00:24, 39.97s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  40%|████      | 300/750 [4:12:53<5:41:46, 45.57s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  40%|████      | 301/750 [4:13:30<5:22:35, 43.11s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  40%|████      | 302/750 [4:14:05<5:03:47, 40.69s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  40%|████      | 303/750 [4:14:44<4:58:52, 40.12s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  41%|████      | 304/750 [4:15:31<5:14:50, 42.36s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  41%|████      | 305/750 [4:16:30<5:49:10, 47.08s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  41%|████      | 306/750 [4:17:20<5:56:43, 48.21s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  41%|████      | 307/750 [4:18:00<5:37:29, 45.71s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  41%|████      | 308/750 [4:18:43<5:29:37, 44.74s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  41%|████      | 309/750 [4:19:12<4:55:41, 40.23s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  41%|████▏     | 310/750 [4:19:44<4:35:44, 37.60s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  41%|████▏     | 311/750 [4:20:28<4:49:49, 39.61s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  42%|████▏     | 312/750 [4:21:15<5:03:52, 41.63s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  42%|████▏     | 313/750 [4:22:00<5:12:09, 42.86s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  42%|████▏     | 314/750 [4:22:48<5:22:43, 44.41s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  42%|████▏     | 315/750 [4:23:21<4:56:20, 40.88s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  42%|████▏     | 316/750 [4:23:50<4:30:08, 37.35s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  42%|████▏     | 317/750 [4:24:29<4:32:40, 37.78s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  42%|████▏     | 318/750 [4:24:58<4:12:34, 35.08s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  43%|████▎     | 319/750 [4:25:34<4:14:56, 35.49s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  43%|████▎     | 320/750 [4:26:13<4:22:07, 36.58s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  43%|████▎     | 321/750 [4:26:47<4:15:44, 35.77s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  43%|████▎     | 322/750 [4:27:42<4:56:21, 41.54s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  43%|████▎     | 323/750 [4:28:11<4:28:38, 37.75s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  43%|████▎     | 324/750 [4:29:07<5:06:49, 43.21s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  43%|████▎     | 325/750 [4:29:42<4:49:10, 40.82s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  43%|████▎     | 326/750 [4:30:17<4:34:56, 38.91s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  44%|████▎     | 327/750 [4:31:12<5:09:30, 43.90s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  44%|████▎     | 328/750 [4:31:49<4:54:40, 41.90s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  44%|████▍     | 329/750 [4:32:41<5:14:44, 44.86s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  44%|████▍     | 330/750 [4:33:17<4:55:53, 42.27s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  44%|████▍     | 331/750 [4:33:45<4:23:47, 37.78s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  44%|████▍     | 332/750 [4:34:34<4:47:44, 41.30s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  44%|████▍     | 333/750 [4:35:09<4:34:01, 39.43s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  45%|████▍     | 334/750 [4:35:49<4:33:26, 39.44s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  45%|████▍     | 335/750 [4:36:28<4:31:50, 39.30s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  45%|████▍     | 336/750 [4:37:12<4:41:10, 40.75s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  45%|████▍     | 337/750 [4:37:58<4:50:38, 42.22s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  45%|████▌     | 338/750 [4:38:45<4:59:42, 43.65s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  45%|████▌     | 339/750 [4:39:28<4:59:22, 43.71s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  45%|████▌     | 340/750 [4:39:57<4:28:49, 39.34s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  45%|████▌     | 341/750 [4:40:35<4:24:18, 38.77s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  46%|████▌     | 342/750 [4:41:13<4:22:37, 38.62s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  46%|████▌     | 343/750 [4:41:40<3:57:20, 34.99s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  46%|████▌     | 344/750 [4:42:25<4:17:15, 38.02s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  46%|████▌     | 345/750 [4:43:22<4:55:54, 43.84s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  46%|████▌     | 346/750 [4:43:54<4:31:22, 40.30s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  46%|████▋     | 347/750 [4:44:38<4:37:55, 41.38s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  46%|████▋     | 348/750 [4:45:25<4:48:18, 43.03s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  47%|████▋     | 349/750 [4:46:17<5:04:37, 45.58s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  47%|████▋     | 350/750 [4:46:54<4:47:04, 43.06s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  47%|████▋     | 351/750 [4:47:36<4:44:38, 42.80s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  47%|████▋     | 352/750 [4:48:13<4:31:37, 40.95s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  47%|████▋     | 353/750 [4:48:44<4:12:11, 38.11s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  47%|████▋     | 354/750 [4:49:19<4:04:48, 37.09s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  47%|████▋     | 355/750 [4:49:50<3:53:01, 35.40s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  47%|████▋     | 356/750 [4:50:19<3:38:41, 33.30s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  48%|████▊     | 357/750 [4:50:54<3:41:30, 33.82s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  48%|████▊     | 358/750 [4:51:35<3:55:18, 36.02s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  48%|████▊     | 359/750 [4:52:09<3:50:53, 35.43s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  48%|████▊     | 360/750 [4:52:42<3:46:14, 34.81s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  48%|████▊     | 361/750 [4:53:31<4:12:15, 38.91s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  48%|████▊     | 362/750 [4:54:02<3:57:26, 36.72s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  48%|████▊     | 363/750 [4:54:42<4:03:07, 37.69s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  49%|████▊     | 364/750 [4:55:09<3:41:12, 34.38s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  49%|████▊     | 365/750 [4:55:48<3:48:59, 35.69s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  49%|████▉     | 366/750 [4:56:18<3:38:59, 34.22s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  49%|████▉     | 367/750 [4:56:49<3:30:44, 33.01s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  49%|████▉     | 368/750 [4:57:31<3:47:20, 35.71s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  49%|████▉     | 369/750 [4:57:59<3:33:23, 33.61s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  49%|████▉     | 370/750 [4:58:37<3:41:01, 34.90s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  49%|████▉     | 371/750 [4:59:14<3:43:18, 35.35s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  50%|████▉     | 372/750 [4:59:54<3:52:14, 36.86s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  50%|████▉     | 373/750 [5:00:39<4:07:01, 39.32s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  50%|████▉     | 374/750 [5:01:10<3:50:38, 36.81s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  50%|█████     | 375/750 [5:01:38<3:33:20, 34.13s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  50%|█████     | 376/750 [5:02:05<3:20:07, 32.10s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  50%|█████     | 377/750 [5:02:41<3:25:35, 33.07s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  50%|█████     | 378/750 [5:03:16<3:29:11, 33.74s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  51%|█████     | 379/750 [5:03:59<3:46:11, 36.58s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  51%|█████     | 380/750 [5:04:28<3:31:58, 34.37s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  51%|█████     | 381/750 [5:05:01<3:27:53, 33.80s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  51%|█████     | 382/750 [5:05:40<3:37:59, 35.54s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  51%|█████     | 383/750 [5:06:15<3:34:48, 35.12s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  51%|█████     | 384/750 [5:06:44<3:23:03, 33.29s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  51%|█████▏    | 385/750 [5:07:23<3:33:05, 35.03s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  51%|█████▏    | 386/750 [5:08:00<3:36:10, 35.63s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  52%|█████▏    | 387/750 [5:08:38<3:40:57, 36.52s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  52%|█████▏    | 388/750 [5:09:10<3:31:54, 35.12s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  52%|█████▏    | 389/750 [5:09:43<3:27:11, 34.44s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  52%|█████▏    | 390/750 [5:10:35<3:57:47, 39.63s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  52%|█████▏    | 391/750 [5:11:26<4:17:08, 42.98s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  52%|█████▏    | 392/750 [5:12:10<4:19:21, 43.47s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  52%|█████▏    | 393/750 [5:12:43<3:59:49, 40.31s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  53%|█████▎    | 394/750 [5:13:12<3:38:07, 36.76s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  53%|█████▎    | 395/750 [5:13:48<3:37:11, 36.71s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  53%|█████▎    | 396/750 [5:14:27<3:41:07, 37.48s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  53%|█████▎    | 397/750 [5:15:06<3:42:54, 37.89s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  53%|█████▎    | 398/750 [5:15:38<3:31:24, 36.04s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  53%|█████▎    | 399/750 [5:16:22<3:44:40, 38.41s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  53%|█████▎    | 400/750 [5:17:19<4:15:50, 43.86s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  53%|█████▎    | 401/750 [5:17:48<3:49:25, 39.44s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  54%|█████▎    | 402/750 [5:18:31<3:55:41, 40.64s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  54%|█████▎    | 403/750 [5:19:25<4:17:53, 44.59s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  54%|█████▍    | 404/750 [5:20:04<4:07:51, 42.98s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  54%|█████▍    | 405/750 [5:20:31<3:39:16, 38.13s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  54%|█████▍    | 406/750 [5:21:09<3:37:50, 37.99s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  54%|█████▍    | 407/750 [5:21:40<3:26:24, 36.11s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  54%|█████▍    | 408/750 [5:22:14<3:22:06, 35.46s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  55%|█████▍    | 409/750 [5:22:46<3:15:13, 34.35s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  55%|█████▍    | 410/750 [5:23:22<3:17:12, 34.80s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  55%|█████▍    | 411/750 [5:23:58<3:18:50, 35.19s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  55%|█████▍    | 412/750 [5:24:50<3:46:37, 40.23s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  55%|█████▌    | 413/750 [5:25:47<4:14:45, 45.36s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  55%|█████▌    | 414/750 [5:26:19<3:51:20, 41.31s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  55%|█████▌    | 415/750 [5:27:04<3:55:54, 42.25s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  55%|█████▌    | 416/750 [5:27:46<3:55:36, 42.32s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  56%|█████▌    | 417/750 [5:28:23<3:45:04, 40.55s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  56%|█████▌    | 418/750 [5:28:57<3:33:26, 38.57s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  56%|█████▌    | 419/750 [5:29:45<3:49:00, 41.51s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  56%|█████▌    | 420/750 [5:30:36<4:03:42, 44.31s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  56%|█████▌    | 421/750 [5:31:15<3:55:26, 42.94s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  56%|█████▋    | 422/750 [5:31:52<3:43:35, 40.90s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  56%|█████▋    | 423/750 [5:32:25<3:29:57, 38.52s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  57%|█████▋    | 424/750 [5:33:04<3:30:17, 38.71s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  57%|█████▋    | 425/750 [5:33:47<3:36:40, 40.00s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  57%|█████▋    | 426/750 [5:34:27<3:35:58, 39.99s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  57%|█████▋    | 427/750 [5:35:03<3:28:55, 38.81s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  57%|█████▋    | 428/750 [5:35:50<3:41:55, 41.35s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  57%|█████▋    | 429/750 [5:36:31<3:40:12, 41.16s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  57%|█████▋    | 430/750 [5:37:00<3:19:40, 37.44s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  57%|█████▋    | 431/750 [5:37:51<3:41:00, 41.57s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  58%|█████▊    | 432/750 [5:38:36<3:46:14, 42.69s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  58%|█████▊    | 433/750 [5:39:21<3:49:48, 43.50s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  58%|█████▊    | 434/750 [5:39:50<3:25:52, 39.09s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  58%|█████▊    | 435/750 [5:40:21<3:11:54, 36.55s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  58%|█████▊    | 436/750 [5:40:55<3:07:57, 35.91s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  58%|█████▊    | 437/750 [5:41:26<2:58:44, 34.26s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  58%|█████▊    | 438/750 [5:42:21<3:31:06, 40.60s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  59%|█████▊    | 439/750 [5:42:58<3:24:00, 39.36s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  59%|█████▊    | 440/750 [5:43:46<3:38:12, 42.23s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  59%|█████▉    | 441/750 [5:44:26<3:33:13, 41.40s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  59%|█████▉    | 442/750 [5:45:12<3:39:38, 42.79s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  59%|█████▉    | 443/750 [5:45:46<3:25:57, 40.25s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  59%|█████▉    | 444/750 [5:46:10<3:00:31, 35.40s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  59%|█████▉    | 445/750 [5:46:50<3:07:00, 36.79s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  59%|█████▉    | 446/750 [5:47:32<3:13:38, 38.22s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  60%|█████▉    | 447/750 [5:48:11<3:14:16, 38.47s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  60%|█████▉    | 448/750 [5:48:44<3:04:59, 36.75s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  60%|█████▉    | 449/750 [5:49:22<3:06:36, 37.20s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  60%|██████    | 450/750 [5:50:16<3:31:30, 42.30s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  60%|██████    | 451/750 [5:50:53<3:22:44, 40.68s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  60%|██████    | 452/750 [5:51:30<3:16:25, 39.55s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  60%|██████    | 453/750 [5:52:13<3:20:58, 40.60s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  61%|██████    | 454/750 [5:52:44<3:06:05, 37.72s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  61%|██████    | 455/750 [5:53:27<3:12:59, 39.25s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  61%|██████    | 456/750 [5:54:10<3:18:03, 40.42s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  61%|██████    | 457/750 [5:54:45<3:09:15, 38.75s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  61%|██████    | 458/750 [5:55:18<3:01:00, 37.19s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  61%|██████    | 459/750 [5:55:59<3:04:59, 38.14s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  61%|██████▏   | 460/750 [5:56:39<3:07:59, 38.90s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  61%|██████▏   | 461/750 [5:57:14<3:01:00, 37.58s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  62%|██████▏   | 462/750 [5:58:04<3:18:13, 41.30s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  62%|██████▏   | 463/750 [5:58:55<3:31:43, 44.26s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  62%|██████▏   | 464/750 [5:59:36<3:26:35, 43.34s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  62%|██████▏   | 465/750 [6:00:07<3:07:28, 39.47s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  62%|██████▏   | 466/750 [6:00:40<2:57:44, 37.55s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  62%|██████▏   | 467/750 [6:01:05<2:39:13, 33.76s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  62%|██████▏   | 468/750 [6:01:34<2:32:46, 32.51s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  63%|██████▎   | 469/750 [6:02:08<2:34:18, 32.95s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  63%|██████▎   | 470/750 [6:02:48<2:43:51, 35.11s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  63%|██████▎   | 471/750 [6:03:30<2:52:34, 37.11s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  63%|██████▎   | 472/750 [6:03:57<2:38:10, 34.14s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  63%|██████▎   | 473/750 [6:04:51<3:05:11, 40.11s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  63%|██████▎   | 474/750 [6:05:33<3:06:31, 40.55s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  63%|██████▎   | 475/750 [6:05:55<2:40:37, 35.05s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  63%|██████▎   | 476/750 [6:06:26<2:33:51, 33.69s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  64%|██████▎   | 477/750 [6:07:02<2:36:24, 34.37s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  64%|██████▎   | 478/750 [6:08:01<3:09:51, 41.88s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  64%|██████▍   | 479/750 [6:08:43<3:08:38, 41.77s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  64%|██████▍   | 480/750 [6:09:12<2:50:55, 37.98s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  64%|██████▍   | 481/750 [6:09:49<2:49:31, 37.81s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  64%|██████▍   | 482/750 [6:10:28<2:50:36, 38.19s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  64%|██████▍   | 483/750 [6:11:05<2:48:32, 37.87s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  65%|██████▍   | 484/750 [6:11:33<2:34:06, 34.76s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  65%|██████▍   | 485/750 [6:12:03<2:27:07, 33.31s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  65%|██████▍   | 486/750 [6:12:41<2:32:17, 34.61s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  65%|██████▍   | 487/750 [6:13:15<2:31:10, 34.49s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  65%|██████▌   | 488/750 [6:13:54<2:36:24, 35.82s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  65%|██████▌   | 489/750 [6:14:43<2:53:56, 39.99s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  65%|██████▌   | 490/750 [6:15:20<2:49:24, 39.09s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  65%|██████▌   | 491/750 [6:15:53<2:39:59, 37.06s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  66%|██████▌   | 492/750 [6:16:21<2:27:52, 34.39s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  66%|██████▌   | 493/750 [6:17:15<2:53:14, 40.44s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  66%|██████▌   | 494/750 [6:17:49<2:43:31, 38.33s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  66%|██████▌   | 495/750 [6:18:14<2:25:55, 34.33s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  66%|██████▌   | 496/750 [6:18:46<2:21:58, 33.54s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  66%|██████▋   | 497/750 [6:19:17<2:18:40, 32.89s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  66%|██████▋   | 498/750 [6:19:55<2:24:22, 34.38s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  67%|██████▋   | 499/750 [6:20:36<2:32:51, 36.54s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  67%|██████▋   | 500/750 [6:21:29<2:52:33, 41.41s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  67%|██████▋   | 501/750 [6:22:16<2:59:05, 43.15s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  67%|██████▋   | 502/750 [6:22:56<2:54:28, 42.21s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  67%|██████▋   | 503/750 [6:23:36<2:50:10, 41.34s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  67%|██████▋   | 504/750 [6:24:16<2:47:50, 40.94s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  67%|██████▋   | 505/750 [6:24:45<2:33:22, 37.56s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  67%|██████▋   | 506/750 [6:25:23<2:32:44, 37.56s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  68%|██████▊   | 507/750 [6:25:52<2:21:53, 35.03s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  68%|██████▊   | 508/750 [6:26:42<2:38:49, 39.38s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  68%|██████▊   | 509/750 [6:27:12<2:27:14, 36.66s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  68%|██████▊   | 510/750 [6:28:40<3:28:10, 52.05s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  68%|██████▊   | 511/750 [6:29:08<2:59:24, 45.04s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  68%|██████▊   | 512/750 [6:29:55<3:00:13, 45.43s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  68%|██████▊   | 513/750 [6:30:24<2:39:37, 40.41s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  69%|██████▊   | 514/750 [6:30:58<2:31:25, 38.50s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  69%|██████▊   | 515/750 [6:31:35<2:29:58, 38.29s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  69%|██████▉   | 516/750 [6:32:05<2:19:15, 35.71s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  69%|██████▉   | 517/750 [6:32:50<2:28:53, 38.34s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  69%|██████▉   | 518/750 [6:33:23<2:22:18, 36.80s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  69%|██████▉   | 519/750 [6:34:05<2:28:06, 38.47s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  69%|██████▉   | 520/750 [6:34:40<2:22:58, 37.30s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  69%|██████▉   | 521/750 [6:35:45<2:54:42, 45.77s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  70%|██████▉   | 522/750 [6:36:28<2:50:33, 44.89s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  70%|██████▉   | 523/750 [6:36:59<2:34:05, 40.73s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  70%|██████▉   | 524/750 [6:37:28<2:19:41, 37.08s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  70%|███████   | 525/750 [6:38:12<2:27:08, 39.24s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  70%|███████   | 526/750 [6:38:52<2:27:44, 39.57s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  70%|███████   | 527/750 [6:39:21<2:15:24, 36.43s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  70%|███████   | 528/750 [6:39:54<2:10:21, 35.23s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  71%|███████   | 529/750 [6:40:27<2:07:12, 34.54s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  71%|███████   | 530/750 [6:41:06<2:11:44, 35.93s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  71%|███████   | 531/750 [6:41:41<2:10:09, 35.66s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  71%|███████   | 532/750 [6:42:14<2:06:37, 34.85s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  71%|███████   | 533/750 [6:43:00<2:17:46, 38.09s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  71%|███████   | 534/750 [6:43:38<2:17:28, 38.19s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  71%|███████▏  | 535/750 [6:44:14<2:14:15, 37.47s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  71%|███████▏  | 536/750 [6:45:10<2:33:26, 43.02s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  72%|███████▏  | 537/750 [6:46:04<2:44:32, 46.35s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  72%|███████▏  | 538/750 [6:46:30<2:22:52, 40.43s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  72%|███████▏  | 539/750 [6:47:01<2:11:45, 37.47s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  72%|███████▏  | 540/750 [6:47:29<2:01:36, 34.74s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  72%|███████▏  | 541/750 [6:48:04<2:00:30, 34.59s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  72%|███████▏  | 542/750 [6:48:37<1:58:22, 34.15s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  72%|███████▏  | 543/750 [6:49:06<1:53:00, 32.76s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  73%|███████▎  | 544/750 [6:49:46<1:59:14, 34.73s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  73%|███████▎  | 545/750 [6:50:15<1:53:08, 33.11s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  73%|███████▎  | 546/750 [6:50:59<2:03:55, 36.45s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  73%|███████▎  | 547/750 [6:51:48<2:15:40, 40.10s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  73%|███████▎  | 548/750 [6:52:14<2:00:57, 35.93s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  73%|███████▎  | 549/750 [6:53:02<2:12:08, 39.44s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  73%|███████▎  | 550/750 [6:53:40<2:10:33, 39.17s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  73%|███████▎  | 551/750 [6:54:45<2:35:02, 46.74s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  74%|███████▎  | 552/750 [6:55:17<2:20:11, 42.48s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  74%|███████▎  | 553/750 [6:56:03<2:22:47, 43.49s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  74%|███████▍  | 554/750 [6:56:42<2:17:39, 42.14s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  74%|███████▍  | 555/750 [6:57:24<2:17:09, 42.20s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  74%|███████▍  | 556/750 [6:57:57<2:07:24, 39.40s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  74%|███████▍  | 557/750 [6:58:26<1:56:55, 36.35s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  74%|███████▍  | 558/750 [6:59:02<1:55:08, 35.98s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  75%|███████▍  | 559/750 [6:59:43<1:59:55, 37.67s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  75%|███████▍  | 560/750 [7:00:18<1:56:54, 36.92s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  75%|███████▍  | 561/750 [7:00:48<1:49:54, 34.89s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  75%|███████▍  | 562/750 [7:01:18<1:44:28, 33.34s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  75%|███████▌  | 563/750 [7:02:02<1:53:49, 36.52s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  75%|███████▌  | 564/750 [7:02:26<1:41:08, 32.62s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  75%|███████▌  | 565/750 [7:03:04<1:46:04, 34.40s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  75%|███████▌  | 566/750 [7:03:40<1:46:23, 34.69s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  76%|███████▌  | 567/750 [7:04:04<1:36:22, 31.60s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  76%|███████▌  | 568/750 [7:04:33<1:33:06, 30.69s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  76%|███████▌  | 569/750 [7:05:05<1:34:24, 31.30s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  76%|███████▌  | 570/750 [7:06:27<2:19:32, 46.51s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  76%|███████▌  | 571/750 [7:07:04<2:10:13, 43.65s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  76%|███████▋  | 572/750 [7:07:50<2:11:22, 44.28s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  76%|███████▋  | 573/750 [7:08:25<2:02:05, 41.39s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  77%|███████▋  | 574/750 [7:08:58<1:54:36, 39.07s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  77%|███████▋  | 575/750 [7:09:33<1:49:59, 37.71s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  77%|███████▋  | 576/750 [7:10:09<1:47:36, 37.11s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  77%|███████▋  | 577/750 [7:10:33<1:36:27, 33.45s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  77%|███████▋  | 578/750 [7:11:05<1:34:07, 32.83s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  77%|███████▋  | 579/750 [7:11:30<1:27:12, 30.60s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  77%|███████▋  | 580/750 [7:12:16<1:39:48, 35.23s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  77%|███████▋  | 581/750 [7:12:46<1:34:49, 33.66s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  78%|███████▊  | 582/750 [7:13:16<1:30:58, 32.49s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  78%|███████▊  | 583/750 [7:14:00<1:39:45, 35.84s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  78%|███████▊  | 584/750 [7:14:26<1:31:20, 33.02s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  78%|███████▊  | 585/750 [7:14:55<1:27:07, 31.68s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  78%|███████▊  | 586/750 [7:15:29<1:28:26, 32.36s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  78%|███████▊  | 587/750 [7:16:09<1:34:28, 34.77s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  78%|███████▊  | 588/750 [7:16:41<1:31:43, 33.97s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  79%|███████▊  | 589/750 [7:17:19<1:34:23, 35.18s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  79%|███████▊  | 590/750 [7:17:47<1:28:16, 33.10s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  79%|███████▉  | 591/750 [7:18:14<1:22:21, 31.08s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  79%|███████▉  | 592/750 [7:18:53<1:28:00, 33.42s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  79%|███████▉  | 593/750 [7:19:32<1:31:52, 35.11s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  79%|███████▉  | 594/750 [7:20:10<1:34:06, 36.20s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  79%|███████▉  | 595/750 [7:20:50<1:36:21, 37.30s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  79%|███████▉  | 596/750 [7:21:22<1:31:23, 35.61s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  80%|███████▉  | 597/750 [7:21:49<1:23:58, 32.93s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  80%|███████▉  | 598/750 [7:22:16<1:19:22, 31.33s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  80%|███████▉  | 599/750 [7:22:52<1:21:50, 32.52s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  80%|████████  | 600/750 [7:23:21<1:18:41, 31.48s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  80%|████████  | 601/750 [7:24:05<1:27:46, 35.35s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  80%|████████  | 602/750 [7:24:42<1:28:38, 35.93s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  80%|████████  | 603/750 [7:25:12<1:23:10, 33.95s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  81%|████████  | 604/750 [7:25:52<1:27:23, 35.91s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  81%|████████  | 605/750 [7:26:27<1:25:53, 35.54s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  81%|████████  | 606/750 [7:27:02<1:25:07, 35.47s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  81%|████████  | 607/750 [7:27:38<1:24:47, 35.58s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  81%|████████  | 608/750 [7:28:12<1:23:16, 35.19s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  81%|████████  | 609/750 [7:28:55<1:28:17, 37.57s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  81%|████████▏ | 610/750 [7:29:31<1:26:05, 36.90s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  81%|████████▏ | 611/750 [7:30:13<1:28:59, 38.41s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  82%|████████▏ | 612/750 [7:30:47<1:25:58, 37.38s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  82%|████████▏ | 613/750 [7:31:29<1:27:53, 38.50s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  82%|████████▏ | 614/750 [7:31:58<1:21:03, 35.76s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  82%|████████▏ | 615/750 [7:32:38<1:23:03, 36.91s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  82%|████████▏ | 616/750 [7:33:08<1:18:04, 34.96s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  82%|████████▏ | 617/750 [7:33:44<1:17:55, 35.16s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  82%|████████▏ | 618/750 [7:34:12<1:12:40, 33.03s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  83%|████████▎ | 619/750 [7:34:49<1:15:02, 34.37s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  83%|████████▎ | 620/750 [7:35:25<1:15:31, 34.86s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  83%|████████▎ | 621/750 [7:35:53<1:10:29, 32.79s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  83%|████████▎ | 622/750 [7:36:21<1:07:06, 31.45s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  83%|████████▎ | 623/750 [7:37:04<1:13:36, 34.77s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  83%|████████▎ | 624/750 [7:37:34<1:10:06, 33.38s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  83%|████████▎ | 625/750 [7:39:47<2:11:34, 63.15s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  83%|████████▎ | 626/750 [7:40:22<1:53:26, 54.89s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  84%|████████▎ | 627/750 [7:40:56<1:39:41, 48.63s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  84%|████████▎ | 628/750 [7:41:23<1:25:43, 42.16s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  84%|████████▍ | 629/750 [7:41:54<1:17:56, 38.65s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  84%|████████▍ | 630/750 [7:42:24<1:12:04, 36.04s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  84%|████████▍ | 631/750 [7:42:58<1:10:04, 35.33s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  84%|████████▍ | 632/750 [7:43:29<1:07:14, 34.19s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  84%|████████▍ | 633/750 [7:43:57<1:02:59, 32.30s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  85%|████████▍ | 634/750 [7:44:41<1:09:26, 35.91s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  85%|████████▍ | 635/750 [7:45:16<1:08:04, 35.52s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  85%|████████▍ | 636/750 [7:45:54<1:09:05, 36.37s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  85%|████████▍ | 637/750 [7:46:33<1:09:53, 37.11s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  85%|████████▌ | 638/750 [7:47:13<1:10:53, 37.97s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  85%|████████▌ | 639/750 [7:47:47<1:07:52, 36.69s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  85%|████████▌ | 640/750 [7:48:35<1:13:27, 40.07s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  85%|████████▌ | 641/750 [7:49:15<1:12:46, 40.06s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  86%|████████▌ | 642/750 [7:49:41<1:04:40, 35.93s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  86%|████████▌ | 643/750 [7:50:09<59:43, 33.49s/it]  

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  86%|████████▌ | 644/750 [7:50:42<58:47, 33.28s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  86%|████████▌ | 645/750 [7:51:19<1:00:30, 34.57s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  86%|████████▌ | 646/750 [7:51:57<1:01:44, 35.62s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  86%|████████▋ | 647/750 [7:52:33<1:01:15, 35.68s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  86%|████████▋ | 648/750 [7:53:04<58:21, 34.33s/it]  

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  87%|████████▋ | 649/750 [7:53:30<53:21, 31.69s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  87%|████████▋ | 650/750 [7:54:20<1:02:09, 37.29s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  87%|████████▋ | 651/750 [7:55:20<1:12:55, 44.19s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  87%|████████▋ | 652/750 [7:55:50<1:04:59, 39.79s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  87%|████████▋ | 653/750 [7:56:30<1:04:34, 39.94s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  87%|████████▋ | 654/750 [7:56:56<57:06, 35.69s/it]  

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  87%|████████▋ | 655/750 [7:57:30<55:39, 35.15s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  87%|████████▋ | 656/750 [7:57:58<51:46, 33.05s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  88%|████████▊ | 657/750 [7:58:46<58:17, 37.60s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  88%|████████▊ | 658/750 [7:59:17<54:22, 35.47s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  88%|████████▊ | 659/750 [7:59:56<55:31, 36.61s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  88%|████████▊ | 660/750 [8:00:36<56:31, 37.69s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  88%|████████▊ | 661/750 [8:01:12<54:55, 37.02s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  88%|████████▊ | 662/750 [8:01:51<55:20, 37.73s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  88%|████████▊ | 663/750 [8:02:20<50:44, 35.00s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  89%|████████▊ | 664/750 [8:02:46<46:12, 32.23s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  89%|████████▊ | 665/750 [8:03:27<49:33, 34.98s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  89%|████████▉ | 666/750 [8:04:02<49:06, 35.07s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  89%|████████▉ | 667/750 [8:04:28<44:42, 32.32s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  89%|████████▉ | 668/750 [8:05:08<47:24, 34.69s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  89%|████████▉ | 669/750 [8:05:48<48:56, 36.25s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  89%|████████▉ | 670/750 [8:06:20<46:31, 34.89s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  89%|████████▉ | 671/750 [8:06:55<45:54, 34.86s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  90%|████████▉ | 672/750 [8:07:24<43:08, 33.19s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  90%|████████▉ | 673/750 [8:07:55<41:35, 32.41s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  90%|████████▉ | 674/750 [8:08:27<41:01, 32.39s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  90%|█████████ | 675/750 [8:09:11<44:50, 35.88s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  90%|█████████ | 676/750 [8:09:42<42:15, 34.27s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  90%|█████████ | 677/750 [8:10:05<37:38, 30.93s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  90%|█████████ | 678/750 [8:10:40<38:37, 32.19s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  91%|█████████ | 679/750 [8:11:26<43:03, 36.39s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  91%|█████████ | 680/750 [8:11:55<39:43, 34.05s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  91%|█████████ | 681/750 [8:12:40<43:05, 37.47s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  91%|█████████ | 682/750 [8:13:22<43:56, 38.78s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  91%|█████████ | 683/750 [8:14:03<43:57, 39.36s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  91%|█████████ | 684/750 [8:14:42<43:28, 39.53s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  91%|█████████▏| 685/750 [8:15:04<36:54, 34.08s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  91%|█████████▏| 686/750 [8:15:33<34:47, 32.62s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  92%|█████████▏| 687/750 [8:16:17<37:41, 35.89s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  92%|█████████▏| 688/750 [8:16:51<36:34, 35.40s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  92%|█████████▏| 689/750 [8:17:19<33:38, 33.09s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  92%|█████████▏| 690/750 [8:17:44<30:44, 30.74s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  92%|█████████▏| 691/750 [8:18:05<27:23, 27.86s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  92%|█████████▏| 692/750 [8:18:28<25:32, 26.43s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  92%|█████████▏| 693/750 [8:18:59<26:21, 27.75s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  93%|█████████▎| 694/750 [8:20:46<47:59, 51.42s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  93%|█████████▎| 695/750 [8:21:38<47:20, 51.64s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  93%|█████████▎| 696/750 [8:22:04<39:33, 43.95s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  93%|█████████▎| 697/750 [8:22:40<36:48, 41.68s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  93%|█████████▎| 698/750 [8:23:20<35:38, 41.12s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  93%|█████████▎| 699/750 [8:23:43<30:23, 35.75s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  93%|█████████▎| 700/750 [8:24:20<30:03, 36.08s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  93%|█████████▎| 701/750 [8:24:52<28:24, 34.79s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  94%|█████████▎| 702/750 [8:25:18<25:54, 32.38s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  94%|█████████▎| 703/750 [8:26:01<27:48, 35.50s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  94%|█████████▍| 704/750 [8:26:51<30:22, 39.62s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  94%|█████████▍| 705/750 [8:27:30<29:42, 39.61s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  94%|█████████▍| 706/750 [8:28:11<29:17, 39.95s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  94%|█████████▍| 707/750 [8:28:36<25:29, 35.58s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  94%|█████████▍| 708/750 [8:29:09<24:21, 34.79s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  95%|█████████▍| 709/750 [8:29:39<22:49, 33.39s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  95%|█████████▍| 710/750 [8:30:20<23:46, 35.67s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  95%|█████████▍| 711/750 [8:30:48<21:38, 33.31s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  95%|█████████▍| 712/750 [8:31:25<21:50, 34.49s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  95%|█████████▌| 713/750 [8:32:03<21:47, 35.33s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  95%|█████████▌| 714/750 [8:32:35<20:35, 34.33s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  95%|█████████▌| 715/750 [8:33:31<23:56, 41.04s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  95%|█████████▌| 716/750 [8:33:58<20:51, 36.81s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  96%|█████████▌| 717/750 [8:34:36<20:19, 36.97s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  96%|█████████▌| 718/750 [8:35:08<18:54, 35.46s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  96%|█████████▌| 719/750 [8:35:40<17:53, 34.62s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  96%|█████████▌| 720/750 [8:36:18<17:46, 35.55s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  96%|█████████▌| 721/750 [8:36:48<16:25, 33.98s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  96%|█████████▋| 722/750 [8:37:23<15:57, 34.18s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  96%|█████████▋| 723/750 [8:37:56<15:16, 33.96s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  97%|█████████▋| 724/750 [8:38:36<15:28, 35.73s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  97%|█████████▋| 725/750 [8:39:14<15:10, 36.41s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  97%|█████████▋| 726/750 [8:39:47<14:08, 35.34s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  97%|█████████▋| 727/750 [8:40:31<14:32, 37.92s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  97%|█████████▋| 728/750 [8:41:24<15:31, 42.33s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  97%|█████████▋| 729/750 [8:42:01<14:17, 40.82s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  97%|█████████▋| 730/750 [8:42:35<12:58, 38.94s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  97%|█████████▋| 731/750 [8:43:27<13:33, 42.81s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  98%|█████████▊| 732/750 [8:44:00<11:53, 39.65s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  98%|█████████▊| 733/750 [8:44:35<10:50, 38.24s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  98%|█████████▊| 734/750 [8:45:09<09:52, 37.02s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  98%|█████████▊| 735/750 [8:46:27<12:23, 49.54s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  98%|█████████▊| 736/750 [8:47:10<11:03, 47.36s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  98%|█████████▊| 737/750 [8:47:57<10:14, 47.24s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  98%|█████████▊| 738/750 [8:48:35<08:54, 44.54s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  99%|█████████▊| 739/750 [8:49:15<07:56, 43.35s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  99%|█████████▊| 740/750 [8:50:05<07:30, 45.09s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  99%|█████████▉| 741/750 [8:50:50<06:45, 45.03s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  99%|█████████▉| 742/750 [8:51:26<05:38, 42.34s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  99%|█████████▉| 743/750 [8:52:04<04:47, 41.11s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  99%|█████████▉| 744/750 [8:52:33<03:44, 37.46s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  99%|█████████▉| 745/750 [8:53:07<03:02, 36.50s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  99%|█████████▉| 746/750 [8:53:41<02:22, 35.62s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches: 100%|█████████▉| 747/750 [8:54:13<01:44, 34.70s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches: 100%|█████████▉| 748/750 [8:54:50<01:10, 35.36s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches: 100%|█████████▉| 749/750 [8:55:23<00:34, 34.65s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches: 100%|██████████| 750/750 [8:56:00<00:00, 42.88s/it]

--- Evaluation Complete ---





In [7]:
# missing_index = 754
# pred_scores.insert(missing_index, 5)

# Score the predictions against the gold labels: overall MAE plus the share of
# predictions whose absolute error is within 1, 2 and 3 points.
final_mae = mean_absolute_error(gold_scores, pred_scores)
acc_at_1, acc_at_2, acc_at_3 = (
    accuracy_at_threshold(gold_scores, pred_scores, threshold=cutoff)
    for cutoff in (1, 2, 3)
)

# Emit the whole report with one print call; the text is unchanged.
print(
    "\n--- LangGraph Evaluation Results ---",
    f"📊 Mean Absolute Error (MAE): {final_mae:.4f}",
    "---",
    f"🎯 Accuracy@1 (error <= 1.0): {acc_at_1:.2%}",
    f"🎯 Accuracy@2 (error <= 2.0): {acc_at_2:.2%}",
    f"🎯 Accuracy@3 (error <= 3.0): {acc_at_3:.2%}",
    "--------------------------------------",
    sep="\n",
)


--- LangGraph Evaluation Results ---
📊 Mean Absolute Error (MAE): 1.7247
---
🎯 Accuracy@1 (error <= 1.0): 53.27%
🎯 Accuracy@2 (error <= 2.0): 76.57%
🎯 Accuracy@3 (error <= 3.0): 89.80%
--------------------------------------


In [8]:
import json

# Persist the Kaggle-split predictions so the metrics can be recomputed
# without re-running the multi-hour evaluation loop.
# Create the output folder first: open(..., "w") does not create missing
# parent directories and would raise FileNotFoundError.
os.makedirs("preds", exist_ok=True)
with open("preds/preds_ee_gemini_kaggle.json", "w", encoding="utf-8") as f:
    json.dump(pred_scores, f)

## Collected dataset

In [9]:
import json
import time

from tqdm import tqdm

# Root folder of the manually collected JD/CV dataset. Set the
# COLLECTED_DATA_DIR environment variable to point at a different copy; the
# fallback is the original author-local Google Drive location.
collected_folder = os.environ.get(
    "COLLECTED_DATA_DIR",
    "/Users/nhannguyen/ngtuthanhan@gmail.com - Google Drive/My Drive/HCMUS/LLM/Data_Collector_30_8",
)
matching_json = os.path.join(collected_folder, "Matching_Result.json")

# The evaluation loop reads each record through its "JD", "CV" and "Score" keys.
# JSON is UTF-8 by specification, so do not depend on the platform default.
with open(matching_json, "r", encoding="utf-8") as f:
    matching_data = json.load(f)

# Fresh accumulators for this dataset (replaces the lists from the previous run).
pred_scores = []
gold_scores = []

# Number of JD/CV pairs sent to the graph per invocation.
batch_size = 1
# Penalty prediction recorded for any pair the graph fails to score.
FAILURE_SCORE = 0.0

# tqdm derives the iteration count from the range itself, which stays correct
# even when len(matching_data) is not a multiple of batch_size.
for i in tqdm(
    range(0, len(matching_data), batch_size),
    desc="Evaluating LangGraph in Batches",
):
    batch_examples = matching_data[i : i + batch_size]
    batch_gold_scores = [example["Score"] for example in batch_examples]

    # Load the raw JD / CV text of every pair in this batch.
    batch_inputs = []
    for example in batch_examples:
        jd_path = os.path.join(collected_folder, "JD", example["JD"]) + ".txt"
        resume_path = os.path.join(collected_folder, "CV", example["CV"]) + ".txt"
        with open(jd_path, "r", encoding="utf-8") as f:
            jd = f.read()
        with open(resume_path, "r", encoding="utf-8") as f:
            resume = f.read()
        batch_inputs.append({"job_description": jd, "resume": resume})

    state = State(inputs=batch_inputs)
    try:
        result = app.invoke(state)
        # NOTE(review): assumes result["results"] is ordered like the inputs — confirm.
        batch_pred_scores = [item.get("score") for item in result["results"]]
    except Exception as e:
        # Best effort: one failing batch must not abort the whole evaluation.
        print(f"An error occurred during graph execution: {e}")
        batch_pred_scores = []

    # Record exactly one prediction per gold score so both lists stay aligned,
    # whatever the batch size. A missing or None score would break the metric
    # computation later, so it is replaced by the penalty score.
    for position, gold_score in enumerate(batch_gold_scores):
        pred_score = (
            batch_pred_scores[position]
            if position < len(batch_pred_scores)
            else None
        )
        if pred_score is None:
            print(f"No score returned for example {i + position}; using penalty score")
            pred_score = FAILURE_SCORE  # Assign a penalty score
        gold_scores.append(gold_score)
        pred_scores.append(pred_score)

    # To avoid rate limiting
    time.sleep(1)


print("--- Evaluation Complete ---")

# --- Calculate Metrics ---
# Overall MAE plus the share of predictions whose absolute error is within
# 1, 2 and 3 points of the gold score.
final_mae = mean_absolute_error(gold_scores, pred_scores)
acc_at_1, acc_at_2, acc_at_3 = (
    accuracy_at_threshold(gold_scores, pred_scores, threshold=cutoff)
    for cutoff in (1, 2, 3)
)

# --- Print Results ---
# Emit the whole report with one print call; the text is unchanged.
print(
    "\n--- LangGraph Evaluation Results ---",
    f"📊 Mean Absolute Error (MAE): {final_mae:.4f}",
    "---",
    f"🎯 Accuracy@1 (error <= 1.0): {acc_at_1:.2%}",
    f"🎯 Accuracy@2 (error <= 2.0): {acc_at_2:.2%}",
    f"🎯 Accuracy@3 (error <= 3.0): {acc_at_3:.2%}",
    "--------------------------------------",
    sep="\n",
)

Evaluating LangGraph in Batches:   0%|          | 0/87 [00:00<?, ?it/s]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   1%|          | 1/87 [00:42<1:01:02, 42.58s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   2%|▏         | 2/87 [01:34<1:08:01, 48.02s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   3%|▎         | 3/87 [02:19<1:05:22, 46.70s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   5%|▍         | 4/87 [03:30<1:17:54, 56.32s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   6%|▌         | 5/87 [04:32<1:19:36, 58.25s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   7%|▋         | 6/87 [05:35<1:21:03, 60.04s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   8%|▊         | 7/87 [06:35<1:20:05, 60.07s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:   9%|▉         | 8/87 [07:26<1:15:16, 57.18s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  10%|█         | 9/87 [08:14<1:10:20, 54.12s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  11%|█▏        | 10/87 [09:12<1:11:00, 55.33s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  13%|█▎        | 11/87 [10:19<1:14:32, 58.85s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  14%|█▍        | 12/87 [11:08<1:09:54, 55.92s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  15%|█▍        | 13/87 [12:23<1:16:08, 61.74s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  16%|█▌        | 14/87 [13:51<1:24:53, 69.78s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  17%|█▋        | 15/87 [15:05<1:25:05, 70.91s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  18%|█▊        | 16/87 [16:12<1:22:26, 69.67s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  20%|█▉        | 17/87 [17:13<1:18:26, 67.24s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  21%|██        | 18/87 [18:09<1:13:12, 63.65s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  22%|██▏       | 19/87 [19:05<1:09:49, 61.61s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  23%|██▎       | 20/87 [20:16<1:11:50, 64.34s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  24%|██▍       | 21/87 [21:12<1:07:48, 61.65s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  25%|██▌       | 22/87 [22:25<1:10:34, 65.15s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  26%|██▋       | 23/87 [23:28<1:08:59, 64.68s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  28%|██▊       | 24/87 [24:39<1:09:47, 66.47s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  29%|██▊       | 25/87 [25:43<1:08:02, 65.85s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  30%|██▉       | 26/87 [26:42<1:04:35, 63.54s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  31%|███       | 27/87 [27:38<1:01:16, 61.28s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  32%|███▏      | 28/87 [28:21<55:02, 55.98s/it]  

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  33%|███▎      | 29/87 [29:12<52:36, 54.43s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  34%|███▍      | 30/87 [29:50<47:09, 49.63s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  36%|███▌      | 31/87 [30:38<45:45, 49.02s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  37%|███▋      | 32/87 [31:24<43:58, 47.97s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  38%|███▊      | 33/87 [32:18<44:50, 49.82s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  39%|███▉      | 34/87 [33:10<44:46, 50.70s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  40%|████      | 35/87 [33:52<41:36, 48.01s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  41%|████▏     | 36/87 [34:42<41:18, 48.60s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  43%|████▎     | 37/87 [35:44<43:45, 52.51s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  44%|████▎     | 38/87 [36:44<44:52, 54.96s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  45%|████▍     | 39/87 [37:36<43:14, 54.05s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  46%|████▌     | 40/87 [39:03<49:55, 63.73s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  47%|████▋     | 41/87 [40:33<54:57, 71.68s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  48%|████▊     | 42/87 [41:48<54:34, 72.76s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  49%|████▉     | 43/87 [42:59<52:56, 72.19s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  51%|█████     | 44/87 [44:03<49:58, 69.73s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  52%|█████▏    | 45/87 [44:55<45:05, 64.42s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  53%|█████▎    | 46/87 [45:32<38:17, 56.03s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  54%|█████▍    | 47/87 [46:22<36:19, 54.50s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  55%|█████▌    | 48/87 [47:17<35:29, 54.61s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  56%|█████▋    | 49/87 [48:31<38:08, 60.21s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  57%|█████▋    | 50/87 [49:41<38:58, 63.20s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  59%|█████▊    | 51/87 [50:42<37:38, 62.73s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  60%|█████▉    | 52/87 [51:33<34:22, 58.94s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  61%|██████    | 53/87 [52:29<32:58, 58.20s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  62%|██████▏   | 54/87 [53:33<33:01, 60.04s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  63%|██████▎   | 55/87 [54:13<28:49, 54.05s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  64%|██████▍   | 56/87 [55:09<28:10, 54.54s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  66%|██████▌   | 57/87 [55:53<25:42, 51.42s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  67%|██████▋   | 58/87 [56:48<25:23, 52.55s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  68%|██████▊   | 59/87 [57:53<26:08, 56.02s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  69%|██████▉   | 60/87 [58:50<25:21, 56.36s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  70%|███████   | 61/87 [59:40<23:36, 54.50s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  71%|███████▏  | 62/87 [1:00:21<21:04, 50.60s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  72%|███████▏  | 63/87 [1:01:09<19:50, 49.62s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  74%|███████▎  | 64/87 [1:01:44<17:21, 45.29s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  75%|███████▍  | 65/87 [1:02:26<16:16, 44.37s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  76%|███████▌  | 66/87 [1:03:16<16:05, 45.99s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  77%|███████▋  | 67/87 [1:04:19<17:00, 51.01s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  78%|███████▊  | 68/87 [1:05:30<18:05, 57.12s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  79%|███████▉  | 69/87 [1:06:27<17:08, 57.12s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  80%|████████  | 70/87 [1:07:20<15:48, 55.80s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  82%|████████▏ | 71/87 [1:08:09<14:20, 53.77s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  83%|████████▎ | 72/87 [1:09:05<13:36, 54.43s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  84%|████████▍ | 73/87 [1:09:51<12:06, 51.93s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  85%|████████▌ | 74/87 [1:10:39<10:59, 50.72s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  86%|████████▌ | 75/87 [1:11:26<09:55, 49.61s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  87%|████████▋ | 76/87 [1:12:22<09:27, 51.59s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  89%|████████▊ | 77/87 [1:13:14<08:37, 51.71s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  90%|████████▉ | 78/87 [1:14:13<08:04, 53.79s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  91%|█████████ | 79/87 [1:15:07<07:12, 54.03s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  92%|█████████▏| 80/87 [1:15:48<05:51, 50.19s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  93%|█████████▎| 81/87 [1:16:48<05:17, 52.94s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  94%|█████████▍| 82/87 [1:17:36<04:17, 51.48s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  95%|█████████▌| 83/87 [1:18:43<03:45, 56.28s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  97%|█████████▋| 84/87 [1:19:42<02:50, 56.92s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  98%|█████████▊| 85/87 [1:20:45<01:57, 58.93s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches:  99%|█████████▉| 86/87 [1:21:39<00:57, 57.35s/it]

[VALIDATION_INPUT] Validating input...


Evaluating LangGraph in Batches: 100%|██████████| 87/87 [1:22:33<00:00, 56.94s/it]

--- Evaluation Complete ---

--- LangGraph Evaluation Results ---
📊 Mean Absolute Error (MAE): 2.5057
---
🎯 Accuracy@1 (error <= 1.0): 42.53%
🎯 Accuracy@2 (error <= 2.0): 56.32%
🎯 Accuracy@3 (error <= 3.0): 66.67%
--------------------------------------





In [10]:
import json

# Persist the collected-dataset predictions so the metrics can be recomputed
# without re-running the evaluation loop.
# Create the output folder first: open(..., "w") does not create missing
# parent directories and would raise FileNotFoundError.
os.makedirs("preds", exist_ok=True)
with open("preds/preds_ee_gemini_collected.json", "w", encoding="utf-8") as f:
    json.dump(pred_scores, f)