Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: test whether CV is effective #649

Closed
wants to merge 10 commits into from
Previous commit
Next commit
feat: add type checker to api backend & align litellm and old backend (#647)

* move cache auto continue and retry to all api backend

* add type checker to json mode output

* fix CI

* feat: Add json_mode handling and streaming support in chat completion function

* lint

* fix a bug when returning a dict which value could contain int or bool

* remove litellm

---------

Co-authored-by: Xu Yang <xuyang1@microsoft.com>
Co-authored-by: Young <afe.young@gmail.com>
  • Loading branch information
3 people authored and WinstonLiyt committed Mar 4, 2025
commit 3dbce8e79941a8691bd999195383e9d39f8ac24b
3 changes: 2 additions & 1 deletion rdagent/app/qlib_rd_loop/factor_from_report.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
from pathlib import Path
from typing import Any, Tuple
from typing import Any, Dict, Tuple

import fire
from jinja2 import Environment, StrictUndefined
@@ -49,6 +49,7 @@ def generate_hypothesis(factor_result: dict, report_content: str) -> str:
user_prompt=user_prompt,
system_prompt=system_prompt,
json_mode=True,
json_target_type=Dict[str, str],
)

response_json = json.loads(response)
3 changes: 2 additions & 1 deletion rdagent/components/coder/CoSTEER/knowledge_management.py
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@
import re
from itertools import combinations
from pathlib import Path
from typing import Union
from typing import List, Union

from jinja2 import Environment, StrictUndefined

@@ -339,6 +339,7 @@ def analyze_component(
system_prompt=analyze_component_system_prompt,
user_prompt=analyze_component_user_prompt,
json_mode=True,
json_target_type=List[int],
),
)["component_no_list"]
return [all_component_nodes[index - 1] for index in sorted(list(set(component_no_list)))]
6 changes: 5 additions & 1 deletion rdagent/components/coder/data_science/ensemble/__init__.py
Original file line number Diff line number Diff line change
@@ -12,6 +12,7 @@
"""

import json
from typing import Dict

from rdagent.components.coder.CoSTEER import CoSTEER
from rdagent.components.coder.CoSTEER.evaluators import (
@@ -85,7 +86,10 @@ def implement_one_task(
for _ in range(5):
ensemble_code = json.loads(
APIBackend().build_messages_and_create_chat_completion(
user_prompt=user_prompt, system_prompt=system_prompt, json_mode=True
user_prompt=user_prompt,
system_prompt=system_prompt,
json_mode=True,
json_target_type=Dict[str, str],
)
)["code"]
if ensemble_code != workspace.file_dict.get("ensemble.py"):
6 changes: 5 additions & 1 deletion rdagent/components/coder/data_science/feature/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
from typing import Dict

from rdagent.components.coder.CoSTEER import CoSTEER
from rdagent.components.coder.CoSTEER.evaluators import (
@@ -70,7 +71,10 @@ def implement_one_task(
for _ in range(5):
feature_code = json.loads(
APIBackend().build_messages_and_create_chat_completion(
user_prompt=user_prompt, system_prompt=system_prompt, json_mode=True
user_prompt=user_prompt,
system_prompt=system_prompt,
json_mode=True,
json_target_type=Dict[str, str],
)
)["code"]
if feature_code != workspace.file_dict.get("feature.py"):
3 changes: 3 additions & 0 deletions rdagent/components/coder/data_science/model/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Dict

from rdagent.components.coder.CoSTEER import CoSTEER
from rdagent.components.coder.CoSTEER.evaluators import (
CoSTEERMultiEvaluator,
@@ -83,6 +85,7 @@ def implement_one_task(
user_prompt=user_prompt,
system_prompt=system_prompt,
json_mode=BatchEditOut.json_mode,
json_target_type=Dict[str, str],
)
)

42 changes: 28 additions & 14 deletions rdagent/components/coder/data_science/raw_data_loader/__init__.py
Original file line number Diff line number Diff line change
@@ -24,6 +24,7 @@

import json
import re
from typing import Dict

from rdagent.app.data_science.conf import DS_RD_SETTING
from rdagent.components.coder.CoSTEER import CoSTEER
@@ -108,20 +109,30 @@ def implement_one_task(
spec_session = APIBackend().build_chat_session(session_system_prompt=system_prompt)

data_loader_spec = json.loads(
spec_session.build_chat_completion(user_prompt=data_loader_prompt, json_mode=True)
spec_session.build_chat_completion(
user_prompt=data_loader_prompt, json_mode=True, json_target_type=Dict[str, str]
)
)["spec"]
feature_spec = json.loads(
spec_session.build_chat_completion(
user_prompt=feature_prompt, json_mode=True, json_target_type=Dict[str, str]
)
)["spec"]
model_spec = json.loads(
spec_session.build_chat_completion(
user_prompt=model_prompt, json_mode=True, json_target_type=Dict[str, str]
)
)["spec"]
ensemble_spec = json.loads(
spec_session.build_chat_completion(
user_prompt=ensemble_prompt, json_mode=True, json_target_type=Dict[str, str]
)
)["spec"]
workflow_spec = json.loads(
spec_session.build_chat_completion(
user_prompt=workflow_prompt, json_mode=True, json_target_type=Dict[str, str]
)
)["spec"]
feature_spec = json.loads(spec_session.build_chat_completion(user_prompt=feature_prompt, json_mode=True))[
"spec"
]
model_spec = json.loads(spec_session.build_chat_completion(user_prompt=model_prompt, json_mode=True))[
"spec"
]
ensemble_spec = json.loads(spec_session.build_chat_completion(user_prompt=ensemble_prompt, json_mode=True))[
"spec"
]
workflow_spec = json.loads(spec_session.build_chat_completion(user_prompt=workflow_prompt, json_mode=True))[
"spec"
]
else:
data_loader_spec = workspace.file_dict["spec/data_loader.md"]
feature_spec = workspace.file_dict["spec/feature.md"]
@@ -146,7 +157,10 @@ def implement_one_task(
for _ in range(5):
data_loader_code = json.loads(
APIBackend().build_messages_and_create_chat_completion(
user_prompt=user_prompt, system_prompt=system_prompt, json_mode=True
user_prompt=user_prompt,
system_prompt=system_prompt,
json_mode=True,
json_target_type=Dict[str, str],
)
)["code"]
if data_loader_code != workspace.file_dict.get("load_data.py"):
6 changes: 5 additions & 1 deletion rdagent/components/coder/data_science/workflow/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
from typing import Dict

from rdagent.components.coder.CoSTEER import CoSTEER
from rdagent.components.coder.CoSTEER.evaluators import (
@@ -73,7 +74,10 @@ def implement_one_task(
for _ in range(5):
workflow_code = json.loads(
APIBackend().build_messages_and_create_chat_completion(
user_prompt=user_prompt, system_prompt=system_prompt, json_mode=True
user_prompt=user_prompt,
system_prompt=system_prompt,
json_mode=True,
json_target_type=Dict[str, str],
)
)["code"]
if workflow_code != workspace.file_dict.get("main.py"):
8 changes: 6 additions & 2 deletions rdagent/components/coder/factor_coder/eva_utils.py
Original file line number Diff line number Diff line change
@@ -2,7 +2,7 @@
import json
from abc import abstractmethod
from pathlib import Path
from typing import Tuple
from typing import Dict, Tuple

import pandas as pd
from jinja2 import Environment, StrictUndefined
@@ -212,7 +212,10 @@ def evaluate(
try:
api = APIBackend() if attempts == 0 else APIBackend(use_chat_cache=False)
resp = api.build_messages_and_create_chat_completion(
user_prompt=gen_df_info_str, system_prompt=system_prompt, json_mode=True
user_prompt=gen_df_info_str,
system_prompt=system_prompt,
json_mode=True,
json_target_type=Dict[str, str | bool | int],
)
resp_dict = json.loads(resp)
resp_dict["output_format_decision"] = str(resp_dict["output_format_decision"]).lower() in ["true", "1"]
@@ -556,6 +559,7 @@ def evaluate(
system_prompt=system_prompt,
json_mode=True,
seed=attempts, # in case of useless retrying when cache enabled.
json_target_type=Dict[str, str | bool | int],
),
)
final_decision = final_evaluation_dict["final_decision"]
6 changes: 5 additions & 1 deletion rdagent/components/coder/factor_coder/evolving_strategy.py
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@

import json
from pathlib import Path
from typing import Dict

from jinja2 import Environment, StrictUndefined

@@ -168,7 +169,10 @@ def implement_one_task(
APIBackend(
use_chat_cache=FACTOR_COSTEER_SETTINGS.coder_use_cache
).build_messages_and_create_chat_completion(
user_prompt=user_prompt, system_prompt=system_prompt, json_mode=True
user_prompt=user_prompt,
system_prompt=system_prompt,
json_mode=True,
json_target_type=Dict[str, str],
)
)["code"]
return code
3 changes: 2 additions & 1 deletion rdagent/components/coder/model_coder/eva_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import json
from pathlib import Path
from typing import Tuple
from typing import Dict, Tuple

import numpy as np
from jinja2 import Environment, StrictUndefined
@@ -177,6 +177,7 @@ def evaluate(
user_prompt=user_prompt,
system_prompt=system_prompt,
json_mode=True,
json_target_type=Dict[str, str | bool | int],
),
)
if isinstance(final_evaluation_dict["final_decision"], str) and final_evaluation_dict[
2 changes: 2 additions & 0 deletions rdagent/components/coder/model_coder/evolving_strategy.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
from pathlib import Path
from typing import Dict

from jinja2 import Environment, StrictUndefined

@@ -96,6 +97,7 @@ def implement_one_task(
user_prompt=user_prompt,
system_prompt=system_prompt,
json_mode=True,
json_target_type=Dict[str, str],
),
)["code"]
return code
4 changes: 2 additions & 2 deletions rdagent/core/experiment.py
Original file line number Diff line number Diff line change
@@ -18,7 +18,7 @@
from rdagent.utils.fmt import shrink_text

if typing.TYPE_CHECKING:
from rdagent.core.proposal import ExperimentFeedback, Hypothesis
from rdagent.core.proposal import Hypothesis
from rdagent.utils.env import Env

"""
@@ -225,7 +225,7 @@ def inject_code_from_file_dict(self, workspace: FBWorkspace) -> None:
"""
for name, code in workspace.file_dict.items():
self.inject_files(**{name: code})

def copy(self) -> FBWorkspace:
"""
copy the workspace from the original one
Loading
Oops, something went wrong.