
test: test whether CV is effective #649


Closed · wants to merge 10 commits
Binary file modified docs/WeChat_QR_code.jpg
10 changes: 7 additions & 3 deletions rdagent/app/data_science/loop.py
@@ -142,13 +142,15 @@ def record(self, prev_out: dict[str, Any]):
logger.log_object(self.trace.sota_experiment(), tag="SOTA experiment")


-def main(path=None, output_path=None, step_n=None, loop_n=None, competition="bms-molecular-translation"):
+def main(
+    path=None, output_path=None, step_n=None, loop_n=None, competition="bms-molecular-translation", do_truncate=True
+):
"""

Parameters
----------
path :
-path like `$LOG_PATH/__session__/1/0_propose`. It indicates that we restore the state that after finish the step 0 in loop1
+path like `$LOG_PATH/__session__/1/0_propose`. It indicates that we restore the state after finishing step 0 in loop 1
output_path :
path like `$LOG_PATH`. It indicates where we want to save our session and log information.
step_n :
@@ -158,6 +160,8 @@ def main(path=None, output_path=None, step_n=None, loop_n=None, competition="bms-molecular-translation"):
- if current loop is incomplete, it will be counted as the first loop for completion.
- if both step_n and loop_n are provided, the process will stop as soon as either condition is met.
competition :
do_truncate :
If set to True, the logger will truncate log messages recorded beyond the restored session point by calling `logger.storage.truncate`.


Auto R&D Evolving loop for models in a Kaggle scenario.
Expand All @@ -181,7 +185,7 @@ def main(path=None, output_path=None, step_n=None, loop_n=None, competition="bms
if path is None:
kaggle_loop = DataScienceRDLoop(DS_RD_SETTING)
else:
-kaggle_loop = DataScienceRDLoop.load(path, output_path)
+kaggle_loop = DataScienceRDLoop.load(path, output_path, do_truncate)
kaggle_loop.run(step_n=step_n, loop_n=loop_n)


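As a usage sketch (not part of the diff): resuming a saved session with the new `do_truncate` flag might look like the following, with the session path taken from the docstring's own example placeholder.

```python
# Illustrative only: the path below is the docstring's example placeholder.
from rdagent.app.data_science.loop import main

main(
    path="$LOG_PATH/__session__/1/0_propose",  # restore state after step 0 of loop 1
    output_path="$LOG_PATH",                   # where session and log information are saved
    loop_n=2,                                  # run at most two more loops, then stop
    do_truncate=True,                          # drop log entries beyond the restored point
)
```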
3 changes: 2 additions & 1 deletion rdagent/app/qlib_rd_loop/factor_from_report.py
@@ -1,6 +1,6 @@
import json
from pathlib import Path
-from typing import Any, Tuple
+from typing import Any, Dict, Tuple

import fire
from jinja2 import Environment, StrictUndefined
@@ -49,6 +49,7 @@ def generate_hypothesis(factor_result: dict, report_content: str) -> str:
user_prompt=user_prompt,
system_prompt=system_prompt,
json_mode=True,
json_target_type=Dict[str, str],
)

response_json = json.loads(response)
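Across the hunks in this PR, the new `json_target_type` argument suggests the backend now validates each parsed JSON payload against a typing annotation. A minimal sketch of what such a check could look like; `matches_target_type` is a hypothetical helper for illustration, not an API of this repository (union support requires Python 3.10+):

```python
from typing import Dict, List, get_args, get_origin

def matches_target_type(obj, target) -> bool:
    """Hypothetical check: does a parsed JSON object match a typing annotation?"""
    origin, args = get_origin(target), get_args(target)
    if origin is dict:  # e.g. Dict[str, str] or Dict[str, str | bool | int]
        key_t, val_t = args
        return isinstance(obj, dict) and all(
            isinstance(k, key_t) and isinstance(v, val_t) for k, v in obj.items()
        )
    if origin is list:  # e.g. List[int]
        (elem_t,) = args
        return isinstance(obj, list) and all(isinstance(e, elem_t) for e in obj)
    return isinstance(obj, target)  # plain classes and X | Y unions

assert matches_target_type({"hypothesis": "momentum works"}, Dict[str, str])
assert matches_target_type([1, 2, 3], List[int])
```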
3 changes: 2 additions & 1 deletion rdagent/components/coder/CoSTEER/knowledge_management.py
@@ -6,7 +6,7 @@
import re
from itertools import combinations
from pathlib import Path
-from typing import Union
+from typing import List, Union

from jinja2 import Environment, StrictUndefined

@@ -339,6 +339,7 @@ def analyze_component(
system_prompt=analyze_component_system_prompt,
user_prompt=analyze_component_user_prompt,
json_mode=True,
json_target_type=List[int],
),
)["component_no_list"]
return [all_component_nodes[index - 1] for index in sorted(list(set(component_no_list)))]
6 changes: 5 additions & 1 deletion rdagent/components/coder/data_science/ensemble/__init__.py
@@ -12,6 +12,7 @@
"""

import json
from typing import Dict

from rdagent.components.coder.CoSTEER import CoSTEER
from rdagent.components.coder.CoSTEER.evaluators import (
@@ -85,7 +86,10 @@ def implement_one_task(
for _ in range(5):
ensemble_code = json.loads(
APIBackend().build_messages_and_create_chat_completion(
-user_prompt=user_prompt, system_prompt=system_prompt, json_mode=True
+user_prompt=user_prompt,
+system_prompt=system_prompt,
+json_mode=True,
+json_target_type=Dict[str, str],
)
)["code"]
if ensemble_code != workspace.file_dict.get("ensemble.py"):
6 changes: 5 additions & 1 deletion rdagent/components/coder/data_science/feature/__init__.py
@@ -1,4 +1,5 @@
import json
from typing import Dict

from rdagent.components.coder.CoSTEER import CoSTEER
from rdagent.components.coder.CoSTEER.evaluators import (
@@ -70,7 +71,10 @@ def implement_one_task(
for _ in range(5):
feature_code = json.loads(
APIBackend().build_messages_and_create_chat_completion(
-user_prompt=user_prompt, system_prompt=system_prompt, json_mode=True
+user_prompt=user_prompt,
+system_prompt=system_prompt,
+json_mode=True,
+json_target_type=Dict[str, str],
)
)["code"]
if feature_code != workspace.file_dict.get("feature.py"):
3 changes: 3 additions & 0 deletions rdagent/components/coder/data_science/model/__init__.py
@@ -1,3 +1,5 @@
from typing import Dict

from rdagent.components.coder.CoSTEER import CoSTEER
from rdagent.components.coder.CoSTEER.evaluators import (
CoSTEERMultiEvaluator,
@@ -83,6 +85,7 @@ def implement_one_task(
user_prompt=user_prompt,
system_prompt=system_prompt,
json_mode=BatchEditOut.json_mode,
json_target_type=Dict[str, str],
)
)

42 changes: 28 additions & 14 deletions rdagent/components/coder/data_science/raw_data_loader/__init__.py
@@ -24,6 +24,7 @@

import json
import re
from typing import Dict

from rdagent.app.data_science.conf import DS_RD_SETTING
from rdagent.components.coder.CoSTEER import CoSTEER
@@ -108,20 +109,30 @@ def implement_one_task(
spec_session = APIBackend().build_chat_session(session_system_prompt=system_prompt)

data_loader_spec = json.loads(
-spec_session.build_chat_completion(user_prompt=data_loader_prompt, json_mode=True)
+spec_session.build_chat_completion(
+    user_prompt=data_loader_prompt, json_mode=True, json_target_type=Dict[str, str]
+)
)["spec"]
-feature_spec = json.loads(spec_session.build_chat_completion(user_prompt=feature_prompt, json_mode=True))[
-    "spec"
-]
+feature_spec = json.loads(
+    spec_session.build_chat_completion(
+        user_prompt=feature_prompt, json_mode=True, json_target_type=Dict[str, str]
+    )
+)["spec"]
-model_spec = json.loads(spec_session.build_chat_completion(user_prompt=model_prompt, json_mode=True))[
-    "spec"
-]
+model_spec = json.loads(
+    spec_session.build_chat_completion(
+        user_prompt=model_prompt, json_mode=True, json_target_type=Dict[str, str]
+    )
+)["spec"]
-ensemble_spec = json.loads(spec_session.build_chat_completion(user_prompt=ensemble_prompt, json_mode=True))[
-    "spec"
-]
+ensemble_spec = json.loads(
+    spec_session.build_chat_completion(
+        user_prompt=ensemble_prompt, json_mode=True, json_target_type=Dict[str, str]
+    )
+)["spec"]
-workflow_spec = json.loads(spec_session.build_chat_completion(user_prompt=workflow_prompt, json_mode=True))[
-    "spec"
-]
+workflow_spec = json.loads(
+    spec_session.build_chat_completion(
+        user_prompt=workflow_prompt, json_mode=True, json_target_type=Dict[str, str]
+    )
+)["spec"]
else:
data_loader_spec = workspace.file_dict["spec/data_loader.md"]
feature_spec = workspace.file_dict["spec/feature.md"]
@@ -146,7 +157,10 @@ def implement_one_task(
for _ in range(5):
data_loader_code = json.loads(
APIBackend().build_messages_and_create_chat_completion(
-user_prompt=user_prompt, system_prompt=system_prompt, json_mode=True
+user_prompt=user_prompt,
+system_prompt=system_prompt,
+json_mode=True,
+json_target_type=Dict[str, str],
)
)["code"]
if data_loader_code != workspace.file_dict.get("load_data.py"):
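Review note: the five spec requests above share one call shape, so a possible tidy-up is to loop over the prompts within the same chat session. A sketch only, reusing the `spec_session` and `*_prompt` names from this hunk; it is not a change in this PR:

```python
# Sketch: fetch all five specs through the same chat session in one loop.
prompts = {
    "data_loader": data_loader_prompt,
    "feature": feature_prompt,
    "model": model_prompt,
    "ensemble": ensemble_prompt,
    "workflow": workflow_prompt,
}
specs = {
    name: json.loads(
        spec_session.build_chat_completion(
            user_prompt=prompt, json_mode=True, json_target_type=Dict[str, str]
        )
    )["spec"]
    for name, prompt in prompts.items()
}
```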
@@ -271,8 +271,29 @@ spec:

3. Dataset Splitting
- The dataset returned by `load_data` is not pre-split. After calling `feat_eng`, split the data into training and test sets.
-  - If feasible, apply cross-validation on the training set (`X_transformed`, `y_transformed`) to ensure a reliable assessment of model performance.
+  - [Notice] Apply cross-validation (e.g. KFold) on the training set (`X_transformed`, `y_transformed`) to ensure a reliable assessment of model performance.
- Keep the test set (`X_test_transformed`) unchanged, as it is only used for generating the final predictions.
- Pseudocode logic for reference:
```
Set number of splits and initialize KFold cross-validator.

Create dictionaries for validation and test predictions.

For each model file:
Import the model dynamically.
Initialize arrays for out-of-fold (OOF) and test predictions.

For each fold in KFold:
Split data into training and validation sets.
Run model workflow to get validation and test predictions.
Validate shapes.
Store validation and test predictions.

Compute average test predictions across folds.
Save OOF and averaged test predictions.

Ensemble predictions from all models and print the final shape.
```

4. Submission File:
- Save the final predictions as `submission.csv`, ensuring the format matches the competition requirements (refer to `sample_submission` in the Folder Description for the correct structure).
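For reference, a runnable sketch of the spec's pseudocode for a single model, using scikit-learn's `KFold`. The `model_workflow` callable and the array-based data layout are assumptions of this sketch, standing in for the dynamically imported model code:

```python
import numpy as np
from sklearn.model_selection import KFold

def cross_validate(model_workflow, X, y, X_test, n_splits=5):
    """Return out-of-fold validation predictions and fold-averaged test predictions."""
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)
    oof_pred = np.zeros(len(X))                    # out-of-fold (OOF) predictions
    test_pred = np.zeros((n_splits, len(X_test)))  # per-fold test predictions
    for fold, (train_idx, val_idx) in enumerate(kf.split(X)):
        # Run the model workflow on this fold's split.
        val_out, test_out = model_workflow(X[train_idx], y[train_idx], X[val_idx], X_test)
        assert val_out.shape[0] == len(val_idx), "validation prediction shape mismatch"
        oof_pred[val_idx] = val_out
        test_pred[fold] = test_out
    return oof_pred, test_pred.mean(axis=0)  # average test predictions across folds
```

Scoring `oof_pred` against `y` gives the reliable performance estimate the spec asks for, while the fold-averaged test predictions feed the ensemble step.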
6 changes: 5 additions & 1 deletion rdagent/components/coder/data_science/workflow/__init__.py
@@ -1,4 +1,5 @@
import json
from typing import Dict

from rdagent.components.coder.CoSTEER import CoSTEER
from rdagent.components.coder.CoSTEER.evaluators import (
@@ -73,7 +74,10 @@ def implement_one_task(
for _ in range(5):
workflow_code = json.loads(
APIBackend().build_messages_and_create_chat_completion(
-user_prompt=user_prompt, system_prompt=system_prompt, json_mode=True
+user_prompt=user_prompt,
+system_prompt=system_prompt,
+json_mode=True,
+json_target_type=Dict[str, str],
)
)["code"]
if workflow_code != workspace.file_dict.get("main.py"):
@@ -1,6 +1,13 @@
from pathlib import Path
import pandas as pd
import hashlib

def calculate_md5(file_path):
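    # Hash the file bytes so the final assert can detect if scores.csv was rewritten.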
with open(file_path, "rb") as f:
file_hash = hashlib.md5(f.read()).hexdigest()
return file_hash

file_md5 = calculate_md5("scores.csv")

"""
find . | grep -i sample | grep -i submission | grep -v sample_submission.csv | grep -v zip_files | grep -v 'sample/'
@@ -66,4 +73,5 @@ def print_first_rows(file_path, file_name, num_rows=5):
print_first_rows(SAMPLE_SUBMISSION_PATH, sample_submission_name)
print_first_rows('submission.csv', 'submission.csv')

assert calculate_md5("scores.csv") == file_md5, "scores.csv should not be rewritten"
print(f"\nPlease Checked the content of the submission file(submission.csv should align with {sample_submission_name}). ")
@@ -109,6 +109,7 @@ workflow_eval:
[Note]
1. The individual components (data loading, feature engineering, model tuning, etc.) have already been evaluated by the user. You should only evaluate and improve the workflow code, unless there are critical issues in the components.
2. Model performance is NOT a concern in this evaluation—only correct execution and formatting matter.
3. As long as the execution does not exceed the time limit, ensure that the code uses cross-validation to split the training data and train the model. If cross-validation is not used, mention it in the execution section and set `final_decision` to `false`.

## Evaluation Criteria
You will be given the workflow execution output (`stdout`) to determine correctness.
8 changes: 6 additions & 2 deletions rdagent/components/coder/factor_coder/eva_utils.py
@@ -2,7 +2,7 @@
import json
from abc import abstractmethod
from pathlib import Path
-from typing import Tuple
+from typing import Dict, Tuple

import pandas as pd
from jinja2 import Environment, StrictUndefined
@@ -212,7 +212,10 @@ def evaluate(
try:
api = APIBackend() if attempts == 0 else APIBackend(use_chat_cache=False)
resp = api.build_messages_and_create_chat_completion(
-user_prompt=gen_df_info_str, system_prompt=system_prompt, json_mode=True
+user_prompt=gen_df_info_str,
+system_prompt=system_prompt,
+json_mode=True,
+json_target_type=Dict[str, str | bool | int],
)
resp_dict = json.loads(resp)
resp_dict["output_format_decision"] = str(resp_dict["output_format_decision"]).lower() in ["true", "1"]
@@ -556,6 +559,7 @@ def evaluate(
system_prompt=system_prompt,
json_mode=True,
seed=attempts, # in case of useless retrying when cache enabled.
json_target_type=Dict[str, str | bool | int],
),
)
final_decision = final_evaluation_dict["final_decision"]
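Aside: the string-to-bool coercion in the first hunk above (`str(resp_dict["output_format_decision"]).lower() in ["true", "1"]`) exists because LLMs sometimes return boolean fields as strings even in JSON mode. A quick illustration of the pattern:

```python
# Raw values an LLM might plausibly return for a boolean field.
for raw in (True, "True", "false", 0, "1"):
    as_bool = str(raw).lower() in ["true", "1"]
    print(f"{raw!r} -> {as_bool}")
# True -> True, 'True' -> True, 'false' -> False, 0 -> False, '1' -> True
```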
6 changes: 5 additions & 1 deletion rdagent/components/coder/factor_coder/evolving_strategy.py
@@ -2,6 +2,7 @@

import json
from pathlib import Path
from typing import Dict

from jinja2 import Environment, StrictUndefined

@@ -168,7 +169,10 @@ def implement_one_task(
APIBackend(
use_chat_cache=FACTOR_COSTEER_SETTINGS.coder_use_cache
).build_messages_and_create_chat_completion(
-user_prompt=user_prompt, system_prompt=system_prompt, json_mode=True
+user_prompt=user_prompt,
+system_prompt=system_prompt,
+json_mode=True,
+json_target_type=Dict[str, str],
)
)["code"]
return code
3 changes: 2 additions & 1 deletion rdagent/components/coder/model_coder/eva_utils.py
@@ -1,6 +1,6 @@
import json
from pathlib import Path
-from typing import Tuple
+from typing import Dict, Tuple

import numpy as np
from jinja2 import Environment, StrictUndefined
@@ -177,6 +177,7 @@ def evaluate(
user_prompt=user_prompt,
system_prompt=system_prompt,
json_mode=True,
json_target_type=Dict[str, str | bool | int],
),
)
if isinstance(final_evaluation_dict["final_decision"], str) and final_evaluation_dict[
2 changes: 2 additions & 0 deletions rdagent/components/coder/model_coder/evolving_strategy.py
@@ -1,5 +1,6 @@
import json
from pathlib import Path
from typing import Dict

from jinja2 import Environment, StrictUndefined

@@ -96,6 +97,7 @@ def implement_one_task(
user_prompt=user_prompt,
system_prompt=system_prompt,
json_mode=True,
json_target_type=Dict[str, str],
),
)["code"]
return code
4 changes: 2 additions & 2 deletions rdagent/core/experiment.py
@@ -18,7 +18,7 @@
from rdagent.utils.fmt import shrink_text

if typing.TYPE_CHECKING:
-from rdagent.core.proposal import ExperimentFeedback, Hypothesis
+from rdagent.core.proposal import Hypothesis
from rdagent.utils.env import Env

"""
@@ -225,7 +225,7 @@ def inject_code_from_file_dict(self, workspace: FBWorkspace) -> None:
"""
for name, code in workspace.file_dict.items():
self.inject_files(**{name: code})

def copy(self) -> FBWorkspace:
"""
copy the workspace from the original one