feat: Feature selection better #340

Open · wants to merge 20 commits into base: main

Changes from 1 commit
Revise For Better COSTEER & Tuning
xisen-w committed Sep 26, 2024
commit 3ec9a50296f6cc0df225b21d4c319373911230de
2 changes: 1 addition & 1 deletion rdagent/components/coder/model_coder/CoSTEER/__init__.py
@@ -70,7 +70,7 @@ def develop(self, exp: ModelExperiment) -> ModelExperiment:
         self.rag = ModelRAGStrategy(model_knowledge_base)
 
         # init intermediate items
-        model_experiment = ModelEvolvingItem(sub_tasks=exp.sub_tasks)
+        model_experiment = ModelEvolvingItem(sub_tasks=exp.sub_tasks, from_based_exp=exp.based_experiments)
 
         self.evolve_agent = ModelRAGEvoAgent(
             max_loop=self.max_loop,
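Taken on its own, this one-line change threads the experiment's prior (SOTA) experiments into the evolving item at construction time instead of handing them to the strategy later as a separate exp argument. A minimal runnable sketch of that hand-off, with StubExperiment/StubEvolvingItem standing in for the real ModelExperiment/ModelEvolvingItem (the stand-in fields are assumptions for illustration, not the RD-Agent API):

    from collections.abc import Sequence


    class StubExperiment:
        """Stand-in for ModelExperiment: sub-tasks plus earlier (SOTA) experiments."""

        def __init__(self, sub_tasks, based_experiments=()):
            self.sub_tasks = list(sub_tasks)
            self.based_experiments = list(based_experiments)


    class StubEvolvingItem:
        """Stand-in for ModelEvolvingItem after this commit."""

        def __init__(self, sub_tasks, from_based_exp: Sequence = ()):
            self.sub_tasks = list(sub_tasks)
            self.based_exp = list(from_based_exp)  # prior experiments now travel with the item


    sota = StubExperiment(sub_tasks=[])  # an already-run experiment
    exp = StubExperiment(sub_tasks=["tune XGBoost"], based_experiments=[sota])

    # Mirrors the revised call in develop():
    item = StubEvolvingItem(sub_tasks=exp.sub_tasks, from_based_exp=exp.based_experiments)
    assert item.based_exp == [sota]

The next hunks, in the file that defines ModelEvolvingItem, add the receiving side of this hand-off.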
@@ -5,6 +5,8 @@
 )
 from rdagent.core.evolving_framework import EvolvableSubjects
 from rdagent.log import rdagent_logger as logger
+from collections.abc import Sequence
+from rdagent.core.experiment import ASpecificWSForExperiment
 
 
 class ModelEvolvingItem(ModelExperiment, EvolvableSubjects):
@@ -15,6 +17,7 @@ class ModelEvolvingItem(ModelExperiment, EvolvableSubjects):
     def __init__(
         self,
         sub_tasks: list[ModelTask],
+        from_based_exp: Sequence[ASpecificWSForExperiment] = [],
         sub_gt_implementations: list[ModelFBWorkspace] = None,
     ):
         ModelExperiment.__init__(self, sub_tasks=sub_tasks)
@@ -27,3 +30,4 @@ def __init__(
             )
         else:
             self.sub_gt_implementations = sub_gt_implementations
+        self.based_exp = from_based_exp
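Putting the three hunks together, the constructor now reads roughly as below. This is a standalone sketch: the real class also inherits from ModelExperiment and EvolvableSubjects, and the length-check branch (only its tail is visible in the hunk above) is paraphrased, so treat its condition and warning text as assumptions.

    import logging
    from collections.abc import Sequence

    logger = logging.getLogger("evolving_item_sketch")


    class EvolvingItemSketch:
        """Rough standalone rendering of ModelEvolvingItem.__init__ after this commit."""

        def __init__(self, sub_tasks, from_based_exp: Sequence = (), sub_gt_implementations=None):
            self.sub_tasks = list(sub_tasks)
            # Only the tail of this branch is visible in the hunk; condition/message are paraphrased.
            if sub_gt_implementations is not None and len(sub_gt_implementations) != len(sub_tasks):
                self.sub_gt_implementations = None
                logger.warning("sub_gt_implementations does not match sub_tasks; ignoring it")
            else:
                self.sub_gt_implementations = sub_gt_implementations
            # New in this commit: keep the based (SOTA) experiments on the item itself.
            self.based_exp = list(from_based_exp)


    item = EvolvingItemSketch(sub_tasks=["task"], from_based_exp=["sota_exp"])
    assert item.based_exp == ["sota_exp"]

The sketch defaults from_based_exp to a tuple rather than the [] used in the diff; an immutable default avoids Python's shared-mutable-default pitfall while behaving the same for callers.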
31 changes: 17 additions & 14 deletions rdagent/components/coder/model_coder/CoSTEER/evolving_strategy.py
@@ -22,6 +22,9 @@
 from rdagent.core.utils import multiprocessing_wrapper
 from rdagent.oai.llm_utils import APIBackend
 
+from collections.abc import Sequence
+from rdagent.core.experiment import ASpecificWSForExperiment
+
 coder_prompts = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")
 
 
@@ -30,27 +33,28 @@ def implement_one_model(
         self,
         target_task: ModelTask,
         queried_knowledge: ModelQueriedKnowledge = None,
-        exp: ModelExperiment = None, # Add this parameter
+        based_exp: Sequence[ASpecificWSForExperiment] = [],
     ) -> str:
         model_information_str = target_task.get_task_information()
         model_type = target_task.model_type
 
-        data_desc = None
-        # Get the current code from the experiment using build_from_SOTA
         current_code = ""
-        if exp is not None:
-            self.build_from_SOTA(exp)
-            model_file_mapping = {
-                "XGBoost": "model_xgb.py",
-                "RandomForest": "model_rf.py",
-                "LightGBM": "model_lgb.py",
-                "NN": "model_nn.py",
-            }
+        data_desc = None
+
+        model_file_mapping = {
+            "XGBoost": "model_xgb.py",
+            "RandomForest": "model_rf.py",
+            "LightGBM": "model_lgb.py",
+            "NN": "model_nn.py",
+        }
+
+        for exp in based_exp:
             if model_type in model_file_mapping:
                 current_code = exp.experiment_workspace.code_dict.get(model_file_mapping[model_type], "")
+                # send in data description
+                data_desc = exp.experiment_workspace.data_description
+
+            if current_code:
+                break # Use the first non-empty code found
 
         if queried_knowledge is not None and model_information_str in queried_knowledge.success_task_to_knowledge_dict:
             return queried_knowledge.success_task_to_knowledge_dict[model_information_str].implementation
@@ -127,7 +131,6 @@ def evolve(
         *,
         evo: ModelEvolvingItem,
         queried_knowledge: ModelQueriedKnowledge | None = None,
-        exp: ModelExperiment = None, # Add this parameter
         **kwargs,
     ) -> ModelEvolvingItem:
         # 1. Find the models that need to be evolved
@@ -146,7 +149,7 @@
 
         result = multiprocessing_wrapper(
             [
-                (self.implement_one_model, (evo.sub_tasks[target_index], queried_knowledge, exp)) # Pass exp here
+                (self.implement_one_model, (evo.sub_tasks[target_index], queried_knowledge, evo.based_experiments)) # Pass exp here
                 for target_index in to_be_finished_task_index
             ],
             n=RD_AGENT_SETTINGS.multi_proc_n,
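The reshuffled body of implement_one_model reduces to a small lookup: walk the based experiments in order, take the first workspace that already ships a file for this model type, and carry its data description along. A standalone sketch of that logic (plain dicts stand in for experiment_workspace.code_dict and data_description; names here are illustrative):

    MODEL_FILE_MAPPING = {
        "XGBoost": "model_xgb.py",
        "RandomForest": "model_rf.py",
        "LightGBM": "model_lgb.py",
        "NN": "model_nn.py",
    }


    def find_sota_code(model_type, based_exp):
        """Return (code, data_description) from the first based experiment with code for model_type."""
        current_code, data_desc = "", None
        for workspace in based_exp:  # stands in for exp.experiment_workspace
            if model_type in MODEL_FILE_MAPPING:
                current_code = workspace["code_dict"].get(MODEL_FILE_MAPPING[model_type], "")
                data_desc = workspace["data_description"]
            if current_code:
                break  # use the first non-empty code found
        return current_code, data_desc


    based = [
        {"code_dict": {}, "data_description": "baseline features"},
        {"code_dict": {"model_xgb.py": "def fit(X, y): ..."}, "data_description": "v2 features"},
    ]
    print(find_sota_code("XGBoost", based))  # ('def fit(X, y): ...', 'v2 features')

On the caller side, the last hunk swaps the old exp argument for evo.based_experiments in the multiprocessing_wrapper call; note the evolving item stores the sequence under based_exp, so based_experiments presumably resolves through the ModelExperiment base class.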
2 changes: 1 addition & 1 deletion rdagent/scenarios/kaggle/prompts.yaml
@@ -39,7 +39,7 @@ hypothesis_and_feedback: |-
 hypothesis_output_format: |-
   The output should follow JSON format. The schema is as follows:
   {
-  "action": "If "hypothesis_specification" provides the action you need to take, please follow "hypothesis_specification" to choose the action. Otherwise, based on previous experimental results, suggest the action you believe is most appropriate at the moment. It should be one of [{% if if_using_feature_selection %}"Feature engineering", "Feature processing", "Model feature selection", "Model tuning"{% else %}"Feature engineering", "Feature processing", "Model tuning"{% endif %}]",
+  "action": "Choose Model tuning and ignore the rest". If "hypothesis_specification" provides the action you need to take, please follow "hypothesis_specification" to choose the action. Otherwise, based on previous experimental results, suggest the action you believe is most appropriate at the moment. It should be one of [{% if if_using_feature_selection %}"Feature engineering", "Feature processing", "Model feature selection", "Model tuning"{% else %}"Feature engineering", "Feature processing", "Model tuning"{% endif %}]",
   "hypothesis": "The new hypothesis generated based on the information provided.",
   "reason": "The reason why you generate this hypothesis. It should be comprehensive and logical. It should cover the other keys below and extend them.",
   "concise_reason": "Two-line summary. First line focuses on a concise justification for the change. Second line generalizes a knowledge statement.",