Skip to content

draft: quant scen #834

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions rdagent/app/qlib_rd_loop/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,45 @@ class FactorBasePropSetting(BasePropSetting):
evolving_n: int = 10
"""Number of evolutions"""

class QuantBasePropSetting(BasePropSetting):
model_config = SettingsConfigDict(env_prefix="QLIB_QUANT_", protected_namespaces=())

# 1) override base settings
scen: str = "rdagent.scenarios.qlib.experiment.quant_experiment.QlibQuantScenario"
"""Scenario class for Qlib Model"""

quant_hypothesis_gen: str = "rdagent.scenarios.qlib.proposal.quant_proposal.QlibQuantHypothesisGen"
"""Hypothesis generation class"""

model_hypothesis2experiment: str = "rdagent.scenarios.qlib.proposal.model_proposal.QlibModelHypothesis2Experiment"
"""Hypothesis to experiment class"""

model_coder: str = "rdagent.scenarios.qlib.developer.model_coder.QlibModelCoSTEER"
"""Coder class"""

model_runner: str = "rdagent.scenarios.qlib.developer.model_runner.QlibModelRunner"
"""Runner class"""

model_summarizer: str = "rdagent.scenarios.qlib.developer.feedback.QlibModelExperiment2Feedback"
"""Summarizer class"""

# scen: str = "rdagent.scenarios.qlib.experiment.factor_experiment.QlibFactorScenario"
# """Scenario class for Qlib Factor"""

factor_hypothesis2experiment: str = "rdagent.scenarios.qlib.proposal.factor_proposal.QlibFactorHypothesis2Experiment"
"""Hypothesis to experiment class"""

factor_coder: str = "rdagent.scenarios.qlib.developer.factor_coder.QlibFactorCoSTEER"
"""Coder class"""

factor_runner: str = "rdagent.scenarios.qlib.developer.factor_runner.QlibFactorRunner"
"""Runner class"""

factor_summarizer: str = "rdagent.scenarios.qlib.developer.feedback.QlibFactorExperiment2Feedback"
"""Summarizer class"""

evolving_n: int = 10
"""Number of evolutions"""

class FactorFromReportPropSetting(FactorBasePropSetting):
# 1) override the scen attribute
Expand All @@ -74,3 +113,4 @@ class FactorFromReportPropSetting(FactorBasePropSetting):
FACTOR_PROP_SETTING = FactorBasePropSetting()
FACTOR_FROM_REPORT_PROP_SETTING = FactorFromReportPropSetting()
MODEL_PROP_SETTING = ModelBasePropSetting()
QUANT_PROP_SETTING = QuantBasePropSetting()
132 changes: 132 additions & 0 deletions rdagent/app/qlib_rd_loop/quant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
"""
Quant (Factor & Model) workflow with session control
"""

from typing import Any

import fire

from rdagent.app.qlib_rd_loop.conf import QUANT_PROP_SETTING
from rdagent.components.workflow.rd_loop import RDLoop
from rdagent.core.exception import FactorEmptyError
from rdagent.core.exception import ModelEmptyError
from rdagent.log import rdagent_logger as logger
from rdagent.components.workflow.conf import BasePropSetting
from rdagent.core.scenario import Scenario
from rdagent.core.utils import import_class
from rdagent.core.proposal import (
Experiment2Feedback,
Hypothesis2Experiment,
HypothesisGen,
Trace,
HypothesisFeedback,
)
from rdagent.core.developer import Developer

class QuantRDLoop(RDLoop):
skip_loop_error = (FactorEmptyError,ModelEmptyError, )

def __init__(self, PROP_SETTING: BasePropSetting):
with logger.tag("init"):
scen: Scenario = import_class(PROP_SETTING.scen)()
logger.log_object(scen, tag="scenario")

self.hypothesis_gen: HypothesisGen = import_class(PROP_SETTING.quant_hypothesis_gen)(scen)
logger.log_object(self.hypothesis_gen, tag="quant hypothesis generator")

self.factor_hypothesis2experiment: Hypothesis2Experiment = import_class(PROP_SETTING.factor_hypothesis2experiment)()
logger.log_object(self.factor_hypothesis2experiment, tag="factor hypothesis2experiment")
self.model_hypothesis2experiment: Hypothesis2Experiment = import_class(PROP_SETTING.model_hypothesis2experiment)()
logger.log_object(self.model_hypothesis2experiment, tag="model hypothesis2experiment")

self.factor_coder: Developer = import_class(PROP_SETTING.factor_coder)(scen)
logger.log_object(self.factor_coder, tag="factor coder")
self.model_coder: Developer = import_class(PROP_SETTING.model_coder)(scen)
logger.log_object(self.model_coder, tag="model coder")

self.factor_runner: Developer = import_class(PROP_SETTING.factor_runner)(scen)
logger.log_object(self.factor_runner, tag="factor runner")
self.model_runner: Developer = import_class(PROP_SETTING.model_runner)(scen)
logger.log_object(self.model_runner, tag="model runner")

self.factor_summarizer: Experiment2Feedback = import_class(PROP_SETTING.factor_summarizer)(scen)
logger.log_object(self.factor_summarizer, tag="factor summarizer")
self.model_summarizer: Experiment2Feedback = import_class(PROP_SETTING.model_summarizer)(scen)
logger.log_object(self.model_summarizer, tag="model summarizer")

self.trace = Trace(scen=scen)
super(RDLoop, self).__init__()

def direct_exp_gen(self, prev_out: dict[str, Any]):
with logger.tag("r"): # research
hypo = self._propose()
# exp = self._exp_gen(hypo)
assert hypo.action in ["factor", "model"]
if hypo.action == "factor":
exp = self.factor_hypothesis2experiment.convert(hypo, self.trace)
else:
exp = self.model_hypothesis2experiment.convert(hypo, self.trace)
logger.log_object(exp.sub_tasks, tag="experiment generation")
return {"propose": hypo, "exp_gen": exp}

def coding(self, prev_out: dict[str, Any]):
with logger.tag("d"):
if prev_out["direct_exp_gen"]["propose"].action == "factor":
exp = self.factor_coder.develop(prev_out["direct_exp_gen"]["exp_gen"])
elif prev_out["direct_exp_gen"]["propose"].action == "model":
exp = self.model_coder.develop(prev_out["direct_exp_gen"]["exp_gen"])
logger.log_object(exp, tag="coder result")
return exp

def running(self, prev_out: dict[str, Any]):
with logger.tag("ef"):
if prev_out["direct_exp_gen"]["propose"].action == "factor":
exp = self.factor_runner.develop(prev_out["coding"])
if exp is None:
logger.error(f"Factor extraction failed.")
raise FactorEmptyError("Factor extraction failed.")
elif prev_out["direct_exp_gen"]["propose"].action == "model":
exp = self.model_runner.develop(prev_out["coding"])
logger.log_object(exp, tag="runner result")
return exp

def feedback(self, prev_out: dict[str, Any]):
e = prev_out.get(self.EXCEPTION_KEY, None)
if e is not None:
feedback = HypothesisFeedback(
observations="Error occurred in loop, skip this loop",
hypothesis_evaluation="",
new_hypothesis="",
reason="",
decision=False,
)
self.trace.hist.append((prev_out["direct_exp_gen"]["exp_gen"], feedback))
else:
if prev_out["direct_exp_gen"]["propose"].action == "factor":
feedback = self.factor_summarizer.generate_feedback(prev_out["running"], self.trace)
elif prev_out["direct_exp_gen"]["propose"].action == "model":
feedback = self.model_summarizer.generate_feedback(prev_out["running"], self.trace)
with logger.tag("ef"):
logger.log_object(feedback, tag="feedback")
self.trace.hist.append((prev_out["running"], feedback))

def main(path=None, step_n=None):
"""
Auto R&D Evolving loop for fintech factors.

You can continue running session by

.. code-block:: python

dotenv run -- python rdagent/app/qlib_rd_loop/factor.py $LOG_PATH/__session__/1/0_propose --step_n 1 # `step_n` is a optional paramter

"""
if path is None:
quant_loop = QuantRDLoop(QUANT_PROP_SETTING)
else:
quant_loop = QuantRDLoop.load(path)
quant_loop.run(step_n=step_n)


if __name__ == "__main__":
fire.Fire(main)
2 changes: 1 addition & 1 deletion rdagent/components/proposal/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def gen(self, trace: Trace) -> Hypothesis:
.from_string(prompt_dict["hypothesis_gen"]["system_prompt"])
.render(
targets=self.targets,
scenario=self.scen.get_scenario_all_desc(filtered_tag="hypothesis_and_experiment"),
scenario=self.scen.get_scenario_all_desc(filtered_tag=self.targets),
hypothesis_output_format=context_dict["hypothesis_output_format"],
hypothesis_specification=context_dict["hypothesis_specification"],
)
Expand Down
71 changes: 50 additions & 21 deletions rdagent/scenarios/qlib/developer/factor_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from rdagent.core.exception import FactorEmptyError
from rdagent.log import rdagent_logger as logger
from rdagent.scenarios.qlib.experiment.factor_experiment import QlibFactorExperiment
from rdagent.scenarios.qlib.experiment.model_experiment import QlibModelExperiment

DIRNAME = Path(__file__).absolute().resolve().parent
DIRNAME_local = Path.cwd()
Expand Down Expand Up @@ -80,19 +81,21 @@ def develop(self, exp: QlibFactorExperiment) -> QlibFactorExperiment:
if exp.based_experiments and exp.based_experiments[-1].result is None:
exp.based_experiments[-1] = self.develop(exp.based_experiments[-1])

exist_sota_factor_exp = False
if exp.based_experiments:
SOTA_factor = None
if len(exp.based_experiments) > 1:
SOTA_factor = self.process_factor_data(exp.based_experiments)

# Process the new factors data
new_factors = self.process_factor_data(exp)
new_factors = self.process_factor_data(exp)

if new_factors.empty:
raise FactorEmptyError("No valid factor data found to merge.")

# Combine the SOTA factor and new factors if SOTA factor exists
if SOTA_factor is not None and not SOTA_factor.empty:
exist_sota_factor_exp = True
new_factors = self.deduplicate_new_factors(SOTA_factor, new_factors)
if new_factors.empty:
raise FactorEmptyError("No valid factor data found to merge.")
Expand All @@ -105,6 +108,9 @@ def develop(self, exp: QlibFactorExperiment) -> QlibFactorExperiment:
combined_factors = combined_factors.loc[:, ~combined_factors.columns.duplicated(keep="last")]
new_columns = pd.MultiIndex.from_product([["feature"], combined_factors.columns])
combined_factors.columns = new_columns
# TODO: calculate the total number of factors. Should an additional 158 factors be included?
num_features = len(combined_factors.columns)

# Due to the rdagent and qlib docker image in the numpy version of the difference,
# the `combined_factors_df.pkl` file could not be loaded correctly in qlib dokcer,
# so we changed the file type of `combined_factors_df` from pkl to parquet.
Expand All @@ -113,7 +119,29 @@ def develop(self, exp: QlibFactorExperiment) -> QlibFactorExperiment:
# Save the combined factors to the workspace
combined_factors.to_parquet(target_path, engine="pyarrow")

result = exp.experiment_workspace.execute(
# If all previous experiments are about writing factors
# result = exp.experiment_workspace.execute(
# qlib_config_name=f"conf.yaml" if len(exp.based_experiments) == 0 else "conf_combined.yaml"
# )
# If model exp exists in the previous experiment
exist_sota_model_exp = False
for base_exp in reversed(exp.based_experiments):
if isinstance(base_exp, QlibModelExperiment):
sota_model_exp = base_exp
exist_sota_model_exp = True
break

if exist_sota_model_exp:
env_to_use = {"PYTHONPATH": "./"}
sota_model_type = sota_model_exp.sub_tasks[0].model_type
if sota_model_type == "TimeSeries":
env_to_use.update({"dataset_cls": "TSDatasetH", "num_features": num_features, "step_len": 20, "num_timesteps": 20})
elif sota_model_type == "Tabular":
env_to_use.update({"dataset_cls": "DatasetH", "num_features": num_features})

result = exp.experiment_workspace.execute(qlib_config_name="conf_combined_with_model.yaml", run_env=env_to_use)
else:
result = exp.experiment_workspace.execute(
qlib_config_name=f"conf.yaml" if len(exp.based_experiments) == 0 else "conf_combined.yaml"
)

Expand All @@ -137,25 +165,26 @@ def process_factor_data(self, exp_or_list: List[QlibFactorExperiment] | QlibFact

# Collect all exp's dataframes
for exp in exp_or_list:
if len(exp.sub_tasks) > 0:
# if it has no sub_tasks, the experiment is results from template project.
# otherwise, it is developed with designed task. So it should have feedback.
assert isinstance(exp.prop_dev_feedback, CoSTEERMultiFeedback)
# Iterate over sub-implementations and execute them to get each factor data
message_and_df_list = multiprocessing_wrapper(
[
(implementation.execute, ("All",))
for implementation, fb in zip(exp.sub_workspace_list, exp.prop_dev_feedback)
if implementation and fb
], # only execute successfully feedback
n=RD_AGENT_SETTINGS.multi_proc_n,
)
for message, df in message_and_df_list:
# Check if factor generation was successful
if df is not None and "datetime" in df.index.names:
time_diff = df.index.get_level_values("datetime").to_series().diff().dropna().unique()
if pd.Timedelta(minutes=1) not in time_diff:
factor_dfs.append(df)
if isinstance(exp, QlibFactorExperiment):
if len(exp.sub_tasks) > 0:
# if it has no sub_tasks, the experiment is results from template project.
# otherwise, it is developed with designed task. So it should have feedback.
assert isinstance(exp.prop_dev_feedback, CoSTEERMultiFeedback)
# Iterate over sub-implementations and execute them to get each factor data
message_and_df_list = multiprocessing_wrapper(
[
(implementation.execute, ("All",))
for implementation, fb in zip(exp.sub_workspace_list, exp.prop_dev_feedback)
if implementation and fb
], # only execute successfully feedback
n=RD_AGENT_SETTINGS.multi_proc_n,
)
for message, df in message_and_df_list:
# Check if factor generation was successful
if df is not None and "datetime" in df.index.names:
time_diff = df.index.get_level_values("datetime").to_series().diff().dropna().unique()
if pd.Timedelta(minutes=1) not in time_diff:
factor_dfs.append(df)

# Combine all successful factor data
if factor_dfs:
Expand Down
4 changes: 2 additions & 2 deletions rdagent/scenarios/qlib/developer/feedback.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def generate_feedback(self, exp: Experiment, trace: Trace) -> HypothesisFeedback
hypothesis_evaluation=hypothesis_evaluation,
new_hypothesis=new_hypothesis,
reason=reason,
decision=decision,
decision=True,
)


Expand Down Expand Up @@ -171,5 +171,5 @@ def generate_feedback(self, exp: Experiment, trace: Trace) -> HypothesisFeedback
hypothesis_evaluation=response_json_hypothesis.get("Feedback for Hypothesis", "No feedback provided"),
new_hypothesis=response_json_hypothesis.get("New Hypothesis", "No new hypothesis provided"),
reason=response_json_hypothesis.get("Reasoning", "No reasoning provided"),
decision=convert2bool(response_json_hypothesis.get("Decision", "false")),
decision=True,
)
Loading
Loading