Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Archive] Code cot #350

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Code cot
  • Loading branch information
you-n-g committed Sep 26, 2024
commit 2f067f83cfd5e7d97eb6e2e589d821c1f74496c6
22 changes: 6 additions & 16 deletions rdagent/components/coder/factor_coder/CoSTEER/evolving_strategy.py
Original file line number Diff line number Diff line change
@@ -23,6 +23,7 @@
from rdagent.core.prompts import Prompts
from rdagent.core.utils import multiprocessing_wrapper
from rdagent.oai.llm_utils import APIBackend
from rdagent.utils.agent.tpl import T

if TYPE_CHECKING:
from rdagent.components.coder.factor_coder.CoSTEER.knowledge_management import (
@@ -127,34 +128,22 @@ def implement_one_factor(

queried_former_failed_knowledge_to_render = queried_former_failed_knowledge

system_prompt = (
Environment(undefined=StrictUndefined)
.from_string(
implement_prompts["evolving_strategy_factor_implementation_v1_system"],
)
.render(
system_prompt = T(".prompts:evolving_strategy_factor_implementation_v1_system").r(
scenario=self.scen.get_scenario_all_desc(),
queried_former_failed_knowledge=queried_former_failed_knowledge_to_render,
)
enable_code_cot=FACTOR_IMPLEMENT_SETTINGS.enable_code_cot,
)
session = APIBackend(use_chat_cache=FACTOR_IMPLEMENT_SETTINGS.coder_use_cache).build_chat_session(
session_system_prompt=system_prompt,
)

queried_similar_successful_knowledge_to_render = queried_similar_successful_knowledge
for _ in range(10): # max attempt to reduce the length of user_prompt
user_prompt = (
Environment(undefined=StrictUndefined)
.from_string(
implement_prompts["evolving_strategy_factor_implementation_v1_user"],
)
.render(
user_prompt = T(".prompts:evolving_strategy_factor_implementation_v1_user").r(
factor_information_str=factor_information_str,
queried_similar_successful_knowledge=queried_similar_successful_knowledge_to_render,
queried_former_failed_knowledge=queried_former_failed_knowledge_to_render,
)
.strip("\n")
)
).strip("\n")
if (
session.build_chat_completion_message_and_calculate_token(
user_prompt,
@@ -228,6 +217,7 @@ def implement_one_factor(
.render(
scenario=self.scen.get_scenario_all_desc(),
queried_former_failed_knowledge=queried_former_failed_knowledge_to_render,
enable_code_cot=FACTOR_IMPLEMENT_SETTINGS.enable_code_cot,
)
)

3 changes: 3 additions & 0 deletions rdagent/components/coder/factor_coder/config.py
Original file line number Diff line number Diff line change
@@ -52,6 +52,9 @@ class Config:
max_loop: int = 10
"""Maximum number of task implementation loops"""

enable_code_cot: bool = False
"""Indicates whether to enable code cot"""

knowledge_base_path: Union[str, None] = None
"""Path to the knowledge base"""

40 changes: 39 additions & 1 deletion rdagent/components/coder/factor_coder/prompts.yaml
Original file line number Diff line number Diff line change
@@ -57,9 +57,46 @@ evolving_strategy_factor_implementation_v1_system: |-
{{ queried_former_failed_knowledge[-1].feedback }}
{% endif %}

{% if enable_code_cot %}
-------------- **The code standard** ----------------
You must write code with detailed comments to explain your thought process!!!
Even if other example code does not follow this, you should strictly adhere to this code standard.
If data processing is involved, include comments to describe the index and columns at each step.
Here is an example:
```python
import pandas as pd
focused_fields = ["NET_PROFIT_INCL_MIN_INT_INC"]

df = pd.read_hdf("ASHAREINCOME.h5", key="data") # type: pd.DataFrame; index with multi-level [datetime, instrument]; columns with single level that contains values like [REPORT_PERIOD, STATEMENT_TYPE, ..., NET_PROFIT_INCL_MIN_INT_INC]

# filter only part of the report statement
df = df[df["STATEMENT_TYPE"] == "408006000"] # type: pd.DataFrame; index with multi-level [datetime, instrument]; columns with single level that contains values like [REPORT_PERIOD, STATEMENT_TYPE, ..., NET_PROFIT_INCL_MIN_INT_INC]
df = df[["REPORT_PERIOD"] + focused_fields] # type: pd.DataFrame; index with multi-level [datetime, instrument]; columns with single level that contains values like [REPORT_PERIOD, NET_PROFIT_INCL_MIN_INT_INC]
df.columns.name = "field" # type: pd.DataFrame; index with multi-level [datetime, instrument]; columns level [field] that contains values like [REPORT_PERIOD, NET_PROFIT_INCL_MIN_INT_INC]
df = df.set_index("REPORT_PERIOD", append=True).unstack(level="REPORT_PERIOD") # type: pd.DataFrame; index with multi-level [datetime, instrument]; columns multi-level [field, REPORT_PERIOD]
df = df.unstack("instrument") # type: pd.DataFrame; index with level [datetime]; columns multi-level [field, REPORT_PERIOD, instrument]
# fill the previous published value to later release date
df = df.sort_index().ffill(axis=0) # type: pd.DataFrame; index with level [datetime]; columns multi-level [field, REPORT_PERIOD, instrument]

pit_f_final = {}
for idx, row in df.iterrows():
pit_df = row.unstack("field").sort_index() # ensure the report_period is in ascending order # type: pd.DataFrame; index with level [REPORT_PERIOD, instrument]; columns multi-level [field]
# 1) collapse all the columns
pit_f = pit_df["NET_PROFIT_INCL_MIN_INT_INC"] # type: pd.Series; index with level [REPORT_PERIOD, instrument]
# 2)
pit_f_df = pit_f.unstack("instrument") # type: pd.DataFrame; index with level [REPORT_PERIOD]; columns multi-level [instrument]
pit_f_ttm = pit_f_df.iloc[-4:, :].sum() # this for TTM # type: pd.Series; index with level [instrument]
pit_f_final[idx] = pit_f_ttm

pit_f_final = pd.DataFrame(pit_f_final).T # type: pd.DataFrame; index with level [datetime]; columns multi-level [instrument]
pit_f_final = pit_f_final.stack() # type: pd.Series; index with level [datetime, instrument]
pit_f_final.to_frame("net_profit_ttm").to_hdf('result.h5', key='data', mode='w')
```
{% endif %}

Please respond with the code in the following JSON format. Here is an example structure for the JSON output:
{
"code": "The Python code as a string."
"code": "The Python code as a string that follows the code standard."
}

evolving_strategy_factor_implementation_v1_user: |-
@@ -86,6 +123,7 @@ evolving_strategy_factor_implementation_v1_user: |-
{% endfor %}
{% endif %}


evolving_strategy_factor_implementation_v2_user: |-
--------------Target factor information:---------------
{{ factor_information_str }}
8 changes: 8 additions & 0 deletions rdagent/utils/agent/tpl.py
Original file line number Diff line number Diff line change
@@ -17,6 +17,14 @@
PROJ_PATH = DIRNAME.parent.parent


# TODO: It would be better to add a Truncator class to the T class so that truncation logic can be reused.
# class Truncator:
# def __init__(self, limit: int):
# self.limit = limit
#
# def render_with_trunc(self, render_func, context: dict):


# class T(SingletonBaseClass): TODO: singleton does not support args now.
class T:
"""Use the simplest way to (C)reate a Template and (r)ender it!!"""
Loading
Oops, something went wrong.