Revised to support submission specifications

microsoft · xisen-w · Sep 24, 2024 · Sep 24, 2024 · Sep 24, 2024 · Sep 24, 2024
commit 81bd49775f3aba19c0d9d5ad850e8d92f860c79d
diff --git a/rdagent/scenarios/kaggle/experiment/prompts.yaml b/rdagent/scenarios/kaggle/experiment/prompts.yaml
@@ -8,6 +8,8 @@ kg_description_template:
       "Competition Type": "The type of competition, e.g., 'Classification', 'Regression', 'Clustering', 'Prediction', 'Time-Series Forecasting'",
       "Competition Description": "A brief description of the competition",
       "Target Description": "A description of the target variable to be predicted",
+      "Competition Features": "Two-line description of the overall features involved within the competition as background.",
+      "Submission Specifications": "The submission specification & sample submission csv descriptions for the model to output."
     }
     Since these might be very similar column names in data like one_hot_encoded columns, you can use some regex to group them together.
 
@@ -20,7 +22,7 @@ kg_description_template:
 
 kg_background: |-
   You are solving a data science task and the type of the competition is {{ competition_type }}.
-  The competition description is:{{competition_description}}
+  The competition description is: {{ competition_description }}.
 
   We provide an overall script in file: train.py. The user will run the train.py script along with several feature and model scripts to train several model to get a good performance on this task.
 
@@ -63,6 +65,8 @@ kg_background: |-
 
   For each loop, you need to help user decide which action item to choose and provide the corresponding code to implement the action item.
 
+  Most importantly, the output format & submission requirements are listed here: {{ submission_specifications }}
+
 kg_feature_interface: |-
   Your code should contain several parts:
   1. The import part: import the necessary libraries.

diff --git a/rdagent/scenarios/kaggle/experiment/scenario.py b/rdagent/scenarios/kaggle/experiment/scenario.py
@@ -29,11 +29,12 @@ def __init__(self, competition: str) -> None:
         self._output_format = self.output_format
         self._interface = self.interface
         self._simulator = self.simulator
-
+    
         self.competition_type = None
         self.competition_description = None
         self.target_description = None
         self.competition_features = None
+        self.submission_specifications = None
         self._analysis_competition_description()
         self.if_action_choosing_based_on_UCB = KAGGLE_IMPLEMENT_SETTING.if_action_choosing_based_on_UCB
 
@@ -65,14 +66,15 @@ def _analysis_competition_description(self):
         self.competition_type = response_json_analysis.get("Competition Type", "No type provided")
         self.competition_description = response_json_analysis.get("Competition Description", "No description provided")
         self.target_description = response_json_analysis.get("Target Description", "No target provided")
-        self.competition_features = response_json_analysis.get("Competition Features", "No features provided")
+        self.competition_features = response_json_analysis.get("Competition Features", "No features provided") 
+        self.submission_specifications = response_json_analysis.get("Submission Specifications", "No submission requirements provided")
 
     def get_competition_full_desc(self) -> str:
         return f"""Competition Type: {self.competition_type}
-Competition Description: {self.competition_description}
-Target Description: {self.target_description}
-Competition Features: {self.competition_features}
-"""
+    Competition Description: {self.competition_description}
+    Target Description: {self.target_description}
+    Competition Features: {self.competition_features}
+    """
 
     @property
     def background(self) -> str:
@@ -91,6 +93,7 @@ def background(self) -> str:
                 competition_description=self.competition_description,
                 target_description=self.target_description,
                 competition_features=self.competition_features,
+                submission_specifications=self.submission_specifications
             )
         )
         return background_prompt
@@ -165,4 +168,6 @@ def get_scenario_all_desc(self) -> str:
 {self._output_format}
 The simulator user can use to test your model:
 {self._simulator}
+The expected output & submission format specifications:
+{self.submission_specifications} # Added again to emphasize the importance
 """