Merge branch 'main' into feature-selection-better

microsoft · xisen-w · Sep 24, 2024 · Sep 24, 2024 · Sep 24, 2024 · Sep 24, 2024
commit 9a8951cac7aa59540b6470e55fa40ffad8d18b50
diff --git a/rdagent/scenarios/kaggle/experiment/prompts.yaml b/rdagent/scenarios/kaggle/experiment/prompts.yaml
@@ -316,11 +316,12 @@ kg_feature_simulator: |-
 
 kg_model_output_format: |-
   For feature related tasks, the output should be a pandas DataFrame with the new features. The columns should be the new features, and the rows should correspond to the number of samples in the input DataFrame.
-  For model related tasks:
-  1. the output should be an np.ndarray with the appropriate number of predictions & the appropriate values within each prediction 
-  2. the output should be a 2D array with dimensions corresponding to the number of predictions and the number of things to output. Eg, if 4 predictions, each prediction needs to predict 3 probabilities, then (4,3). Or (8, 1) if there are 8 predictions but each prediction is only one value.
-  3. please reference the competition's submission requirement and align with that. 
-  Submission Requirements here:\n: {{submission_specifications}}
+  For model related tasks, the output should be an np.ndarray with the appropriate number of predictions.
+  {% if channel == 1 %}
+  For each sample, the output should be a single value (e.g., (8, 1) if there are 8 samples).
+  {% else %}
+  For each sample, the output should be multiple values with {{ channel }} numbers (e.g., (8, {{ channel }}) if there are 8 samples).
+  {% endif %}
 
 kg_model_simulator: |-
   The models will be trained on the competition dataset and evaluated on their ability to predict the target. Metrics like accuracy and AUC-ROC are used to evaluate the model performance. 

diff --git a/rdagent/scenarios/kaggle/experiment/scenario.py b/rdagent/scenarios/kaggle/experiment/scenario.py
@@ -42,6 +42,12 @@ def __init__(self, competition: str) -> None:
         self._background = self.background
 
         self.if_action_choosing_based_on_UCB = KAGGLE_IMPLEMENT_SETTING.if_action_choosing_based_on_UCB
+        self.if_using_feature_selection = KAGGLE_IMPLEMENT_SETTING.if_using_feature_selection
+
+        self._output_format = self.output_format
+        self._interface = self.interface
+        self._simulator = self.simulator
+        self._background = self.background
 
     def _analysis_competition_description(self):
         sys_prompt = (
@@ -65,25 +71,15 @@ def _analysis_competition_description(self):
             json_mode=True,
         )
 
-        try:
-            response_json_analysis = json.loads(response_analysis)
-            self.competition_type = response_json_analysis.get("Competition Type", "No type provided")
-            self.competition_description = response_json_analysis.get(
-                "Competition Description", "No description provided"
-            )
-            self.target_description = response_json_analysis.get("Target Description", "No target provided")
-            self.competition_features = response_json_analysis.get("Competition Features", "No features provided")
-            self.submission_specifications = response_json_analysis.get(
-                "Submission Specifications", "No submission requirements provided"
-            )
-        except json.JSONDecodeError:
-            print(f"Failed to parse JSON response: {response_analysis}")
-            # Set default values if JSON parsing fails
-            self.competition_type = "Unknown"
-            self.competition_description = "No description available"
-            self.target_description = "No target available"
-            self.competition_features = "No features available"
-            self.submission_specifications = "No submission requirements available"
+        response_json_analysis = json.loads(response_analysis)
+        self.competition_type = response_json_analysis.get("Competition Type", "No type provided")
+        self.competition_description = response_json_analysis.get("Competition Description", "No description provided")
+        self.target_description = response_json_analysis.get("Target Description", "No target provided")
+        self.competition_features = response_json_analysis.get("Competition Features", "No features provided")
+        self.submission_specifications = response_json_analysis.get(
+            "Submission Specifications", "No submission requirements provided"
+        )
+        self.model_output_channel = response_json_analysis.get("Submission channel number to each sample", 1)
 
     def get_competition_full_desc(self) -> str:
         return f"""Competition Type: {self.competition_type}
@@ -156,7 +152,7 @@ def output_format(self) -> str:
         return (
             Environment(undefined=StrictUndefined)
             .from_string(prompt_dict["kg_model_output_format"])
-            .render(submission_specifications=self.submission_specifications)
+            .render(channel=self.model_output_channel)
         )
 
     @property