chore: use raw description #633

Draft · wants to merge 2 commits into base: main
@@ -37,6 +37,9 @@ train_X, val_X, train_y, val_y = train_test_split(X, y, test_size=0.2, random_st

 # Print the types of train_y and val_y
 print(f"train_y type: {type(train_y)}, val_y type: {type(val_y)}")
+if isinstance(val_y, pd.Series | pd.DataFrame):
+    print(f"val_y shape: {val_y.shape} and first few rows of val_y:")
+    print(val_y.head())

 test_preds_dict = {}
 val_preds_dict = {}
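
A side note on the check added above: isinstance(val_y, pd.Series | pd.DataFrame) uses the PEP 604 union form, which isinstance only accepts on Python 3.10+. A minimal, version-portable sketch using the tuple form instead (the val_y stand-in is invented for illustration):

    import pandas as pd

    val_y = pd.Series([0, 1, 1])  # stand-in for the real validation labels

    # The tuple form works on every supported Python version, unlike
    # pd.Series | pd.DataFrame, which requires Python 3.10+.
    if isinstance(val_y, (pd.Series, pd.DataFrame)):
        print(f"val_y shape: {val_y.shape} and first few rows of val_y:")
        print(val_y.head())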
@@ -24,6 +24,8 @@ print(f"train_X.shape: {train_X.shape}")
 print(f"train_y.shape: {train_y.shape}" if not isinstance(train_y, list) else f"train_y(list)'s length: {len(train_y)}")
 print(f"val_X.shape: {val_X.shape}")
 print(f"val_y.shape: {val_y.shape}" if not isinstance(val_y, list) else f"val_y(list)'s length: {len(val_y)}")
+if isinstance(train_y, list) and not isinstance(val_y[0], int | str | float):
+    print(f"first 3 train_y: {train_y[:3]}")

 # First execution
 print("The first execution begins.\n")
@@ -30,6 +30,7 @@ assert X is not None, "Training data (X) is None."
 assert y is not None, "Training labels (y) are None."
 assert X_test is not None, "Test data (X_test) is None."
 assert test_ids is not None, "Test IDs (test_ids) are None."
+assert X.shape

 assert get_length(X_test) == get_length(
     test_ids
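
Note that the bare assert X.shape only fails for a 0-dimensional array, whose shape is the empty tuple (and it raises AttributeError for inputs without .shape). A sketch of a stricter guard, assuming the intent is to verify non-empty training data; the message strings are illustrative:

    import numpy as np

    X = np.zeros((100, 5))  # stand-in for the real training matrix

    # shape == () only for 0-d arrays, so check the first dimension
    # explicitly when the goal is "X has at least one row".
    assert hasattr(X, "shape"), "Training data (X) has no shape attribute."
    assert X.shape[0] > 0, "Training data (X) is empty."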
2 changes: 2 additions & 0 deletions rdagent/components/coder/data_science/workflow/prompts.yaml
@@ -40,6 +40,7 @@ workflow_coder:
 3. The user may provide specific code organization rules and instructions. Ensure that the integration follows the given framework and structure.
 4. After predicting the output, print the shape and other information of the output to stdout to help the evaluator assess the code.
 5. You should avoid using the logging module to output information in your generated code; use the print() function instead.
+6. The code will be evaluated on a sampled dataset, so don't hardcode the length of the input data or output.

 ## Output Format
 Please respond with the code in the following JSON format. Here is an example structure for the JSON output:
@@ -105,6 +106,7 @@ workflow_eval:
 Your focus is to check whether the workflow code:
 1. Executes successfully, correctly organizing components and generating a final submission.
 2. Generates predictions in the correct format, ensuring they align with the **sample submission** structure!
+3. The code will be evaluated on a sampled dataset, so don't check the length of the input data or output.

 [Note]
 1. The individual components (data loading, feature engineering, model tuning, etc.) have already been evaluated by the user. You should only evaluate and improve the workflow code, unless there are critical issues in the components.
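
A hypothetical illustration of the new rule 6: derive the submission length from the test input actually provided rather than from the known size of the full competition dataset. The function, file, and column names here are assumptions:

    import pandas as pd

    def build_submission(test_ids, preds):
        # The length follows the (possibly sampled) test input, never a
        # hardcoded full-dataset row count such as 418 or 28000.
        assert len(test_ids) == len(preds), "predictions must match the test input length"
        return pd.DataFrame({"id": test_ids, "prediction": preds})

    build_submission(["a", "b", "c"], [0, 1, 0]).to_csv("submission.csv", index=False)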
1 change: 1 addition & 0 deletions rdagent/scenarios/data_science/dev/runner/prompts.yaml
@@ -18,6 +18,7 @@ DSCoSTEER_eval:
 The user will provide you with the whole code base and some logs generated during the execution of the whole workflow. Your evaluation scope includes whether the workflow code:
 1. Executes successfully, correctly organizing components and generating a final submission.
 2. Generates predictions in the correct format, ensuring they align with the **sample submission** structure!
+3. The code will be evaluated on a sampled dataset, so don't check the length of the input data or output.


 Please respond with your feedback in the following JSON format and order
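
On the evaluator side, a sketch of the same rule: validate the submission's column layout against the sample submission while deliberately skipping any row-count comparison. The file names are assumptions:

    import pandas as pd

    sample = pd.read_csv("sample_submission.csv")
    submission = pd.read_csv("submission.csv")

    # Columns must line up with the sample submission...
    assert list(submission.columns) == list(sample.columns), "column mismatch"
    # ...but there is no len(submission) == len(sample) check, because the
    # run may have used a sampled dataset.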
6 changes: 2 additions & 4 deletions rdagent/scenarios/data_science/scen/__init__.py
@@ -213,7 +213,7 @@ def describe_data_folder(folder_path, indent=0, max_files=2, partial_expand_subf
 result.append(" " * (indent + 2) + f"- Content of {file}:")
 with open(path, "r", encoding="utf-8") as f:
     for i, line in enumerate(f):
-        if i < 2:
+        if i < 4:
             result.append(
                 " " * (indent + 4) + line.strip()[:100] + ("..." if len(line.strip()) > 100 else "")
             )
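
For reference, a standalone sketch of the preview pattern this hunk tunes, with the head raised from 2 to 4 lines; the helper name and defaults are illustrative:

    def preview_file(path, max_lines=4, max_chars=100):
        # Return up to max_lines lines, each truncated to max_chars characters.
        lines = []
        with open(path, "r", encoding="utf-8") as f:
            for i, line in enumerate(f):
                if i >= max_lines:
                    break
                s = line.strip()
                lines.append(s[:max_chars] + ("..." if len(s) > max_chars else ""))
        return lines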
@@ -287,9 +287,7 @@ def background(self) -> str:
 background_prompt = background_template.r(
     task_type=self.task_type,
     data_type=self.data_type,
-    brief_description=self.brief_description,
-    dataset_description=self.dataset_description,
-    target_description=self.target_description,
+    raw_description=self.raw_description,
 )
 return background_prompt

4 changes: 1 addition & 3 deletions rdagent/scenarios/data_science/scen/prompts.yaml
@@ -48,9 +48,7 @@ competition_background: |-

   The task type for this competition is {{ task_type }}.
   The data type used in this competition is {{ data_type }}.
-  Briefly, the competition involves: {{ brief_description }}.
-  The dataset used in this competition is: {{ dataset_description }}.
-  Your goal in this competition is to: {{target_description }}.
+  The description of the competition is: {{ raw_description }}

 rich_style_description: |-
   ### {{ name }} Agent: Automated Feature Engineering & Model Tuning Evolution
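
For reference, a minimal sketch of how the simplified template line renders, using plain Jinja2 in place of rdagent's own template wrapper; the sample description string is invented:

    from jinja2 import Template

    line = Template("The description of the competition is: {{ raw_description }}")
    print(line.render(raw_description="Predict survival outcomes from passenger records."))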