In [1]:
# Make sure Julia is installed in your local environment. A Docker environment also works,
# but this notebook assumes a local juliaup install to save setup time.
import os

JULIA_BIN_DIR = "/root/.juliaup/bin"

# `%env PATH=/root/.juliaup/bin:$PATH` stored the literal text "$PATH" instead of the
# previous value (the echoed output shows it unexpanded), which drops every other
# directory from PATH. Prepend through os.environ so the existing entries are kept.
_path_entries = [entry for entry in os.environ.get("PATH", "").split(os.pathsep) if entry]
if JULIA_BIN_DIR not in _path_entries:
    # Only prepend once so re-running this cell does not keep growing PATH.
    os.environ["PATH"] = os.pathsep.join([JULIA_BIN_DIR] + _path_entries)

env: PATH=/root/.juliaup/bin:$PATH


In [6]:
import pandas as pd
import subprocess
import tempfile
import os
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed

# Load the dataset to test (one row per task)
df = pd.read_parquet('julia_code_output_smaller_model.parquet')

# Accumulators for the per-task outcomes
successful_tasks, failed_tasks = [], []

print("Starting tests for {} tasks...".format(len(df)))
print("=" * 80 + "\n")

def run_julia_test(idx, task_id, julia_code, julia_test, timeout=30):
    """Run one task's Julia code followed by its tests in a separate `julia` process.

    The code and the tests are concatenated into a temporary ``.jl`` file, which is
    executed with the `julia` executable found on PATH and always removed afterwards.

    Args:
        idx: Row index of the task; passed through unchanged in the result.
        task_id: Identifier of the task; passed through unchanged in the result.
        julia_code: Julia source under test.
        julia_test: Julia test code appended after ``julia_code``.
        timeout: Seconds to wait for the Julia process before giving up (default 30).

    Returns:
        ``("success", task_id, idx, stdout, stderr)`` when the process exits with 0,
        otherwise a 6-tuple ``(status, task_id, idx, stdout, stderr, code)`` where
        ``status`` is ``"failed"`` (``code`` is the process return code),
        ``"timeout"`` (``code`` is ``'TIMEOUT'``) or ``"exception"``
        (``code`` is ``'EXCEPTION'`` and ``stderr`` holds the exception text).
    """
    full_code = f"{julia_code}\n\n{julia_test}"
    temp_file = None

    try:
        # Julia source files are UTF-8; write explicitly in UTF-8 so Unicode
        # identifiers/operators do not depend on the locale's default encoding.
        # Writing inside the try block means a failed write is reported as an
        # "exception" result and the file is still cleaned up.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.jl', delete=False, encoding='utf-8'
        ) as f:
            temp_file = f.name
            f.write(full_code)

        result = subprocess.run(
            ['julia', temp_file],
            capture_output=True,
            text=True,
            # Decode the output as UTF-8 and replace undecodable bytes so a
            # decoding problem is never mistaken for a failing task.
            encoding='utf-8',
            errors='replace',
            timeout=timeout
        )

        if result.returncode == 0:
            return ("success", task_id, idx, result.stdout, result.stderr)
        else:
            return ("failed", task_id, idx, result.stdout, result.stderr, result.returncode)

    except subprocess.TimeoutExpired:
        return ("timeout", task_id, idx, '', f'Test exceeded {timeout} second timeout', 'TIMEOUT')

    except Exception as e:
        return ("exception", task_id, idx, '', str(e), 'EXCEPTION')

    finally:
        # delete=False above, so the file must be removed by hand on every path.
        if temp_file is not None and os.path.exists(temp_file):
            os.remove(temp_file)


# Number of threads — adjust based on your CPU (e.g., 4–8)
MAX_WORKERS = 8

with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    # future -> row index, plus future -> task_id so that a future which raises
    # can still be attributed to its task and recorded as a failure.
    future_to_task = {}
    future_to_task_id = {}
    for idx, row in df.iterrows():
        future = executor.submit(
            run_julia_test, idx, row['task_id'], row['julia_code'], row['julia_test']
        )
        future_to_task[future] = idx
        future_to_task_id[future] = row['task_id']

    # Handle results in completion order (not submission order).
    for future in as_completed(future_to_task):
        idx = future_to_task[future]
        try:
            # Success results carry 5 fields; every other status adds a 6th (the return code).
            status, task_id, row_index, stdout, stderr, *extra = future.result()
        except Exception as e:
            print(f"💥 ERROR in future: {e}")
            # Previously this case was only printed, so the task was counted as neither
            # passed nor failed. Record it so the summary and the CSV stay complete.
            failed_tasks.append({
                'task_id': future_to_task_id[future],
                'row_index': idx,
                'return_code': 'EXCEPTION',
                'stderr': str(e),
                'stdout': ''
            })
            continue

        if status == "success":
            print(f"✅ SUCCESS: {task_id} (Row {row_index + 1}/{len(df)})")
            successful_tasks.append(task_id)
            continue

        if status == "failed":
            print(f"❌ FAILED: {task_id} (Row {row_index + 1})")
            print(f"   Error output: {stderr[:200]}...")
        elif status == "timeout":
            print(f"⏱️ TIMEOUT: {task_id}")
        else:
            print(f"💥 ERROR: {task_id} - {stderr}")

        # The 6th field is the process return code, 'TIMEOUT' or 'EXCEPTION';
        # fall back to 'EXCEPTION' if a result arrives without it.
        failed_tasks.append({
            'task_id': task_id,
            'row_index': row_index,
            'return_code': extra[0] if extra else 'EXCEPTION',
            'stderr': stderr,
            'stdout': stdout
        })

# Print summary
total_tasks = len(df)
# An empty dataset would otherwise raise ZeroDivisionError; report 0% instead.
success_rate = len(successful_tasks) / total_tasks * 100 if total_tasks else 0.0

print(f"\n{'='*80}")
print("SUMMARY")
print(f"{'='*80}")
print(f"Total tasks: {total_tasks}")
print(f"✅ Successful: {len(successful_tasks)}")
print(f"❌ Failed: {len(failed_tasks)}")
print(f"Success rate: {success_rate:.2f}%")

if failed_tasks:
    # Persist the failures to a timestamped CSV in the working directory.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    failed_csv_path = f'failed_tasks_{timestamp}.csv'
    failed_df = pd.DataFrame(failed_tasks)
    failed_df.to_csv(failed_csv_path, index=False)
    print(f"\n❌ Failed tasks saved to: {failed_csv_path}")

    print("\nFailed task IDs:")
    for task in failed_tasks:
        print(f"  - {task['task_id']} (row {task['row_index']})")
else:
    print("\n🎉 All tasks passed!")

print(f"\n{'='*80}")


Starting tests for 1247 tasks...

✅ SUCCESS: task_9 (Row 5/1247)
✅ SUCCESS: task_2 (Row 2/1247)
✅ SUCCESS: task_0 (Row 1/1247)
✅ SUCCESS: task_14 (Row 9/1247)
✅ SUCCESS: task_20 (Row 12/1247)
❌ FAILED: task_11 (Row 7)
   Error output: ERROR: LoadError: Some tests did not pass: 0 passed, 0 failed, 3 errored, 0 broken.
in expression starting at /tmp/tmpsdpq4zen.jl:17
...
❌ FAILED: task_12 (Row 8)
   Error output: ERROR: LoadError: Some tests did not pass: 1 passed, 2 failed, 0 errored, 0 broken.
in expression starting at /tmp/tmp2e0iz3ih.jl:20
...
❌ FAILED: task_4 (Row 4)
   Error output: ERROR: LoadError: Some tests did not pass: 0 passed, 0 failed, 3 errored, 0 broken.
in expression starting at /tmp/tmpw_ssgzjn.jl:21
...
❌ FAILED: task_3 (Row 3)
   Error output: ERROR: LoadError: Some tests did not pass: 0 passed, 0 failed, 3 errored, 0 broken.
in expression starting at /tmp/tmp4tl0a5em.jl:47
...
✅ SUCCESS: task_30 (Row 15/1247)
❌ FAILED: task_17 (Row 11)
   Error output: ERROR: LoadEr

In [14]:
import pandas as pd
import glob
from datetime import datetime

# Read the original parquet file
# NOTE(review): the test cell reads 'julia_code_output_smaller_model.parquet' while this
# cell filters 'clean_julia_set_3.parquet' — confirm both files share the same task_ids.
df = pd.read_parquet('clean_julia_set_3.parquet')
print(f"Original dataset: {len(df)} rows")

# Find the most recent failed_tasks CSV file
failed_files = glob.glob('failed_tasks_*.csv')

if not failed_files:
    # Raise instead of calling exit(): inside a notebook exit() shuts the kernel down,
    # whereas an exception just stops this cell and shows the reason.
    raise FileNotFoundError(
        "No failed_tasks CSV file found! "
        "Please run the test script first to generate failed tasks."
    )

# Get the most recent file. Names look like failed_tasks_YYYYMMDD_HHMMSS.csv, so compare
# on everything after the 'failed_tasks_' prefix (date AND time). Splitting on every '_'
# and taking element 2 only compared the date, so runs from the same day tied.
latest_failed_file = max(failed_files, key=lambda x: x.split('_', 2)[2])
print(f"\nUsing failed tasks from: {latest_failed_file}")

# Read the failed tasks
failed_df = pd.read_csv(latest_failed_file)
failed_task_ids = failed_df['task_id'].tolist()

print(f"Failed tasks to remove: {len(failed_task_ids)}")
print(f"\nFailed task IDs:")
for task_id in failed_task_ids:
    print(f"  - {task_id}")

# Remove failed tasks
df_filtered = df[~df['task_id'].isin(failed_task_ids)]

# Report the number of rows actually dropped; it can differ from the number of failed IDs
# when some IDs are absent from (or repeated in) this dataset.
removed_rows = len(df) - len(df_filtered)

print(f"\n{'='*80}")
print(f"Original rows: {len(df)}")
print(f"Failed rows removed: {removed_rows}")
print(f"Remaining rows: {len(df_filtered)}")
print(f"{'='*80}")

# Save the filtered dataset
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
output_file = f'cleaned_file_passed_only_{timestamp}.parquet'
df_filtered.to_parquet(output_file, index=False)

# Guard the ratio so an empty input dataset does not raise ZeroDivisionError.
success_rate = len(df_filtered) / len(df) * 100 if len(df) else 0.0

print(f"\n✅ Filtered dataset saved to: {output_file}")
print(f"Success rate: {success_rate:.2f}%")

Original dataset: 1000 rows

Using failed tasks from: failed_tasks_20251026_085318.csv
Failed tasks to remove: 387

Failed task IDs:
  - task_1235
  - task_1236
  - task_1237
  - task_1244
  - task_1245
  - task_1252
  - task_1257
  - task_1266
  - task_1267
  - task_1268
  - task_1269
  - task_1273
  - task_1280
  - task_1281
  - task_1282
  - task_1285
  - task_1287
  - task_1288
  - task_1289
  - task_1290
  - task_1291
  - task_1293
  - task_1294
  - task_1295
  - task_1296
  - task_1297
  - task_1298
  - task_1299
  - task_1305
  - task_1307
  - task_1309
  - task_1310
  - task_1312
  - task_1314
  - task_1316
  - task_1318
  - task_1321
  - task_1323
  - task_1325
  - task_1326
  - task_1327
  - task_1329
  - task_1336
  - task_1343
  - task_1344
  - task_1346
  - task_1351
  - task_1352
  - task_1356
  - task_1358
  - task_1360
  - task_1365
  - task_1367
  - task_1369
  - task_1370
  - task_1373
  - task_1378
  - task_1385
  - task_1386
  - task_1395
  - task_1397
  - task_1400

In [11]:
# Show the notebook's current working directory (relative file paths above resolve against it).
pwd

'/workspace/AIAC'