In [3]:
import pandas as pd
# Load the source dataset and show which columns it provides.
df = pd.read_parquet("cargo_test_passed_train.parquet")
print(df.columns)

Index(['task_id', 'rust_prompt', 'rust_code', 'rust_test_list'], dtype='object')


In [2]:
# Prompt template for rewriting a Rust-flavoured task description as a Julia one.
# It is filled in with str.format, and `{prompt}` is its only placeholder.
# The template asks the model to return the translated question only, not an answer.
translate_to_julia = """The following prompt may have Rust syntax, variable types, and Rust-specific behavior mentioned. Translate the question to the appropriate Julia syntax and replace anything mentioning Rust with Julia.

Do NOT use or mention:
* Complex type annotations
* Overly specific type constraints
* Advanced Julia metaprogramming
* External packages

Make sure to use:
* Simple types: Int64, Float64, String, Vector, etc.

Only return the translated question and do not answer the question.

Rust Prompt:
{prompt}

Julia Prompt:"""

In [3]:
from openai import OpenAI

# Shared client used by get_model_response; it targets an endpoint on localhost port 8001.
# NOTE(review): "not-needed" looks like a placeholder rather than a real credential;
# presumably the local server ignores the key - confirm before pointing this elsewhere.
client = OpenAI(
    base_url="http://localhost:8001/v1",
    api_key="not-needed"
)

def get_model_response(prompt: str, model: str = "Unsloth/Llama-3.3-70B-Instruct") -> str:
    """Send ``prompt`` as a single user message and return the reply text.

    The request goes through the module-level ``client`` with a fixed
    "helpful assistant" system message.

    Args:
        prompt: Full text of the user message.
        model: Model identifier passed to the chat-completions endpoint.
            Defaults to the model this notebook was written against.

    Returns:
        The reply with surrounding whitespace removed. If the reply has no
        choices, no message, or no non-whitespace content, the placeholder
        ``"[No response returned]"`` is returned. If the request raises, the
        placeholder ``"[Error: <exception text>]"`` is returned instead of
        propagating the exception.

    Note:
        Both placeholders are ordinary strings, so callers that persist the
        result must check for them if they want to detect failed requests.
    """
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt},
            ],
        )
        choices = response.choices
        message = choices[0].message if choices else None
        content = message.content if message else None
        # Strip before testing for emptiness so that a whitespace-only reply
        # is reported with the placeholder rather than as an empty string.
        text = content.strip() if content else ""
        return text or "[No response returned]"
    except Exception as e:
        # Deliberate best-effort: a failed request must not abort a long batch.
        return f"[Error: {e}]"

In [4]:
# Translate one slice of the Rust prompts into Julia prompts and save the result.
start_idx = 1000
end_idx = 2000

batch = df.iloc[start_idx:end_idx]
results = []

# Walk the index labels alongside the two columns this step needs.
for i, task_id, rust_prompt in zip(batch.index, batch["task_id"], batch["rust_prompt"]):
    # The reply is stored as-is, including any placeholder string that
    # get_model_response returns for an empty or failed request.
    julia_prompt = get_model_response(translate_to_julia.format(prompt=rust_prompt))
    results.append({"task_id": task_id, "julia_prompt": julia_prompt})
    print(f"✅ Processed index {i} | Task ID: {task_id}")

# Everything is written in one go once the whole slice has been processed.
output_df = pd.DataFrame(results)
output_df.to_parquet("translated_julia_prompts.parquet", index=False)
print(f"✅ Successfully saved {len(output_df)} translated prompts to 'translated_julia_prompts.parquet'")

✅ Processed index 1000 | Task ID: task_1233
✅ Processed index 1001 | Task ID: task_1234
✅ Processed index 1002 | Task ID: task_1235
✅ Processed index 1003 | Task ID: task_1236
✅ Processed index 1004 | Task ID: task_1237
✅ Processed index 1005 | Task ID: task_1238
✅ Processed index 1006 | Task ID: task_1239
✅ Processed index 1007 | Task ID: task_1241
✅ Processed index 1008 | Task ID: task_1242
✅ Processed index 1009 | Task ID: task_1243
✅ Processed index 1010 | Task ID: task_1244
✅ Processed index 1011 | Task ID: task_1245
✅ Processed index 1012 | Task ID: task_1247
✅ Processed index 1013 | Task ID: task_1248
✅ Processed index 1014 | Task ID: task_1249
✅ Processed index 1015 | Task ID: task_1250
✅ Processed index 1016 | Task ID: task_1252
✅ Processed index 1017 | Task ID: task_1253
✅ Processed index 1018 | Task ID: task_1254
✅ Processed index 1019 | Task ID: task_1255
✅ Processed index 1020 | Task ID: task_1256
✅ Processed index 1021 | Task ID: task_1257
✅ Processed index 1022 | Task ID

In [11]:
# Reload the translated prompts from the file written by the translation step.
# This rebinds `df`, so the Rust dataset loaded earlier is no longer reachable under that name.
df = pd.read_parquet("translated_julia_prompts.parquet")

In [12]:
# Prompt template asking the model for a single Julia function that solves a task.
# It is filled in with str.format, and `{julia_prompt}` is its only placeholder.
julia_code_prompt = """You are a pragmatic Julia programmer. 
Given the following question, write a Julia function to complete the task. 
Make the code simple and easy to understand. 
The code should be syntactically correct and follow Julia best practices and should run. 
Try to limit library usage to the standard library. 
Since this is a single file, do not use a module. Do not add a main function. Respond with only the Julia function and nothing else.

Question:
{julia_prompt}

Code:"""

In [13]:
# Generate Julia code for every translated prompt in the chosen slice.
start_idx = 0
end_idx = 1000

# Every row starts with an empty string; rows outside the slice keep it.
df["julia_code"] = ""

batch = df.iloc[start_idx:end_idx]
for i, task_id, julia_prompt in zip(batch.index, batch["task_id"], batch["julia_prompt"]):
    # The reply is stored as-is, including any placeholder string that
    # get_model_response returns for an empty or failed request.
    julia_code = get_model_response(julia_code_prompt.format(julia_prompt=julia_prompt))
    df.at[i, "julia_code"] = julia_code
    print(f"✅ Processed index {i} | Task ID: {task_id}")

# The whole frame is saved, not only the processed slice.
df.to_parquet("with_julia_code.parquet", index=False)
print("💾 Saved updated file with julia_code column.")


✅ Processed index 0 | Task ID: task_1233
✅ Processed index 1 | Task ID: task_1234
✅ Processed index 2 | Task ID: task_1235
✅ Processed index 3 | Task ID: task_1236
✅ Processed index 4 | Task ID: task_1237
✅ Processed index 5 | Task ID: task_1238
✅ Processed index 6 | Task ID: task_1239
✅ Processed index 7 | Task ID: task_1241
✅ Processed index 8 | Task ID: task_1242
✅ Processed index 9 | Task ID: task_1243
✅ Processed index 10 | Task ID: task_1244
✅ Processed index 11 | Task ID: task_1245
✅ Processed index 12 | Task ID: task_1247
✅ Processed index 13 | Task ID: task_1248
✅ Processed index 14 | Task ID: task_1249
✅ Processed index 15 | Task ID: task_1250
✅ Processed index 16 | Task ID: task_1252
✅ Processed index 17 | Task ID: task_1253
✅ Processed index 18 | Task ID: task_1254
✅ Processed index 19 | Task ID: task_1255
✅ Processed index 20 | Task ID: task_1256
✅ Processed index 21 | Task ID: task_1257
✅ Processed index 22 | Task ID: task_1261
✅ Processed index 23 | Task ID: task_1262
✅ 

In [14]:
# Prompt template asking the model for three `@test` statements covering a given function.
# It is filled in with str.format; the placeholders are `{julia_prompt}` and `{julia_code}`.
# NOTE(review): the example test block finishes with `end`, and the model is told to add
# one, yet nothing in the example opens a block. Confirm whether whatever consumes these
# tests supplies the matching opener; otherwise this conflicts with the template's own
# requirement that the code be syntactically correct.
# NOTE(review): "Donot" is misspelled; left unchanged because the text is sent to the model.
julia_tests_prompt = """You are a pragmatic Julia programmer. Given the following question and Julia function, write three unit tests for the function. The tests should be a simple line delimited list of @test statements using the Test module.

For example, if the function is:
```julia
function add_nums(x::Int64, y::Int64)::Int64
  x + y
end
```

The unit tests should be
```julia
using Test
@test add_nums(1, 2) == 3
@test add_nums(10, 2) == 12
@test add_nums(-10, 2) == -8
end
```

Make the tests simple and easy to understand. The code should be syntactically correct and run without errors. Do not add any other code. Respond with only the test statements (including the `using Test` line) and nothing else.
Donot forget to add end at end of the test

============

Question:
{julia_prompt}

Code:
{julia_code}

Unit Tests:"""

In [15]:
# Reload the frame from the file written by the code-generation step (rebinds `df` again).
df = pd.read_parquet("with_julia_code.parquet")

In [16]:
# Generate unit tests for every (prompt, code) pair in the chosen slice.
start_idx = 0
end_idx = 1000

# Every row starts with an empty string; rows outside the slice keep it.
df["julia_test"] = ""

batch = df.iloc[start_idx:end_idx]
columns = zip(batch.index, batch["task_id"], batch["julia_prompt"], batch["julia_code"])
for i, task_id, julia_prompt, julia_code in columns:
    final_prompt = julia_tests_prompt.format(julia_prompt=julia_prompt, julia_code=julia_code)
    # The reply is stored as-is, including any placeholder string that
    # get_model_response returns for an empty or failed request.
    julia_test = get_model_response(final_prompt)
    df.at[i, "julia_test"] = julia_test
    print(f"✅ Processed index {i} | Task ID: {task_id}")

# The whole frame is saved, not only the processed slice.
df.to_parquet("julia_set_3.parquet", index=False)
print("💾 Saved updated file with julia_test column.")


✅ Processed index 0 | Task ID: task_1233
✅ Processed index 1 | Task ID: task_1234
✅ Processed index 2 | Task ID: task_1235
✅ Processed index 3 | Task ID: task_1236
✅ Processed index 4 | Task ID: task_1237
✅ Processed index 5 | Task ID: task_1238
✅ Processed index 6 | Task ID: task_1239
✅ Processed index 7 | Task ID: task_1241
✅ Processed index 8 | Task ID: task_1242
✅ Processed index 9 | Task ID: task_1243
✅ Processed index 10 | Task ID: task_1244
✅ Processed index 11 | Task ID: task_1245
✅ Processed index 12 | Task ID: task_1247
✅ Processed index 13 | Task ID: task_1248
✅ Processed index 14 | Task ID: task_1249
✅ Processed index 15 | Task ID: task_1250
✅ Processed index 16 | Task ID: task_1252
✅ Processed index 17 | Task ID: task_1253
✅ Processed index 18 | Task ID: task_1254
✅ Processed index 19 | Task ID: task_1255
✅ Processed index 20 | Task ID: task_1256
✅ Processed index 21 | Task ID: task_1257
✅ Processed index 22 | Task ID: task_1261
✅ Processed index 23 | Task ID: task_1262
✅ 