In [None]:
# Standard-library import used for path handling throughout the notebook
import os

# Look up the working directory the kernel was started in, select it
# explicitly as the current directory, and display it for reference.
current_path = os.getcwd()
os.chdir(current_path)
print(current_path)

b:\Ernst\bi_direction Ita and Lad\llama and gpt


###  Convert the dataset into JSONL before performing fine-tuning

In [None]:
# Convert the CSV dataset into a .jsonl file by running json_converter_tai.py.
# We assume that the CSV file contains two columns (source and target sentences).
# NOTE(review): presumably this writes dataset/eng2lad_dataset_tai.jsonl, the file
# that the format-check cell reads next - confirm against json_converter_tai.py.
!python json_converter_tai.py \
  --dataset_dir ./dataset \
  --file_name eng2lad_dataset \
  --batch_sample 10 

## Create a fine-tuned (FT) Llama model using Together AI

#### Check the file format

In [None]:
from together.utils import check_file

# JSONL training file to validate (reused later when the file is uploaded)
file_name = 'dataset/eng2lad_dataset_tai.jsonl'

# Validate the file format locally and show the full report
report = check_file(file_name)
print(report)

# Stop here when validation fails; the message carries the report so the
# failing check is visible without re-running the cell.
assert report["is_check_passed"], (
    f"{file_name} did not pass the Together AI file check: {report}"
)

###  Uploading & checking the dataset to Together AI

In [None]:
from together import Together

# Read the Together AI key from the environment so that it is never stored
# in the notebook (and therefore never committed or shared with it).
together_api_key = os.environ.get("TOGETHER_API_KEY")
if not together_api_key:
    raise RuntimeError(
        "Set the TOGETHER_API_KEY environment variable before uploading the dataset."
    )
client = Together(api_key=together_api_key)

# Upload formatted data and get back the file ID
response = client.files.upload(file=file_name)
fileId = response.model_dump()["id"]
# Verify that the file was uploaded successfully
file_metadata = client.files.retrieve(fileId)
print(file_metadata)

In [None]:
# Checking the uploaded file in Together AI through the REST endpoint
import requests

url = "https://api.together.xyz/v1/files"

# Read the key from the environment so that it is never stored in the notebook
together_api_key = os.environ.get("TOGETHER_API_KEY")
if not together_api_key:
    raise RuntimeError(
        "Set the TOGETHER_API_KEY environment variable before listing the uploaded files."
    )

headers = {
    "accept": "application/json",
    "authorization": f"Bearer {together_api_key}",
}

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status surfaces HTTP errors (e.g. 401) instead of printing them
# as if they were a valid file listing.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()

print(response.text)

## Starting a Fine-tuning job

In [28]:
# Launch the fine-tuning job on Together AI for the uploaded training file
resp = client.fine_tuning.create(
    # what to train and on which data
    model="meta-llama/Meta-Llama-3.1-70B-Instruct-Reference",
    training_file=fileId,
    suffix="mt_ita_lad",
    # optimisation settings
    n_epochs=3,
    batch_size=8,
    learning_rate=1e-5,
    # LoRA settings
    lora=True,
    lora_r=32,
    #wandb_api_key=os.environ.get("WANDB_API_KEY"),
)

# Perform Translation on the Fine-tuned model

In [None]:
# Run ft_test.py to translate the test data with the fine-tuned model.
# NOTE(review): presumably the model responses are written as JSON files under
# ./save_results, which the evaluation function reads - confirm against ft_test.py.
# NOTE(review): the evaluation parameters set further down use
# llm_model = 'ft_llama_31_70b_tai', test_data = 'test_1' and batch_size = 15,
# which differ from the arguments passed here - confirm they refer to the same run.
!python ft_test.py \
  --model_name ft_llama_31_70b \
  --dataset_dir ./dataset \
  --test_data test_3_eng2lad \
  --target_lang ladin \
  --batch_size 10 \
  --save_dir ./save_results

# Evaluate the translation results using evaluation Metrics

In [None]:
# metrics
import evaluate
import pandas as pd
import json
# Load the four metrics once (same order as before); eval_metrics reuses
# these module-level objects for every batch.
rouge_score, bleu_score, chrf_score, sacrebleu_score = (
    evaluate.load(metric_name)
    for metric_name in ("rouge", "bleu", "chrf", "sacrebleu")
)

In [None]:
def eval_metrics(predictions, references):
    """Score a batch of translations against their references.

    Every entry of both inputs is converted with ``str``; ``None`` entries
    become empty strings.

    Args:
        predictions: iterable of predicted translations.
        references: iterable of reference translations.

    Returns:
        The dict returned by the ROUGE metric (computed for rouge1, rouge2 and
        rougeL) extended with the keys "sacrebleu", "bleu", "chrf++" and
        "chrf". For chrF and chrF++, higher values mean better translations.
    """
    def _as_text(items):
        # Normalise one input to a list of strings
        return ["" if item is None else str(item) for item in items]

    pair = {
        "predictions": _as_text(predictions),
        "references": _as_text(references),
    }

    # ROUGE provides the base dict; the other metrics are added to it
    result = rouge_score.compute(**pair, rouge_types=["rouge1", "rouge2", "rougeL"])
    result["sacrebleu"] = sacrebleu_score.compute(**pair)["score"]
    result["bleu"] = bleu_score.compute(**pair)["bleu"]

    chrf_plain = chrf_score.compute(**pair)
    chrf_pp = chrf_score.compute(**pair, word_order=2)  # word_order=2 gives chrF++
    result["chrf++"] = chrf_pp["score"]
    result["chrf"] = chrf_plain["score"]

    return result

In [None]:
# Compare the translation results with the ground truth
def _clean_translation_output(raw_output, target_lang):
    """Remove the preamble sentence and Markdown code fence around the list."""
    preamble = f"Here are the {target_lang} translations:"

    def drop_preamble(text):
        text = text.strip()
        if text.startswith(preamble):
            text = text[len(preamble):].strip()
        return text

    text = drop_preamble(raw_output)
    # Remove an opening fence as a prefix. str.strip('```json\n') would strip
    # a *set of characters* from both ends rather than the fence itself.
    if text.startswith("```"):
        text = text[3:]
        if text[:4].lower() == "json":
            text = text[4:]
    text = text.strip()
    if text.endswith("```"):
        text = text[:-3]
    # The preamble may also sit inside the fence, so check once more
    return drop_preamble(text)


def _parse_translation_list(text):
    """Parse a JSON array or Python list literal into a list, without eval."""
    import ast

    # SECURITY: the text comes from an LLM response, so it must never be passed
    # to eval(). json.loads / ast.literal_eval only accept literals and raise
    # on anything executable.
    try:
        parsed = json.loads(text)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        parsed = ast.literal_eval(text)
    if not isinstance(parsed, (list, tuple)):
        raise ValueError(
            f"expected a list of translations, got {type(parsed).__name__}"
        )
    return list(parsed)


def get_json_files(llm_model, test_data, target_lang, batch_size,
                   save_dir='save_results', dataset_dir='dataset'):
    """Score every saved translation batch against the reference data.

    Result files are looked up in ``save_dir`` by the prefix
    ``translation_{llm_model}_{test_data}_eng2lad_size of_{batch_size}_batch_``
    and are expected to be numbered ``0 .. n-1`` (``<prefix><i>.json``), where
    ``n`` is the number of files matching the prefix. Batch ``i`` is compared
    with rows ``i*batch_size .. (i+1)*batch_size - 1`` of
    ``{dataset_dir}/{test_data}_ita2lad.csv``.

    NOTE(review): the result-file prefix hard-codes ``eng2lad`` while the
    reference CSV name hard-codes ``ita2lad`` - confirm this is intended.

    Args:
        llm_model: model name used in the result file names.
        test_data: test-set name used in the result and reference file names.
        target_lang: column of the reference CSV holding the ground truth;
            also used to recognise the "Here are the ... translations:" preamble.
        batch_size: number of sentences per result file.
        save_dir: directory containing the result JSON files.
        dataset_dir: directory containing the reference CSV file.

    Returns:
        A list with one dict per successfully scored batch, holding the keys
        rouge1, rouge2, rougeL, bleu, sacrebleu, chrf and chrf++. Batches with
        no translation text, unparsable text, or a metric error are reported
        with ``print`` and skipped.

    Raises:
        FileNotFoundError: if the reference CSV or an expected result file
            is missing.
    """
    # Define the file name prefix of the result files
    file_prefix = f'translation_{llm_model}_{test_data}_eng2lad_size of_{batch_size}_batch_'

    # Count the files in the directory that start with the specified prefix
    matching_files = [f for f in os.listdir(save_dir) if f.startswith(file_prefix)]
    num_files = len(matching_files)
    print(f"Found {num_files} files.")

    metric_names = ('rouge1', 'rouge2', 'rougeL', 'bleu', 'sacrebleu', 'chrf', 'chrf++')
    all_scores = []

    # Get the real data / ground truth
    ref_data = pd.read_csv(os.path.join(dataset_dir, f'{test_data}_ita2lad.csv'))

    for i in range(num_files):
        # Slice the reference rows that belong to the current batch
        batch_start = i * batch_size
        print(f"Processing batch {i+1}, starting at index {batch_start}")
        real_data = ref_data.iloc[batch_start:batch_start + batch_size][target_lang].tolist()

        # Open and read the JSON file of the translation result
        file_loc = os.path.join(save_dir, f'{file_prefix}{i}.json')
        print("load the json file", file_loc)
        with open(file_loc, encoding='utf8') as f:
            data = json.load(f)
        # If the JSON payload is itself a JSON string, decode it once more
        if isinstance(data, str):
            data = json.loads(data)

        # Extract the message content of the first choice, if there is one
        choices = data.get("choices") if isinstance(data, dict) else None
        message = (choices[0].get("message") or {}) if choices else {}
        translation_output = message.get("content") or ""
        if not translation_output.strip():
            print("No translations found.")
            continue

        # Convert the response text to a Python list
        cleaned_output = _clean_translation_output(translation_output, target_lang)
        try:
            ladin_translations = _parse_translation_list(cleaned_output)
        except (ValueError, SyntaxError) as e:
            print(cleaned_output)
            print(f"Error parsing the translations: {e}")
            continue

        # Calculate the evaluation metric scores; a failing batch (e.g. a
        # length mismatch with the references) is reported and skipped so the
        # remaining batches are still scored.
        try:
            scores = eval_metrics(ladin_translations, real_data)
            all_scores.append({name: scores[name] for name in metric_names})
        except Exception as e:
            print(f"Error computing the metrics: {e}")
    return all_scores


In [None]:
# Evaluation settings passed to get_json_files
llm_model = 'ft_llama_31_70b_tai'  # model name used in the result file names
test_data = 'test_1'  # alternatives: test_2 / test_3
target_lang = 'ladin'
batch_size = 15

### Get the evaluation scores

In [None]:
# Score every saved translation batch with the settings defined above
translation_result = get_json_files(
    llm_model=llm_model,
    test_data=test_data,
    target_lang=target_lang,
    batch_size=batch_size,
)

In [None]:
# One row per scored batch, one column per metric
fr = pd.DataFrame(translation_result)
# Number of batches that were scored
print(len(translation_result))
# Average every metric column over the batches and show the result
mean_scores = fr.mean(axis=0)
print(mean_scores)