In [None]:
# Hugging Face Hub identifier of the causal language model under evaluation.
model_id = "bigcode/tiny_starcoder_py"

# Hub access token handed to `from_pretrained`; None means no token is sent.
huggingface_token = None

# Path of the JSON file holding the code-completion samples.
dataset_path = "/data_pre_process.json"

In [None]:
# Install CodeBLEU straight from its GitHub repository. The explicit `%pip`
# magic is used instead of a bare `pip` so the cell does not depend on
# IPython's automagic setting and installs into the running kernel's environment.
%pip install git+https://github.com/k4black/codebleu#egg=codebleu

In [None]:
# Java grammar for tree-sitter; the evaluation cell calls CodeBLEU with lang="java".
# `%pip` (rather than a bare `pip`) does not rely on IPython automagic.
%pip install tree-sitter-java

In [None]:
# Sanity check: show the installed codebleu / tree-sitter packages and versions.
# `grep -E` replaces the obsolescent `egrep`; the pattern is a regex, so no
# trailing `*` is needed to match names such as tree-sitter-java.
%pip list | grep -E 'codebleu|tree-sitter'

In [None]:
import json
import evaluate
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from tqdm import tqdm

# --- Generate fill-in-the-middle (FIM) completions for every dataset sample ---
#
# Reads the samples from `dataset_path`, loads `model_id`, and fills two
# parallel lists:
#   references  - the ground-truth `middle` of each sample
#   predictions - the text the model generated for the FIM prompt

# Upper bound on newly generated tokens per sample. Without an explicit budget
# `generate()` falls back to its default `max_length`, which also counts the
# prompt tokens, so long FIM prompts leave almost no room for a completion.
# Tune this to the typical length of the `middle` spans in the dataset.
MAX_NEW_TOKENS = 128

# Explicit encoding so that source files with non-ASCII characters load the
# same way regardless of the platform's default locale.
with open(dataset_path, "r", encoding="utf-8") as f:
    code_completion_data = json.load(f)

# NOTE(review): recent transformers releases deprecate `use_auth_token` in
# favour of `token`; kept unchanged here to stay compatible with the installed
# version - confirm before upgrading.
tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=huggingface_token)
model = AutoModelForCausalLM.from_pretrained(model_id, use_auth_token=huggingface_token)

device = "cuda" if torch.cuda.is_available() else "cpu"

model.to(device)
model.eval()

# Reuse EOS as the padding token when the tokenizer does not define one.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = model.config.eos_token_id

predictions = []
references = []

for sample in tqdm(code_completion_data, desc="Generating Completions"):
    prefix = sample["prefix"]
    middle = sample["middle"]
    suffix = sample["suffix"]

    references.append(middle)

    # FIM prompt: the model is expected to continue after <fim_middle>.
    input_text = f"<fim_prefix>{prefix}<fim_suffix>{suffix}<fim_middle>"

    # Calling the tokenizer (instead of `.encode`) also yields the attention
    # mask, which is passed explicitly so padding == EOS cannot confuse generate().
    encoded = tokenizer(input_text, return_tensors="pt").to(device)
    inputs = encoded["input_ids"]
    prompt_length = inputs.shape[1]

    outputs = model.generate(
        inputs,
        attention_mask=encoded["attention_mask"],
        max_new_tokens=MAX_NEW_TOKENS,
        pad_token_id=tokenizer.pad_token_id,
    )

    # Keep only the tokens produced after the prompt. Slicing by token position
    # is robust even if the prefix/suffix text itself contains "<fim_middle>",
    # and skipping special tokens keeps end-of-text markers out of the prediction.
    generated_ids = outputs[0][prompt_length:]
    predictions.append(tokenizer.decode(generated_ids, skip_special_tokens=True))
 

In [None]:

from codebleu import calc_codebleu

# --- Score every prediction against its reference and save the results ---
#
# For each sample this computes chrF, BLEU, ROUGE (plus the mean of its
# sub-scores), TER and CodeBLEU on the single (prediction, reference) pair and
# writes one annotated record per sample to `data_evaluated.json`.

# Loaded but not used in this cell: the per-sample exact-match flag below is a
# plain string comparison.
exact_match_metric = evaluate.load("exact_match")
chrf_metric = evaluate.load("chrf")
bleu_metric = evaluate.load("bleu")
rouge_metric = evaluate.load("rouge")
ter_metric = evaluate.load("ter")

# The three sequences are consumed in lockstep; fail early with a clear message
# if the generation cell was interrupted or run against a different dataset.
if not (len(code_completion_data) == len(predictions) == len(references)):
    raise ValueError(
        "Length mismatch: "
        f"{len(code_completion_data)} samples, "
        f"{len(predictions)} predictions, "
        f"{len(references)} references. Re-run the generation cell."
    )

annotated_data = []
for sample, prediction, reference in zip(code_completion_data, predictions, references):
    # Every metric is evaluated on a one-element batch.
    prediction_batch = [prediction]
    reference_batch = [reference]

    chrf_result = chrf_metric.compute(predictions=prediction_batch, references=reference_batch)

    # BLEU can raise ZeroDivisionError when the prediction or the reference
    # tokenizes to nothing (e.g. an empty completion). Record None for that
    # sample instead of aborting the whole evaluation run.
    try:
        bleu_result = bleu_metric.compute(predictions=prediction_batch, references=reference_batch)
    except ZeroDivisionError:
        bleu_result = None

    rouge_result = rouge_metric.compute(predictions=prediction_batch, references=reference_batch)
    # Mean over the ROUGE variants; computed before "avg" itself is inserted.
    rouge_result["avg"] = sum(rouge_result.values()) / len(rouge_result)

    ter_result = ter_metric.compute(predictions=prediction_batch, references=reference_batch)
    codebleu_result = calc_codebleu(predictions=prediction_batch, references=reference_batch, lang="java")

    annotated_data.append({
        "filename": sample.get('filename', 'unknown'),  # Handle cases where 'filename' might be missing
        "prefix": sample.get('prefix', 'unknown'),
        "middle": sample.get('middle', 'unknown'),
        "suffix": sample.get('suffix', 'unknown'),
        "middle_prediction": prediction,
        "exact_match": prediction == reference,
        "chrf": chrf_result,
        "ter": ter_result,
        "bleu": bleu_result,
        "rouge": rouge_result,
        "codebleu": codebleu_result,
        "manual_evaluation": None  # placeholder; always written as null by this cell
    })

annotated_dataset_path = "data_evaluated.json"
with open(annotated_dataset_path, "w", encoding="utf-8") as f:
    json.dump(annotated_data, f, indent=4)
    