<a href="https://colab.research.google.com/github/youngIcom/Machine_Learning_Project/blob/main/Benchmarking_model_with_GPU_final_best.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Instalasi library yang dibutuhkan

In [None]:
!pip install -q transformers torch accelerate bitsandbytes pandas google-generativeai

# Memuat Model Merak dan GPT-2

In [None]:
import torch
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig, pipeline, LlamaTokenizer
import time
import pandas as pd
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold

# --- Gemini configuration ---
# SECURITY: the API key must never be hard-coded in the notebook. A key that
# was previously committed here has to be treated as leaked and revoked at
# https://ai.google.dev/. Provide the key through the GEMINI_API_KEY
# environment variable or a Colab secret of the same name instead.
import os


def load_gemini_api_key() -> str:
    """Return the Gemini API key, or an empty string when none is configured.

    Lookup order:
      1. the ``GEMINI_API_KEY`` environment variable;
      2. the Colab secret named ``GEMINI_API_KEY`` (only when running in Colab).
    """
    env_key = os.environ.get("GEMINI_API_KEY", "").strip()
    if env_key:
        return env_key
    try:
        from google.colab import userdata

        return (userdata.get("GEMINI_API_KEY") or "").strip()
    except Exception:
        # Not running in Colab, the secret does not exist, or notebook access
        # to it was not granted: all of these simply mean "no key available".
        return ""


GEMINI_API_KEY = load_gemini_api_key()

if not GEMINI_API_KEY:
    # Without a key the relevance scoring step is skipped; the benchmark
    # itself still runs.
    print("\nPERINGATAN: API Key Gemini belum diatur. Penilaian relevansi akan dinonaktifkan.")
    gemini_model = None
else:
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        # All safety filters are set to BLOCK_NONE so that the judge model
        # does not refuse to rate a response.
        gemini_model = genai.GenerativeModel(
            "gemini-pro",
            safety_settings={
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
            }
        )
        print("Gemini model berhasil diinisialisasi.")
    except Exception as e:
        # Initialisation problems must not abort the notebook; scoring is
        # disabled and the benchmark continues without it.
        print(f"Gagal menginisialisasi Gemini model: {e}. Penilaian relevansi akan dinonaktifkan.")
        gemini_model = None


# --- 4-bit quantization settings shared by both model loads ---
bnb_config = BitsAndBytesConfig(
    bnb_4bit_use_double_quant=True,  # nested quantization; NOTE(review): original comment claimed this improves accuracy - confirm against the bitsandbytes docs
    bnb_4bit_compute_dtype=torch.float16,  # half-precision compute, chosen as efficient for a T4 GPU
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

print("\nMemuat model Merak...")
# --- Load the Merak model and its tokenizer ---
# On any failure both globals are reset to None so later cells can detect
# that Merak is unavailable.
model_id_merak = "Ichsan2895/Merak-7B-v4"
try:
    merak_model = AutoModelForCausalLM.from_pretrained(
        model_id_merak,
        device_map="auto",
        quantization_config=bnb_config,
        trust_remote_code=True,
    )
    merak_tokenizer = LlamaTokenizer.from_pretrained(model_id_merak)
    print("Model Merak berhasil dimuat.")
except Exception as load_error:
    print(f"Gagal memuat model Merak: {load_error}")
    merak_model = merak_tokenizer = None


print("\nMemuat model GPT-2...")
# --- Load the GPT-2 model, its tokenizer and a text-generation pipeline ---
# On any failure all three globals are reset to None so later cells can
# detect that GPT-2 is unavailable.
gpt2_model_name = "cahya/gpt2-small-indonesian-522M"
try:
    gpt2_model = AutoModelForCausalLM.from_pretrained(
        gpt2_model_name,
        device_map="auto",
        quantization_config=bnb_config,  # same quantization settings as used for Merak
    )
    gpt2_tokenizer = AutoTokenizer.from_pretrained(gpt2_model_name)
    # No explicit device is passed to the pipeline; the model was already
    # loaded with device_map="auto".
    gpt2_pipe = pipeline(
        "text-generation",
        model=gpt2_model,
        tokenizer=gpt2_tokenizer,
    )
    print("Model GPT-2 berhasil dimuat.")
except Exception as load_error:
    print(f"Gagal memuat model GPT-2: {load_error}")
    gpt2_model = gpt2_tokenizer = gpt2_pipe = None

print("\nInisialisasi model selesai.")

# Generasi Model dengan Prompt pertanyaan

In [None]:
# --- Generation function for Merak (chat-template based) ---
def generate_merak_response(question: str, model, tokenizer, max_new_tokens: int = 256, temperature: float = 0.67, top_p: float = 0.9, top_k: int = 90) -> str:
    """Generate Merak's answer to ``question`` and return only the answer text.

    Args:
        question: The user question inserted into the chat template.
        model: A loaded causal language model, or ``None`` when loading failed.
        tokenizer: The matching tokenizer, or ``None`` when loading failed.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        top_k: Number of highest-probability tokens kept for sampling.

    Returns:
        The stripped text of the newly generated tokens, or the message
        ``"Model Merak tidak dimuat."`` when the model or tokenizer is missing.
    """
    if model is None or tokenizer is None:
        return "Model Merak tidak dimuat."

    chat = [
        {"role": "system", "content": "Anda adalah Merak, sebuah model kecerdasan buatan yang dilatih oleh Muhammad Ichsan. Mohon jawab pertanyaan berikut dengan benar, faktual, dan ramah."},
        {"role": "user", "content": question},
    ]

    prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)

    # Follow the device the model actually lives on instead of assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt", return_attention_mask=True).to(model.device)
    prompt_token_count = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            do_sample=True,  # required for temperature/top_p/top_k to take effect
        )

    # The generated sequence starts with the prompt tokens. Slicing them off
    # by position is independent of how the chat template renders as text,
    # unlike searching the decoded string for an "assistant" marker.
    generated_tokens = outputs[0][prompt_token_count:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()


# --- Benchmark prompts ---
prompts = [
    "Siapa penulis naskah proklamasi kemerdekaan Indonesia?",
    "apa yang menyebabkan perang dunia terjadi",
    "budi membeli sebuah roti dengan harga 10000, dan kemudian dia membayar dengan uang 20000 berapa sisa kembalian uang budi",
    "apa yang dimaksud dengan metode",
]

# One dict per (model, prompt) pair is appended here by the benchmark loop.
results = []

# --- Generation parameters for Merak ---
merak_gen_params = dict(
    max_new_tokens=100,
    temperature=0.67,
    top_p=0.9,
    top_k=90,
)

# --- Generation parameters for GPT-2 ---
gpt2_gen_params = {
    # Use the same *new token* budget as Merak. The previous "max_length"
    # counted the prompt tokens as well, so longer prompts left GPT-2 with
    # fewer generated tokens and made the latency comparison uneven.
    "max_new_tokens": 100,
    "num_return_sequences": 1,
    "num_beams": 2,  # beam search
    "do_sample": False,  # sampling is switched off while beam search is used
    "no_repeat_ngram_size": 2,
    "eos_token_id": gpt2_tokenizer.eos_token_id if gpt2_tokenizer else None,  # optional
}

import re


def _score_relevance(judge, prompt_text, response_text, tag):
    """Ask the Gemini judge to rate a response and return the score.

    Args:
        judge: The Gemini model object, or ``None`` when scoring is disabled.
        prompt_text: The original benchmark prompt.
        response_text: The model response to be rated.
        tag: Short model label used in the error message (e.g. ``"Merak"``).

    Returns:
        An ``int`` from 1 to 5, ``"N/A (Gemini tidak aktif)"`` when no judge
        is available, or ``"Error"`` when the call fails or the reply contains
        no digit in the 1-5 range.
    """
    if judge is None:
        return "N/A (Gemini tidak aktif)"

    judge_prompt = (
        f"Nilai relevansi respons berikut terhadap prompt asli dalam skala 1 hingga 5 "
        f"(1 = tidak relevan, 5 = sangat relevan).\n"
        f"Prompt Asli: {prompt_text}\nRespons Model: {response_text}\nNilai Relevansi (1-5):"
    )
    try:
        reply = judge.generate_content(judge_prompt)
        # Only digits inside the requested scale count as a score.
        score_match = re.search(r"[1-5]", reply.text)
    except Exception as e:
        # A failed rating must not abort the whole benchmark run.
        print(f"[{tag}] Error saat menilai relevansi dengan Gemini: {e}")
        return "Error"
    return int(score_match.group()) if score_match else "Error"


def _format_latency(latency):
    """Format a float latency with four decimals; pass other values through."""
    return f"{latency:.4f}" if isinstance(latency, float) else latency


print("\nMemulai proses benchmark...")
for prompt in prompts:
    # === MERAK BENCHMARK ===
    if merak_model is not None and merak_tokenizer is not None:
        print(f"\n[{prompt}] Menjalankan Merak...")
        # perf_counter is the clock intended for measuring short intervals.
        start_time = time.perf_counter()
        merak_response = generate_merak_response(prompt, merak_model, merak_tokenizer, **merak_gen_params)
        merak_time = time.perf_counter() - start_time
        relevance_score_merak = _score_relevance(gemini_model, prompt, merak_response, "Merak")
    else:
        merak_time = "N/A"
        merak_response = "Model Merak gagal dimuat."
        relevance_score_merak = "N/A"

    results.append({
        "Model": "Merak-7B-v4",
        "Prompt": prompt,
        "Latency (s)": _format_latency(merak_time),
        "Response": merak_response,
        "Relevance Score (1-5)": relevance_score_merak
    })

    # === GPT-2 BENCHMARK ===
    if gpt2_pipe is not None:
        print(f"[{prompt}] Menjalankan GPT-2...")
        start_time = time.perf_counter()
        gpt2_output = gpt2_pipe(prompt, **gpt2_gen_params)
        gpt2_time = time.perf_counter() - start_time
        gpt2_response = gpt2_output[0]['generated_text'].strip()

        # Drop the echoed prompt when the pipeline returns it with the output.
        if gpt2_response.startswith(prompt):
            gpt2_response = gpt2_response[len(prompt):].strip()

        relevance_score_gpt2 = _score_relevance(gemini_model, prompt, gpt2_response, "GPT2")
    else:
        gpt2_time = "N/A"
        gpt2_response = "Model GPT-2 gagal dimuat."
        relevance_score_gpt2 = "N/A"

    results.append({
        "Model": "gpt2-small-indonesian-522M",
        "Prompt": prompt,
        "Latency (s)": _format_latency(gpt2_time),
        "Response": gpt2_response,
        "Relevance Score (1-5)": relevance_score_gpt2
    })

print("\nBenchmark selesai.")

# Tampilkan output pada tabel

In [None]:
# --- Print the benchmark results as a Markdown table framed by rule lines ---
df_results = pd.DataFrame(results)
banner_rule = "=" * 50
print(f"\n{banner_rule}")
print("             HASIL BENCHMARK MODEL             ")
print(banner_rule)
print(df_results.to_markdown(index=False))
print(f"{banner_rule}\n")

# Tampilkan respons pada tabel

In [None]:
# Lift the column-width limit so long responses are shown in full.
pd.set_option("display.max_colwidth", None)

# (Optional) fixed column order for a tidier table.
ordered_cols = [
    "Model",
    "Prompt",
    "Latency (s)",
    "Relevance Score (1-5)",
    "Response",
]
results_df = pd.DataFrame(results)[ordered_cols]

# Show latencies with a decimal comma instead of a decimal point, to stay
# consistent with the previously used format.
latency_text = results_df["Latency (s)"].astype(str)
results_df["Latency (s)"] = latency_text.str.replace('.', ',', regex=False)

display(results_df)

# Tampilkan pada spreadsheet

In [None]:
# Colab-only: `google.colab` is imported here, so this cell is expected to run
# inside Google Colab.
from google.colab import sheets
# Build an InteractiveSheet from the formatted results table. `results_df` is
# created by the preceding display cell, which must have been run first.
sheet = sheets.InteractiveSheet(df=results_df)