<a href="https://colab.research.google.com/github/vichitrarora/warp/blob/main/Evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os

In [2]:
# Sanity check: list the entries currently present in the Colab working directory.
print(os.listdir("/content/"))

['.config', 'sample_data']


In [4]:
# Install the model-loading stack (unsloth) and the libraries used for the evaluation metrics.
# NOTE(review): versions are unpinned, so a fresh run may resolve different releases.
!pip install unsloth
!pip install rouge-score nltk rapidfuzz scikit-learn
# NOTE(review): neither rouge-score nor rouge is imported by the cells visible here — confirm they are still needed.
!pip install rouge

Collecting unsloth
  Using cached unsloth-2025.2.15-py3-none-any.whl.metadata (57 kB)
Collecting unsloth_zoo>=2025.2.7 (from unsloth)
  Using cached unsloth_zoo-2025.2.7-py3-none-any.whl.metadata (16 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Using cached xformers-0.0.29.post3-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth)
  Using cached bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting tyro (from unsloth)
  Using cached tyro-0.9.16-py3-none-any.whl.metadata (9.4 kB)
Collecting datasets>=2.16.0 (from unsloth)
  Using cached datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting trl!=0.15.0,!=0.9.0,!=0.9.1,!=0.9.2,!=0.9.3,>=0.7.9 (from unsloth)
  Using cached trl-0.15.2-py3-none-any.whl.metadata (11 kB)
Collecting protobuf<4.0.0 (from unsloth)
  Using cached protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Collecting hf_transfer (from unsloth)
  Using cached hf_transfer-0.1.9-cp38

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting rapidfuzz
  Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Downloading rapidfuzz-3.12.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m87.8 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=9762837a55ac79760704f4f78da034c5a3d9bbbef902bcdb6ebe5561b42cd381
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge-score
Installing collected packages: rapidfuzz, rouge-score
Successfully installed rapidfuzz-3.12.1 rouge-score-0.1.2
Co

In [6]:
import nltk
import shutil
import pandas as pd
import torch
from unsloth import FastLanguageModel
from transformers import AutoTokenizer
from peft import PeftModel
from rapidfuzz.distance import Levenshtein
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity



In [7]:
# 🔹 Step 1: Refresh the NLTK 'punkt' data
# Register an extra lookup directory, discard any cached copy of punkt
# (a missing directory is ignored), then download it again.
nltk.data.path.append('/usr/local/nltk_data')
shutil.rmtree('/root/nltk_data/tokenizers/punkt', ignore_errors=True)
nltk.download('punkt')

# 🔹 Step 2: Load Model & Tokenizer
model_path = "/content/drive/MyDrive/merged_model_fp16"  # Update with actual model path
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# The tokenizer is read from the same directory as the model weights.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Only the first element of what from_pretrained returns (the model) is kept.
model = FastLanguageModel.from_pretrained(
    model_path,
    load_in_4bit=True,
)[0]
model.to(device)

# Optimize the loaded model for inference before any generation happens.
FastLanguageModel.for_inference(model)
print("✅ Merged fine-tuned model loaded successfully!")

# 🔹 Step 3: Load Evaluation Data
eval_file = "/content/eval_data_v4.csv"  # Update with actual evaluation file path
df_eval = pd.read_csv(eval_file)

# 🔹 Step 4: MongoDB Query Generation Function
def generate_mongo_query(db_id, schema_info, nl_query, max_new_tokens=150):
    """Generate a MongoDB query for one example using the fine-tuned model.

    Builds an Alpaca-style prompt from the arguments, runs ``model.generate``
    on it and returns only the text the model produced for this prompt.

    Relies on the notebook-level ``tokenizer``, ``model`` and ``device``.

    Args:
        db_id: Database identifier interpolated into the prompt.
        schema_info: Schema description interpolated into the prompt.
        nl_query: Natural-language request to translate.
        max_new_tokens: Upper bound on generated tokens (default 150, the
            value that used to be hard-coded).

    Returns:
        The generated query as a stripped string.

    NOTE(review): because the f-string is indented inside the function, every
    prompt line after the first carries four leading spaces. Confirm this
    matches the template used during fine-tuning before changing it.
    """
    prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context.
    Write a response that appropriately completes the request.

    ### Instruction:
    Generate a MongoDB query from the given database schema and natural language request.

    ### Input:
    Database ID: {db_id}
    Schema: {schema_info}
    Query: {nl_query}

    ### Response:
    """

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    output = model.generate(**inputs, max_new_tokens=max_new_tokens)

    # The returned sequence starts with the prompt tokens. Decode only what
    # comes after them instead of decoding everything and searching for the
    # "### Response:" marker in the text.
    prompt_length = inputs["input_ids"].shape[1]
    completion = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

    # If the model runs on and starts a new example, keep only the answer to
    # the real prompt. Splitting the full text and taking the last segment
    # would have returned the answer to the run-on example instead.
    for header in ("### Instruction:", "### Input:", "### Response:"):
        completion = completion.split(header, 1)[0]

    return completion.strip()

# 🔹 Step 5: Evaluation Metrics (string-based metrics that do not depend on NLTK)
def _as_query_text(value):
    """Return ``value`` as a string, mapping None and NaN to an empty string."""
    # `value != value` is only true for NaN, which is what pandas yields for
    # an empty CSV cell.
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return str(value)


def compute_metrics(true_query, generated_query):
    """Compute similarity metrics between true and generated MongoDB queries.

    Args:
        true_query: Reference query; None/NaN is treated as an empty string.
        generated_query: Model output; None/NaN is treated as an empty string.

    Returns:
        dict with two entries:
          - "Levenshtein Distance": character edit distance (lower = more similar).
          - "Cosine Similarity": cosine of the two token-count vectors
            (higher = more similar).

    Note:
        ``CountVectorizer`` is used with its defaults, which lowercase the text
        and ignore punctuation and single-character tokens. A cosine of 1.0
        therefore does not imply the two queries are identical.
    """
    true_query = _as_query_text(true_query)
    generated_query = _as_query_text(generated_query)

    # Levenshtein Distance (Lower = More Similar)
    lev_dist = Levenshtein.distance(true_query, generated_query)

    # Cosine Similarity (Higher = More Similar)
    try:
        vectors = CountVectorizer().fit_transform([true_query, generated_query]).toarray()
    except ValueError:
        # Raised when neither string contains a countable token (empty
        # vocabulary). Fall back to exact-match semantics instead of aborting
        # the whole evaluation run.
        cos_sim = 1.0 if true_query == generated_query else 0.0
    else:
        cos_sim = cosine_similarity([vectors[0]], [vectors[1]])[0][0]

    return {
        "Levenshtein Distance": lev_dist,
        "Cosine Similarity": cos_sim
    }

# 🔹 Step 6: Run Evaluation
# One metrics dict is collected per evaluation row, in row order.
all_metrics = []

for row_idx, sample in df_eval.iterrows():
    # Pull the fields of this example out of the row.
    db_id, schema_info = sample["database_id"], sample["schema"]
    reference_query, question = sample["mongo_query"], sample["natural_language_query"]

    # Ask the model for a query, then score it against the reference.
    predicted_query = generate_mongo_query(db_id, schema_info, question)
    all_metrics.append(compute_metrics(reference_query, predicted_query))

# 🔹 Step 7: Save & Display Results
# Build the results table from the collected per-row metric dicts.
df_results = pd.DataFrame(all_metrics)
print(df_results.head())

# Persist the per-row metrics as CSV (no index column).
df_results.to_csv("/content/evaluation_results.csv", index=False)
print("✅ Evaluation completed and saved as 'evaluation_results.csv'.")


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


==((====))==  Unsloth 2025.2.15: Fast Qwen2 patching. Transformers: 4.48.3.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
✅ Merged fine-tuned model loaded successfully!
   Levenshtein Distance  Cosine Similarity
0                     2           1.000000
1                     2           1.000000
2                     5           1.000000
3                     5           1.000000
4                    16           0.730297
✅ Evaluation completed and saved as 'evaluation_results.csv'.
