
(a) Prompt Debugging

In [None]:
#1.

from typing import List, Dict
import itertools

# 1. Generate prompt variations
def generate_variations(base_prompt: str, variations: Dict[str, List[str]]) -> List[str]:
    """Expand a prompt template into every combination of placeholder values.

    Args:
        base_prompt: Template text containing ``{name}`` placeholders.
        variations: Maps each placeholder name to the values to try for it.

    Returns:
        One prompt per element of the Cartesian product of the value lists,
        in ``itertools.product`` order (values of the last key vary fastest).
    """
    keys = list(variations.keys())
    prompts = []
    for combo in itertools.product(*(variations[k] for k in keys)):
        p = base_prompt
        for k, v in zip(keys, combo):
            # Plain text substitution: only the exact "{key}" marker is
            # replaced, any other braces in the template are left alone.
            p = p.replace("{" + k + "}", v)
        prompts.append(p)
    return prompts
 
# 2. Simple mock translator (to show meaningful debugging output)
def mock_runner(prompt: str) -> str:
    """Pretend to translate by spotting a known French phrase in the prompt.

    Returns the canned English text for the first phrase (in table order)
    that occurs anywhere in ``prompt``, or a fixed "not found" message when
    none of the phrases is present.
    """
    phrasebook = (
        ("Bonjour", "Hello"),
        ("Au revoir", "Goodbye"),
        ("Maison", "House"),
        ("Comment ça va?", "How are you?"),
    )

    # First matching phrase wins; None means nothing in the table matched.
    english = next((en for fr, en in phrasebook if fr in prompt), None)
    if english is None:
        return "No translation found."
    return f"Translated Output: {english}"

# 3. Run tests
def run_prompt_tests(prompts: List[str]):
    """Send every prompt through ``mock_runner`` and pair it with its reply.

    Returns a list of ``{"prompt": ..., "response": ...}`` dicts, one per
    input prompt and in the same order.
    """
    return [{"prompt": text, "response": mock_runner(text)} for text in prompts]

# Example Usage
# Template plus the values to substitute for each {placeholder}.
base = "Translate into {tone} English: '{sentence}'"
# Named `variations` rather than `vars` so the builtin vars() is not shadowed
# for the rest of the session.
variations = {"tone": ["formal", "casual"], "sentence": ["Bonjour", "Comment ça va?"]}

prompts = generate_variations(base, variations)
results = run_prompt_tests(prompts)

# Show each generated prompt next to the runner's reply.
for r in results:
    print("Prompt:", r["prompt"])
    print("Response:", r["response"])
    print("---")

Prompt: Translate into formal English: 'Bonjour'
Response: Translated Output: Hello
---
Prompt: Translate into formal English: 'Comment ça va?'
Response: Translated Output: How are you?
---
Prompt: Translate into casual English: 'Bonjour'
Response: Translated Output: Hello
---
Prompt: Translate into casual English: 'Comment ça va?'
Response: Translated Output: How are you?
---


In [None]:
# 2. English -> Marathi / Hindi (Hugging Face translation models)
from typing import List, Dict
import itertools
from transformers import pipeline

# Map target languages to valid model names
# Target language name -> Hugging Face model id that translates into it.
LANG_MODELS = {
    # English → Marathi
    "Marathi": "Helsinki-NLP/opus-mt-en-mr",
    # English → Hindi (if available)
    "Hindi": "Helsinki-NLP/opus-mt-en-hi",
    # An English → Punjabi entry could go here (if available).
    # Add more once you confirm model names exist
}

# One translation pipeline per configured language, all built up front.
TRANSLATORS = {
    target: pipeline("translation", model=repo_id)
    for target, repo_id in LANG_MODELS.items()
}

def generate_variations(base_prompt: str, variations: Dict[str, List[str]]) -> List[str]:
    """Render ``base_prompt`` once for every combination of placeholder values.

    Each key of ``variations`` names a ``{key}`` marker in the template; the
    result holds one rendered prompt per element of the Cartesian product of
    the value lists, in ``itertools.product`` order.
    """
    names = list(variations)
    markers = ["{" + name + "}" for name in names]
    choices_per_marker = [variations[name] for name in names]

    rendered = []
    for selection in itertools.product(*choices_per_marker):
        text = base_prompt
        # Substitute the markers one after another, in key order.
        for marker, value in zip(markers, selection):
            text = text.replace(marker, value)
        rendered.append(text)
    return rendered

def hf_runner(prompt: str) -> str:
    """Translate the quoted sentence of a prompt with the matching pipeline.

    The prompt is expected to look like ``"<instruction>: '<sentence>'"``.
    The target language is the first ``LANG_MODELS`` key found in the
    instruction part, and the sentence is passed to that language's entry in
    ``TRANSLATORS``. A prompt without the ``: '`` delimiter is used whole,
    both for language detection and as the text to translate.

    Args:
        prompt: The rendered prompt text.

    Returns:
        The translated text, or a message starting with "⚠️" when no target
        language is detected, no translator is loaded for it, or the
        translator raises.
    """
    # Split once at the FIRST ": '" so a sentence that itself contains that
    # sequence is kept whole instead of being cut at its last occurrence.
    instruction, delimiter, quoted = prompt.partition(": '")
    if delimiter:
        # Drop only the single closing quote; apostrophes that belong to the
        # sentence (e.g. "the dogs'") must survive.
        sentence = quoted[:-1] if quoted.endswith("'") else quoted
    else:
        # No quoted part: fall back to the whole prompt.
        sentence = prompt

    # Search the instruction only, so a language name mentioned inside the
    # sentence cannot be mistaken for the target language.
    lang = next((name for name in LANG_MODELS if name in instruction), None)
    if not lang:
        return "⚠️ Could not detect target language."

    translator = TRANSLATORS.get(lang)
    if translator is None:
        return f"⚠️ No translator loaded for {lang}."

    try:
        return translator(sentence)[0]["translation_text"]
    except Exception as e:
        # Deliberately broad: one failing prompt is reported in its result
        # string instead of aborting the whole test run.
        return f"⚠️ Translation error: {e}"

def run_prompt_tests(prompts: List[str]):
    """Send every prompt through ``hf_runner`` and pair it with its reply.

    Returns a list of ``{"prompt": ..., "response": ...}`` dicts, one per
    input prompt and in the same order.
    """
    return [{"prompt": text, "response": hf_runner(text)} for text in prompts]

if __name__ == "__main__":
    base = "Translate into {tone} {language}: '{sentence}'"
    # Named `variations` rather than `vars` so the builtin vars() is not
    # shadowed.
    variations = {
        "tone": ["formal", "casual"],
        # Only names that are keys of LANG_MODELS can be detected by
        # hf_runner; "Marathi" can be added here as well.
        "language": ["Hindi"],
        "sentence": ["Hello", "How are you?"],
    }

    prompts = generate_variations(base, variations)
    results = run_prompt_tests(prompts)

    # Show each generated prompt next to the translation it produced.
    for r in results:
        print("Prompt:", r["prompt"])
        print("Response:", r["response"])
        print("---")

(b) Performance Evaluation Metrics

In [None]:
#1.
!pip install sacrebleu rouge-score sentence-transformers

from typing import List
import sacrebleu
from rouge_score import rouge_scorer
from sentence_transformers import SentenceTransformer, util

# 1. Accuracy / Exact Match
def accuracy(preds: List[str], refs: List[str]) -> float:
    """Return the fraction of predictions that exactly match their reference.

    Predictions and references are compared pairwise after stripping
    surrounding whitespace. The denominator is ``len(preds)``, so a
    prediction without a corresponding reference counts as a miss.

    Args:
        preds: Model outputs.
        refs: Expected outputs, aligned with ``preds`` by position.

    Returns:
        A value in ``[0.0, 1.0]``; ``0.0`` when ``preds`` is empty.
    """
    # Nothing to score: avoid dividing by zero.
    if not preds:
        return 0.0
    matches = sum(p.strip() == r.strip() for p, r in zip(preds, refs))
    return matches / len(preds)

# 2. BLEU Score
def bleu(preds: List[str], refs: List[str]) -> float:
    """Return the corpus-level BLEU score of ``preds`` against ``refs``.

    ``refs`` is handed to sacrebleu as a single reference stream, i.e. one
    reference per prediction.
    """
    reference_streams = [refs]
    corpus_result = sacrebleu.corpus_bleu(preds, reference_streams)
    return corpus_result.score

# 3. ROUGE Scores
def rouge(preds: List[str], refs: List[str]):
    """Score each prediction against its reference with ROUGE-1 and ROUGE-L.

    Returns one result per (prediction, reference) pair, in input order, as
    produced by ``RougeScorer.score`` with stemming enabled.
    """
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)
    per_pair = []
    for prediction, reference in zip(preds, refs):
        # The scorer is called with the reference first, then the prediction.
        per_pair.append(scorer.score(reference, prediction))
    return per_pair

# 4. Semantic Similarity
def semantic_similarity(preds: List[str], refs: List[str]) -> List[float]:
    """Return the cosine similarity of each prediction with its reference.

    Both lists are embedded with the "all-MiniLM-L6-v2" sentence-transformer
    model, and entry ``i`` of the result compares ``preds[i]`` with
    ``refs[i]``.

    Args:
        preds: Model outputs.
        refs: Expected outputs, aligned with ``preds`` by position.

    Returns:
        One similarity per prediction; an empty list when ``preds`` is empty.
    """
    # With no predictions the result is necessarily empty, so skip loading
    # the model and encoding anything.
    if not preds:
        return []

    model = SentenceTransformer("all-MiniLM-L6-v2")
    pred_emb = model.encode(preds, convert_to_tensor=True)
    ref_emb = model.encode(refs, convert_to_tensor=True)
    # cos_sim gives the all-pairs matrix; only the diagonal, which pairs
    # prediction i with reference i, is reported.
    sims = util.cos_sim(pred_emb, ref_emb)
    return [float(sims[i, i]) for i in range(len(preds))]

# Example Usage
# The same text on both sides: each metric is exercised on an exact match.
preds = ["Hello", "How are you?"]
refs = ["Hello", "How are you?"]

# Evaluate and report the metrics one after another, in a fixed order.
for label, metric in (
    ("Accuracy:", accuracy),
    ("BLEU:", bleu),
    ("ROUGE:", rouge),
    ("Semantic similarity:", semantic_similarity),
):
    print(label, metric(preds, refs))


Accuracy: 1.0
BLEU: 100.00000000000004
ROUGE: [{'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}, {'rouge1': Score(precision=1.0, recall=1.0, fmeasure=1.0), 'rougeL': Score(precision=1.0, recall=1.0, fmeasure=1.0)}]
Semantic similarity: [1.0, 1.0]


In [None]:
#2.
# Install libraries if not already installed
!pip install sacrebleu rouge-score sentence-transformers

import sacrebleu
from rouge_score import rouge_scorer
from sentence_transformers import SentenceTransformer, util

# Reference and candidate outputs
# One reference sentence and a candidate that drops the second "the".
reference = ["The cat is on the mat"]
candidate = "The cat is on mat"

# BLEU Score
# Stored as `bleu_result`: rebinding the name `bleu` here would overwrite the
# bleu() metric function defined earlier in this notebook.
bleu_result = sacrebleu.corpus_bleu([candidate], [reference])
print("BLEU:", bleu_result.score)

# ROUGE Score
scorer = rouge_scorer.RougeScorer(["rouge1", "rougeL"], use_stemmer=True)
# The scorer is called with the reference first, then the candidate.
scores = scorer.score(reference[0], candidate)
print("ROUGE-1:", scores["rouge1"].fmeasure)
print("ROUGE-L:", scores["rougeL"].fmeasure)

# Semantic Similarity
model = SentenceTransformer("all-MiniLM-L6-v2")
ref_emb = model.encode(reference[0], convert_to_tensor=True)
cand_emb = model.encode(candidate, convert_to_tensor=True)
similarity = util.cos_sim(ref_emb, cand_emb)
# A single pair was compared, so the result holds exactly one value.
print("Cosine Similarity:", float(similarity))


BLEU: 57.89300674674101
ROUGE-1: 0.9090909090909091
ROUGE-L: 0.9090909090909091


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Cosine Similarity: 0.9925265908241272
