In [None]:
!pip install rouge_score
!pip install bert-score
!pip install sacrebleu

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=25174cbdb1b6944d66cdf78ebd6af190ddad204cd8335cce0f8010e62c7d44d4
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2
Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.0.0->bert-score)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.0.0->bert-score)
  Downloading nvidia_cuda_runtime_cu12

In [None]:
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer

In [None]:
# Fetch the 'punkt_tab' tokenizer data before any scoring is done.
# NOTE(review): presumably this is the resource nltk.word_tokenize (used in
# compute_bleu) loads at call time -- confirm against the installed NLTK version.
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
def compute_bleu(reference, candidate):
    """Return the smoothed sentence-level BLEU score of ``candidate`` against ``reference``.

    Both arguments are raw strings; each is split into tokens with
    ``nltk.word_tokenize`` before scoring.  No n-gram weights are passed, so
    ``sentence_bleu`` runs with its defaults.

    Args:
        reference: The gold text.
        candidate: The text being scored.

    Returns:
        Whatever ``sentence_bleu`` returns for the single reference/candidate pair.
    """
    # sentence_bleu expects a *list* of tokenised references, hence the outer list.
    tokenised_references = [nltk.word_tokenize(reference)]
    tokenised_candidate = nltk.word_tokenize(candidate)

    # Smoothing (method1) is applied to avoid zero scores.
    return sentence_bleu(
        tokenised_references,
        tokenised_candidate,
        smoothing_function=SmoothingFunction().method1,
    )


def compute_rouge(reference, candidate):
    """Return the ROUGE-1, ROUGE-2 and ROUGE-L F-measures for ``candidate``.

    A fresh ``RougeScorer`` (with stemming enabled) is built on every call and
    asked to score ``candidate`` against ``reference``.

    Args:
        reference: The gold text.
        candidate: The text being scored.

    Returns:
        A dict with the keys ``"rouge-1"``, ``"rouge-2"`` and ``"rouge-L"``,
        each holding the ``fmeasure`` of the corresponding ROUGE variant.
    """
    # rouge_score metric name -> key used in the returned dict (order is preserved).
    label_for_metric = {
        'rouge1': "rouge-1",
        'rouge2': "rouge-2",
        'rougeL': "rouge-L",
    }

    scorer = rouge_scorer.RougeScorer(list(label_for_metric), use_stemmer=True)
    per_metric = scorer.score(reference, candidate)

    return {
        label: per_metric[metric].fmeasure
        for metric, label in label_for_metric.items()
    }


def compute_cosine_similarity(reference, candidate):
    """Return the TF-IDF cosine similarity between two texts as a float in [0, 1].

    A new ``TfidfVectorizer`` is fitted on just these two texts, so the IDF
    weights reflect this two-document "corpus" only.

    Args:
        reference: The gold text.
        candidate: The text being scored.

    Returns:
        A built-in ``float`` between 0.0 and 1.0.  0.0 is returned when either
        text yields no tokens under the vectorizer's default analyzer (for
        example an empty string), because a text with no terms shares none
        with the other one.
    """
    vectorizer = TfidfVectorizer()

    # Fitting on texts that produce no tokens at all raises
    # "ValueError: empty vocabulary"; a text without tokens is an all-zero
    # vector anyway, so answer 0.0 up front instead of crashing.
    analyzer = vectorizer.build_analyzer()
    if not analyzer(reference) or not analyzer(candidate):
        return 0.0

    vectors = vectorizer.fit_transform([reference, candidate])
    cos_sim = cosine_similarity(vectors[0:1], vectors[1:2])

    # TF-IDF weights are non-negative, so the true value lies in [0, 1];
    # floating-point rounding can overshoot (e.g. 1.0000000000000002 for
    # identical texts).  Clamp, and unwrap the NumPy scalar into a plain float.
    return min(1.0, max(0.0, float(cos_sim[0][0])))


def evaluate(reference_qa, generated_qa):
    """Score a generated question/answer pair against a reference pair.

    Args:
        reference_qa: Mapping with ``'question'`` and ``'answer'`` entries (gold texts).
        generated_qa: Mapping with ``'question'`` and ``'answer'`` entries (texts to score).

    Returns:
        A nested dict ``{metric: {"question": ..., "answer": ...}}`` with the
        metrics ``"bleu"``, ``"rouge"`` and ``"cosine_similarity"``, in that
        order.  Each leaf is the value returned by the matching ``compute_*``
        helper for the reference/generated text of that field.

    Raises:
        KeyError: If either mapping lacks ``'question'`` or ``'answer'``.
    """
    fields = ("question", "answer")

    # Read every field up front so a missing key fails before any scoring runs.
    reference_texts = [reference_qa[field] for field in fields]
    generated_texts = [generated_qa[field] for field in fields]
    text_pairs = dict(zip(fields, zip(reference_texts, generated_texts)))

    # Result key -> scoring helper, in the order the result dict is laid out.
    metrics = (
        ("bleu", compute_bleu),
        ("rouge", compute_rouge),
        ("cosine_similarity", compute_cosine_similarity),
    )

    return {
        metric_name: {
            field: score_fn(*text_pairs[field])
            for field in fields
        }
        for metric_name, score_fn in metrics
    }


In [None]:
# Reference (gold) question/answer pair; every generated_qa_* dict in this
# notebook is scored against it via evaluate(reference_qa, ...).
reference_qa = {
      "question": "What are Carbon Nanotubes (CNTs) and how are they classified?",
      "answer": "Carbon Nanotubes (CNTs) are nanostructured materials composed of rolled-up sheets of graphene, forming cylindrical structures. They are classified into three main types based on the number of layers of graphene present: Single-walled Carbon Nanotubes (SWCNTs), Double-walled Carbon Nanotubes (DWCNTs), and Multi-walled Carbon Nanotubes (MWCNTs). These structures offer unique electrical, mechanical, and thermal properties that make them useful in applications such as drug delivery, sensors, and electronics."
}

# Q/A pair to be scored; reported under the label "llama7B".
# The question string is identical to the reference question, so only the
# answer differs from reference_qa.
generated_qa_llama_7B = {
    "question": "What are Carbon Nanotubes (CNTs) and how are they classified?",
    "answer": "Carbon Nanotubes (CNTs) are tiny tubes made of carbon atoms arranged in a special structure. They can be divided into two main types: single-walled carbon nanotubes (SWCNTs) and multi-walled carbon nanotubes (MWCNTs). SWCNTs are formed by wrapping a single layer of carbon atoms around a central axis, creating a tube-like structure. MWCNTs, on the other hand, are made up of multiple layers of graphene, which are stacked together to form a tube. Both types of CNTs have different properties and are synthesized using different methods. For example, SWCNTs are typically synthesized using a process called chemical vapor deposition (CVD), while MWCNTs are often made using a process called arc discharge. In the real world, CNTs have many potential applications, such as in the production of super-strong materials, in the development of new electronics, and in the creation of more efficient energy storage systems. For example, CNTs can be used to create ultra-lightweight materials that are stronger than steel, or to create new types of transistors that are faster and more efficient than those currently in use. Overall, CNTs are fascinating materials with a wide range of potential applications. By understanding their properties and how they are classified, researchers and engineers can develop new technologies that could have a significant impact on many areas of our lives."
}

# Q/A pair to be scored; reported under the label "gemma7B".
# NOTE(review): the answer ends with "...and application" and no closing
# punctuation -- it looks truncated; confirm the full model output was captured.
generated_qa_gemma_7B = {
    "question": "What are Carbon Nanotubes (CNTs) and how are they classified?",
    "answer": "Carbon nanotubes are cylindrical structures made of graphite, with a diameter of around 100 nanometers and a length of several micrometers. They are classified based on their size and shape, with different types having different properties and application"
}

# Q/A pair to be scored; reported under the label "gemini".
# The question string is identical to the reference question.
generated_qa_gemini = {
    "question": "What are Carbon Nanotubes (CNTs) and how are they classified?",
    "answer": "Carbon nanotubes (CNTs) are cylindrical structures made of carbon atoms, with nanometer-scale diameters. They are classified primarily by the number of graphene layers they contain: Single-walled carbon nanotubes (SWCNTs): One layer of graphene rolled into a tube. Multi-walled carbon nanotubes (MWCNTs): Multiple layers of graphene rolled into concentric tubes."
}
# Q/A pair to be scored.
# NOTE(review): the variable says "claude_3_sonnet" but the report cell prints
# this result under "claude_3.5_sonnet" -- confirm which model produced it.
generated_qa_claude_3_sonnet = {
    "question": "What are Carbon Nanotubes (CNTs) and how are they classified?",
    "answer": "Carbon Nanotubes (CNTs) are cylindrical molecules composed entirely of carbon atoms arranged in a hexagonal pattern. They're classified primarily as: Single-walled carbon nanotubes (SWCNTs): Consisting of a single graphene sheet rolled into a cylinder Multi-walled carbon nanotubes (MWCNTs): Comprising multiple concentric graphene cylinders nested within each other. They can be further classified by their chirality or twist (zigzag, armchair, or chiral), which affects their electrical properties (metallic or semiconducting)."
}

# Q/A pair to be scored.
# NOTE(review): the variable says "gpt4" but the report cell prints this result
# under "gpt4_mini" -- confirm which model produced it.
# NOTE(review): the answer text is about thermodynamic/kinetic considerations in
# fluidized bed CVD, not about what CNTs are or how they are classified; it may
# have been pasted from a different question -- verify before trusting its scores.
generated_qa_gpt4 = {
    "question": "What are Carbon Nanotubes (CNTs) and how are they classified?",
    "answer": "Thermodynamic considerations in fluidized bed CVD for carbon nanotube synthesis focus on achieving the appropriate temperature and gas composition for carbon precursor decomposition, while kinetic considerations involve optimizing reaction rates, catalyst activation, and carbon diffusion to balance growth quality and yield."
}

# Q/A pair to be scored; reported under the label "deepseek_r1".
# The answer still contains Markdown markup ("**...**", "1." / "2." list
# markers, double spaces); it is passed to the metrics exactly as written here.
generated_qa_deepseek_r1 = {
    "question": "What are Carbon Nanotubes (CNTs) and how are they classified?",
    "answer": "Carbon Nanotubes (CNTs) are cylindrical nanostructures composed of carbon atoms arranged in a hexagonal lattice, forming a tube-like structure. They are essentially graphene sheets (single-layer carbon atoms) rolled into a seamless cylinder. CNTs exhibit exceptional properties such as high tensile strength, lightweight, unique electronic behavior, and thermal conductivity, making them valuable in fields like biosensors, electronics, and materials science.  **Classification:**  CNTs are primarily classified into two types based on their structural layers:  1. **Single-Walled Carbon Nanotubes (SWCNTs):** Consist of a single graphene layer rolled into a tube. They exhibit distinct electronic properties depending on their chiral vector (the orientation of the roll), which determines whether they behave as metals or semiconductors.  2. **Multi-Walled Carbon Nanotubes (MWCNTs):** Comprise multiple concentric graphene cylinders nested within one another. These offer enhanced mechanical robustness and are often used in applications requiring durability. This structural distinction underpins their varied applications, with SWCNTs favored for precision electronic devices and MWCNTs for mechanical reinforcement in composites."
}

In [None]:
# (report label, model output) pairs, in the order the reports are printed.
# One loop replaces six copy-pasted evaluate/print stanzas, so adding a model
# is a one-line change and the label can no longer drift from the stanza body.
# NOTE(review): two labels do not match their variable names
# ("claude_3.5_sonnet" vs generated_qa_claude_3_sonnet, "gpt4_mini" vs
# generated_qa_gpt4). They are kept exactly as originally printed -- confirm
# which model each answer really came from.
model_outputs = [
    ("llama7B", generated_qa_llama_7B),
    ("deepseek_r1", generated_qa_deepseek_r1),
    ("gemma7B", generated_qa_gemma_7B),
    ("claude_3.5_sonnet", generated_qa_claude_3_sonnet),
    ("gpt4_mini", generated_qa_gpt4),
    ("gemini", generated_qa_gemini),
]

for model_label, generated_qa in model_outputs:
    # `scores` is rebound on every pass; after the loop it holds the last
    # model's result, as it did with the unrolled version.
    scores = evaluate(reference_qa, generated_qa)
    print(f"\nEvaluation Scores for {model_label}:")
    for key, value in scores.items():
        print(f"  {key}: {value}")



Evaluation Scores for llama7B:
  bleu: {'question': 1.0, 'answer': 0.05565508535251791}
  rouge: {'question': {'rouge-1': 1.0, 'rouge-2': 1.0, 'rouge-L': 1.0}, 'answer': {'rouge-1': 0.3092783505154639, 'rouge-2': 0.1453287197231834, 'rouge-L': 0.19931271477663232}}
  cosine_similarity: {'question': np.float64(1.0000000000000002), 'answer': np.float64(0.48285005554000615)}

Evaluation Scores for deepseek_r1:
  bleu: {'question': 1.0, 'answer': 0.09795306242848913}
  rouge: {'question': {'rouge-1': 1.0, 'rouge-2': 1.0, 'rouge-L': 1.0}, 'answer': {'rouge-1': 0.4721030042918455, 'rouge-2': 0.19047619047619047, 'rouge-L': 0.30042918454935624}}
  cosine_similarity: {'question': np.float64(1.0000000000000002), 'answer': np.float64(0.5290103823814718)}

Evaluation Scores for gemma7B:
  bleu: {'question': 1.0, 'answer': 0.032565845569177666}
  rouge: {'question': {'rouge-1': 1.0, 'rouge-2': 1.0, 'rouge-L': 1.0}, 'answer': {'rouge-1': 0.35185185185185186, 'rouge-2': 0.09433962264150944, 'rouge-