In [None]:
# # Install necessary libraries
# !pip install transformers datasets

# # Import libraries
# import random
# from datasets import load_dataset
# from transformers import AutoTokenizer, AutoModelForCausalLM
# import torch

# # Step 1: Load the MMLU Dataset
# # dataset = load_dataset("hendrycks_test", "abstract_algebra", split="validation")  # Example subset: "abstract_algebra"
# dataset = load_dataset("hails/mmlu_no_train", "all")

# # Step 2: Preprocess the Data
# def preprocess_mmlu(data):
#     """
#     Prepares the MMLU dataset by randomly masking one incorrect option.
#     """
#     processed_data = []
#     for example in data['test']: # Access the 'test' split of the dataset
#         question = example["question"]
#         answer = example["answer"]  # The correct answer (A, B, C, or D)
#         # Assuming 'choices' contains the options as a list
#         options = example["choices"]

#         # Map the correct answer letter to its index
#         correct_index = example["answer"]

#         # Mask one incorrect option randomly
#         incorrect_indices = [i for i in range(len(options)) if i != correct_index]
#         masked_index = random.choice(incorrect_indices)
#         masked_options = options.copy()
#         masked_options[masked_index] = "______"

#         processed_data.append({
#             "question": question,
#             "options": options,
#             "masked_options": masked_options,
#             "correct_index": correct_index,
#             "masked_index": masked_index,
#         })
#     return processed_data

# processed_data = preprocess_mmlu(dataset)

# # Step 3: Load GPT-2 Model and Tokenizer
# model_name = "gpt2"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = AutoModelForCausalLM.from_pretrained(model_name)

# # Step 4: Predict the Masked Option
# def predict_with_gpt2(question, masked_options, tokenizer, model):
#     """
#     Uses GPT-2 to predict the masked option in a multiple-choice question.
#     """
#     # Prepare the input prompt
#     input_prompt = (
#         f"Question: {question}\n"
#         f"Options: {' | '.join(masked_options)}\n"
#         f"The missing option is:"
#     )

#     # Tokenize input
#     input_ids = tokenizer(input_prompt, return_tensors="pt").input_ids

#     # Generate prediction
#     with torch.no_grad():
#         output_ids = model.generate(input_ids, max_length=input_ids.shape[1] + 10, num_return_sequences=1)
#     output_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)

#     # Extract the predicted option
#     predicted_text = output_text.replace(input_prompt, "").strip()
#     return predicted_text

# # Step 5: Evaluate the Model on MMLU
# def evaluate_gpt2(data, tokenizer, model):
#     """
#     Evaluates GPT-2 on the MMLU dataset by calculating accuracy on masked options.
#     """
#     correct_predictions = 0
#     total_predictions = 0

#     for example in data:
#         question = example["question"]
#         masked_options = example["masked_options"]
#         correct_option = example["options"][example["correct_index"]]
#         masked_option = example["options"][example["masked_index"]]
#         print(f"the example is: {example}")

#         # Predict the masked option
#         predicted_option = predict_with_gpt2(question, masked_options, tokenizer, model)

#         print(f"predicted option is: {predicted_option}")
#         # Check if the prediction matches the masked option
#         if predicted_option.strip().lower() == masked_option.strip().lower():
#             correct_predictions += 1
#         total_predictions += 1

#     # Calculate accuracy
#     accuracy = (correct_predictions / total_predictions) * 100
#     return accuracy

# # Step 6: Run Evaluation
# accuracy = evaluate_gpt2(processed_data[:50], tokenizer, model)  # Test on the first 50 examples for faster evaluation
# print(f"GPT-2 Accuracy on MMLU: {accuracy:.2f}%")

## Using cosine similarity
# Install necessary libraries
!pip install transformers datasets sentence-transformers

# Import libraries
import random
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer, util

# Step 1: Load the MMLU Dataset
# Requests the "all" configuration of "hails/mmlu_no_train" through
# `datasets.load_dataset`. The returned object is indexed by split name later
# in this script (preprocessing reads its 'test' split).
dataset = load_dataset("hails/mmlu_no_train", "all")

# Step 2: Preprocess the Data
def preprocess_mmlu(data, split="test"):
    """
    Prepares the MMLU dataset by randomly masking one incorrect option.

    Args:
        data: Mapping from split name to an iterable of examples. Each example
            is read through the keys "question", "choices" and "answer".
            "answer" is compared against positions in "choices", i.e. it is
            treated as the index of the correct option.
        split: Name of the split to process. Defaults to "test", which is the
            split this function always used before the parameter existed.

    Returns:
        A list with one dict per processed example, containing:
            "question":       the question text,
            "options":        the original choices (unmodified),
            "masked_options": a copy of the choices with one incorrect entry
                              replaced by "______",
            "correct_index":  index of the correct option,
            "masked_index":   index of the option that was masked.
        Examples that have no incorrect option to mask are skipped.
    """
    processed_data = []
    for example in data[split]:
        question = example["question"]
        options = example["choices"]
        correct_index = example["answer"]

        # Candidate positions to hide: every option except the correct one.
        incorrect_indices = [i for i, _ in enumerate(options) if i != correct_index]
        if not incorrect_indices:
            # random.choice raises IndexError on an empty sequence; an example
            # without any distractor cannot be masked, so leave it out.
            continue

        masked_index = random.choice(incorrect_indices)
        # Work on a copy so the record keeps the untouched options as well.
        masked_options = list(options)
        masked_options[masked_index] = "______"

        processed_data.append({
            "question": question,
            "options": options,
            "masked_options": masked_options,
            "correct_index": correct_index,
            "masked_index": masked_index,
        })
    return processed_data

# Build the masked-option records once, up front, from the loaded dataset;
# the evaluation step below consumes a slice of this list.
processed_data = preprocess_mmlu(dataset)

# Step 3: Load GPT-2 Model and Tokenizer
# The same checkpoint name is passed to both loaders so the tokenizer and the
# causal language model come from the same checkpoint.
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Step 4: Load SentenceTransformer for Semantic Matching
# This embedder is handed to the evaluation function, which uses it to compare
# the generated text with the text of the masked option.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Step 5: Predict the Masked Option
def predict_with_gpt2(question, masked_options, tokenizer, model):
    """
    Uses GPT-2 to predict the masked option in a multiple-choice question.

    Args:
        question: The question text.
        masked_options: List of option strings in which the hidden option has
            already been replaced by a placeholder; they are joined with " | ".
        tokenizer: Tokenizer that is called with `return_tensors="pt"` and
            whose result exposes `input_ids` and `attention_mask`; it must also
            provide `decode`, `pad_token_id` and `eos_token_id`.
        model: Causal language model exposing `generate`.

    Returns:
        The text generated after the prompt (at most 10 new tokens), with
        surrounding whitespace stripped. The prompt itself is never included.
    """
    # Prepare the input prompt
    input_prompt = (
        f"Question: {question}\n"
        f"Options: {' | '.join(masked_options)}\n"
        f"The missing option is:"
    )

    # Tokenize input; keep the attention mask so generate() does not have to
    # guess which positions are real tokens.
    encoded = tokenizer(input_prompt, return_tensors="pt")
    input_ids = encoded.input_ids
    attention_mask = encoded.attention_mask

    # GPT-2 ships without a pad token; fall back to EOS explicitly instead of
    # relying on generate() to pick a default and warn on every call.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # Generate prediction: max_new_tokens=10 is the same budget as the former
    # max_length = prompt length + 10.
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=10,
            num_return_sequences=1,
            pad_token_id=pad_token_id,
        )

    # Extract the predicted option by dropping the prompt tokens. Slicing by
    # token position is exact, whereas removing the prompt from the decoded
    # string fails when decoding does not reproduce the prompt verbatim and
    # also deletes any repetition of the prompt inside the generated text.
    prompt_length = input_ids.shape[1]
    generated_ids = output_ids[0][prompt_length:]
    predicted_text = tokenizer.decode(generated_ids, skip_special_tokens=True)
    return predicted_text.strip()

# Step 6: Semantic Matching
def semantic_match(predicted_output, masked_option, embedder, threshold=0.8):
    """
    Decide whether the generated text is semantically close to the masked option.

    Both strings are embedded with `embedder.encode`, their cosine similarity
    is computed with `util.cos_sim`, and the score is printed for debugging.

    Args:
        predicted_output: Text produced by the language model.
        masked_option: Original text of the option that was hidden.
        embedder: Object exposing `encode(text, convert_to_tensor=True)`.
        threshold: Minimum cosine similarity that counts as a match.

    Returns:
        True when the similarity is at least `threshold`, otherwise False.
    """
    # Embed the two texts one after the other, prediction first.
    vec_pred = embedder.encode(predicted_output, convert_to_tensor=True)
    vec_gold = embedder.encode(masked_option, convert_to_tensor=True)

    # cos_sim yields a tensor; pull the single score out as a Python float.
    score = util.cos_sim(vec_pred, vec_gold).item()
    print("Similarity: {:.2f}".format(score))  # Debugging

    is_match = threshold <= score
    return is_match

# Step 7: Evaluate the Model on MMLU with Semantic Matching
def evaluate_gpt2_with_semantics(data, tokenizer, model, embedder, threshold=0.8):
    """
    Evaluates GPT-2 on the MMLU dataset using semantic matching for masked options.

    For every example the model is asked to fill in the masked option, and the
    generated text counts as correct when `semantic_match` reports a match
    against the original text of that option.

    Args:
        data: Iterable of dicts with the keys "question", "masked_options",
            "options" and "masked_index".
        tokenizer: Tokenizer forwarded to `predict_with_gpt2`.
        model: Language model forwarded to `predict_with_gpt2`.
        embedder: Sentence embedder forwarded to `semantic_match`.
        threshold: Similarity threshold forwarded to `semantic_match`.

    Returns:
        Accuracy as a percentage in [0, 100]. Returns 0.0 when `data` yields
        no examples instead of dividing by zero.
    """
    correct_predictions = 0
    total_predictions = 0

    for example in data:
        question = example["question"]
        masked_options = example["masked_options"]
        # Ground truth is the original text at the position that was masked.
        masked_option = example["options"][example["masked_index"]]

        # Predict the masked option
        predicted_output = predict_with_gpt2(question, masked_options, tokenizer, model)
        print(f"Predicted output: {predicted_output}")
        print(f"Masked option: {masked_option}")

        # Check for a semantic match
        if semantic_match(predicted_output, masked_option, embedder, threshold):
            correct_predictions += 1
        total_predictions += 1

    # Nothing was evaluated: report 0% rather than raising ZeroDivisionError.
    if total_predictions == 0:
        return 0.0

    # Calculate accuracy
    accuracy = (correct_predictions / total_predictions) * 100
    return accuracy

# Step 8: Run Evaluation
# Only the first 50 preprocessed examples are scored. A prediction counts as
# correct when its cosine similarity to the masked option is at least 0.8.
accuracy_with_semantics = evaluate_gpt2_with_semantics(processed_data[:50], tokenizer, model, embedder, threshold=0.8)
print(f"GPT-2 Accuracy on MMLU with Semantic Matching: {accuracy_with_semantics:.2f}%")




modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: Freedom of speech.
The Supreme Court has
Masked option: Once a church has been recognized by the federal government, its tax-exempt status can never be revoked.
Similarity: 0.23


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The missing option is:
The missing option
Masked option: Registered voters are more likely to vote in general elections than they are in primary elections.
Similarity: 0.03


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The child's religion
The missing option is:
Masked option: The media to which the child is exposed
Similarity: 0.43


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The Constitution was not ratified by the states.
Masked option: Appealing to the president to adjudicate disputes
Similarity: 0.16


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: to support the candidate who is most likely to win
Masked option: coordinate local get-out-the-vote campaigns
Similarity: 0.25


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The judge is not a judge.
The
Masked option: Removal from office by the appointing president
Similarity: 0.16


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: ______ | president's chief of staff | national security
Masked option: vice president
Similarity: 0.47


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The issue is likely to be a political issue.
Masked option: The number of people affected by the issue
Similarity: 0.56


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The missing option is:
The missing option
Masked option: candidates' positions on international issues
Similarity: -0.05


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: ______ | Separate but equal | Judicial review |
Masked option: One man, one vote
Similarity: 0.17


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: A person who is not a member of the Democratic
Masked option: A former president who advises a current president on a particular foreign policy issue
Similarity: 0.32


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The Supreme Court has the power to override the
Masked option: Presidential executive orders
Similarity: 0.36


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: It provides a structured environment in which factions may flourish
Masked option: It allows factions to dominate on the national level while limiting their influence on state governments.
Similarity: 0.60


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The missing option is:
The missing option
Masked option: Women are more likely to vote Democratic than are men.
Similarity: 0.08


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The legal principle that instructs judges to follow
Masked option: certiorari
Similarity: 0.13


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: ______ | Professionals | Voters who are active in
Masked option: Blue-collar workers who belong to a union
Similarity: 0.25


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: whether representation in Congress should be equal among states or
Masked option: the future of the slave trade
Similarity: 0.14


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: ______ | the power of interest groups has weakened in
Masked option: United States domestic policy grew more conservative in the 1990s
Similarity: 0.18


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The exclusionary rule was established to
Options
Masked option: allow private organizations to restrict their memberships
Similarity: 0.25


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: the members of the committee are not representative of all
Masked option: committee members often lose track of their constituents' priorities because of their frequent contact with special-interest lobbyists
Similarity: 0.52


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The Senate would be required to use the "
Masked option: increase tax rates
Similarity: 0.12


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: the House Rules Committee is considered among the most powerful
Masked option: number of subcommittees that a standing committee may establish at any given time
Similarity: 0.40


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: "The Supreme Court has not ruled on the question
Masked option: Marbury v. Madison
Similarity: 0.37


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The date of the general election is not the
Masked option: restrict the number of entrants in the presidential race
Similarity: 0.24


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: the executive branch can veto legislation approved by the president
Masked option: federal district courts to overturn legislation
Similarity: 0.29


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The campaign must disclose all campaign contributions to the
Masked option: limit campaign contributions
Similarity: 0.73


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The president executes a "pocket veto" by
Masked option: Issuing an executive order invalidating a recently passed bill
Similarity: 0.36


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The Republican Party has become more conservative.
Masked option: It has come to be dominated by labor unions.
Similarity: 0.33


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The missing option is:
The missing option
Masked option: Young adults are less likely to vote than senior citizens.
Similarity: 0.02


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The President has not used executive orders for actions
Masked option: Executive orders have the same effect as laws passed by Congress.
Similarity: 0.58


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The federal government provides most of the funding to
Masked option: Confederation
Similarity: 0.21


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: creating insurmountable obstacles to the founding of factions
Masked option: restricting factional political activity to the state level only
Similarity: 0.59


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The only way to get a majority of voters
Masked option: voters are more likely to support environmental protection efforts
Similarity: 0.35


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The boundary lines of congressional districts must be red
Masked option: determine which party's leader will be named Speaker of the House
Similarity: 0.21


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The judge who is not a judge of the
Masked option: Appellate judges usually decide the facts of a case.
Similarity: 0.59


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The Senate and House of Representatives pass the bill
Masked option: All amendments to the bill are invalidated, and the original version of the bill is sent to the president to sign.
Similarity: 0.46


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: I and IV only.
The missing option is
Masked option: II and IV only
Similarity: 0.66


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The group's constituents are the ones who are
Masked option: Many such groups have been in existence for several decades or more, allowing them to master the legislative system and to develop close ties with legislators.
Similarity: 0.43


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The only way to get the party to agree
Masked option: into a federal structure in which state and local parties have no power
Similarity: 0.35


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: the Constitution does not provide for the creation of a
Masked option: the Supreme Court has original jurisdiction over all constitutional matters
Similarity: 0.35


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: White Southerners.
The Democratic Party has
Masked option: African Americans
Similarity: 0.50


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: ______ | Under some circumstances, the death penalty may
Masked option: The death penalty may only be imposed upon citizens.
Similarity: 0.62


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: ______ | Checks and balances | Equal representation | Un
Masked option: Direct democracy
Similarity: 0.25


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The Senate has a majority of the House's members
Masked option: Members are appointed to the committee in the House but are elected to committees in the Senate.
Similarity: 0.66


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The election is over.
The election is
Masked option: Voters increasingly get their information from newspapers.
Similarity: 0.30


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The right to privacy is not a right of the
Masked option: Common law requires the government to respect citizens' right to privacy.
Similarity: 0.73


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The president would veto the bill.
The missing
Masked option: The majority of American citizens support the proposed legislation.
Similarity: 0.26


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The missing option is:
The missing option
Masked option: Labor unions
Similarity: 0.02


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Predicted output: The Constitution does not provide for the transfer of
Masked option: veto legislative bills of attainder
Similarity: 0.37
Predicted output: The bill would have to be passed by the
Masked option: An adjournment
Similarity: 0.28
GPT-2 Accuracy on MMLU with Semantic Matching: 0.00%
