In [2]:
from datasets import load_dataset
import numpy as np

# Load the RTE dataset
dataset = load_dataset("glue", "rte")
train_dataset = dataset["train"]

def add_feature(example, length=8, train_dataset=None, num_of_ctx_items=3):
    context = []
    for _ in range(num_of_ctx_items):
        jj = np.random.randint(length)
        s1 = train_dataset['sentence1'][jj]
        s2 = train_dataset['sentence2'][jj]
        label = str(train_dataset['label'][jj])
        context.append([s1, s2, label])
    
    example['context'] = context
    return example

# Apply the add_feature function to the dataset
new_train_dataset = train_dataset.map(
    add_feature, 
    fn_kwargs={
        'length': len(train_dataset), 
        'train_dataset': train_dataset, 
        'num_of_ctx_items': 3
    }
)

# Display the dataset info
print(new_train_dataset)

# Display a sample from the dataset
print(new_train_dataset[0])

Downloading readme:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/584k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/69.0k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/621k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2490 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/277 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2490 [00:00<?, ? examples/s]

Dataset({
    features: ['sentence1', 'sentence2', 'label', 'idx', 'context'],
    num_rows: 2490
})
{'sentence1': 'No Weapons of Mass Destruction Found in Iraq Yet.', 'sentence2': 'Weapons of Mass Destruction Found in Iraq.', 'label': 1, 'idx': 0, 'context': [['Following the Israel-Egypt Peace Treaty of 1979, Israel agreed to withdraw from the Sinai Peninsula, in exchange for peace with its neighbor. For over two decades, the Sinai Peninsula was home to about 7,000 Israelis.', 'The Israel-Egypt Peace Agreement was signed in 1979.', '0'], ['"A force majeure is an act of God," said attorney Phil Wittmann, who represents the New Orleans Saints and owner Tom Benson\'s local interests.', 'Phil Wittmann works for Tom Benson.', '0'], ['Galvarino Apablaza is the leader of the Marxist group FPMR - Manuel Rodriguez Patriotic Front. FPMR was founded in 1983 and became the armed wing of the Chilean Communist Party to carry out terrorist attacks against the Chilean militarist government of the dic

In [21]:
def create_prompt_single(example):
    prompt = "Task: Determine the relationship between the following two sentences. Answer 0 for 'entailment', 1 for 'contradiction'.\n\n"

    # Add context examples
    num_examples = len(example['context'])
    if num_examples > 0:
            prompt += f"Here are {num_examples} example{'s' if num_examples > 1 else ''} to help you:\n\n"
    for idx, (s1, s2, label) in enumerate(example['context'], 1):
        prompt += f"Example {idx}:\n"
        prompt += f"Sentence1: {s1}\n"
        prompt += f"Sentence2: {s2}\n"
        prompt += f"Answer: {label}\n\n"
    
    # Add the actual question
    prompt += "Now, please answer the following question:\n\n"
    prompt += "Question:\n"
    prompt += f"Sentence1: {example['sentence1']}\n"
    prompt += f"Sentence2: {example['sentence2']}\n"
    prompt += "Answer: "
    
    return prompt

In [22]:
sample_data = new_train_dataset[0]
sample_prompt = create_prompt_single(sample_data)
print(sample_prompt)

Task: Determine the relationship between the following two sentences. Answer 0 for 'entailment', 1 for 'contradiction'.

Here are 3 examples to help you:

Example 1:
Sentence1: Following the Israel-Egypt Peace Treaty of 1979, Israel agreed to withdraw from the Sinai Peninsula, in exchange for peace with its neighbor. For over two decades, the Sinai Peninsula was home to about 7,000 Israelis.
Sentence2: The Israel-Egypt Peace Agreement was signed in 1979.
Answer: 0

Example 2:
Sentence1: "A force majeure is an act of God," said attorney Phil Wittmann, who represents the New Orleans Saints and owner Tom Benson's local interests.
Sentence2: Phil Wittmann works for Tom Benson.
Answer: 0

Example 3:
Sentence1: Galvarino Apablaza is the leader of the Marxist group FPMR - Manuel Rodriguez Patriotic Front. FPMR was founded in 1983 and became the armed wing of the Chilean Communist Party to carry out terrorist attacks against the Chilean militarist government of the dictator Augusto Pinochet. I

In [18]:
new_train_dataset[0]['context'][2]

['Galvarino Apablaza is the leader of the Marxist group FPMR - Manuel Rodriguez Patriotic Front. FPMR was founded in 1983 and became the armed wing of the Chilean Communist Party to carry out terrorist attacks against the Chilean militarist government of the dictator Augusto Pinochet. Its name is a tribute for Manuel Rodriguez Erdoiza, considered a Chilean hero in the war of the independence of Chile against Spain.',
 '"Comandante Salvador" is the nickname of Galvarino Apablaza.',
 '1']

In [19]:
def create_prompt_batch(examples):
    prompts = []
    for idx in range(len(examples['sentence1'])):
        prompt = """Task: Determine the relationship between the following two sentences. Answer 0 for 'entailment', 1 for 'contradiction'.\n\n"""

        # Add context examples
        num_examples = len(examples['context'][idx])
        if num_examples > 0:
            prompt += f"Here are {num_examples} example{'s' if num_examples > 1 else ''} to help you:\n\n"
        for ctx_idx, (s1, s2, label) in enumerate(examples['context'][idx], 1):
            prompt += f"Example {ctx_idx}:\n"
            prompt += f"Sentence1: {s1}\n"
            prompt += f"Sentence2: {s2}\n"
            prompt += f"Answer: {label}\n\n"
        
        # Add the actual question
        prompt += "Now, please answer the following question:\n\n"
        prompt += "Question:\n"
        prompt += f"Sentence1: {examples['sentence1'][idx]}\n"
        prompt += f"Sentence2: {examples['sentence2'][idx]}\n"
        prompt += "Answer: "
        
        prompts.append(prompt)
    
    return {"prompt": prompts}

In [20]:
# Generate prompts for the entire dataset
prompted_dataset = new_train_dataset.map(create_prompt_batch, batched=True, remove_columns=new_train_dataset.column_names)

# Check the results
print(prompted_dataset[0]['prompt'])
print(f"\nTotal number of generated prompts: {len(prompted_dataset)}")

Map:   0%|          | 0/2490 [00:00<?, ? examples/s]

Task: Determine the relationship between the following two sentences. Answer 0 for 'entailment', 1 for 'contradiction'.

Here are 3 examples to help you:

Example 1:
Sentence1: Following the Israel-Egypt Peace Treaty of 1979, Israel agreed to withdraw from the Sinai Peninsula, in exchange for peace with its neighbor. For over two decades, the Sinai Peninsula was home to about 7,000 Israelis.
Sentence2: The Israel-Egypt Peace Agreement was signed in 1979.
Answer: 0

Example 2:
Sentence1: "A force majeure is an act of God," said attorney Phil Wittmann, who represents the New Orleans Saints and owner Tom Benson's local interests.
Sentence2: Phil Wittmann works for Tom Benson.
Answer: 0

Example 3:
Sentence1: Galvarino Apablaza is the leader of the Marxist group FPMR - Manuel Rodriguez Patriotic Front. FPMR was founded in 1983 and became the armed wing of the Chilean Communist Party to carry out terrorist attacks against the Chilean militarist government of the dictator Augusto Pinochet. I