In [116]:
import argparse
import csv
import numpy as np
import os
import pandas as pd
from pprint import pprint
import time
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig

gpt35_df = pd.read_csv('../conditional/data/112_gsm8k_gpt35_cot_onesent_responses.csv')

model_name = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'left'

model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")
model.generation_config = GenerationConfig.from_pretrained(model_name)
model.generation_config.pad_token_id = model.generation_config.eos_token_id

Loading checkpoint shards: 100%|██████████| 2/2 [00:45<00:00, 22.71s/it]


In [51]:
questions = gpt35_df['Question'].tolist()
overtime_question = questions[9]

In [3]:
num_samples = 1

In [4]:
from utils import get_prompt_message

question_vector = tokenizer.apply_chat_template(get_prompt_message(overtime_question, 0), add_generation_prompt=True, return_tensors="pt")
input_tensor = question_vector.repeat(num_samples, 1)

PROMPT:  Q: Tina makes $18.00 an hour.  If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hours every day for 5 days, how much money does she make?
A: Let's think step by step.


In [5]:
outputs = model.generate(
                input_tensor.to(model.device), 
                max_new_tokens=1000, 
                return_dict_in_generate=True, 
                output_scores=True,
                do_sample=True,
                temperature=0.7,
                top_k=40,
            )

In [6]:
tokenizer.encode(". ")

[1, 842, 28705]

Constrained Generation

In [18]:
stop_sequences = [". ", "\n", ]

stop_ids = [tokenizer.encode(stop_word)[1:] for stop_word in stop_sequences]

In [19]:
stop_ids

[[842, 28705], [28705, 13]]

In [20]:
from transformers import StoppingCriteria, StoppingCriteriaList

In [21]:
class StoppingCriteriaSub(StoppingCriteria):
    def __init__(self, stops = []):
      StoppingCriteria.__init__(self), 

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, stops = []):
      self.stops = stops
      for i in range(len(stops)):
        self.stops = self.stops[i]

In [23]:
stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops = [[842, 28705], [13]])])

In [29]:
# Fix the StoppingCriteriaSub class to properly implement the stopping condition
class StoppingCriteriaSub(StoppingCriteria):
    def __init__(self, stops=[]):
        super().__init__()
        self.stops = stops

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs):
        # Check if any of the stop sequences are in the input_ids
        for stop_sequence in self.stops:
            # Check if the stop sequence is in the input_ids
            if all(torch.eq(input_ids[:, -len(stop_sequence):], torch.tensor(stop_sequence, device=input_ids.device)).all(1)):
                return torch.ones(input_ids.shape[0], dtype=torch.bool, device=input_ids.device)
        return torch.zeros(input_ids.shape[0], dtype=torch.bool, device=input_ids.device)

# Update the stopping criteria initialization with correct stop sequences
stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_ids)])


In [30]:
outputs = model.generate(
                input_tensor.to(model.device), 
                max_new_tokens=1000, 
                return_dict_in_generate=True, 
                output_scores=True,
                do_sample=True,
                temperature=0.7,
                top_k=40,
                stopping_criteria=stopping_criteria
            )

In [32]:
outputs.sequences

tensor([[    1,   733, 16289, 28793,  1186, 28747,   320,  1380,  2870,   429,
         28740, 28783, 28723, 28734, 28734,   396,  5115, 28723, 28705,  1047,
           630,  3791,   680,   821, 28705, 28783,  3316,   660,  6139, 28725,
           630,   349, 18711,   354,   754,  1536, 28725,   690,   349,  5907,
           486,   574,  5115,   346, 21062,   648, 28705, 28740, 28748, 28750,
           574,  5115,   346, 21062, 28723, 28705,  1047,   630,  3791, 28705,
         28740, 28734,  3316,  1012,  1370,   354, 28705, 28782,  2202, 28725,
           910,  1188,  2445,  1235,   630,  1038, 28804,    13, 28741, 28747,
          3169, 28742, 28713,  1073,  3707,   486,  3707, 28723,   733, 28748,
         16289, 28793, 28705, 28740, 28723,  4205, 28725,  1346, 28742, 28713,
          1300,   575,   910,  1188,   320,  1380,  2870,   660,  1370, 28723,
          4577,   630,  3791, 28705, 28740, 28734,  3316,  1012,  1370,   354,
         28705, 28782,  2202, 28725,   559,  3102, 2

In [58]:
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids, scores, **kwargs) -> bool:
        stop_list = [". ", " \n", ]
        stop_token_ids = [tokenizer(x, return_tensors='pt', add_special_tokens=False)['input_ids'] for x in stop_list]
        stop_token_ids = [x.to(model.device) for x in stop_token_ids]
        
        for stop_ids in stop_token_ids:
            if (input_ids[0][-len(stop_ids[0])+1:] == stop_ids[0][1:]).all():
                return True
        return False

In [59]:
stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [45]:
outputs_3 = model.generate(
                input_tensor.to(model.device), 
                max_new_tokens=1000, 
                return_dict_in_generate=True, 
                output_scores=True,
                do_sample=True,
                temperature=0.7,
                top_k=40,
                stopping_criteria=stopping_criteria
            )

In [49]:
tokenizer.decode(outputs_3.sequences[0])

"<s> [INST] Q: Tina makes $18.00 an hour.  If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hours every day for 5 days, how much money does she make?\nA: Let's think step by step. [/INST] "

Generation Chaining

In [52]:
from transformers import StoppingCriteria, StoppingCriteriaList

class StoppingCriteriaSub(StoppingCriteria):
    def __init__(self, stops=[]):
        super().__init__()
        self.stops = stops

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs):
        for stop_sequence in self.stops:
            if all(torch.eq(input_ids[:, -len(stop_sequence):], torch.tensor(stop_sequence, device=input_ids.device)).all(1)):
                return torch.ones(input_ids.shape[0], dtype=torch.bool, device=input_ids.device)
        return torch.zeros(input_ids.shape[0], dtype=torch.bool, device=input_ids.device)

In [56]:
input_tensor.shape

torch.Size([1, 92])

In [55]:
outputs_3.sequences.shape

torch.Size([1, 93])

In [None]:
def exploding_step_generation(num_generations, question):
    question_vector = tokenizer.apply_chat_template(get_prompt_message(question, 0), add_generation_prompt=True, return_tensors="pt")
    input_tensor = question_vector.repeat(num_samples, 1)

    # stop_sequences = [". ", "\n", ]
    # stop_ids = [tokenizer.encode(stop_word)[1:] for stop_word in stop_sequences]

    stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops = [[842, 28705], [13]])]) # hard coded stops: ". ", "\n"

    for i in range(num_generations):
        outputs = model.generate(
                    input_tensor.to(model.device), 
                    max_new_tokens=1000, 
                    return_dict_in_generate=True, 
                    output_scores=True,
                    do_sample=True,
                    temperature=0.7,
                    top_k=40,
                    stopping_criteria=stopping_criteria
                )
    

In [57]:
batch_input = input_tensor.repeat(3, 1)
batch_input.shape

torch.Size([3, 92])

In [None]:
stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops = [[842, 28705], [13]])])

In [60]:
batch_output = model.generate(
                    batch_input.to(model.device), 
                    max_new_tokens=1000,
                    return_dict_in_generate=True, 
                    output_scores=True,
                    do_sample=True,
                    temperature=0.7,
                    top_k=40,
                    stopping_criteria=stopping_criteria
                )

In [61]:
batch_output.sequences.shape

torch.Size([3, 107])

In [62]:
tokenizer.batch_decode(batch_output.sequences)

["<s> [INST] Q: Tina makes $18.00 an hour.  If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hours every day for 5 days, how much money does she make?\nA: Let's think step by step. [/INST] First, let's calculate her daily salary without considering overtime:\n",
 "<s> [INST] Q: Tina makes $18.00 an hour.  If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hours every day for 5 days, how much money does she make?\nA: Let's think step by step. [/INST] Tina makes $18.00 an hour. If she works",
 "<s> [INST] Q: Tina makes $18.00 an hour.  If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hours every day for 5 days, how much money does she make?\nA: Let's think step by step. [/INST] Let's first calculate T

In [63]:
batch_output2 = model.generate(
                    batch_output.sequences, 
                    max_new_tokens=1000,
                    return_dict_in_generate=True, 
                    output_scores=True,
                    do_sample=True,
                    temperature=0.7,
                    top_k=40,
                    stopping_criteria=stopping_criteria
                )

In [64]:
tokenizer.batch_decode(batch_output2.sequences)

["<s> [INST] Q: Tina makes $18.00 an hour.  If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hours every day for 5 days, how much money does she make?\nA: Let's think step by step. [/INST] First, let's calculate her daily salary without considering overtime:\n\n",
 "<s> [INST] Q: Tina makes $18.00 an hour.  If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hours every day for 5 days, how much money does she make?\nA: Let's think step by step. [/INST] Tina makes $18.00 an hour. If she works more",
 "<s> [INST] Q: Tina makes $18.00 an hour.  If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hours every day for 5 days, how much money does she make?\nA: Let's think step by step. [/INST] Let's first calc

In [65]:
batch_output3 = model.generate(
                    batch_output2.sequences, 
                    max_new_tokens=1000,
                    return_dict_in_generate=True, 
                    output_scores=True,
                    do_sample=True,
                    temperature=0.7,
                    top_k=40,
                    stopping_criteria=stopping_criteria
                )

In [66]:
tokenizer.batch_decode(batch_output3.sequences)

["<s> [INST] Q: Tina makes $18.00 an hour.  If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hours every day for 5 days, how much money does she make?\nA: Let's think step by step. [/INST] First, let's calculate her daily salary without considering overtime:\n\nDaily salary = Hourly wage x Hours worked per day\n",
 "<s> [INST] Q: Tina makes $18.00 an hour.  If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hours every day for 5 days, how much money does she make?\nA: Let's think step by step. [/INST] Tina makes $18.00 an hour. If she works more than 8 hours per shift, she is eligible for overtime. The",
 "<s> [INST] Q: Tina makes $18.00 an hour.  If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hou

In [73]:
from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnTokens(StoppingCriteria):
    def __init__(self, stops=[]):
        super().__init__()
        self.stops = [torch.tensor(stop, dtype=torch.long) for stop in stops]  # Convert stops to tensors immediately

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in self.stops:
            stop_ids = stop_ids.to(input_ids.device)  # Ensure stop_ids are on the same device as input_ids
            if input_ids.shape[1] >= len(stop_ids):  # Ensure input_ids is long enough
                if torch.eq(input_ids[:, -len(stop_ids):], stop_ids).all(dim=1).any():
                    return True
        return False

class StoppingCriteriaSub(StoppingCriteria):
    def __init__(self, stops=[]):
        super().__init__()
        self.stops = stops

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs):
        # Check if the last tokens of input_ids match any of the stop sequences
        for stop_sequence in self.stops:
            # Check the last len(stop_sequence) tokens of input_ids
            if input_ids.shape[1] >= len(stop_sequence):
                if all(torch.eq(input_ids[:, -len(stop_sequence):], torch.tensor(stop_sequence, device=input_ids.device)).all(1)):
                    return torch.ones(input_ids.shape[0], dtype=torch.bool, device=input_ids.device)
        return torch.zeros(input_ids.shape[0], dtype=torch.bool, device=input_ids.device)

stops = [[842, 28705], [13]]
# stopping_criteria2 = StoppingCriteriaList([StoppingCriteriaSub(stops=stops)])
stopping_criteria = StoppingCriteriaList([StopOnTokens(stops=stops)])

In [None]:
class OneSentenceCriteria(StoppingCriteria):
    def __init__(self, stop_ids=[]):
        # super().__init__()
        self.stops = stop_ids

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Check each stop sequence against the end of the input_ids
        for stop_ids in self.stop_token_ids:
            if input_ids.shape[1] >= len(stop_ids):  # Ensure input_ids is long enough
                # Check if the last tokens of input_ids match the stop sequence
                if torch.eq(input_ids[:, -len(stop_ids):], torch.tensor(stop_ids, device=input_ids.device)).all(dim=1).any():
                    return True
        return False

In [99]:
class BatchSentenceStoppingCriteria(StoppingCriteria):
    def __init__(self, tokenizer, stop_sequences=None):
        # Encode each stop sequence and store their token IDs
        self.stop_token_ids = [[842, 28705], [13]]
        if stop_sequences:
            self.stop_token_ids = [tokenizer.encode(seq, add_special_tokens=False) for seq in stop_sequences]

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # Initialize a tensor to keep track of which sequences should stop
        stop_mask = torch.zeros(input_ids.shape[0], dtype=torch.bool, device=input_ids.device)
        
        # Check each stop sequence against the end of the input_ids for each sequence in the batch
        for stop_ids in self.stop_token_ids:
            if input_ids.shape[1] >= len(stop_ids):  # Ensure input_ids is long enough
                # Check if the last tokens of input_ids match the stop sequence
                match = torch.eq(input_ids[:, -len(stop_ids):], torch.tensor(stop_ids, device=input_ids.device)).all(dim=1)
                stop_mask |= match  # Update the stop mask
        
        return stop_mask

In [85]:
batch_output = model.generate(
                    batch_input.to(model.device), 
                    max_new_tokens=1000,
                    return_dict_in_generate=True, 
                    output_scores=True,
                    do_sample=True,
                    temperature=0.7,
                    top_k=40,
                    stopping_criteria=StoppingCriteriaList([BatchSentenceStoppingCriteria(tokenizer)])
                )

In [86]:
batch_output.sequences

tensor([[    1,   733, 16289, 28793,  1186, 28747,   320,  1380,  2870,   429,
         28740, 28783, 28723, 28734, 28734,   396,  5115, 28723, 28705,  1047,
           630,  3791,   680,   821, 28705, 28783,  3316,   660,  6139, 28725,
           630,   349, 18711,   354,   754,  1536, 28725,   690,   349,  5907,
           486,   574,  5115,   346, 21062,   648, 28705, 28740, 28748, 28750,
           574,  5115,   346, 21062, 28723, 28705,  1047,   630,  3791, 28705,
         28740, 28734,  3316,  1012,  1370,   354, 28705, 28782,  2202, 28725,
           910,  1188,  2445,  1235,   630,  1038, 28804,    13, 28741, 28747,
          3169, 28742, 28713,  1073,  3707,   486,  3707, 28723,   733, 28748,
         16289, 28793,  4205, 28725,  1346, 28742, 28713, 13911,   320,  1380,
         28742, 28713,  4392, 21062,   354,  2739, 28705, 28782, 28734,  3316,
           297, 28705, 28782,  2202, 28747,    13],
        [    1,   733, 16289, 28793,  1186, 28747,   320,  1380,  2870,   429,


In [87]:
tokenizer.batch_decode(batch_output.sequences)

["<s> [INST] Q: Tina makes $18.00 an hour.  If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hours every day for 5 days, how much money does she make?\nA: Let's think step by step. [/INST] First, let's calculate Tina's regular wage for working 50 hours in 5 days:\n",
 "<s> [INST] Q: Tina makes $18.00 an hour.  If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hours every day for 5 days, how much money does she make?\nA: Let's think step by step. [/INST] First, we need to calculate Tina's daily wage without considering overtime. \n</s></s></s></s></s>",
 "<s> [INST] Q: Tina makes $18.00 an hour.  If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hours every day for 5 days, how much money does she mak

In [107]:
def iterative_generation(model, initial_input, num_samples, num_iterations, stopping_criteria, min_new_tokens=0):
    current_input = initial_input.repeat(num_samples, 1)
    all_generated_sequences = []

    for _ in range(num_iterations):
        output = model.generate(
            current_input.to(model.device),
            min_new_tokens=min_new_tokens,
            max_new_tokens=1000,  # Adjust as necessary
            return_dict_in_generate=True,
            output_scores=True,
            do_sample=True,
            temperature=0.7,
            top_k=40,
            stopping_criteria=stopping_criteria
        )

        # Decode and store the generated sequences
        decoded_sequences = tokenizer.batch_decode(output.sequences, skip_special_tokens=True)
        print(output.sequences)
        print(decoded_sequences)
        print()

        all_generated_sequences.append(decoded_sequences)

        # Find the end of the generated sequences (excluding padding and EOS tokens)
        end_indices = (output.sequences == tokenizer.pad_token_id).nonzero()[:, 1]
        end_indices[end_indices == 0] = output.sequences.shape[1]  # If no padding token found, use the sequence length

        # Use the generated sequences up to the end indices as the next input
        current_input = output.sequences[:, :end_indices.min().item()]

    return all_generated_sequences

In [121]:
stops = [[842, 28705], [13]]  # Token IDs for ". " and "\n"
stopping_criteria = StoppingCriteriaList([BatchSentenceStoppingCriteria(tokenizer)])


num_iterations = 2
num_samples = 2
min_new_tokens = 10
generated_texts = iterative_generation(model, input_tensor, num_samples, num_iterations, stopping_criteria, min_new_tokens)

for i, texts in enumerate(generated_texts):
    print(f"Iteration {i+1}:")
    for text in texts:
        print(text)
    print("\n")

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


tensor([[    1,   733, 16289, 28793,  1186, 28747,   320,  1380,  2870,   429,
         28740, 28783, 28723, 28734, 28734,   396,  5115, 28723, 28705,  1047,
           630,  3791,   680,   821, 28705, 28783,  3316,   660,  6139, 28725,
           630,   349, 18711,   354,   754,  1536, 28725,   690,   349,  5907,
           486,   574,  5115,   346, 21062,   648, 28705, 28740, 28748, 28750,
           574,  5115,   346, 21062, 28723, 28705,  1047,   630,  3791, 28705,
         28740, 28734,  3316,  1012,  1370,   354, 28705, 28782,  2202, 28725,
           910,  1188,  2445,  1235,   630,  1038, 28804,    13, 28741, 28747,
          3169, 28742, 28713,  1073,  3707,   486,  3707, 28723,   733, 28748,
         16289, 28793,  4205, 28725,  1346, 28742, 28713,  1300,   575,   910,
          1188,   320,  1380,  2870,   660,  5115, 28747,    13,     2,     2,
             2,     2,     2,     2,     2,     2],
        [    1,   733, 16289, 28793,  1186, 28747,   320,  1380,  2870,   429,


KeyboardInterrupt: 

In [None]:
stops = [[842, 28705], [13]]  # Token IDs for ". " and "\n"
stopping_criteria = StoppingCriteriaList([BatchSentenceStoppingCriteria(tokenizer)])


num_iterations = 3
num_samples = 2
generated_texts = iterative_generation(model, input_tensor, num_samples, num_iterations, stopping_criteria)

for i, texts in enumerate(generated_texts):
    print(f"Iteration {i+1}:")
    for text in texts:
        print(text)
    print("\n")

In [106]:
num_iterations = 10
num_samples = 3
generated_texts = iterative_generation(model, input_tensor, num_samples, num_iterations, stopping_criteria)

for i, texts in enumerate(generated_texts):
    print(f"Iteration {i+1}:")
    for text in texts:
        print(text)
    print("\n")

tensor([[    1,   733, 16289, 28793,  1186, 28747,   320,  1380,  2870,   429,
         28740, 28783, 28723, 28734, 28734,   396,  5115, 28723, 28705,  1047,
           630,  3791,   680,   821, 28705, 28783,  3316,   660,  6139, 28725,
           630,   349, 18711,   354,   754,  1536, 28725,   690,   349,  5907,
           486,   574,  5115,   346, 21062,   648, 28705, 28740, 28748, 28750,
           574,  5115,   346, 21062, 28723, 28705,  1047,   630,  3791, 28705,
         28740, 28734,  3316,  1012,  1370,   354, 28705, 28782,  2202, 28725,
           910,  1188,  2445,  1235,   630,  1038, 28804,    13, 28741, 28747,
          3169, 28742, 28713,  1073,  3707,   486,  3707, 28723,   733, 28748,
         16289, 28793,   320,  1380,  2870,   429, 28740, 28783, 28723, 28734,
         28734,   396,  5115,   304,  3791,  1012,  1370,   354, 28705, 28782,
          2202, 28725,   579,   630,  2870,   429, 28740, 28783, 28723, 28734,
         28734,  1318, 28705, 28782,   327,   429, 2

Incorrect results above, new approach...

In [117]:
def iterative_generation(model, initial_input, num_samples, num_iterations, stopping_criteria, min_new_tokens=0):
    current_input = initial_input.repeat(num_samples, 1)
    all_generated_sequences = []

    for _ in range(num_iterations):
        output = model.generate(
            current_input.to(model.device),
            min_new_tokens=min_new_tokens,
            max_new_tokens=1000,  # Adjust as necessary
            return_dict_in_generate=True,
            output_scores=True,
            do_sample=True,
            temperature=0.7,
            top_k=40,
            stopping_criteria=stopping_criteria
        )

        generated_sequences = tokenizer.batch_decode(output.sequences, skip_special_tokens=True)
        print(output.sequences)
        print(generated_sequences)
        print()

        all_generated_sequences.append(generated_sequences)

        # Update the current input with the newly generated sequences
        current_input = output.sequences

    return all_generated_sequences

In [118]:
tokenizer.pad_token_id

2

In [120]:
tokenizer.padding_side = 'left'

In [123]:
model.generation_config

GenerationConfig {
  "bos_token_id": 1,
  "eos_token_id": 2,
  "pad_token_id": 2
}

In [124]:
tokenizer.padding_side

'left'

In [122]:
num_iterations = 3
num_samples = 2
generated_texts = iterative_generation(model, input_tensor, num_samples, num_iterations, stopping_criteria)

for i, texts in enumerate(generated_texts):
    print(f"Iteration {i+1}:")
    for text in texts:
        print(text)
    print("\n")

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


tensor([[    1,   733, 16289, 28793,  1186, 28747,   320,  1380,  2870,   429,
         28740, 28783, 28723, 28734, 28734,   396,  5115, 28723, 28705,  1047,
           630,  3791,   680,   821, 28705, 28783,  3316,   660,  6139, 28725,
           630,   349, 18711,   354,   754,  1536, 28725,   690,   349,  5907,
           486,   574,  5115,   346, 21062,   648, 28705, 28740, 28748, 28750,
           574,  5115,   346, 21062, 28723, 28705,  1047,   630,  3791, 28705,
         28740, 28734,  3316,  1012,  1370,   354, 28705, 28782,  2202, 28725,
           910,  1188,  2445,  1235,   630,  1038, 28804,    13, 28741, 28747,
          3169, 28742, 28713,  1073,  3707,   486,  3707, 28723,   733, 28748,
         16289, 28793, 28705, 28740, 28723,  4205, 28725,  1346, 28742, 28713,
         13911,   320,  1380, 28742, 28713,  6790, 24431,  1671,  9868,   754,
          1536, 28723,   985,  3791, 28705, 28740, 28734,  3316,  1012,  1370,
           354, 28705, 28782,  2202, 28725,   579,  

A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.


tensor([[    1,   733, 16289, 28793,  1186, 28747,   320,  1380,  2870,   429,
         28740, 28783, 28723, 28734, 28734,   396,  5115, 28723, 28705,  1047,
           630,  3791,   680,   821, 28705, 28783,  3316,   660,  6139, 28725,
           630,   349, 18711,   354,   754,  1536, 28725,   690,   349,  5907,
           486,   574,  5115,   346, 21062,   648, 28705, 28740, 28748, 28750,
           574,  5115,   346, 21062, 28723, 28705,  1047,   630,  3791, 28705,
         28740, 28734,  3316,  1012,  1370,   354, 28705, 28782,  2202, 28725,
           910,  1188,  2445,  1235,   630,  1038, 28804,    13, 28741, 28747,
          3169, 28742, 28713,  1073,  3707,   486,  3707, 28723,   733, 28748,
         16289, 28793, 28705, 28740, 28723,  4205, 28725,  1346, 28742, 28713,
         13911,   320,  1380, 28742, 28713,  6790, 24431,  1671,  9868,   754,
          1536, 28723,   985,  3791, 28705, 28740, 28734,  3316,  1012,  1370,
           354, 28705, 28782,  2202, 28725,   579,  

Right to Left padding conversion

In [127]:
import torch

# Simulated output sequences from a model (right-padded)
sequences = torch.tensor([
    [101, 102, 103, 0, 0, 0],  # Sequence 1
    [201, 202, 203, 204, 0, 0],  # Sequence 2
    [301, 302, 0, 0, 0, 0],
    [301, 302, 2, 1, 1, 1]  # Sequence 3

])

# Tokenizer's pad token ID
pad_token_id = 0

# Trim right padding from each sequence individually
trimmed_sequences = []
for seq in sequences:
    # Find the first pad token index or use the full length if no pad token is found
    pad_index = (seq == pad_token_id).nonzero(as_tuple=True)[0]
    if len(pad_index) > 0:
        trimmed_sequences.append(seq[:pad_index[0]])
    else:
        trimmed_sequences.append(seq)

# Determine the maximum length after trimming
max_length = max(len(seq) for seq in trimmed_sequences)
padded_input = torch.full((len(trimmed_sequences), max_length), fill_value=pad_token_id, dtype=torch.long)

# Ensure left padding for the next input
for i, seq in enumerate(trimmed_sequences):
    padded_input[i, -len(seq):] = seq  # Copy sequence to the rightmost part of the tensor

# Print the original, trimmed, and left-padded sequences
print("Original Sequences:")
print(sequences)
print("\nTrimmed Sequences:")
for seq in trimmed_sequences:
    print(seq)
print("\nLeft-Padded Sequences:")
print(padded_input)

Original Sequences:
tensor([[101, 102, 103,   0,   0,   0],
        [201, 202, 203, 204,   0,   0],
        [301, 302,   0,   0,   0,   0],
        [301, 302,   2,   1,   1,   1]])

Trimmed Sequences:
tensor([101, 102, 103])
tensor([201, 202, 203, 204])
tensor([301, 302])
tensor([301, 302,   2,   1,   1,   1])

Left-Padded Sequences:
tensor([[  0,   0,   0, 101, 102, 103],
        [  0,   0, 201, 202, 203, 204],
        [  0,   0,   0,   0, 301, 302],
        [301, 302,   2,   1,   1,   1]])


In [135]:
def iterative_generation(model, tokenizer, initial_input, num_samples, num_iterations, stopping_criteria, min_new_tokens=0):
    current_input = initial_input.repeat(num_samples, 1)
    all_generated_sequences = []

    for _ in range(num_iterations):
        output = model.generate(
            current_input.to(model.device),
            min_new_tokens=min_new_tokens,
            max_new_tokens=1000,
            return_dict_in_generate=True,
            output_scores=True,
            do_sample=True,
            temperature=0.7,
            top_k=40,
            stopping_criteria=stopping_criteria
        )

        # Decode and store the generated sequences
        generated_sequences = tokenizer.batch_decode(output.sequences, skip_special_tokens=True)
        print(output.sequences)
        print(generated_sequences)
        print()

        all_generated_sequences.append(generated_sequences)

        # Trim right padding from each sequence individually
        trimmed_sequences = []
        for seq in output.sequences:
            # Find the first pad token index or use the full length if no pad token is found
            pad_index = (seq == tokenizer.pad_token_id).nonzero(as_tuple=True)[0]
            if len(pad_index) > 0:
                trimmed_sequences.append(seq[:pad_index[0]])
            else:
                trimmed_sequences.append(seq)

        # Determine the maximum length after trimming
        max_length = max(len(seq) for seq in trimmed_sequences)
        padded_input = torch.full((num_samples, max_length), fill_value=tokenizer.pad_token_id, dtype=torch.long, device=model.device)

        # Ensure left padding for the next input
        for i, seq in enumerate(trimmed_sequences):
            padded_input[i, -len(seq):] = seq  # Copy sequence to the rightmost part of the tensor

        current_input = padded_input

    return all_generated_sequences

In [136]:
num_iterations = 3
num_samples = 2
generated_texts = iterative_generation(model, tokenizer, input_tensor, num_samples, num_iterations, stopping_criteria)

for i, texts in enumerate(generated_texts):
    print(f"Iteration {i+1}:")
    for text in texts:
        print(text)
    print("\n")

tensor([[    1,   733, 16289, 28793,  1186, 28747,   320,  1380,  2870,   429,
         28740, 28783, 28723, 28734, 28734,   396,  5115, 28723, 28705,  1047,
           630,  3791,   680,   821, 28705, 28783,  3316,   660,  6139, 28725,
           630,   349, 18711,   354,   754,  1536, 28725,   690,   349,  5907,
           486,   574,  5115,   346, 21062,   648, 28705, 28740, 28748, 28750,
           574,  5115,   346, 21062, 28723, 28705,  1047,   630,  3791, 28705,
         28740, 28734,  3316,  1012,  1370,   354, 28705, 28782,  2202, 28725,
           910,  1188,  2445,  1235,   630,  1038, 28804,    13, 28741, 28747,
          3169, 28742, 28713,  1073,  3707,   486,  3707, 28723,   733, 28748,
         16289, 28793,  4205, 28725,  1346, 28742, 28713, 13911,   910,  1188,
          2445,   320,  1380,  2870,  1938,   559,  4392,  3316, 28723, 28705,
            13,     2,     2,     2,     2,     2,     2,     2,     2,     2,
             2,     2,     2,     2,     2,     2,  

RuntimeError: The expanded size of the tensor (130) must match the existing size (0) at non-singleton dimension 0.  Target sizes: [130].  Tensor sizes: [0]

In [139]:
def iterative_generation(model, tokenizer, initial_input, num_samples, num_iterations, stopping_criteria, min_new_tokens=0):
    current_input = initial_input.repeat(num_samples, 1)
    all_generated_sequences = []

    for _ in range(num_iterations):
        output = model.generate(
            current_input.to(model.device),
            min_new_tokens=min_new_tokens,
            return_dict_in_generate=True,
            output_scores=True,
            do_sample=True,
            temperature=0.7,
            top_k=40,
            stopping_criteria=stopping_criteria
        )

        # Decode and store the generated sequences
        generated_sequences = tokenizer.batch_decode(output.sequences, skip_special_tokens=True)
        print(output.sequences)
        print(generated_sequences)
        print()

        all_generated_sequences.append(generated_sequences)

        # Trim padding from both sides of each sequence
        trimmed_sequences = []
        for seq in output.sequences:
            # Find indices of the first and last non-pad tokens
            non_pad_indices = (seq != tokenizer.pad_token_id).nonzero(as_tuple=True)[0]
            if len(non_pad_indices) > 0:
                start_index = non_pad_indices[0]
                end_index = non_pad_indices[-1] + 1
                trimmed_sequences.append(seq[start_index:end_index])
            else:
                # If the entire sequence is padding, use an empty sequence
                trimmed_sequences.append(torch.tensor([], dtype=torch.long, device=model.device))

        # Determine the maximum length after trimming
        max_length = max(len(seq) for seq in trimmed_sequences)
        padded_input = torch.full((num_samples, max_length), fill_value=tokenizer.pad_token_id, dtype=torch.long, device=model.device)

        # Ensure left padding for the next input
        for i, seq in enumerate(trimmed_sequences):
            padded_input[i, -len(seq):] = seq  # Copy sequence to the rightmost part of the tensor

        current_input = padded_input

    return all_generated_sequences

In [140]:
num_iterations = 3
num_samples = 2
generated_texts = iterative_generation(model, tokenizer, input_tensor, num_samples, num_iterations, stopping_criteria)

for i, texts in enumerate(generated_texts):
    print(f"Iteration {i+1}:")
    for text in texts:
        print(text)
    print("\n")

tensor([[    1,   733, 16289, 28793,  1186, 28747,   320,  1380,  2870,   429,
         28740, 28783, 28723, 28734, 28734,   396,  5115, 28723, 28705,  1047,
           630,  3791,   680,   821, 28705, 28783,  3316,   660,  6139, 28725,
           630,   349, 18711,   354,   754,  1536, 28725,   690,   349,  5907,
           486,   574,  5115,   346, 21062,   648, 28705, 28740, 28748, 28750,
           574,  5115,   346, 21062, 28723, 28705,  1047,   630,  3791, 28705,
         28740, 28734,  3316,  1012,  1370,   354, 28705, 28782,  2202, 28725,
           910,  1188,  2445,  1235,   630,  1038, 28804,    13, 28741, 28747,
          3169, 28742, 28713,  1073,  3707,   486,  3707, 28723,   733, 28748,
         16289, 28793,  4205, 28725,  1346, 28742, 28713, 13911,   320,  1380,
         28742, 28713,  6790, 24431, 28747,    13,     2,     2,     2,     2,
             2,     2,     2,     2,     2,     2,     2,     2,     2],
        [    1,   733, 16289, 28793,  1186, 28747,   320, 

In [141]:
num_iterations = 8
num_samples = 2
generated_texts = iterative_generation(model, tokenizer, input_tensor, num_samples, num_iterations, stopping_criteria)

for i, texts in enumerate(generated_texts):
    print(f"Iteration {i+1}:")
    for text in texts:
        print(text)
    print("\n")

tensor([[    1,   733, 16289, 28793,  1186, 28747,   320,  1380,  2870,   429,
         28740, 28783, 28723, 28734, 28734,   396,  5115, 28723, 28705,  1047,
           630,  3791,   680,   821, 28705, 28783,  3316,   660,  6139, 28725,
           630,   349, 18711,   354,   754,  1536, 28725,   690,   349,  5907,
           486,   574,  5115,   346, 21062,   648, 28705, 28740, 28748, 28750,
           574,  5115,   346, 21062, 28723, 28705,  1047,   630,  3791, 28705,
         28740, 28734,  3316,  1012,  1370,   354, 28705, 28782,  2202, 28725,
           910,  1188,  2445,  1235,   630,  1038, 28804,    13, 28741, 28747,
          3169, 28742, 28713,  1073,  3707,   486,  3707, 28723,   733, 28748,
         16289, 28793, 28705, 28740, 28723,   320,  1380, 28742, 28713,  5115,
           346, 21062,   349,   429, 28740, 28783, 28723, 28734, 28734, 28723,
         28705,    13],
        [    1,   733, 16289, 28793,  1186, 28747,   320,  1380,  2870,   429,
         28740, 28783, 28723

In [143]:
for i in range(len(generated_texts[0])):
    for j, text in enumerate(generated_texts):
        print(text[i])
        print()
    print("--------------------------------")

[INST] Q: Tina makes $18.00 an hour.  If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hours every day for 5 days, how much money does she make?
A: Let's think step by step. [/INST] 1. Tina's hourly wage is $18.00. 


[INST] Q: Tina makes $18.00 an hour.  If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hours every day for 5 days, how much money does she make?
A: Let's think step by step. [/INST] 1. Tina's hourly wage is $18.00. 
2. She works more than 8 hours per shift, so she is eligible for overtime. 


[INST] Q: Tina makes $18.00 an hour.  If she works more than 8 hours per shift, she is eligible for overtime, which is paid by your hourly wage + 1/2 your hourly wage.  If she works 10 hours every day for 5 days, how much money does she make?
A: Let's think step by step. [/INST] 1. Tina's ho