#### **Set up the environment**

In [None]:
!pip install huggingface_hub
!pip install -q transformers accelerate

In [None]:
import torch
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed, pipeline

In [None]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Fixed seed so the sampling-based cells are repeatable.
set_seed(42)

#### **Setup models**
Selected Model : LLaMA 3.2-1B (base, instruct)

In [None]:
!huggingface-cli login

In [None]:
# Base model: tokenizer and causal-LM weights come from the same checkpoint.
tokenizer_base = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
model_base = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")
model_base = model_base.to(DEVICE)
# Use the EOS id as the padding id for generation.
model_base.generation_config.pad_token_id = model_base.generation_config.eos_token_id

In [None]:
# Instruct model: tokenizer and causal-LM weights come from the same checkpoint.
tokenizer_instruct = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
model_instruct = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B-Instruct")
model_instruct = model_instruct.to(DEVICE)
# Use the EOS id as the padding id for generation.
model_instruct.generation_config.pad_token_id = model_instruct.generation_config.eos_token_id

#### **Generate sentences with five different decoding strategies**
1. Greedy search
2. Beam-search decoding
3. Top-p sampling
4. Contrastive search
5. DoLa decoding

In [None]:
# Value passed as max_new_tokens by every decoding-strategy helper in this notebook.
DECODE_MAX_NEW_TOKEN = 10

In [None]:
# Greedy search
def greedy_search(**inputs):
  """Generate a continuation from the base model with greedy search.

  Args:
    **inputs: Keyword arguments forwarded unchanged to
      ``model_base.generate`` (the caller passes the tokenizer output).

  Returns:
    The value returned by ``model_base.generate``.
  """
  return model_base.generate(
      **inputs,
      # generate() inherits do_sample from the checkpoint's generation_config
      # when it is not given here. If that config enables sampling, this call
      # would silently sample instead of decoding greedily, so pin it.
      do_sample=False,
      max_new_tokens=DECODE_MAX_NEW_TOKEN,
  )

# Beam-search decoding
def beam_search_decoding(**inputs):
  """Generate a continuation from the base model with 5-beam search.

  Args:
    **inputs: Keyword arguments forwarded unchanged to
      ``model_base.generate`` (the caller passes the tokenizer output).

  Returns:
    The value returned by ``model_base.generate``.
  """
  return model_base.generate(
      **inputs,
      num_beams=5,
      # Without this, a checkpoint generation_config that enables sampling
      # would turn the call into beam-sampling rather than plain beam search.
      do_sample=False,
      max_new_tokens=DECODE_MAX_NEW_TOKEN,
  )

# Top-p sampling
def top_p_sampling(**inputs):
  """Sample a continuation from the base model with top-k / top-p filtering.

  Args:
    **inputs: Keyword arguments forwarded unchanged to
      ``model_base.generate`` (the caller passes the tokenizer output).

  Returns:
    The value returned by ``model_base.generate``.
  """
  sampling_options = dict(
      do_sample=True,
      top_k=50,
      top_p=0.9,
      max_new_tokens=DECODE_MAX_NEW_TOKEN,
  )
  return model_base.generate(**inputs, **sampling_options)

# Contrastive search
def contrastive_search(**inputs):
  """Generate a continuation from the base model with contrastive search.

  Args:
    **inputs: Keyword arguments forwarded unchanged to
      ``model_base.generate`` (the caller passes the tokenizer output).

  Returns:
    The value returned by ``model_base.generate``.
  """
  return model_base.generate(
      **inputs,
      penalty_alpha=0.6,
      top_k=4,
      # Contrastive search is only selected when sampling is off. If the
      # checkpoint's generation_config enables sampling, penalty_alpha would
      # be ignored and this would become top-k sampling, so pin it.
      do_sample=False,
      max_new_tokens=DECODE_MAX_NEW_TOKEN,
  )

# DoLa decoding
def dola_decoding(**inputs):
  """Generate a continuation from the base model with DoLa decoding.

  Args:
    **inputs: Keyword arguments forwarded unchanged to
      ``model_base.generate`` (the caller passes the tokenizer output).

  Returns:
    The value returned by ``model_base.generate``.
  """
  return model_base.generate(
      **inputs,
      dola_layers="high",
      repetition_penalty=1.2,
      # Keep the result deterministic and comparable with the other
      # non-sampling strategies, whatever the checkpoint's generation_config
      # sets for do_sample.
      do_sample=False,
      max_new_tokens=DECODE_MAX_NEW_TOKEN,
  )

def decode(decoding_strategy):
  """Run one decoding strategy over every prompt in ``sentences``.

  For each entry, the prompt is tokenized with ``tokenizer_base``, generated
  with the selected helper, decoded, cut after its first period (if any), and
  stored in ``generated_sentences[key][decoding_strategy]``.

  Args:
    decoding_strategy: One of "greedy", "beam", "top-p", "contrastive",
      "dola".

  Raises:
    ValueError: If ``decoding_strategy`` is not one of the names above.
  """
  strategies = {
      "greedy": greedy_search,
      "beam": beam_search_decoding,
      "top-p": top_p_sampling,
      "contrastive": contrastive_search,
      "dola": dola_decoding,
  }
  # Fail early with a clear message; the previous if/elif chain left
  # `outputs` unbound for an unknown name and crashed at decode time.
  if decoding_strategy not in strategies:
    raise ValueError(
        f"Unknown decoding strategy {decoding_strategy!r}; "
        f"expected one of {sorted(strategies)}"
    )
  generate = strategies[decoding_strategy]

  for key, sentence in sentences.items():
    inputs = tokenizer_base(sentence["prompt"], return_tensors="pt")
    inputs = inputs.to(DEVICE)
    outputs = generate(**inputs)

    decoded_text = tokenizer_base.decode(outputs[0], skip_special_tokens=True)
    # Keep only the text up to and including the first period.
    if "." in decoded_text:
      decoded_text = decoded_text.split(".")[0] + "."
    generated_sentences[key][decoding_strategy] = decoded_text

Three sentences with different topics.

*   Factual information
*   Inference-based explanation
*   Emotional expression



In [None]:
# Each entry pairs the prompt given to the model with the reference ("gold")
# completion it is compared against.
factual_information = dict(
    prompt="The Eiffel Tower is located in",
    gold="The Eiffel Tower is located in Paris, France.",
)

inference = dict(
    prompt="When it rains, people take",
    gold="When it rains, people take umbrellas.",
)

emotional = dict(
    prompt="When he won the prize, he felt",
    gold="When he won the prize, he felt proud.",
)

# Keyed by sentence type; iteration order is factual, inference, emotional.
sentences = dict(
    factual=factual_information,
    inference=inference,
    emotional=emotional,
)

In [None]:
# generate sentences
decoding_strategies = ["greedy", "beam", "top-p", "contrastive", "dola"]

# One empty slot per (sentence type, decoding strategy); decode() fills them.
generated_sentences = {
    sentence_key: {strategy_name: "" for strategy_name in decoding_strategies}
    for sentence_key in ("factual", "inference", "emotional")
}

for decoding_strategy in tqdm(decoding_strategies, desc="generating sentences"):
  decode(decoding_strategy)

# compare generated sentences with gold sentences
section_titles = {
    "factual": "Factual information",
    "inference": "Inference-based explanation",
}
result_rows = [
    ("Greedy search         : ", "greedy"),
    ("Beam-search decoding  : ", "beam"),
    ("Top-p sampling        : ", "top-p"),
    ("Contrastive search    : ", "contrastive"),
    ("DoLa decoding         : ", "dola"),
]

for key in sentences:
  # Any key other than the two listed above is reported as emotional.
  sentence_type = section_titles.get(key, "Emotional expression")

  print(f"\n===== {sentence_type} =====")
  print(f"Prompt                : {sentences[key]['prompt']}")
  print(f"Gold sentence         : {sentences[key]['gold']}")
  print("---------------------------------------------------")
  for row_prefix, row_strategy in result_rows:
    print(f"{row_prefix}{generated_sentences[key][row_strategy]}")

#### **Utilize techniques for complex step-by-step reasoning**
1. question only (base, instruct)
2. few-shot prompting (base, instruct)
3. one-shot chain-of-thought (base, instruct)
4. zero-shot chain-of-thought (base, instruct)

In [None]:
# Generation length budgets (max_new_tokens) for the reasoning experiments,
# which sample with top-p. Judging by the names, COT_MAX_TOKEN is the larger
# budget meant for chain-of-thought prompts and MAX_NEW_TOKEN for the rest.
MAX_NEW_TOKEN = 100
COT_MAX_TOKEN = 400

In [None]:
# Target question for every prompt type. Expected answer: 3 * 5 - 4 = 11.
initial_prompt = """
Question: Bob bought 3 packs of apples, each containing 5 apples. He ate 4 of them. How many apples does he have now?
"""

# Few-shot prompting: two solved question/answer pairs without reasoning,
# placed before the target question.
few_shot = """
Question: Alice bought 4 boxes of oranges, each containing 6 oranges. She ate 5 of them. How many oranges does she have now?
Answer: 19

Question: Tom bought 2 packs of candies, each with 8 candies. He gave 3 to his sister. How many candies does he have now?
Answer: 13
"""

# Chain-of-thought (one-shot): a single worked example with explicit
# intermediate steps, placed before the target question.
chain_of_thought = """
Question: Alice bought 4 boxes of oranges, each containing 6 oranges. She ate 5 of them. How many oranges does she have now?
Solution:
1. Calculate the total number of oranges Alice bought: 4 * 6 = 24
2. Subtract the number of oranges Alice ate: 24 - 5 = 19
3. The answer is the remaining number of oranges: 19
Answer: 19
"""

# Chain-of-thought (zero-shot): a trigger phrase appended after the target
# question, with no worked example.
cot_zero = """
Let's think step by step.
"""

In [None]:
def set_pipeline(model_type):
  """Build a text-generation pipeline around one of the loaded models.

  Args:
    model_type: "base" selects ``model_base`` / ``tokenizer_base``; any other
      value selects ``model_instruct`` / ``tokenizer_instruct``.

  Returns:
    A ``transformers`` text-generation pipeline for the selected model.
  """
  if model_type == "base":
    model, tokenizer = model_base, tokenizer_base
  else:
    model, tokenizer = model_instruct, tokenizer_instruct

  return pipeline(
      "text-generation",
      model=model,
      tokenizer=tokenizer,
      # NOTE(review): the model is already instantiated; confirm whether
      # torch_dtype has any effect in that case.
      torch_dtype=torch.float16,
      # Stay on the device the model already lives on. A hard-coded GPU
      # index 0 breaks on CPU-only hosts, which the notebook's DEVICE setup
      # explicitly allows for.
      device=model.device,
  )

def get_prompt(prompt_type):
  """Assemble the full prompt text for a prompting technique.

  Args:
    prompt_type: One of "question only", "few-shot prompting",
      "chain-of-thought", "chain-of-thought (zero-shot)".

  Returns:
    The prompt string: demonstrations (if any) come before the target
    question; the zero-shot trigger phrase comes after it.

  Raises:
    ValueError: If ``prompt_type`` is not one of the names above. Previously
      this returned ``None`` implicitly, which only failed later inside the
      pipeline call.
  """
  if prompt_type == "question only":
    return initial_prompt
  if prompt_type == "few-shot prompting":
    return few_shot + initial_prompt
  if prompt_type == "chain-of-thought":
    return chain_of_thought + initial_prompt
  if prompt_type == "chain-of-thought (zero-shot)":
    return initial_prompt + cot_zero
  raise ValueError(f"Unknown prompt type: {prompt_type!r}")

def generate_sentence(model_type, prompt_type):
  """Generate one sampled completion and record it in ``generated_sentences``.

  Args:
    model_type: Passed to ``set_pipeline`` to choose the model, and used as
      the inner key of ``generated_sentences``.
    prompt_type: Passed to ``get_prompt`` to build the prompt, and used as
      the outer key of ``generated_sentences``.

  The pipeline's first ``generated_text`` is stored in
  ``generated_sentences[prompt_type][model_type]``; nothing is returned.
  """
  pipe = set_pipeline(model_type)
  prompt = get_prompt(prompt_type)

  # Chain-of-thought prompts get the larger budget. The original conditional
  # had the two constants swapped, giving chain-of-thought runs
  # MAX_NEW_TOKEN and every other prompt COT_MAX_TOKEN.
  uses_cot = "chain-of-thought" in prompt_type
  token_budget = COT_MAX_TOKEN if uses_cot else MAX_NEW_TOKEN

  sequences = pipe(
      prompt,
      max_new_tokens=token_budget,
      do_sample=True,
      top_k=50,
      top_p=0.9,
      pad_token_id=pipe.tokenizer.eos_token_id,
  )

  generated_sentences[prompt_type][model_type] = sequences[0]["generated_text"]

In [None]:
prompt_type = ["question only", "few-shot prompting", "chain-of-thought", "chain-of-thought (zero-shot)"]

# Fresh result table: one slot per (prompt type, model type).
generated_sentences = {
    technique: {"base": [], "instruct": []}
    for technique in prompt_type
}

# generate sentences with a base model
for prompt in tqdm(prompt_type, desc="generating sentences - 'base'"):
  generate_sentence('base', prompt)

# print all results
report_sections = [
    ("Zero-shot prompting", "question only"),
    ("Few-shot prompting", "few-shot prompting"),
    ("Chain-of-thought", "chain-of-thought"),
    ("Chain-of-thought (zero-shot)", "chain-of-thought (zero-shot)"),
]

print("\n===== Base model ======")
for section_heading, section_key in report_sections:
  print(f"\n----- {section_heading} -----")
  print(f"{generated_sentences[section_key]['base']}")

In [None]:
prompt_type = ["question only", "few-shot prompting", "chain-of-thought", "chain-of-thought (zero-shot)"]

# Fresh result table: one slot per (prompt type, model type).
generated_sentences = {
    technique: {"base": [], "instruct": []}
    for technique in prompt_type
}

# generate sentences with an instruct model
for prompt in tqdm(prompt_type, desc="generating sentences - 'instruct'"):
  generate_sentence("instruct", prompt)

# print all results
# The third field is the text appended after the result; the two
# chain-of-thought sections end with an extra newline.
report_sections = [
    ("Zero-shot prompting", "question only", ""),
    ("Few-shot prompting", "few-shot prompting", ""),
    ("Chain-of-thought", "chain-of-thought", "\n"),
    ("Chain-of-thought (zero-shot)", "chain-of-thought (zero-shot)", "\n"),
]

print("\n===== Instruct model ======")
for section_heading, section_key, section_tail in report_sections:
  print(f"\n----- {section_heading} -----")
  print(f"{generated_sentences[section_key]['instruct']}{section_tail}")