In [1]:
# Uninstall conflicting packages to start clean
!pip uninstall -y torch torchvision transformers accelerate bitsandbytes unsloth

# Install PyTorch and Torchvision with compatible versions
!pip install torch==2.1.0+cu121 torchvision==0.16.0+cu121 --index-url https://download.pytorch.org/whl/cu121

# Install Transformers with a specific compatible version
!pip install transformers==4.46.2

# Install Accelerate (compatible with transformers, trl, and peft)
!pip install accelerate>=0.34.1

# Install bitsandbytes for 4-bit/8-bit quantization
!pip install bitsandbytes==0.43.3

# Install Unsloth
!pip install unsloth==2024.11.7

# Optional: Install dotenv if needed for environment variable management
!pip install python-dotenv

# Install TRL and related packages with compatible versions
!pip install "trl<0.9.0" peft nltk tqdm groq



Found existing installation: torch 2.5.1
Uninstalling torch-2.5.1:
  Successfully uninstalled torch-2.5.1
[0mFound existing installation: transformers 4.46.2
Uninstalling transformers-4.46.2:
  Successfully uninstalled transformers-4.46.2
Found existing installation: accelerate 1.2.0
Uninstalling accelerate-1.2.0:
  Successfully uninstalled accelerate-1.2.0
Found existing installation: bitsandbytes 0.43.3
Uninstalling bitsandbytes-0.43.3:
  Successfully uninstalled bitsandbytes-0.43.3
Found existing installation: unsloth 2024.11.7
Uninstalling unsloth-2024.11.7:
  Successfully uninstalled unsloth-2024.11.7
Looking in indexes: https://download.pytorch.org/whl/cu121
Collecting torch==2.1.0+cu121
  Downloading https://download.pytorch.org/whl/cu121/torch-2.1.0%2Bcu121-cp310-cp310-linux_x86_64.whl (2200.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 GB[0m [31m696.3 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchvision==0.16.0+cu121
  Using cached http

In [3]:
from peft import get_peft_model, LoraConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import PPOTrainer, PPOConfig
import torch
import random
from google.colab import userdata
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
from nltk.corpus import wordnet as wn
import numpy as np
import os
from google.cloud import storage
from safetensors import safe_open
import copy
from unsloth import FastLanguageModel
from transformers import TextStreamer

### Simulated user profiles and preferences
we define a simulated spicy-loving user, who prefers spicy flavors, and a sour-loving user, who prefers sour flavors.

In [4]:
user_profiles = {
    "spicy_lover": {
        "preferred_flavor": "spicy",
        "keywords": ["spicy"], #will store keywords that are related to spiciness
        "ingredients":["hot pepper", "hot sauce", "chili", "jalapeno"]  #will store ingredients that are related to spiciness
    },
    "sour_lover": {
        "preferred_flavor": "sour",
        "keywords": ["sour"], #will store keywords that are related to sourness
        "ingredients":["lime", "lemon", "vinegar"]  #will store ingredients that are related to sourness
    }
}

In [5]:
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

Now we enhance the user profiles to include keywords that are synonyms to their preferneces. The synonyms are determined by WordNet.

In [6]:
def get_synonyms_and_related_terms(word):
    """
    Find synonyms and related terms using WordNet
    """
    related_terms = set()
    for syn in wn.synsets(word):
        for lemma in syn.lemmas():
            related_terms.add(lemma.name())
    return related_terms

def enhance_user_profile(user_profile):
    """
    Extend the keywords and ingredients in the user_profile with synonyms
    """
    extended_profile = {
        "keywords": set(user_profile["keywords"]),
        "ingredients": set(user_profile["ingredients"])
    }

    for keyword in user_profile["keywords"]:
        # if the keyword has multiple words, join them with _ for nltk recognition
        keyword_underscored = keyword.replace(" ", "_") if " " in keyword else keyword

        related_keywords = get_synonyms_and_related_terms(keyword_underscored)
        # join the _ in the related word returned by nltk if the related word has it
        related_keywords_clean = [rk.replace("_", " ") if "_" in rk else rk for rk in related_keywords]

        extended_profile["keywords"].update(related_keywords_clean)

    # Enhance ingredients with related terms
    for ingredient in user_profile["ingredients"]:
        # if the ingredient has multiple words, join them with _ for nltk recognition
        ingredient_underscored = ingredient.replace(" ", "_") if " " in ingredient else ingredient

        related_ingredients = get_synonyms_and_related_terms(ingredient_underscored)
        # join the _ in the related word returned by nltk if the related word has it
        related_ingredients_clean = [ri.replace("_", " ") if "_" in ri else ri for ri in related_ingredients]

        extended_profile["ingredients"].update(related_ingredients_clean)

    extended_profile["keywords"] = list(extended_profile["keywords"])
    extended_profile["ingredients"] = list(extended_profile["ingredients"])

    return extended_profile

for user_type, user_profile in user_profiles.items():
    enhanced_profile = enhance_user_profile(user_profile)
    user_profile["extended_keywords"] = enhanced_profile["keywords"]
    user_profile["extended_ingredients"] = enhanced_profile["ingredients"]

In [29]:
user_profiles.items()

dict_items([('spicy_lover', {'preferred_flavor': 'spicy', 'keywords': ['spicy'], 'ingredients': ['hot pepper', 'hot sauce', 'chili', 'jalapeno'], 'extended_keywords': ['hot', 'risque', 'racy', 'juicy', 'savoury', 'blue', 'gamey', 'naughty', 'gamy', 'zesty', 'piquant', 'savory', 'spicy'], 'extended_ingredients': ['tabasco plant', 'chile', 'Capsicum annuum longum', 'cayenne', 'chilly', 'chili con carne', 'jalapeno', 'Capsicum frutescens', 'hot sauce', 'cayenne pepper', 'chilli', 'chilli pepper', 'long pepper', 'chili pepper', 'tabasco pepper', 'jalapeno pepper', 'hot pepper', 'chili']}), ('sour_lover', {'preferred_flavor': 'sour', 'keywords': ['sour'], 'ingredients': ['lime', 'lemon', 'vinegar'], 'extended_keywords': ['saturnine', 'off-key', 'ferment', 'morose', 'false', 'glowering', 'sullen', 'off', 'acidity', 'moody', 'acidulate', 'rancid', 'dour', 'dark', 'sour', 'sourness', 'glum', 'tartness', 'turned', 'acetify', 'acidify', 'work', 'turn'], 'extended_ingredients': ['hydrated lime', 

In [31]:
print(list(user_profiles.keys()))

['spicy_lover', 'sour_lover']


### RLAIF

Since we do not have human feedback data, inspired by the work on RLAIF, we use the off-the-shell `llama-3.1-8b-instant` model (called through the GroqAPI) for preference labeling. The model decides from two generated recipes which one better suits the user's taste profile, since DPO instead of PPO will be implemented.

In [34]:
from groq import Groq
import requests
groq_api_key = userdata.get('GROQ_API_KEY')
model = 'llama-3.1-8b-instant'
client = Groq(
      api_key=groq_api_key
  )

def get_ai_preference(generated_1, generated_2, user_type, user_profile):
    """
    RLAIF - an off-the-shelf LLM labels which recipe aligns better with the user's taste profile.
    """

    prompt = f"""
    You are an expert chef who understands user's taste profile well.
    A {user_type} user prefers recipes with the following characteristics:
    - Keywords: {', '.join(user_profile['extended_keywords'])}
    - Ingredients: {', '.join(user_profile['extended_ingredients'])}

    Based on the user's preferences, which of the two recipes aligns with the taste profile of the user better? Respond with "Recipe 1" or "Recipe 2" only, no additional tokens.

    1. Recipe 1: {generated_1}
    2. Recipe 2: {generated_2}
    """

    client = Groq(
        api_key=groq_api_key
    )

    chat_completion = client.chat.completions.create(
        messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
        model='llama-3.1-8b-instant'
    )
    choice_response = chat_completion.choices[0].message.content

    # Determine the preferred recipe
    if "Recipe 1" in choice_response or "recipe 1" in choice_response:
        return generated_1
    elif "Recipe 2" in choice_response or "recipe 2" in choice_response:
        return generated_2
    else:
        return choice_response

In [35]:
for user_type, user_profile in user_profiles.items():
    # simple generated recipes to demonstrate AIF performance
    generated_recipe_1 = "chili with jalapeno and hot sauce."
    generated_recipe_2 = "tomato soup recipe with vinegar."
    preferred_recipe = get_ai_preference(generated_recipe_1, generated_recipe_2, user_type, user_profile)
    print(user_type)
    print("Preferred Recipe:", preferred_recipe)

spicy_lover
Preferred Recipe: chili with jalapeno and hot sauce.
sour_lover
Preferred Recipe: tomato soup recipe with vinegar.


### Applying LoRA to finetuned llama-3.2-3b model

Original fine-tuned model

In [None]:
llama_checkpoint_path = "./checkpoint-4512"  # TO-DO: change this
local_file_path="./checkpoint-4512/adapter_model.safetensors"  # TO-DO: change this

llama_model, llama_tokenizer = FastLanguageModel.from_pretrained(
    model_name=llama_checkpoint_path,
    max_seq_length=2048,
    load_in_4bit=True,
)
FastLanguageModel.for_inference(llama_model)
text_streamer = TextStreamer(llama_tokenizer)

state_dict = {}
with safe_open(local_file_path, framework="pt", device="cpu") as f:
    for key in f.keys():
        state_dict[key] = f.get_tensor(key)

llama_model.load_state_dict(state_dict, strict=False)
llama_model.eval()

DPO model

In [None]:
dpo_model = copy.deepcopy(llama_model)
dpo_tokenizer = llama_tokenizer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dpo_model.to(device)

# apply LoRA to DPO model
lora_config = LoraConfig(
    r=4,
    lora_alpha=32,
    lora_dropout=0.1,
    task_type="CAUSAL_LM"
)

dpo_model = get_peft_model(dpo_model, lora_config)
dpo_model.print_trainable_parameters()

optimizer = torch.optim.AdamW(dpo_model.parameters(), lr=1e-5)

### Training loop with DPO

We train for 5 epochs with 50 DPO comparisons per epoch.

In [None]:
def generate_recipe(model, tokenizer, prompt, max_new_tokens=150, stream_output=False, preference=None):
    """
    Generate text using the provided model and tokenizer.
    """
    if preference:
        prompt += "The recipe should have a " + preference + " flavor"

    model.to(device)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    if stream_output and text_streamer:
        _ = model.generate(
            **inputs, streamer=text_streamer, max_new_tokens=max_new_tokens
        )
        return None
    else:
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)


In [18]:
def pad_to_same_length(tensor_a, tensor_b, pad_value=0):
    """
    Pad tensors to the same length to facilitate DPO loss calculation
    """
    max_length = max(tensor_a.size(1), tensor_b.size(1))
    tensor_a_padded = F.pad(tensor_a, (0, max_length - tensor_a.size(1)), value=pad_value)
    tensor_b_padded = F.pad(tensor_b, (0, max_length - tensor_b.size(1)), value=pad_value)
    return tensor_a_padded, tensor_b_padded

In [38]:
import torch.nn.functional as F
from tqdm import tqdm


num_epochs = 5
num_comparisons_per_epoch = 50  # number of DPO comparisons per epoch

for epoch in range(num_epochs):
    epoch_dpo_loss = 0  # total DPO loss for the epoch

    with tqdm(total=num_comparisons_per_epoch * len(user_profiles), desc=f"Epoch {epoch + 1}") as pbar:
        for _ in range(num_comparisons_per_epoch):
          for user_type, user_profile in user_profiles.items():
              prompt = "Please write a low-sodium meal recipe that takes approximately 55 minutes and includes the following ingredients: tomato, beef. The recipe should be formatted with a clear list of ingredients and detailed, step-by-step cooking instructions."

              # generate two different recipes
              generated_spicy = generate_recipe(dpo_model,dpo_tokenizer, prompt, "spicy")
              generated_light = generate_recipe(dpo_model,dpo_tokenizer,prompt, "sour")

              # use RLAIF to determine which recipe the user prefers
              preferred_output = get_ai_preference(generated_spicy, generated_light, user_type, user_profile)
              non_preferred_output = generated_light if preferred_output == generated_spicy else generated_spicy
              preferred_input_ids = dpo_tokenizer(preferred_output, return_tensors="pt").input_ids.to(dpo_model.device)
              non_preferred_input_ids = dpo_tokenizer(non_preferred_output, return_tensors="pt").input_ids.to(dpo_model.device)

              # get log probabilities for both outputs to compute gradients
              preferred_output_logits = dpo_model(preferred_input_ids).logits
              non_preferred_output_logits = dpo_model(non_preferred_input_ids).logits
              preferred_log_probs = preferred_output_logits.log_softmax(dim=-1)
              non_preferred_log_probs = non_preferred_output_logits.log_softmax(dim=-1)

              # ensure size of tensors (to avoid error)
              seq_len = min(preferred_log_probs.size(1), non_preferred_log_probs.size(1))
              preferred_log_probs = preferred_log_probs[:, :seq_len, :]
              non_preferred_log_probs = non_preferred_log_probs[:, :seq_len, :]

              # check vocab size of tensors (to avoid error)
              vocab_size = min(preferred_log_probs.size(-1), non_preferred_log_probs.size(-1))
              preferred_log_probs = preferred_log_probs[:, :, :vocab_size]
              non_preferred_log_probs = non_preferred_log_probs[:, :, :vocab_size]

              # calculate DPO loss
              dpo_loss = torch.mean(preferred_log_probs - non_preferred_log_probs)

              # update model weights
              optimizer.zero_grad()
              (-dpo_loss).backward()  # Negate to maximize the preference
              optimizer.step()

              epoch_dpo_loss += dpo_loss.item()

              pbar.update(1)
              print(f"++++++++ User Type: {user_type} ++++++++")
              print(f"++++++++ Prompt: {prompt} ++++++++")
              print(f"++++++++ Preferred Recipe: {preferred_output} ++++++++")
              print(f"++++++++ Non-Preferred Recipe: {non_preferred_output} ++++++++")
              print(f"++++++++ DPO Loss: {dpo_loss.item()} ++++++++")
              print("-" * 80)

    # average DPO loss for the epoch
    avg_epoch_loss = epoch_dpo_loss / (num_comparisons_per_epoch * len(user_profiles))
    print(f"******* Epoch {epoch + 1} completed with average DPO loss: {avg_epoch_loss:.4f} *******")
    print("=" * 100)


Epoch 1:  50%|█████     | 1/2 [00:21<00:21, 21.72s/it]

++++++++ User Type: spicy_lover ++++++++
++++++++ Prompt: Please write a low-sodium meal recipe that takes approximately 55 minutes and includes the following ingredients: tomato, beef. The recipe should be formatted with a clear list of ingredients and detailed, step-by-step cooking instructions. ++++++++
++++++++ Preferred Recipe: and should be formatted with a clear list of ingredients and detailed, step-by-step cooking instructions.Ingredients:
tomato
beef

Instructions:
1. put all ingredients in crock pot , add water if necessary , cook on high for 1 hour
2. serve over rice or pasta !
3. i use my favorite method -- cooked steak or chicken , but you can also substitute veggies like mushrooms or broccoli to get the desired effect
4. it is not necessary to use all of the vegetables , just enough to make the meaty texture better than most other types of meats
5. you may need to adjust the seasoning a bit depending on your taste -- some people prefer a thicker consistency , others pref

Epoch 1: 100%|██████████| 2/2 [00:40<00:00, 20.22s/it]

++++++++ User Type: sour_lover ++++++++
++++++++ Prompt: Please write a low-sodium meal recipe that takes approximately 55 minutes and includes the following ingredients: tomato, beef. The recipe should be formatted with a clear list of ingredients and detailed, step-by-step cooking instructions. ++++++++
++++++++ Preferred Recipe: but is very delicious served on a bed of lettuce or salad greens.Ingredients:
tomato
beef

Instructions:
1. cut meat into bite size pieces
2. combine all ingredients except lettuce in a blender
3. puree until smooth
4. serve over your favorite pasta , salad greens or rolls !
5. enjoy ! :)
6. also great for dipping !
7. if you like it to be a little sweeter than that , add more salt and pepper to taste
8. i'm not sure how to make this any other way so i substitute vegetable oil instead of water
9. you can also use a non stick blender or food processor to make this much easier
10. also great for dipping , as well as for sandwiches !
11. i love to dip my sandwi




### Compare Original vs. RLAIF model

In [44]:
import pandas as pd
from IPython.display import display, HTML

def format_recipe(recipe_text):
    """
    Format a recipe string into HTML.
    """
    parts = recipe_text.split("Instructions:")
    #ingredients = parts[0].replace("Ingredients:", "").strip().split("\n")
    instructions = parts[1].strip().split("\n") if len(parts) > 1 else []

    #ingredients_html = "<ul>" + "".join(f"<li>{ingredient.strip()}</li>" for ingredient in ingredients if ingredient.strip()) + "</ul>"
    instructions_html = "<ol>" + "".join(f"<li>{instruction.strip()}</li>" for instruction in instructions if instruction.strip()) + "</ol>"

    #return f"<strong>Ingredients:</strong>{ingredients_html}<strong>Instructions:</strong>{instructions_html}"
    return f"<strong>Instructions:</strong>{instructions_html}"


def display_model_comparison():
    prompt = "Please write a low-sodium meal recipe that takes approximately 55 minutes and includes the following ingredients: tomato, beef. The recipe should be formatted with a clear list of ingredients and detailed, step-by-step cooking instructions."
    user_specifics = []
    rlaif_outputs = []
    original_outputs = []

    for user_type, user_profile in user_profiles.items():
      flavor = user_profile['preferred_flavor']
      original_output = generate_recipe(llama_model, llama_tokenizer, prompt, flavor)
      dpo_output = generate_recipe(dpo_model, dpo_tokenizer, prompt, flavor)

      user_specifics.append(user_type)
      rlaif_outputs.append(format_recipe(dpo_output))
      original_outputs.append(format_recipe(original_output))

    data = {
        "User specifics": user_specifics,
        "RLAIF Model Output": rlaif_outputs,
        "Original Model Output": original_outputs
    }
    df = pd.DataFrame(data)

    display(HTML(df.to_html(index=False, escape=False)))

display_model_comparison()

User specifics,RLAIF Model Output,Original Model Output
spicy_lover,"Instructions:1. combine all ingredients , except beef , in crockpot2. cook over medium heat until meat is tender3. stir in remaining ingredients4. cover and simmer for 15 minutes5. serve hot or cold6. this can also be served as a side dish !7. enjoy ! :)8. i love to add garlic cloves during the cooking process so they get a bit crispy after you add them9. if you like it a little different then just toss in some ground black pepper10. i use my crock pot for smoking , but it's not perfect for serving11. it does make it easier to clean up the mess when you're done ! :)12. i always put the beef in the crockpot before adding the tomatoes and it makes a big difference !13. if you don't know what to do with the sauce , use a little more water and let it boil down a bit14. it will be thicker and slightly bubbly15. if you want it more spicy , add more pepper or salt16. it's really simple , and it gives the best result !17. try using the fresh thyme or sage18. it has a lot of health benefits19. you","Instructions:1. mix all ingredients together2. cook on high for 6 hours or until done to your liking3. enjoy !4. if you want to add more seasoning , put in a blender or food processor and blend until smooth5. serve over rice or noodles !6. enjoy !7. i always use the same 1 / 4 cup water as it is for this recipe but i like my beans very much8. they are also great as an added addition to pasta or chicken soup9. i usually just add some chopped tomatoes to taste so they don't get too dry or mushy10. i also add a little fresh garlic powder to taste as well11. i think this is really good served with plain bread ! :)12. i'm sure there will be plenty other variations of this recipe that would make this dish better than it is ! :)13. enjoy ! :)14. this makes about 2 dozen servings15. the original recipe called for 3 cups of meatballs16. i used 3 cups of meatballs instead of the original 5 cups because i was afraid they wouldn't be enough17. this is what i did and it turned out pretty good18. i love serving this with cornbread or a loaf of bread -- both wonderful !19. if you"
sour_lover,"Instructions:1. combine all ingredients in crockpot , mix well2. cover and cook on high for 30 mins3. serve hot with crackers , chips or any other kind of cracker you want !4. if you don't like them , use another cracker / chip maker ! :)5. i always add more water as i go along so it doesn't boil too much6. also makes a nice little snack bowl , if you're looking for something sweet take a tablespoon out of your mouth and squeeze some lemon juice into it ! :)7. enjoy ! :)8. my favorite is an old fashioned , it's great when you're hungry !9. this will keep up to 4 months at least !10. also good with ice cream !11. also great with chocolate ice cream !12. also great with brownies !13. try it with buttercream , it tastes best with melted buttercream14. i've never tried it with crackers , but i'm sure you could get away with it ! :)15. if you'd like to substitute any other type of cheese you can use a few different cheeses , such as white bread , egg yolks , black pepper , etc16. you'll probably need to adjust the amounts depending on","Instructions:1. put all in a bowl2. stir together well3. refrigerate for several hours , stirring occasionally to ensure even mixing4. serve immediately or freeze for up to 1 week !5. i like to add frozen chopped tomatoes when they are ready so i can get them out of my fridge overnight !6. if you do not have a freezer , just use your favorite food processor to process the tomato into a paste7. it will become very dry after this process but you will need to keep it warm in the refrigerator8. also make sure to mix thoroughly as it is quite difficult to remove from the heat9. if using fresh mushrooms , add a little water to help dissolve any lumps10. mix well11. let stand at room temperature until fully set12. preheat oven to 350 degrees fahrenheit13. place the meat on a baking sheet and bake for 30 minutes , then turn the meat over once again14. cool the meat completely on a rack before cutting into squares or slices15. cut each square into a circle about 3 / 4 inch thick16. divide the meat pieces evenly among the squares17. slice the squares into quarters18. place one piece of bread in each quarter and roll each round around with a fork19. cover with plastic wrap and chill for 10 minutes20"
