In [None]:
!pip install transformers

In [None]:
#  importing the required libraries
from tqdm import trange
import torch
import torch.nn.functional as F
import numpy as np
import re
import os
from transformers import  AutoTokenizer,AutoModelWithLMHead
from transformers import GPT2LMHeadModel, GPT2Tokenizer

print(f"PyTorch version: {torch.__version__}")

# Lookup tables from a model-type key to its (model class, tokenizer class)
# pair. Both tables expose the same 'gpt2' key: the first one uses the explicit
# GPT-2 classes, the second one the generic Auto* classes.
MODEL_CLASSES = dict(gpt2=(GPT2LMHeadModel, GPT2Tokenizer))
MODEL_CLASSES1 = dict(gpt2=(AutoModelWithLMHead, AutoTokenizer))


# seed values
def set_seed(seed):
    """Seed NumPy's global RNG and PyTorch's RNG with the same ``seed``.

    Args:
        seed: value handed to ``np.random.seed`` and then ``torch.manual_seed``.
    """
    for seeder in (np.random.seed, torch.manual_seed):
        seeder(seed)


def top_k_top_p_filtering(logits, top_k=0, top_p=0.0, filter_value=-float('Inf')):
    """Mask low-probability entries of a 1-D logits vector, in place.

    Up to two filters run one after the other; each overwrites the rejected
    positions of ``logits`` with ``filter_value``.

    Args:
        logits: 1-D tensor of logits (one entry per vocabulary item). It is
            modified in place.
        top_k: when > 0, every logit strictly smaller than the ``top_k``-th
            largest one is masked (top-k filtering). Values larger than the
            vector length are clamped to the vector length.
        top_p: when > 0.0, logits are ranked from largest to smallest and the
            entries that come after the first one whose cumulative softmax
            probability exceeds ``top_p`` are masked (nucleus filtering,
            Holtzman et al., http://arxiv.org/abs/1904.09751). The entry that
            crosses the threshold itself is kept.
        filter_value: value written into masked positions.

    Returns:
        The same ``logits`` tensor object that was passed in.

    From: https://gist.github.com/thomwolf/1a5a29f6962089e871b94cbd09daf317
    """
    # Only a single (unbatched) distribution is supported.
    assert logits.dim() == 1
    # Never ask topk for more entries than the vector holds.
    top_k = min(top_k, logits.size(-1))

    if top_k > 0:
        # Smallest logit that still belongs to the top-k set.
        kth_largest = torch.topk(logits, top_k)[0][..., -1, None]
        logits[logits < kth_largest] = filter_value

    if top_p > 0.0:
        ranked_logits, ranked_indices = torch.sort(logits, descending=True)
        running_mass = torch.cumsum(F.softmax(ranked_logits, dim=-1), dim=-1)

        over_threshold = running_mass > top_p
        # Shift the mask one step to the right so the first entry that crosses
        # the threshold survives; the best-ranked entry is always kept.
        drop_mask = over_threshold.clone()
        drop_mask[..., 1:] = over_threshold[..., :-1]
        drop_mask[..., 0] = 0

        logits[ranked_indices[drop_mask]] = filter_value
    return logits


def sample_sequence(model, length, context, tokenizer, num_samples=1, temperature=1, top_k=0, top_p=0.0, device=None):
    """Autoregressively sample token ids from ``model``, starting from ``context``.

    Generation stops after ``length`` new tokens, or earlier once every sample
    has produced the ``<END_RECIPE>`` token.

    Args:
        model: callable language model; invoked as ``model(input_ids=...)``.
            Element 0 of its output is indexed as ``[:, -1, :]``, i.e. it is
            assumed to be (batch, sequence, vocabulary) logits.
        length: maximum number of tokens to generate.
        context: sequence of token ids used as the prompt.
        tokenizer: object providing ``convert_tokens_to_ids``; used to look up
            the id of ``<END_RECIPE>``.
        num_samples: number of independent continuations to sample.
        temperature: divisor applied to the logits before filtering; must be
            strictly positive.
        top_k: forwarded to ``top_k_top_p_filtering``.
        top_p: forwarded to ``top_k_top_p_filtering``.
        device: device for the generated tensor. When ``None`` the device of
            the model's first parameter is used (CPU if it has none).

    Returns:
        Long tensor of shape ``(num_samples, len(context) + n_generated)``
        holding the prompt followed by the sampled tokens. Rows that finish
        early are padded with the ``<END_RECIPE>`` id.

    Raises:
        ValueError: if ``num_samples < 1`` or ``temperature <= 0``.
    """
    if num_samples < 1:
        raise ValueError("num_samples must be a positive integer")
    if temperature <= 0:
        raise ValueError("temperature must be strictly positive")
    if device is None:
        first_param = next(iter(model.parameters()), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    end_token = tokenizer.convert_tokens_to_ids(["<END_RECIPE>"])[0]
    context = torch.tensor(context, dtype=torch.long, device=device)
    generated = context.unsqueeze(0).repeat(num_samples, 1)
    # Tracks which rows have already emitted the end-of-recipe token.
    finished = torch.zeros(num_samples, dtype=torch.bool, device=device)

    with torch.no_grad():
        for _ in trange(length):
            # Note: we could also use 'past' with GPT-2/Transfo-XL/XLNet (cached hidden-states)
            outputs = model(input_ids=generated)
            step_logits = outputs[0][:, -1, :] / temperature

            # The filter only accepts 1-D logits, so sample each row on its own.
            sampled = []
            for row_logits in step_logits:
                filtered_logits = top_k_top_p_filtering(row_logits, top_k=top_k, top_p=top_p)
                sampled.append(torch.multinomial(F.softmax(filtered_logits, dim=-1), num_samples=1))
            next_token = torch.stack(sampled)  # (num_samples, 1)

            # Rows that already ended keep emitting the end token as padding.
            next_token[finished] = end_token
            generated = torch.cat((generated, next_token), dim=1)

            finished |= (next_token.squeeze(1) == end_token)
            if bool(finished.all()):
                print('breaking----->>')
                break
    return generated

# global seed
set_seed(20)

# Load the trained model and its tokenizer; both are read from the same
# checkpoint directory, so the path is spelled out only once.
MODEL_DIR = '/content/drive/MyDrive/capstone-remoteserver/recipe word/data annotation/outputs/tempt1'
# Prefer the GPU, but fall back to the CPU so this cell also runs on a
# runtime without CUDA instead of raising.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model_class, tokenizer_class = MODEL_CLASSES['gpt2']
tokenizer = tokenizer_class.from_pretrained(MODEL_DIR)
model = model_class.from_pretrained(MODEL_DIR)
model.to(DEVICE)
# Inference mode; the trailing semicolon suppresses the module repr in the cell output.
model.eval();

PyTorch version: 1.8.1+cu101


In [None]:
# Load the recipe dataset from its joblib pickle dump (a .pkl file, not a CSV).
# NOTE(review): joblib.load unpickles arbitrary objects — only load files from a trusted source.
import joblib as jb
from sklearn.model_selection import train_test_split

dataset_path = '/content/drive/MyDrive/capstone-remoteserver/recipe word/data annotation/dataset_df_new.pkl'
df_new = jb.load(dataset_path)

In [None]:
# First split: 65% of the rows go to df1, the rest to df2.
df1, df2 = train_test_split(df_new, random_state=2, train_size=0.65)
# Second split: 1% of df1 becomes the small evaluation sample test1.
test1, remaining = train_test_split(df1, random_state=2, train_size=0.01)
# Renumber the rows 0..n-1 so they can be addressed by position-like labels.
test1 = test1.reset_index(drop=True)
test1.head()


Unnamed: 0,instructions,ingredients,title,keywords
0,[combine sauce ingredients and set aside . mix...,"[sauce, shrimp, cornstarch, oil, cashews, brow...",Shrimp (or Chicken) with Cashew Nuts,"[shrimp, green_pepper, chilies, cornstarch, ga..."
1,"[dressing , stir together all ingredients . sa...","[salad, greens, olive_oil, salt, tomato, mozza...",Insalate Caprese,"[salad, tomato, mozzarella, olive_oil, ingredi..."
2,"[place pork belly into a large pot , and cover...","[pork_belly, water, pork, pork_meat, soy_sauce...",Okinawa Shoyu Pork,"[pork_meat, pork_belly, soy_sauce, garlic, sau..."
3,[rinse the meat with cool water and pat it dry...,"[cool_water, paper_towels, pepper, nonstick_co...",Slow-Cooker Sicilian Pot Roast - Diabetic Frie...,"[tomato_paste, nonstick_cooking_spray, pasta, ..."
4,"[using a food processor or blender , process a...","[vanilla, wafers, pretzels, sugar, cream_chees...",Margarita Balls II,"[cream_cheese, vanilla, margarita_mix, white_s..."


In [None]:
# Show the (rows, columns) size of the sampled subset.
test1.shape

(757, 4)

In [None]:
# Ingredient list of the row labelled 30, fetched with a single label-based lookup.
test1.loc[30, 'ingredients']

['crust',
 'baking_mix',
 'garlic_powder',
 'pepper',
 'hot_water',
 'dough',
 'hands',
 'tomatoes',
 'ricotta',
 'garlic',
 'parmesan',
 'ricotta_mixture',
 'mozzarella_cheese',
 'tomato',
 'basil',
 'oregano',
 'oil',
 'water']

In [None]:
# taking input from the user for recipe
raw_text = input("Comma-separated ingredients, semicolon to close the list >>> ")
prepared_input = '<BEGIN_RECIPE> <BEGIN_INPUT> ' + raw_text.replace(',', ' <NEXT_INPUT> ').replace(';', ' <END_INPUT>')
context_tokens = tokenizer.encode(prepared_input)

out = sample_sequence(
    model=model,
    context=context_tokens,
    tokenizer=tokenizer,
    length=768,
    temperature=1,
    top_k=30,
    top_p=1,
    device=torch.device("cuda")
)
out = out[0, len(context_tokens):].tolist()
text = tokenizer.decode(out, clean_up_tokenization_spaces=True)
if "<END_RECIPE>" not in text:
    print(text)
    print("Failed to generate, recipe's too long")
# salt, chicken, beef, milk, water, garlic, onion, vinegar;

Comma-separated ingredients, semicolon to close the list >>> crust, baking_mix,garlic_powder, pepper, hot water, dough, tomato, basil, oregano, oil , garlic;


 61%|██████    | 469/768 [00:13<00:14, 20.76it/s]

breaking----->>


In [None]:
# formating and printing the generated recipe
user_input = prepared_input
user_input = user_input.replace('<BEGIN_RECIPE> <BEGIN_INPUT>', '## User inputs ##\n    - ').replace('<NEXT_INPUT>', '\n    -').replace('<END_INPUT>', '\n------------------------\n')
generated_recipe = str(text.replace('<BEGIN_TITLE>', '## Recipe Name:- ##\n').replace('<END_TITLE>', '\n')) \
                        .replace('<BEGIN_INGREDS>', '\n## Ingredients ##\n    -').replace('<NEXT_INGREDS>', '\n    -').replace('<END_INGREDS>', '\n\n') \
                        .replace('<BEGIN_INSTR>', '## Cooking instructions ##\n    -').replace('.','.\n    -').replace('<NEXT_INSTR>', '\n    -').replace('- <END_INSTR>', '\n\n') \
                        .replace(' <END_RECIPE>', 'Voila Enjoy your recipe :)')
                    

print(user_input)
print(generated_recipe)

## User inputs ##
    -  crust 
    -  baking_mix 
    - garlic_powder 
    -  pepper 
    -  hot water 
    -  dough 
    -  tomato 
    -  basil 
    -  oregano 
    -  oil  
    -  garlic 
------------------------

## Recipe Name:- ##
 Italian Meatballs With Tomatoes and Tomato 
 
## Ingredients ##
    - garlic 
    - basil 
    - oregano 
    - sugar 
    - oregano_powder 
    - eggplant 
    - salt 
    - flour 
    - baking_mix 
    - oil 
    - dough 
    - hands 
    - hands 
    - plastic 
    - hands 
    - meatballs 
    - brown 
    - tomato 
    - crust 

 ## Cooking instructions ##
    - in a large bowl, blend the 1/4 cup garlic, basil, oregano, sugar, and oregano powder 
    - mix well.
    - in another bowl, mix eggplant and salt into flour until lightly combined.
    - in a clean bowl, mix the eggplant, salt, baking mix, oil, and 1 tbsp of the garlic herb oil with your hands until combined.
    - add the dough to a lightly floured surface, and knead briefly with your h