# Custom FOOD entities Model

In [5]:
############# Importimg Libraries ##############
import spacy
from spacy.matcher import Matcher

# Custom small food dataset preparation and preprocess for spacy NER dataset format

In [7]:
####### This function for creating a list of food patterns It will help us for annotation ########
def create_patterns():
    food_patterns = [
        [{'LOWER': 'milk'}],[{'LOWER': 'Milk'}],[{'LOWER': 'meat'}],[{'LOWER': 'Meat'}],
        [{'LOWER': 'Bread'}],[{'LOWER': 'bread'}],[{'LOWER': 'Butter'}],[{'LOWER': 'butter'}],
        [{'LOWER': 'Cheese'}],[{'LOWER': 'cheese'}],[{'LOWER': 'Yogurt'}],[{'LOWER': 'Yogurt'}],
        [{'LOWER': 'Sandwich'}],[{'LOWER': 'sandwich'}],[{'LOWER': 'Pancake'}],[{'LOWER': 'Pancake'}],
        [{'LOWER': 'Pie'}],[{'LOWER': 'pie'}],[{'LOWER': 'Honey'}],[{'LOWER': 'honey'}],
        [{'LOWER': 'Waffle'}],[{'LOWER': 'waffle'}],[{'LOWER': 'Donuts'}],[{'LOWER': 'donuts'}],
        [{'LOWER': 'Salad'}],[{'LOWER': 'salad'}],[{'LOWER': 'Meatball'}],[{'LOWER': 'meatball'}],
        [{'LOWER': 'Grilled chicken'}],[{'LOWER': 'grilled chicken'}],[{'LOWER': 'Burger'}],[{'LOWER': 'burger'}],[{'LOWER': 'burgers'}],
        [{'LOWER': 'Tuna'}],[{'LOWER': 'tuna'}],[{'LOWER': 'Fish'}],[{'LOWER': 'fish'}],[{'LOWER': 'Hilsa'}],
        [{'LOWER': 'Noodles'}],[{'LOWER': 'noodles'}],[{'LOWER': 'Egg'}],[{'LOWER': 'egg'}],
        [{'LOWER': 'Bacon'}],[{'LOWER': 'bacon'}],[{'LOWER': 'Pizza'}],[{'LOWER': 'pizza'}],
        [{'LOWER': 'French Fries'}],[{'LOWER': 'french fries'}],[{'LOWER': 'French fries'}],
        [{'LOWER': 'Biryani'}],[{'LOWER': 'biryani'}],[{'LOWER': 'Pasta'}],[{'LOWER': 'pasta'}],
        [{'LOWER': 'Smoked salmon'}],[{'LOWER': 'smoked salmon'}],[{'LOWER': 'salmon'}],[{'LOWER': 'Salmon'}],
        [{'LOWER': 'Mayonnaise'}],[{'LOWER': 'mayonnaise'}],[{'LOWER': 'Taco'}],[{'LOWER': 'taco'}],
        [{'LOWER': 'Hotdog'}],[{'LOWER': 'hotdog'}],[{'LOWER': 'hot dog'}],[{'LOWER': 'Dosa'}],[{'LOWER': 'dosa'}],
        [{'LOWER': 'Chocolate'}],[{'LOWER': 'chocolate'}],[{'LOWER': 'Ice cream'}],[{'LOWER': 'ice cream'}],
        [{'LOWER': 'Rice'}],[{'LOWER': 'rice'}],
        [{'LOWER': 'Fride Rice'}],[{'LOWER': 'fride rice'}],[{'LOWER': 'hamburgers'}],[{'LOWER': 'Hamburgers'}],
        
        [{'LOWER': 'Apple'}],[{'LOWER': 'apple'}],[{'LOWER': 'Banana'}],[{'LOWER': 'banana'}],
        [{'LOWER': 'Orange'}],[{'LOWER': 'orange'}],[{'LOWER': 'Mango'}],[{'LOWER': 'mango'}],
        [{'LOWER': 'Grapes'}],[{'LOWER': 'grape'}],[{'LOWER': 'Pineapple'}],[{'LOWER': 'pineapple'}],
        [{'LOWER': 'Pomegranate'}],[{'LOWER': 'pomegranate'}],[{'LOWER': 'Avocado'}],[{'LOWER': 'avocado'}],
        [{'LOWER': 'Coconut'}],[{'LOWER': 'coconut'}],[{'LOWER': 'Papaya'}],[{'LOWER': 'papaya'}],
        [{'LOWER': 'Dragonfruit'}],[{'LOWER': 'dragonfruit'}],[{'LOWER': 'Strawberry'}],[{'LOWER': 'strawberry'}],
        [{'LOWER': 'Blueberry'}],[{'LOWER': 'blueberry'}],[{'LOWER': 'Blackberry'}],[{'LOWER': 'blackberry'}],
        [{'LOWER': 'Cherry'}],[{'LOWER': 'Lime'}]]
    return food_patterns


In [8]:
######## Creating Matcher #########
matcher = Matcher(nlp.vocab, validate=True)
matcher.add("FOOD", create_patterns())

In [19]:
########### This function will take nlp doc and return a tuple with full sentence and entities ############
def parse_train_data(doc):
    detections = [(doc[start:end].start_char, doc[start:end].end_char, 'FOOD') for idx, start, end in matcher(doc)]
    return (doc.text, {'entities': detections})

parse_train_data(nlp("I love hot dog and rice"))

('I love hot dog and rice', {'entities': [(19, 23, 'FOOD')]})

# Lets prepare our custom small food dataset

In [10]:
####### Fooddata.txt contains some texts related to food ##############
with open("fooddata.txt",'r') as f:
    data = f.read()

In [44]:
######## Now using the previous function (parse_train_data(doc)) we can prepare our dataset for NER and stored in a list ######
doc = nlp(data)
train_data = [parse_train_data(nlp(sn.text)) for sn in list(doc.sents)]

In [45]:
train_data

[('The avocado (Persea americana) is a tree originating in the Americas which is likely native to the highland regions of south-central Mexico to Guatemala.',
  {'entities': [(4, 11, 'FOOD')]}),
 ('The term hot dog can also refer to the sausage itself.', {'entities': []}),
 ('A hamburger is a food consisting of fillings usually a patty of ground meat, typically beefâ€”placed inside a sliced bun or bread roll.',
  {'entities': [(71, 75, 'FOOD'), (124, 129, 'FOOD')]}),
 ('A pancake is a flat cake, often thin and round, prepared from a starch-based batter that may contain eggs, milk and butter and cooked on a hot surface such as a griddle or frying pan, often frying with oil or butter.',
  {'entities': [(107, 111, 'FOOD'), (116, 122, 'FOOD'), (209, 215, 'FOOD')]}),
 ('It is a type of batter bread.', {'entities': [(23, 28, 'FOOD')]}),
 ('Examples of fast food (left to right, top to bottom): Cheeseburger, Soft drink, French fries, Pizza Margherita, Hot dog, Fried chicken, Submarine sandwich

In [13]:
############# importing DocBin and tqdm ############
from spacy.tokens import DocBin
from tqdm import tqdm

In [46]:
############ Creating a nlp and DocBin object #############
nlp = spacy.load("en_core_web_sm")
db = DocBin()

# Covert our train_data into .spacy file

We need to first convert the training data into .spacy object, because spacy 3 required .spacy file for training

In [47]:
######### convertiong our training data into .spacy object and saving it locally ##########
for text, annot in tqdm(train_data):
    doc = nlp.make_doc(text) 
    ents = []
    for start, end, label in annot["entities"]:
        span = doc.char_span(start, end, label=label, alignment_mode="contract")
        if span is None:
            print("Skipping entity")
        else:
            ents.append(span)
    doc.ents = ents 
    db.add(doc)

db.to_disk("./training_data.spacy")

100%|██████████| 20/20 [00:00<00:00, 716.20it/s]


# Train the model

In [48]:
############ Creating Config file #############
! python -m spacy init config config.cfg --lang en --pipeline ner --optimize efficiency

[!] To generate a more effective transformer-based config (GPU-only), install
the spacy-transformers package and re-run this command. The config generated now
does not use transformers.
[i] Generated config template specific for your use case
- Language: en
- Pipeline: ner
- Optimize for: efficiency
- Hardware: CPU
- Transformer: None
[+] Auto-filled config with all values
[+] Saved config
config.cfg
You can now add your data and train your pipeline:
python -m spacy train config.cfg --paths.train ./train.spacy --paths.dev ./dev.spacy


In [49]:
########### Now train using the config file ###########
! python -m spacy train config.cfg --output ./ --paths.train ./training_data.spacy --paths.dev ./training_data.spacy

[i] Using CPU


[2022-06-19 20:58:32,048] [INFO] Set up nlp object from config
[2022-06-19 20:58:32,060] [INFO] Pipeline: ['tok2vec', 'ner']
[2022-06-19 20:58:32,064] [INFO] Created vocabulary
[2022-06-19 20:58:32,064] [INFO] Finished initializing nlp object
[2022-06-19 20:58:32,200] [INFO] Initialized pipeline components: ['tok2vec', 'ner']


[1m
[+] Initialized pipeline
[1m
[i] Pipeline: ['tok2vec', 'ner']
[i] Initial learn rate: 0.001
E    #       LOSS TOK2VEC  LOSS NER  ENTS_F  ENTS_P  ENTS_R  SCORE 
---  ------  ------------  --------  ------  ------  ------  ------
  0       0          0.00     37.83    0.00    0.00    0.00    0.00
 41     200          4.51    765.04  100.00  100.00  100.00    1.00
 93     400          0.00      0.00  100.00  100.00  100.00    1.00
158     600          0.00      0.00  100.00  100.00  100.00    1.00
235     800          0.00      0.00  100.00  100.00  100.00    1.00
335    1000          0.00      0.00  100.00  100.00  100.00    1.00
435    1200          0.00      0.00  100.00  100.00  100.00    1.00
584    1400          0.00      0.00  100.00  100.00  100.00    1.00
784    1600          0.00      0.00  100.00  100.00  100.00    1.00
984    1800          0.00      0.00  100.00  100.00  100.00    1.00
[+] Saved pipeline to output directory
model-last


# Lets test our model

In [50]:
nlp_ner = spacy.load("model-best")

In [61]:
doc = nlp_ner("""Another concept of fast food that is becoming popular is that of Food Courts. Here also one has to purchase coupons and collect the food from one of the several counters. Each one of these counters serves specific variety of food and may be owned by different individuals or caterers. Food Courts are normally located on much bigger premises and may provide seating facility in addition to the stand and eat arrangement. Typically one entrepreneur owns or takes on lease the entire premises and promotes the place under one name. They then let out individual counters to different independent operators to offer different menu. Internal competition is avoided by not allowing more than one counter to offer similar food.
In the fast-food version, a plate already arranged with a variety of cooked vegetables and curries along with a fixed quantity of rice and Indian flatbreads is handed out across the counter against a prepaid coupon. The curries and breads vary depending on the region and local preferences. The higher priced ones may add a sweet to the combination. Refills are generally not offered. The rice, which is first washed and then mixed with spices, is added on top, before the meat and rice are cooked together. Sometimes potato is added to the mix to add body to the dish.
Kacchi Biriyani is eaten all year round, and people rarely need a special occasion to eat it. If you are visiting Bangladesh, make sure you grab yourself a plate.""")

In [62]:
spacy.displacy.render(doc, style="ent", jupyter=True) 