In [52]:
import pandas as pd
import ast
import re
import inflect
import json

In [54]:
# For cleaning up recipe titles
def remove_extra_whitespace(input_string):
    """Trim the string, squeeze every whitespace run to one space, and lowercase it.

    Args:
        input_string: Text to normalise.

    Returns:
        The lowercased text with no leading/trailing whitespace and with each
        internal run of whitespace replaced by a single space.
    """
    trimmed = input_string.strip()
    single_spaced = re.sub(r'\s+', ' ', trimmed)
    return single_spaced.lower()

def replace_non_alpha_parentheses(input_string):
    """Delete every character that is not an ASCII letter, a parenthesis, or a space.

    Args:
        input_string: Text to filter.

    Returns:
        The text with all other characters (digits, punctuation, hyphens, ...)
        removed; nothing is inserted in their place.
    """
    return re.sub(r'[^a-zA-Z() ]', '', input_string)

def remove_null_chars(s):
    """Return a new list in which NUL characters are stripped from each item.

    Args:
        s: Iterable of items (strings in this notebook's usage).

    Returns:
        list: items containing '\\x00' with that character removed; all other
        items are passed through unchanged and in the same order.
    """
    return [
        item.replace('\x00', "") if '\x00' in item else item
        for item in s
    ]
#-----------------------------#

def remove_non_alphabet(s):
    """Return a set of the given words with everything but ASCII letters and spaces removed.

    Args:
        s: Iterable of strings.

    Returns:
        set: the cleaned strings (words that clean to the same text collapse
        into a single member).
    """
    cleaned = set()
    for word in s:
        cleaned.add(re.sub(r'[^a-zA-Z ]', '', word))
    return cleaned

def check_ingredients(s, ings):
    """Return the members of set ``s`` that also appear in ``ings``.

    Args:
        s: Set of candidate names.
        ings: Iterable of accepted ingredient names.

    Returns:
        set: the intersection of ``s`` and ``ings``.
    """
    return s.intersection(ings)

def get_ingredients(path='train.json'):
    """Build the ingredient validation vocabulary from a recipe JSON file.

    Every name found under each recipe's "ingredients" key is collected. For
    each name one alternate grammatical number is added as well: the singular
    form when ``inflect`` can singularise the name, otherwise its plural form.

    Args:
        path: JSON file containing a sequence of recipe objects, each with an
            "ingredients" entry. Defaults to 'train.json'.

    Returns:
        set: the collected ingredient names plus their singular/plural variants.
    """
    p = inflect.engine()
    # JSON is UTF-8 by specification; do not depend on the platform default
    # encoding, which would garble non-ASCII ingredient names on some systems.
    with open(path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    ings = set()
    for recipe in data:
        ings.update(recipe["ingredients"])

    opposite = set()
    for name in ings:
        # singular_noun() yields a falsy value when it cannot singularise the
        # name; evaluate it once and fall back to the plural in that case.
        alt = p.singular_noun(name) or p.plural(name)
        if alt:
            opposite.add(alt)
    return ings | opposite

In [55]:
df = pd.read_csv('recipes_data.csv')
# NER, ingredients and directions are read as strings holding Python literals;
# parse each one into the object it describes, then turn NER into a set.
for literal_column in ('NER', 'ingredients', 'directions'):
    df[literal_column] = df[literal_column].apply(ast.literal_eval)
df['NER'] = df['NER'].apply(set)

In [56]:
# Strip everything except letters and spaces from each NER entry
df['NER'] = df['NER'].apply(remove_non_alphabet)

# Create validation set of over 1500 ingredients
all_ingredients = get_ingredients()

# Keep only the NER entries that occur in the validation set,
# then discard recipes that are left without any ingredient
df['NER'] = df['NER'].apply(check_ingredients, ings=all_ingredients)
df = df.loc[df['NER'] != set()]

In [68]:
# Create dataframes for each table

# ingredients: one row per name in the validation vocabulary
ingredients_df = pd.DataFrame(list(all_ingredients), columns=['name'])

# recipes: built as a single chain so that no column is ever assigned onto a
# filtered slice (the pattern that raises SettingWithCopyWarning).
recipes_df = (
    df.drop(columns=['source'])
    .rename(columns={'title': 'name'})
    .assign(
        name=lambda frame: frame['name'].apply(
            lambda title: remove_extra_whitespace(replace_non_alpha_parentheses(title))
        )
    )
    # titles that clean down to nothing are unusable
    .loc[lambda frame: frame['name'] != '']
    .assign(directions=lambda frame: frame['directions'].apply(remove_null_chars))
    # renumber the surviving rows 0..n-1 and expose that number as the leading `id` column
    .reset_index(drop=True)
    .rename_axis('id')
    .reset_index()
)

# recipe_ingredients: one row per (recipe, ingredient) pair. Only `id` and
# `NER` are exploded, so the large `ingredients`/`directions` columns are not
# duplicated once per ingredient just to be dropped afterwards.
recipe_ingredients_df = (
    recipes_df[['id', 'NER']]
    .explode('NER')
    .rename(columns={'id': 'recipe_id', 'NER': 'ingredient'})
    .reset_index(drop=True)
)

# NER now lives in recipe_ingredients_df, so the recipes table no longer needs it
recipes_df = recipes_df.drop(columns=['NER'])

In [71]:
# Display the recipes table (id, name, ingredients, directions, link, site)
recipes_df

Unnamed: 0,id,name,ingredients,directions,link,site
0,0,nobake nut cookies,"[1 c. firmly packed brown sugar, 1/2 c. evapor...","[In a heavy 2-quart saucepan, mix brown sugar,...",www.cookbooks.com/Recipe-Details.aspx?id=44874,www.cookbooks.com
1,1,jewell balls chicken,"[1 small jar chipped beef, cut up, 4 boned chi...","[Place chipped beef on bottom of baking dish.,...",www.cookbooks.com/Recipe-Details.aspx?id=699419,www.cookbooks.com
2,2,creamy corn,"[2 (16 oz.) pkg. frozen corn, 1 (8 oz.) pkg. c...","[In a slow cooker, combine all ingredients. Co...",www.cookbooks.com/Recipe-Details.aspx?id=10570,www.cookbooks.com
3,3,chicken funny,"[1 large whole chicken, 2 (10 1/2 oz.) cans ch...","[Boil and debone chicken., Put bite size piece...",www.cookbooks.com/Recipe-Details.aspx?id=897570,www.cookbooks.com
4,4,reeses cups(candy),"[1 c. peanut butter, 3/4 c. graham cracker cru...",[Combine first four ingredients and press in 1...,www.cookbooks.com/Recipe-Details.aspx?id=659239,www.cookbooks.com
...,...,...,...,...,...,...
2223095,2223095,sunnys fake crepes,[1/2 cup chocolate hazelnut spread (recommende...,[Spread hazelnut spread on 1 side of each tort...,www.foodnetwork.com/recipes/sunny-anderson/sun...,www.foodnetwork.com
2223096,2223096,devil eggs,"[1 dozen eggs, 1 paprika, 1 salt and pepper to...","[Boil eggs on medium for 30mins., Then cool eg...",cookpad.com/us/recipes/355411-devil-eggs,cookpad.com
2223097,2223097,extremely easy and quick namul daikon salad,"[150 grams Daikon radish, 1 tbsp Sesame oil, 1...",[Julienne the daikon and squeeze out the exces...,cookpad.com/us/recipes/153324-extremely-easy-a...,cookpad.com
2223098,2223098,panroasted pork chops with apple fritters,"[1 cup apple cider, 6 tablespoons sugar, 4 tab...","[In a large bowl, mix the apple cider with 4 c...",cooking.nytimes.com/recipes/1015164,cooking.nytimes.com


In [72]:
# Display the recipe-to-ingredient link table (one row per recipe_id / ingredient pair)
recipe_ingredients_df

Unnamed: 0,recipe_id,ingredient
0,0,brown sugar
1,0,vanilla
2,0,nuts
3,0,butter
4,0,milk
...,...,...
16041394,2223099,egg whites
16041395,2223099,red pepper
16041396,2223099,milk
16041397,2223099,sausage


In [51]:
# Inspect the Python type stored in the `directions` cell of the first recipe.
# NOTE(review): the saved output below (`set`) comes from an earlier run (In [51]);
# remove_null_chars now stores a list in this column - re-run to refresh.
type(recipes_df.loc[0, 'directions'])

set

In [238]:
import os
import requests
import json
import re

api_url = 'https://trackapi.nutritionix.com/v2/natural/nutrients'

# Credentials are read from the environment so they never end up in the
# notebook or its history.
# NOTE(review): an app id/key pair was previously hard-coded in this cell;
# treat that pair as leaked and rotate it.
app_id = os.environ.get('NUTRITIONIX_APP_ID')
app_key = os.environ.get('NUTRITIONIX_APP_KEY')
if not app_id or not app_key:
    raise RuntimeError(
        'Set the NUTRITIONIX_APP_ID and NUTRITIONIX_APP_KEY environment '
        'variables before running this cell.'
    )

query = '1 cup rice'
headers = {
    'Content-Type' : 'application/json',
    "x-app-id": app_id,
    "x-app-key": app_key
}
params = {
    "query": query
}
# A timeout keeps the cell from hanging forever if the service does not answer.
response = requests.post(api_url, json=params, headers=headers, timeout=10)
if response.status_code == 200:
    data = response.text
    # Pull every "food_name" value straight out of the raw response text.
    # NOTE(review): this relies on compact JSON (no space after the colon);
    # consider parsing with response.json() once the payload schema is confirmed.
    food_names = re.findall(r'"food_name":"(.*?)"', data)
    print(food_names)
else:
    print(f'Request failed: Error {response.status_code}')

Request failed: Error 401


In [125]:
from spellchecker import SpellChecker

# Try the spell checker on a few sample misspellings
spell = SpellChecker()
misspelled = spell.unknown(['parsey', 'cheesse', 'broc'])

for word in misspelled:
    # First line: the single `most likely` correction.
    # Second line: the full set of `likely` candidates.
    print(spell.correction(word), spell.candidates(word), sep='\n')

cheese
{'cheeses', 'cheese'}
bro
{'brok', 'bros', 'brock', 'bloc', 'croc', 'bro', 'roc', 'brow', 'bronc'}
parley
{'parley', 'parsec', 'parsley', 'parse'}
