###### imports

In [77]:
import pandas as pd
import re
import pulp

##### data wrangling

In [78]:
data = pd.read_csv('./Data/kaggle_data.csv') #read in the kaggle nutrition data.

data = data.drop(columns=["Unnamed: 0"]) # get rid of extra index
data = data.astype(str) # convert all columns to strings

In [79]:
s = data.iloc[1,1]
print(s)
alpha = re.compile(r"[a-z]+")
m = alpha.search(s)
m.group()

100 g


'g'

In [80]:
data.columns

Index(['name', 'serving_size', 'calories', 'total_fat', 'saturated_fat',
       'cholesterol', 'sodium', 'choline', 'folate', 'folic_acid', 'niacin',
       'pantothenic_acid', 'riboflavin', 'thiamin', 'vitamin_a',
       'vitamin_a_rae', 'carotene_alpha', 'carotene_beta',
       'cryptoxanthin_beta', 'lutein_zeaxanthin', 'lucopene', 'vitamin_b12',
       'vitamin_b6', 'vitamin_c', 'vitamin_d', 'vitamin_e', 'tocopherol_alpha',
       'vitamin_k', 'calcium', 'copper', 'irom', 'magnesium', 'manganese',
       'phosphorous', 'potassium', 'selenium', 'zink', 'protein', 'alanine',
       'arginine', 'aspartic_acid', 'cystine', 'glutamic_acid', 'glycine',
       'histidine', 'hydroxyproline', 'isoleucine', 'leucine', 'lysine',
       'methionine', 'phenylalanine', 'proline', 'serine', 'threonine',
       'tryptophan', 'tyrosine', 'valine', 'carbohydrate', 'fiber', 'sugars',
       'fructose', 'galactose', 'glucose', 'lactose', 'maltose', 'sucrose',
       'fat', 'saturated_fatty_acids', 'mon

In [81]:
# 'zink' should be 'zinc'
data = data.rename({'zink':'zinc'},
            axis=1)
data.zinc

0       0.06 mg
1       4.53 mg
2       0.16 mg
3       3.63 mg
4       0.48 mg
         ...   
8784    3.67 mg
8785    4.30 mg
8786    2.39 mg
8787    3.42 mg
8788    3.42 mg
Name: zinc, Length: 8789, dtype: object

In [82]:
data.columns[1]

'serving_size'

In [83]:
# Generate a dictionary of units for the different nutrient types (serving_size, 
# calories, etc.)
units = {}
for r in range(1,len(data.columns)):  # for each of the nutrient labels
    alpha = re.compile(r"[mcgiu]+",re.I) # generate pattern object
    i = 0 # start with 1st nutrient value
    s = data.iat[i,r] # get nutrient value
    m = alpha.search(s) # check pattern object against nutrient value, 
                        # generating match object
    # try:
    #     print(m.group())
    # except:
    #     print("no match")
    try:
        units[data.columns[r]] = m.group()
    except:
        while not m:
            # loop through until we have a unit or decide there are no units
            i = i+1 # go to next value
            s = data.iat[i,r] # store value
            m = alpha.search(s) # search value against the pattern

            try: # store match if there is one
                units[data.columns[r]] = m.group() 
            except: 
                if i==data.shape[0]-1: # if no units found by end, None units
                    m = "None"
                    units[data.columns[r]] = m

In [84]:
units

{'serving_size': 'g',
 'calories': 'None',
 'total_fat': 'g',
 'saturated_fat': 'g',
 'cholesterol': 'mg',
 'sodium': 'mg',
 'choline': 'mg',
 'folate': 'mcg',
 'folic_acid': 'mcg',
 'niacin': 'mg',
 'pantothenic_acid': 'mg',
 'riboflavin': 'mg',
 'thiamin': 'mg',
 'vitamin_a': 'IU',
 'vitamin_a_rae': 'mcg',
 'carotene_alpha': 'mcg',
 'carotene_beta': 'mcg',
 'cryptoxanthin_beta': 'mcg',
 'lutein_zeaxanthin': 'mcg',
 'lucopene': 'None',
 'vitamin_b12': 'mcg',
 'vitamin_b6': 'mg',
 'vitamin_c': 'mg',
 'vitamin_d': 'IU',
 'vitamin_e': 'mg',
 'tocopherol_alpha': 'mg',
 'vitamin_k': 'mcg',
 'calcium': 'mg',
 'copper': 'mg',
 'irom': 'mg',
 'magnesium': 'mg',
 'manganese': 'mg',
 'phosphorous': 'mg',
 'potassium': 'mg',
 'selenium': 'mcg',
 'zinc': 'mg',
 'protein': 'g',
 'alanine': 'g',
 'arginine': 'g',
 'aspartic_acid': 'g',
 'cystine': 'g',
 'glutamic_acid': 'g',
 'glycine': 'g',
 'histidine': 'g',
 'hydroxyproline': 'g',
 'isoleucine': 'g',
 'leucine': 'g',
 'lysine': 'g',
 'methionine

In [85]:
data.head() # looking at the data again, I need to remove the units, and replace 
            # nan with 0 

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [86]:
# Generate a dictionary of units for the different info types (serving_size, 
# calories, etc.)

units = {}
for r in range(1,len(data.columns)):  # for each of the nutrient labels
    pattern_units = re.compile(r"[mcgiu]+",re.I) # generate pattern object
    i = 0 # start with 1st nutrient value
    s = data.iat[i,r] # get nutrient value
    m = pattern_units.search(s) # check pattern object against nutrient value, 
                        # generating match object
    # try:
    #     print(m.group())
    # except:
    #     print("no match")
    try:
        units[data.columns[r]] = m.group()
    except:
        while not m:
            # loop through until we have a unit or decide there are no units
            i = i+1 # go to next value
            s = data.iat[i,r] # store value
            m = pattern_units.search(s) # search value against the pattern

            try: # store match if there is one
                units[data.columns[r]] = m.group() 
            except: 
                if i==data.shape[0]-1: # if no units found by end, None units
                    m = "None"
                    units[data.columns[r]] = m

 per [Joao Silva, stack overflow](https://stackoverflow.com/questions/1450897/remove-characters-except-digits-from-string-using-python), use re.sub to replace digits.  Need to tweak to keep the decimal

In [87]:
data.head()

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,Cornstarch,100 g,381,0.1g,,0,9.00 mg,0.4 mg,0.00 mcg,0.00 mcg,...,0.05 g,0.009 g,0.016 g,0.025 g,0.00 mg,0.0 g,0.09 g,0.00 mg,0.00 mg,8.32 g
1,"Nuts, pecans",100 g,691,72g,6.2g,0,0.00 mg,40.5 mg,22.00 mcg,0.00 mcg,...,71.97 g,6.180 g,40.801 g,21.614 g,0.00 mg,0.0 g,1.49 g,0.00 mg,0.00 mg,3.52 g
2,"Eggplant, raw",100 g,25,0.2g,,0,2.00 mg,6.9 mg,22.00 mcg,0.00 mcg,...,0.18 g,0.034 g,0.016 g,0.076 g,0.00 mg,0.0 g,0.66 g,0.00 mg,0.00 mg,92.30 g
3,"Teff, uncooked",100 g,367,2.4g,0.4g,0,12.00 mg,13.1 mg,0,0,...,2.38 g,0.449 g,0.589 g,1.071 g,0,0,2.37 g,0,0,8.82 g
4,"Sherbet, orange",100 g,144,2g,1.2g,1mg,46.00 mg,7.7 mg,4.00 mcg,0.00 mcg,...,2.00 g,1.160 g,0.530 g,0.080 g,1.00 mg,0.0 g,0.40 g,0.00 mg,0.00 mg,66.10 g


In [88]:
data.shape[1]

76

In [89]:
# remove all of the units, which have been stored and should be constant across 
# all of the food types for each nutrient

for c in range(1,data.shape[1]):
    for r in range(0,data.shape[0]):
        try:
            data.iat[r,c] = float(re.sub(r"[^0-9.]","",str(data.iat[r,c])))
        except (ValueError):
            pass

In [90]:
data.head()

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,Cornstarch,100.0,381.0,0.1,,0.0,9.0,0.4,0.0,0.0,...,0.05,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32
1,"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,...,71.97,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
2,"Eggplant, raw",100.0,25.0,0.2,,0.0,2.0,6.9,22.0,0.0,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3
3,"Teff, uncooked",100.0,367.0,2.4,0.4,0.0,12.0,13.1,0.0,0.0,...,2.38,0.449,0.589,1.071,0.0,0.0,2.37,0.0,0.0,8.82
4,"Sherbet, orange",100.0,144.0,2.0,1.2,1.0,46.0,7.7,4.0,0.0,...,2.0,1.16,0.53,0.08,1.0,0.0,0.4,0.0,0.0,66.1


In [91]:
# find the food, nutrient coordinates of the 'nan' values for inspection and 
# confirmation that they can be safely converted to float('0').

nan_coord = []
for c in range(1,data.shape[1]):
    for r in range(0,data.shape[0]):
        if (data.iat[r,c] == "nan"):
            nan_coord.append([data.iat[r,0],data.columns[c],(r,c)])
            
# I'm not sure if having the alpha and/or numeric coordinates together
# or separate is better.  I want a good look at nan's per nutrient type


In [92]:
nan_coord[0:5]

[['Cornstarch', 'saturated_fat', (0, 4)],
 ['Eggplant, raw', 'saturated_fat', (2, 4)],
 ['PACE, Picante Sauce', 'saturated_fat', (10, 4)],
 ['Goji berries, dried', 'saturated_fat', (11, 4)],
 ['Mango nectar, canned', 'saturated_fat', (12, 4)]]

In [93]:
# I'll generate a df with two columns, food and nutrient_category 
coord_2d = [(x, y) for x, y, _ in nan_coord]

# Creating a dataframe from the 2D data
TwoDDF = pd.DataFrame(coord_2d, columns=['food', 'nutrient_category'])

# Displaying the dataframe
print(TwoDDF)

                                                   food nutrient_category
0                                            Cornstarch     saturated_fat
1                                         Eggplant, raw     saturated_fat
2                                   PACE, Picante Sauce     saturated_fat
3                                   Goji berries, dried     saturated_fat
4                                  Mango nectar, canned     saturated_fat
...                                                 ...               ...
1585  Beverages, prepared with water, frozen concent...     saturated_fat
1586  Apple juice, diluted with 3 volume water witho...     saturated_fat
1587  Carbonated beverage, without caffeine, with so...     saturated_fat
1588  Apple juice, with added ascorbic acid, diluted...     saturated_fat
1589  Pan Dulce, contains wheat flour and rice flour...     saturated_fat

[1590 rows x 2 columns]


In [94]:
# saturated_fat is the only category with 'nan' values
TwoDDF[["nutrient_category"]].value_counts()

nutrient_category
saturated_fat        1590
dtype: int64

In [95]:
# convert nan for saturated_fat to 0

data.saturated_fat = [0 if val == 'nan' else val for val in data.saturated_fat]


In [96]:
data.dtypes

name              object
serving_size      object
calories          object
total_fat         object
saturated_fat    float64
                  ...   
alcohol           object
ash               object
caffeine          object
theobromine       object
water             object
Length: 76, dtype: object

In [97]:
for column in data.columns:
    try:
        data[column] = pd.to_numeric(data[column], errors='raise').astype(float)
    except ValueError:
        pass  # Ignore columns that cannot be converted to float


In [98]:
data.dtypes.value_counts() # all columns have been converted to float, 
# except "name"

float64    75
object      1
dtype: int64

In [99]:
data.head(2)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,Cornstarch,100.0,381.0,0.1,0.0,0.0,9.0,0.4,0.0,0.0,...,0.05,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32
1,"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,...,71.97,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52


In [100]:
[print(val) for val in data.columns.values];

name
serving_size
calories
total_fat
saturated_fat
cholesterol
sodium
choline
folate
folic_acid
niacin
pantothenic_acid
riboflavin
thiamin
vitamin_a
vitamin_a_rae
carotene_alpha
carotene_beta
cryptoxanthin_beta
lutein_zeaxanthin
lucopene
vitamin_b12
vitamin_b6
vitamin_c
vitamin_d
vitamin_e
tocopherol_alpha
vitamin_k
calcium
copper
irom
magnesium
manganese
phosphorous
potassium
selenium
zinc
protein
alanine
arginine
aspartic_acid
cystine
glutamic_acid
glycine
histidine
hydroxyproline
isoleucine
leucine
lysine
methionine
phenylalanine
proline
serine
threonine
tryptophan
tyrosine
valine
carbohydrate
fiber
sugars
fructose
galactose
glucose
lactose
maltose
sucrose
fat
saturated_fatty_acids
monounsaturated_fatty_acids
polyunsaturated_fatty_acids
fatty_acids_total_trans
alcohol
ash
caffeine
theobromine
water


In [101]:
constraints_aa = pd.read_csv("./AminoAcids.csv")
constraints = pd.read_csv("./constraints.csv")
# remove notes from constraints
constraints = constraints.drop(['notes:'], axis = 1)

# store and remove units
constraints_units = {}
alpha = re.compile(r"[mcg]+",re.I) # generate pattern object
for r in range(0, constraints.shape[0]):
    s = constraints.Min[r] # get nutrient value
    try:
        m = alpha.search(constraints.Min[r]) # check pattern object against nutrient value, 
        constraints_units[constraints.nutrient[r]] = m.group()
        constraints.iat[r,1] = float(re.sub(r"[^0-9.]","",
                                     str(constraints.iat[r,1])))
        constraints.iat[r,2] = float(re.sub(r"[^0-9.]","",
                                     str(constraints.iat[r,2])))
    except:
        constraints_units[constraints.nutrient[r]] = "None"

constraints.Min = pd.to_numeric(constraints.Min)
constraints.Max = pd.to_numeric(constraints.Max)

In [102]:
constraints.dtypes

nutrient     object
Min         float64
Max         float64
dtype: object

In [136]:
data.head().protein

0     0.26
1     9.17
2     0.98
3    13.30
4     1.10
Name: protein, dtype: float64

In [107]:
# I'll convert the units of vitamin_a  from IU to the current standard of mcg.
# The conversion is 1 IU = .3 mcg RAE
units["vitamin_a"]
data.vitamin_a = data.vitamin_a/.3

In [108]:
units["vitamin_a"] = 'mcg'

In [109]:
data.vitamin_a

0         0.000000
1       186.666667
2        76.666667
3        30.000000
4       153.333333
           ...    
8784     36.666667
8785      0.000000
8786      0.000000
8787     13.333333
8788     20.000000
Name: vitamin_a, Length: 8789, dtype: float64

In [110]:
constraints.head()

Unnamed: 0,nutrient,Min,Max
0,calories,2000.0,2000.0
1,protein,56.0,160.0
2,total_fat,22.2,78.0
3,saturated_fat,0.0,12.0
4,cholesterol,0.0,


Which nutrients will I be using as constraints and what will I be optimizing.  Ultimately I would 

In [111]:
data.head()

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,Cornstarch,100.0,381.0,0.1,0.0,0.0,9.0,0.4,0.0,0.0,...,0.05,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32
1,"Nuts, pecans",100.0,691.0,72.0,6.2,0.0,0.0,40.5,22.0,0.0,...,71.97,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
2,"Eggplant, raw",100.0,25.0,0.2,0.0,0.0,2.0,6.9,22.0,0.0,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3
3,"Teff, uncooked",100.0,367.0,2.4,0.4,0.0,12.0,13.1,0.0,0.0,...,2.38,0.449,0.589,1.071,0.0,0.0,2.37,0.0,0.0,8.82
4,"Sherbet, orange",100.0,144.0,2.0,1.2,1.0,46.0,7.7,4.0,0.0,...,2.0,1.16,0.53,0.08,1.0,0.0,0.4,0.0,0.0,66.1


In [112]:
constraints.head()

Unnamed: 0,nutrient,Min,Max
0,calories,2000.0,2000.0
1,protein,56.0,160.0
2,total_fat,22.2,78.0
3,saturated_fat,0.0,12.0
4,cholesterol,0.0,


In [113]:
constraints_aa

Unnamed: 0,AminoAcid,Min
0,cystine + methionine,19 mg/kg/d
1,histidine,14 mg/kg/d
2,isoleucine,19 mg/kg/d
3,leucine,42 mg/kg/d
4,lysine,38 mg/kg/d
5,methionine,13 mg/kg/d
6,phenylalanine + tyrosine,33 mg/kg/d
7,phenylalanine,14 mg/kg/d
8,threonine,20 mg/kg/d
9,tryptophan,5 mg/kg/d


In [114]:
constraints_aa.head()

Unnamed: 0,AminoAcid,Min
0,cystine + methionine,19 mg/kg/d
1,histidine,14 mg/kg/d
2,isoleucine,19 mg/kg/d
3,leucine,42 mg/kg/d
4,lysine,38 mg/kg/d


In [115]:
constraints_aa = constraints_aa.dropna(how='any')
constraints_aa.reset_index(drop=True, inplace=True)

In [116]:
# remove units from the aminoacid constraints:
constraints_aa['Min'] = constraints_aa['Min'].str.extract('(\d+)', expand=False)

Bodyweight adjustment for AA requirements.

In [117]:
weight_kg = float(input("Enter your weight in kilograms: "))

Enter your weight in kilograms:  80


In [118]:
constraints_aa['Min'] = pd.to_numeric(constraints_aa['Min'])*weight_kg

[per EdChum](https://stackoverflow.com/questions/44117326/how-can-i-remove-all-non-numeric-characters-from-all-the-values-in-a-particular)

In [124]:
constraints = constraints.set_index('nutrient')

KeyError: "None of ['nutrients'] are in the columns"

In [127]:
constraints

Unnamed: 0_level_0,Min,Max
nutrient,Unnamed: 1_level_1,Unnamed: 2_level_1
calories,2000.0,2000.0
protein,56.0,160.0
total_fat,22.2,78.0
saturated_fat,0.0,12.0
cholesterol,0.0,
sodium,1500.0,2300.0
choline,550.0,3500.0
folate,400.0,1000.0
folic_acid,,
niacin,16.0,35.0


In [128]:
data.columns

Index(['name', 'serving_size', 'calories', 'total_fat', 'saturated_fat',
       'cholesterol', 'sodium', 'choline', 'folate', 'folic_acid', 'niacin',
       'pantothenic_acid', 'riboflavin', 'thiamin', 'vitamin_a',
       'vitamin_a_rae', 'carotene_alpha', 'carotene_beta',
       'cryptoxanthin_beta', 'lutein_zeaxanthin', 'lucopene', 'vitamin_b12',
       'vitamin_b6', 'vitamin_c', 'vitamin_d', 'vitamin_e', 'tocopherol_alpha',
       'vitamin_k', 'calcium', 'copper', 'irom', 'magnesium', 'manganese',
       'phosphorous', 'potassium', 'selenium', 'zinc', 'protein', 'alanine',
       'arginine', 'aspartic_acid', 'cystine', 'glutamic_acid', 'glycine',
       'histidine', 'hydroxyproline', 'isoleucine', 'leucine', 'lysine',
       'methionine', 'phenylalanine', 'proline', 'serine', 'threonine',
       'tryptophan', 'tyrosine', 'valine', 'carbohydrate', 'fiber', 'sugars',
       'fructose', 'galactose', 'glucose', 'lactose', 'maltose', 'sucrose',
       'fat', 'saturated_fatty_acids', 'mon

In [133]:
data.name.values

array(['Cornstarch', 'Nuts, pecans', 'Eggplant, raw', ...,
       'Lamb, raw, separable lean and fat, composite of trimmed retail cuts, frozen, imported, New Zealand',
       'Beef, raw, all grades, trimmed to 0" fat, separable lean only, boneless, eye of round roast, round',
       'Beef, raw, all grades, trimmed to 0" fat, separable lean only, boneless, eye of round steak, round'],
      dtype=object)

food:Cornstarch type:<class 'str'>, i:0 
food:Nuts, pecans type:<class 'str'>, i:1 
food:Eggplant, raw type:<class 'str'>, i:2 
food:Teff, uncooked type:<class 'str'>, i:3 
food:Sherbet, orange type:<class 'str'>, i:4 
food:Cauliflower, raw type:<class 'str'>, i:5 
food:Taro leaves, raw type:<class 'str'>, i:6 
food:Lamb, raw, ground type:<class 'str'>, i:7 
food:Cheese, camembert type:<class 'str'>, i:8 
food:Vegetarian fillets type:<class 'str'>, i:9 
food:PACE, Picante Sauce type:<class 'str'>, i:10 
food:Goji berries, dried type:<class 'str'>, i:11 
food:Mango nectar, canned type:<class 'str'>, i:12 
food:Crackers, rusk toast type:<class 'str'>, i:13 
food:Chicken, boiled, feet type:<class 'str'>, i:14 
food:Quail, raw, meat only type:<class 'str'>, i:15 
food:Pie, lemon, fried pies type:<class 'str'>, i:16 
food:Peppers, raw, jalapeno type:<class 'str'>, i:17 
food:Winged bean tuber, raw type:<class 'str'>, i:18 
food:Salami, turkey, cooked type:<class 'str'>, i:19 
food:Grapes, r

In [155]:

def check_for_food(string, food):
    pattern = re.compile(r"\b" + re.escape(food) + r"\b",re.I)
    match = re.search(pattern, string)
    if match:
        return True
    else:
        return False

In [153]:
apples_match = []
sardines_match = []

for i, food in enumerate(data.name.values):
    if check_for_food(food,'Apples'):
        apples_match.append(True)
    else:
        apples_match.append(False)
    if check_for_food(food, 'Sardines')

SyntaxError: invalid syntax (2727234722.py, line 9)