In [1]:
import snap
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import collections
import pickle

In [2]:
##############################################
########## Graph Loading Functions ###########
##############################################

# Names of the pickled mapping files. Each entry is used verbatim as a file
# stem ('../data/mappings/<name>.pkl') and as the key of the dictionary that
# load_mappings() fills in, so the strings must match the files on disk exactly.
mapping_names = [
    'IID_to_Ingredient_Mapping',
    'Ingredient_to_Category_Mapping',
    'Category_to_Ingredient_Mapping',
    # NOTE(review): contains a space instead of an underscore -- presumably
    # matches the on-disk file name; confirm before renaming.
    'FID_to_Flavor Mapping',
    'Flavor_to_CAS_Mapping',
    'CAS_to_List_of_Flavors_Mapping',
    'RID_to_List_of_Ingredients_Mapping',
    # NOTE(review): "Cuising" looks like a typo for "Cuisine" -- presumably
    # matches the on-disk file name; confirm before renaming.
    'Cuising_to_List_of_Ingredients_Mapping',
    'Ingredient_to_List_of_Cuisines_Mapping',
    'RID_to_Cuisine_Mappings',
    'Cuisine_to_List_of_RIDs_Mapping',
    'Cuisine_to_Regions',
    'Region_to_Cuisines'
]
def load_mappings(mappings_dir='../data/mappings', names=None):
    """Load the pickled data mappings into a dictionary keyed by mapping name.

    Parameters
    ----------
    mappings_dir : str, optional
        Directory holding the '<name>.pkl' files. The default reproduces the
        original hard-coded location.
    names : iterable of str, optional
        Mapping names (file stems) to load. Defaults to the module-level
        `mapping_names` list.

    Returns
    -------
    dict
        Maps each name to the object unpickled from '<mappings_dir>/<name>.pkl'.

    Raises
    ------
    IOError / OSError
        If one of the mapping files cannot be opened.
    """
    if names is None:
        names = mapping_names
    mappings = {}
    for name in names:
        filename = '{}/{}.pkl'.format(mappings_dir, name)
        # NOTE: pickle.load can execute arbitrary code -- only point this at
        # trusted, locally generated files.
        with open(filename, 'rb') as f:
            mappings[name] = pickle.load(f)
    # The dictionary was previously built but never returned, so callers
    # (e.g. load_basic_graphs) received None.
    return mappings
            
def load_graph(filename):
    """Read an undirected snap graph (TUNGraph) from the binary file `filename`."""
    in_stream = snap.TFIn(filename)
    graph = snap.TUNGraph.Load(in_stream)
    return graph

def load_weights(filename):
    """Unpickle and return the weights object stored in `filename`.

    The file is opened in binary mode and closed again before returning.
    """
    # NOTE: pickle.load can execute arbitrary code -- only use trusted files.
    with open(filename, 'rb') as weights_file:
        weights = pickle.load(weights_file)
    return weights

# Binary graph files for the Ingredient-Flavor and Ingredient-Recipe graphs
ingredient_flavor_graph_file = '../data/graphs/ingredient_flavor.graph'
ingredient_recipe_graph_file = '../data/graphs/ingredient_recipe.graph'

def load_basic_graphs():
    """Load the two base graphs and the data mappings.

    Returns a 3-tuple: (Ingredient-Flavor graph, Ingredient-Recipe graph,
    result of load_mappings()), loaded in that order.
    """
    flavor_graph = load_graph(ingredient_flavor_graph_file)
    recipe_graph = load_graph(ingredient_recipe_graph_file)
    mappings = load_mappings()
    return flavor_graph, recipe_graph, mappings

# Files backing the Original Complement Network (OCN)
ocn_graph_file = '../data/graphs/ocn.graph'
ocn_weights_file = '../data/weights/ocn_weights.pkl'

def load_ocn():
    """Return (graph, weights) for the Original Complement Network."""
    graph = load_graph(ocn_graph_file)
    weights = load_weights(ocn_weights_file)
    return graph, weights

# Files backing the Updated Complement Network (UCN)
ucn_graph_file = '../data/graphs/ucn.graph'
ucn_weights_file = '../data/weights/ucn_weights.pkl'

def load_ucn():
    """Return (graph, weights) for the Updated Complement Network."""
    graph = load_graph(ucn_graph_file)
    weights = load_weights(ucn_weights_file)
    return graph, weights

# Files backing the Substitution Network (SN)
sn_graph_file = '../data/graphs/sn.graph'
sn_weights_file = '../data/weights/sn_weights.pkl'

def load_sn():
    """Return (graph, weights) for the Substitution Network."""
    graph = load_graph(sn_graph_file)
    weights = load_weights(sn_weights_file)
    return graph, weights

In [3]:
##############################################
############# General Functions ##############
##############################################

def get_nbr_set(G, NId):
    """Return the set of neighbours of node `NId` in graph `G`.

    Parameters
    ----------
    G : graph object exposing `GetNI(NId)`, whose result exposes
        `GetOutEdges()` (the snap node-iterator interface).
    NId : id of the node whose neighbours are wanted.

    Returns
    -------
    set
        The values yielded by `G.GetNI(NId).GetOutEdges()`.
    """
    # The set was previously built and discarded (missing `return`), so every
    # caller received None.
    return set(G.GetNI(NId).GetOutEdges())

def PMI(IRG, AIId, BIId, NR):
    """Pointwise Mutual Information of ingredients A and B (metric from the paper).

    IRG  -- graph in which the neighbours of an ingredient node are looked up
            (the Ingredient-Recipe graph)
    AIId -- node id of ingredient A
    BIId -- node id of ingredient B
    NR   -- number of recipes, used as the denominator of every probability

    Returns log(P(A,B) / (P(A) * P(B))), where each probability is a
    neighbour count divided by NR.
    """
    recipes_a = get_nbr_set(IRG, AIId)
    recipes_b = get_nbr_set(IRG, BIId)
    total = float(NR)
    p_a = len(recipes_a) / total
    p_b = len(recipes_b) / total
    p_ab = len(recipes_a & recipes_b) / total
    return np.log(p_ab / (p_a * p_b))

def JI(G, ANId, BNId):
    """Jaccard Index of the neighbour sets of two nodes in graph `G`.

    Parameters
    ----------
    G : graph passed through to `get_nbr_set`.
    ANId, BNId : ids of the two nodes to compare.

    Returns
    -------
    float
        |N(A) & N(B)| / |N(A) | N(B)|, or 0.0 when neither node has any
        neighbour (empty union).
    """
    ANbr = get_nbr_set(G, ANId)
    BNbr = get_nbr_set(G, BNId)
    union_size = len(ANbr.union(BNbr))
    if union_size == 0:
        # Two isolated nodes share nothing; return 0.0 instead of raising
        # ZeroDivisionError so that sweeps over all node pairs can proceed.
        return 0.0
    return float(len(ANbr.intersection(BNbr))) / union_size

def COF(IFG, IRG, AIId, BIId, MFF, SFF):
    """Co-Occurrence Factor for two ingredients A and B.

    Computes RF * ((FF - MFF) / SFF) ** 2, where FF is the Jaccard Index of
    A and B in IFG and RF is their Jaccard Index in IRG.

    MFF, SFF -- centre and scale applied to FF; presumably the values
                produced by MeanFF / StdFF (verify against the caller).
    Raises ZeroDivisionError if SFF is zero.
    """
    flavor_sim = JI(IFG, AIId, BIId)
    recipe_sim = JI(IRG, AIId, BIId)
    standardized = (flavor_sim - MFF) / SFF
    return recipe_sim * (standardized ** 2)

def SF(IFG, IRG, AIId, BIId):
    """Substitution Factor for two ingredients A and B.

    Returns FF / RF, where FF is the Jaccard Index of A and B in IFG and RF
    is their Jaccard Index in IRG.

    Raises ZeroDivisionError when RF is zero, i.e. when A and B have no
    common neighbour in IRG.
    """
    flavor_sim = JI(IFG, AIId, BIId)
    recipe_sim = JI(IRG, AIId, BIId)
    return flavor_sim / recipe_sim

def MeanFF(IFG, IIds):
    """Mean Jaccard Index in `IFG` over all unordered pairs of ids in `IIds`.

    Parameters
    ----------
    IFG : graph passed through to `JI`.
    IIds : iterable of node ids. Any iterable is accepted (list, tuple,
        dict key view, ...); it is materialised into a list once.

    Returns
    -------
    numpy float
        `np.mean` of JI(IFG, a, b) over every pair a, b with a before b.
    """
    # Slicing below needs a real sequence; dict.keys() is a non-sliceable view
    # on Python 3, so copy into a list first.
    ids = list(IIds)
    return np.mean([JI(IFG, AIId, BIId) for i, AIId in enumerate(ids[:-1]) for BIId in ids[i+1:]])

def StdFF(IFG, IIds):
    """Standard deviation of the Jaccard Index in `IFG` over all unordered
    pairs of ids in `IIds`.

    Parameters
    ----------
    IFG : graph passed through to `JI`.
    IIds : iterable of node ids. Any iterable is accepted (list, tuple,
        dict key view, ...); it is materialised into a list once.

    Returns
    -------
    numpy float
        `np.std` of JI(IFG, a, b) over every pair a, b with a before b.
    """
    # Use the ids supplied by the caller. The argument was previously
    # overwritten with the keys of an undefined global `iid_to_ingredient`,
    # which raises NameError on a fresh kernel and ignores the parameter.
    ids = list(IIds)
    return np.std([JI(IFG, AIId, BIId) for i, AIId in enumerate(ids[:-1]) for BIId in ids[i+1:]])