In [30]:
#!pip install neo4j
import pandas as pd
import numpy as np
from neo4j import GraphDatabase
import graph_model as gm
from sklearn.metrics import mean_squared_error
from math import sqrt

# Print floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

In [51]:
import os

# Connection settings come from the environment so credentials do not have to
# live in the notebook. The literals are only fallbacks that keep the existing
# workflow running when the variables are unset.
# FIXME(security): this password has been stored in plain text in the notebook;
# rotate it and drop the fallback once NEO4J_PASSWORD is set where this runs.
uri = os.environ.get("NEO4J_URI", "bolt://3.220.233.169:7687")
driver = GraphDatabase.driver(
    uri,
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "i-0e23d19f0d8795714"),
    ),
)

In [41]:
def cypher(driver, query, results_columns, parameters=None):
    """Run a Cypher query and return its rows as a pandas DataFrame.

    Parameters
    ----------
    driver : neo4j driver
        Connection driver; one session is opened and closed per call.
    query : str
        Cypher query text.
    results_columns : list of str
        Column names for the returned frame, in the order of the RETURN clause.
    parameters : dict, optional
        Query parameters forwarded to ``session.run``. Prefer these over
        formatting values into ``query`` by hand.

    Returns
    -------
    pandas.DataFrame
        One row per result record; empty (but with the requested columns)
        when the query returns nothing.
    """
    with driver.session() as session:
        result = session.run(query, parameters)
        # Read the rows while the session is still open. A result belongs to
        # its session; once the session closes, unread records are no longer
        # available, which previously produced empty frames or errors.
        rows = result.values()

    return pd.DataFrame(rows, columns=results_columns)


def expected_rating(rating_dist):
    """Return the expected star rating for a one-to-five rating distribution.

    ``rating_dist[k - 1]`` is taken as the probability of a ``k``-star rating
    for k = 1..5; only those five positions are read.
    """
    return sum(rating_dist[stars - 1] * stars for stars in range(1, 6))


In [56]:
# NOTE(review): `review_dist` is not assigned at notebook scope in any visible
# cell (it only appears as a local inside functions), so this scratch cell
# presumably relies on leftover kernel state - confirm or remove.
review_dist["u.id"]

Series([], Name: u.id, dtype: object)

In [37]:
# NOTE(review): scratch inspection of `review_dist`, which no visible cell
# defines at notebook scope; presumably leftover kernel state - confirm or remove.
review_dist

Unnamed: 0,r.id,r.stars,u.id


In [62]:
def biz_preference_state(driver, biz_id, state):
    """Estimate a five-star rating distribution for a business within a state.

    The reviews of ``biz_id`` are fetched from the graph and combined as a
    naive-Bayes style product of

    * a Laplace-smoothed prior over star levels from all fetched reviews, and
    * one Laplace-smoothed factor per reputation category ('cool2', 'funny1',
      'useful6'), built from the reviews written by reviewers linked to that
      category (falling back to the overall counts when there are none),

    which is then normalised to sum to 1.

    Parameters
    ----------
    driver : neo4j driver
        Passed through to ``cypher`` for every query.
    biz_id : str
        Value matched against ``Business.id``.
    state : str
        Value matched against ``State.name``.

    Returns
    -------
    numpy.ndarray
        Five probabilities; entry ``k - 1`` belongs to a ``k``-star rating.
    """
    star_levels = [1, 2, 3, 4, 5]

    def _star_counts(stars):
        # Count of each star level 1..5 as a float array; absent levels -> 0.
        # reindex is a purely label-based lookup, so no exception handling is
        # needed (the previous `except BaseException` also hid real errors).
        # Assumes r.stars holds numeric 1..5 values - TODO confirm.
        counts = stars.value_counts().reindex(star_levels, fill_value=0)
        return np.asarray(counts, dtype=float)

    # Fetch the reviews of the business.
    # NOTE(review): the pattern ties the *business* (via City) to the State
    # named `state`; it does not constrain where the reviewer is from - confirm
    # this is the intended meaning of "in state".
    # NOTE(review): biz_id/state are formatted straight into the query text;
    # prefer query parameters if these values can come from untrusted input.
    review_dist = cypher(
        driver,
        f"MATCH (u:User)-[:WROTE]->(r:Review)-[:REVIEWS]->(b:Business)-->(:City)-->(s:State)\
        WHERE b.id='{biz_id}' AND s.name='{state}'\
        RETURN r.id, r.stars, u.id",
        [
            'r.id',
            'r.stars',
            'u.id'])

    num_reviews = review_dist['r.stars'].shape[0]
    numerator = _star_counts(review_dist['r.stars'])
    reviewer_ids = list(review_dist["u.id"])
    cat_ids = ['cool2', 'funny1', 'useful6']

    reviews_in_cat = []
    for cat in cat_ids:
        # reviewers of this business that are linked to the reputation node
        users_in_cat = cypher(
            driver,
            f'MATCH (u:User)-[]->(r:Reputation)\
            WHERE r.id ="{cat}" and u.id IN {reviewer_ids}\
            RETURN u.id',
            ['u.id'])

        # the reviews of the business written by those reviewers
        sim_user = cypher(
            driver, f'MATCH (u:User)-[:WROTE]->(r:Review)-[:REVIEWS]->(b:Business)\
            WHERE b.id = "{biz_id}" and u.id IN {list(users_in_cat["u.id"])}\
            RETURN r.stars, u.id', ['r.stars', 'u.id'])
        reviews_in_cat.append(sim_user)

    # Laplace-smoothed estimate of prob(review of biz = k): one pseudo-count
    # per star level, hence the +5 in the denominator.
    PRu = (numerator + 1) / (num_reviews + 5)

    # Laplace-smoothed estimate of prob(review of biz = k | reviewer in
    # category j), one row per category.
    num_cat = len(cat_ids)
    cats_by_stars = np.empty((num_cat, 5))
    for i, cat_reviews in enumerate(reviews_in_cat):
        if cat_reviews.empty:
            # No reviews from this category: use the overall review counts
            # instead of the conditional ones.
            cats_by_stars[i] = numerator
        else:
            cats_by_stars[i] = _star_counts(cat_reviews['r.stars'])

    PRaj = ((cats_by_stars + 1) / (numerator + num_cat)).prod(axis=0)

    # Product of the distributions, normalised so the result sums to 1.
    biz_prefs_un_normalized = PRu * PRaj
    return biz_prefs_un_normalized / biz_prefs_un_normalized.sum()

In [63]:
# Example call: rating distribution for one business, with state 'NV'.
biz_preference_state(driver, biz_id='kUQwaqwxKlScAQNhRx2_2w', state='NV')

array([0.27166951, 0.00971054, 0.12830297, 0.26256042, 0.32775656])

In [None]:
def user_preference(driver, demo_data, biz_id):
    """Estimate the distribution of star ratings a user would give a business.

    The user's own reviews are combined as a naive-Bayes style product of

    * a Laplace-smoothed prior over star levels from all of the user's
      reviews, and
    * one Laplace-smoothed factor per category of ``biz_id``, built from the
      user's reviews of *other* businesses in that category (falling back to
      the overall counts when the user reviewed none),

    which is then normalised to sum to 1.

    NOTE(review): the original cell was unfinished - ``review_dist`` had no
    right-hand side and the per-category query used an undefined ``user_id``.
    This version assumes ``demo_data`` is a DataFrame holding the reviews of
    the user of interest with columns 'r.id', 'r.stars' and 'b.id', and
    filters it locally instead of querying per category - confirm.

    Parameters
    ----------
    driver : neo4j driver
        Passed through to ``cypher`` for the category queries.
    demo_data : pandas.DataFrame
        The user's reviews (see the note above).
    biz_id : str
        Value matched against ``Business.id``.

    Returns
    -------
    numpy.ndarray
        Five probabilities; entry ``k - 1`` belongs to a ``k``-star rating.
    """
    star_levels = [1, 2, 3, 4, 5]

    def _star_counts(stars):
        # Count of each star level 1..5 as a float array; absent levels -> 0.
        # reindex is a purely label-based lookup, so no exception handling is
        # needed (the previous `except BaseException` also hid real errors).
        # Assumes r.stars holds numeric 1..5 values - TODO confirm.
        counts = stars.value_counts().reindex(star_levels, fill_value=0)
        return np.asarray(counts, dtype=float)

    # the user's reviews of businesses, taken from the supplied data
    review_dist = demo_data[['r.id', 'r.stars', 'b.id']]

    # send a cypher query to the server that returns all of the biz's
    # categories
    # NOTE(review): biz_id is formatted straight into the query text; prefer
    # query parameters if it can come from untrusted input.
    biz_categories = cypher(driver, f"\
    MATCH (b:Business)-[:IN_CATEGORY]->(c:Category) \
    WHERE b.id='{biz_id}' RETURN c.id", ['c.id'])

    num_reviews = review_dist['r.stars'].shape[0]
    numerator = _star_counts(review_dist['r.stars'])
    cat_ids = list(biz_categories['c.id'])

    reviews_in_cat = []
    for cat in cat_ids:
        # businesses in this category, excluding the target business itself
        biz_in_cat = cypher(driver, f"\
        MATCH (b:Business)-[:IN_CATEGORY]->(c:Category) \
        WHERE c.id='{cat}' AND NOT b.id='{biz_id}' RETURN b.id", ['b.id'])

        # the user's reviews of businesses in this category
        in_category = review_dist['b.id'].isin(biz_in_cat['b.id'])
        reviews_in_cat.append(review_dist.loc[in_category, ['r.stars', 'b.id']])

    # Laplace-smoothed estimate of prob(review from user = k): one
    # pseudo-count per star level, hence the +5 in the denominator.
    PRu = (numerator + 1) / (num_reviews + 5)

    # Laplace-smoothed estimate of prob(review from user = k | biz in
    # category j), one row per category.
    num_cat = len(cat_ids)
    cats_by_stars = np.empty((num_cat, 5))
    for i, cat_reviews in enumerate(reviews_in_cat):
        if cat_reviews.empty:
            # The user reviewed no business in this category: use the
            # overall review counts instead of the conditional ones.
            cats_by_stars[i] = numerator
        else:
            cats_by_stars[i] = _star_counts(cat_reviews['r.stars'])

    # With no categories at all the product over zero rows is all ones, so
    # the result reduces to the normalised prior.
    PRaj = ((cats_by_stars + 1) / (numerator + num_cat)).prod(axis=0)

    # Product of the distributions, normalised so the result sums to 1.
    user_prefs_un_normalized = PRu * PRaj
    return user_prefs_un_normalized / user_prefs_un_normalized.sum()
