In [None]:
import sqlite3
import pandas as pd

def print_table(query):
    """Run ``query`` against the ratings database, print it, and return it.

    The result set is printed to stdout as a left-aligned text table (column
    names, a dashed rule, then one line per row) and is also returned as a
    pandas DataFrame.

    Args:
        query: SQL text to execute against ``data/ratingRager.db``. It must
            produce a result set, because the column names are read from
            ``cursor.description``.

    Returns:
        pandas.DataFrame: the result of running ``query``.
    """
    conn = sqlite3.connect('data/ratingRager.db')
    try:
        cursor = conn.cursor()
        cursor.execute(query)

        column_names = [description[0] for description in cursor.description]

        rows = cursor.fetchall()

        # Each column is as wide as its longest cell; the header row is
        # included so a long column name is never truncated.
        column_widths = [
            max(len(str(row[i])) for row in rows + [column_names])
            for i in range(len(column_names))
        ]

        header = "  ".join(f"{name:<{column_widths[i]}}" for i, name in enumerate(column_names))
        print('\n',header)
        print("-" * len(header))

        for row in rows:
            row_str = "  ".join(f"{str(item):<{column_widths[i]}}" for i, item in enumerate(row))
            print(row_str)
        print('\n')

        # The query is executed a second time here so that pandas builds the
        # DataFrame itself.
        return pd.read_sql_query(query, conn)
    finally:
        # Always release the database handle, even if the query or the
        # printing fails; previously the connection was never closed.
        conn.close()

In [None]:
# Five most recent reviews per restaurant.
# ROW_NUMBER() numbers each restaurant's reviews from newest to oldest and the
# outer SELECT keeps rows numbered 1-5, so a restaurant with fewer than five
# reviews contributes all of them.
# NOTE(review): assumes the `date` column sorts chronologically as stored -
# confirm against the schema.

QUERY = """
WITH recent_reviews AS (
    SELECT
        restaurant_id,
        description,
        date,
        ROW_NUMBER() OVER (PARTITION BY restaurant_id ORDER BY date DESC) AS rank
    FROM reviews
)
SELECT
    restaurant_id,
    description,
    date
FROM recent_reviews
WHERE rank <= 5;
"""
# print_table prints every returned row and hands back the same data as a DataFrame.
df_recent = print_table(QUERY)

In [None]:
# Five oldest reviews per restaurant.
# Same query shape as the most-recent selection, but reviews are numbered from
# oldest to newest (ORDER BY date ASC) before keeping rows numbered 1-5.
# A restaurant with five or fewer reviews therefore yields the same reviews
# here as in the most-recent selection.

QUERY = """
WITH old_reviews AS (
    SELECT
        restaurant_id,
        description,
        date,
        ROW_NUMBER() OVER (PARTITION BY restaurant_id ORDER BY date ASC) AS rank
    FROM reviews
)
SELECT
    restaurant_id,
    description,
    date
FROM old_reviews
WHERE rank <= 5;
"""
# print_table prints every returned row and hands back the same data as a DataFrame.
df_old = print_table(QUERY)

In [None]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# One shared analyzer instance; get_sentiment reuses it for every review.
analyzer = SentimentIntensityAnalyzer()

def get_sentiment(text):
    """Return the compound polarity score of ``text``.

    Args:
        text: Review text to score. Missing values (anything ``pd.isna``
            reports as missing) are not scored.

    Returns:
        The ``'compound'`` entry of the analyzer's polarity scores, or
        ``None`` when ``text`` is missing.
    """
    if not pd.isna(text):
        return analyzer.polarity_scores(text)['compound']
    return None

# Score every recent review; get_sentiment returns None for a missing description.
df_recent["recent_sentiment"] = df_recent["description"].apply(get_sentiment)


In [None]:
# Score the oldest reviews in this cell before summarizing them, so the
# "recent_sentiment" column exists when the notebook is run top to bottom on a
# fresh kernel. The assignment is idempotent, so computing it again elsewhere
# is harmless.
# NOTE(review): the column is called "recent_sentiment" even though df_old
# holds the oldest reviews.
df_old["recent_sentiment"] = df_old["description"].apply(get_sentiment)
print(df_old["recent_sentiment"].min())

-0.9958


In [None]:
# Score every one of the oldest reviews with the same helper used for the recent ones.
# NOTE(review): the column keeps the name "recent_sentiment" although df_old
# holds the oldest reviews; the later groupby on df_old reads it under that name.
df_old["recent_sentiment"] = df_old["description"].apply(get_sentiment)

In [None]:
# Mean sentiment of each restaurant's recent reviews, returned as a two-column
# frame: restaurant_id, average_sentiment.
average_sentiment = (
    df_recent
    .groupby("restaurant_id")["recent_sentiment"]
    .mean()
    .rename("average_sentiment")
    .reset_index()
)

In [None]:
# Inspect the per-restaurant averages computed from the recent reviews.
print(average_sentiment)

      restaurant_id  average_sentiment
0                 1            0.89158
1                 2            0.54396
2                 3            0.31586
3                 4            0.38400
4                 5            0.25674
...             ...                ...
3937           3938            0.49580
3938           3939            0.61324
3939           3940            0.09554
3940           3941            0.49694
3941           3942            0.14860

[3942 rows x 2 columns]


In [None]:
# Mean sentiment of each restaurant's oldest reviews, returned as a two-column
# frame: restaurant_id, average_sentiment.
average_sentiment_old = (
    df_old
    .groupby("restaurant_id")["recent_sentiment"]
    .mean()
    .rename("average_sentiment")
    .reset_index()
)

In [None]:
# Inspect the per-restaurant averages computed from the oldest reviews.
print(average_sentiment_old)

      restaurant_id  average_sentiment
0                 1            0.63434
1                 2            0.52190
2                 3            0.62130
3                 4            0.14236
4                 5            0.62952
...             ...                ...
3937           3938            0.91438
3938           3939            0.85796
3939           3940            0.67970
3940           3941            0.78794
3941           3942            0.92546

[3942 rows x 2 columns]


In [None]:
# Blend the two averages into one score per restaurant, weighting recent
# reviews more heavily than old ones.
RECENT_WEIGHT = 0.7
OLD_WEIGHT = 0.3

# Look up each restaurant's old average by restaurant_id instead of relying on
# both frames having the same rows in the same order; a restaurant missing
# from the old averages gets NaN rather than another restaurant's value.
old_average_by_restaurant = average_sentiment_old.set_index("restaurant_id")["average_sentiment"]
average_sentiment["sentiment_score"] = (
    (RECENT_WEIGHT * average_sentiment["average_sentiment"])
    + (OLD_WEIGHT * average_sentiment["restaurant_id"].map(old_average_by_restaurant))
)

print(average_sentiment["sentiment_score"])

0       0.814408
1       0.537342
2       0.407492
3       0.311508
4       0.368574
          ...   
3937    0.621374
3938    0.686656
3939    0.270788
3940    0.584240
3941    0.381658
Name: sentiment_score, Length: 3942, dtype: float64


In [None]:
# Rescale sentiment_score from the range [-1, 1] to the range [0, 1].
# NOTE: this overwrites the column it reads, so running this cell again without
# first recomputing sentiment_score rescales the already-rescaled values.
average_sentiment["sentiment_score"] = (average_sentiment["sentiment_score"] + 1) / 2

In [None]:
# Sanity check: show the largest rescaled score.
print(average_sentiment["sentiment_score"].max())

0.9926999999999999


In [None]:
# Update score table: write each restaurant's sentiment_score into the
# matching row of `scores`.

# Build plain-Python (score, restaurant_id) pairs up front. `.tolist()` turns
# NumPy scalars into native float/int values, so restaurant_id is bound as an
# integer rather than the float that row-wise iteration would produce.
score_updates = list(zip(
    average_sentiment["sentiment_score"].tolist(),
    average_sentiment["restaurant_id"].tolist(),
))

conn = sqlite3.connect('data/ratingRager.db')
try:
    # Used as a context manager, the connection commits when the block
    # succeeds and rolls back when it raises, so a failure part-way through
    # never leaves the table half-updated.
    with conn:
        conn.executemany(
            "UPDATE scores SET sentiment_score = ? WHERE restaurant_id = ?;",
            score_updates,
        )
finally:
    # `with conn` manages the transaction only; the handle is closed here.
    conn.close()