In [None]:
import sqlite3
import pandas as pd

def print_table(query, p=False, db_path='../data/ratingRager.db'):
    """Run ``query`` against a SQLite database and return the result as a DataFrame.

    Args:
        query: SQL text that produces a result set (e.g. a SELECT statement).
        p: When True, also print the result as a left-aligned plain-text
            table (header, dashed rule, one line per row).
        db_path: Path of the SQLite database file to open. Defaults to the
            database this notebook has always used.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_sql_query(query, conn)``.
    """
    conn = sqlite3.connect(db_path)
    try:
        # The manual cursor pass exists only to build the printed table, so it
        # is skipped entirely when nothing is going to be printed.
        if p:
            cursor = conn.cursor()
            cursor.execute(query)

            column_names = [description[0] for description in cursor.description]
            rows = cursor.fetchall()

            # Each column is as wide as its longest rendered cell, header included.
            column_widths = [
                max(len(str(row[i])) for row in rows + [column_names])
                for i in range(len(column_names))
            ]

            header = "  ".join(
                f"{name:<{column_widths[i]}}" for i, name in enumerate(column_names)
            )
            print('\n', header)
            print("-" * len(header))

            for row in rows:
                row_str = "  ".join(
                    f"{str(item):<{column_widths[i]}}" for i, item in enumerate(row)
                )
                print(row_str)
            print('\n')

        return pd.read_sql_query(query, conn)
    finally:
        # Always release the database handle, even if the query fails.
        conn.close()

In [None]:
# Load the id and name of every row in `restaurants` into a DataFrame,
# without printing the text table.
QUERY = """
    SELECT id, name FROM restaurants;
"""
df = print_table(query=QUERY, p=False)

In [None]:
import re
def normalize_name(name):
    """Return ``name`` with dash suffixes and parenthesised parts removed.

    Two substitutions are applied in order:
      1. ``" -"`` and everything after it on the same line is dropped.
      2. A parenthesised span, together with at most one whitespace character
         directly before it, is dropped. The match is greedy, so it runs from
         the first ``(`` to the last ``)`` on the line.
    """
    for pattern in (r" -.*", r"\s?\(.*\)"):
        name = re.sub(pattern, "", name)
    return name

# Add a column holding the normalized form of each restaurant name.
df['normalized_name'] = df['name'].map(normalize_name)


In [None]:
# Print every normalized name in full (to_string() avoids pandas' truncated repr).
print(df.loc[:, "normalized_name"].to_string())

In [None]:
import pandas as pd

# How many rows share each normalized name.
name_counts = df['normalized_name'].value_counts()

# Score = 1 / (number of rows with the same normalized name), so a name that
# appears once scores 1.0 and repeated names score lower.
df['uniqueness_score'] = 1 / df['normalized_name'].map(name_counts)

print(df['uniqueness_score'])

0       0.008000
1       1.000000
2       1.000000
3       0.008264
4       0.100000
          ...   
3936    1.000000
3937    1.000000
3938    0.333333
3939    1.000000
3940    1.000000
Name: uniqueness_score, Length: 3941, dtype: float64


In [None]:
# Update score table: write each row's uniqueness_score into `scores`,
# matching on restaurant_id.

conn = sqlite3.connect('../data/ratingRager.db')
try:
    cursor = conn.cursor()

    # .tolist() converts the columns to plain Python numbers, which sqlite3
    # can bind directly as query parameters.
    score_params = list(zip(df['uniqueness_score'].tolist(), df['id'].tolist()))

    # `with conn` commits when the block succeeds and rolls back if any
    # UPDATE raises, so the table is never left half-updated.
    with conn:
        cursor.executemany(
            "UPDATE scores SET uniqueness_score = ? WHERE restaurant_id = ?;",
            score_params,
        )
finally:
    # Release the database handle even when the update fails.
    conn.close()