In [10]:
import pandas as pd
import sqlite3
conn = sqlite3.connect('/Users/zphilipp/git/research/dealsdb/deals_db.db')
cursor = conn.cursor()
# Vytvoření tabulky (pokud neexistuje)
cursor.execute('''DROP TABLE IF EXISTS deals''')
cursor.execute('''DROP TABLE IF EXISTS redemption''')
cursor.execute('''DROP TABLE IF EXISTS merchant''')
cursor.execute('''DROP TABLE IF EXISTS customer_taxonomy''')
cursor.execute('''DROP TABLE IF EXISTS deal_division''')
cursor.execute('''
CREATE TABLE IF NOT EXISTS deals (
    deal_id TEXT,
    deal_uuid TEXT,
    deal_option_id TEXT,
    title TEXT,
    title_general TEXT,
    highlights TEXT,
    url TEXT,
    redemption_locations_id INT,
    is_bookable INT,
    value INT,
    price INT,
    dynamic_pricing_enabled INT,
    discount_percent INT,
    start_at DATE,
    end_at DATE,
    merchant_id TEXT,
    ui_treatment_id INT,
    gallery_title TEXT,
    small_image TEXT,
    med_image TEXT,
    margin INT,
    sale_price INT,
    sale_price_effective_date DATE,
    currency TEXT,
    rating_count INT,
    rating_value FLOAT,
    description TEXT,
    customer_category_id TEXT
)''')
cursor.execute('''
CREATE TABLE IF NOT EXISTS redemption (
    deal_uuid TEXT,
    id INT,
    name TEXT,
    country TEXT,
    state TEXT,
    postal_code INT,
    street_address1 TEXT,
    lat FLOAT,
    lon FLOAT,
    geo_type TEXT,
    city TEXT,
    city_permalink TEXT,
    neighborhood TEXT
);
''')
cursor.execute('''
CREATE TABLE IF NOT EXISTS merchant (
    id TEXT PRIMARY KEY,
    name TEXT
);
''')
cursor.execute('''
CREATE TABLE IF NOT EXISTS customer_taxonomy (
    id TEXT PRIMARY KEY,
    name TEXT,
    link TEXT,
    parent_id TEXT
);
''')
cursor.execute('''
    CREATE TABLE IF NOT EXISTS deal_division (
        deal_id TEXT,
        id TEXT,
        name TEXT,
        permalink TEXT
);
''')
cursor.execute('''
    CREATE TABLE IF NOT EXISTS deal_location (
        deal_uuid TEXT,
        lat TEXT,
        lon TEXT
);
''')
conn.commit()
conn.close()

In [11]:
def insertLocations(deal_uuid, lat, lon,c, conn):

        try:
            c.execute("""
INSERT INTO deal_location (deal_uuid, lat, lon)
VALUES (?, ?, ?)""",
            (deal_uuid, lat, lon))
        except:
            print (d)
            raise


def insertRedemption(deal_uuid, r, c, conn):
    for d in r:
        try:
            c.execute("""
INSERT INTO redemption (
    deal_uuid, id, name, country, state, postal_code, street_address1,
    lat, lon, geo_type, city, city_permalink, neighborhood)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (deal_uuid, d.get('id', None), d.get('name', ""), d['country'], 
            d.get('state', ""), d.get('postalCode', None), d['streetAddress1'], d.get('lat', None),
            d.get('lon', None), d.get('geo', {}).get("type", ""), d.get('city', ""),
            d.get('city_object', {}).get('permalink', ''), d.get('neighborhood', "")))

            insertLocations(deal_uuid, d.get('lat'), d.get('lon'), c, conn)
        except:
            print (d)
            raise

def insertDivision(deal_id, d, c, conn):
    data = d.get("divisions", None)

    for d in data:
        try:
            c.execute("""
INSERT INTO deal_division (
    deal_id, id, name, permalink )
VALUES (?, ?, ?, ?)""", (deal_id, d.get('id', None), d.get('name', ""), d.get('permalink', "")))
        except:
            print (d)
            raise

def insertMerchant(d, c, conn):
    try:
        c.execute("""
INSERT INTO merchant (id, name)
VALUES (?, ?)""",
            (d['merchant_id'], d['merchant_name']))
    except:
        pass
        #raise

def insertCustomerTaxonomy(d, c, conn):
    try:
        d = d[0]
        categories = []
        if d.get("top_category_guid", ""):
            categories.append({
                "id" : d.get("top_category_guid", ""),
                "name" : d.get("top_category", ""),
                "link" : d.get("top_category_permalink", ""),
                "parent" : None})
        if d.get("category_guid", ""):
            categories.append({
                "id" : d.get("category_guid", ""),
                "name" : d.get("category", ""),
                "link" : d.get("category_permalink", ""),
                "parent" : d.get("top_category_guid", "")})
        if d.get("category2_guid", ""):
            categories.append({
                "id" : d.get("category2_guid", ""),
                "name" : d.get("category2", ""),
                "link" : d.get("category2_permalink", ""),
                "parent" : d.get("category_guid", "")})
        if d.get("category3_guid", ""):
            categories.append({
                "id" : d.get("category3_guid", ""),
                "name" : d.get("category3", ""),
                "link" : d.get("category3_permalink", ""),
                "parent" : d.get("category2_guid", "")})
        if d.get("category4_guid", ""):
            categories.append({
                "id" : d.get("categor4_guid", ""),
                "name" : d.get("category4", ""),
                "link" : d.get("category4_permalink", ""),
                "parent" : d.get("category3_guid", "")})
        if d.get("category5_guid", ""):
            categories.append({
                "id" : d.get("categor5_guid", ""),
                "name" : d.get("category5", ""),
                "link" : d.get("category5_permalink", ""),
                "parent" : d.get("category4_guid", "")})
        for ca in categories:
            c.execute("""
INSERT OR IGNORE INTO customer_taxonomy (id, name, link, parent_id)
VALUES (?, ?, ?, ?)""", (ca['id'], ca['name'], ca['link'], ca['parent']))
    except:
        print (categories)
        raise
        #return None
    return categories.pop()["id"]
    
def insertDeal(d, c, conn):
    if d.get("is_bookable", None):
        is_bookable = d["is_bookable"]
    else:
        is_bookable = None
        
    if d.get("merchant_id", None):
        insertMerchant(d, c, conn)

    if d.get('customer_taxonomy_hierarchy', []):
        customer_category_id = insertCustomerTaxonomy(
            d.get('customer_taxonomy_hierarchy', []), c, conn)
    else:
        customer_category_id = None

    if d.get("redemption_locations", None):
        insertRedemption(d["deal_uuid"], d["redemption_locations"], c, conn)

    if d.get("divisions", None):
        insertDivision(d['deal_id'], d, c, conn)
        

    c.execute("""
INSERT INTO deals (
    deal_id, deal_uuid, deal_option_id, title, highlights, title_general, url, redemption_locations_id,
    is_bookable, value, price, dynamic_pricing_enabled, discount_percent, start_at, end_at,
    merchant_id, ui_treatment_id, gallery_title, small_image, med_image, margin,
    sale_price, sale_price_effective_date, currency, rating_count, rating_value, description,
    customer_category_id)

VALUES
    (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)

""", (
    d['deal_id'], d["deal_uuid"], d["deal_option_id"], d.get("title", None), d.get("title", "highlights"), d["title_general"],
    d["url"], 0,
    is_bookable,
    d["value"], d["price"],
    d.get("dynamicPricingEnabled", ""), d.get("discount_percent", None), d["start_at"], d["end_at"],
    d.get("merchant_id", None), d.get("ui_treatment_id", None), d["gallery_title"], d["small_image"],
    d["med_image"], d.get("margin", None), d["sale_price"], d["sale_price_effective_date"],
    d["currency"], d.get("rating_count", None), d.get("rating_value", None), d.get("description", ""),
    customer_category_id
))
    #if d.get("divisions", None):
    #    insertDicisions(d["divisions"], c, conn)

    #if len(d) > 1:
    #    print ("Div: %s" % d)

    
    conn.commit()

In [12]:
import json
import os
directory = 'deals_data/saved_deals/'
conn = sqlite3.connect('deals_db.db')
cursor = conn.cursor()

cursor.execute("""DELETE FROM deals""")
cursor.execute("""DELETE FROM redemption""")
cursor.execute("""DELETE FROM merchant""")
cursor.execute("""DELETE FROM customer_taxonomy""")
cursor.execute("""DELETE FROM deal_division""")
conn.commit()
i = 0
# Čtení původního JSON souboru po řádcích
for filename in os.listdir(directory):
    
    with open('deals_data/saved_deals/%s' % filename, 'r', encoding='UTF-8') as file:
        data = file.read()
        d = json.loads(data)
        insertDeal(d, cursor, conn)

    conn.commit()

conn.close()
        
    #i = i + 1
    #if (i > 1):
    #    break

{'deal_id': 'texas-rotisserie-and-grill-2581-broadway-2', 'deal_option_id': '6d5e65e3-cd62-4b0a-a35d-14caa0b49fa1', 'deal_uuid': 'ce3810b1-66c7-4be9-8dd1-9d01a66f273d', 'url': 'https://www.groupon.com/deals/texas-rotisserie-and-grill-2581-broadway-2', 'redemption_locations': [{'uuid': 'a60554f9-730d-b06a-768b-015071afc5be', 'id': 84179728, 'name': 'Manhattan', 'country': 'US', 'state': 'NY', 'postalCode': '10025', 'streetAddress1': '2581 Broadway', 'lat': 40.7955427, 'lng': -73.9714899, 'geo': {'coordinates': [-73.9714899, 40.7955427], 'type': 'Point'}, 'city': 'New York', 'city_object': {'name': 'New York', 'permalink': 'new-york-ny'}, 'neighborhood': 'Manhattan', 'neighborhood_object': {'name': 'Manhattan', 'permalink': 'upper-west-side-new-york-ny'}}], 'is_bookable': False, 'value': 2099, 'price': 1679, 'dynamicPricingEnabled': True, 'discount_percent': 28, 'start_at': '2025-01-13T05:00:00.000Z', 'end_at': '2049-05-22T03:59:59.000Z', 'merchant_name': 'Texas Rotisserie And Grill  258

OperationalError: near "VALUES": syntax error

In [34]:
with open('deals_data/saved_deals/deal_33ee55dc-4449-49bc-9636-96361bb78fcb.json', 'r') as f:
    d = json.load(f)
d

{'deal_id': 'viator-sebago-trails-paddling-co',
 'deal_option_id': 'e57cd9d8-8652-4f06-b14f-406b8dbc67c0',
 'deal_uuid': '33ee55dc-4449-49bc-9636-96361bb78fcb',
 'url': 'https://www.groupon.com/deals/viator-sebago-trails-paddling-co',
 'redemption_locations': [],
 'is_bookable': True,
 'value': 5900,
 'price': 5900,
 'discount_percent': 0,
 'start_at': '2021-01-26T20:39:50.000Z',
 'end_at': '2040-01-01T00:00:00.000Z',
 'merchant_name': 'Sebago Trails Paddling Co.',
 'merchant_id': 'viator-2',
 'ui_treatment_id': '649ddf1b-203d-4574-91d1-812a35fdf85c',
 'grt_taxonomy_hierarchy': [{'category': 'L2 - Things to Do - Leisure',
   'category2': 'L3 - Tours & Tastings',
   'category2_guid': '54d5094d-57b2-4cfc-a1fd-5f6cf3e82c97',
   'category3': 'L4 - Tours & Tastings',
   'category3_guid': 'cd6290e0-6aac-452f-9955-3b6835313c05',
   'category4': 'L5 - Tours & Tastings',
   'category4_guid': '351bb479-a133-4756-acce-43b621cfccb9',
   'category5': 'L6 - Tour / Attraction Passes',
   'category5_g

In [10]:
conn.close()

In [17]:
import json
import os
directory = 'deals_data/saved_deals/'
i = 0
# Čtení původního JSON souboru po řádcích
deals = []
for filename in os.listdir(directory):
    with open('deals_data/saved_deals/%s' % filename, 'r') as f:
        d = json.load(f)

        c = d.get('customer_taxonomy_hierarchy', None)[0]

        if (
            c.get("category_guid", None) == '80082ffd-dbc6-477a-8067-c084db4fb7cc' or
            c.get("category2_guid", None) == '80082ffd-dbc6-477a-8067-c084db4fb7cc' or
            c.get("category3_guid", None) == '80082ffd-dbc6-477a-8067-c084db4fb7cc' or
            c.get("category4_guid", None) == '80082ffd-dbc6-477a-8067-c084db4fb7cc' or
            c.get("category4_guid", None) == '80082ffd-dbc6-477a-8067-c084db4fb7cc'
        ):
            deals.append(c)  
        if i > 2:
            break

print(json.dumps(deals, indent=4))

IndexError: list index out of range

In [9]:
import pandas as pd
import sqlite3
conn = sqlite3.connect('deals_db.db')
cursor = conn.cursor()

c.execute("""
SELECT t.id, t.name, d.deal_id
FROM
    deals d JOIN customer_taxonomy t
        ON (d.customer_category_id=t.id)
GROUP BY t.id
ORDER BY t.id
""")


conn.commit()
conn.close()

NameError: name 'c' is not defined