# SQLite

## Import Library

In [9]:
# sqlite3 is part of the Python standard library, so no %pip install is needed
import sqlite3

# %pip install pandas
import pandas as pd

## Connect to SQLite

In [10]:
# Open a connection to the SQLite database file. `connection` is passed to the
# pandas queries further down; `cursor` runs the CREATE TABLE / INSERT statements.
DATABASE_PATH = './database/e-commerce.db'
connection = sqlite3.connect(DATABASE_PATH)
cursor = connection.cursor()

## Read Dataset

In [3]:
# Load the raw CSV into a DataFrame, decoding the file as ISO-8859-1
CSV_PATH = "./datasets/data.csv"
dataset_csv = pd.read_csv(CSV_PATH, encoding='ISO-8859-1')

# Preview the first two rows as the cell output
dataset_csv.head(n=2)

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom


## Create Table

In [26]:
# Create the sales_order table with one column per CSV field that gets inserted.
# Description and CustomerID are the only nullable columns.
# IF NOT EXISTS keeps this cell re-runnable: a plain CREATE TABLE raises
# "table sales_order already exists" once the database file has been set up.
cursor.execute('''
    CREATE TABLE IF NOT EXISTS sales_order (
        InvoiceNo VARCHAR(255) NOT NULL,
        StockCode VARCHAR(255) NOT NULL,
        Description VARCHAR(255),
        Quantity INTEGER NOT NULL,
        InvoiceDate DATETIME NOT NULL,
        UnitPrice FLOAT NOT NULL,
        CustomerID FLOAT,
        Country VARCHAR(255) NOT NULL
    )
''')

connection.commit()

## Insert Data

In [27]:
# Rows to insert, as a list of lists. The column order here must match the
# column list of the INSERT statement below.
data = dataset_csv[['InvoiceNo', 'StockCode', 'Description', 'Quantity', 'InvoiceDate', 'UnitPrice', 'CustomerID', 'Country']].values.tolist()

# Only load the CSV while the table is still empty, so that re-running this
# cell does not append a second copy of every row.
existing_row_count = cursor.execute('SELECT COUNT(*) FROM sales_order').fetchone()[0]

if existing_row_count == 0:
    # Using the connection as a context manager commits when the block
    # succeeds and rolls back if the bulk insert raises part-way through.
    with connection:
        cursor.executemany('''
            INSERT INTO sales_order (InvoiceNo, StockCode, Description, Quantity, InvoiceDate, UnitPrice, CustomerID, Country)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        ''', data)
else:
    print(f"sales_order already contains {existing_row_count} rows; insert skipped")

## Check sales_order Table

In [11]:
# Verify the table through a separate, short-lived connection.
# sqlite3 is already imported in the import cell at the top of the notebook.
conn = sqlite3.connect('./database/e-commerce.db')
try:
    cur = conn.cursor()

    # Each row returned by PRAGMA table_info describes one column;
    # index 1 of the row is read as the column name.
    cur.execute("PRAGMA table_info(sales_order)")
    sql_columns = [column[1] for column in cur.fetchall()]
    print(f"Columns: {sql_columns}")

    cur.execute("SELECT COUNT(*) FROM sales_order")
    sql_num_rows = cur.fetchone()[0]
    print(f"Number of rows: {sql_num_rows}")
finally:
    # Close the connection even when one of the queries above raises
    conn.close()

Columns: ['InvoiceNo', 'StockCode', 'Description', 'Quantity', 'InvoiceDate', 'UnitPrice', 'CustomerID', 'Country']
Number of rows: 541909


## 1. Total number of unique products and unique customers

In [12]:
# Number of distinct product descriptions in sales_order.
# The alias is a double-quoted identifier: single quotes denote string literals
# in SQL, and SQLite only tolerates them as identifiers for compatibility.
sql_num_products = pd.read_sql_query('''
    SELECT COUNT(DISTINCT Description) AS "Unique Products" FROM sales_order
''', connection)

display(sql_num_products)

# Number of distinct customer ids in sales_order.
sql_num_customers = pd.read_sql_query('''
    SELECT COUNT(DISTINCT CustomerID) AS "Unique Customers" FROM sales_order
''', connection)

display(sql_num_customers)

Unnamed: 0,Unique Products
0,4223


Unnamed: 0,Unique Customers
0,4372


## 2. Total revenue for each product, limited to top 5

In [13]:
# Revenue per product description is the sum of Quantity * UnitPrice over its
# rows; only the five largest totals are kept.
top_product_revenue_query = '''
    SELECT 
        Description,
        SUM(Quantity * UnitPrice) AS Revenue
    FROM sales_order 
    GROUP BY Description 
    ORDER BY Revenue DESC
    LIMIT 5
'''
top_product_revenue_df = pd.read_sql_query(top_product_revenue_query, connection)

# Convert the result into a list of plain (Description, Revenue) tuples
sql_top_product_revenues = [
    record
    for record in top_product_revenue_df.itertuples(index=False, name=None)
]

display(sql_top_product_revenues)

[('DOTCOM POSTAGE', 206245.48),
 ('REGENCY CAKESTAND 3 TIER', 164762.19),
 ('WHITE HANGING HEART T-LIGHT HOLDER', 99668.47),
 ('PARTY BUNTING', 98302.98),
 ('JUMBO BAG RED RETROSPOT', 92356.03)]

## 3. The most profitable countries (by total sales), limited to top 5

In [14]:
# Revenue per country is the sum of Quantity * UnitPrice over its rows;
# only the five largest totals are kept.
top_country_revenue_query = '''
    SELECT 
        Country,
        SUM(Quantity * UnitPrice) AS Revenue
    FROM sales_order 
    GROUP BY Country 
    ORDER BY Revenue DESC
    LIMIT 5
'''
top_country_revenue_df = pd.read_sql_query(top_country_revenue_query, connection)

# Convert the result into a list of plain (Country, Revenue) tuples
sql_top_countries = list(
    top_country_revenue_df.itertuples(name=None, index=False)
)

display(sql_top_countries)

[('United Kingdom', 8187806.364),
 ('Netherlands', 284661.54),
 ('EIRE', 263276.82),
 ('Germany', 221698.21),
 ('France', 197403.9)]

## 4. Top-selling products (by total sales) for each country, limited to top 5

In [15]:
# Rank products inside each country by summed Quantity * UnitPrice and keep
# rows ranked 1-5, ordered by country and then by revenue descending.
top_by_sales_query = '''
    SELECT 
        Country,
        Description,
        Revenue
    FROM (
        SELECT 
            Country,
            Description,
            SUM(Quantity * UnitPrice) AS Revenue,
            ROW_NUMBER() OVER (PARTITION BY Country ORDER BY SUM(Quantity * UnitPrice) DESC) AS row_num
        FROM sales_order
        GROUP BY Country, Description
    ) AS ranked_sales
    WHERE row_num <= 5
    ORDER BY Country, Revenue DESC;
'''
sql_top_selling_product = pd.read_sql_query(top_by_sales_query, connection)

# Group the flat result into {country: [(description, revenue), ...]},
# preserving the row order returned by the query.
sql_top_selling_products_by_country_by_sales = {}

for country, description, revenue in sql_top_selling_product.itertuples(index=False, name=None):
    country_products = sql_top_selling_products_by_country_by_sales.setdefault(country, [])
    country_products.append((description, revenue))

display(sql_top_selling_products_by_country_by_sales)

{'Australia': [('RABBIT NIGHT LIGHT', 3375.84),
  ('SET OF 6 SPICE TINS PANTRY DESIGN', 2082.0),
  ('RED TOADSTOOL LED NIGHT LIGHT', 1987.1999999999998),
  ('SET OF 3 CAKE TINS PANTRY DESIGN ', 1978.25),
  ('REGENCY CAKESTAND 3 TIER', 1978.1999999999998)],
 'Austria': [('POSTAGE', 1456.0),
  ('PACK OF 6 SWEETIE GIFT BOXES', 302.40000000000003),
  ('PACK OF 6 PANNETONE GIFT BOXES', 302.40000000000003),
  ('EDWARDIAN PARASOL RED', 214.20000000000002),
  ('EDWARDIAN PARASOL NATURAL', 214.20000000000002)],
 'Bahrain': [('ICE CREAM SUNDAE LIP GLOSS', 120.0),
  ('DOUGHNUT LIP GLOSS ', 75.0),
  ('NOVELTY BISCUITS CAKE STAND 3 TIER', 59.699999999999996),
  ('STRAWBERRY FAIRY CAKE TEAPOT', 39.6),
  ('GROW A FLYTRAP OR SUNFLOWER IN TIN', 30.0)],
 'Belgium': [('POSTAGE', 4248.0),
  ('ROUND SNACK BOXES SET OF4 WOODLAND ', 1181.4),
  ('SPACEBOY LUNCH BOX ', 641.8499999999999),
  ('DOLLY GIRL LUNCH BOX', 641.8499999999999),
  ('REGENCY CAKESTAND 3 TIER', 599.25)],
 'Brazil': [('REGENCY CAKESTAND 3 T

## 5. Top-selling products (by quantity) for each country, limited to top 5

In [16]:
# Rank products inside each country by summed Quantity and keep rows ranked
# 1-5, ordered by country and then by that total descending.
# NOTE: the SQL alias is still called "Revenue", but in this query it holds
# SUM(Quantity), i.e. units sold rather than money.
top_by_quantity_query = '''
    SELECT 
        Country,
        Description,
        Revenue
    FROM (
        SELECT 
            Country,
            Description,
            SUM(Quantity) AS Revenue,
            ROW_NUMBER() OVER (PARTITION BY Country ORDER BY SUM(Quantity) DESC) AS row_num
        FROM sales_order
        GROUP BY Country, Description
    ) AS ranked_sales
    WHERE row_num <= 5
    ORDER BY Country, Revenue DESC;
'''
sql_top_selling_product = pd.read_sql_query(top_by_quantity_query, connection)

# Group the flat result into {country: [(description, quantity), ...]},
# preserving the row order returned by the query.
sql_top_selling_products_by_country_by_quantity = {}

for ranked_row in sql_top_selling_product.itertuples(index=False):
    product_entry = (ranked_row.Description, ranked_row.Revenue)

    if ranked_row.Country in sql_top_selling_products_by_country_by_quantity:
        sql_top_selling_products_by_country_by_quantity[ranked_row.Country].append(product_entry)
    else:
        sql_top_selling_products_by_country_by_quantity[ranked_row.Country] = [product_entry]

display(sql_top_selling_products_by_country_by_quantity)

{'Australia': [('MINI PAINT SET VINTAGE ', 2916),
  ('RABBIT NIGHT LIGHT', 1884),
  ('RED  HARMONICA IN BOX ', 1704),
  ('RED TOADSTOOL LED NIGHT LIGHT', 1344),
  ('DOLLY GIRL LUNCH BOX', 1024)],
 'Austria': [('SET 12 KIDS COLOUR  CHALK STICKS', 288),
  ('MINI JIGSAW PURDEY', 240),
  ('PACK OF 6 SWEETIE GIFT BOXES', 144),
  ('PACK OF 6 PANNETONE GIFT BOXES', 144),
  ('TEA BAG PLATE RED RETROSPOT', 48)],
 'Bahrain': [('ICE CREAM SUNDAE LIP GLOSS', 96),
  ('DOUGHNUT LIP GLOSS ', 60),
  ('GROW A FLYTRAP OR SUNFLOWER IN TIN', 24),
  ('S/4 PINK FLOWER CANDLES IN BOWL', 12),
  ('STRAWBERRY FAIRY CAKE TEAPOT', 8)],
 'Belgium': [('PACK OF 72 RETROSPOT CAKE CASES', 480),
  ('ROUND SNACK BOXES SET OF4 WOODLAND ', 420),
  ('PACK OF 60 SPACEBOY CAKE CASES', 384),
  ('SPACEBOY LUNCH BOX ', 339),
  ('DOLLY GIRL LUNCH BOX', 339)],
 'Brazil': [('SMALL HEART FLOWERS HOOK ', 24),
  ('SET/3 RED GINGHAM ROSE STORAGE BOX', 24),
  ('SET OF 6 SPICE TINS PANTRY DESIGN', 24),
  ('SET OF 4 PANTRY JELLY MOULDS',

## Submission

In [17]:
# Identity and repository details sent with every answer
student_id = "REAINTCZ"
name = "Wiwie Sanjaya"
drive_link = "https://github.com/wiwiewei18/sqlite-and-elastic-search-exercise"

assignment_id = "00_database_project"

from rggrader import submit, submit_image

# (question id, answer) pairs in the order they are submitted.
# Each answer is sent as its str() representation.
answers_in_order = [
    ("01_sql_columns", sql_columns),
    ("02_sql_num_rows", sql_num_rows),
    ("03_sql_num_products", sql_num_products),
    ("04_sql_num_customers", sql_num_customers),
    ("05_sql_top_product_revenues", sql_top_product_revenues),
    ("06_sql_top_countries", sql_top_countries),
    ("07_sql_top_selling_products_by_country_by_sales", sql_top_selling_products_by_country_by_sales),
    ("08_sql_top_selling_products_by_country_by_quantity", sql_top_selling_products_by_country_by_quantity),
]

for question_id, answer in answers_in_order:
    submission_result = submit(student_id, name, assignment_id, str(answer), question_id, drive_link)

# Show the return value of the final submit call as the cell output
submission_result

'Assignment successfully submitted'