In [None]:
import pandas as pd
import sqlite3

# Read the three take-home CSV extracts into DataFrames (products,
# transactions, users -- in that order).
prods, trans, users = (
    pd.read_csv(csv_path)
    for csv_path in (
        "PRODUCTS_TAKEHOME.csv",
        "TRANSACTION_TAKEHOME.csv",
        "USER_TAKEHOME.csv",
    )
)

# Open a SQLite database that lives only in memory; it disappears when the
# connection is closed.
conn = sqlite3.connect(":memory:")

# Copy each DataFrame into its own SQLite table. The DataFrame index is not
# written, and an existing table with the same name is replaced.
for table_name, frame in (
    ("products", prods),
    ("transactions", trans),
    ("users", users),
):
    frame.to_sql(table_name, conn, index=False, if_exists="replace")

# Cleaning PRODUCTS data: drop rows without a barcode, remove exact duplicate
# rows, and store the barcode as plain digit text.
#
# Why SELECT DISTINCT instead of GROUP BY on the category/brand columns:
# grouping by CATEGORY_1..4 and BRAND keeps only ONE row per category/brand
# combination and SQLite fills the non-grouped barcode column from an arbitrary
# row of the group, so every other product in that combination is silently
# dropped. DISTINCT only collapses rows whose selected columns are all equal.
# NOTE(review): a barcode that appears with conflicting category/brand values
# is kept once per variant so the conflict stays visible -- confirm that this
# is the desired handling.
#
# Why the CASE around the cast: when pandas loads the barcode column as floats
# SQLite stores it as REAL, and CAST(REAL AS TEXT) renders float formatting
# (a trailing ".0", or an exponent for large values). Going through INTEGER
# first yields the bare digits; values that are not REAL are cast directly.

conn.execute("""CREATE TABLE products_cleaned AS
SELECT DISTINCT
    CASE
        WHEN typeof(BARCODE) = 'real' THEN CAST(CAST(BARCODE AS INTEGER) AS TEXT)
        ELSE CAST(BARCODE AS TEXT)
    END AS cleanBarcode,
    CATEGORY_1, CATEGORY_2, CATEGORY_3, CATEGORY_4, BRAND
FROM products
WHERE BARCODE IS NOT NULL""")

# Cleaning TRANSACTIONS data -- TODO: not implemented yet; the `transactions` table is used exactly as loaded.

# Cleaning USERS data: drop rows without an ID, remove duplicate rows, and keep
# only the date part of CREATED_DATE / BIRTH_DATE (the time of day isn't
# relevant for this exercise).
#
# Why SELECT DISTINCT instead of GROUP BY ... CAST(x AS DATE): SQLite has no
# DATE storage class, so CAST(x AS DATE) is a NUMERIC cast that keeps only the
# leading number of the text (e.g. '2020-06-24 ...' becomes 2020). Grouping on
# that groups by year, while the selected date columns would be taken from an
# arbitrary row of each group. DISTINCT de-duplicates on exactly the cleaned
# values that end up in the table.
#
# REPLACE(..., 'Z', '') strips the 'Z' suffix before DATE() extracts the
# calendar date; DATE() returns NULL for values it cannot parse.

conn.execute("""CREATE TABLE users_cleaned AS
SELECT DISTINCT
    ID,
    DATE(REPLACE(CREATED_DATE, 'Z', '')) AS cleanedCreatedDate,
    DATE(REPLACE(BIRTH_DATE, 'Z', '')) AS cleanedBirthDate,
    STATE, LANGUAGE, GENDER
FROM users
WHERE ID IS NOT NULL""")

# Persist both CREATE TABLE statements on this connection.
conn.commit()

## Sanity-check queries for the cleaned tables. Whatever is assigned to
## `query` is what gets executed below; `query1` is an alternative check that
## is defined here but not run.

# Duplicate check: list every ID that occurs more than once in users_cleaned.
# An empty result means each ID appears exactly once.
query = """ SELECT ID, COUNT(*)
FROM users_cleaned
GROUP BY ID
HAVING COUNT(*) > 1
LIMIT 100
"""

# Category spot-check: the distinct category paths in products_cleaned whose
# CATEGORY_2 contains 'Dips & Salsa'.
query1 = """ SELECT CATEGORY_1, CATEGORY_2, CATEGORY_3, CATEGORY_4
FROM products_cleaned
WHERE CATEGORY_2 like '%Dips & Salsa%'
GROUP BY CATEGORY_1, CATEGORY_2, CATEGORY_3, CATEGORY_4
ORDER BY CATEGORY_2
LIMIT 100
"""

# Execute the selected statement and show the result as a DataFrame.
df = pd.read_sql_query(sql=query, con=conn)
print(df)


Empty DataFrame
Columns: [ID, COUNT(*)]
Index: []
