In [None]:
import pandas as pd
import sqlite3

# Open a throwaway in-memory SQLite database; it disappears when the
# connection (or the kernel) goes away.
conn = sqlite3.connect(":memory:")

# Read the three CSV extracts into DataFrames.
prods = pd.read_csv("PRODUCTS_TAKEHOME.csv")
trans = pd.read_csv("TRANSACTION_TAKEHOME.csv")
users = pd.read_csv("USER_TAKEHOME.csv")

# Mirror each DataFrame into a SQLite table of the given name, replacing any
# existing table and leaving the pandas index out of the schema.
for _table, _frame in (
    ("products", prods),
    ("transactions", trans),
    ("users", users),
):
    _frame.to_sql(_table, conn, index=False, if_exists="replace")

# Build products_cleaned from products:
#   * drop rows without a barcode,
#   * drop exact duplicate rows,
#   * expose the barcode as a plain digit string in cleanBarcode.
#
# SELECT DISTINCT covers every output column, barcode included. Grouping only
# by the category/brand columns would keep a single, arbitrarily chosen
# barcode per category/brand combination and silently discard all the other
# products in that combination.
#
# When the barcode is stored as REAL, CAST(... AS TEXT) renders it as
# "123.0" or in exponent notation, so REAL values go through INTEGER first.
# Any other storage class is converted to TEXT unchanged.
conn.execute(
    """CREATE TABLE products_cleaned AS
SELECT DISTINCT
    CASE
        WHEN typeof(BARCODE) = 'real'
            THEN CAST(CAST(BARCODE AS INTEGER) AS TEXT)
        ELSE CAST(BARCODE AS TEXT)
    END AS cleanBarcode,
    CATEGORY_1, CATEGORY_2, CATEGORY_3, CATEGORY_4, BRAND
FROM products
WHERE BARCODE IS NOT NULL"""
)

# Build users_cleaned from users:
#   * drop rows without an ID,
#   * reduce CREATED_DATE / BIRTH_DATE to calendar dates (the time of day is
#     not needed for this exercise),
#   * drop exact duplicate rows.
#
# SELECT DISTINCT de-duplicates on the values actually written to the table.
# SQLite has no DATE type: CAST(x AS DATE) is a NUMERIC cast that keeps only
# the leading integer of the text (the year). Grouping on it would merge rows
# that share an ID and year but differ in date, keeping arbitrary values.
conn.execute(
    """CREATE TABLE users_cleaned AS
SELECT DISTINCT
    ID,
    DATE(REPLACE(CREATED_DATE, 'Z', '')) AS cleanedCreatedDate,
    DATE(REPLACE(BIRTH_DATE, 'Z', '')) AS cleanedBirthDate,
    STATE, LANGUAGE, GENDER
FROM users
WHERE ID IS NOT NULL"""
)

conn.commit()

# Inspection query over products_cleaned: the distinct category combinations
# whose CATEGORY_2 contains 'Dips & Salsa', ordered by CATEGORY_2, capped at
# 100 rows. NOTE: query1 is only defined here; it is not executed below.
query1 = """ SELECT CATEGORY_1, CATEGORY_2, CATEGORY_3, CATEGORY_4
FROM products_cleaned
WHERE CATEGORY_2 like '%Dips & Salsa%'
GROUP BY CATEGORY_1, CATEGORY_2, CATEGORY_3, CATEGORY_4
ORDER BY CATEGORY_2
LIMIT 100
"""

# Inspection query over users_cleaned: the GROUP BY lists every selected
# column, so it acts as a de-duplication; rows are ordered by ID and capped
# at 100.
query = """ SELECT ID, CleanedCreatedDate, cleanedBirthDate, STATE, LANGUAGE, GENDER
FROM users_cleaned
GROUP BY ID, CleanedCreatedDate, cleanedBirthDate, STATE, LANGUAGE, GENDER
ORDER BY ID
LIMIT 100
"""


# Run the users query (not query1) and print the resulting DataFrame.
df = pd.read_sql_query(query, conn)
print(df)


                          ID cleanedCreatedDate cleanedBirthDate STATE  \
0   5351b1efe4b0e3638af82697         2014-04-18       1995-01-12  None   
1   536178cfe4b012a86bd734f0         2014-04-30       1990-01-23  None   
2   5364098ae4b0a1ed6a0c2f85         2014-05-02       1995-01-19  None   
3   536409c8e4b060d2e95bb71b         2014-05-02       1993-04-07  None   
4   53640a0ee4b060d2e95bb7a9         2014-05-02       1968-08-09  None   
..                       ...                ...              ...   ...   
95  56c90241e4b089775946f2a8         2016-02-21       1986-04-15  None   
96  56ca2b6ee4b03934a7909b36         2016-02-21       1971-07-26  None   
97  56d42b85e4b0cad901c0bb9c         2016-02-29       1950-04-29    MA   
98  56d6be26e4b086f9a34e57d9         2016-03-02       1989-11-03    PR   
99  56db73bee4b0796561f73ffd         2016-03-06       1983-07-15  None   

   LANGUAGE  GENDER  
0      None    None  
1      None    None  
2      None    None  
3      None    None  
4