In [1]:
# etl_load.py
# This script loads the transformed data (transformed_full.csv and transformed_incremental.csv)
# into SQLite databases (full_data.db and incremental_data.db) and verifies the stored data.

import pandas as pd
import sqlite3
import os

# Make sure the output folder for the SQLite files is present before any
# database is opened; exist_ok=True keeps re-runs from failing.
output_dir = 'loaded'
os.makedirs(output_dir, exist_ok=True)

# Read the two transformed CSV extracts into DataFrames.
full_csv_path = 'data/transformed/transformed_full.csv'
incremental_csv_path = 'data/transformed/transformed_incremental.csv'
full_data = pd.read_csv(full_csv_path)
incremental_data = pd.read_csv(incremental_csv_path)

# Write full_data into its own SQLite database and print a short preview
# read back from that database.
conn_full = sqlite3.connect('loaded/full_data.db')
try:
    # if_exists='replace' overwrites a table left by a previous run;
    # index=False keeps the DataFrame index out of the stored columns.
    full_data.to_sql('full_data', conn_full, if_exists='replace', index=False)

    # Read the first rows back from SQLite (not from the in-memory frame)
    # so the preview reflects what was actually stored.
    print('Full Data Preview from SQLite:')
    query_full = 'SELECT * FROM full_data LIMIT 5'
    full_data_preview = pd.read_sql(query_full, conn_full)
    print(full_data_preview)
finally:
    # Close even when the write or the read-back raises, so the database
    # file handle is not leaked.
    conn_full.close()

# Write incremental_data into its own SQLite database and print a short
# preview read back from that database.
conn_incremental = sqlite3.connect('loaded/incremental_data.db')
try:
    # if_exists='replace' overwrites a table left by a previous run;
    # index=False keeps the DataFrame index out of the stored columns.
    incremental_data.to_sql('incremental_data', conn_incremental, if_exists='replace', index=False)

    # Read the first rows back from SQLite (not from the in-memory frame)
    # so the preview reflects what was actually stored.
    print('\nIncremental Data Preview from SQLite:')
    query_incremental = 'SELECT * FROM incremental_data LIMIT 5'
    incremental_data_preview = pd.read_sql(query_incremental, conn_incremental)
    print(incremental_data_preview)
finally:
    # Close even when the write or the read-back raises, so the database
    # file handle is not leaked.
    conn_incremental.close()

# Reached only when the steps above completed without raising.
print('\nTransformed data successfully loaded into SQLite databases in loaded/ directory.')

Full Data Preview from SQLite:
   order_id customer_name product  quantity  unit_price  order_date   region  \
0         1         Diana  Tablet       2.0       500.0  2024-01-20    South   
1         2           Eve  Laptop       2.0       750.0  2024-04-29    North   
2         3       Charlie  Laptop       2.0       250.0  2024-01-08  Unknown   
3         4           Eve  Laptop       2.0       750.0  2024-01-07     West   
4         5           Eve  Tablet       3.0       750.0  2024-03-07    South   

   total_price  order_year  
0       1000.0        2024  
1       1500.0        2024  
2        500.0        2024  
3       1500.0        2024  
4       2250.0        2024  

Incremental Data Preview from SQLite:
   order_id customer_name product  quantity  unit_price  order_date   region  \
0       101         Alice  Laptop       1.5       900.0  2024-05-09  Central   
1       102       Unknown  Laptop       1.0       300.0  2024-05-07  Central   
2       103       Unknown  Laptop  