In [4]:
# ETL Load Phase - SQLite
import pandas as pd
import sqlite3
import os

# Create the output directory for the SQLite file (no error if it is already there)
os.makedirs('loaded', exist_ok=True)

# Read the two transformed CSV files from the 'transformed' folder
df_full = pd.read_csv('transformed/transformed_full.csv')
df_incremental = pd.read_csv('transformed/transformed_incremental.csv')

# Show the first rows of each frame under its own heading
for heading, frame in (
    ("Full Dataset Preview:", df_full),
    ("Incremental Dataset Preview:", df_incremental),
):
    print(heading)
    display(frame.head())
# Open (or create) the SQLite database file that receives both datasets
conn = sqlite3.connect('loaded/full_data.db')

# Everything that uses the connection runs inside try/finally so that the
# connection is released even when a write or a verification query raises;
# otherwise a failed run would leave the handle open in the notebook kernel.
try:
    # Write each DataFrame to its own table. if_exists='replace' recreates the
    # table on every run, and index=False keeps the DataFrame index out of it.
    df_full.to_sql('full_data', conn, if_exists='replace', index=False)
    df_incremental.to_sql('incremental_data', conn, if_exists='replace', index=False)

    print("Data successfully loaded into SQLite database!")

    # Read back the first rows of each table to confirm successful loading
    print("Preview of full_data table:")
    df_check_full = pd.read_sql('SELECT * FROM full_data LIMIT 5;', conn)
    display(df_check_full)

    print("Preview of incremental_data table:")
    df_check_incremental = pd.read_sql('SELECT * FROM incremental_data LIMIT 5;', conn)
    display(df_check_incremental)

    # Check total rows in each table
    count_full = pd.read_sql('SELECT COUNT(*) as Total_Rows FROM full_data;', conn)
    count_incremental = pd.read_sql('SELECT COUNT(*) as Total_Rows FROM incremental_data;', conn)

    print("Row Counts:")
    display(count_full)
    display(count_incremental)
finally:
    # Always close the database connection, on success and on failure alike
    conn.close()
    print("SQLite connection closed.")


Full Dataset Preview:


Unnamed: 0,Year,Cause_Code,Cause_Name,State,Deaths,Age_Adjusted_Rate,Rate_Category,Deaths_Rate_Scaled
0,2017,"Accidents (unintentional injuries) (V01-X59,Y8...",Unintentional injuries,United States,169936,49.4,Low,169.936
1,2017,"Accidents (unintentional injuries) (V01-X59,Y8...",Unintentional injuries,Alabama,2703,53.8,Medium,2.703
2,2017,"Accidents (unintentional injuries) (V01-X59,Y8...",Unintentional injuries,Alaska,436,63.7,Medium,0.436
3,2017,"Accidents (unintentional injuries) (V01-X59,Y8...",Unintentional injuries,Arizona,4184,56.2,Medium,4.184
4,2017,"Accidents (unintentional injuries) (V01-X59,Y8...",Unintentional injuries,Arkansas,1625,51.8,Medium,1.625


Incremental Dataset Preview:


Unnamed: 0,Year,Cause_Code,Cause_Name,State,Deaths,Age_Adjusted_Rate,Rate_Category,Deaths_Rate_Scaled
0,2007,All Causes,All causes,Washington,47323,731.9,Very High,47.323
1,2015,Influenza and pneumonia (J09-J18),Influenza and pneumonia,Arizona,775,9.5,Low,0.775
2,2013,"Diseases of heart (I00-I09,I11,I13,I20-I51)",Heart disease,North Dakota,1382,150.7,High,1.382
3,2008,"Diseases of heart (I00-I09,I11,I13,I20-I51)",Heart disease,Texas,38384,191.7,High,38.384
4,2015,"Nephritis, nephrotic syndrome and nephrosis (N...",Kidney disease,Virginia,1470,16.1,Low,1.47


Data successfully loaded into SQLite database!
Preview of full_data table:


Unnamed: 0,Year,Cause_Code,Cause_Name,State,Deaths,Age_Adjusted_Rate,Rate_Category,Deaths_Rate_Scaled
0,2017,"Accidents (unintentional injuries) (V01-X59,Y8...",Unintentional injuries,United States,169936,49.4,Low,169.936
1,2017,"Accidents (unintentional injuries) (V01-X59,Y8...",Unintentional injuries,Alabama,2703,53.8,Medium,2.703
2,2017,"Accidents (unintentional injuries) (V01-X59,Y8...",Unintentional injuries,Alaska,436,63.7,Medium,0.436
3,2017,"Accidents (unintentional injuries) (V01-X59,Y8...",Unintentional injuries,Arizona,4184,56.2,Medium,4.184
4,2017,"Accidents (unintentional injuries) (V01-X59,Y8...",Unintentional injuries,Arkansas,1625,51.8,Medium,1.625


Preview of incremental_data table:


Unnamed: 0,Year,Cause_Code,Cause_Name,State,Deaths,Age_Adjusted_Rate,Rate_Category,Deaths_Rate_Scaled
0,2007,All Causes,All causes,Washington,47323,731.9,Very High,47.323
1,2015,Influenza and pneumonia (J09-J18),Influenza and pneumonia,Arizona,775,9.5,Low,0.775
2,2013,"Diseases of heart (I00-I09,I11,I13,I20-I51)",Heart disease,North Dakota,1382,150.7,High,1.382
3,2008,"Diseases of heart (I00-I09,I11,I13,I20-I51)",Heart disease,Texas,38384,191.7,High,38.384
4,2015,"Nephritis, nephrotic syndrome and nephrosis (N...",Kidney disease,Virginia,1470,16.1,Low,1.47


Row Counts:


Unnamed: 0,Total_Rows
0,10868


Unnamed: 0,Total_Rows
0,1000


SQLite connection closed.


In [None]:
# updated outputs
