In [15]:
# for database connections
from sqlalchemy import create_engine, inspect, text, event

# data
import pandas as pd

# Postgres credentials
import config as cfg

# Error logging
import logging

In [2]:
# Setup the SQL connection variables
SQL_USERNAME = cfg.SQL_USERNAME
SQL_PASSWORD = cfg.SQL_PASSWORD
SQL_IP = cfg.SQL_IP
SQL_PORT = cfg.SQL_PORT
DATABASE = cfg.DATABASE

In [16]:
# Configure logging
logging.basicConfig()
logging.getLogger('sqlalchemy.engine').setLevel(logging.INFO)

In [3]:
# Connect to PostgreSQL server
connection_string = f'postgresql+psycopg2://{SQL_USERNAME}:{SQL_PASSWORD}@{SQL_IP}:{SQL_PORT}/{DATABASE}'
engine = create_engine(connection_string)

## upload campaign, category, subcategory and contact tables

In [4]:
# Check to see if the category and subcategory tables are in the database
inspector = inspect(engine)
print(inspector.get_table_names())

['jobs', 'unemployment', 'employment', 'state', 'income']


In [5]:
# Display all the columns of the tables in PostgreSQL to make sure they have the correct columns prior to loading the data

# Collect the names of tables within the database
tables = inspector.get_table_names()

# Using the inspector to print the column names within each table and its types
for table in tables:
    print(f"Table name: {table}")
    columns = inspector.get_columns(table)
    for column in columns:
        print(column["name"], column["type"])
        
    print()

Table name: jobs
fips VARCHAR(5)
state VARCHAR(2)
county VARCHAR(40)
pctemp_agriculture NUMERIC
pctemp_mining NUMERIC
pctemp_construction NUMERIC
pctemp_manufacturing NUMERIC
pctemp_trade NUMERIC
pctemp_trans NUMERIC
pctemp_information NUMERIC
pctemp_fire NUMERIC
pctemp_services NUMERIC
pctemp_government NUMERIC
num_civ_employed NUMERIC
last_update TIMESTAMP

Table name: unemployment
id VARCHAR(8)
fips VARCHAR(5)
year VARCHAR(4)
unemp_rate NUMERIC
num_unemployed NUMERIC
last_update TIMESTAMP

Table name: employment
id VARCHAR(8)
fips VARCHAR(5)
year VARCHAR(4)
num_civ_labor_force NUMERIC
num_employed NUMERIC
last_update TIMESTAMP

Table name: state
state VARCHAR(2)
latitude DOUBLE PRECISION
longitude DOUBLE PRECISION
name VARCHAR(25)
last_update TIMESTAMP

Table name: income
fips VARCHAR(5)
state VARCHAR(2)
county VARCHAR(40)
median_hh_inc_acs NUMERIC
percapita_inc NUMERIC
poverty_rate_0_17_acs NUMERIC
poverty_rate_acs NUMERIC
deep_pov_all NUMERIC
deep_pov_children NUMERIC
num_allinpov

In [6]:
# Open category csv file and read it into a pandas dataframe
income_df = pd.read_csv('Resources/income_cleaned.csv')
income_df.head()

Unnamed: 0,FIPS,State,County,Median_HH_Inc_ACS,PerCapitaInc,Poverty_Rate_0_17_ACS,Poverty_Rate_ACS,Deep_Pov_All,Deep_Pov_Children,NumAll_inPOV_ACS,PCTPOV017,POV017,MedHHInc,POVALL,PCTPOVALL,Num_inPOV_0_17_ACS
0,0,US,United States,62843.0,34103.0,18.519621,13.422426,5.985652,8.155789,42510843.0,16.8,12000470.0,65712.0,39490096.0,12.3,13377778.0
1,1000,AL,Alabama,50536.0,27928.0,23.900209,16.742549,7.410636,11.152622,795989.0,21.9,233890.0,51771.0,747478.0,15.6,258068.0
2,1001,AL,Autauga,58731.0,29819.0,23.215238,15.185172,6.261607,9.592381,8340.0,15.9,2040.0,58233.0,6723.0,12.1,3047.0
3,1003,AL,Baldwin,58320.0,32626.0,13.364308,10.354073,4.046885,5.65649,21704.0,13.5,6323.0,59871.0,22360.0,10.1,6098.0
4,1005,AL,Barbour,32525.0,18473.0,50.142884,30.668689,15.042156,31.491713,6875.0,41.0,2050.0,35972.0,5909.0,27.1,2632.0


In [7]:
# Open subcategory csv file and read it into a pandas dataframe
employment_df = pd.read_csv('Resources/employment_cleaned.csv')
employment_df.head()

Unnamed: 0,ID,FIPS,Year,NumCivLaborForce,Numemployed
0,16391,0,2020,160611064.0,147677360.0
1,16392,1000,2020,2230118.0,2099062.0
2,16393,1001,2020,25838.0,24576.0
3,16394,1003,2020,96763.0,91338.0
4,16395,1005,2020,8587.0,7982.0


In [8]:
# Open contact csv file and read it into a pandas dataframe
unemployment_df = pd.read_csv('Resources/unemployment_cleaned.csv')
unemployment_df.head()

Unnamed: 0,ID,FIPS,Year,UnempRate,NumUnemployed
0,1,0,2020,8.1,12933704.0
1,2,1000,2020,5.9,131056.0
2,3,1001,2020,4.9,1262.0
3,4,1003,2020,5.6,5425.0
4,5,1005,2020,7.0,605.0


In [23]:
# Open campaign csv file and read it into a pandas dataframe
jobs_df = pd.read_csv('Resources/jobs_cleaned.csv')
jobs_df.head()

Unnamed: 0,FIPS,State,County,PctEmpAgriculture,PctEmpMining,PctEmpConstruction,PctEmpManufacturing,PctEmpTrade,PctEmpTrans,PctEmpInformation,PctEmpFIRE,PctEmpServices,PctEmpGovt,NumCivEmployed
0,0,US,United States,1.259202,0.512723,6.592262,10.108008,13.745334,5.363914,2.011223,6.55584,49.244129,4.607366,154842185.0
1,1000,AL,Alabama,0.99319,0.39821,6.60499,14.332569,14.083735,5.454652,1.519607,5.523166,45.678045,5.411837,2097384.0
2,1001,AL,Autauga,0.517902,0.354783,6.072099,12.951635,12.445967,6.797977,1.362042,5.978305,44.082864,9.436424,24522.0
3,1003,AL,Baldwin,0.952772,0.257648,8.58546,9.249035,16.4779,5.003628,1.525907,7.520165,45.203016,5.224469,95091.0
4,1005,AL,Barbour,5.717342,0.0,6.810888,23.047664,12.813503,6.632592,0.606205,3.720433,33.638417,7.012956,8413.0


In [10]:
# Open campaign csv file and read it into a pandas dataframe
state_df = pd.read_csv('Resources/statelatlong.csv')
state_df.head()

Unnamed: 0,state,latitude,longitude,name
0,AK,63.588753,-154.493062,Alaska
1,AL,32.318231,-86.902298,Alabama
2,AR,35.20105,-91.831833,Arkansas
3,AZ,34.048928,-111.093731,Arizona
4,CA,36.778261,-119.417932,California


In [11]:
# If category table exists in the database, load the category data into the table
if 'state' in inspector.get_table_names():
    state_df.to_sql('state', schema='public', con=engine, index=False, if_exists='append', method='multi')

In [None]:
# If category table exists in the database, load the category data into the table
if 'income' in inspector.get_table_names():
    income_df.to_sql('income', schema='public', con=engine, index=False, if_exists='append', method='multi')

In [19]:
sample_df = jobs_df.head(10)
sample_df.head

<bound method NDFrame.head of    FIPS State         County  PctEmpAgriculture  PctEmpMining  \
0     0    US  United States           1.259202      0.512723   
1  1000    AL        Alabama           0.993190      0.398210   
2  1001    AL        Autauga           0.517902      0.354783   
3  1003    AL        Baldwin           0.952772      0.257648   
4  1005    AL        Barbour           5.717342      0.000000   
5  1007    AL           Bibb           1.967330      1.895791   
6  1009    AL         Blount           1.446366      0.634211   
7  1011    AL        Bullock           4.908544      0.000000   
8  1013    AL         Butler           2.496847      0.075662   
9  1015    AL        Calhoun           1.058544      0.346584   

   PctEmpConstruction  PctEmpManufacturing  PctEmpTrade  PctEmpTrans  \
0            6.592262            10.108008    13.745334     5.363914   
1            6.604990            14.332569    14.083735     5.454652   
2            6.072099            12.95

In [26]:
# If campaign table exists in the database, load the campaign data into the table
# Extract a small subset of the DataFrame
sample_df = jobs_df.head(3)

# Define the column names in the correct order
column_names = [
    'fips', 
    'state', 
    'county', 
    'pctemp_agriculture', 
    'pctemp_mining', 
    'pctemp_construction', 
    'pctemp_manufacturing', 
    'pctemp_trade', 
    'pctemp_trans', 
    'pctemp_information', 
    'pctemp_fire', 
    'pctemp_services', 
    'pctemp_government', 
    'num_civ_employed'
]

# Define chunk size
chunk_size = 1000  # Adjust as needed

# Load the data in chunks and insert
if 'jobs' in inspector.get_table_names():
    try:
        for chunk in pd.read_csv('Resources/jobs_cleaned_joy.csv', header=None, chunksize=chunk_size):
                        # Assign the column names
            chunk.columns = column_names
                        # Verify data types for correctness before inserting
            for col in column_names:
                if chunk[col].dtype == 'object':
                    print(f"Column {col} has non-numeric values.")
            # Insert data into the PostgreSQL table
            chunk.to_sql('jobs', schema='public', con=engine, index=False, if_exists='append', method=None)
    except Exception as e:
        print(f"Error occurred: {e}")
        
#if 'jobs' in inspector.get_table_names():
#    jobs_df.to_sql('jobs', schema='public', con=engine, index=False, if_exists='append', method=None)

INFO:sqlalchemy.engine.Engine:BEGIN (implicit)
INFO:sqlalchemy.engine.Engine:SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class.relname = %(table_name)s AND pg_catalog.pg_class.relkind = ANY (ARRAY[%(param_1)s, %(param_2)s, %(param_3)s, %(param_4)s, %(param_5)s]) AND pg_catalog.pg_namespace.nspname = %(nspname_1)s
INFO:sqlalchemy.engine.Engine:[cached since 3314s ago] {'table_name': 'jobs', 'param_1': 'r', 'param_2': 'p', 'param_3': 'f', 'param_4': 'v', 'param_5': 'm', 'nspname_1': 'public'}
INFO:sqlalchemy.engine.Engine:INSERT INTO public.jobs (fips, state, county, pctemp_agriculture, pctemp_mining, pctemp_construction, pctemp_manufacturing, pctemp_trade, pctemp_trans, pctemp_information, pctemp_fire, pctemp_services, pctemp_government, num_civ_employed) VALUES (%(f ... 836 characters truncated ... _2)s, %(pctemp_fire__2)s, %(pctemp_services__2)s, %(pctem

Column fips has non-numeric values.
Column state has non-numeric values.
Column county has non-numeric values.
Column pctemp_agriculture has non-numeric values.
Column pctemp_mining has non-numeric values.
Column pctemp_construction has non-numeric values.
Column pctemp_manufacturing has non-numeric values.
Column pctemp_trade has non-numeric values.
Column pctemp_trans has non-numeric values.
Column pctemp_information has non-numeric values.
Column pctemp_fire has non-numeric values.
Column pctemp_services has non-numeric values.
Column pctemp_government has non-numeric values.
Column num_civ_employed has non-numeric values.
Error occurred: (psycopg2.errors.InvalidTextRepresentation) invalid input syntax for type numeric: "PctEmpAgriculture"
LINE 1: ..._civ_employed) VALUES ('FIPS', 'State', 'County', 'PctEmpAgr...
                                                             ^

[SQL: INSERT INTO public.jobs (fips, state, county, pctemp_agriculture, pctemp_mining, pctemp_construction, 

In [None]:
# If subcategory table exists in the database, load the subcategory data into the table
if 'employment' in inspector.get_table_names():
    employment_df.to_sql('employment', schema='public', con=engine, index=False, if_exists='append', method='multi')

In [None]:
# If contact table exists in the database, load the contact data into the table
if 'unemployment' in inspector.get_table_names():
    unemployment_df.to_sql('unemployment', schema='public', con=engine, index=False, if_exists='append', method='multi')