# Load Parking Violations Data to SQLite

#### In this notebook we load the records from our cleaned PPA CSV file into the `ppa` table of our SQLite database.

In [1]:
import pandas as pd
from sqlalchemy import create_engine, inspect, func
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Float, DateTime

In [2]:
# Create the declarative base class. ORM model classes that subclass it
# register their table definitions on Base.metadata.
Base = declarative_base()

In [3]:
# ORM model that defines the schema of the `ppa` table holding the parking-violation records
class Parking(Base):
    """Declarative mapping for one row of the `ppa` table (one parking ticket per row)."""
    __tablename__ = 'ppa'
    # Ticket identifier; primary key, so each value can be stored only once
    anon_ticket_number = Column(Integer, primary_key=True)
    # Timestamp at which the ticket was issued
    issue_datetime = Column(DateTime)
    # Plate state abbreviation (sample rows show values such as 'PA', 'NJ')
    state = Column(String(20))
    # Plate identifier, stored as text (presumably anonymized, going by the name)
    anon_plate_id = Column(String(50))
    # Street address where the violation was recorded
    location = Column(String(250))
    # Short description of the violation (e.g. 'METER EXPIRED')
    violation_desc = Column(String(100))
    # Fine amount as a whole number (currency unit not shown here)
    fine = Column(Integer)
    # Agency that issued the ticket (e.g. 'PPA', 'POLICE')
    issuing_agency = Column(String(100))
    # Coordinates of the location (sample rows look like decimal degrees)
    lat = Column(Float)
    lon = Column(Float)
    # Zip code kept as text rather than as a number
    zip_code = Column(String(100))
    # Date parts as integers (presumably derived from issue_datetime -- confirm in the cleaning step)
    month = Column(Integer)
    day = Column(Integer)
    hour = Column(Integer)
    # Year-month-day-hour key; sample values look like '2017-01-01 00'
    ymdh = Column(String(100))

In [4]:
# Point SQLAlchemy at the SQLite file used by the Flask app, then open a connection
database_path = '../flask_app/static/data/data_all.sqlite'
sqlite_url = 'sqlite:///{}'.format(database_path)
engine = create_engine(sqlite_url)
conn = engine.connect()

In [5]:
# Create every table registered on Base.metadata; tables that already exist
# in the database are left as they are.
Base.metadata.create_all(engine)

# To wipe the database and start over, uncomment the line below
# (it drops every table registered on Base.metadata).
# Base.metadata.drop_all(engine)

In [6]:
# List the tables present in the database to confirm `ppa` was created.
# Engine.table_names() is deprecated since SQLAlchemy 1.4 and removed in 2.0,
# so ask the Inspector instead; it works on both old and new versions.
inspect(engine).get_table_names()

['ppa']

In [7]:
# Load the cleaned, reduced 2017 parking-violations CSV into a DataFrame and preview two rows
csv_path = '../resources/cleaned_data/cleaned_reduced_parking_violations_2017.csv'
parking_df = pd.read_csv(csv_path)
parking_df.head(n=2)

Unnamed: 0,anon_ticket_number,issue_datetime,state,anon_plate_id,location,violation_desc,fine,issuing_agency,lat,lon,zip_code,month,day,hour,ymdh
0,7695773,2017-01-01 00:00:00,PA,4340382,3131 N 7TH ST,HP RESERVED SPACE,301,POLICE,39.999414,-75.142332,19133,1,1,0,2017-01-01 00
1,7915128,2017-01-01 00:30:00,NJ,4420623,1747 N 17TH ST,PARKING PROHBITED,41,POLICE,39.980069,-75.16253,19121,1,1,0,2017-01-01 00


In [8]:
# Load the DataFrame rows into the `ppa` table of our SQLite database.
#
# anon_ticket_number is the table's primary key, so blindly appending the same
# CSV a second time (e.g. on Restart & Run All) aborts with an IntegrityError.
# To keep this cell safe to re-run, only rows whose ticket number is not yet
# stored are inserted. On an empty or missing table every row is loaded.
if 'ppa' in inspect(engine).get_table_names():
    stored_tickets = pd.read_sql_query('select anon_ticket_number from ppa', con=engine)['anon_ticket_number']
    rows_to_load = parking_df[~parking_df['anon_ticket_number'].isin(stored_tickets)]
else:
    # No table yet: to_sql will create it from the DataFrame
    rows_to_load = parking_df

# chunksize bounds how many rows are handed to the driver per insert batch
rows_to_load.to_sql(name='ppa', con=engine, if_exists='append', index=False, chunksize=10000)

In [9]:
# Read the whole `ppa` table back out of SQLite to verify the load, and preview the first rows
select_all_sql = 'select * from ppa'
check_ppa_info = pd.read_sql_query(sql=select_all_sql, con=engine)
check_ppa_info.head(5)

Unnamed: 0,anon_ticket_number,issue_datetime,state,anon_plate_id,location,violation_desc,fine,issuing_agency,lat,lon,zip_code,month,day,hour,ymdh
0,7689050,2017-01-09 12:29:00,PA,1975860,8419 GERMANTOWN AVE,METER EXPIRED,26,PPA,40.075568,-75.205661,19118,1,9,12,2017-01-09 12
1,7689051,2017-01-10 14:34:00,FL,4338399,1800 SOUTH ST,METER EXPIRED CC,36,PPA,39.944543,-75.172694,19146,1,10,14,2017-01-10 14
2,7689052,2017-01-10 14:53:00,PA,4338400,1430 SOUTH ST,METER EXPIRED CC,36,PPA,39.943758,-75.166379,19146,1,10,14,2017-01-10 14
3,7689053,2017-01-10 14:55:00,PA,4338400,1430 SOUTH ST,EXPIRED INSPECTION,41,PPA,39.943758,-75.166379,19146,1,10,14,2017-01-10 14
4,7689055,2017-01-10 12:04:00,PA,4338402,1 S 36TH ST,METER EXPIRED CC,36,PPA,39.955794,-75.194231,19104,1,10,12,2017-01-10 12


In [10]:
# Report how many rows came back from the database
loaded_row_total = check_ppa_info.shape[0]
print(loaded_row_total)

430131


This row count aligns with our reduced dataset (after we randomly selected 30%).