In [1]:
# Import dependencies
import datetime
import getpass
import io
import os
import warnings
from datetime import date
from datetime import time
from datetime import datetime
from urllib.parse import quote_plus
#import calendar

import numpy as np
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect, text

warnings.filterwarnings('ignore')

In [2]:
# https://nikgrozev.com/2015/06/16/fast-and-simple-sampling-in-pandas-when-loading-data-from-files/
# Fast and Simple Sampling in Pandas when Loading Data From Files
# The data to load
p = "Resources/LA_Parking.csv"

# Take every N-th (in this case 10th) row
n = 10

# Count the lines or use an upper bound.
# The context manager closes the file handle as soon as the count is done
# instead of leaving it open until garbage collection.
with open(p) as csv_file:
    num_lines = sum(1 for _ in csv_file)

# The row indices to skip - make sure 0 is not included to keep the header!
skip_idx = [x for x in range(1, num_lines) if x % n != 0]

# Read the data, keeping only the header and every n-th row
la_parking_df = pd.read_csv(p, skiprows=skip_idx)
la_parking_df.head(2)

Unnamed: 0,Ticket number,Issue Date,Issue time,Meter Id,Marked Time,RP State Plate,Plate Expiry Date,VIN,Make,Body Style,Color,Location,Route,Agency,Violation code,Violation Description,Fine amount,Latitude,Longitude
0,1121041762,2018/01/01 12:00:00 AM,955.0,,,CA,201807.0,,KIA,PA,RE,1048 E 43RD ST,13A27,1,8056E4,RED ZONE,93.0,6483814.0,1824574.0
1,4324502346,2018/01/01 12:00:00 AM,1134.0,,,CA,201706.0,,GMC,TK,BL,16900 VERMONT AVE S,00500,55,5204A-,DISPLAY OF TABS,25.0,99999.0,99999.0


In [3]:
# Report the number of rows in the sampled frame.
# len() counts every row; Series.count() would silently exclude rows whose
# 'Ticket number' is missing.
print(f"Parking number of rows = {len(la_parking_df)}")

Parking number of rows = 199879


In [4]:
# Split the 'Issue Date' column into Year, Month and Day columns, plus a Parking_Date column holding the full date without the time of day

In [5]:
# Characters 0-3 of 'Issue Date' hold the year.
# The vectorized .str accessor yields NaN for a missing date instead of
# raising TypeError the way a per-row Python slice does.
la_parking_df['Year'] = la_parking_df['Issue Date'].str[0:4]

In [6]:
# Characters 5-6 of 'Issue Date' hold the month.
# The vectorized .str accessor yields NaN for a missing date instead of
# raising TypeError the way a per-row Python slice does.
la_parking_df['Month'] = la_parking_df['Issue Date'].str[5:7]

In [7]:
# Characters 8-9 of 'Issue Date' hold the day of the month.
# The vectorized .str accessor yields NaN for a missing date instead of
# raising TypeError the way a per-row Python slice does.
la_parking_df['Day'] = la_parking_df['Issue Date'].str[8:10]

In [8]:
# The first 10 characters of 'Issue Date' are the date without the time of day.
# The vectorized .str accessor yields NaN for a missing date instead of
# raising TypeError the way a per-row Python slice does.
la_parking_df['Parking_Date'] = la_parking_df['Issue Date'].str[0:10]

In [9]:
# Preview the first two rows, now including the derived date columns
la_parking_df.head(n=2)

Unnamed: 0,Ticket number,Issue Date,Issue time,Meter Id,Marked Time,RP State Plate,Plate Expiry Date,VIN,Make,Body Style,...,Agency,Violation code,Violation Description,Fine amount,Latitude,Longitude,Year,Month,Day,Parking_Date
0,1121041762,2018/01/01 12:00:00 AM,955.0,,,CA,201807.0,,KIA,PA,...,1,8056E4,RED ZONE,93.0,6483814.0,1824574.0,2018,1,1,2018/01/01
1,4324502346,2018/01/01 12:00:00 AM,1134.0,,,CA,201706.0,,GMC,TK,...,55,5204A-,DISPLAY OF TABS,25.0,99999.0,99999.0,2018,1,1,2018/01/01


In [10]:
# Show the column labels of la_parking_df
la_parking_df.columns

Index(['Ticket number', 'Issue Date', 'Issue time', 'Meter Id', 'Marked Time',
       'RP State Plate', 'Plate Expiry Date', 'VIN', 'Make', 'Body Style',
       'Color', 'Location', 'Route', 'Agency', 'Violation code',
       'Violation Description', 'Fine amount', 'Latitude', 'Longitude', 'Year',
       'Month', 'Day', 'Parking_Date'],
      dtype='object')

In [11]:
# Columns to use for the project, kept in a named list so the selection is easy to edit
project_columns = [
    'Ticket number', 'Issue Date', 'Issue time', 'Meter Id', 'Marked Time',
    'RP State Plate', 'Plate Expiry Date', 'VIN', 'Make', 'Body Style',
    'Color', 'Location', 'Route', 'Agency', 'Violation code',
    'Violation Description', 'Fine amount', 'Latitude', 'Longitude',
    'Year', 'Month', 'Day', 'Parking_Date',
]

# Build the project frame from exactly those columns
la_parking_df_project = la_parking_df[project_columns]

In [12]:
# Preview the first two rows of the project frame
la_parking_df_project.head(n=2)

Unnamed: 0,Ticket number,Issue Date,Issue time,Meter Id,Marked Time,RP State Plate,Plate Expiry Date,VIN,Make,Body Style,...,Agency,Violation code,Violation Description,Fine amount,Latitude,Longitude,Year,Month,Day,Parking_Date
0,1121041762,2018/01/01 12:00:00 AM,955.0,,,CA,201807.0,,KIA,PA,...,1,8056E4,RED ZONE,93.0,6483814.0,1824574.0,2018,1,1,2018/01/01
1,4324502346,2018/01/01 12:00:00 AM,1134.0,,,CA,201706.0,,GMC,TK,...,55,5204A-,DISPLAY OF TABS,25.0,99999.0,99999.0,2018,1,1,2018/01/01


In [13]:
# Save the project frame to a CSV file.
# A forward slash works on Windows, macOS and Linux alike (and matches the
# input path used when loading the data); a backslash is only a path
# separator on Windows.
la_parking_df_project.to_csv('Resources/la_parking_df_project.csv')

###  Connect to local database

In [14]:
### Connect to local database
# Credentials are read from the environment so no password is stored in the
# notebook. LADATA_DB_USER defaults to "root"; if LADATA_DB_PASSWORD is not
# set, the password is requested interactively.
db_user = os.environ.get("LADATA_DB_USER", "root")
db_password = os.environ.get("LADATA_DB_PASSWORD") or getpass.getpass("MySQL password: ")

# quote_plus escapes characters such as '@' or '/' that would otherwise
# corrupt the connection URL.
rds_connection_string = f"{quote_plus(db_user)}:{quote_plus(db_password)}@127.0.0.1/ladata_db"
engine = create_engine(f'mysql://{rds_connection_string}')
connection = engine.connect()

In [15]:
# Select ladata_db as the default schema for this connection.
# The statement is wrapped in text() because SQLAlchemy 2.0 no longer accepts
# a plain string in Connection.execute(); text() also works on 1.x.
connection.execute(text('use ladata_db;'))

<sqlalchemy.engine.result.ResultProxy at 0x2424c4e7780>

In [16]:
# List the tables currently present in the database.
# Engine.table_names() is deprecated since SQLAlchemy 1.4 and removed in 2.0;
# the inspector API returns the same list of table names.
inspect(engine).get_table_names()

['la_crimes_rate', 'parking_citations']

In [17]:
# Write the project frame to the la_parking_df_project table, replacing the
# table if it already exists; the DataFrame index is not stored as a column
la_parking_df_project.to_sql(
    'la_parking_df_project',
    con=engine,
    index=False,
    if_exists='replace',
)

In [18]:
# Select every row of the freshly written table.
# Run on the explicit connection with text(): Engine.execute() and raw-string
# statements are deprecated in SQLAlchemy 1.4 and removed in 2.0, and the
# connectionless form keeps a pooled connection checked out until the result
# is closed.
results = connection.execute(text('select * from la_parking_df_project;'))

### Confirm data has been added by querying the la_parking_df_project table

In [19]:
# Select ladata_db as the default schema for this connection.
# The statement is wrapped in text() because SQLAlchemy 2.0 no longer accepts
# a plain string in Connection.execute(); text() also works on 1.x.
connection.execute(text('use ladata_db;'))

<sqlalchemy.engine.result.ResultProxy at 0x2427fda8a90>

In [20]:
# Confirm the load by reading two rows back from the table.
# LIMIT 2 makes the database return only the rows that are displayed instead
# of transferring the whole table just to show its head.
pd.read_sql_query('select * from la_parking_df_project limit 2', con=engine).head(2)

Unnamed: 0,Ticket number,Issue Date,Issue time,Meter Id,Marked Time,RP State Plate,Plate Expiry Date,VIN,Make,Body Style,...,Agency,Violation code,Violation Description,Fine amount,Latitude,Longitude,Year,Month,Day,Parking_Date
0,1121041762,2018/01/01 12:00:00 AM,955.0,,,CA,201807.0,,KIA,PA,...,1,8056E4,RED ZONE,93.0,6483814.0,1824574.0,2018,1,1,2018/01/01
1,4324502346,2018/01/01 12:00:00 AM,1134.0,,,CA,201706.0,,GMC,TK,...,55,5204A-,DISPLAY OF TABS,25.0,99999.0,99999.0,2018,1,1,2018/01/01
