# WPG Paystations

In [1]:
import os
import pandas as pd
import numpy as np
import folium
import matplotlib.pyplot as plt
import sklearn
import seaborn as sns
import folium

Reading in the Winnipeg Parking Authority Paystation dataset.

In [2]:
df = pd.read_csv(".../WPA_Paystation.csv")

The structure of the panda dataframe for the first 5 rows shows

In [3]:
df.head()

Unnamed: 0,Paystation Number,Restriction,Time Limit,Street,Payment Time,Available Space,Hourly Rate,location
0,3445,NO RESTRICTIONS,4 HOUR PARKING,Bannatyne,0900 - 1730,24,3.5,"(49.9027118083994, -97.1546055438918)"
1,4182,NO RESTRICTIONS,2 HOUR PARKING,Selkirk,0900 - 1730,18,2.5,"(49.9149081105698, -97.1424322041994)"
2,3129,"NO STOP AM, PM",2 HOUR PARKING,Main,0900 - 1530,10,3.5,"(49.8974285750503, -97.1386746911624)"
3,4903,NO RESTRICTIONS,2 HOUR PARKING,Kennedy,0800 - 1730,49,2.5,"(49.8854957047535, -97.1450922436489)"
4,4888,"NO STOP AM, PM",4 HOUR PARKING,William,0900 - 1530,7,3.5,"(49.9039282703859, -97.1552152452541)"


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 441 entries, 0 to 440
Data columns (total 8 columns):
Paystation Number    441 non-null int64
Restriction          441 non-null object
Time Limit           441 non-null object
Street               441 non-null object
Payment Time         441 non-null object
Available Space      441 non-null int64
Hourly Rate          441 non-null float64
location             441 non-null object
dtypes: float64(1), int64(2), object(5)
memory usage: 27.6+ KB


In [5]:
df.dtypes

Paystation Number      int64
Restriction           object
Time Limit            object
Street                object
Payment Time          object
Available Space        int64
Hourly Rate          float64
location              object
dtype: object

Creation of the variable short to test data cleaning on a smaller scale for later implementation on the larger dataframe.

In [6]:
short = df.iloc[:5]

In [7]:
short

Unnamed: 0,Paystation Number,Restriction,Time Limit,Street,Payment Time,Available Space,Hourly Rate,location
0,3445,NO RESTRICTIONS,4 HOUR PARKING,Bannatyne,0900 - 1730,24,3.5,"(49.9027118083994, -97.1546055438918)"
1,4182,NO RESTRICTIONS,2 HOUR PARKING,Selkirk,0900 - 1730,18,2.5,"(49.9149081105698, -97.1424322041994)"
2,3129,"NO STOP AM, PM",2 HOUR PARKING,Main,0900 - 1530,10,3.5,"(49.8974285750503, -97.1386746911624)"
3,4903,NO RESTRICTIONS,2 HOUR PARKING,Kennedy,0800 - 1730,49,2.5,"(49.8854957047535, -97.1450922436489)"
4,4888,"NO STOP AM, PM",4 HOUR PARKING,William,0900 - 1530,7,3.5,"(49.9039282703859, -97.1552152452541)"


Creation of the "lat" and "long" columns in the dataframe need to be created to be used with Folium. 

.str.strip('()') strips the entry of any occurance of ( or ) in the string
.str.split splits the character string by ', ' and accessing the 0th or 1st entry of the split

In [8]:
short['lat'] = short.location.str.strip('()').str.split(', ').str[0]
short['long'] = short.location.str.strip('()').str.split(', ').str[1]
#print(short)
short

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,Paystation Number,Restriction,Time Limit,Street,Payment Time,Available Space,Hourly Rate,location,lat,long
0,3445,NO RESTRICTIONS,4 HOUR PARKING,Bannatyne,0900 - 1730,24,3.5,"(49.9027118083994, -97.1546055438918)",49.9027118083994,-97.1546055438918
1,4182,NO RESTRICTIONS,2 HOUR PARKING,Selkirk,0900 - 1730,18,2.5,"(49.9149081105698, -97.1424322041994)",49.9149081105698,-97.1424322041994
2,3129,"NO STOP AM, PM",2 HOUR PARKING,Main,0900 - 1530,10,3.5,"(49.8974285750503, -97.1386746911624)",49.8974285750503,-97.1386746911624
3,4903,NO RESTRICTIONS,2 HOUR PARKING,Kennedy,0800 - 1730,49,2.5,"(49.8854957047535, -97.1450922436489)",49.8854957047535,-97.1450922436489
4,4888,"NO STOP AM, PM",4 HOUR PARKING,William,0900 - 1530,7,3.5,"(49.9039282703859, -97.1552152452541)",49.9039282703859,-97.1552152452541


In [9]:
#Test thinking
#short.loc[:,['lat', 'long']] = short.location.str.strip('()').str.split(', ', expand=True)

Using the above test case we'll create columns for the latitude and longitude for easier plotting of the paystations. When using the strip and split commands used earlier another method must be used to change the entries into float for plotting purposes.

In [10]:
# The first part strips the any ) or ( from location. Next  the same is done for any , left.
# The astype parameter assigns the column as a float instead of an object

df['lat'] = df.location.str.strip('()').str.split(', ').str[0].astype(float)
df['long'] = df.location.str.strip('()').str.split(', ').str[1].astype(float)

In [11]:
df.tail()

Unnamed: 0,Paystation Number,Restriction,Time Limit,Street,Payment Time,Available Space,Hourly Rate,location,lat,long
436,3291,NO RESTRICTIONS,9.5 HOUR PARKING,Dominion,0800 - 1730,14,2.5,"(49.8849064800786, -97.1780650034081)",49.884906,-97.178065
437,4259,"AM,PM",2 HOUR PARKING,St Mary,0900 - 1530,9,2.5,"(49.8909890169509, -97.1399663808966)",49.890989,-97.139966
438,4254,NO STOP PM,2 HOUR PARKING,York,0800 - 1530,4,2.5,"(49.8889192750144, -97.1418232709624)",49.888919,-97.141823
439,4184,NO RESTRICTIONS,2 HOUR PARKING,Princess,0800 - 1730,12,2.5,"(49.8998500171418, -97.1410474373245)",49.89985,-97.141047
440,3123,"NO STOP AM, PM",2 HOUR PARKING,Main,0900 - 1530,3,2.5,"(49.9006521004631, -97.1377370624127)",49.900652,-97.137737


In [12]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 441 entries, 0 to 440
Data columns (total 10 columns):
Paystation Number    441 non-null int64
Restriction          441 non-null object
Time Limit           441 non-null object
Street               441 non-null object
Payment Time         441 non-null object
Available Space      441 non-null int64
Hourly Rate          441 non-null float64
location             441 non-null object
lat                  441 non-null float64
long                 441 non-null float64
dtypes: float64(3), int64(2), object(5)
memory usage: 34.5+ KB


Let's check the unique values in three columns we're interested in

In [13]:
# Checking the uniqueness of values in 3 columns
unique_restrictions = df.Restriction.unique()
print(unique_restrictions)
unique_time = df['Time Limit'].unique()
print(unique_time)
unique_rate = df['Hourly Rate'].unique()
print(unique_rate)

['NO RESTRICTIONS' 'NO STOP AM, PM' 'AM, PM' 'NO STOP AM' 'NO STOP PM'
 'AM,PM' 'No Restrictions' 'NO' '5 WESTERN STALLS SAT ONLY'
 'NO STOP AM PM' 'PM restriction' 'pm restriction' 'no restriction']
['4 HOUR PARKING' '2 HOUR PARKING' '9.5 HOUR PARKING' '24 HOUR PARKING'
 '30 MINUTE PARKING']
[3.5 2.5 2. ]


In [14]:
# Just out of interest
rate_count = df.groupby(["Time Limit"]).count()
print(rate_count)

                   Paystation Number  Restriction  Street  Payment Time  \
Time Limit                                                                
2 HOUR PARKING                   361          361     361           361   
24 HOUR PARKING                    6            6       6             6   
30 MINUTE PARKING                  2            2       2             2   
4 HOUR PARKING                    59           59      59            59   
9.5 HOUR PARKING                  13           13      13            13   

                   Available Space  Hourly Rate  location  lat  long  
Time Limit                                                            
2 HOUR PARKING                 361          361       361  361   361  
24 HOUR PARKING                  6            6         6    6     6  
30 MINUTE PARKING                2            2         2    2     2  
4 HOUR PARKING                  59           59        59   59    59  
9.5 HOUR PARKING                13           13 

Folium produced map of the mean latitude and long values of all the obsrved data points.

In [15]:
map_wpg = folium.Map(location=[df['lat'].mean(), df['long'].mean()], zoom_start=13)
map_wpg

Create a definition determining the colour of the markers used to display visual pay station information.

In [16]:
# function for colour co-ordinating parking restrictions
def colour_rest(restrict):
    if restrict == ('NO RESTRICTIONS' or 'No Restrictions' or 'no restriction'):
        col = 'green'
    elif restrict == ('NO STOP AM, PM' or 'NO STOP AM PM'):
        col = 'darkpurple'
    elif restrict == ('NO STOP PM'):
        col = 'blue'
    elif restrict == ('NO STOP AM'):
        col = 'orange'
    elif restrict == ('AM, PM' or 'AM,PM'):
        col = 'red'
    else:
        col = 'pink'
    return col
    
# function for colour co-ordinating hourly rates
def colour_rate(hrate):
    if hrate == 2.:
        col = 'green'
    elif hrate == 2.5:
        col = 'yellow'
    else:
        col = 'red'
    return col

# function for colour co-ordinating parking time limits
def colour_time(limit):
    if limit == '30 MINUTE PARKING':
        col = 'darkred'
    elif limit == '2 HOUR PARKING':
        col = 'red'
    elif limit == '4 HOUR PARKING':
        col = 'green'
    elif limit == '9.5 HOUR PARKING':
        col = 'blue'
    else:
        col = 'black'
    return col



The creation of these locations is done using a for loop to create multiple child layers into the parent group using the add_child method.

In [17]:
fg = folium.FeatureGroup(name='Winnipeg Paystations')

for lat,long,street,restrict,hrate,limit in zip(df['lat'],df['long'],
                                                df['Street'],df['Restriction'],df['Hourly Rate'],
                                               df['Time Limit']):
    fg.add_child(folium.Marker(location=[lat,long],
                               popup=(folium.Popup(street)),
                               icon=folium.Icon(color=colour_rest(restrict),icon_color=colour_rate(hrate))))

map_wpg.add_child(fg)

map_wpg.add_child(folium.LayerControl())

In [18]:
# create map of Winnipeg locations using latitude and longitude values
map_wpg2 = folium.Map(location=[df['lat'].mean(), df['long'].mean()], zoom_start=13.5)

# add markers to map
for lat, lng, street in zip(df['lat'], df['long'], df['Street']):
    label = '{}'.format(street)
    label = folium.Popup(label, parse_html=True)
    folium.CircleMarker(
        [lat, lng],
        radius=2,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(map_wpg2)  
    
map_wpg2