# EPIC 2 - Melbourne Places of Interest
This Jupyter Notebook prepares the 'Melbourne Places of Interest' CSV file for use in the Mo-Buddy website solution:
1. Read Raw Data
2. Clean Raw Data
3. Export Clean Data

In [1]:
# Import Packages
import pandas as pd

In [2]:
# Show every column when a dataframe is displayed (no truncation)
pd.options.display.max_columns = None

## 1. Read in Raw Data from a CSV file

In [3]:
# Function for reading in raw data from a CSV file
def read_in_data(file_path):
    """
    Load the raw data stored in a CSV file.
    Inputs:
        - file_path, type: string, desc: Path of the CSV file to read
    Outputs:
        - type: dataframe, desc: Raw data exactly as parsed by pandas
    """
    # No cleaning happens here; the parsed frame is handed straight back.
    return pd.read_csv(file_path)

In [4]:
# Read in data
# The relative path uses forward slashes so it resolves on every OS; the
# backslash form only worked because '\L' is not a recognised escape sequence
# (Python warns about such literals) and because backslash is a separator on
# Windows only.
filepath_raw_data = 'DataBases/Landmarks_and_places_of_interest.csv'
df_raw_places = read_in_data(filepath_raw_data)

In [5]:
# Preview the first rows of the raw data
df_raw_places.head()

Unnamed: 0,Theme,Sub Theme,Feature Name,Co-ordinates
0,Transport,Railway Station,Flemington Bridge Railway Station,"(-37.7881645889621, 144.939277838304)"
1,Mixed Use,Retail/Office/Carpark,Council House 2 (CH2),"(-37.8142591432011, 144.966638432727)"
2,Place Of Assembly,Library,The Melbourne Athenaeum Library,"(-37.8148855756416, 144.967291289941)"
3,Leisure/Recreation,Informal Outdoor Facility (Park/Garden/Reserve),Carlton Gardens South,"(-37.8060684577258, 144.971266479841)"
4,Place of Worship,Church,St Francis Church,"(-37.8118847831837, 144.962422614541)"


## 2. Clean up Raw Data

In [6]:
# Work on an independent copy so the raw dataframe stays untouched
df_places = df_raw_places.copy(deep=True)

In [7]:
# Count the missing values in each column
df_places.isna().sum()

Theme           0
Sub Theme       0
Feature Name    0
Co-ordinates    0
dtype: int64

In [8]:
# Check values for 'Theme': number of places per category
df_places['Theme'].value_counts()

Leisure/Recreation                      63
Place Of Assembly                       40
Place of Worship                        31
Transport                               26
Community Use                           21
Education Centre                        13
Mixed Use                               11
Office                                  11
Health Services                         11
Purpose Built                            4
Retail                                   3
Vacant Land                              3
Residential Accommodation                2
Industrial                               1
Specialist Residential Accommodation     1
Warehouse/Store                          1
Name: Theme, dtype: int64

In [9]:
# Check values for 'Sub Theme': number of places per sub-category
df_places['Sub Theme'].value_counts()

Informal Outdoor Facility (Park/Garden/Reserve)    37
Church                                             30
Railway Station                                    23
Art Gallery/Museum                                 19
Theatre Live                                       15
Major Sports & Recreation Facility                 14
Public Buildings                                   13
Office                                             11
Public Hospital                                     7
Retail/Office/Carpark                               5
Outdoor Recreation Facility (Zoo, Golf Course)      4
Tertiary (University)                               4
Function/Conference/Exhibition Centre               4
Primary Schools                                     4
Police Station                                      3
Private Hospital                                    3
Indoor Recreation Facility                          3
Retail/Office                                       3
Retail/Office/Residential/Ca

In [10]:
# split 'Co-ordinates' -> 'latitude' 'longitude'
# The raw value looks like "(lat, lon)": take the text between the parentheses
# and split it on the comma.
coord_parts = df_places['Co-ordinates'].str.extract(r'\((.*)\)')[0].str.split(',')
# Strip each part: splitting "lat, lon" on ',' leaves a leading space in front
# of the longitude, which would otherwise end up in the exported CSV.
# Both new columns still hold strings; they go in just before 'Co-ordinates'.
df_places.insert(3, 'latitude', coord_parts.str[0].str.strip())
df_places.insert(4, 'longitude', coord_parts.str[1].str.strip())

In [11]:
# Check whether the dataframe is as expected: 'latitude' and 'longitude'
# should now sit between 'Feature Name' and 'Co-ordinates'
df_places.head()

Unnamed: 0,Theme,Sub Theme,Feature Name,latitude,longitude,Co-ordinates
0,Transport,Railway Station,Flemington Bridge Railway Station,-37.7881645889621,144.939277838304,"(-37.7881645889621, 144.939277838304)"
1,Mixed Use,Retail/Office/Carpark,Council House 2 (CH2),-37.8142591432011,144.966638432727,"(-37.8142591432011, 144.966638432727)"
2,Place Of Assembly,Library,The Melbourne Athenaeum Library,-37.8148855756416,144.967291289941,"(-37.8148855756416, 144.967291289941)"
3,Leisure/Recreation,Informal Outdoor Facility (Park/Garden/Reserve),Carlton Gardens South,-37.8060684577258,144.971266479841,"(-37.8060684577258, 144.971266479841)"
4,Place of Worship,Church,St Francis Church,-37.8118847831837,144.962422614541,"(-37.8118847831837, 144.962422614541)"


In [12]:
# Keep only the useful columns: the first five positions, which leaves out
# the original 'Co-ordinates' column
usefull_columns = list(range(5))
df_places = df_places.iloc[:, usefull_columns].copy()

In [13]:
# Check whether the dataframe is as expected: only the first five columns
# remain, so 'Co-ordinates' should be gone
df_places.head()

Unnamed: 0,Theme,Sub Theme,Feature Name,latitude,longitude
0,Transport,Railway Station,Flemington Bridge Railway Station,-37.7881645889621,144.939277838304
1,Mixed Use,Retail/Office/Carpark,Council House 2 (CH2),-37.8142591432011,144.966638432727
2,Place Of Assembly,Library,The Melbourne Athenaeum Library,-37.8148855756416,144.967291289941
3,Leisure/Recreation,Informal Outdoor Facility (Park/Garden/Reserve),Carlton Gardens South,-37.8060684577258,144.971266479841
4,Place of Worship,Church,St Francis Church,-37.8118847831837,144.962422614541


In [14]:
# Rename the descriptive columns to the names used by the output format
df_places = df_places.rename(
    columns={
        'Theme': 'theme',
        'Sub Theme': 'subTheme',
        'Feature Name': 'name',
    }
)

In [15]:
# Check whether the dataframe is as expected: the first three columns should
# now be named 'theme', 'subTheme' and 'name'
df_places.head()

Unnamed: 0,theme,subTheme,name,latitude,longitude
0,Transport,Railway Station,Flemington Bridge Railway Station,-37.7881645889621,144.939277838304
1,Mixed Use,Retail/Office/Carpark,Council House 2 (CH2),-37.8142591432011,144.966638432727
2,Place Of Assembly,Library,The Melbourne Athenaeum Library,-37.8148855756416,144.967291289941
3,Leisure/Recreation,Informal Outdoor Facility (Park/Garden/Reserve),Carlton Gardens South,-37.8060684577258,144.971266479841
4,Place of Worship,Church,St Francis Church,-37.8118847831837,144.962422614541


In [17]:
# Create the place id, step 1: add the constant prefix column 'id' and a
# helper column 'id_' holding a running number that starts at 1
df_places.insert(0, 'id', 'PLAC-')
df_places.insert(1, 'id_', list(range(1, df_places.shape[0] + 1)))

In [18]:
# Create the place id, step 2: turn the running number into text, then append
# it to the 'PLAC-' prefix already stored in 'id'
df_places = (
    df_places
    .assign(id_=lambda frame: frame['id_'].astype(str))
    .assign(id=lambda frame: frame['id'] + frame['id_'])
)

In [19]:
# Check whether the dataframe is as expected: 'id' should read 'PLAC-<n>' and
# the helper column 'id_' should still be present
df_places.head()

Unnamed: 0,id,id_,theme,subTheme,name,latitude,longitude
0,PLAC-1,1,Transport,Railway Station,Flemington Bridge Railway Station,-37.7881645889621,144.939277838304
1,PLAC-2,2,Mixed Use,Retail/Office/Carpark,Council House 2 (CH2),-37.8142591432011,144.966638432727
2,PLAC-3,3,Place Of Assembly,Library,The Melbourne Athenaeum Library,-37.8148855756416,144.967291289941
3,PLAC-4,4,Leisure/Recreation,Informal Outdoor Facility (Park/Garden/Reserve),Carlton Gardens South,-37.8060684577258,144.971266479841
4,PLAC-5,5,Place of Worship,Church,St Francis Church,-37.8118847831837,144.962422614541


In [20]:
# Select only the useful columns, in export order (drops the helper 'id_')
useful_column = [
    'id',
    'theme',
    'subTheme',
    'name',
    'latitude',
    'longitude',
]
df_places = df_places[useful_column]

In [21]:
# Check whether the dataframe is as expected: the helper column 'id_' should
# be gone, leaving the six columns selected for export
df_places.head()

Unnamed: 0,id,theme,subTheme,name,latitude,longitude
0,PLAC-1,Transport,Railway Station,Flemington Bridge Railway Station,-37.7881645889621,144.939277838304
1,PLAC-2,Mixed Use,Retail/Office/Carpark,Council House 2 (CH2),-37.8142591432011,144.966638432727
2,PLAC-3,Place Of Assembly,Library,The Melbourne Athenaeum Library,-37.8148855756416,144.967291289941
3,PLAC-4,Leisure/Recreation,Informal Outdoor Facility (Park/Garden/Reserve),Carlton Gardens South,-37.8060684577258,144.971266479841
4,PLAC-5,Place of Worship,Church,St Francis Church,-37.8118847831837,144.962422614541


## 3. Export Clean Data to a CSV file

In [22]:
# Export full version
# The relative path uses forward slashes so it resolves on every OS; the
# backslash form only worked because '\P' and '\O' are not recognised escape
# sequences (Python warns about such literals) and because backslash is a
# separator on Windows only.
# NOTE(review): presumably the 'Output/Places' folder must already exist - confirm.
# index=False keeps the dataframe's row index out of the file.
df_places.to_csv('Output/Places/OK_Place_of_Interest_Melbourne_V2.csv', index=False)