### Visualise the location of Cafes in Melbourne over the past 20 years (2002 - 2020)

Data source: https://data.melbourne.vic.gov.au/Business/Cafes-and-restaurants-with-seating-capacity/xt2y-tnn9

How to overlay x, y coordiate datapoints on a map:
https://jakevdp.github.io/PythonDataScienceHandbook/04.13-geographic-data-with-basemap.html

Read big data:
https://towardsdatascience.com/why-and-how-to-use-pandas-with-large-data-9594dda2ea4c

Maps: https://data.melbourne.vic.gov.au/Property/Municipal-boundary/ck33-yh8z

In [11]:
datapath = '/Users/tiantianyuan/work/learn_py/self/ds_projects/database/kaggle/coffee/Melbourne_cafe_location/'
filename = 'Cafe_restaurant_bistro_seatsv2.csv'

import pandas as pd
import numpy as np

# read data into a pandas dataframe.
# df = pd.read_csv(datapath + filename, nrows = 1000)
# cols = ['Census year', 'Seating type', 'Number of seats', 'x coordinate', 'y coordinate']
df_chunk = pd.read_csv(datapath + filename, chunksize = 2000, iterator=True)

# Strictly speaking, df_chunk is not a dataframe but an object for further operation in the next step.
# large file 39942 rows! 

chunk_list = []  # append each chunk df here 

# Each chunk is in df format
for chunk in df_chunk:  
    # Once the data filtering is done, append the chunk to list
    chunk_list.append(chunk)
    
# concat the list into dataframe 
df = pd.concat(chunk_list)


# clean data.

# get the columns we need.

# overlay x_coord, y_coord ontop of a map today

# make it an animation

In [12]:
print(type(df))
print(df.shape)
print(df.info())
print(df.head())

<class 'pandas.core.frame.DataFrame'>
(39941, 6)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 39941 entries, 0 to 39940
Data columns (total 6 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Census year      39941 non-null  int64  
 1   Block ID         39941 non-null  int64  
 2   Property ID      39941 non-null  int64  
 3   Number of seats  39941 non-null  int64  
 4   x coordinate     39939 non-null  float64
 5   y coordinate     39939 non-null  float64
dtypes: float64(2), int64(4)
memory usage: 1.8 MB
None
   Census year  Block ID  Property ID  Number of seats  x coordinate  \
0         2002        64       104665               22    144.961106   
1         2002        64       104669               90    144.961262   
2         2002        64       104669               30    144.961262   
3         2002        64       105870               20    144.961328   
4         2002        64       105869               65    14

In [None]:
# # Ok, google maps require API which is annoying, let's try another approach of not using google maps

# import gmplot
# pathout = '/Users/tiantianyuan/work/learn_py/self/ds_projects/cafe_location/out/'
# # lat, long
# gmap1 = gmplot.GoogleMapPlotter(-37.8130702965, 144.961106124, 13)  
# # gmap1.apikey = 'AIzaSyB0g0GBeYwUV9UGcCB6MJtT8sdojo7iQLE'
# lats = df['y coordinate'].values
# lngs = df['x coordinate'].values

# gmap1.scatter(lats[0:100], lngs[0:100], color='#3B0B39', size=10, marker=False)

# gmap1.draw(pathout + 'cafe_map.html') 

In [35]:
df_cut = df.iloc[0:6000, :]
print(df_cut.shape)
print(df_cut.head())

(6000, 7)
   Census year  Block ID  Property ID  Number of seats  x coordinate  \
0         2002        64       104665               22    144.961106   
1         2002        64       104669               90    144.961262   
2         2002        64       104669               30    144.961262   
3         2002        64       105870               20    144.961328   
4         2002        64       105869               65    144.961073   

   y coordinate                     geometry  
0    -37.813070  POINT (144.96111 -37.81307)  
1    -37.813395  POINT (144.96126 -37.81339)  
2    -37.813395  POINT (144.96126 -37.81339)  
3    -37.813543  POINT (144.96133 -37.81354)  
4    -37.813598  POINT (144.96107 -37.81360)  


In [45]:
%matplotlib widget
import matplotlib.pyplot as plt
from shapely.geometry import Point, Polygon
import geopandas as gpd

# import street map
# pathmap = '/Users/tiantianyuan/work/learn_py/self/ds_projects/cafe_location/geomap_shp/green_maps/'
# street_map = gpd.read_file(pathmap + 'Green_Wedge2016_region.shp')

pathmap = '/Users/tiantianyuan/work/learn_py/self/ds_projects/cafe_location/geomap_shp/muni_bound/'
street_map = gpd.read_file(pathmap + 'geo_export_bd0b4a8b-56ed-449c-a011-2bde239ce59c.shp')


# designate coordinate system
# crs = {'init':'espc:4326'}  # zip x and y coordinates into single feature
# crs = {'init': 'EPSG:3395'}
# world = world.to_crs("EPSG:3395")
crs = 'EPSG:3395'

df_cut = df.iloc[0:3000, :]
geometry = [Point(xy) for xy in zip(df_cut['x coordinate'], df_cut['y coordinate'])]# create GeoPandas dataframe
geo_df = gpd.GeoDataFrame(df_cut, crs = crs, geometry = geometry)


# create figure and axes, assign to subplot
fig, ax = plt.subplots(figsize = (6,6))# add .shp mapfile to axes
street_map.plot(ax = ax, alpha = 0.4, color = 'k', edgecolor='black')


# geo_df.plot(column='Number of seats',ax=ax, alpha=0.5, legend=True, markersize=1)
geo_df.plot(column='Census year',ax=ax, alpha=0.25, legend=True, markersize=1)

# add title to graph
plt.title('Melbourne Cafes 2002-2003', fontsize=15,fontweight='bold')
# set latitiude and longitude boundaries for map display
# plt.xlim(144.,145)
# plt.ylim(-38,-37)# show map
plt.show()



df_cut = df.iloc[3001:5800, :]
geometry = [Point(xy) for xy in zip(df_cut['x coordinate'], df_cut['y coordinate'])]# create GeoPandas dataframe
geo_df = gpd.GeoDataFrame(df_cut, crs = crs, geometry = geometry)
fig, ax = plt.subplots(figsize = (6,6))# add .shp mapfile to axes
street_map.plot(ax = ax, alpha = 0.4, color = 'k', edgecolor='black')
geo_df.plot(column='Census year',ax=ax, alpha=0.25, legend=True, markersize=1)
plt.title('Melbourne Cafes 2003-2004', fontsize=15,fontweight='bold')
plt.show()

df_cut = df.iloc[5801:8500, :]
geometry = [Point(xy) for xy in zip(df_cut['x coordinate'], df_cut['y coordinate'])]# create GeoPandas dataframe
geo_df = gpd.GeoDataFrame(df_cut, crs = crs, geometry = geometry)
fig, ax = plt.subplots(figsize = (6,6))# add .shp mapfile to axes
street_map.plot(ax = ax, alpha = 0.4, color = 'k', edgecolor='black')
geo_df.plot(column='Census year',ax=ax, alpha=0.25, legend=True, markersize=1)
plt.title('Melbourne Cafes 2005-2006', fontsize=15,fontweight='bold')
plt.show()

df_cut = df.iloc[8501:12000, :]
geometry = [Point(xy) for xy in zip(df_cut['x coordinate'], df_cut['y coordinate'])]# create GeoPandas dataframe
geo_df = gpd.GeoDataFrame(df_cut, crs = crs, geometry = geometry)
fig, ax = plt.subplots(figsize = (6,6))# add .shp mapfile to axes
street_map.plot(ax = ax, alpha = 0.4, color = 'k', edgecolor='black')
geo_df.plot(column='Census year',ax=ax, alpha=0.25, legend=True, markersize=1)
plt.title('Melbourne Cafes 2006-2007', fontsize=15,fontweight='bold')
plt.show()


df_cut = df.iloc[12001:15000, :]
geometry = [Point(xy) for xy in zip(df_cut['x coordinate'], df_cut['y coordinate'])]# create GeoPandas dataframe
geo_df = gpd.GeoDataFrame(df_cut, crs = crs, geometry = geometry)
fig, ax = plt.subplots(figsize = (6,6))# add .shp mapfile to axes
street_map.plot(ax = ax, alpha = 0.4, color = 'k', edgecolor='black')
geo_df.plot(column='Census year',ax=ax, alpha=0.25, legend=True, markersize=1)
plt.title('Melbourne Cafes 2007-2008', fontsize=15,fontweight='bold')
plt.show()



df_cut = df.iloc[15001:20000, :]
geometry = [Point(xy) for xy in zip(df_cut['x coordinate'], df_cut['y coordinate'])]# create GeoPandas dataframe
geo_df = gpd.GeoDataFrame(df_cut, crs = crs, geometry = geometry)
fig, ax = plt.subplots(figsize = (6,6))# add .shp mapfile to axes
street_map.plot(ax = ax, alpha = 0.4, color = 'k', edgecolor='black')
geo_df.plot(column='Census year',ax=ax, alpha=0.25, legend=True, markersize=1)
plt.title('Melbourne Cafes 2009-2010', fontsize=15,fontweight='bold')
plt.show()


df_cut = df.iloc[20001:25000, :]
geometry = [Point(xy) for xy in zip(df_cut['x coordinate'], df_cut['y coordinate'])]# create GeoPandas dataframe
geo_df = gpd.GeoDataFrame(df_cut, crs = crs, geometry = geometry)
fig, ax = plt.subplots(figsize = (6,6))# add .shp mapfile to axes
street_map.plot(ax = ax, alpha = 0.4, color = 'k', edgecolor='black')
geo_df.plot(column='Census year',ax=ax, alpha=0.25, legend=True, markersize=1)
plt.title('Melbourne Cafes 2010-2011', fontsize=15,fontweight='bold')
plt.show()


df_cut = df.iloc[25001:30000, :]
geometry = [Point(xy) for xy in zip(df_cut['x coordinate'], df_cut['y coordinate'])]# create GeoPandas dataframe
geo_df = gpd.GeoDataFrame(df_cut, crs = crs, geometry = geometry)
fig, ax = plt.subplots(figsize = (6,6))# add .shp mapfile to axes
street_map.plot(ax = ax, alpha = 0.4, color = 'k', edgecolor='black')
geo_df.plot(column='Census year',ax=ax, alpha=0.25, legend=True, markersize=1)
plt.title('Melbourne Cafes 2012-2014', fontsize=15,fontweight='bold')
plt.show()


df_cut = df.iloc[30001:35000, :]
geometry = [Point(xy) for xy in zip(df_cut['x coordinate'], df_cut['y coordinate'])]# create GeoPandas dataframe
geo_df = gpd.GeoDataFrame(df_cut, crs = crs, geometry = geometry)
fig, ax = plt.subplots(figsize = (6,6))# add .shp mapfile to axes
street_map.plot(ax = ax, alpha = 0.4, color = 'k', edgecolor='black')
geo_df.plot(column='Census year',ax=ax, alpha=0.25, legend=True, markersize=1)
plt.title('Melbourne Cafes 2014-2015', fontsize=15,fontweight='bold')
plt.show()



df_cut = df.iloc[35001:39940, :]
geometry = [Point(xy) for xy in zip(df_cut['x coordinate'], df_cut['y coordinate'])]# create GeoPandas dataframe
geo_df = gpd.GeoDataFrame(df_cut, crs = crs, geometry = geometry)
fig, ax = plt.subplots(figsize = (6,6))# add .shp mapfile to axes
street_map.plot(ax = ax, alpha = 0.4, color = 'k', edgecolor='black')
geo_df.plot(column='Census year',ax=ax, alpha=0.25, legend=True, markersize=1)
plt.title('Melbourne Cafes 2015-2016', fontsize=15,fontweight='bold')
plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [None]:
# lats = df['y coordinate'].values
# lngs = df['x coordinate'].values

# gmap1.scatter(lats[0:100], lngs[0:100], color='#3B0B39', size=10, marker=False)

# gmap1.draw(pathout + 'cafe_map.html') 

ok, interesting, to do:

* 1 change the city map to larger FOV
* 2 automise the code for years (fine-tune)
* 3 animation (fine-tune)