In [1]:
import pandas as pd
from scipy import spatial
import math

In [2]:
%%time
gnaf_address_combined = pd.read_csv("../data/Master/ACT-GNAF.csv")



Wall time: 1.65 s


In [53]:
gnaf_address_combined.head(2)

Unnamed: 0,ADDRESS_DETAIL_PID,DATE_CREATED,DATE_LAST_MODIFIED,DATE_RETIRED,BUILDING_NAME,LOT_NUMBER_PREFIX,LOT_NUMBER,LOT_NUMBER_SUFFIX,FLAT_TYPE_CODE,FLAT_NUMBER_PREFIX,...,PRIMARY_SECONDARY,STATE,STREET_NAME,STREET_TYPE_CODE,LOCALITY_NAME,MB_2016_PID,LONGITUDE,LATITUDE,FULL_ADDRESS,CARTESIAN_COOR
0,GAACT715187300,2004-04-29,2018-11-01,,,,,,,,...,,ACT,GEORGINA,CRESCENT,KALEEN,MB1680024002000,149.102358,-35.220722,"10A GEORGINA CRESCENT, KALEEN, ACT 2617","(-4466.081815343984, 2672.6451730840595, -3674..."
1,GAACT715187309,2004-04-29,2018-11-01,,,,,,UNIT,,...,S,ACT,GEORGINA,CRESCENT,KALEEN,MB1680024002000,149.102358,-35.220722,"UNIT1510 GEORGINA CRESCENT, KALEEN, ACT 2617","(-4466.081815343984, 2672.6451730840595, -3674..."


In [3]:
%%time
asgs = pd.read_csv("../data/Master/" +"AU-ASGS.csv", dtype='str')

Wall time: 1.43 s


In [24]:
#gnaf_address_combined.info()
gnaf_address_combined.shape

(255287, 44)

### Independent Testing

In [19]:
#### Method to find Cartesian Coordinates
def cartesian(latitude, longitude, elevation = 0):
    # Convert to radians
    latitude = latitude * (math.pi / 180)
    longitude = longitude * (math.pi / 180)

    R = 6371 # 6378137.0 + elevation  # relative to centre of the earth
    X = R * math.cos(latitude) * math.cos(longitude)
    Y = R * math.cos(latitude) * math.sin(longitude)
    Z = R * math.sin(latitude)
    return (X, Y, Z)

type(cartesian(*list((41.49008, -71.312796))))

tuple

In [12]:
%%time

#### This could be done on master dataset creation ####

# places = []
# X = gnaf_address_combined[['LATITUDE', 'LONGITUDE']]
# for index, row in X.iterrows():
#     coordinates = [row['LATITUDE'], row['LONGITUDE']]
#     cartesian_coord = cartesian(*coordinates)
#     places.append(cartesian_coord)
    
gnaf_address_combined['CARTESIAN_COOR'] = gnaf_address_combined[['LATITUDE', 'LONGITUDE']].apply(lambda x : cartesian(*list((x.LATITUDE, x.LONGITUDE))), axis=1)

## iterrows() is resource-consuming, use tolist() instead
places = gnaf_address_combined['CARTESIAN_COOR'].tolist()

Wall time: 30.4 s


In [46]:
print(len(places))
print(places[4])

255287
(-4466.077465926709, 2672.6317151305593, -3674.347939923484)


In [52]:
%%time
##### Test with simulated Input Using Spatial KDTree to find the nearested coordinate in the dataset
##### This returns the index of the row in the dataset
from scipy import spatial

lon = 149.027961
lat = -35.243624

cartesian_coord = cartesian(lat, lon)

tree = spatial.KDTree(places)
closest = tree.query([cartesian_coord], p = 2)

index = closest[1][0]
print(index)

255286
Wall time: 6.82 s


In [56]:
### Get the full Address by the returned Index
gnaf_address_combined.iloc[index,:][['ADDRESS_DETAIL_PID', 'LONGITUDE', 'LATITUDE', 'FULL_ADDRESS']]

ADDRESS_DETAIL_PID                                   GAACT718835355
LONGITUDE                                                   149.028
LATITUDE                                                   -35.2436
FULL_ADDRESS          121 WILLIAM HOVELL DRIVE, BELCONNEN, ACT 2617
Name: 255286, dtype: object

### Make function of the Above experiment

In [46]:
AU = {}
AU['MESH_BLOCK'] = 'MB_CODE_2016'
AU['SA1'] = 'SA1_7DIGITCODE_2016'
AU['SA2'] = 'SA2_NAME_2016'
AU['SA3'] = 'SA3_NAME_2016'
AU['SA4'] = 'SA4_NAME_2016'
AU['SUBURB'] = 'SSC_NAME_2016'
AU['LGA'] = 'LGA_NAME_2016'
AU['STATE'] = 'STATE_NAME_2016'

country = AU

In [47]:
%%time

### This should be in initialisation
from ast import literal_eval

## This is to convert the following string value
## '(-4466.08, 2672.64, -3674.33)' into Tuple style -> (-4466.08, 2672.64, -3674.33)
gnaf_address_combined['CARTESIAN_COOR'] = gnaf_address_combined['CARTESIAN_COOR'].apply(lambda x: literal_eval(str(x)))

## The column values are put into a list
## the expectected out is like this
## [(-4466.081815343984, 2672.6451730840595, -3674.332864268595), 
## (-4466.071446037419, 2672.6110625341303, -3674.3702790150883)]
cart_cor = gnaf_address_combined['CARTESIAN_COOR'].tolist()

## cart_cor array will be used to constract the spatial.KDTree later step

Wall time: 5.84 s


In [56]:
print(len(places))
print(places[1:4])

255287
[(-4466.081815343984, 2672.6451730840595, -3674.332864268595), (-4466.071446037419, 2672.6110625341303, -3674.3702790150883), (-4466.081815343984, 2672.6451730840595, -3674.332864268595)]


In [48]:
def getRegionByCoordinates(lat, lon):
    
    # Get Cartesian coordinates of the input
    cartesian_coord = cartesian(lat, lon)
    
    # Construct the tree
    tree = spatial.KDTree(cart_cor)
    
    closest = tree.query([cartesian_coord], p = 2)
    
    index = closest[1][0]
    
    ## Get the result row
    res = gnaf_address_combined.iloc[index,:]   
    
    mb = res['MB_2016_PID']
    
    region = asgs[asgs['MB_CODE_2016'] == mb[4:]]

    for key, value in AU.items():
        print(key+":"+region.iloc[0][value])

### Testing the Function

In [49]:
%%time
lon = 149.027961
lat = -35.243624
getRegionByCoordinates(lat, lon)

MESH_BLOCK:80056484200
SA1:8111101
SA2:Molonglo Corridor
SA3:Belconnen
SA4:Australian Capital Territory
SUBURB:ACT Remainder - Belconnen
LGA:Unincorporated ACT
STATE:Australian Capital Territory
Wall time: 6.53 s


In [50]:
lat = -35.19678041
lon = 149.02779517
getRegionByCoordinates(lat, lon)

MESH_BLOCK:80011580000
SA1:8100604
SA2:Dunlop
SA3:Belconnen
SA4:Australian Capital Territory
SUBURB:Dunlop
LGA:Unincorporated ACT
STATE:Australian Capital Territory


In [51]:
lat = -35.220722
lon = 149.102358
getRegionByCoordinates(lat, lon)

MESH_BLOCK:80024002000
SA1:8101613
SA2:Kaleen
SA3:Belconnen
SA4:Australian Capital Territory
SUBURB:Kaleen
LGA:Unincorporated ACT
STATE:Australian Capital Territory
