In [1]:
import pandas as pd
import numpy as np
import folium
import sys
from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS
from math import cos, asin, sqrt, pi
from folium.plugins import HeatMap

In [2]:
def get_exif(filename):
    """Return the raw EXIF metadata of an image file.

    Parameters
    ----------
    filename : str, path-like or file object
        Image to open with ``Image.open``.

    Returns
    -------
    object
        Whatever the opened image's ``_getexif()`` returns. The rest of this
        notebook treats it as a mapping keyed by numeric EXIF tag id, or a
        falsy value when the image carries no EXIF data (assumption based on
        how ``get_geotagging`` consumes it; confirm against the Pillow
        version in use).
    """
    # Open the image as a context manager so the underlying file handle is
    # released as soon as the metadata has been read, instead of lingering
    # until garbage collection.
    with Image.open(filename) as img:
        return img._getexif()

In [3]:
def get_labeled_exif(exif):
    """Re-key an EXIF mapping by human-readable tag name.

    Parameters
    ----------
    exif : mapping or None
        EXIF data keyed by numeric tag id (as produced by ``get_exif``).

    Returns
    -------
    dict
        A new dict with the same values, keyed by the name ``TAGS`` gives for
        each id. Ids that ``TAGS`` does not know keep their numeric id as the
        key, so several unknown tags no longer collapse onto a single ``None``
        key and overwrite each other. Missing or empty EXIF yields ``{}``.
    """
    # Images without EXIF data are passed in as None/empty; return an empty
    # mapping rather than failing on ``None.items()``.
    if not exif:
        return {}

    return {TAGS.get(tag_id, tag_id): value for tag_id, value in exif.items()}

In [4]:
def get_geotagging(exif):
    """Pull the GPS entries out of an EXIF mapping, keyed by readable name.

    Parameters
    ----------
    exif : mapping
        EXIF data keyed by numeric tag id; the entry whose ``TAGS`` name is
        ``'GPSInfo'`` is expected to be a container keyed by GPS tag id.

    Returns
    -------
    dict
        ``{GPSTAGS name: value}`` for every ``GPSTAGS`` id present in the
        GPS entry.

    Raises
    ------
    ValueError
        If ``exif`` is empty/None, or if it has no GPS entry.
    """
    if not exif:
        raise ValueError("No EXIF metadata found")

    # Numeric id(s) that TAGS labels as the GPS sub-directory.
    gps_tag_ids = [tag_id for tag_id, tag_name in TAGS.items() if tag_name == 'GPSInfo']

    found = {}
    for tag_id in gps_tag_ids:
        if tag_id not in exif:
            raise ValueError("No EXIF geotagging found")

        gps_block = exif[tag_id]
        # Keep only the GPS ids actually present, translated to their names.
        found.update(
            (gps_name, gps_block[gps_id])
            for gps_id, gps_name in GPSTAGS.items()
            if gps_id in gps_block
        )

    return found

In [5]:
def get_decimal_from_dms(dms, ref):
    """Convert a (degrees, minutes, seconds) triple to signed decimal degrees.

    Parameters
    ----------
    dms : sequence
        Three numeric entries: degrees, minutes, seconds.
    ref : str
        Hemisphere reference; ``'S'`` and ``'W'`` produce a negative result.

    Returns
    -------
    float
        Decimal degrees rounded to 5 decimal places.
    """
    # Each entry is used directly as a number (not as a (num, den) pair); see
    # https://stackoverflow.com/questions/64405326/django-exif-data-ifdrational-object-is-not-subscriptable
    parts = [dms[0], dms[1] / 60.0, dms[2] / 3600.0]

    # Southern and western hemispheres are expressed as negative coordinates.
    if ref in ('S', 'W'):
        parts = [-part for part in parts]

    deg, mins, secs = parts
    return round(deg + mins + secs, 5)


In [6]:

def get_coordinates(geotags):
    """Turn GPS geotags into a ``(latitude, longitude)`` pair.

    Parameters
    ----------
    geotags : mapping
        Must contain ``'GPSLatitude'``, ``'GPSLatitudeRef'``,
        ``'GPSLongitude'`` and ``'GPSLongitudeRef'``.

    Returns
    -------
    tuple
        ``(lat, lon)``, each the result of ``get_decimal_from_dms`` for the
        corresponding value/reference pair.
    """
    key_pairs = (
        ('GPSLatitude', 'GPSLatitudeRef'),
        ('GPSLongitude', 'GPSLongitudeRef'),
    )
    # Latitude first, then longitude; both go through the same conversion.
    return tuple(
        get_decimal_from_dms(geotags[value_key], geotags[ref_key])
        for value_key, ref_key in key_pairs
    )

In [7]:
def cal_distance(hotel_range):
    """Haversine-style distance between two coordinate pairs held in one table.

    Parameters
    ----------
    hotel_range : mapping or DataFrame
        Indexed by ``'lat'``/``'lon'`` (first point) and
        ``'latitude'``/``'longitude'`` (second point), in degrees. Values may
        be scalars or array-likes; the computation is element-wise.

    Returns
    -------
    float or array-like
        Distance per element. The constants 12742 and 1000 are presumably
        Earth's diameter in km and the km-to-metre factor, giving metres.
    """
    deg_to_rad = pi / 180
    first_lat, first_lon = hotel_range['lat'], hotel_range['lon']
    second_lat, second_lon = hotel_range['latitude'], hotel_range['longitude']

    # The two pieces of the haversine term, kept in the original operation order.
    lat_part = np.cos((second_lat - first_lat) * deg_to_rad) / 2
    lon_part = (
        np.cos(first_lat * deg_to_rad)
        * np.cos(second_lat * deg_to_rad)
        * (1 - np.cos((second_lon - first_lon) * deg_to_rad))
        / 2
    )
    haversine = 0.5 - lat_part + lon_part

    return 12742 * np.arcsin(np.sqrt(haversine)) * 1000

In [8]:
def cal_dis(lat1, lon1, lat2, lon2):
    """Haversine-style distance between point 1 and point 2.

    Parameters
    ----------
    lat1, lon1 : float or array-like
        Latitude and longitude of the first point, in degrees.
    lat2, lon2 : float or array-like
        Latitude and longitude of the second point, in degrees.

    Returns
    -------
    float or array-like
        Distance per element. The constants 12742 and 1000 are presumably
        Earth's diameter in km and the km-to-metre factor, giving metres.
    """
    rad_per_deg = pi / 180

    cos_dlat = np.cos((lat2 - lat1) * rad_per_deg)
    cos_dlon = np.cos((lon2 - lon1) * rad_per_deg)

    # Haversine term, written with cosines instead of squared sines.
    hav = 0.5 - cos_dlat / 2 + np.cos(lat1 * rad_per_deg) * np.cos(lat2 * rad_per_deg) * (1 - cos_dlon) / 2

    return 12742 * np.arcsin(np.sqrt(hav)) * 1000

In [9]:
def count_amenities(df1, df, category, radius_m=500):
    """Count the amenities of one category that lie close to a listing.

    Designed to be used row-wise, e.g.
    ``hotel.apply(count_amenities, df=amenities, category='bar', axis=1)``.

    Parameters
    ----------
    df1 : mapping or Series
        One listing; must provide ``'latitude'`` and ``'longitude'``.
    df : DataFrame
        Amenity table with ``'amenity'``, ``'lat'`` and ``'lon'`` columns.
    category : str
        Value of the ``'amenity'`` column to count.
    radius_m : float, optional
        Distance threshold, compared against the output of ``cal_dis``
        (which appears to be in metres). Defaults to 500, the value that
        was previously hard-coded.

    Returns
    -------
    int
        Number of matching amenities strictly closer than ``radius_m``.
    """
    matching = df[df['amenity'] == category]
    if matching.empty:
        return 0

    # Convert to float arrays so the NumPy ufuncs inside cal_dis work even if
    # the columns ended up with object dtype after concatenating tables.
    amenity_lat = matching['lat'].to_numpy(dtype=float)
    amenity_lon = matching['lon'].to_numpy(dtype=float)

    # One vectorized distance computation instead of a Python-level loop that
    # re-read the listing's coordinates and called cal_dis once per amenity.
    distances = cal_dis(df1['latitude'], df1['longitude'], amenity_lat, amenity_lon)

    # Cast so callers still get a plain int rather than a NumPy integer.
    return int((distances < radius_m).sum())

In [10]:
def main(input_directory, output_directory):
    """Recommend listings near where a photo was taken and map local amenities.

    Steps:

    1. Read the GPS position from the photo's EXIF data.
    2. Load the amenity tables (``amenities-vancouver.json.gz`` and
       ``other_amenities.csv``) and keep the categories of interest.
    3. Keep the rows of ``listings.csv`` within 300 m of the photo position.
    4. For each remaining listing, count nearby amenities per category with
       ``count_amenities``.
    5. Save a folium map (``Amenity.html``) with an amenity heat map, a marker
       for the photo position and one marker per listing, and write the
       listings, sorted by price, to ``output_directory`` as CSV.

    Parameters
    ----------
    input_directory : str
        Path of the geotagged image file (a file path, despite the name).
    output_directory : str
        Path of the CSV file to write (passed to ``DataFrame.to_csv``).

    Raises
    ------
    ValueError
        Propagated from ``get_geotagging`` when the photo has no EXIF or GPS
        data.
    """
    search_radius_m = 300
    amenity_kinds = ['arts_centre','restaurant','bar','casino', 'cinema', 'clock','museum','park','university','beach','theatre','lake', 'shopping_centre','conference_centre']
    # Output column -> amenity category counted for it (insertion order is
    # the column order of the resulting table).
    count_columns = {
        'art_centres': 'arts_centre',
        'bars': 'bar',
        # The category was misspelled 'threatre', which matches nothing, so
        # this count was always 0. The misspelled column label is kept so the
        # CSV header stays compatible with existing consumers.
        'threatres': 'theatre',
        'museums': 'museum',
        'casinos': 'casino',
        'cinemas': 'cinema',
        'parks': 'park',
        'restaurants': 'restaurant',
        'conference_centres': 'conference_centre',
    }

    # Identify current location from the photo's GPS tags.
    exif = get_exif(input_directory)
    geotags = get_geotagging(exif)
    photo_lat, photo_lon = (float(value) for value in get_coordinates(geotags))

    # Create amenity table with additional amenities.
    amen = pd.read_json('amenities-vancouver.json.gz', lines=True)
    hotel = pd.read_csv("listings.csv")
    addition_amen = pd.read_csv('other_amenities.csv')
    amen = amen.drop(['timestamp','tags'], axis=1).dropna().reset_index(drop=True)
    new_amen = pd.concat([amen, addition_amen]).reset_index(drop=True)
    new_amen = new_amen[new_amen['amenity'].isin(amenity_kinds)]

    # Find listings within the search radius of the photo position.
    # cal_distance reads both points from columns of the same frame, so the
    # photo position is broadcast into temporary 'lat'/'lon' columns.
    hotel['lon'] = photo_lon
    hotel['lat'] = photo_lat
    hotel['distance'] = cal_distance(hotel)
    hotel = hotel[hotel['distance'] < search_radius_m]

    # Drop columns that are unnecessary for visitors, plus the temporaries.
    hotel = hotel.drop(['host_id','host_name','neighbourhood_group', 'neighbourhood','number_of_reviews','last_review','reviews_per_month','calculated_host_listings_count','availability_365','lon','lat','distance' ], axis = 1)
    hotel = hotel.reset_index(drop=True)

    # Count nearby amenities per category. A list comprehension over the rows
    # is used instead of DataFrame.apply(axis=1) because apply does not return
    # a Series for an empty frame, which would break the column assignment
    # when no listing is within range.
    for column, category in count_columns.items():
        hotel[column] = [
            count_amenities(row, df=new_amen, category=category)
            for _, row in hotel.iterrows()
        ]

    hotel = hotel.sort_values(by=['price'])

    # Show density graph of the amenities, centred on the photo position.
    van_map = folium.Map(location=[photo_lat, photo_lon], zoom_start=12)

    folium.Marker(
        location=[photo_lat, photo_lon],
        popup="My position",
        icon=folium.Icon(icon="cloud"),
    ).add_to(van_map)

    heatpoints = new_amen[['lat', 'lon']].values.tolist()
    HeatMap(heatpoints).add_to(van_map)

    # One marker per listing, grouped in a single feature group. Distinct
    # loop names keep the photo coordinates from being overwritten.
    listing_markers = folium.map.FeatureGroup()
    for hotel_lat, hotel_lon, label in zip(hotel['latitude'], hotel['longitude'], hotel['name']):
        folium.Marker([hotel_lat, hotel_lon], popup=label).add_to(listing_markers)
    van_map.add_child(listing_markers)

    # Coordinates were only needed for the map; leave them out of the table.
    hotel = hotel.drop(['latitude','longitude'], axis = 1)

    van_map.save(outfile= "Amenity.html")
    hotel.to_csv(output_directory)
    

In [11]:
if __name__ == '__main__':
    # Notebook run: a fixed sample photo in, the listing table out. When run
    # as a command-line script these two values would instead come from
    # sys.argv[1] and sys.argv[2].
    input_directory, output_directory = 'p1.jpg', 'hotel_list'
    main(input_directory, output_directory)