In [6]:
import yaml
import requests
import datetime
import time
import csv
import pandas as pd
import math

# Get all Venue IDs for venues within the bounding box.


def get_delta(lower, upper, length):
    """Return the signed step obtained by splitting ``upper - lower`` into ``length`` parts.

    The result is negative when ``upper`` is smaller than ``lower``.
    """
    span = upper - lower
    step = span / length
    return step


def _fetch_explore_items(search_params):
    """Query the Foursquare ``venues/explore`` endpoint until a usable reply arrives.

    Retries without limit: 10 s after a transport error, 5 s after a reply
    that is not JSON or carries no ``groups``.

    Returns:
        The ``items`` list of the first group, or ``[]`` when ``groups`` is empty.
    """
    while True:
        try:
            r = requests.get('https://api.foursquare.com/v2/venues/explore',
                             params=search_params)
        except requests.exceptions.RequestException as e:
            print(e)
            print('Sleeping 10s')
            time.sleep(10)
            # There is no response for this attempt; retry immediately rather
            # than parsing a missing (or previous) response object.
            continue

        try:
            resp = r.json()['response']
        except (ValueError, KeyError, TypeError):
            # Body was not valid JSON or had no 'response' member.
            time.sleep(5)
            print('retry')
            continue

        if 'groups' in resp:
            groups = resp['groups']
            return groups[0]['items'] if groups else []

        time.sleep(5)
        print('retry')


def getBBoxVenuesNWSE(bound_N,bound_W,bound_S,bound_E,divisions,
                 foursquare_id,foursquare_secret):
    """Collect Foursquare venues inside a bounding box by scanning a grid.

    The box is cut into ``divisions`` x ``divisions`` cells and one
    ``venues/explore`` query is issued at the centre of every cell, with a
    search radius equal to the box diagonal divided by ``divisions``.

    If any single query returns ``limit`` (50) or more items the scan is
    abandoned and restarted from scratch with ``2 * divisions``; everything
    gathered so far is discarded.

    Args:
        bound_N, bound_S: northern / southern latitude of the box, in degrees.
        bound_W, bound_E: western / eastern longitude of the box, in degrees.
        divisions: number of grid cells along each axis.
        foursquare_id, foursquare_secret: API credentials sent with each query.

    Returns:
        A list of ``[name, id, lat, lng, categories]`` rows, where
        ``categories`` is a ';'-joined string of category names ('' if none).
        Neighbouring search circles overlap, so rows may repeat.
    """
    # lat_delta is negative when bound_N > bound_S (rows run north -> south).
    lat_delta = get_delta(bound_N, bound_S, divisions)
    long_delta = get_delta(bound_W, bound_E, divisions)

    # Approximate metres per degree of latitude, and of longitude at bound_N.
    dist_ns = 110574 * abs(bound_N - bound_S)
    dist_ew = 111320 * math.cos(math.radians(bound_N)) * abs(bound_E - bound_W)

    search_params = {
        'client_id': foursquare_id,
        'client_secret': foursquare_secret,
        'limit': 50,
        'v': '20181209',
        'radius': math.sqrt(dist_ew * dist_ew + dist_ns * dist_ns) / divisions,
        'sortByDistance': 1,
    }

    print('BBox size {}m x {}m. Search r={}m'.format(dist_ns,dist_ew,search_params['radius']))

    search_count = 0
    venues_list = []
    max_response_size = 0

    for lat in range(divisions):
        for long in range(divisions):
            # Centre of cell (lat, long). Both deltas are signed, so adding
            # (index + 0.5) steps from the N/W corner stays inside the box
            # whichever direction the axis runs.
            centre_lat = bound_N + (lat + 0.5) * lat_delta
            centre_long = bound_W + (long + 0.5) * long_delta
            search_params['ll'] = '{},{}'.format(centre_lat, centre_long)

            items = _fetch_explore_items(search_params)
            # Pause between queries; the API gets fussy above ~5000 requests/hr.
            time.sleep(0.6)

            for item in items:
                venue = item['venue']
                categories = ';'.join(
                    x['name'] for x in (venue.get('categories') or []))
                venues_list.append([venue['name'],
                                    venue['id'],
                                    venue['location']['lat'],
                                    venue['location']['lng'],
                                    categories])

            max_response_size = max(max_response_size, len(items))

            if len(items) >= search_params['limit']:
                # A full page means the cell may hold more venues than were
                # returned: rescan the whole box with a grid twice as fine.
                print("Recursive split")
                return getBBoxVenuesNWSE(bound_N,bound_W,bound_S,bound_E,2*divisions,
                                         foursquare_id,foursquare_secret)

            search_count += 1
            if search_count % 50 == 0:
                print(search_count)

    print('{} Unique Venues Scraped: {}.'.format(
        len(venues_list), datetime.datetime.now()))

    print('Maximum response size {} venues.'.format(max_response_size))
    return venues_list

#df=pd.DataFrame(venues_list)
#df.to_csv('tampere_venues.csv',index=False)
#df

def getCenteredVenues(lat,lng,dist_to_side,divisions,foursquare_id,foursquare_secret):
    """Scan the box reaching ``dist_to_side`` metres from (``lat``, ``lng``) in each direction.

    The metre distance is converted to latitude / longitude offsets and the
    resulting N, W, S, E bounds are handed to ``getBBoxVenuesNWSE``, whose
    result is returned unchanged.
    """
    # Metres spanned by one degree of latitude, and of longitude at `lat`.
    metres_per_deg_lat = 110574
    metres_per_deg_lng = 111320 * math.cos(math.radians(lat))

    half_height = dist_to_side / metres_per_deg_lat
    half_width = dist_to_side / metres_per_deg_lng

    north, south = lat + half_height, lat - half_height
    west, east = lng - half_width, lng + half_width

    return getBBoxVenuesNWSE(north, west, south, east, divisions,
                             foursquare_id, foursquare_secret)

def scrapeNamedCity(city,df,foursquare_id,foursquare_secret):
    """Scrape Foursquare venues on a 9 x 9 grid of tiles centred on ``city``.

    The city is geocoded with Nominatim. Each tile is a square reaching
    ``dist_to_side`` (3000 m) from its centre, and tile centres are spaced
    ``2 * dist_to_side`` apart. Every tile is labelled ``'<city>-<row>-<col>'``
    in the ``City`` column; tiles whose label already appears in ``df`` are
    skipped, which lets an interrupted run resume.

    After each tile that yields venues the accumulated frame is written to the
    module-level ``FILENAME`` as a checkpoint.

    Args:
        city: place name passed to the geocoder.
        df: venues gathered so far (may be an empty DataFrame).
        foursquare_id, foursquare_secret: API credentials for the venue queries.

    Returns:
        ``df`` extended with the venues of all newly processed tiles.

    Raises:
        ValueError: if the geocoder finds no match for ``city``.
    """
    from geopy.geocoders import Nominatim

    geolocator = Nominatim(user_agent="my-application")

    # Keep retrying the geocoder; `except Exception` (not a bare except) so
    # that Ctrl-C / kernel interrupt can still break out of the loop.
    while True:
        try:
            location = geolocator.geocode(city)
        except Exception as e:
            print(e)
            time.sleep(3)
            continue
        else:
            break

    if location is None:
        raise ValueError('Could not geocode {!r}'.format(city))

    lat = location.latitude
    lng = location.longitude
    dist_to_side = 3000  # meters
    print('The geographical coordinates of {} city centre are {}, {}.'.format(city,lat,lng))

    # Tile spacing: full tile width (2 * dist_to_side) converted from metres
    # to latitude / longitude degrees.
    delta_lat = 2 * dist_to_side / 110574
    delta_lng = 2 * dist_to_side / (111320 * math.cos(math.radians(lat)))

    # 50 divisions for a 6000 m tile, i.e. search cells of 120 m per side.
    divisions = math.ceil(2 * 25 * dist_to_side / 3000)

    processed_cities = set()
    if len(df) > 0:
        processed_cities = set(df['City'].unique())

    for step_lat in range(9):
        for step_long in range(9):
            # Offsets -4..4 put tile (4, 4) on the geocoded centre.
            lat_tile = lat + (step_lat - 4) * delta_lat
            long_tile = lng + (step_long - 4) * delta_lng

            city_string = '{}-{}-{}'.format(city,step_lat,step_long)

            if city_string in processed_cities:
                print('Skipping {}'.format(city_string))
                continue

            print('Processing {}'.format(city_string))
            venues_list = getCenteredVenues(lat_tile, long_tile, dist_to_side, divisions,
                                            foursquare_id, foursquare_secret)
            if len(venues_list) == 0:
                continue

            dfn = pd.DataFrame(venues_list)
            print(dfn.shape)
            # Overlapping search circles return the same venue several times.
            dfn = dfn.drop_duplicates()
            dfn.columns = ['Name','Id','Latitude','Longtitude','Categories']
            dfn['City'] = city_string
            print(dfn.shape)

            if df.size > 0:
                # DataFrame.append was removed in pandas 2.0.
                df = pd.concat([df, dfn], ignore_index=True)
            else:
                df = dfn

            # Checkpoint only when something new was added.
            df.to_csv(FILENAME,index=False)

    return df
   
   
                                    

In [None]:
import pandas as pd
import os
import math
import numpy as np # library to handle data in a vectorized manner
!conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

!conda install -c conda-forge geopy --yes 
# the geopy install above is only needed if geopy is not already installed in this environment
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import matplotlib.cm as cm
import matplotlib.colors as colors


print('Libraries imported.')


In [7]:
import os

# Foursquare credentials come from the environment so they are never stored in
# the notebook. NOTE(review): keys were previously hardcoded in this cell and
# should be treated as leaked -- rotate them.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET')
if not CLIENT_ID or not CLIENT_SECRET:
    raise RuntimeError(
        'Set FOURSQUARE_CLIENT_ID and FOURSQUARE_CLIENT_SECRET before running this cell.')

# Checkpoint file: read on start-up to resume, rewritten as scraping proceeds.
FILENAME='scraped_venues_tiled.csv'

cities_list=[
    "London, England",
    "Berlin, Germany",
    "Rome, Italy"
]

cities_list_test=['Pori, Finland',"Lappeenranta, Finland"]

# Resume from an earlier run if a checkpoint exists; a missing or empty file
# simply means starting fresh. Other read errors are allowed to surface.
try:
    df=pd.read_csv(FILENAME)
except (FileNotFoundError, pd.errors.EmptyDataError):
    df=pd.DataFrame()

for city in cities_list:

    print('city {}'.format(city))
    df=scrapeNamedCity(city,df,CLIENT_ID,CLIENT_SECRET)
    print(df.shape)
    df.to_csv(FILENAME,index=False)
df

city London, England
The geographical coordinates of London, England city centre are 51.5073219, -0.1276474.
Skipping London, England-0-0
Skipping London, England-0-1
Skipping London, England-0-2
Skipping London, England-0-3
Skipping London, England-0-4
Skipping London, England-0-5
Skipping London, England-0-6
Skipping London, England-0-7
Skipping London, England-0-8
Skipping London, England-1-0
Skipping London, England-1-1
Skipping London, England-1-2
Skipping London, England-1-3
Skipping London, England-1-4
Skipping London, England-1-5
Skipping London, England-1-6
Skipping London, England-1-7
Skipping London, England-1-8
Skipping London, England-2-0
Skipping London, England-2-1
Skipping London, England-2-2
Skipping London, England-2-3
Skipping London, England-2-4
Skipping London, England-2-5
Skipping London, England-2-6
Skipping London, England-2-7
Skipping London, England-2-8
Skipping London, England-3-0
Skipping London, England-3-1
Skipping London, England-3-2
Skipping London, Engl

Unnamed: 0,Name,Id,Latitude,Longtitude,Categories,City
0,The Wisley,4bf9195c5317a5937abf007f,51.318840,-0.482647,Golf Course,"London, England-0-0"
1,Ockham Bites,4c172c535e88c92829422566,51.316893,-0.453541,Café,"London, England-0-0"
2,Pyrford Golf Club,4c557759a724e21ee3fc1ef8,51.315947,-0.494943,Golf Course,"London, England-0-0"
3,Terrace Restaurant,57385044498ed9b3efb33dae,51.315352,-0.473335,Restaurant,"London, England-0-0"
4,Food Court,5d481c23816c150008c6a819,51.315492,-0.473591,Food Court,"London, England-0-0"
5,Ockham And Wisley Commons,4cca9a8db7d9a1438faffd36,51.315512,-0.454796,Park,"London, England-0-0"
6,Glasshouse Café,4c1f4ce0eac020a1eed74ac2,51.314778,-0.478156,Café,"London, England-0-0"
7,Wisley Shop,51b0b4ab498eb2b09f8b5974,51.314674,-0.471898,Gift Shop,"London, England-0-0"
8,The Glasshouse,4b6ebdb8f964a52094c82ce3,51.314204,-0.478711,Garden,"London, England-0-0"
9,RHS Garden Wisley,4b938c5ff964a520d94734e3,51.313881,-0.473204,Botanical Garden,"London, England-0-0"


In [None]:
# Display the venues DataFrame left in `df` by the scraping cell.
df
