In [1]:
import yaml
import requests
import datetime
import time
import csv
import pandas as pd
import math

# Get all Venue IDs for venues within the bounding box.


def get_delta(lower, upper, length):
    """Return the signed step needed to walk from ``lower`` to ``upper`` in ``length`` equal parts.

    The result is negative when ``upper`` is smaller than ``lower``.
    """
    span = upper - lower
    return span / length


def _fetch_explore_items(search_params):
    """Call the Foursquare ``venues/explore`` endpoint until it yields venue groups.

    Network failures are retried after 10 s; replies that are not JSON, have no
    ``response`` object, or have no ``groups`` entry are retried after 5 s.
    There is no retry limit, so a permanently failing request keeps looping.

    Returns the ``items`` list of the first group, or ``[]`` when ``groups``
    is present but empty.
    """
    while True:
        try:
            reply = requests.get('https://api.foursquare.com/v2/venues/explore',
                                 params=search_params)
        except requests.exceptions.RequestException as e:
            print(e)
            print('Sleeping 10s')
            time.sleep(10)
            # Retry immediately: there is no response object to inspect.
            continue

        try:
            resp = reply.json()['response']
        except (ValueError, KeyError, TypeError):
            # Body was not JSON, or not shaped like {'response': ...}.
            resp = None

        if isinstance(resp, dict) and 'groups' in resp:
            groups = resp['groups']
            return groups[0]['items'] if groups else []

        time.sleep(5)
        print('retry')


def _venue_row(venue):
    """Flatten one venue dict into ``[name, id, lat, lng, categories]``.

    ``categories`` is the ';'-joined list of category names, or ``''`` when the
    venue has none.
    """
    categories = venue.get('categories') or []
    return [venue['name'],
            venue['id'],
            venue['location']['lat'],
            venue['location']['lng'],
            ';'.join(x['name'] for x in categories)]


def getBBoxVenuesNWSE(bound_N,bound_W,bound_S,bound_E,divisions,
                 foursquare_id,foursquare_secret):
    """Scrape venues inside a bounding box with a ``divisions`` x ``divisions`` grid of searches.

    One radius search is issued from the centre of every grid cell. If any
    single search returns as many items as the request ``limit`` (i.e. it may
    have been truncated), everything collected so far is discarded and the
    whole box is scanned again with twice as many divisions.

    Args:
        bound_N, bound_S: latitudes (degrees) of the north and south edges.
        bound_W, bound_E: longitudes (degrees) of the west and east edges.
        divisions: number of grid cells per side; must be a positive integer
            (0 makes ``get_delta`` divide by zero).
        foursquare_id, foursquare_secret: API credentials sent with each request.

    Returns:
        A list of ``[name, id, lat, lng, categories]`` rows, one per returned
        item. Rows are NOT de-duplicated here even though neighbouring searches
        overlap; the callers in this notebook call ``drop_duplicates``.
    """
    # Signed steps: lat_delta walks from bound_N towards bound_S (so it is
    # negative for a normal box), long_delta from bound_W towards bound_E.
    lat_delta = get_delta(bound_N, bound_S, divisions)
    long_delta = get_delta(bound_W, bound_E, divisions)

    # Box size in metres, using the same metres-per-degree factors as the
    # rest of this notebook.
    dist_ns = 110574*abs(bound_N-bound_S)
    dist_ew = 111320*math.cos(math.radians(bound_N))*abs(bound_E-bound_W)

    search_params = {
        'client_id': foursquare_id,
        'client_secret': foursquare_secret,
        'limit': 50,
        'v': '20181209',
        # NOTE(review): this is the full diagonal of one grid cell, i.e. twice
        # the half-diagonal needed to cover a cell from its centre. Kept as-is.
        'radius': math.sqrt(dist_ew*dist_ew+dist_ns*dist_ns)/divisions,
        'sortByDistance': 1
    }

    print('BBox size {}m x {}m. Search r={}m'.format(dist_ns,dist_ew,search_params['radius']))

    search_count = 0
    venues_list = []
    max_response_size = 0

    for row in range(divisions):
        for col in range(divisions):
            # Centre of cell (row, col). The previous code subtracted half of
            # the (negative) lat_delta, which moved every centre one full cell
            # north: row 0 was centred outside the box and the southernmost
            # row was never searched.
            centre_lat = bound_N + (row + 0.5) * lat_delta
            centre_lng = bound_W + (col + 0.5) * long_delta
            search_params.update({'ll': '{},{}'.format(centre_lat, centre_lng)})

            items = _fetch_explore_items(search_params)
            venues_list.extend(_venue_row(item['venue']) for item in items)

            max_response_size = max(max_response_size, len(items))

            if len(items) >= search_params['limit']:
                # The reply hit the limit, so venues may be missing: restart
                # the whole scan on a finer grid.
                print("Recursive split")
                return getBBoxVenuesNWSE(bound_N,bound_W,bound_S,bound_E,2*divisions,
                                         foursquare_id,foursquare_secret)

            search_count += 1
            if search_count % 50 == 0:
                print(search_count)

            # Throttle requests (original author's note: the API gets fussy
            # when more than 5000 requests/hr are made).
            time.sleep(0.6)

    print('{} Unique Venues Scraped: {}.'.format(
        len(venues_list), datetime.datetime.now()))

    print('Maximum response size {} venues.'.format(max_response_size))
    return venues_list

#df=pd.DataFrame(venues_list)
#df.to_csv('tampere_venues.csv',index=False)
#df

def getCenteredVenues(lat,lng,dist_to_side,divisions,foursquare_id,foursquare_secret):
    """Scrape the box centred on (``lat``, ``lng``) that reaches ``dist_to_side`` metres to each side.

    The metre distance is turned into latitude/longitude offsets and the
    resulting north/west/south/east bounds are handed to
    ``getBBoxVenuesNWSE`` together with ``divisions`` and the credentials.
    Returns whatever ``getBBoxVenuesNWSE`` returns.
    """
    # Metres -> degrees. The longitude factor is scaled by cos(latitude).
    lat_offset = dist_to_side / 110574
    lng_offset = dist_to_side / (111320 * math.cos(math.radians(lat)))

    north, south = lat + lat_offset, lat - lat_offset
    west, east = lng - lng_offset, lng + lng_offset

    return getBBoxVenuesNWSE(north, west, south, east, divisions,
                             foursquare_id, foursquare_secret)

def scrapeNamedCity(city,df,foursquare_id,foursquare_secret,dist_to_side=3000,divisions=25):
    """Geocode ``city``, scrape the venues around its centre and add them to ``df``.

    Args:
        city: place name passed to the Nominatim geocoder.
        df: DataFrame of previously scraped venues (may be empty).
        foursquare_id, foursquare_secret: API credentials forwarded to the scraper.
        dist_to_side: metres from the city centre to each side of the scraped box.
        divisions: initial grid divisions per side passed to ``getCenteredVenues``.

    Returns:
        ``df`` with the de-duplicated venues of ``city`` appended (re-indexed),
        or just the new venues when ``df`` holds no data. New rows have the
        columns Name, Id, Latitude, Longtitude, Categories and City.

    Raises:
        ValueError: if the geocoder finds no match for ``city``.
    """
    from geopy.geocoders import Nominatim

    geolocator = Nominatim(user_agent="my-application")

    # Keep retrying the geocoder until it answers. ``Exception`` rather than a
    # bare ``except`` so Ctrl-C / kernel interrupt can still stop the loop.
    while True:
        try:
            location = geolocator.geocode(city)
        except Exception as e:
            print(e)
            time.sleep(3)
        else:
            break

    if location is None:
        raise ValueError('Could not geocode {!r}'.format(city))

    lat = location.latitude
    lng = location.longitude
    print('The geographical coordinates of {} city centre are {}, {}.'.format(city,lat,lng))

    # Use the credentials passed in by the caller (previously the globals
    # CLIENT_ID / CLIENT_SECRET were used and the parameters were ignored).
    venues_list = getCenteredVenues(lat, lng, dist_to_side, divisions,
                                    foursquare_id, foursquare_secret)

    # Naming the columns at construction keeps an empty scrape from failing on
    # a column-count mismatch.
    scraped = pd.DataFrame(
        venues_list,
        columns=['Name','Id','Latitude','Longtitude','Categories'])
    print(scraped.shape)

    dfn = scraped.drop_duplicates().assign(City=city)
    print(dfn.shape)
    print(df.shape)

    if df.size > 0:
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        return pd.concat([df, dfn], ignore_index=True)
    return dfn
    
                                    

In [2]:
import pandas as pd
import numpy as np # library to handle data in a vectorized manner
# !conda install -c conda-forge folium=0.5.0 --yes 
import folium # map rendering library

# !conda install -c conda-forge geopy --yes # uncomment this line if you haven't completed the Foursquare API lab
from geopy.geocoders import Nominatim # convert an address into latitude and longitude values

import matplotlib.cm as cm
import matplotlib.colors as colors


print('Libraries imported.')


Libraries imported.


In [None]:
import getpass
import os

# Foursquare credentials come from the environment (with an interactive prompt
# as fallback) so that secrets are never stored in the notebook.
# NOTE: the key pair that used to be hard-coded here has been published with
# the notebook and should be treated as leaked and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID') or getpass.getpass('Foursquare client id: ')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET') or getpass.getpass('Foursquare client secret: ')

# Earlier hand-picked values, kept for reference:
#tampere_N=61.501303
#tampere_S=61.471321
#tampere_W=23.740965
#tampere_E=23.835300

#tampere_lat=61.4922285 # google maps location
#tampere_lng=23.75483
#dist_to_side=2000 # meters


geolocator = Nominatim(user_agent="my-application")

location = geolocator.geocode('Tampere, Finland')
if location is None:
    # geocode() yielded no match; fail here with a clear message instead of an
    # AttributeError on the next line.
    raise ValueError("Geocoder returned no result for 'Tampere, Finland'")
tampere_lat= location.latitude
tampere_lng = location.longitude
dist_to_side=3000 # meters
print('The geograpical coordinates of city centre are {}, {}.'.format(tampere_lat,tampere_lng))

# Scrape a box reaching dist_to_side metres to each side of the centre,
# starting with a 50 x 50 grid, and save the exact-duplicate-free raw result.
tampere_venues_list=getCenteredVenues(tampere_lat,tampere_lng,dist_to_side,50,CLIENT_ID,CLIENT_SECRET)
df=pd.DataFrame(tampere_venues_list)
df.drop_duplicates(inplace=True)
df.to_csv('tampere_venues_raw.csv',index=False)
df


In [None]:
# Keep one row per venue id (column 1 of the unnamed raw frame), then save.
df.drop_duplicates(subset=[1], inplace=True)
df.to_csv('tampere_venues.csv', index=False)


In [None]:
# Oulu: geocode the city centre, then scrape a box reaching 3000 m to each side.
# Earlier hand-picked centre (Google Maps): 65.012075, 25.465138, with dist_to_side=2000.
location = geolocator.geocode('Oulu, Finland')
oulu_lat, oulu_lng = location.latitude, location.longitude
dist_to_side = 3000  # meters
print('The geograpical coordinates of city centre are {}, {}.'.format(oulu_lat,oulu_lng))

oulu_venues_list = getCenteredVenues(
    oulu_lat, oulu_lng, dist_to_side, 50, CLIENT_ID, CLIENT_SECRET)

# Drop exact duplicate rows and save.
df = pd.DataFrame(oulu_venues_list)
df.drop_duplicates(inplace=True)
df.to_csv('oulu_venues.csv', index=False)
df

In [None]:
# Uppsala: geocode the city centre, then scrape a box reaching 3000 m to each side.
# Earlier hand-picked centre (Google Maps): 59.858554, 17.638929, with dist_to_side=2000.
location = geolocator.geocode('Uppsala, Sweden')
uppsala_lat, uppsala_lng = location.latitude, location.longitude
dist_to_side = 3000  # meters
print('The geograpical coordinates of city centre are {}, {}.'.format(uppsala_lat,uppsala_lng))

uppsala_venues_list = getCenteredVenues(
    uppsala_lat, uppsala_lng, dist_to_side, 50, CLIENT_ID, CLIENT_SECRET)

# Drop exact duplicate rows and save.
df = pd.DataFrame(uppsala_venues_list)
df.drop_duplicates(inplace=True)
df.to_csv('uppsala_venues.csv', index=False)
df

In [None]:
# Lübeck: geocode the city, then scrape a box reaching 3000 m to each side.
# Earlier estimated city centre: 53.866409, 10.686966.
geolocator = Nominatim(user_agent="my-application")
location = geolocator.geocode('Lubeck, Germany')
lubeck_lat, lubeck_lng = location.latitude, location.longitude
dist_to_side = 3000  # meters

print('The geograpical coordinates of Lübeck are {}, {}.'.format(lubeck_lat, lubeck_lng))

lubeck_venues_list = getCenteredVenues(
    lubeck_lat, lubeck_lng, dist_to_side, 50, CLIENT_ID, CLIENT_SECRET)

# Drop exact duplicate rows and save.
df = pd.DataFrame(lubeck_venues_list)
df.drop_duplicates(inplace=True)
df.to_csv('lubeck_venues.csv', index=False)
df

In [None]:
# Look up Lübeck's coordinates again (without scraping) for the map cells below.
geolocator = Nominatim(user_agent="my-application")
address = 'Lubeck, Germany'
location = geolocator.geocode(address)
lubeck_lat, lubeck_lng = location.latitude, location.longitude
print('The geograpical coordinates of Lübeck are {}, {}.'.format(lubeck_lat, lubeck_lng))

In [None]:
# Reload the Lübeck venues saved by the scrape cell. That file was written from
# a frame with default integer column labels, so after the CSV round trip the
# columns are addressed by the strings '0'..'4'.
df=pd.read_csv('lubeck_venues.csv')



In [None]:
# Create a folium map centred on the geocoded Lübeck coordinates and display it.
map_l = folium.Map(location=[lubeck_lat,lubeck_lng], zoom_start=13)
map_l

In [None]:

# Add one circle marker per venue row. Columns '2' and '3' are the latitude and
# longitude written by the scraper (rows are [name, id, lat, lng, categories]).
for lat, lon in zip(df['2'], df['3']):
    marker = folium.CircleMarker(
        [lat, lon],
        radius=5,
        fill=True,
        fill_opacity=0.7,
    )
    marker.add_to(map_l)

map_l

In [None]:
# Inspect column '2' (the latitude values used for the map markers).
df['2']

In [None]:
# NOTE(review): looks like a leftover kernel sanity check; it has no effect on
# the analysis and is a candidate for deletion.
print("Hello world")


In [3]:
import getpass
import os

# Foursquare credentials come from the environment (with an interactive prompt
# as fallback) so that secrets are never stored in the notebook.
# NOTE: both key pairs that used to be hard-coded in this cell have been
# published with the notebook and should be treated as leaked and rotated.
CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID') or getpass.getpass('Foursquare client id: ')
CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET') or getpass.getpass('Foursquare client secret: ')

# Cities to scrape, in processing order.
cities_list=[
'Tampere, Finland',
'Oulu, Finland',
'Turku, Finland',
'Jyvaskyla, Finland',
'Lahti, Finland',
'Kuopio, Finland',
'Pori, Finland',
'Kouvola, Finland',
'Joensuu, Finland',
'Lappeenranta, Finland',
'Goteborg, Sweden',
'Malmo, Sweden',
'Uppsala, Sweden',
'Vasteras, Sweden',
'Orebro, Sweden',
'Linkoping, Sweden',
'Helsingborg, Sweden',
'Norrkoping, Sweden',
'Jonkoping, Sweden',
'Lund, Sweden',
'Kiel, Germany',
'Halle, Germany',
'Magdeburg, Germany',
'Lubeck, Germany',
'Rostock, Germany',
'Potsdam, Germany',
'Oldenburg, Germany',
'Osnabruck, Germany',
'Wolfsburg, Germany',
'Gottingen, Germany',
'Bremerhaven, Germany']

# Short list for quick trial runs (not used by the loop below).
cities_list_test=['Pori, Finland',"Lappeenranta, Finland"]

# Resume from an earlier run if its results file exists. Only "no file" and
# "empty file" are treated as a fresh start; any other read error is raised so
# that a damaged results file is not silently overwritten further down.
try:
    df=pd.read_csv('scraped_venues.csv')
except (FileNotFoundError, pd.errors.EmptyDataError):
    df=pd.DataFrame()

for city in cities_list:
    # On a fresh start df has no 'City' column yet, so nothing counts as processed.
    processed_set=set(df['City'].unique()) if 'City' in df.columns else set()
    if city in processed_set:
        print('Skipping {}'.format(city))
    else:
        print(city)
        df=scrapeNamedCity(city,df,CLIENT_ID,CLIENT_SECRET)
        print(df.shape)
        # Save after every city so an interrupted run can be resumed.
        df.to_csv('scraped_venues.csv',index=False)
df

Skipping Tampere, Finland
Skipping Oulu, Finland
Skipping Turku, Finland
Skipping Jyvaskyla, Finland
Skipping Lahti, Finland
Skipping Kuopio, Finland
Skipping Pori, Finland
Skipping Kouvola, Finland
Skipping Joensuu, Finland
Skipping Lappeenranta, Finland
Skipping Goteborg, Sweden
Skipping Malmo, Sweden
Skipping Uppsala, Sweden
Skipping Vasteras, Sweden
Skipping Orebro, Sweden
Skipping Linkoping, Sweden
Skipping Helsingborg, Sweden
Skipping Norrkoping, Sweden
Skipping Jonkoping, Sweden
Skipping Lund, Sweden
Skipping Kiel, Germany
Skipping Halle, Germany
Skipping Magdeburg, Germany
Skipping Lubeck, Germany
Skipping Rostock, Germany
Skipping Potsdam, Germany
Oldenburg, Germany
The geographical coordinates of Oldenburg, Germany city centre are 53.1389753, 8.2146017.
BBox size 5999.999999999698m x 5996.209881554377m. Search r=339.30407116601884m
50
100
150
200
250
300
Recursive split
BBox size 5999.999999999698m x 5996.209881554377m. Search r=169.65203558300942m
50
100
150
200
250
300
350


Unnamed: 0,Name,Id,Latitude,Longtitude,Categories,City
0,Halkoniemi,4fd0e9bae4b01e9f635eebbe,61.524431,23.705198,Pier,"Tampere, Finland"
1,M/Y RUFUS II,4fd0cb55e4b06c6784ad68c6,61.525321,23.708508,Boat or Ferry,"Tampere, Finland"
2,Lentävänniemen pururata,51b6f427498ec741135a3a50,61.525504,23.709923,Trail,"Tampere, Finland"
3,Reuharin pikkukenttä,4e74553b1f6e072f132ad6d8,61.525299,23.718105,Soccer Field,"Tampere, Finland"
4,På bryggan vid Nässy,55cf481d498e0153ca37f0ba,61.524904,23.720144,Bathing Area,"Tampere, Finland"
5,Nuotiopaikka,501169b5e4b0abd87463a6b1,61.525513,23.728985,Campground,"Tampere, Finland"
6,Reuharin kalliot,4fed8c5ee4b0430e0a51a047,61.525703,23.728678,Beach,"Tampere, Finland"
7,M/S Sauna,51a8a981e4b09086363d8321,61.526184,23.744119,Boat or Ferry,"Tampere, Finland"
8,järvellä,4fc25556e4b0852f4c71935c,61.524642,23.743778,Lake,"Tampere, Finland"
9,Pölkkylänniemi,51adda85498e888a291d4074,61.523309,23.706919,Harbor / Marina,"Tampere, Finland"


In [None]:
# Display the current contents of df.
df
