<a href="https://colab.research.google.com/github/vishalkumarlondon/CreatorCities/blob/main/notebooks/Get_OSM_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install osmnx
!pip install OSMPythonTools

In [None]:
# If running on colab, make sure to restart runtime before running this cell

import osmnx as ox
import pandas as pd
import numpy as np

# Step 0 - Get City Data

- Data source: Twitter


In [None]:
# Load the city table produced by the Twitter analysis and prepare one OSM search string per city.
all_cities = (
    pd.read_csv('https://raw.githubusercontent.com/vishalkumarlondon/CreatorCities/main/data/creator_cities_top300.csv')
    # discard columns that hold no data at all (rows are not touched)
    .dropna(how='all', axis=1)
    # default search string: "<state>, <country>"
    .assign(OSM_Search_Query=lambda cities: cities['state'] + ', ' + cities['country'])
    # keep only the first row for every distinct value of `state`
    .drop_duplicates(subset='state')
    # Cape Town gets a more specific search string than the default one
    .assign(OSM_Search_Query=lambda cities: np.where(
        cities['state'] == "Cape Town",
        "Cape Town, Western Cape, South Africa",
        cities['OSM_Search_Query'],
    ))
)
print('Dataframe', all_cities.shape)  # (rows, columns) after cleaning
all_cities.head()  # preview the first rows

Dataframe (236, 11)


Unnamed: 0.1,Unnamed: 0,City,Pro,Expert,Expert+,Creator Count,location,coordinates,state,country,OSM_Search_Query
0,0,"London, UK",14436,1609,179,16224,"London, Greater London, England, United Kingdom","(51.5073219, -0.1276474, 0.0)",London,United Kingdom,"London, United Kingdom"
1,1,"Los Angeles, CA",12629,2197,258,15084,"Los Angeles, Los Angeles County, California, U...","(34.0536909, -118.242766, 0.0)",Los Angeles,United States,"Los Angeles, United States"
2,2,"New York, NY",8098,1268,191,9557,"New York, United States","(40.7127281, -74.0060152, 0.0)",New York,United States,"New York, United States"
3,3,"Washington, D.C.",5664,804,92,6560,"Embassy of Hungary, 1500, Rhode Island Avenue ...","(38.9074322, -77.0350922, 0.0)",Washington D.C.,United States,"Washington D.C., United States"
4,5,"Austin, TX",4635,800,98,5533,"Austin, Travis County, Texas, United States","(30.2711286, -97.7436995, 0.0)",Austin,United States,"Austin, United States"


# Step 1 - Get AreaID

In [None]:
# OSMPythonTools entry points used by this notebook.
from OSMPythonTools.api import Api
from OSMPythonTools.nominatim import Nominatim
from OSMPythonTools.overpass import Overpass, overpassQueryBuilder

# One client object per service; `nominatim` and `overpass` are used by the cells below.
osm_api = Api()
nominatim = Nominatim()
overpass = Overpass()

In [None]:
# One Nominatim search string per city.
city_list = all_cities['OSM_Search_Query']
print(len(city_list))

# Query Nominatim for every search string and keep the areaId() of each result, in city order.
areaId = [nominatim.query(place).areaId() for place in city_list]

In [None]:
# Pair every search string with the area id looked up for it, on a fresh 0..n-1 index.
area_ID_df = pd.DataFrame(
    {'OSM_Search_Query': city_list, 'areaID': areaId}
).reset_index(drop=True)
area_ID_df.head()

Unnamed: 0,OSM_Search_Query,areaID
0,"London, United Kingdom",3600066000.0
1,"Los Angeles, United States",3600207000.0
2,"New York, United States",3600176000.0
3,"Washington D.C., United States",3605396000.0
4,"Austin, United States",3600113000.0


In [None]:
# Row count of the lookup table (one row per search string).
area_ID_df.shape[0]

In [None]:
# Search strings for which no area id is available.
area_ID_df.loc[area_ID_df["areaID"].isna()]

In [None]:
# Attach the looked-up area ids to the city table, matching rows on the search string.
all_cities_new = all_cities.merge(area_ID_df, how='outer', on='OSM_Search_Query')
print(all_cities_new.shape)
all_cities_new.head()

In [None]:
# Number of distinct search strings after the merge.
all_cities_new['OSM_Search_Query'].nunique()

# Step 2 - OSMPythonTools

Get OpenStreetMap data for each city by querying Overpass (via OSMPythonTools) with the city's areaID.

In [None]:
# Keep only the cities whose area lookup returned an id, then store that id as an integer.
# - .astype() returns a new frame, so nothing is assigned into a filtered slice.
# - The width is spelled out as 64-bit because the ids are around 3.6e9, which does not fit
#   in 32 bits (what a plain `int` maps to on some platforms).
all_cities_new = (
    all_cities_new
    .loc[all_cities_new['areaID'].notna()]
    .astype({'areaID': 'int64'})
)

In [None]:
# Split the city table into consecutive 30-row chunks (by position);
# the last chunk takes every remaining row from position 210 onwards.
cities_0030 = all_cities_new.iloc[:30]
cities_3060 = all_cities_new.iloc[30:60]
cities_6090 = all_cities_new.iloc[60:90]
cities_90120 = all_cities_new.iloc[90:120]
cities_120150 = all_cities_new.iloc[120:150]
cities_150180 = all_cities_new.iloc[150:180]
cities_180210 = all_cities_new.iloc[180:210]
cities_210240 = all_cities_new.iloc[210:]

## Amenity


In [None]:
def GetAmenityNode(data):
  """Fetch every OSM node that carries an ``amenity`` tag for each city in ``data``.

  Parameters
  ----------
  data : pandas.DataFrame
      Needs the columns ``areaID`` and ``OSM_Search_Query``; rows are processed in order.

  Returns
  -------
  pandas.DataFrame
      One row per returned node with the columns ``OSM_Search_Query``, ``areaID``, ``type``,
      ``lat``, ``lon``, ``id``, ``uid``, ``tags.name`` and ``tags.amenity``, on a fresh
      0..n-1 index. Missing values are replaced by 0. The frame has no rows when no city
      could be fetched.
  """
  columns = ['OSM_Search_Query', 'areaID', 'type','lat','lon','id','uid','tags.name','tags.amenity']
  cities_node_df = []

  for city_areaID, city_name in zip(data.areaID, data.OSM_Search_Query):
    try:
      city_query = overpassQueryBuilder(area=city_areaID, elementType='node', selector='amenity', out='meta')
      city_result_json = overpass.query(city_query).toJSON()
      city_result_df = pd.json_normalize(city_result_json, record_path=['elements'])
    except Exception as err:
      # Best effort: a failing city must not abort the batch, but report which one was skipped.
      print(f'GetAmenityNode: skipping {city_name} (areaID={city_areaID}): {err!r}')
      continue
    city_result_df['areaID'] = city_areaID
    city_result_df['OSM_Search_Query'] = city_name
    cities_node_df.append(city_result_df)

  if not cities_node_df:
    # pd.concat raises on an empty list; return an empty frame with the usual columns instead.
    return pd.DataFrame(columns=columns)

  # reindex keeps the output columns fixed even if one of them never showed up in the responses.
  return pd.concat(cities_node_df).reindex(columns=columns).fillna(0).reset_index(drop=True)

In [None]:
# Fetch amenity nodes one city chunk at a time; every call returns a single frame for its chunk.
# The numeric suffix of each result name is the upper bound of the chunk it was built from.
amenity_cities_030 = GetAmenityNode(cities_0030)
amenity_cities_060 = GetAmenityNode(cities_3060)
amenity_cities_090 = GetAmenityNode(cities_6090)
amenity_cities_120 = GetAmenityNode(cities_90120)
amenity_cities_150 = GetAmenityNode(cities_120150)
amenity_cities_180 = GetAmenityNode(cities_150180)
amenity_cities_210 = GetAmenityNode(cities_180210)
amenity_cities_240 = GetAmenityNode(cities_210240)

In [None]:
# Stack the eight per-chunk amenity tables into one frame (row labels are kept as they are).
all_df_amenity_node = pd.concat(
    objs=[
        amenity_cities_030,
        amenity_cities_060,
        amenity_cities_090,
        amenity_cities_120,
        amenity_cities_150,
        amenity_cities_180,
        amenity_cities_210,
        amenity_cities_240,
    ]
)

print(all_df_amenity_node.shape)
all_df_amenity_node.head()

In [None]:
# Number of distinct cities that contributed at least one amenity row.
all_df_amenity_node['OSM_Search_Query'].nunique()

## Tourism


In [None]:
# NODE
def GetTourismNode(data):
  """Fetch every OSM node that carries a ``tourism`` tag for each city in ``data``.

  Parameters
  ----------
  data : pandas.DataFrame
      Needs the columns ``areaID`` and ``OSM_Search_Query``; rows are processed in order.

  Returns
  -------
  pandas.DataFrame
      One row per returned node with the columns ``OSM_Search_Query``, ``areaID``, ``type``,
      ``lat``, ``lon``, ``id``, ``uid``, ``tags.name`` and ``tags.tourism``, on a fresh
      0..n-1 index. Missing values are replaced by 0. The frame has no rows when no city
      could be fetched.
  """
  columns = ['OSM_Search_Query', 'areaID', 'type','lat','lon','id','uid','tags.name','tags.tourism']
  cities_node_df = []

  for city_areaID, city_name in zip(data.areaID, data.OSM_Search_Query):
    try:
      city_query = overpassQueryBuilder(area=city_areaID, elementType='node', selector='tourism', out='meta')
      city_result_json = overpass.query(city_query).toJSON()
      city_result_df = pd.json_normalize(city_result_json, record_path=['elements'])
    except Exception as err:
      # Best effort: a failing city must not abort the batch, but report which one was skipped.
      print(f'GetTourismNode: skipping {city_name} (areaID={city_areaID}): {err!r}')
      continue
    city_result_df['areaID'] = city_areaID
    city_result_df['OSM_Search_Query'] = city_name
    cities_node_df.append(city_result_df)

  if not cities_node_df:
    # pd.concat raises on an empty list; return an empty frame with the usual columns instead.
    return pd.DataFrame(columns=columns)

  # reindex keeps the output columns fixed even if one of them never showed up in the responses.
  return pd.concat(cities_node_df).reindex(columns=columns).fillna(0).reset_index(drop=True)

In [None]:
# Fetch tourism nodes one city chunk at a time; every call returns a single frame for its chunk.
# The numeric suffix of each result name is the upper bound of the chunk it was built from.
tourism_cities_030 = GetTourismNode(cities_0030)
tourism_cities_060 = GetTourismNode(cities_3060)
tourism_cities_090 = GetTourismNode(cities_6090)
tourism_cities_120 = GetTourismNode(cities_90120)
tourism_cities_150 = GetTourismNode(cities_120150)
tourism_cities_180 = GetTourismNode(cities_150180)
tourism_cities_210 = GetTourismNode(cities_180210)
tourism_cities_240 = GetTourismNode(cities_210240)

In [None]:
# Stack the eight per-chunk tourism tables into one frame (row labels are kept as they are).
all_df_tourism_node = pd.concat(
    objs=[
        tourism_cities_030,
        tourism_cities_060,
        tourism_cities_090,
        tourism_cities_120,
        tourism_cities_150,
        tourism_cities_180,
        tourism_cities_210,
        tourism_cities_240,
    ]
)

print(all_df_tourism_node.shape)
all_df_tourism_node.head()

In [None]:
# Number of distinct cities that contributed at least one tourism row.
all_df_tourism_node['OSM_Search_Query'].nunique()

## Leisure


In [None]:
# NODE
def GetLeisureNode(data):
  """Fetch every OSM node that carries a ``leisure`` tag for each city in ``data``.

  Parameters
  ----------
  data : pandas.DataFrame
      Needs the columns ``areaID`` and ``OSM_Search_Query``; rows are processed in order.

  Returns
  -------
  pandas.DataFrame
      One row per returned node with the columns ``OSM_Search_Query``, ``areaID``, ``type``,
      ``lat``, ``lon``, ``id``, ``uid``, ``tags.name`` and ``tags.leisure``, on a fresh
      0..n-1 index. Missing values are replaced by 0. The frame has no rows when no city
      could be fetched.
  """
  columns = ['OSM_Search_Query', 'areaID', 'type','lat','lon','id','uid','tags.name','tags.leisure']
  cities_node_df = []

  for city_areaID, city_name in zip(data.areaID, data.OSM_Search_Query):
    try:
      city_query = overpassQueryBuilder(area=city_areaID, elementType='node', selector='leisure', out='meta')
      city_result_json = overpass.query(city_query).toJSON()
      city_result_df = pd.json_normalize(city_result_json, record_path=['elements'])
    except Exception as err:
      # Best effort: a failing city must not abort the batch, but report which one was skipped.
      print(f'GetLeisureNode: skipping {city_name} (areaID={city_areaID}): {err!r}')
      continue
    city_result_df['areaID'] = city_areaID
    city_result_df['OSM_Search_Query'] = city_name
    cities_node_df.append(city_result_df)

  if not cities_node_df:
    # pd.concat raises on an empty list; return an empty frame with the usual columns instead.
    return pd.DataFrame(columns=columns)

  # reindex keeps the output columns fixed even if one of them never showed up in the responses.
  return pd.concat(cities_node_df).reindex(columns=columns).fillna(0).reset_index(drop=True)

In [None]:
# Fetch leisure nodes one city chunk at a time; every call returns a single frame for its chunk.
# The numeric suffix of each result name is the upper bound of the chunk it was built from.
leisure_cities_030 = GetLeisureNode(cities_0030)
leisure_cities_060 = GetLeisureNode(cities_3060)
leisure_cities_090 = GetLeisureNode(cities_6090)
leisure_cities_120 = GetLeisureNode(cities_90120)
leisure_cities_150 = GetLeisureNode(cities_120150)
leisure_cities_180 = GetLeisureNode(cities_150180)
leisure_cities_210 = GetLeisureNode(cities_180210)
leisure_cities_240 = GetLeisureNode(cities_210240)

In [None]:
# Stack the eight per-chunk leisure tables into one frame (row labels are kept as they are).
all_df_leisure_node = pd.concat(
    objs=[
        leisure_cities_030,
        leisure_cities_060,
        leisure_cities_090,
        leisure_cities_120,
        leisure_cities_150,
        leisure_cities_180,
        leisure_cities_210,
        leisure_cities_240,
    ]
)

print(all_df_leisure_node.shape)
all_df_leisure_node.head()

(449430, 9)


Unnamed: 0,OSM_Search_Query,areaID,type,lat,lon,id,uid,tags.name,tags.amenity
0,"Moscow, Russia",3602555133,node,55.816129,37.532389,40889936.0,519501.0,ЕКА,fuel
1,"Moscow, Russia",3602555133,node,55.836366,37.503649,40901233.0,9451067.0,Газпромнефть,fuel
2,"Moscow, Russia",3602555133,node,55.850427,37.535671,40903852.0,12100948.0,Лукойл,fuel
3,"Moscow, Russia",3602555133,node,55.88005,37.481759,40905170.0,9451067.0,Газпромнефть,fuel
4,"Moscow, Russia",3602555133,node,55.823834,37.558386,43076189.0,9451067.0,Роснефть,fuel


In [None]:
# Number of distinct cities that contributed at least one leisure row.
all_df_leisure_node['OSM_Search_Query'].nunique()

65

## Nature


In [None]:
# NODE
def GetNatureNode(data):
  """Fetch every OSM node that carries a ``natural`` tag for each city in ``data``.

  Parameters
  ----------
  data : pandas.DataFrame
      Needs the columns ``areaID`` and ``OSM_Search_Query``; rows are processed in order.

  Returns
  -------
  pandas.DataFrame
      One row per returned node with the columns ``OSM_Search_Query``, ``areaID``, ``type``,
      ``lat``, ``lon``, ``id``, ``uid``, ``tags.name`` and ``tags.natural``, on a fresh
      0..n-1 index. Missing values are replaced by 0. The frame has no rows when no city
      could be fetched.
  """
  columns = ['OSM_Search_Query', 'areaID', 'type','lat','lon','id','uid','tags.name','tags.natural']
  cities_node_df = []

  for city_areaID, city_name in zip(data.areaID, data.OSM_Search_Query):
    try:
      city_query = overpassQueryBuilder(area=city_areaID, elementType='node', selector='natural', out='meta')
      city_result_json = overpass.query(city_query).toJSON()
      city_result_df = pd.json_normalize(city_result_json, record_path=['elements'])
    except Exception as err:
      # Best effort: a failing city must not abort the batch, but report which one was skipped.
      print(f'GetNatureNode: skipping {city_name} (areaID={city_areaID}): {err!r}')
      continue
    city_result_df['areaID'] = city_areaID
    city_result_df['OSM_Search_Query'] = city_name
    cities_node_df.append(city_result_df)

  if not cities_node_df:
    # pd.concat raises on an empty list; return an empty frame with the usual columns instead.
    return pd.DataFrame(columns=columns)

  # The result is built only from the frames collected in this call, never from a notebook-level variable.
  # reindex keeps the output columns fixed even if one of them never showed up in the responses.
  return pd.concat(cities_node_df).reindex(columns=columns).fillna(0).reset_index(drop=True)

In [None]:
# Run GetNatureNode one city chunk at a time; every call returns a single frame for its chunk.
# The numeric suffix of each result name is the upper bound of the chunk it was built from.
natural_cities_030 = GetNatureNode(cities_0030)
natural_cities_060 = GetNatureNode(cities_3060)
natural_cities_090 = GetNatureNode(cities_6090)
natural_cities_120 = GetNatureNode(cities_90120)
natural_cities_150 = GetNatureNode(cities_120150)
natural_cities_180 = GetNatureNode(cities_150180)
natural_cities_210 = GetNatureNode(cities_180210)
natural_cities_240 = GetNatureNode(cities_210240)

In [None]:
# Stack the eight per-chunk GetNatureNode results into one frame (row labels are kept as they are).
all_df_natural_node = pd.concat(
    objs=[
        natural_cities_030,
        natural_cities_060,
        natural_cities_090,
        natural_cities_120,
        natural_cities_150,
        natural_cities_180,
        natural_cities_210,
        natural_cities_240,
    ]
)

print(all_df_natural_node.shape)
all_df_natural_node.head()

(449430, 9)


Unnamed: 0,OSM_Search_Query,areaID,type,lat,lon,id,uid,tags.name,tags.amenity
0,"Moscow, Russia",3602555133,node,55.816129,37.532389,40889936.0,519501.0,ЕКА,fuel
1,"Moscow, Russia",3602555133,node,55.836366,37.503649,40901233.0,9451067.0,Газпромнефть,fuel
2,"Moscow, Russia",3602555133,node,55.850427,37.535671,40903852.0,12100948.0,Лукойл,fuel
3,"Moscow, Russia",3602555133,node,55.88005,37.481759,40905170.0,9451067.0,Газпромнефть,fuel
4,"Moscow, Russia",3602555133,node,55.823834,37.558386,43076189.0,9451067.0,Роснефть,fuel


In [None]:
# Number of distinct search strings present in the combined frame.
all_df_natural_node['OSM_Search_Query'].nunique()

65