In [None]:
import xml.etree.ElementTree as ET
import requests
import pandas as pd
import numpy as np
import pickle

## Data for the project:
1. You can download data from: https://download.bbbike.org/osm/bbbike/
2. Store the downloaded `.osm` file in the `../data` directory, named after the city (e.g. `../data/Tucson.osm`)

In [None]:
# City whose OSM extract is processed; it is used as the file-name stem
# for the input .osm file and for every pickle written by this notebook.
city = 'Tucson'

# Directory holding the OSM extract and the generated pickles. The trailing
# slash matters: file paths are built by plain string concatenation.
data_dir = '../data/'

In [None]:
# Parse the OSM extract of the configured city; `root` is the document's
# top-level element, from which the node and way elements are read later.
osm_file = ''.join([data_dir, city, '.osm'])
tree = ET.parse(osm_file)
root = tree.getroot()

## Description of OSM properties:

OpenStreetMap properties can be found at https://wiki.openstreetmap.org/wiki/Map_Features

We have chosen a few features for this project. Their descriptions follow.
interesting_node_tags: node properties which positively correlate with intersection clusters.

uninteresting_tags: node and way properties which are used to filter out irrelevant ways and nodes that do not characterise intersections.


In [None]:
# Tag keys that disqualify an element: a node or way carrying any of these
# keys is left out when the dataframes are built.
uninteresting_tags = [
    'aerialway', 'aeroway', 'amenity', 'barrier', 'boundary', 'building',
    'craft', 'geological', 'historic', 'landuse', 'leisure', 'man_made',
    'military', 'natural', 'office', 'power', 'line', 'public_transport',
    'shop', 'sport', 'waterway',
]

# Tag keys whose values are recorded for each node.
interesting_node_tags = [
    'crossing',
    'toll',
    'traffic_sign',
]

# Tag keys whose values are recorded for each way.
interesting_way_tags = [
    'name', 'hgv', 'lanes', 'oneway', 'bicycle', 'highway', 'bridge',
    'layer', 'cycleway', 'sidewalk', 'busway', 'abutters', 'bicycle_road',
    'driving_side', 'ford', 'ice_road', 'incline', 'junction', 'lit',
    'motorroad', 'mountain_pass', 'mtb:scale', 'mtb:scale:uphill',
    'mtb:description', 'overtaking', 'parking:condition', 'parking:lane',
    'parking_places', 'sac_scale', 'service', 'surface', 'tactile_paving',
    'tracktype', 'traffic_calming', 'trail_visibility', 'winter_road',
    'place', 'railway', 'electrified', 'embankment', 'route', 'tourism',
    'charge', 'location', 'narrow', 'tunnel', 'width', 'access',
    'agriculture', 'maxheight', 'maxspeed', 'maxlength', 'maxstay',
    'maxwidth', 'maxweight', 'minspeed', 'noexit',
]

## Create a dataframe for nodes

In [None]:
# Build the nodes dataframe: one row per OSM <node> element that carries none
# of the uninteresting tag keys.
#
# Rows are collected in a list and converted to a dataframe once at the end.
# The previous row-by-row `DataFrame.append` re-appended the last kept row
# whenever a node was filtered out (or raised NameError if the very first
# node was filtered), was quadratic in the number of nodes, and no longer
# exists in pandas >= 2.0.
node_columns = ['lat', 'lon', 'id', 'version'] + interesting_node_tags

nodes = root.findall('./node')

node_records = []

for node in nodes:
    # Core attributes first, then a None placeholder per interesting tag so
    # that every record exposes the same keys.
    node_d = {
        'lat': node.get('lat'),
        'lon': node.get('lon'),
        'id': node.get('id'),
        'version': node.get('version'),
    }
    for tag_name in interesting_node_tags:
        node_d[tag_name] = None

    keep_node = True
    for tag in node.findall('./tag'):
        key = tag.get('k')

        # A single uninteresting key disqualifies the whole node.
        if key in uninteresting_tags:
            keep_node = False
            break

        if key in interesting_node_tags:
            node_d[key] = tag.get('v')

    if keep_node:
        node_records.append(node_d)

# Passing `columns` keeps the expected schema even when no node survives the
# filter; the index counts kept rows starting at 1, as before.
nodes_df = pd.DataFrame(node_records, columns=node_columns)
nodes_df.index = pd.RangeIndex(1, len(nodes_df) + 1)

In [None]:
# Keep a single row per node id, then persist the node table so later
# sessions can reload it instead of re-parsing the OSM file.
nodes_df_file = ''.join([data_dir, city, '_nodes_df.pkl'])

nodes_df = nodes_df.drop_duplicates(subset=['id'])
nodes_df.to_pickle(nodes_df_file)

## Create dataframe for ways

In [None]:
# Build the ways dataframe: one row per OSM <way> element that carries none
# of the uninteresting tag keys. Each row holds the way id, its version, the
# ordered list of referenced node ids and the value of every interesting tag.
#
# Rows are collected in a list and converted to a dataframe once at the end.
# The previous row-by-row `DataFrame.append` re-appended the last kept row
# whenever a way was filtered out (or raised NameError if the very first way
# was filtered), was quadratic in the number of ways, and no longer exists in
# pandas >= 2.0.
way_columns = ['id', 'version', 'nodes'] + interesting_way_tags

ways = root.findall('./way')

way_records = []

for way in ways:
    way_d = {
        'id': way.get('id'),
        'version': way.get('version'),
        # Node references in document order; an empty list when the way has
        # no <nd> children.
        'nodes': [nd.get('ref') for nd in way.findall('./nd')],
    }
    # None placeholder per interesting tag so every record has the same keys.
    for tag_name in interesting_way_tags:
        way_d[tag_name] = None

    keep_way = True
    for tag in way.findall('./tag'):
        key = tag.get('k')

        # A single uninteresting key disqualifies the whole way.
        if key in uninteresting_tags:
            keep_way = False
            break

        if key in interesting_way_tags:
            way_d[key] = tag.get('v')

    if keep_way:
        way_records.append(way_d)

# Passing `columns` keeps the expected schema even when no way survives the
# filter; the index counts kept rows starting at 1, as before.
ways_df = pd.DataFrame(way_records, columns=way_columns)
ways_df.index = pd.RangeIndex(1, len(ways_df) + 1)

In [None]:
# Persist the way table so later sessions can reload it instead of
# re-parsing the OSM file.
ways_df_file = ''.join([data_dir, city, '_ways_df.pkl'])
ways_df.to_pickle(ways_df_file)

### Read data from stored pickle files
If the pickle files have already been created, the following cell can be run instead of repeating the previous steps.

In [None]:
# Reload the node and way tables from the pickles written by the build cells.
#
# The pickle paths are rebuilt here from `data_dir` and `city`, so this cell
# only depends on the import and configuration cells and really can be run
# instead of the (slow) build cells on a fresh kernel.
#
# NOTE: unpickling can execute arbitrary code; only load pickle files that
# were produced by this notebook.
nodes_df_file = data_dir + city + '_nodes_df.pkl'
ways_df_file = data_dir + city + '_ways_df.pkl'

nodes_df = pd.read_pickle(nodes_df_file)
ways_df = pd.read_pickle(ways_df_file)

# Keep a single row per id in both tables.
nodes_df = nodes_df.drop_duplicates(subset='id')
ways_df = ways_df.drop_duplicates(subset='id')

## Properties for intersection
These OSM properties have been chosen to represent the ways/nodes forming intersections.

In [None]:
# Way tag keys that are propagated onto the nodes of each way.
#
# Duplicates (e.g. the repeated 'maxspeed') are removed with dict.fromkeys,
# which keeps first-seen order. The previous list(set(...)) produced a
# different order on every kernel start because string hashing is
# randomised, which made the column order of the node table unstable.
interesting_way_tags = list(dict.fromkeys([
    'name', 'hgv', 'lanes', 'oneway', 'bicycle', 'highway', 'bridge',
    'layer', 'cycleway', 'sidewalk', 'busway', 'abutters', 'bicycle_road',
    'driving_side', 'ford', 'ice_road', 'incline', 'junction', 'lit',
    'motorroad', 'mountain_pass', 'mtb:scale', 'mtb:scale:uphill',
    'mtb:description', 'overtaking', 'parking:condition', 'parking:lane',
    'parking_places', 'sac_scale', 'service', 'surface', 'tactile_paving',
    'tracktype', 'traffic_calming', 'trail_visibility', 'winter_road',
    'place', 'railway', 'electrified', 'embankment', 'route', 'tourism',
    'charge', 'location', 'narrow', 'tunnel', 'width', 'access',
    'agriculture', 'maxheight', 'maxspeed', 'maxlength', 'maxstay',
    'maxwidth', 'maxweight', 'minspeed', 'noexit', 'maxspeed',
]))

### Create empty lists in nodes dataframe for all interesting_way_tags

In [None]:
# Give every node its own empty list in a 'ways' column and in one column per
# interesting way tag; the lists are filled in when ways are attached to
# their nodes. Each row gets a distinct list object.
for feature in ['ways'] + interesting_way_tags:
    nodes_df[feature] = [[] for _ in range(len(nodes_df))]

In [None]:
# Attach every way to the nodes it passes through.
#
# For each way (in ways_df order) and each distinct node it references, the
# way id is appended to that node's 'ways' list and the way's value for every
# interesting tag is appended to the node's list for that tag.
#
# Node rows are located through an id -> row-position lookup built once,
# instead of scanning the whole node table with `isin` for every way
# (which was O(ways x nodes)) and abusing `Series.apply` for its side effects.

# Map each node id to the positions of the rows carrying it.
node_rows = {}
for pos, node_id in enumerate(nodes_df['id'].tolist()):
    node_rows.setdefault(node_id, []).append(pos)

# tolist() on these object columns returns the very list objects stored in
# nodes_df, so appending to them updates the dataframe cells in place.
node_way_lists = nodes_df['ways'].tolist()
node_feature_lists = {feature: nodes_df[feature].tolist()
                      for feature in interesting_way_tags}

# Pull the way columns out once so the loop works on plain Python lists.
way_ids = ways_df['id'].tolist()
way_node_refs = ways_df['nodes'].tolist()
way_feature_values = {feature: ways_df[feature].tolist()
                      for feature in interesting_way_tags}

for i in range(len(ways_df)):
    # set(): a node referenced several times by one way (e.g. a closed loop)
    # receives that way only once, matching the former `isin` selection.
    for node_ref in set(way_node_refs[i]):
        # Node ids that were filtered out of nodes_df are simply skipped.
        for pos in node_rows.get(node_ref, ()):
            # insert the way id in the node's 'ways' list
            node_way_lists[pos].append(way_ids[i])

            # insert the way's value for every interesting tag
            for feature in interesting_way_tags:
                node_feature_lists[feature][pos].append(way_feature_values[feature][i])

In [None]:
# Count, for every node, how many ways were attached to it: the length of
# the node's 'ways' list.
nodes_df['no_of_ways'] = nodes_df['ways'].apply(len)

## Store all dataframes with properties in pickle

In [None]:
# Output locations for the enriched node table and for its intersection subset.
nodes_df_features = ''.join([data_dir, city, '_nodes_df_features.pkl'])
intersections_df_features = ''.join([data_dir, city, '_intersections_df_features.pkl'])

# Persist every node together with its per-way feature lists.
nodes_df.to_pickle(nodes_df_features)

# Nodes attached to more than two ways are kept as intersections and stored
# in their own pickle.
intersections_df = nodes_df.loc[nodes_df['no_of_ways'] > 2]
intersections_df.to_pickle(intersections_df_features)