In [1]:
from pymongo import MongoClient
import pprint

# Connect to the local MongoDB server and select the "project" database.
mongo_uri = "mongodb://localhost:27017"
client = MongoClient(mongo_uri)
db = client["project"]

In [2]:
# Total number of documents in the oxford collection
all_docs = db.oxford.find()
all_docs.count()

324550

In [3]:
# Distinct values of the "category" field
db["oxford"].distinct("category")

[u'node', u'way', u'relation']

In [4]:
# How many documents are nodes
node_filter = { "category" : "node" }
db.oxford.find(node_filter).count()

273182

In [5]:
# How many documents are ways
way_filter = { "category" : "way" }
db.oxford.find(way_filter).count()

50351

In [6]:
# How many documents are relations
relation_filter = { "category" : "relation" }
db.oxford.find(relation_filter).count()

1017

### Users

In [7]:
# Count the distinct contributor names stored under created.user
unique_users = db["oxford"].distinct("created.user")
len(unique_users)

1176

In [8]:
# Top 5 contributing users, each as a share of all documents.
# The denominator is counted at run time instead of being hard-coded, so the
# proportions stay correct if the collection is re-imported or modified.
num_docs = db.oxford.find().count()
top_users = db.oxford.aggregate([
        # One bucket per user name, with the number of documents they created
        { "$group" : { "_id" : "$created.user",
                       "count" : { "$sum" : 1 } } },
        # Turn the raw count into a fraction of the whole collection
        { "$project" : { "proportion" : { "$divide" : [ "$count", num_docs ] } } },
        { "$sort" : { "proportion" : -1 } },
        { "$limit" : 5 }   # Getting only the top 5
    ])

for user in top_users:
    pprint.pprint(user)

{u'_id': u'Andrew Chadwick', u'proportion': 0.12107225389000154}
{u'_id': u'craigloftus', u'proportion': 0.11503620397473424}
{u'_id': u'GordonFS', u'proportion': 0.08929286704668002}
{u'_id': u'Max--', u'proportion': 0.05713141272531197}
{u'_id': u'Richard Mann', u'proportion': 0.05309813588044986}


### Amenities

In [9]:
# How many documents carry an "amenity" field
has_amenity = { "amenity" : { "$exists" : 1 } }
db.oxford.find(has_amenity).count()

3682

In [10]:
# How many different amenity values occur in the collection
unique_amenities = db["oxford"].distinct("amenity")
len(unique_amenities)

124

In [11]:
# 10 most numerous amenities, each as a share of all documents with an amenity.
# The denominator is counted at run time instead of being hard-coded, so the
# proportions stay correct if the collection is re-imported or modified.
has_amenity = { "amenity" : { "$exists" : 1 } }
num_amenities = db.oxford.find(has_amenity).count()
top_amenities = db.oxford.aggregate([
        { "$match" : has_amenity },
        # One bucket per amenity value, with the number of documents carrying it
        { "$group" : { "_id" : "$amenity",
                       "count" : { "$sum" : 1 } } },
        # Turn the raw count into a fraction of all amenity documents
        { "$project" : { "proportion" : { "$divide" : [ "$count", num_amenities ] } } },
        { "$sort" : { "proportion" : -1 } },
        { "$limit" : 10 }   # Getting only the top 10
    ])

for amenity in top_amenities:
    pprint.pprint(amenity)

{u'_id': u'parking', u'proportion': 0.20369364475828355}
{u'_id': u'bicycle_parking', u'proportion': 0.14095600217273221}
{u'_id': u'post_box', u'proportion': 0.09016838674633351}
{u'_id': u'bench', u'proportion': 0.049429657794676805}
{u'_id': u'place_of_worship', u'proportion': 0.045898967952199894}
{u'_id': u'pub', u'proportion': 0.04481260184682238}
{u'_id': u'telephone', u'proportion': 0.038565996740901685}
{u'_id': u'restaurant', u'proportion': 0.03476371537208039}
{u'_id': u'school', u'proportion': 0.031233025529603477}
{u'_id': u'cafe', u'proportion': 0.029060293318848452}


Interestingly, places of worship outnumber both restaurants and cafes (each taken individually)! Could this mean that Oxford is a highly religious city? It is also noteworthy that bicycle parking is the second most numerous amenity, which fits our picture of Oxford as a college town.

### Buildings

In [12]:
# How many documents carry a "building" field
has_building = { "building" : { "$exists" : 1 } }
db.oxford.find(has_building).count()

20760

In [13]:
# Level of the highest building
# The level values are stored as strings (e.g. u'9'), so a MongoDB "$sort" on
# them is lexicographic and would rank '9' above '10' or '12'. The values are
# therefore compared numerically on the client side instead.
def _numeric_level(raw_level):
    """Return raw_level as a finite float, or None if it is not a plain number."""
    try:
        value = float(raw_level)
    except (TypeError, ValueError):
        return None
    # Reject NaN and infinities so they can never win the comparison below
    if value != value or value in (float("inf"), float("-inf")):
        return None
    return value

bldgs_with_level = db.oxford.aggregate([
        { "$match" : { "building.levels.basic_info" : { "$exists" : 1 } } },
        { "$project" : { "level" : "$building.levels.basic_info" } }
    ])

highest_bldg = []      # holds at most one document, like the original "$limit" : 1
highest_level = None
for building in bldgs_with_level:
    level = _numeric_level(building.get("level"))
    if level is not None and (highest_level is None or level > highest_level):
        highest_bldg = [building]
        highest_level = level

for building in highest_bldg:
    pprint.pprint(building)

{u'_id': ObjectId('570fec362054c17a26f47f42'), u'level': u'9'}


In [14]:
# Three most common building-level values, each as a share of all documents
# that record a level
has_level = { "building.levels.basic_info" : { "$exists" : 1 } }
num_bldgs_level = db.oxford.find(has_level).count()

# Pipeline stages, named for readability
group_by_level = { "$group" : { "_id" : "$building.levels.basic_info",
                                "count" : { "$sum" : 1 } } }
share_of_total = { "$project" : { "proportion" : { "$divide" : [ "$count", num_bldgs_level ] } } }

most_num_level = db.oxford.aggregate([
        { "$match" : has_level },
        group_by_level,
        share_of_total,
        { "$sort" : { "proportion" : -1 } },
        { "$limit" : 3 }   # keep the three largest shares
    ])

for level_share in most_num_level:
    pprint.pprint(level_share)

{u'_id': u'1', u'proportion': 0.47619047619047616}
{u'_id': u'2', u'proportion': 0.19327731092436976}
{u'_id': u'4', u'proportion': 0.1400560224089636}


Among the buildings that record a level, the results show that 9 is the highest building level in Oxford and that one-story buildings are predominant. This also agrees with our conception of Oxford as a college town.

### Religion

We noted earlier that Oxford seems to be a religious city. A natural follow-up question is: what do its residents believe? Let's try to answer it with our data.

In [15]:
# Distinct values of the "religion" field
db["oxford"].distinct("religion")

[u'christian',
 u'muslim',
 u'buddhist',
 u'jewish',
 u'spiritualist',
 u'multifaith']

In [16]:
# Religion by level of representation: each religion value as a share of all
# documents that carry a "religion" field.
has_religion = { "religion" : { "$exists" : 1 } }
num_doc_religion = db.oxford.find(has_religion).count()
religion_by_num = db.oxford.aggregate([
        { "$match" : has_religion },
        # One bucket per religion value, with the number of documents carrying it
        { "$group" : { "_id" : "$religion",
                       "count" : { "$sum" : 1 } } },
        # Divide by the count computed above rather than a hard-coded total, so
        # the proportions stay correct if the data changes
        { "$project" : { "proportion" : { "$divide" : [ "$count", num_doc_religion ] } } },
        { "$sort" : { "proportion" : -1 } }
    ])

for faith in religion_by_num:
    pprint.pprint(faith)

{u'_id': u'christian', u'proportion': 0.9371980676328503}
{u'_id': u'muslim', u'proportion': 0.024154589371980676}
{u'_id': u'jewish', u'proportion': 0.014492753623188406}
{u'_id': u'buddhist', u'proportion': 0.014492753623188406}
{u'_id': u'multifaith', u'proportion': 0.004830917874396135}
{u'_id': u'spiritualist', u'proportion': 0.004830917874396135}


As expected, Christianity is the predominant religion in Oxford. But the extreme level of predominance is still quite surprising, especially given that the city is a college town (where new ideas and practices are often prevalent). Note that these proportions count map features tagged with a religion, not individual believers, so they describe places rather than people.