# 4.1 Imports

In [1]:
import numpy as np
import pandas as pd
import pymongo
from pymongo import MongoClient

In [2]:
# connection string in uri format
# !pip install pymongo[srv]
# since it includes the password, it was hidden

## connection_string = 
##     "mongodb+srv://ORGANIZATION:PASSWORD@cluster-0.lmfnt.mongodb.net/DATABASE_NAME?retryWrites=true&w=majority"

In [4]:
# connect to MongoDB Atlas (MongoDB cloud server)
client = MongoClient(connection_string)
db = client.test

In [5]:
# access to database
database = client['airbnb']

In [6]:
# access to collection(table)
collection = database['hongkong']

# 4.2 Queries about 'Yuen Long'

In [7]:
# total number of listings

x = collection.count_documents({'neighbourhood_cleansed':'Yuen Long'})
print('total number of listings:', x)

total number of listings: 132


In [8]:
# top 5 listings by review no

x = collection.find({'neighbourhood_cleansed':'Yuen Long'},
                    {'_id':0, 
                     'name':1, 
                     'number_of_reviews':1}).sort('number_of_reviews',-1).limit(5)

for cursor in x:
    print('listing name:', cursor['name'])
    print('number of reviews:', cursor['number_of_reviews'])
    print('\n')

listing name: 超赞房东 性价比高special deal 300 m to metro musician home
number of reviews: 100


listing name: 超赞豪华景观公寓浪漫情侣大床房/卓悦中心网红打卡商圈高端物业
number of reviews: 97


listing name: 高层景观舒适公寓双人房/福田口岸地铁囗高端小区/CBD会展中心/清静安全
number of reviews: 62


listing name: （已消毒，长租优惠）福田口岸，皇岗口岸，广深高速进出口，知名设计师佳作，精美三房
number of reviews: 58


listing name: Guest Room w/ Double Bed in Spacious Flat
number of reviews: 55




In [9]:
# next top 5 (5-10th) listings by review no

x = collection.find({'neighbourhood_cleansed':'Yuen Long'},
                    {'_id':0, 
                     'name':1, 
                     'number_of_reviews':1}).sort('number_of_reviews',-1).skip(5).limit(5)

for cursor in x:
    print('listing name:', cursor['name'])
    print('number of reviews:', cursor['number_of_reviews'])
    print('\n')

listing name: [初心]福田口岸会展中心直达北站温馨两居室可居4一6人
number of reviews: 51


listing name: 福田口岸地铁口舒适公寓温馨双人房/近CBD会展中心、购物公园、网红打卡商圈
number of reviews: 44


listing name: House with countryside view 元朗別墅獨立套房連陽台
number of reviews: 40


listing name: 福田口岸地铁口近CBD会展中心温馨舒适高级公寓双人房
number of reviews: 40


listing name: Beautiful 3 bedroom designer Apt
number of reviews: 38




In [10]:
# top 5 listings by review rating with at least 50 reviews

x = collection.find({'neighbourhood_cleansed':'Yuen Long',
                     'number_of_reviews':{'$gt':50}},
                    {'_id':0, 
                     'name':1, 
                     'number_of_reviews':1, 
                     'review_scores_rating':1}).sort('review_scores_rating',-1).limit(5)

for cursor in x:
    print('listing name:', cursor['name'])
    print('review_scores_rating:', cursor['review_scores_rating'])
    print('number of reviews:', cursor['number_of_reviews'])
    print('\n')

listing name: （已消毒，长租优惠）福田口岸，皇岗口岸，广深高速进出口，知名设计师佳作，精美三房
review_scores_rating: 100
number of reviews: 58


listing name: 超赞房东 性价比高special deal 300 m to metro musician home
review_scores_rating: 96
number of reviews: 100


listing name: Guest Room w/ Double Bed in Spacious Flat
review_scores_rating: 95
number of reviews: 55


listing name: 超赞豪华景观公寓浪漫情侣大床房/卓悦中心网红打卡商圈高端物业
review_scores_rating: 94
number of reviews: 97


listing name: 高层景观舒适公寓双人房/福田口岸地铁囗高端小区/CBD会展中心/清静安全
review_scores_rating: 93
number of reviews: 62




In [11]:
# next top 5 (5-10)listings by review rating with at least 50 reviews

x = collection.find({'neighbourhood_cleansed':'Yuen Long',
                     'number_of_reviews':{'$gt':50}},
                    {'_id':0, 
                     'name':1, 
                     'number_of_reviews':1, 
                     'review_scores_rating':1}).sort('review_scores_rating',-1).skip(5).limit(5)

for cursor in x:
    print('listing name:', cursor['name'])
    print('review_scores_rating:', cursor['review_scores_rating'])
    print('number of reviews:', cursor['number_of_reviews'])
    print('\n')

listing name: 高层景观舒适公寓双人房/福田口岸地铁囗高端小区/CBD会展中心/清静安全
review_scores_rating: 93
number of reviews: 62




In [12]:
# interesting findings: most of 'Yuen Long' findings have less than 50 number of reviews
# the credibility might be compromized
# another reason is that the total number of listings in this district is the other two

In [13]:
# all listings with review rating score below 50

x = collection.find({'neighbourhood_cleansed':'Yuen Long',
                    'review_scores_rating':{'$lt':30}},
                   {'_id':0, 
                     'name':1, 
                     'review_scores_rating':1})

for cursor in x:
    try:
        print('listing name:', cursor['name'])
    except:
        pass
    print('review_scores_rating:', cursor['review_scores_rating'])
    print('\n')

listing name: 福田口岸 会展中心 C bd商圈 鼓浪湾公寓清新大床房
review_scores_rating: 20




In [14]:
# averaage price

x = collection.find({'neighbourhood_cleansed':'Yuen Long'},
                 {'_id':0, 
                 'name':1, 
                 'price':1})

total = 0
count = 0
for cursor in x:
    try:
        count += 1
        total += float(cursor['price'].replace('$',''))
    except:
        pass

average_price = total / count    
print('average price:', round(average_price,2))

average price: 389.59


In [15]:
# another surprising finding
# though 'Yuen Long' is neither commercial nor financial center like the first two districts
# it has the highest average price

In [16]:
# listings with 'bedrooms': 2 or 'beds': 3

x = collection.count_documents({'neighbourhood_cleansed':'Yuen Long',
                                '$or':
                                [{'bedrooms': 2, 
                                  'beds': 3}]})

print('number of listings that have 2 bedrooms or 3 beds:', x)

number of listings that have 2 bedrooms or 3 beds: 5


In [17]:
# END