In [1]:
import numpy as np
import pandas as pd
import pymongo
from pymongo import MongoClient
import datetime

In [2]:
# connect to MongoDB
client = MongoClient('localhost', 27017)

In [7]:
# access to database
database = client['airbnb_hong_kong']

In [8]:
# access to collection(table)
collection = database['listings']

# 1. Overview

In [38]:
# total number of listings
x = collection.count_documents({})
print('total number of listings:', x)

total number of listings: 7087


In [23]:
# check the first item
collection.find_one()

{'_id': ObjectId('60b7510cfb23503df45cb721'),
 'id': 69074,
 'listing_url': 'https://www.airbnb.com/rooms/69074',
 'scrape_id': 20210220182008.0,
 'last_scraped': '2021-02-21',
 'name': 'Beautiful oasis of plants & art @ best location',
 'description': 'An ideal Hong location any visitor--hip without being touristy--this contemporary & newly renovated 1 bedroom apartment has every feature you need for your stay, from a stereo and internet to cooking needs and warm lighting. Enjoy!<br /><br /><b>The space</b><br />Filled with plants and art, this one bedroom urban oasis has been newly renovated and impeccably re-decorated -- warm and inviting in the best of neighborhoods for your stay in Hong Kong.  The art comes from local and regional artists all over Asia and the art & coffee table books are there for your enjoyment during your stay. The custom built desks provide ample working space. A fold out table is adjacent to the kitchen for dining.<br /><br />Before reading further, please ki

In [39]:
# total number of hosts
x = len(collection.distinct('host_id'))
print('total number of hosts:', x)

total number of hosts: 2048


In [49]:
# total number of superhosts

query = {'host_is_superhost': 't'}
x = collection.count_documents(query)

query = {'host_is_superhost': 'f'}
y = collection.count_documents(query)

print('total number of superhosts:', x)
print('total number of non-superhosts:', y)
print('superhost percentage:', round(x/(x+y),2))

total number of superhosts: 1286
total number of non-superhosts: 5797
superhost percentage: 0.18


In [53]:
# number of property types

x = len(collection.distinct('property_type'))
print('number of property types:', x)

number of property types: 76


In [55]:
# number of room type
collection.distinct('room_type')

['Entire home/apt', 'Hotel room', 'Private room', 'Shared room']

In [84]:
# number of reivew

x = collection.find({},{
        '_id':0, 'name':1,
        'description':1,
        'number_of_reviews':1
    }).sort('number_of_reviews',-1).limit(1)

for cursor in x:
    print(cursor)
    print('\nthe highest number of review:', cursor['number_of_reviews'])

{'name': '香港 Hong Kong 長洲 Cheung Chau lsland', 'description': '離碼頭 8-10分鐘路程 (free wifi)<br /><br /><b>The space</b><br />整個單位330呎 採全開放式,正常可2人睡( 不是和別人供享的 ) 2人後就加$80元1個人 (小朋友同價) 因空間比一般渡假屋大 另可加2張單人摺床或雙人充氣床褥，沙發夠大都足夠睡1人 單位最多5人睡,可供簡單煮食<br />(如有寵物每隻額外收取$80元 訂時加人數一位就可) 屋内已有水兜 軟墊和狗廁所提供 來前請咨詢)<br /><br /><b>Guest access</b><br />free wifi 風筒 洗澡用品 電熱水爐 電熱水煲 藍牙speakers 自拍腳架 小暖風機  Gas煮食爐<br /><br /><b>Other things to note</b><br />不是共享單位  沒有海景  不可BBQ 要自備牙刷牙膏毛巾3樣', 'number_of_reviews': 828}

the highest number of review: 828


In [98]:
# average number of review per listing

def streaming_average(previous_count, previous_average, n):
    new_average = ((previous_count * previous_average) + n) / (previous_count + 1)
    return new_average

x = collection.find({},{'_id':0, 'number_of_reviews':1})

previous_count, previous_average = 0, 0
for cursor in x:
    n = cursor['number_of_reviews']
    
    # skip all zero review
    if n != 0:
        new_average = streaming_average(previous_count, previous_average, n)
    
    previous_average = new_average
    previous_count += 1

print('average number of review per listing(excluding zero review):', round(new_average,2))

average number of review per listing(excluding zero review): 40.86


In [102]:
# 18 districts
collection.distinct('neighbourhood_cleansed')

['Central & Western',
 'Eastern',
 'Islands',
 'Kowloon City',
 'Kwai Tsing',
 'Kwun Tong',
 'North',
 'Sai Kung',
 'Sha Tin',
 'Sham Shui Po',
 'Southern',
 'Tai Po',
 'Tsuen Wan',
 'Tuen Mun',
 'Wan Chai',
 'Wong Tai Sin',
 'Yau Tsim Mong',
 'Yuen Long']

In [None]:
# Central & Western
# Yau Tsim Mong
# Yuen Long

# 2. Yau Tsim Mong

In [108]:
# top 5 listings by review no

x = collection.find({'neighbourhood_cleansed':'Yau Tsim Mong'},{'_id':0, 'name':1, 'number_of_reviews':1}).sort('number_of_reviews',-1).limit(5)

for cursor in x:
    print('listing name:', cursor['name'])
    print('number of reviews:', cursor['number_of_reviews'])
    print('\n')

listing name: Couple Tatami Room@Austin,Jordan,Tsim Sha Tsu
number of reviews: 472


listing name: 1分鐘到高鐵站,地鐵站、1min to Metro Station,Free Pocket WiFi
number of reviews: 433


listing name: Lovely home Mong Kok MTR Railway 4 beds
number of reviews: 384


listing name: Twin Room@hub of Kowloon to Airport and China
number of reviews: 378


listing name: Single Tatami1@Austin,Jordan,Tsim Sha Tsui
number of reviews: 372




In [110]:
# next top 5 (5-10th) listings by review no

x = collection.find({'neighbourhood_cleansed':'Yau Tsim Mong'},{'_id':0, 'name':1, 'number_of_reviews':1}).sort('number_of_reviews',-1).skip(5).limit(5)

for cursor in x:
    print('listing name:', cursor['name'])
    print('number of reviews:', cursor['number_of_reviews'])
    print('\n')

listing name: ❤❤ Peaceful Apartment ❤❤ Tsim Sha Tsui
number of reviews: 364


listing name: Comfort Home in Prince Edward Mtr 4 beds
number of reviews: 353


listing name: Deluxe Twin Room@Tsim Sha Tsui,Austin,Jordan
number of reviews: 349


listing name: (3) 干、濕分開,溫馨雙人大床房間(房號3)!值得一試!
number of reviews: 344


listing name: Tsim Sha Tsui, newly 2 beds apt. I.
number of reviews: 338




In [116]:
# top 5 listings by review rating with at least 50 reviews

x = collection.find({'neighbourhood_cleansed':'Yau Tsim Mong'},{'_id':0, 'name':1, 'review_scores_rating':1}).sort('review_scores_rating',-1).limit(5)

for cursor in x:
    print('listing name:', cursor['name'])
    print('number of reviews:', cursor['review_scores_rating'])
    print('\n')
    
# with at least 50 reviews
# try using MongoDB command in the first bracket
# add conditions

listing name: Harbour Mong kok Renovated Room 5
number of reviews: 100


listing name: 10 mins to CITY U --------- by MTR
number of reviews: 100


listing name: Monthly Rental 14103 Tsim Sha Tsui MTR above
number of reviews: 100


listing name: Creative Residence in Yau Ma Tei
number of reviews: 100


listing name: 尖沙咀的1房1厅，住3人，有厨房月租14000起
number of reviews: 100




In [None]:
# next top 5 (5-10)listings by review rating

In [None]:
# all listings with review rating over 90(?)

In [None]:
# averaage price

In [None]:
# all listings with price between 500 and 700 (?)

In [None]:
# average review rating

In [None]:
# all listings with 'amenities' has "Wifi", "Coffee maker", "Cable TV"

In [None]:
# listings with 'bedrooms': 2

# 3. Central & Western

In [109]:
# top 5 listings by review no

x = collection.find({'neighbourhood_cleansed':'Central & Western'},{'_id':0, 'name':1, 'number_of_reviews':1}).sort('number_of_reviews',-1).limit(5)

for cursor in x:
    print('listing name:', cursor['name'])
    print('number of reviews:', cursor['number_of_reviews'])
    print('\n')

listing name: SOHO'S URBAN CHIC DESIGNERS APT
number of reviews: 410


listing name: cdk guest room - twin
number of reviews: 319


listing name: Stunning Large Studio + Terrace. 1min to MTR.
number of reviews: 312


listing name: Central Centre 5 min walk to/from Central MTR
number of reviews: 272


listing name: Soho off Hollywood Road P3
number of reviews: 232




In [111]:
# next top 5 (5-10th) listings by review no

x = collection.find({'neighbourhood_cleansed':'Central & Western'},{'_id':0, 'name':1, 'number_of_reviews':1}).sort('number_of_reviews',-1).skip(5).limit(5)

for cursor in x:
    print('listing name:', cursor['name'])
    print('number of reviews:', cursor['number_of_reviews'])
    print('\n')

listing name: SoHo off Hollywood Road  3B
number of reviews: 229


listing name: Soho off Hollywood Road P4
number of reviews: 224


listing name: Perfect Retreat in Heart of City. 30sec to MTR.
number of reviews: 221


listing name: GREEN NEST IN  SHEUNG WAN HEART
number of reviews: 208


listing name: Fabulous 2 Bdrm Aprt Open kitchen
number of reviews: 207




# 4. Yuen Long

In [112]:
# top 5 listings by review no

x = collection.find({'neighbourhood_cleansed':'Yuen Long'},{'_id':0, 'name':1, 'number_of_reviews':1}).sort('number_of_reviews',-1).limit(5)

for cursor in x:
    print('listing name:', cursor['name'])
    print('number of reviews:', cursor['number_of_reviews'])
    print('\n')

listing name: 超赞房东 性价比高special deal 300 m to metro musician home
number of reviews: 100


listing name: 超赞豪华景观公寓浪漫情侣大床房/卓悦中心网红打卡商圈高端物业
number of reviews: 97


listing name: 高层景观舒适公寓双人房/福田口岸地铁囗高端小区/CBD会展中心/清静安全
number of reviews: 62


listing name: （已消毒，长租优惠）福田口岸，皇岗口岸，广深高速进出口，知名设计师佳作，精美三房
number of reviews: 58


listing name: Guest Room w/ Double Bed in Spacious Flat
number of reviews: 55




In [113]:
# next top 5 (5-10th) listings by review no

x = collection.find({'neighbourhood_cleansed':'Yuen Long'},{'_id':0, 'name':1, 'number_of_reviews':1}).sort('number_of_reviews',-1).skip(5).limit(5)

for cursor in x:
    print('listing name:', cursor['name'])
    print('number of reviews:', cursor['number_of_reviews'])
    print('\n')

listing name: [初心]福田口岸会展中心直达北站温馨两居室可居4一6人
number of reviews: 51


listing name: 福田口岸地铁口舒适公寓温馨双人房/近CBD会展中心、购物公园、网红打卡商圈
number of reviews: 44


listing name: House with countryside view 元朗別墅獨立套房連陽台
number of reviews: 40


listing name: 福田口岸地铁口近CBD会展中心温馨舒适高级公寓双人房
number of reviews: 40


listing name: Beautiful 3 bedroom designer Apt
number of reviews: 38




In [6]:
# END