In [1]:
import numpy as np
import pandas as pd
import pymongo
from pymongo import MongoClient

In [2]:
# connect to MongoDB
client = MongoClient('localhost', 27017)

In [3]:
# access to database
database = client['airbnb_hong_kong']

In [4]:
# access to collection(table)
collection = database['listings']

In [5]:
# total number of listings
x = collection.count_documents({})
print('total number of listings:', x)

total number of listings: 7087


In [6]:
# check the first item
collection.find_one()

{'_id': ObjectId('60b7510cfb23503df45cb721'),
 'id': 69074,
 'listing_url': 'https://www.airbnb.com/rooms/69074',
 'scrape_id': 20210220182008.0,
 'last_scraped': '2021-02-21',
 'name': 'Beautiful oasis of plants & art @ best location',
 'description': 'An ideal Hong location any visitor--hip without being touristy--this contemporary & newly renovated 1 bedroom apartment has every feature you need for your stay, from a stereo and internet to cooking needs and warm lighting. Enjoy!<br /><br /><b>The space</b><br />Filled with plants and art, this one bedroom urban oasis has been newly renovated and impeccably re-decorated -- warm and inviting in the best of neighborhoods for your stay in Hong Kong.  The art comes from local and regional artists all over Asia and the art & coffee table books are there for your enjoyment during your stay. The custom built desks provide ample working space. A fold out table is adjacent to the kitchen for dining.<br /><br />Before reading further, please ki

In [7]:
# total number of hosts
x = len(collection.distinct('host_id'))
print('total number of hosts:', x)

total number of hosts: 2048


In [8]:
# total number of superhosts

query = {'host_is_superhost': 't'}
x = collection.count_documents(query)

query = {'host_is_superhost': 'f'}
y = collection.count_documents(query)

print('total number of superhosts:', x)
print('total number of non-superhosts:', y)
print('superhost percentage:', round(x/(x+y),2))

total number of superhosts: 1286
total number of non-superhosts: 5797
superhost percentage: 0.18


In [9]:
# number of property types

x = len(collection.distinct('property_type'))
print('number of property types:', x)

number of property types: 76


In [10]:
# number of room type
collection.distinct('room_type')

['Entire home/apt', 'Hotel room', 'Private room', 'Shared room']

In [11]:
# number of reivew

x = collection.find({},{
        '_id':0, 'name':1,
        'description':1,
        'number_of_reviews':1
    }).sort('number_of_reviews',-1).limit(1)

for cursor in x:
    print(cursor)
    print('\nthe highest number of review:', cursor['number_of_reviews'])

{'name': '香港 Hong Kong 長洲 Cheung Chau lsland', 'description': '離碼頭 8-10分鐘路程 (free wifi)<br /><br /><b>The space</b><br />整個單位330呎 採全開放式,正常可2人睡( 不是和別人供享的 ) 2人後就加$80元1個人 (小朋友同價) 因空間比一般渡假屋大 另可加2張單人摺床或雙人充氣床褥，沙發夠大都足夠睡1人 單位最多5人睡,可供簡單煮食<br />(如有寵物每隻額外收取$80元 訂時加人數一位就可) 屋内已有水兜 軟墊和狗廁所提供 來前請咨詢)<br /><br /><b>Guest access</b><br />free wifi 風筒 洗澡用品 電熱水爐 電熱水煲 藍牙speakers 自拍腳架 小暖風機  Gas煮食爐<br /><br /><b>Other things to note</b><br />不是共享單位  沒有海景  不可BBQ 要自備牙刷牙膏毛巾3樣', 'number_of_reviews': 828}

the highest number of review: 828


In [None]:
# average number of review per listing

def streaming_average(previous_count, previous_average, n):
    new_average = ((previous_count * previous_average) + n) / (previous_count + 1)
    return new_average

x = collection.find({},{'_id':0, 'number_of_reviews':1})

previous_count, previous_average = 0, 0
for cursor in x:
    n = cursor['number_of_reviews']
    
    # skip all listings with zero review
    if n != 0:
        new_average = streaming_average(previous_count, previous_average, n)
    
    previous_average = new_average
    previous_count += 1

print('average number of review per listing(excluding zero review):', round(new_average,2))

In [12]:
# 18 districts
collection.distinct('neighbourhood_cleansed')

['Central & Western',
 'Eastern',
 'Islands',
 'Kowloon City',
 'Kwai Tsing',
 'Kwun Tong',
 'North',
 'Sai Kung',
 'Sha Tin',
 'Sham Shui Po',
 'Southern',
 'Tai Po',
 'Tsuen Wan',
 'Tuen Mun',
 'Wan Chai',
 'Wong Tai Sin',
 'Yau Tsim Mong',
 'Yuen Long']

In [None]:
# END