## 安裝 PyMongo

In [2]:
! pip install pymongo

Collecting pymongo
  Downloading pymongo-3.11.0-cp37-cp37m-macosx_10_9_x86_64.whl (378 kB)
[K     |████████████████████████████████| 378 kB 521 kB/s eta 0:00:01
[?25hInstalling collected packages: pymongo
Successfully installed pymongo-3.11.0


## MongoDB 基本操作

### 測試 MongoDB 連線

In [6]:
import pymongo

# MongoClient() connects lazily and does not raise when the server is
# unreachable, so a cheap "ping" command is sent to force a real round trip.
# A failed server selection surfaces as a ConnectionFailure subclass.
try:
    conn = pymongo.MongoClient()
    conn.admin.command('ping')
except pymongo.errors.ConnectionFailure as e:
    print("Could not connect to MongoDB: %s" % e)



### 列出目前資料庫名稱

In [8]:
# List the names of the databases on the server.
# list_database_names() replaces the deprecated database_names().
conn.list_database_names()

  """Entry point for launching an IPython kernel.


['admin', 'config', 'local', 'twcom']

### 建立Database

In [18]:
# Obtain a handle to the 'rent591' database and show its repr.
db = conn.get_database('rent591')
print(db)

Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'rent591')


### 建立Collection


In [19]:
# Obtain a handle to the 'basic_profile' collection inside the database.
coll = db.get_collection('basic_profile')

## 新增資料 (Insert)

### 下載JSON
- https://raw.githubusercontent.com/ywchiu/mongodb_tutorial/main/data/rent591.json

In [20]:
import requests

# Download the sample listings and keep a local copy as rent.json.
res = requests.get(
    'https://raw.githubusercontent.com/ywchiu/mongodb_tutorial/main/data/rent591.json',
    timeout=30,  # do not hang forever if the host is unreachable
)
# Fail loudly on an HTTP error instead of saving an error page as rent.json.
res.raise_for_status()
with open('rent.json', 'w') as f:
    f.write(res.text)

### 讀取JSON 資料

In [22]:
import json

# Parse the downloaded file into Python objects; the context manager
# guarantees the file handle is closed once parsing is done.
with open('rent.json', 'r') as f:
    jd = json.load(f)

In [24]:
# Peek at the first parsed record to see which fields a listing carries.
first_listing = jd[0]
print(first_listing)

{'id': 9866527, 'user_id': 2991837, 'address': '大安路一段忠孝復興站樓上..', 'type': '1', 'post_id': 9866527, 'regionid': 1, 'sectionid': 5, 'streetid': 25614, 'room': 1, 'area': 11, 'price': '26,800', 'storeprice': 0, 'comment_total': 0, 'comment_unread': 0, 'comment_ltime': 0, 'hasimg': 1, 'kind': 1, 'shape': 2, 'houseage': 0, 'posttime': '14分鐘內', 'updatetime': 1602913536, 'refreshtime': 1602988322, 'checkstatus': 0, 'status': '', 'closed': 0, 'living': 'depart,advstore,market,night,park,school,hospital', 'condition': 'tv,icebox,cold,washer,hotwater,four,broadband,lift,naturalgas,landpost,cook,trabus,balcony_1,lease,bed,wardrobe,sofa,bookTable,chair,all_sex', 'isvip': 1, 'mvip': 1, 'is_combine': 1, 'cover': 'https://hp1.591.com.tw/house/active/2020/09/28/160126732136061501_210x158.crop.jpg', 'browsenum': 104, 'browsenum_all': 3288, 'floor2': 0, 'floor': 7, 'ltime': '2020-10-18 10:32:02', 'cases_id': '', 'social_house': 0, 'distance': 0, 'search_name': '', 'mainarea': None, 'balcony_area': None, 

### 匯入JSON 資料

In [29]:
# Bulk-insert every parsed record; the cell displays the InsertManyResult.
insert_result = coll.insert_many(jd)
insert_result

<pymongo.results.InsertManyResult at 0x113683bc8>

### 統計匯入資料筆數

In [31]:
# Count every document in the collection (an empty filter matches all).
# count_documents() replaces the deprecated Collection.count().
coll.count_documents({})

  """Entry point for launching an IPython kernel.


840

## 資料篩選 (Find)

### 找出第一筆資料

In [33]:
# find_one() with no filter returns a single document from the collection.
first_doc = coll.find_one()
print(first_doc)

{'_id': ObjectId('5f8bb25a6eba2b752b3026ea'), 'id': 9866527, 'user_id': 2991837, 'address': '大安路一段忠孝復興站樓上..', 'type': '1', 'post_id': 9866527, 'regionid': 1, 'sectionid': 5, 'streetid': 25614, 'room': 1, 'area': 11, 'price': '26,800', 'storeprice': 0, 'comment_total': 0, 'comment_unread': 0, 'comment_ltime': 0, 'hasimg': 1, 'kind': 1, 'shape': 2, 'houseage': 0, 'posttime': '14分鐘內', 'updatetime': 1602913536, 'refreshtime': 1602988322, 'checkstatus': 0, 'status': '', 'closed': 0, 'living': 'depart,advstore,market,night,park,school,hospital', 'condition': 'tv,icebox,cold,washer,hotwater,four,broadband,lift,naturalgas,landpost,cook,trabus,balcony_1,lease,bed,wardrobe,sofa,bookTable,chair,all_sex', 'isvip': 1, 'mvip': 1, 'is_combine': 1, 'cover': 'https://hp1.591.com.tw/house/active/2020/09/28/160126732136061501_210x158.crop.jpg', 'browsenum': 104, 'browsenum_all': 3288, 'floor2': 0, 'floor': 7, 'ltime': '2020-10-18 10:32:02', 'cases_id': '', 'social_house': 0, 'distance': 0, 'search_name':

### 根據時間條件篩選

In [36]:
# 'ltime' is stored as a 'YYYY-MM-DD HH:MM:SS' string, so $lt compares text;
# this selects listings whose ltime sorts before '2020-10-18'.
time_filter = {'ltime': {'$lt': '2020-10-18'}}
cur = coll.find(time_filter)

# Exhaust the cursor and report how many documents matched.
len(list(cur))

15

### 根據字串比對

In [37]:
# Regular-expression match on the 'fulladdress' field.
address_filter = {'fulladdress': {'$regex': '帝寶'}}
cur = coll.find(address_filter)
for doc in cur:
    print(doc)

{'_id': ObjectId('5f8bb25a6eba2b752b302746'), 'id': 9861404, 'user_id': 2937190, 'address': '仁愛路三段住商信義安和店..', 'type': '1', 'post_id': 9861404, 'regionid': 1, 'sectionid': 5, 'streetid': 25616, 'room': 3, 'area': 88.8, 'price': '400,000', 'storeprice': 0, 'comment_total': 0, 'comment_unread': 0, 'comment_ltime': 0, 'hasimg': 1, 'kind': 1, 'shape': 2, 'houseage': 0, 'posttime': '3小時內', 'updatetime': 1600759337, 'refreshtime': 1602979202, 'checkstatus': 1, 'status': '', 'closed': 0, 'living': '', 'condition': 'tv,icebox,cold,washer,hotwater,lift,naturalgas,cartplace,pet,cook,balcony_1,bed,wardrobe,sofa,bookTable,chair,all_sex', 'isvip': 1, 'mvip': 1, 'is_combine': 1, 'cover': 'https://hp2.591.com.tw/house/active/2020/09/22/160075825660868309_210x158.crop.jpg', 'browsenum': 50, 'browsenum_all': 1783, 'floor2': 0, 'floor': 13, 'ltime': '2020-10-18 08:02:04', 'cases_id': 5671, 'social_house': 0, 'distance': 0, 'search_name': '', 'mainarea': None, 'balcony_area': None, 'groundarea': None, 'li

### 排序及限制回傳筆數

In [38]:
# pymongo.ASCENDING (low to high) is the default sort direction; it is spelled
# out here. Only the first matching document is returned.
cur = (
    coll.find({'fulladdress': {'$regex': '帝寶'}})
    .sort('ltime', pymongo.ASCENDING)
    .limit(1)
)

for doc in cur:
    print(doc)

{'_id': ObjectId('5f8bb25a6eba2b752b302746'), 'id': 9861404, 'user_id': 2937190, 'address': '仁愛路三段住商信義安和店..', 'type': '1', 'post_id': 9861404, 'regionid': 1, 'sectionid': 5, 'streetid': 25616, 'room': 3, 'area': 88.8, 'price': '400,000', 'storeprice': 0, 'comment_total': 0, 'comment_unread': 0, 'comment_ltime': 0, 'hasimg': 1, 'kind': 1, 'shape': 2, 'houseage': 0, 'posttime': '3小時內', 'updatetime': 1600759337, 'refreshtime': 1602979202, 'checkstatus': 1, 'status': '', 'closed': 0, 'living': '', 'condition': 'tv,icebox,cold,washer,hotwater,lift,naturalgas,cartplace,pet,cook,balcony_1,bed,wardrobe,sofa,bookTable,chair,all_sex', 'isvip': 1, 'mvip': 1, 'is_combine': 1, 'cover': 'https://hp2.591.com.tw/house/active/2020/09/22/160075825660868309_210x158.crop.jpg', 'browsenum': 50, 'browsenum_all': 1783, 'floor2': 0, 'floor': 13, 'ltime': '2020-10-18 08:02:04', 'cases_id': 5671, 'social_house': 0, 'distance': 0, 'search_name': '', 'mainarea': None, 'balcony_area': None, 'groundarea': None, 'li

In [41]:
# Sort from high to low on 'ltime' and keep only the first match.
cur = (
    coll.find({'fulladdress': {'$regex': '帝寶'}})
    .sort([('ltime', pymongo.DESCENDING)])
    .limit(1)
)

for doc in cur:
    print(doc)

{'_id': ObjectId('5f8bb25a6eba2b752b302911'), 'id': 9949056, 'user_id': 2621275, 'address': '仁愛路三段帝寶景觀豪宅', 'type': '1', 'post_id': 9949056, 'regionid': 1, 'sectionid': 5, 'streetid': 25616, 'room': 3, 'area': 176.2, 'price': '380,000', 'storeprice': 0, 'comment_total': 0, 'comment_unread': 0, 'comment_ltime': 0, 'hasimg': 1, 'kind': 1, 'shape': 2, 'houseage': 0, 'posttime': '3小時內', 'updatetime': 1602487781, 'refreshtime': 1602979202, 'checkstatus': 1, 'status': '', 'closed': 0, 'living': '', 'condition': 'tv,icebox,cold,washer,hotwater,four,broadband,lift,naturalgas,pet,cook,balcony_1,bed,wardrobe,sofa,bookTable,chair,all_sex', 'isvip': 1, 'mvip': 1, 'is_combine': 0, 'cover': 'https://hp2.591.com.tw/house/active/2020/10/12/160249073754714945_210x158.crop.jpg', 'browsenum': 42, 'browsenum_all': 335, 'floor2': 0, 'floor': 13, 'ltime': '2020-10-18 08:03:28', 'cases_id': 5671, 'social_house': 0, 'distance': 0, 'search_name': '', 'mainarea': None, 'balcony_area': None, 'groundarea': None, '

## 修改資料 (Update)

### 修改單筆資料

In [62]:
# Set a 'searchdate' field on the single listing whose id is 9949056.
listing_filter = {"id": 9949056}
searchdate_change = {"$set": {"searchdate": "2020-10-18"}}
result = coll.update_one(listing_filter, searchdate_change)


In [63]:
# Display the UpdateResult object bound to `result`.
result

<pymongo.results.UpdateResult at 0x113b7d908>

In [65]:
# Read the listing back and print its 'searchdate' value.
for doc in coll.find({"id": 9949056}):
    print(doc.get('searchdate'))

2020-10-18


### 修改多筆資料

In [66]:
# Add the search date to every document.
# A single update_many() with an empty filter replaces the per-document loop
# over the deprecated Collection.update(), so the write is one server call.
searchdate_result = coll.update_many({}, {'$set': {'searchdate': "2020-10-18"}})

  


In [68]:
# Convert price strings such as '26,800' into integers.
# Only documents whose price is still a string are selected, so re-running the
# cell is a no-op instead of crashing on values that were already converted.
# update_one() replaces the deprecated Collection.update().
for obj in coll.find({'price': {'$type': 'string'}}, {'price': 1}):
    coll.update_one(
        {'_id': obj['_id']},
        {'$set': {'price': int(obj['price'].replace(',', ''))}},
    )

  This is separate from the ipykernel package so we can avoid doing imports until


In [69]:
# Spot-check one listing and display its 'price' value.
listing = coll.find_one({"id": 9949056})
listing.get('price')

380000

### 根據金額排序物件

In [71]:
# Sort on 'price' from high to low and keep the top three listings.
cur = coll.find(sort=[('price', pymongo.DESCENDING)], limit=3)

for doc in cur:
    listing_id = doc.get('id')
    listing_address = doc.get('address')
    listing_price = doc.get('price')
    print(listing_id, listing_address, listing_price)

9861404 仁愛路三段住商信義安和店.. 400000
9949056 仁愛路三段帝寶景觀豪宅 380000
9897127 敦化北路小巨蛋全新裝潢名人加.. 350000


## 統計資料 (Aggregate)

### 計算區域數量

In [78]:
# Count the documents in each 'section_name' group.
unwind_section = {"$unwind": "$section_name"}
count_by_section = {"$group": {"_id": "$section_name", "count": {"$sum": 1}}}
pipeline = [unwind_section, count_by_section]

list(coll.aggregate(pipeline))

[{'_id': '萬華區', 'count': 29},
 {'_id': '南港區', 'count': 47},
 {'_id': '松山區', 'count': 53},
 {'_id': '北投區', 'count': 29},
 {'_id': '大安區', 'count': 120},
 {'_id': '士林區', 'count': 70},
 {'_id': '大同區', 'count': 51},
 {'_id': '文山區', 'count': 44},
 {'_id': '信義區', 'count': 94},
 {'_id': '中正區', 'count': 71},
 {'_id': '內湖區', 'count': 91},
 {'_id': '中山區', 'count': 141}]

### 計算區域價格加總並排序

In [83]:

from bson.son import SON

# Sum 'price' within each 'section_name' group, largest total first.
unwind_section = {"$unwind": "$section_name"}
sum_by_section = {"$group": {"_id": "$section_name", "sum": {"$sum": "$price"}}}
sort_by_sum_desc = {"$sort": SON([("sum", -1)])}
pipeline = [unwind_section, sum_by_section, sort_by_sum_desc]

list(coll.aggregate(pipeline))

[{'_id': '大安區', 'sum': 7744202},
 {'_id': '中山區', 'sum': 6122404},
 {'_id': '內湖區', 'sum': 5389386},
 {'_id': '士林區', 'sum': 4984388},
 {'_id': '信義區', 'sum': 4881408},
 {'_id': '中正區', 'sum': 3102495},
 {'_id': '松山區', 'sum': 2958405},
 {'_id': '南港區', 'sum': 2543298},
 {'_id': '大同區', 'sum': 2092300},
 {'_id': '文山區', 'sum': 1318300},
 {'_id': '北投區', 'sum': 1059600},
 {'_id': '萬華區', 'sum': 990798}]

### 計算區域平均價格並排序

In [86]:
from bson.son import SON

# Average 'price' within each 'section_name' group, highest average first.
mean_price = {"$avg": "$price"}
sort_spec = SON([("mean", -1)])
pipeline = [
    {"$unwind": "$section_name"},
    {"$group": {"_id": "$section_name", "mean": mean_price}},
    {"$sort": sort_spec},
]

list(coll.aggregate(pipeline))

[{'_id': '士林區', 'mean': 71205.54285714286},
 {'_id': '大安區', 'mean': 64535.01666666667},
 {'_id': '內湖區', 'mean': 59224.02197802198},
 {'_id': '松山區', 'mean': 55818.96226415094},
 {'_id': '南港區', 'mean': 54112.72340425532},
 {'_id': '信義區', 'mean': 51929.87234042553},
 {'_id': '中正區', 'mean': 43697.112676056335},
 {'_id': '中山區', 'mean': 43421.304964539006},
 {'_id': '大同區', 'mean': 41025.490196078434},
 {'_id': '北投區', 'mean': 36537.93103448276},
 {'_id': '萬華區', 'mean': 34165.44827586207},
 {'_id': '文山區', 'mean': 29961.363636363636}]

### 根據多個欄位統計

In [97]:
from bson.son import SON

# Group on a compound key (region + section) and average 'price' per group,
# highest average first.
group_key = {'region': '$regionname', 'section': '$sectionname'}
pipeline = [
    {"$unwind": "$regionname"},
    {"$unwind": "$sectionname"},
    {"$group": {"_id": group_key, "mean": {"$avg": "$price"}}},
    {"$sort": SON([("mean", -1)])},
]

list(coll.aggregate(pipeline))

[{'_id': {'region': '台北市', 'section': '士林區'}, 'mean': 71205.54285714286},
 {'_id': {'region': '台北市', 'section': '大安區'}, 'mean': 64535.01666666667},
 {'_id': {'region': '台北市', 'section': '內湖區'}, 'mean': 59224.02197802198},
 {'_id': {'region': '台北市', 'section': '松山區'}, 'mean': 55818.96226415094},
 {'_id': {'region': '台北市', 'section': '南港區'}, 'mean': 54112.72340425532},
 {'_id': {'region': '台北市', 'section': '信義區'}, 'mean': 51929.87234042553},
 {'_id': {'region': '台北市', 'section': '中正區'}, 'mean': 43697.112676056335},
 {'_id': {'region': '台北市', 'section': '中山區'}, 'mean': 43421.304964539006},
 {'_id': {'region': '台北市', 'section': '大同區'}, 'mean': 41025.490196078434},
 {'_id': {'region': '台北市', 'section': '北投區'}, 'mean': 36537.93103448276},
 {'_id': {'region': '台北市', 'section': '萬華區'}, 'mean': 34165.44827586207},
 {'_id': {'region': '台北市', 'section': '文山區'}, 'mean': 29961.363636363636}]

### 根據多個欄位產生多項統計

In [98]:
from bson.son import SON

# Same compound grouping key, now producing two statistics per group:
# the average 'price' ("mean") and the number of documents ("cnt").
group_stage = {
    "$group": {
        "_id": {'region': '$regionname', 'section': '$sectionname'},
        "mean": {"$avg": "$price"},
        "cnt": {"$sum": 1},
    }
}
pipeline = [
    {"$unwind": "$regionname"},
    {"$unwind": "$sectionname"},
    group_stage,
    {"$sort": SON([("mean", -1)])},
]

list(coll.aggregate(pipeline))

[{'_id': {'region': '台北市', 'section': '士林區'},
  'mean': 71205.54285714286,
  'cnt': 70},
 {'_id': {'region': '台北市', 'section': '大安區'},
  'mean': 64535.01666666667,
  'cnt': 120},
 {'_id': {'region': '台北市', 'section': '內湖區'},
  'mean': 59224.02197802198,
  'cnt': 91},
 {'_id': {'region': '台北市', 'section': '松山區'},
  'mean': 55818.96226415094,
  'cnt': 53},
 {'_id': {'region': '台北市', 'section': '南港區'},
  'mean': 54112.72340425532,
  'cnt': 47},
 {'_id': {'region': '台北市', 'section': '信義區'},
  'mean': 51929.87234042553,
  'cnt': 94},
 {'_id': {'region': '台北市', 'section': '中正區'},
  'mean': 43697.112676056335,
  'cnt': 71},
 {'_id': {'region': '台北市', 'section': '中山區'},
  'mean': 43421.304964539006,
  'cnt': 141},
 {'_id': {'region': '台北市', 'section': '大同區'},
  'mean': 41025.490196078434,
  'cnt': 51},
 {'_id': {'region': '台北市', 'section': '北投區'},
  'mean': 36537.93103448276,
  'cnt': 29},
 {'_id': {'region': '台北市', 'section': '萬華區'},
  'mean': 34165.44827586207,
  'cnt': 29},
 {'_id': {'regio

## 刪除Mongo 內的資料

### 根據條件刪除多筆資料

In [100]:
# Delete every document whose 'sectionname' matches '文山區', then recount.
# count_documents() replaces the deprecated Collection.count().
coll.delete_many({'sectionname': {'$regex': '文山區'}})
coll.count_documents({})

  


796

### 捨棄整個Collection 的資料

In [101]:
# Drop the whole collection, then confirm that no documents remain.
# count_documents() replaces the deprecated Collection.count().
coll.drop()
coll.count_documents({})

  


0