In [2]:
from pymongo import MongoClient
from datetime import datetime
from bson import ObjectId

In [3]:
# Create a client for the MongoDB server at localhost, port 27017.
client = MongoClient('localhost', 27017)

## 列出所有数据库

In [4]:
# Show the names of all databases visible to this client.
client.list_database_names()

['abmDiffusion',
 'admin',
 'config',
 'lagou',
 'local',
 'mdb_test',
 'moniter_crowdfunding',
 'purchase',
 'temp',
 'test']

In [6]:
# Print one metadata dict (name, sizeOnDisk, empty) per database.
# The loop variable is deliberately not called `db`: later cells use `db` as
# the handle to the 'mdb_test' database, and re-running this cell would
# otherwise silently replace that handle with a plain dict.
for db_info in client.list_databases():
    print(db_info)

{'name': 'abmDiffusion', 'sizeOnDisk': 11501568.0, 'empty': False}
{'name': 'admin', 'sizeOnDisk': 32768.0, 'empty': False}
{'name': 'config', 'sizeOnDisk': 73728.0, 'empty': False}
{'name': 'lagou', 'sizeOnDisk': 950272.0, 'empty': False}
{'name': 'local', 'sizeOnDisk': 77824.0, 'empty': False}
{'name': 'mdb_test', 'sizeOnDisk': 32768.0, 'empty': False}
{'name': 'moniter_crowdfunding', 'sizeOnDisk': 147832832.0, 'empty': False}
{'name': 'purchase', 'sizeOnDisk': 606208.0, 'empty': False}
{'name': 'temp', 'sizeOnDisk': 73728.0, 'empty': False}
{'name': 'test', 'sizeOnDisk': 40960.0, 'empty': False}


## 创建数据库


PyMongo 获取(创建)数据库的语法格式如下：
```python
pymongo.MongoClient().<db_name>
```
如果数据库`db_name`存在，则返回该数据库；如果不存在，则返回一个新的数据库对象，MongoDB 会在第一次向其中写入数据时才真正创建该数据库。

In [7]:
db = client['mdb_test']  # equivalent attribute form: db = client.mdb_test

## 删除数据库

In [None]:
# Destructive: drops the whole 'mdb_test' database.
# drop_database() expects the database name as a string (or a Database
# object); the bare identifier mdb_test was never defined and raised NameError.
client.drop_database('mdb_test')

# 一、 插入操作

In [None]:
# Drop the 'student' collection from db (the next cell re-creates it).
db.drop_collection('student')

In [52]:
col = db.create_collection('student')  # create the 'student' collection and keep its handle in col

### 1.`db.collection.insert()`插入1个或多个文档
> 注意: 在MongoDB 3.6以上版本中已不建议使用，如要插入1个文档，请使用`insert_one()`；如果要插入多个文档，请使用`insert_many()`。

In [54]:
# Deprecated API, kept only as a demonstration (see the note above).
# NOTE(review): Collection.insert() was removed in PyMongo 4.0 — on newer
# drivers use insert_one() / insert_many() instead.
a = {'student_id':2015210315, 'student_name':'张三丰', 'gender':'男'}
col.insert(a)  # insert document a into col

  


ObjectId('5cd9273cf5d4321faa2849ec')

### 2.`db.collection.insert_one()`插入1个文档

In [55]:
# Second sample document; no _id is supplied here.
b = {'student_id': 2015210316, 'student_name': '李想', 'gender': '男'}

In [56]:
col.insert_one(b)  # insert document b into col

<pymongo.results.InsertOneResult at 0x106145048>

In [57]:
c = {'_id': 2015210317, 'student_name':'郭涛', 'gender':'男'}  # explicitly set the primary key _id

In [59]:
# Insert document c, which carries its own integer _id.
col.insert_one(c)

<pymongo.results.InsertOneResult at 0x106145348>

### 3.`db.collection.insert_many()`插入多个文档

In [60]:
a_list = [{'student_id': 2015210318, 'student_name': 'Joshi', 'gender': '男'}, 
          {'student_id': 2015210319, 'student_name': 'Kevin', 'gender': '男'}, 
          {'student_id': 2015210320, 'student_name': 'Lily', 'gender': '女'}]  # a list (array) holding 3 dicts (documents)

In [61]:
# Insert every document in a_list with a single call.
col.insert_many(a_list)

<pymongo.results.InsertManyResult at 0x106188b88>

# 二、查询

`Pymongo`中提供了`find()`和`find_one()`等查询方法

## `db.collection.find(<query>, <projection>, ...)` 

查询满足`query`条件的所有文档，返回指向结果集合第一个文档的游标

In [62]:
# find() with no query: res is a cursor over the documents in col.
res = col.find()

In [51]:
# Pull a single document from the cursor; once the cursor is exhausted
# it raises StopIteration, which is reported instead of propagating.
try:
    print(next(res))
except StopIteration as exhausted:
    print('读取完毕', exhausted)

读取完毕 


In [63]:
for r in res:
    print(r)  # iterate over the cursor to read every result in turn

{'_id': ObjectId('5cd9273cf5d4321faa2849ec'), 'student_id': 2015210315, 'student_name': '张三丰', 'gender': '男'}
{'_id': ObjectId('5cd92796f5d4321faa2849ed'), 'student_id': 2015210316, 'student_name': '李想', 'gender': '男'}
{'_id': 2015210317, 'student_name': '郭涛', 'gender': '男'}
{'_id': ObjectId('5cd9294af5d4321faa2849ee'), 'student_id': 2015210318, 'student_name': 'Joshi', 'gender': '男'}
{'_id': ObjectId('5cd9294af5d4321faa2849ef'), 'student_id': 2015210319, 'student_name': 'Kevin', 'gender': '男'}
{'_id': ObjectId('5cd9294af5d4321faa2849f0'), 'student_id': 2015210320, 'student_name': 'Lily', 'gender': '女'}


可以直接使用`python`提供的对象保存结果集

In [70]:
res = list(col.find())  # materialize the cursor into a list of dicts

In [72]:
# First document of the materialized result list.
res[0]

{'_id': ObjectId('5cd9273cf5d4321faa2849ec'),
 'student_id': 2015210315,
 'student_name': '张三丰',
 'gender': '男'}

In [73]:
# The _id field of the first document.
res[0]['_id']

ObjectId('5cd9273cf5d4321faa2849ec')

### 1. 选择

In [76]:
res = col.find({'student_id': 2015210315})  # documents in the student collection whose student_id is 2015210315
print(list(res))

[{'_id': ObjectId('5cd9273cf5d4321faa2849ec'), 'student_id': 2015210315, 'student_name': '张三丰', 'gender': '男'}]


#### 可以定义其它类型的大小关系或成员测试筛选条件，如下表

| 操作     | 格式     |范例     |SQL     |
| :------------- | :------------- |:------------- |:------------- |
| 等于       | `{key:value}` | `db.collection_name.find({"name":"张三"})` | `where name = '张三'` |
|小于 | `{key:{$lt:value}}` | `db.collection_name.find({"height":{$lt:173}})` | `where height < 173` |
|小于等于 | `{key:{$lte:value}}` | `db.collection_name.find({"height":{$lte:173}})` | `where height <= 173` |
|大于 | `{key:{$gt:value}}` | `db.collection_name.find({"height":{$gt:173}})` | `where height > 173` |
|大于等于 | `{key:{$gte:value}}` | `db.collection_name.find({"height":{$gte:173}})` | `where height >= 173` |
|不等于 | `{key:{$ne:value}}` | `db.collection_name.find({"height":{$ne:173}})` | `where height != 173` |
|in：出现在array | `{key:{$in:[value1, value2, ...]}}` | `db.collection_name.find({"奖学金":{$in:['国家奖学金']}})` | `NA` |
|nin：未出现在array | `{key:{$nin:[value1, value2, ...]}}` | `db.collection_name.find({"奖学金":{$nin:['国家奖学金']}})` | `NA` |

>`find()`方法的`query`参数可以传入多个域(`key`)，每个域(`key`)以逗号隔开，即对应 `SQL` 的复合子句；此外，在同一个键上也可以定义多个条件，例如: 身高在173和180之间的文档集合，对应查询`db.student.find({height: {$lte:180, $gte:173}})`。

- 如果文档的域的值为一个数组，且查询为复合条件，只要数组中有符合条件的元素，则会返回该文档。

In [81]:
# Two sample documents whose 'score' field is an array.
student_list = [
    {'student_id': 2015210321, 'student_name': 'Steven', 'gender': '男', 'score': [70, 89]},
    {'student_id': 2015210322, 'student_name': 'Tony', 'gender': '男', 'score': [65, 89]} 
    ]
col.insert_many(student_list)

<pymongo.results.InsertManyResult at 0x10589a488>

In [83]:
# On an array field the condition is tested per element: a document matches
# when at least one score is below 70.
res = col.find({'score': {'$lt': 70}})
print(list(res))

[{'_id': ObjectId('5cd9352af5d4321faa2849f2'), 'student_id': 2015210322, 'student_name': 'Tony', 'gender': '男', 'score': [65, 89]}]


#### 逻辑操作符

|操作|格式|范例|SQL|
|--:|--:|--:|--:|
|`$and` | `{'$and':[{expr1}, {expr2}, ... {exprN}]}` | `db.inventory.find({'$and':[{'price':1.99}, {'sale':True}]})` | `where price=1.99 AND sale=true` |
|`$or` | `{'$or':[{expr1}, {expr2}, ... {exprN}]}` | `db.inventory.find({'$or':[{'price':1.99}, {'sale':True}]})` | `where price=1.99 OR sale=true` |
|`$not` | `{'$not': {expr}}` | `db.inventory.find({'price':{'$not':{'$gt':1.99}}})` | `where price <= 1.99 or price is null` |
|`$nor` | `{'$nor':[{expr1}, {expr2}, ... {exprN}]}` |`db.inventory.find( { '$nor': [ { 'price': 1.99 }, { 'sale': True } ]  } )` | `where (price != 1.99 or price is null) and (sale != true or sale is null)` |

In [90]:
# Several fields in one query document: every condition must hold (implicit AND).
res = col.find({'student_id': {'$in': [2015210321, 2015210322]}, 'gender': '男', 'score': {'$lte': 65}})
for r in res:
    print(r)

{'_id': ObjectId('5cd9352af5d4321faa2849f2'), 'student_id': 2015210322, 'student_name': 'Tony', 'gender': '男', 'score': [65, 89]}


In [89]:
# The same three conditions written with an explicit $and.
res = col.find({'$and': [{'student_id': {'$in': [2015210321, 2015210322]}}, {'gender': '男'}, {'score': {'$lte': 65}}]})
for r in res:
    print(r)

{'_id': ObjectId('5cd9352af5d4321faa2849f2'), 'student_id': 2015210322, 'student_name': 'Tony', 'gender': '男', 'score': [65, 89]}


In [88]:
# $or: a document matches when any one of the listed conditions holds.
res = col.find({'$or': [{'student_id': {'$in': [2015210321, 2015210322]}}, {'gender': '男'}, {'score': {'$lte': 65}}]})
for r in res:
    print(r)

{'_id': ObjectId('5cd9273cf5d4321faa2849ec'), 'student_id': 2015210315, 'student_name': '张三丰', 'gender': '男'}
{'_id': ObjectId('5cd92796f5d4321faa2849ed'), 'student_id': 2015210316, 'student_name': '李想', 'gender': '男'}
{'_id': 2015210317, 'student_name': '郭涛', 'gender': '男'}
{'_id': ObjectId('5cd9294af5d4321faa2849ee'), 'student_id': 2015210318, 'student_name': 'Joshi', 'gender': '男'}
{'_id': ObjectId('5cd9294af5d4321faa2849ef'), 'student_id': 2015210319, 'student_name': 'Kevin', 'gender': '男'}
{'_id': ObjectId('5cd9352af5d4321faa2849f1'), 'student_id': 2015210321, 'student_name': 'Steven', 'gender': '男', 'score': [70, 89]}
{'_id': ObjectId('5cd9352af5d4321faa2849f2'), 'student_id': 2015210322, 'student_name': 'Tony', 'gender': '男', 'score': [65, 89]}


#### `$exists`: 查看文档中是否存在某个`field`

```python
db.collection.find({'field_name': {'$exists': True | False}})
```

In [91]:
# Documents that have a 'score' field.
res = col.find({'score': {'$exists': True}})
for r in res:
    print(r)

{'_id': ObjectId('5cd9352af5d4321faa2849f1'), 'student_id': 2015210321, 'student_name': 'Steven', 'gender': '男', 'score': [70, 89]}
{'_id': ObjectId('5cd9352af5d4321faa2849f2'), 'student_id': 2015210322, 'student_name': 'Tony', 'gender': '男', 'score': [65, 89]}


#### `$type`: 查看域对应的值是否为特定的类型

```python
db.collection.find({'field_name': {'$type': '<BSON type>'}})

db.collection.find({'field_name': {'$type': ['<BSON type1>', '<BSON type2>', ...]}})
```

> 合法的[BSON类型](https://docs.mongodb.com/manual/reference/bson-types/)，注意使用里面的alias列作为参数值

In [105]:
# Documents whose _id is stored as the BSON type with alias 'int'.
res = col.find({'_id': {'$type': 'int'}})
for r in res:
    print(r)

{'_id': 2015210317, 'student_name': '郭涛', 'gender': '男'}


#### `$regex`: 正则匹配

```python
db.collection.find({'field_name': {'$regex': '<pattern>'}})
```

In [161]:
# Names that start with S or L, continue with lowercase letters only, and end in y.
res = col.find({'student_name': {'$regex': '^[SL][a-z]*y$'}})
for r in res:
    print(r)

{'_id': ObjectId('5cd9294af5d4321faa2849f0'), 'student_id': 2015210320, 'student_name': 'Lily', 'gender': '女'}


#### `$expr`: 应用公式

```python

db.collection.find({'$expr': {<expression>}})

```

In [130]:
# Plain query-operator form: student_id greater than 2015210319.
res = col.find({'student_id': {'$gt': 2015210319}})
for r in res:
    print(r)

{'_id': ObjectId('5cd9294af5d4321faa2849f0'), 'student_id': 2015210320, 'student_name': 'Lily', 'gender': '女'}
{'_id': ObjectId('5cd9352af5d4321faa2849f1'), 'student_id': 2015210321, 'student_name': 'Steven', 'gender': '男', 'score': [70, 89]}
{'_id': ObjectId('5cd9352af5d4321faa2849f2'), 'student_id': 2015210322, 'student_name': 'Tony', 'gender': '男', 'score': [65, 89]}


In [129]:
res = col.find({'$expr': {'$gt': ['$student_id', 2015210319]}})  # the same filter written as a $expr expression; '$student_id' refers to the field
for r in res:
    print(r)

{'_id': ObjectId('5cd9294af5d4321faa2849f0'), 'student_id': 2015210320, 'student_name': 'Lily', 'gender': '女'}
{'_id': ObjectId('5cd9352af5d4321faa2849f1'), 'student_id': 2015210321, 'student_name': 'Steven', 'gender': '男', 'score': [70, 89]}
{'_id': ObjectId('5cd9352af5d4321faa2849f2'), 'student_id': 2015210322, 'student_name': 'Tony', 'gender': '男', 'score': [65, 89]}


#### 数组操作符

- `$all`: 返回集合中对应域中的值包含所有给定查询值的文档

```python
db.collection.find({'<field_name>': {'$all': [value1, value2, ...]}})
```
> 如果某一文档的field匹配所有的[value1, value2, ...]，则返回该文档


In [137]:
res = col.find({'score': {'$all': [70, 89]}})  # documents whose score array contains both 70 and 89
for r in res:
    print(r)

{'_id': ObjectId('5cd9352af5d4321faa2849f1'), 'student_id': 2015210321, 'student_name': 'Steven', 'gender': '男', 'score': [70, 89]}
{'_id': ObjectId('5cd9352af5d4321faa2849f2'), 'student_id': 2015210322, 'student_name': 'Tony', 'gender': '男', 'score': [65, 89]}


等价于

In [132]:
# Equivalent to the $all query above: both equality conditions must hold for
# the score array. (The original line lacked the closing parenthesis of
# col.find(...) and did not parse.)
res = col.find({'$and': [{'score': 70}, {'score': 89}]})
for r in res:
    print(r)

{'_id': ObjectId('5cd9352af5d4321faa2849f1'), 'student_id': 2015210321, 'student_name': 'Steven', 'gender': '男', 'score': [70, 89]}


- `$elemMatch`: 返回数组中至少有1个元素满足`$elemMatch`指定的所有条件的文档

```python
db.collection.find({'<field_name>': {'$elemMatch': {<expr1>, <expr2>, ...}}})
```

In [135]:
res = col.find({'score': {'$elemMatch': {'$gt': 60, '$lt':70}}})  # documents whose score array has at least one element greater than 60 and less than 70
for r in res:
    print(r)

{'_id': ObjectId('5cd9352af5d4321faa2849f2'), 'student_id': 2015210322, 'student_name': 'Tony', 'gender': '男', 'score': [65, 89]}


- `$size`: 返回集合中数组元素个数为指定值的文档集合

```python
db.collection.find({'<field_name>': {'$size': n}})
```

In [143]:
res = col.find({'score': {'$size': 2}})  # documents whose score array has exactly 2 elements
for r in res:
    print(r)

{'_id': ObjectId('5cd9352af5d4321faa2849f1'), 'student_id': 2015210321, 'student_name': 'Steven', 'gender': '男', 'score': [70, 89]}
{'_id': ObjectId('5cd9352af5d4321faa2849f2'), 'student_id': 2015210322, 'student_name': 'Tony', 'gender': '男', 'score': [65, 89]}


### 2. `projection`: 投影

In [163]:
res = col.find({'student_id': 2015210315}, projection={'student_name':True, '_id':False})  # return only student_name (with _id suppressed) for student_id 2015210315
for r in res:
    print(r)

{'student_name': '张三丰'}


In [164]:
res = col.find({'student_id': 2015210315}, projection={'student_name':True})  # same query without '_id': False — _id is still returned alongside student_name
for r in res:
    print(r)

{'_id': ObjectId('5cd9273cf5d4321faa2849ec'), 'student_name': '张三丰'}


- `$slice`

In [172]:
# Two more sample documents, each with a five-element score array.
b_list = [
    {'student_id': 2015210323, 'student_name': 'Tom', 'gender': '男', 'score': [65, 89, 73, 78, 99]},
    {'student_id': 2015210324, 'student_name': 'Shelly', 'gender': '女', 'score': [55, 99, 43, 75, 87]}
]
col.insert_many(b_list)

<pymongo.results.InsertManyResult at 0x106108408>

In [184]:
# $slice: -2 in the projection keeps only the last two elements of each score array.
res = col.find({'score': {'$gte': 70}}, projection={'student_name':True, 'score': {'$slice': -2}})
for r in res:
    print(r)

{'_id': ObjectId('5cd9352af5d4321faa2849f1'), 'student_name': 'Steven', 'score': [70, 89]}
{'_id': ObjectId('5cd9352af5d4321faa2849f2'), 'student_name': 'Tony', 'score': [65, 89]}
{'_id': ObjectId('5cd9928af5d4321faa2849f3'), 'student_name': 'Tom', 'score': [78, 99]}
{'_id': ObjectId('5cd9928af5d4321faa2849f4'), 'student_name': 'Shelly', 'score': [75, 87]}


### 3. `.sort()`: 按照域对文档集合排序

In [191]:
from pymongo import ASCENDING, DESCENDING

In [195]:
# Sort by student_name descending, then by gender ascending; the score field is excluded from the results.
res = col.find({}, projection={'score': False}).sort([('student_name', DESCENDING), ('gender', ASCENDING)])
for r in res:
    print(r)

{'_id': 2015210317, 'student_name': '郭涛', 'gender': '男'}
{'_id': ObjectId('5cd92796f5d4321faa2849ed'), 'student_id': 2015210316, 'student_name': '李想', 'gender': '男'}
{'_id': ObjectId('5cd9273cf5d4321faa2849ec'), 'student_id': 2015210315, 'student_name': '张三丰', 'gender': '男'}
{'_id': ObjectId('5cd9352af5d4321faa2849f2'), 'student_id': 2015210322, 'student_name': 'Tony', 'gender': '男'}
{'_id': ObjectId('5cd9928af5d4321faa2849f3'), 'student_id': 2015210323, 'student_name': 'Tom', 'gender': '男'}
{'_id': ObjectId('5cd9352af5d4321faa2849f1'), 'student_id': 2015210321, 'student_name': 'Steven', 'gender': '男'}
{'_id': ObjectId('5cd9928af5d4321faa2849f4'), 'student_id': 2015210324, 'student_name': 'Shelly', 'gender': '女'}
{'_id': ObjectId('5cd9294af5d4321faa2849f0'), 'student_id': 2015210320, 'student_name': 'Lily', 'gender': '女'}
{'_id': ObjectId('5cd9294af5d4321faa2849ef'), 'student_id': 2015210319, 'student_name': 'Kevin', 'gender': '男'}
{'_id': ObjectId('5cd9294af5d4321faa2849ee'), 'student

### 4. `.limit()`: 限制返回文档个数

In [199]:
# Return at most 5 documents, with the score field excluded.
res = col.find({}, projection={'score': False}).limit(5)
for r in res:
    print(r)

{'_id': ObjectId('5cd9273cf5d4321faa2849ec'), 'student_id': 2015210315, 'student_name': '张三丰', 'gender': '男'}
{'_id': ObjectId('5cd92796f5d4321faa2849ed'), 'student_id': 2015210316, 'student_name': '李想', 'gender': '男'}
{'_id': 2015210317, 'student_name': '郭涛', 'gender': '男'}
{'_id': ObjectId('5cd9294af5d4321faa2849ee'), 'student_id': 2015210318, 'student_name': 'Joshi', 'gender': '男'}
{'_id': ObjectId('5cd9294af5d4321faa2849ef'), 'student_id': 2015210319, 'student_name': 'Kevin', 'gender': '男'}


### 5. `db.collection.distinct(field, query, options)` 去重

In [207]:
# Distinct gender values among documents whose student_id is above 2015210318.
# The original threshold 201521018 had only nine digits, so it was smaller
# than every ten-digit student_id and the filter excluded nothing.
res = col.distinct('gender', {'student_id': {'$gt': 2015210318}})
for r in res:
    print(r)

男
女


# 三、聚合简单示例

- `$addFields`用于生成计算字段

In [205]:
# Aggregation pipeline, one stage per list element:
#   $match     - keep only documents that have a score field
#   $addFields - add avg_score, the average of the score array
#   $project   - choose the fields to output and suppress _id
res = col.aggregate([
    {'$match': {'score': {'$exists': True}}},
    {'$addFields': {'avg_score':{'$avg': '$score'}}},
    {'$project': {'student_id': True, 'student_name': True, '_id': False, 'score': True, 'avg_score':True}}
])
for r in res:
    print(r)

{'student_id': 2015210321, 'student_name': 'Steven', 'score': [70, 89], 'avg_score': 79.5}
{'student_id': 2015210322, 'student_name': 'Tony', 'score': [65, 89], 'avg_score': 77.0}
{'student_id': 2015210323, 'student_name': 'Tom', 'score': [65, 89, 73, 78, 99], 'avg_score': 80.8}
{'student_id': 2015210324, 'student_name': 'Shelly', 'score': [55, 99, 43, 75, 87], 'avg_score': 71.8}


- `$unwind`用于展开一个array，array中的每一元素与文档中其他域值对构成新的文档

In [206]:
# Pipeline: keep documents that have a score field, then $unwind emits one
# output document per element of the score array.
# (The original list lacked the comma between the two stages and did not parse.)
res = col.aggregate([
    {'$match': {'score': {'$exists': True}}},
    {'$unwind': '$score'}
])
for r in res:
    print(r)

{'_id': ObjectId('5cd9352af5d4321faa2849f1'), 'student_id': 2015210321, 'student_name': 'Steven', 'gender': '男', 'score': 70}
{'_id': ObjectId('5cd9352af5d4321faa2849f1'), 'student_id': 2015210321, 'student_name': 'Steven', 'gender': '男', 'score': 89}
{'_id': ObjectId('5cd9352af5d4321faa2849f2'), 'student_id': 2015210322, 'student_name': 'Tony', 'gender': '男', 'score': 65}
{'_id': ObjectId('5cd9352af5d4321faa2849f2'), 'student_id': 2015210322, 'student_name': 'Tony', 'gender': '男', 'score': 89}
{'_id': ObjectId('5cd9928af5d4321faa2849f3'), 'student_id': 2015210323, 'student_name': 'Tom', 'gender': '男', 'score': 65}
{'_id': ObjectId('5cd9928af5d4321faa2849f3'), 'student_id': 2015210323, 'student_name': 'Tom', 'gender': '男', 'score': 89}
{'_id': ObjectId('5cd9928af5d4321faa2849f3'), 'student_id': 2015210323, 'student_name': 'Tom', 'gender': '男', 'score': 73}
{'_id': ObjectId('5cd9928af5d4321faa2849f3'), 'student_id': 2015210323, 'student_name': 'Tom', 'gender': '男', 'score': 78}
{'_id': 