# requests 的高级用法

## 文件上传
requests可以模拟提交一些数据。如果有网站需要上传文件，可以用它实现

In [1]:
import requests

In [2]:
# Upload a file as multipart form data. The file has to be opened in binary
# mode, and the context manager closes the handle once the request is done.
with open('favicon.ico', 'rb') as fh:
    files = {'file': fh}
    r = requests.post("http://httpbin.org/post", files=files)
print(r.text)

{
  "args": {}, 
  "data": "", 
  "files": {
    "file": "data:application/octet-stream;base64,AAABAAIAEBAAAAEAIAAoBQAAJgAAACAgAAABACAAKBQAAE4FAAAoAAAAEAAAACAAAAABACAAAAAAAAAFAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABERE3YTExPFDg4OEgAAAAAAAAAADw8PERERFLETExNpAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQUFJYTExT8ExMU7QAAABkAAAAAAAAAAAAAABgVFRf/FRUX/xERE4UAAAAAAAAAAAAAAAAAAAAAAAAAABEREsETExTuERERHhAQEBAAAAAAAAAAAAAAAAAAAAANExMU9RUVF/8VFRf/EREUrwAAAAAAAAAAAAAAABQUFJkVFRf/BgYRLA4ODlwPDw/BDw8PIgAAAAAAAAAADw8PNBAQEP8VFRf/FRUX/xUVF/8UFBSPAAAAABAQEDAPDQ//AAAA+QEBAe0CAgL/AgIC9g4ODjgAAAAAAAAAAAgICEACAgLrFRUX/xUVF/8VFRf/FRUX/xERES0UFBWcFBQV/wEBAfwPDxH7DQ0ROwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA0NEjoTExTnFRUX/xUVF/8SEhKaExMT2RUVF/8VFRf/ExMTTwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAERERTBUVF/8VFRf/ExMT2hMTFPYVFRf/FBQU8AAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAITExTxFRUX/xMTFPYTExT3FRUX/xQUFOEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFBQU4RUVF/8TExT3FBQU3hUVF/8TExT5Dw8PIQAAAAAAAAAAA

## Cookies 

In [4]:
# Send a browser-like User-Agent header with the request.
headers = dict([
    ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36'),
])
r = requests.get("https://www.baidu.com", headers=headers)
# print(r.cookies) would dump the whole cookie jar instead;
# here each cookie is printed on its own line as name=value.
for key, value in r.cookies.items():
    print('='.join((key, value)))

BAIDUID=E3DE15B3DC5D47425EA1263CE015E33D:FG=1
BIDUPSID=E3DE15B3DC5D474279889EF735EA6ED1
H_PS_PSSID=1456_21121_30826_31186_31271_31051_30824_26350_31164_22157
PSTM=1586600083
BDSVRTM=0
BD_HOME=1


## 会话维持
在requests中，如果直接利用get()或post()等方法的确可以做到模拟网页的请求，但是这实际上是相当于不同的会话，也就是说相当于你用了两个浏览器打开了不同的页面。
也可以在两次请求时设置一样的cookies。但这样做起来显得很烦琐。

使用requests.Session()对象可以维持一个会话

In [2]:
import requests
# Proxy URLs are written with an explicit scheme, as the requests
# documentation requires.
proxies = {
    'http': 'http://127.0.0.1:1080',
    'https': 'http://127.0.0.1:1080',
}
# Requesting this URL sets a cookie named "number" with the value 123456789.
r = requests.get('http://httpbin.org/cookies/set/number/123456789', proxies=proxies)
print(r.text, '\n')
# Requesting this URL returns the cookies sent with the current request.
# This is a separate requests.get() call, so the cookie set above is not sent.
r = requests.get('http://httpbin.org/cookies')
print(r.text, '\n')

# A cookie can also be supplied by hand through the Cookie header.
headers = {'Cookie': "number=122322"}
r = requests.get('http://httpbin.org/cookies', headers=headers)
print(r.text)

{
  "cookies": {
    "number": "123456789"
  }
}
 

{
  "cookies": {}
}
 

{
  "cookies": {
    "number": "122322"
  }
}



In [3]:
# As shown above, separate requests.get() calls do not keep session state
# between requests. Building the requests from one Session() instance makes
# them behave as if they were made within the same session.
import requests

# Using the session as a context manager closes it when the block exits.
with requests.Session() as s:
    s.get('http://httpbin.org/cookies/set/number/123456789')
    r = s.get('http://httpbin.org/cookies')
print(r.text)

{
  "cookies": {
    "number": "123456789"
  }
}



## SSL证书验证
requests提供了证书验证的功能。当发送HTTPS请求的时候，它会检查SSL证书，我们可以使用verify参数控制是否检查此证书。如果不加verify参数，默认值是True，会自动验证。

In [8]:
# 12306's certificate was not trusted by the official CAs, which caused a
# certificate verification error; by now its certificate should verify fine.
import requests

target = 'https://www.12306.cn'
# verify=False turns certificate checking off for this request.
response = requests.get(target, verify=False)
print(response.status_code)

200


## 代理设置

In [None]:
import requests
 
# Map each URL scheme to the proxy that should handle it
# (a local proxy would be e.g. 127.0.0.1:1080).
# Credentials can be embedded in the URL: "http://user:password@10.10.1.10:3128/"
proxies = dict(
    http="http://10.10.1.10:3128",
    https="http://10.10.1.10:1080",
)

requests.get("https://www.taobao.com", proxies=proxies)

# To go through a SOCKS proxy instead, use socks5:// URLs:
# proxies = {
#     'http': 'socks5://user:password@host:port',
#     'https': 'socks5://user:password@host:port'
# }
# This needs the SOCKS extra to be installed:
# pip3 install 'requests[socks]'

## 超时设置
设置timeout关键字
```
r = requests.get("https://www.taobao.com", timeout = 1)
```
+ 请求分为两个阶段，连接(connect)和读取(read)
+ timeout也可以设置为一个二元组(connect_time, read_time)，分别限制连接阶段和读取阶段；requests不支持总时长(total_time)，传入其他长度的元组会抛出ValueError
+ 设置为None则为永久等待

In [20]:
import requests

# A single number is used for both the connect and the read timeout.
try:
    r = requests.get("https://www.taobao.com", timeout=0.1)
except requests.exceptions.RequestException as e:
    print(e)

# A tuple must be exactly (connect_timeout, read_timeout); requests rejects
# any other tuple length with a ValueError, and has no total-time limit.
try:
    r = requests.get("https://www.taobao.com", timeout=(0.1, 0.2))
except requests.exceptions.RequestException as e:
    print(e)

r = requests.get("https://www.taobao.com", timeout=None)  # wait indefinitely

HTTPSConnectionPool(host='www.taobao.com', port=443): Read timed out. (read timeout=0.1)


## 身份认证
在访问网站时，我们可能会遇到这样的认证页面
![](./images/authentic.jpg)
这种简单的身份验证可以使用requests自带的身份认证功能（需要先执行 `from requests.auth import HTTPBasicAuth`）：
```
r = requests.get('http://localhost:5000', auth=HTTPBasicAuth('username', 'password'))
```
也可以直接传入一个元组
```
r = requests.get('http://localhost:5000', auth=('username', 'password'))
```

## Prepared Request 
跟urllib类似，可以用一个Request来管理请求的各种参数。
使用*Request*对象，可以将请求当作独立的对象看待，这样在进行队列调度时会比较方便<br>
API:<br>
**class requests.Request(method=None, url=None, headers=None, files=None, data=None, params=None, auth=None, cookies=None, hooks=None, json=None)**

In [22]:
from requests import Request, Session
url = 'http://httpbin.org/post'
data = {
    'name': 'zeal',
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36',
}
# Describe the request as a standalone object first.
req = Request(method='POST', url=url, data=data, headers=headers)
# Using the session as a context manager closes it when the block exits.
with Session() as s:
    # Proxy URLs are written with an explicit scheme.
    s.proxies = {'http': 'http://127.0.0.1:1080'}
    prepped = s.prepare_request(req)
    r = s.send(prepped)  # send the prepared request
print(r.text)


{
  "args": {}, 
  "data": "", 
  "files": {}, 
  "form": {
    "name": "zeal"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Content-Length": "9", 
    "Content-Type": "application/x-www-form-urlencoded", 
    "Host": "httpbin.org", 
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36", 
    "X-Amzn-Trace-Id": "Root=1-5e91efc3-0cf3a88f76e7f789f16cf6b1"
  }, 
  "json": null, 
  "origin": "47.241.78.168", 
  "url": "http://httpbin.org/post"
}

