# requests API
## requests.get

In [32]:
# Difference between .text and .json(): .text is the response body as a str, .json() parses that body into Python objects
import requests

# Query parameters; requests encodes them into the URL's query string.
data = {'name': 'Damon To', 'age': 20}
rp = requests.get('http://httpbin.org/get', params=data)

# Raw view: the body exactly as received, as a str.
print(type(rp.text), rp.text, sep='\n')
print('----------------------------------------------------------------')
# Parsed view: the same body decoded from JSON (a dict for this endpoint).
print(type(rp.json()), rp.json(), sep='\n')

<class 'str'>
{
  "args": {
    "age": "20", 
    "name": "Damon To"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Connection": "close", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.18.4"
  }, 
  "origin": "183.7.82.193", 
  "url": "http://httpbin.org/get?name=Damon+To&age=20"
}

----------------------------------------------------------------
<class 'dict'>
{'args': {'age': '20', 'name': 'Damon To'}, 'headers': {'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate', 'Connection': 'close', 'Host': 'httpbin.org', 'User-Agent': 'python-requests/2.18.4'}, 'origin': '183.7.82.193', 'url': 'http://httpbin.org/get?name=Damon+To&age=20'}


### 抓取网页

In [11]:
import requests
import re

# Send a browser-like User-Agent instead of the default python-requests one.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
}
rp = requests.get('https://www.zhihu.com/explore', headers=headers)

# Extract with a regular expression: capture the text of each question link
# that follows an "explore-feed" marker. re.S lets '.' match newlines, so a
# match can span several lines of HTML.
pattern = re.compile('explore-feed.*?question_link.*?>(.*?)</a>', re.S)
titles = pattern.findall(rp.text)
print(titles)

['\n网吧里发生过哪些“惊为天人”的事?\n', '\n高端研究人员是否在逐渐老龄化？\n', '\n创业能有多艰辛？\n', '\n在物理研究中，你都见过哪些「还有这种操作」的神操作？\n', '\n教室里发生过哪些「惊为天人」的事？\n', '\n哪个瞬间你觉得自己像个智障？\n', '\n熊孩子的哪些神句震惊了你？\n', '\n攀登珠穆朗玛峰南坡与攀登北坡有什么区别？\n', '\n目前（2017年）阿里巴巴在与亚马逊的全球电商竞争中处于怎样的态势？\n', '\n第一次进派出所，怎样做才能显得很老练？\n']


### 抓取二进制数据

In [13]:
import requests

# Download the GitHub favicon and save it next to the notebook.
rp = requests.get('https://www.github.com/favicon.ico')
# Fail loudly on a 4xx/5xx response rather than saving an error page as the icon.
rp.raise_for_status()
# .content is the undecoded body (bytes), which is what a binary file needs.
# The with-statement closes the file on exit, so no explicit close() is required.
with open('favicon.ico', 'wb') as f:
    f.write(rp.content)
# After running this cell, favicon.ico exists in the working directory.

## requests.post

In [15]:
import requests

# Form fields for the request body; httpbin echoes them back under "form".
data = {'name': 'Damon To', 'age': 20}
rp = requests.post('http://httpbin.org/post', data=data)
print(rp.text)

{
  "args": {}, 
  "data": "", 
  "files": {}, 
  "form": {
    "age": "20", 
    "name": "Damon To"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Connection": "close", 
    "Content-Length": "20", 
    "Content-Type": "application/x-www-form-urlencoded", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.18.4"
  }, 
  "json": null, 
  "origin": "125.94.170.146", 
  "url": "http://httpbin.org/post"
}



## requests 高级使用
### 文件上传

In [16]:
import requests

# Upload favicon.ico as the form field named "file".
# Opening the file in a with-block guarantees the handle is closed once the
# request has been sent (the original left it open for the kernel's lifetime).
with open('favicon.ico', 'rb') as icon:
    files = {'file': icon}
    rp = requests.post('http://httpbin.org/post', files=files)
print(rp.text)

{
  "args": {}, 
  "data": "", 
  "files": {
    "file": "data:application/octet-stream;base64,AAABAAIAEBAAAAEAIAAoBQAAJgAAACAgAAABACAAKBQAAE4FAAAoAAAAEAAAACAAAAABACAAAAAAAAAFAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABERE3YTExPFDg4OEgAAAAAAAAAADw8PERERFLETExNpAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABQUFJYTExT8ExMU7QAAABkAAAAAAAAAAAAAABgVFRf/FRUX/xERE4UAAAAAAAAAAAAAAAAAAAAAAAAAABERE8ETExTuERERHg8PDxAAAAAAAAAAAAAAAAAAAAANExMU9RUVF/8VFRf/EhIUrwAAAAAAAAAAAAAAABQUFJkVFRf/BQURLA0NDVwODg/BDw8PIgAAAAAAAAAADg4ONBAQEP8VFRf/FRUX/xUVF/8TExOPAAAAAA8PDzAPDQ//AAAA+QEBAe0CAgL/AgIC9g0NDTgAAAAAAAAAAAcHB0ACAgLrFRUX/xUVF/8VFRf/FRUX/xERES0TExacFBQV/wEBAfwPDxH7DAwROwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA0NEToTExTnFRUX/xUVF/8TExOaExMT2RUVF/8VFRf/ExMTTwAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEBAQTBUVF/8VFRf/ExMT2hMTFPYVFRf/FBQU8AAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAITExTxFRUX/xMTFPYTExT3FRUX/xQUFOEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFBQU4RUVF/8TExT3ExMU3hUVF/8TExT5Dw8PIQAAAAAAAAAAA

### Cookie 处理

In [25]:
import requests

# Login form fields, sent as the POST body.
params = {'username': 'Ryan', 'password': 'password'}
rp = requests.post('http://pythonscraping.com/pages/cookies/welcome.php', data=params)
print('Cookie is set to:', rp.cookies.get_dict(), sep='\n')
print('-------------------')
print('Going to profile page...')
# Hand the cookies from the login response to the next request explicitly;
# without a session nothing carries them over automatically.
rp = requests.get(
    'http://pythonscraping.com/pages/cookies/profile.php',
    cookies=rp.cookies,
)
print(rp.text)

Cookie is set to:
{'loggedin': '1', 'username': 'Ryan'}
-------------------
Going to profile page...
Hey Ryan! Looks like you're still logged into the site!


### 会话维持
使用 session 持续跟踪会话信息(包括：cookie, header, HTTP 协议信息等等)

In [27]:
import requests

# Login form fields, sent as the POST body.
params = {'username' : 'Ryan', 'password' : 'password'}

# requests.Session() replaces the deprecated requests.session() factory.
# Used as a context manager, the session is closed when the block exits.
with requests.Session() as session:
    s = session.post('http://pythonscraping.com/pages/cookies/welcome.php', data=params)
    print('Cookie is set to:')
    print(s.cookies.get_dict())
    print('-------------------')
    print('Going to profile page...')
    # No cookies argument here: the session re-sends the cookies it received above.
    s = session.get('http://pythonscraping.com/pages/cookies/profile.php')
    print(s.text)

Cookie is set to:
{'loggedin': '1', 'username': 'Ryan'}
-------------------
Going to profile page...
Hey Ryan! Looks like you're still logged into the site!


### HTTP 基本接入认证

In [28]:
import requests
from requests.auth import AuthBase
from requests.auth import HTTPBasicAuth

# Credentials for HTTP Basic authentication, attached to the request via auth=.
auth = HTTPBasicAuth(username='rayn', password='password')
rp = requests.post(
    'http://pythonscraping.com/pages/auth/login.php',
    auth=auth,
)
print(rp.text)

<p>Hello rayn.</p><p>You entered password as your password.</p>
