In [1]:
import requests
import re
from requests.packages import urllib3
from requests.auth import HTTPBasicAuth
from requests import Request, Session

In [2]:
"""simple demo"""

r = requests.get('https://www.baidu.com/')
print(type(r))
print(r.status_code)
print(type(r.text))
# print(r.text)
print(r.cookies)
print(list(r.cookies.items()))

<class 'requests.models.Response'>
200
<class 'str'>
<RequestsCookieJar[<Cookie BDORZ=27315 for .baidu.com/>]>
[('BDORZ', '27315')]


In [3]:
"""request support several types of methods"""

r = requests.post('http://httpbin.org/post')
r = requests.put('http://httpbin.org/put')  # 向服务器上传更新资源或在服务器上创建新资源
r = requests.delete('http://httpbin.org/delete')  # 请求服务器删除资源
r = requests.options('http://httpbin.org/get')  # 请求有关目标资源的通信选项

In [4]:
"""request.get: build the get request body"""

# not clever: r = requests.get('http://httpbin.org/get?name=germey&age=22')
data = {
    'name': 'getmey',
    'age': 22
}
r = requests.get('http://httpbin.org/get', params=data)
# print(r.text)

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'
}
r = requests.get('http://www.zhihu.com/explore', headers=headers)
pattern = re.compile('explore-feed.*?question_link.*?>(.*?)</a>', re.S)
titles = re.findall(pattern, r.text)
# print(titles)

In [5]:
"""request.get: crawl binary file, such as image\vedio..."""

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'
}
try:
    r = requests.get('https://github.com/favicon.png', headers=headers)
    # print(r.text)
    # print(r.content)
except requests.exceptions.RequestException as e:
    print(e)
with open('../data/favicon.ico', 'wb') as f:
    f.write(r.content)

In [6]:
"""request.post: build the post request body"""

data = {'name': 'germey', 'age': '22'}
r = requests.post('http://httpbin.org/post', data=data)
print(r.text)

{
  "args": {}, 
  "data": "", 
  "files": {}, 
  "form": {
    "age": "22", 
    "name": "germey"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate, br", 
    "Content-Length": "18", 
    "Content-Type": "application/x-www-form-urlencoded", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.31.0", 
    "X-Amzn-Trace-Id": "Root=1-65535ad0-1ced2d387084c5bf3b07a85d"
  }, 
  "json": null, 
  "origin": "223.76.221.58", 
  "url": "http://httpbin.org/post"
}



In [7]:
"""other info except for res content"""

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'
}
r = requests.get('http://www.jianshu.com', headers=headers)
print(type(r.status_code), r.status_code)
print(type(r.headers), r.headers)
print(type(r.cookies), r.cookies)
print(type(r.url), r.url)
print(type(r.history), r.history)

exit() if not r.status_code == requests.codes.okay else print('Request Successfully')


<class 'int'> 200
<class 'requests.structures.CaseInsensitiveDict'> {'Date': 'Tue, 14 Nov 2023 11:32:33 GMT', 'Content-Type': 'text/html; charset=utf-8', 'Transfer-Encoding': 'chunked', 'Connection': 'keep-alive', 'Vary': 'Accept-Encoding', 'X-Frame-Options': 'SAMEORIGIN', 'X-XSS-Protection': '1; mode=block', 'X-Content-Type-Options': 'nosniff', 'ETag': 'W/"4dd07b67db88eee51da6ad146313ca94"', 'Cache-Control': 'max-age=0, private, must-revalidate', 'Set-Cookie': 'locale=zh-CN; path=/', 'X-Request-Id': '9831931a-8f89-4c13-a88a-da6120d55a29', 'X-Runtime': '0.003918', 'Strict-Transport-Security': 'max-age=31536000; includeSubDomains; preload', 'Content-Encoding': 'gzip'}
<class 'requests.cookies.RequestsCookieJar'> <RequestsCookieJar[<Cookie locale=zh-CN for www.jianshu.com/>]>
<class 'str'> https://www.jianshu.com/
<class 'list'> [<Response [302]>]
Request Successfully


In [8]:
"""upload files through post"""
import json

files = {'file': open('../data/favicon.ico', 'rb')}
r = requests.post("http://httpbin.org/post", files=files)
content = r.text
content_dict = json.loads(content)
print(content_dict.keys())

dict_keys(['args', 'data', 'files', 'form', 'headers', 'json', 'origin', 'url'])


In [9]:
"""session：only request once"""

# 错误示例
requests.get('http://httpbin.org/cookies/set/number/123456789')
r = requests.get('http://httpbin.org/cookies')
# print(r.text)

# 正确示例
s = requests.Session()
s.get('http://httpbin.org/cookies/set/number/123456789')
r = s.get('http://httpbin.org/cookies')
# print(r.text)

In [10]:
"""session: prepare request"""

url = 'http://httpbin.org/post'
data = {
    'name': 'germey'
}
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome' +
                  '/96.0.4664.45 Safari/537.36'
}
s = Session()
req = Request('POST', url, data=data, headers=headers)
prepped = s.prepare_request(req)
r = s.send(prepped)
print(r.text)

{
  "args": {}, 
  "data": "", 
  "files": {}, 
  "form": {
    "name": "germey"
  }, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate, br", 
    "Content-Length": "11", 
    "Content-Type": "application/x-www-form-urlencoded", 
    "Host": "httpbin.org", 
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36", 
    "X-Amzn-Trace-Id": "Root=1-65535ad6-3ad8070342747f1c7c7207be"
  }, 
  "json": null, 
  "origin": "223.76.221.58", 
  "url": "http://httpbin.org/post"
}



In [11]:
"""SSL verification"""

urllib3.disable_warnings()
response = requests.get('https://www.12306.cn', verify=False)
print(response.status_code)

200


In [12]:
"""proxy"""

proxies = {
    "http": "https://127.0.0.1/50151",
    "https": "https://127.0.0.1/50151",
}
requests.get("https://www.baidu.com", headers=headers, proxies=proxies)
# connect refuse: possible because the request body is not full

ProxyError: HTTPSConnectionPool(host='www.baidu.com', port=443): Max retries exceeded with url: / (Caused by ProxyError('Cannot connect to proxy.', NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x7fa88cd00410>: Failed to establish a new connection: [Errno 111] Connection refused')))