# HTTP Requests

## The Requests Library

In [None]:
import requests

# Fetch the Senate contact-information page, sending a custom User-Agent header.
url = 'https://www.senate.gov/general/contact_information/senators_cfm.cfm'
headers = {'user-agent': 'my-app/0.0.1'}
# requests applies no timeout by default, so set one to avoid hanging forever.
r = requests.get(url, headers=headers, timeout=10)
# Raise on a 4xx/5xx response so an error page is not mistaken for the real one.
r.raise_for_status()
content = r.text
print(content)

### Passing Parameters

In [None]:
# Repeat the request with a query string; requests encodes `params` into the URL.
# A timeout is set because requests would otherwise wait indefinitely.
rIL = requests.get(url, headers=headers, params={'State': 'IL'}, timeout=10)
# Raise on a 4xx/5xx response instead of printing an error page.
rIL.raise_for_status()
illinois = rIL.text
print(illinois)

## Beautiful Soup

In [None]:
from bs4 import BeautifulSoup
import re

# Parse the downloaded HTML with Python's built-in 'html.parser' backend.
soup = BeautifulSoup(markup=content, features='html.parser')

#### Find all &lt;a&gt; tags

In [None]:
# Collect every <a> element in the document and report how many there are.
links = soup.find_all(name='a')
print(len(links))

#### Find all tags that begin with "t"

In [None]:
# A compiled pattern as the tag filter is matched against each tag's name,
# so this selects every tag whose name starts with "t".
t_tags = soup.find_all(name=re.compile(r'^t'))
print(len(t_tags))

#### Find all &lt;img&gt; tags that have border="0" and alt="" attributes

In [None]:
# Attribute filters passed as keywords: border must be "0" and alt must be empty.
imgs = soup.find_all('img', border='0', alt='')
print(len(imgs))

#### Find all &lt;img&gt; tags that are missing the alt attribute

In [None]:
# An attribute filter of False selects tags that do not have that attribute.
imgs_no_alt = soup.find_all('img', alt=False)
print(len(imgs_no_alt))

#### Find the first three phone numbers formatted as "(###) ###-####"

In [None]:
# Match text nodes that contain a phone number such as "(202) 224-1234".
# The pattern is a raw string so `\(` and `\d` reach the regex engine unchanged,
# and `string=` replaces the deprecated `text=` keyword.
phones = soup.find_all(string=re.compile(r'\(\d{3}\) \d{3}-\d{4}'), limit=3)
print(phones)

#### Find the first three &lt;td&gt; tags that contain a phone number

In [None]:
# Return the first three <td> tags whose string matches the phone-number pattern.
# The pattern is a raw string so `\(` and `\d` reach the regex engine unchanged,
# and `string=` replaces the deprecated `text=` keyword.
phones_in_tds = soup.find_all(
    'td',
    string=re.compile(r'\(\d{3}\) \d{3}-\d{4}'),
    limit=3,
)
print(phones_in_tds)

#### Find all &lt;a&gt; tags that have an href attribute

In [None]:
# An attribute filter of True selects tags that have the attribute, whatever its value.
links_with_href = soup.find_all('a', attrs={'href': True})
print(len(links_with_href))

#### Find all &lt;a&gt; tags that have an href attribute that contains "senate.gov/"

In [None]:
# Select <a> tags whose href contains the literal text "senate.gov/".
# The dot is escaped; an unescaped "." would match any character at that position.
internal_links = soup.find_all('a', href=re.compile(r'senate\.gov/'))
print(len(internal_links))

#### Find all &lt;a&gt; tags that have an href attribute that ends with "senate.gov/"

In [None]:
# Select <a> tags whose href ends with the literal text "senate.gov/".
# The dot is escaped; an unescaped "." would match any character at that position.
senator_links = soup.find_all('a', href=re.compile(r'senate\.gov/$'))
print(len(senator_links))

#### Find all &lt;a&gt; tags that have an href attribute that ends with "senate.gov/" or "senate.gov"

In [None]:
# Select <a> tags whose href ends with "senate.gov", with or without a trailing
# slash ("/?" makes the slash optional). The dot is escaped so it only matches ".".
senator_links = soup.find_all('a', href=re.compile(r'senate\.gov/?$'))
print(len(senator_links))

### The select() Method
Get elements based on CSS selectors

In [None]:
# CSS selector: every <a> nested inside an <li> inside <ul class="topnav">.
top_nav_links = soup.select('ul.topnav li a')
for nav_link in top_nav_links:
    # Print each link's text with surrounding whitespace removed.
    label = nav_link.get_text().strip()
    print(label)

## Put It All Together: Output List of Senators

In [None]:
import requests
from bs4 import BeautifulSoup
import re

# Download the Senate contact page and print a numbered list of senators.
url = 'https://www.senate.gov/general/contact_information/senators_cfm.cfm'
headers = {'user-agent': 'my-app/0.0.1'}
# requests applies no timeout by default, so set one to avoid hanging forever.
r = requests.get(url, headers=headers, timeout=10)
# Raise on a 4xx/5xx response so an error page is never parsed as the real one.
r.raise_for_status()
content = r.text

soup = BeautifulSoup(content, 'html.parser')

# Links whose href ends in "senate.gov" or "senate.gov/" are taken to be the
# senators' own sites. The dot is escaped so it only matches a literal ".".
senators = soup.find_all('a', href=re.compile(r'senate\.gov/?$'))

# Number the output starting from 1.
for i, senator in enumerate(senators, 1):
    print(i, senator.text.strip())

## XML

In [None]:
import requests
from bs4 import BeautifulSoup

# Download the XML version of the Senate contact list and print each member's name.
url = 'https://www.senate.gov/general/contact_information/senators_cfm.xml'
headers = {'user-agent': 'my-app/0.0.1'}
# requests applies no timeout by default, so set one to avoid hanging forever.
r = requests.get(url, headers=headers, timeout=10)
# Raise on a 4xx/5xx response so an error page is never parsed as the real one.
r.raise_for_status()
content = r.text

# NOTE(review): per the Beautiful Soup docs the 'xml' feature needs lxml installed.
soup = BeautifulSoup(content, 'xml')

senators = soup.find_all('member')

# Number the output starting from 1; each <member> is read for its
# <first_name> and <last_name> children.
for i, senator in enumerate(senators, 1):
    print(i, senator.first_name.text, senator.last_name.text)