# Web 1 
- requests module
    - download content from the Internet using HTTP

In [1]:
# Stretch the notebook's .container element to the full browser width.
# display/HTML come from the public IPython.display module; importing
# display from IPython.core.display is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
# import statements
import requests
import json
from pandas import DataFrame

# requests module

### HTTP Status Codes overview
- 1XX : Informational
- 2XX : Successful
- 3XX : Redirection
- 4XX : Client Error
- 5XX : Server Error
    
https://en.wikipedia.org/wiki/List_of_HTTP_status_codes

## DEMO 1: Simple string example
- URL: https://www.msyamkumar.com/hello.txt

In [3]:
# Simple string example: download a plain-text file and look at its body.
url = "https://www.msyamkumar.com/hello.txt"
# Without a timeout, requests waits forever if the server never answers.
r = requests.get(url, timeout=10)
assert r.status_code == 200  # 200 is the "OK" status code
print(type(r.text))  # r.text is the response body decoded into a str
r.text

<class 'str'>


'Hello CS220 / CS319 students! Welcome to my website. Hope you are staying safe and healthy!\n'

In [4]:
# Request a URL that does not exist to demonstrate HTTP error handling.
typo_url = "https://www.msyamkumar.com/hello.txttttttt"
# Without a timeout, requests waits forever if the server never answers.
r = requests.get(typo_url, timeout=10)
try:
    # raise_for_status() raises requests.HTTPError for 4XX / 5XX responses
    # (it does nothing for successful responses).
    r.raise_for_status()
except requests.HTTPError as err:
    # Catch and show the error so "Restart & Run All" can continue past
    # this intentionally failing request.
    print(f"HTTPError: {err}")
else:
    print(r.text)

HTTPError: 404 Client Error: Not Found for url: https://www.msyamkumar.com/hello.txttttttt

## DEMO 2: JSON file example
- URL: https://www.msyamkumar.com/scores.json
- json.load(FILE_OBJECT)
- json.loads(STRING)

In [5]:
# JSON example: download a JSON document and parse it into Python objects.
url = "https://www.msyamkumar.com/scores.json"
# Without a timeout, requests waits forever if the server never answers.
r = requests.get(url, timeout=10)
r.raise_for_status()
urltext = r.text

# json.loads parses a JSON *string* (json.load would take a file object).
print(type(json.loads(urltext)))
#d = json.loads(urltext)

# Shortcut for json.loads(r.text)
d = r.json()
print(d)

<class 'dict'>
{'alice': 100, 'bob': 200, 'cindy': 300}


## Good GET Etiquette

Don't make a lot of requests to the same server all at once.
 - Requests use up the server's time
 - Major websites will often ban users who make too many requests
 - Too many requests can even take a server down, much like a DDoS attack (DON'T DO THIS)
 
In CS220 we will usually give you a link to a copied file to avoid overloading the site.

## DEMO 3: reddit json processing
- URL: https://www.msyamkumar.com/cs220/f21/materials/lectureDemo_code/lec-31/examples/UWMadison.json
- <b>(Please don't use this!)</b> Original URL: https://www.reddit.com/r/UWMadison.json 

In [6]:
# It is not recommended to spam the original website with 900+ HTTP GET requests, so please use the downloaded version
# url = "https://www.reddit.com/r/UWMadison.json"
# r = requests.get(url)
# r.raise_for_status()

# Use the below URL instead
url = "https://www.msyamkumar.com/cs220/f21/materials/lectureDemo_code/lec-31/examples/UWMadison.json"
# Without a timeout, requests waits forever if the server never answers.
r = requests.get(url, timeout=10)
r.raise_for_status()
#r.json()
# Only print the type of the parsed document: the full JSON is large.
print(type(r.json()))

<class 'dict'>


### How to explore an unknown JSON?
- If you run into a dict, call its `.keys()` method to see which keys it contains
- If you run into a list, iterate over the list and print each item

In [7]:
# Parse the JSON body of the response fetched in the previous cell;
# the top-level object is a dict.
d = r.json()
# Uncomment each line to see the corresponding output
# print(d.keys())
# print(d["kind"])
# print(d["data"])

In [8]:
# A notebook only auto-displays the value of the LAST expression in a cell,
# so the type has to be printed explicitly to be visible.
print(type(d["data"]))
# d["data"] is itself a dict, so .keys() lists what it contains.
d["data"].keys()

dict_keys(['after', 'dist', 'modhash', 'geo_filter', 'children', 'before'])

In [9]:
# Check what kind of object is stored under "children" before digging into it.
print(type(d["data"]["children"]))
# Cannot call keys method on a list!
# d["data"]["children"].keys() # Uncomment to see AttributeError

<class 'list'>


In [10]:
# Walk the list of children and print the score and title of each one.
for item in d["data"]["children"]:
    # Exploration steps -- uncomment one at a time to inspect the structure:
    #print(type(item))
    #print(item)
    #print(type(item["data"]))
    #print(item["data"].keys())
    post = item["data"]  # the fields of interest live in the nested "data" dict
    print(post["score"], post["title"])

265 A Relatively Comprehensive Guide to UW Football Games and Traditions
112 Planning a 1 month anniversary of CS577 midterm not being graded
4 CS classes already full?
71 NO, MATH 234 IS NOT CURVED
5 Social Work 206 with Curtis
3 Recommendations for an easy 3 credit literature course?
6 TIFU Help
9 Adderall: The epidemic that no one is talking about
5 Was the Calc 3 midterm harder than expected or just me?
4 Physics 202 - Midterm 2
3 What Python/data science course should I add on?
2 Missing keys in Engineering Building or Union South
3 ZOOLOGY 655- Modeling Neurodevelopmental disease
250 Worst day of my life.
2 CNSR SCI 321
2 AH Sp 2022 Course Highlight: Prof. Spaulding, AH 408:“Global Modernism,” T/TR 11am–12:15pm. What does 20thc. modernism look like from a global perspective? Is it possible to speak of a singular modernism at all, as opposed to a multiplicity of diverse modernisms, each responsive to local conditions?
2 Math 222 Professors
2 See-through red water bottle LOST
1 Wha

## DEMO 4: State populations
- URL: https://www.msyamkumar.com/cs220/f21/materials/lectureDemo_code/lec-31/examples/data/state_files.txt

In [11]:
# Download the index file that lists one JSON file name per line.
prefix_URL = "https://www.msyamkumar.com/cs220/f21/materials/lectureDemo_code/lec-31/examples/data/"
# Without a timeout, requests waits forever if the server never answers.
r = requests.get(prefix_URL + "state_files.txt", timeout=10)
r.raise_for_status()
# splitlines() handles both "\n" and "\r\n" line endings; blank entries
# (e.g. from a trailing newline) are dropped so that no bogus URL is built
# from an empty file name later on.
state_files = [line.strip() for line in r.text.splitlines() if line.strip()]
state_files

['Alabama.json',
 'Alaska.json',
 'Arizona.json',
 'Arkansas.json',
 'California.json',
 'Colorado.json',
 'Connecticut.json',
 'Delaware.json',
 'Florida.json',
 'Georgia.json',
 'Hawaii.json',
 'Idaho.json',
 'Illinois.json',
 'Indiana.json',
 'Iowa.json',
 'Kansas.json',
 'Kentucky.json',
 'Louisiana.json',
 'Maine.json',
 'Maryland.json',
 'Massachusetts.json',
 'Michigan.json',
 'Minnesota.json',
 'Mississippi.json',
 'Missouri.json',
 'Montana.json',
 'Nebraska.json',
 'Nevada.json',
 'New_Hampshire.json',
 'New_Jersey.json',
 'New_Mexico.json',
 'New_York.json',
 'North_Carolina.json',
 'North_Dakota.json',
 'Ohio.json',
 'Oklahoma.json',
 'Oregon.json',
 'Pennsylvania.json',
 'Rhode_Island.json',
 'South_Carolina.json',
 'South_Dakota.json',
 'Tennessee.json',
 'Texas.json',
 'Utah.json',
 'Vermont.json',
 'Virginia.json',
 'Washington.json',
 'West_Virginia.json',
 'Wisconsin.json',
 'Wyoming.json']

In [12]:
# Download every file listed in state_files and collect one dict per file.
rows = []

# A Session reuses the underlying connection across requests to the same
# host instead of opening a new one for every file (good GET etiquette).
with requests.Session() as session:
    for state in state_files:
        url = prefix_URL + state
        # Without a timeout, a single stalled request would hang the whole loop.
        r = session.get(url, timeout=10)
        r.raise_for_status()
        data = r.json()
        data["name"] = state  # remember which file this row came from
        rows.append(data)

# Build the frame once from the list of dicts (one row per file).
df = DataFrame(rows)
df

Unnamed: 0,2000,2010,2015,name
0,4447100,4779736,4846411,Alabama.json
1,626932,710231,737046,Alaska.json
2,5130632,6392017,6728783,Arizona.json
3,2673400,2915918,2966835,Arkansas.json
4,33871648,37253956,38792291,California.json
5,4301261,5029196,5355588,Colorado.json
6,3405565,3574097,3594762,Connecticut.json
7,783600,897934,935968,Delaware.json
8,15982378,18801310,19905569,Florida.json
9,8186453,9687653,10097132,Georgia.json


In [13]:
# Use the "name" column as the row index.
# The frame is rebuilt from `rows` rather than re-assigning df from itself,
# so this cell can be re-run safely: calling df.set_index("name") a second
# time on the already-indexed df would raise a KeyError.
df = DataFrame(rows).set_index("name")
df.head()

Unnamed: 0_level_0,2000,2010,2015
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alabama.json,4447100,4779736,4846411
Alaska.json,626932,710231,737046
Arizona.json,5130632,6392017,6728783
Arkansas.json,2673400,2915918,2966835
California.json,33871648,37253956,38792291


In [14]:
# Add up each column (i.e. down the rows): one total per column label.
df.sum(axis=0)

2000    280849847
2010    308143815
2015    318247565
dtype: int64

In [15]:
# Swap rows and columns: the names become column headers.
df.transpose()

name,Alabama.json,Alaska.json,Arizona.json,Arkansas.json,California.json,Colorado.json,Connecticut.json,Delaware.json,Florida.json,Georgia.json,...,South_Dakota.json,Tennessee.json,Texas.json,Utah.json,Vermont.json,Virginia.json,Washington.json,West_Virginia.json,Wisconsin.json,Wyoming.json
2000,4447100,626932,5130632,2673400,33871648,4301261,3405565,783600,15982378,8186453,...,754844,5689283,20851820,2233169,608827,7078515,5894121,1808344,5363675,493782
2010,4779736,710231,6392017,2915918,37253956,5029196,3574097,897934,18801310,9687653,...,814180,6346105,25145561,2763885,625741,8001024,6724540,1852994,5686986,563626
2015,4846411,737046,6728783,2966835,38792291,5355588,3594762,935968,19905569,10097132,...,853304,6547779,26979078,2944498,626767,8328098,7063166,1848751,5759432,584304


In [16]:
# Largest value in each row of df (one result per name), computed
# across the columns instead of transposing first.
df.max(axis="columns")

name
Alabama.json            4846411
Alaska.json              737046
Arizona.json            6728783
Arkansas.json           2966835
California.json        38792291
Colorado.json           5355588
Connecticut.json        3594762
Delaware.json            935968
Florida.json           19905569
Georgia.json           10097132
Hawaii.json             1420257
Idaho.json              1634806
Illinois.json          12882189
Indiana.json            6597880
Iowa.json               3109481
Kansas.json             2902507
Kentucky.json           4412617
Louisiana.json          4648990
Maine.json              1330256
Maryland.json           5975346
Massachusetts.json      6755124
Michigan.json           9938444
Minnesota.json          5457125
Mississippi.json        2993443
Missouri.json           6063827
Montana.json            1023252
Nebraska.json           1882980
Nevada.json             2838281
New_Hampshire.json      1327996
New_Jersey.json         8938844
New_Mexico.json         2085567
New