In [1]:
import json
!pip install -Uq requests
import requests
!mkdir -p data

# Using an API

## Exercise - earthquake API 

As a group, let's write a program to get data from the USGS Earthquake Catalog ([documentation](https://earthquake.usgs.gov/fdsnws/event/1/#methods)).

Steps:
- Make a folder (from your current working directory) to hold the earthquake data 
- Investigate the data from the response 
- Save each earthquake as its own JSON in the folder you created (hint - you will need to decide on a name for each earthquake JSON) 

In [44]:
import os

# Query window for the USGS earthquake catalog (passed as starttime / endtime).
start = "2014-01-01"
end = "2014-01-02"
url = f"https://earthquake.usgs.gov/fdsnws/event/1/query?format=geojson&starttime={start}&endtime={end}"

# One folder per start date; makedirs also creates the missing parent folders.
out_dir = f'./data/earthquakes/{start}'
os.makedirs(out_dir, exist_ok=True)

# Fail loudly on an HTTP error instead of trying to JSON-decode an error page,
# and never hang forever on a stalled connection.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()
features = data['features']

# Save the `properties` of every returned feature as its own JSON file.
for feature in features:
    properties = feature['properties']

    # The last path segment of the feature's `url` property is used as the
    # file name (presumably the USGS event id -- verify against the response).
    # A separate name is used so the request `url` above is not overwritten.
    event_name = properties['url'].split('/')[-1]
    path = f'{out_dir}/{event_name}.json'
    with open(path, 'w') as fi:
        json.dump(properties, fi)

## Exercise - Wikipedia API

Now for an open-ended exercise for you! Your task is to:
- create a database of countries
- in a folder called `countries` (you will need to make the folder)
- each country in its own folder
- start with Germany

V1 of your program should:
- save the url you use to request the data
- save the title
- save the `line` parameter of each section (`data['parse']['sections']`)
- save all in a single JSON

V2 of your program should also:
- save all '.png' & '.jpg' images as images, with the url as the image name
- save all external links as CSV

Much of the work will be understanding how the Wikipedia API works - useful resources are below:
- [Main API page](https://www.mediawiki.org/wiki/API:Main_page)
- [What the actions are](https://www.mediawiki.org/w/api.php)
- [Python examples](https://github.com/wikimedia/mediawiki-api-demos/tree/master/python)

Please also feel free to work on another API - happy to assist you with this as well :)

In [45]:
# `term` must be defined here: otherwise this cell only works when a later
# cell has already been run (hidden kernel state) and fails on a fresh kernel.
term = 'germany'

# Ask the MediaWiki `parse` action for the parsed page as JSON.
url = f"https://en.wikipedia.org/w/api.php?action=parse&page={term}&format=json"
res = requests.get(url, timeout=30)
res.raise_for_status()  # surface HTTP errors before decoding the body
data = res.json()

# Display the list of section descriptors (each one carries a `line` title).
data['parse']['sections']

[{'toclevel': 1,
  'level': '2',
  'line': 'Etymology',
  'number': '1',
  'index': '1',
  'fromtitle': 'Germany',
  'byteoffset': 12477,
  'anchor': 'Etymology'},
 {'toclevel': 1,
  'level': '2',
  'line': 'History',
  'number': '2',
  'index': '2',
  'fromtitle': 'Germany',
  'byteoffset': 15058,
  'anchor': 'History'},
 {'toclevel': 2,
  'level': '3',
  'line': 'Germanic tribes and Frankish Empire',
  'number': '2.1',
  'index': '3',
  'fromtitle': 'Germany',
  'byteoffset': 18483,
  'anchor': 'Germanic_tribes_and_Frankish_Empire'},
 {'toclevel': 2,
  'level': '3',
  'line': 'East Francia and Holy Roman Empire',
  'number': '2.2',
  'index': '4',
  'fromtitle': 'Germany',
  'byteoffset': 22352,
  'anchor': 'East_Francia_and_Holy_Roman_Empire'},
 {'toclevel': 2,
  'level': '3',
  'line': 'German Confederation and Empire',
  'number': '2.3',
  'index': '5',
  'fromtitle': 'Germany',
  'byteoffset': 30396,
  'anchor': 'German_Confederation_and_Empire'},
 {'toclevel': 2,
  'level': '3',

In [46]:
import json
import os
import requests

WIKI_API = 'https://en.wikipedia.org/w/api.php'

term = 'germany'
# makedirs creates the parent ./data/countries/<term> folder as well.
os.makedirs(f'./data/countries/{term}/images', exist_ok=True)

# Fetch the parsed page; fail loudly on an HTTP error.
url = f"https://en.wikipedia.org/w/api.php?action=parse&page={term}&format=json"
res = requests.get(url, timeout=30)
res.raise_for_status()
data = res.json()

data = data['parse']
# V1 output: request url, page title and the `line` of every section.
out = {
    'url': url,
    'title': data['title'],
    'sections': [section['line'] for section in data['sections']],
}

path = os.path.join('data', 'countries', term, 'data.json')
with open(path, 'w') as fi:
    json.dump(out, fi)

# Download every file listed by the parse result.
# NOTE(review): Wikimedia asks API clients to send a descriptive User-Agent;
# consider passing `headers=` with one if requests start being rejected.
for img in data['images']:
    # Look the file up by its exact title. `list=allimages&aifrom=...` only
    # enumerates files stored locally on this wiki, starting at that name, so
    # it can return a different file than the one requested.
    # `params=` also takes care of URL-encoding the file name.
    info_res = requests.get(
        WIKI_API,
        params={
            'action': 'query',
            'format': 'json',
            'titles': f'File:{img}',
            'prop': 'imageinfo',
            'iiprop': 'url',
        },
        timeout=30,
    )
    info_res.raise_for_status()
    pages = info_res.json().get('query', {}).get('pages', {})
    page = next(iter(pages.values()), {})
    if not page.get('imageinfo'):
        print(f'skipping {img}: no image info returned')
        continue

    img_url = page['imageinfo'][0]['url']
    img_res = requests.get(img_url, timeout=30)
    if not img_res.ok:
        print(f'skipping {img}: HTTP {img_res.status_code}')
        continue

    # Open the file only once the bytes are in hand, so a failed download
    # does not leave an empty file behind.
    with open(f'./data/countries/{term}/images/{img}', 'wb') as fi:
        fi.write(img_res.content)

In [2]:
import json
import os  # used below; the first cell of the notebook does not import it
import requests

WIKI_API = 'https://en.wikipedia.org/w/api.php'

term = 'germany'
# makedirs creates the parent ./data/countries/<term> folder as well.
os.makedirs(f'./data/countries/{term}/images', exist_ok=True)

# Fetch the parsed page; fail loudly on an HTTP error.
url = f"https://en.wikipedia.org/w/api.php?action=parse&page={term}&format=json"
res = requests.get(url, timeout=30)
res.raise_for_status()
data = res.json()

data = data['parse']
# V1 output: request url, page title and the `line` of every section.
out = {
    'url': url,
    'title': data['title'],
    'sections': [section['line'] for section in data['sections']],
}

path = os.path.join('data', 'countries', term, 'data.json')
with open(path, 'w') as fi:
    json.dump(out, fi)

# Download every file listed by the parse result.
# NOTE(review): Wikimedia asks API clients to send a descriptive User-Agent;
# consider passing `headers=` with one if requests start being rejected.
for img in data['images']:
    # Look the file up by its exact title. `list=allimages&aifrom=...` only
    # enumerates files stored locally on this wiki, starting at that name, so
    # it can return a different file than the one requested.
    # `params=` also takes care of URL-encoding the file name.
    info_res = requests.get(
        WIKI_API,
        params={
            'action': 'query',
            'format': 'json',
            'titles': f'File:{img}',
            'prop': 'imageinfo',
            'iiprop': 'url',
        },
        timeout=30,
    )
    info_res.raise_for_status()
    pages = info_res.json().get('query', {}).get('pages', {})
    page = next(iter(pages.values()), {})
    if not page.get('imageinfo'):
        print(f'skipping {img}: no image info returned')
        continue

    img_url = page['imageinfo'][0]['url']
    img_res = requests.get(img_url, timeout=30)
    if not img_res.ok:
        print(f'skipping {img}: HTTP {img_res.status_code}')
        continue

    # Open the file only once the bytes are in hand, so a failed download
    # does not leave an empty file behind.
    with open(f'./data/countries/{term}/images/{img}', 'wb') as fi:
        fi.write(img_res.content)