* API: Air Quality API http://aqicn.org/api
* Name: Zaiyuan Wu
* Student Number: 22205209

In [1]:
import time
import json, requests, urllib
from pathlib import Path
from datetime import datetime
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# API Key (replace with your own API key)
api_key = "#####################################"
# Prefix for API URLs
api_prefix = "https://api.waqi.info/feed"
# The city locations that we would like to study
city_names = ["Shanghai", "Nanjing", "Shenzhen", "Hongkong"]

## Create new directory for raw data

In [3]:
dir_raw = Path("raw")
dir_raw.mkdir(parents=True, exist_ok=True)

## Method to fetch json data by city names

In [4]:
def fetch(endpoint, params={}):
    # construct the url
    url = api_prefix
    if not endpoint.startswith("/"):
        url += "/"
    url += endpoint
    (api_prefix, endpoint)
    params["token"] = api_key
    url += "?" + urllib.parse.urlencode(params)
    print("Fetching %s" % url)
    # fetch the page
    response = requests.get(url)
    jdata = response.text
    return json.loads(jdata)

In [5]:
city_metadata = {}
city_keys = {}
for city_name in city_names:
    city_data = fetch("/" + city_name + "/")
    city_metadata[city_name] = city_data 
    city_keys[city_name] = city_data["data"]["idx"]

Fetching https://api.waqi.info/feed/Shanghai/?token=d5d5682f1442d5dcbad0eb868ff7069f80cab417
Fetching https://api.waqi.info/feed/Nanjing/?token=d5d5682f1442d5dcbad0eb868ff7069f80cab417
Fetching https://api.waqi.info/feed/Shenzhen/?token=d5d5682f1442d5dcbad0eb868ff7069f80cab417
Fetching https://api.waqi.info/feed/Hongkong/?token=d5d5682f1442d5dcbad0eb868ff7069f80cab417


In [6]:
city_metadata

{'Shanghai': {'status': 'ok',
  'data': {'aqi': 61,
   'idx': 1437,
   'attributions': [{'url': 'http://106.37.208.233:20035/emcpublish/',
     'name': 'China National Urban air quality real-time publishing platform (全国城市空气质量实时发布平台)'},
    {'url': 'https://china.usembassy-china.org.cn/embassy-consulates/shanghai/air-quality-monitor-stateair/',
     'name': 'U.S. Consulate Shanghai Air Quality Monitor'},
    {'url': 'https://sthj.sh.gov.cn/',
     'name': 'Shanghai Environment Monitoring Center(上海市环境监测中心)'},
    {'url': 'https://waqi.info/', 'name': 'World Air Quality Index Project'}],
   'city': {'geo': [31.2047372, 121.4489017],
    'name': 'Shanghai (上海)',
    'url': 'https://aqicn.org/city/shanghai',
    'location': ''},
   'dominentpol': 'pm25',
   'iaqi': {'co': {'v': 6.4},
    'h': {'v': 54},
    'no2': {'v': 9.6},
    'o3': {'v': 28.5},
    'p': {'v': 1013.4},
    'pm10': {'v': 34},
    'pm25': {'v': 61},
    'so2': {'v': 3.6},
    't': {'v': 14.4}},
   'time': {'s': '2022-11-04

## Parse and check the format of the data

In [7]:
metadata_rows = []
for city_name in city_names:
    row = {"city": city_name, "key": city_keys[city_name]}
    row["aqi"] = city_metadata[city_name]["data"]["aqi"]
    row["time"] = city_metadata[city_name]["data"]["time"]["s"]
    row["pm2.5"] =  city_metadata[city_name]["data"]["iaqi"]["pm25"]["v"]
    row["pm10"] =  city_metadata[city_name]["data"]["iaqi"]["pm10"]["v"]
    row["ozone"] =  city_metadata[city_name]["data"]["iaqi"]["o3"]["v"]
    metadata_rows.append(row)
pd.DataFrame(metadata_rows).set_index("city")

Unnamed: 0_level_0,key,aqi,time,pm2.5,pm10,ozone
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Shanghai,1437,61,2022-11-04 22:00:00,61,34,28.5
Nanjing,1485,50,2022-11-04 23:00:00,50,36,11.4
Shenzhen,1539,55,2022-11-04 23:00:00,55,24,44.3
Hongkong,3308,68,2022-11-04 23:00:00,68,29,33.4


## Fetch data automatically with 1-hour interval

In [8]:
def fetch_current_conditions(city_name):
    # create the endpoint URL
    endpoint = "/%s/" % city_name
    # fetch the current data
    conditions_data = fetch(endpoint)
    # write it out to our raw dataset directory
    date_suffix = datetime.now().strftime("%Y%m%d-%H00")
    fname = "%s-%s.json" % (city_name, date_suffix)
    out_path = dir_raw / fname
    print("Writing data to %s" % out_path)
    fout = open(out_path, "w")
    json.dump(conditions_data, fout, indent=4, sort_keys=False)
    fout.close()

In [9]:
while True:
    for city_name in city_names:
        fetch_current_conditions(city_name)
    # fetch data every hour
    time.sleep(3600)

Fetching https://api.waqi.info/feed/Shanghai/?token=d5d5682f1442d5dcbad0eb868ff7069f80cab417
Writing data to raw/Shanghai-20221104-1600.json
Fetching https://api.waqi.info/feed/Nanjing/?token=d5d5682f1442d5dcbad0eb868ff7069f80cab417
Writing data to raw/Nanjing-20221104-1600.json
Fetching https://api.waqi.info/feed/Shenzhen/?token=d5d5682f1442d5dcbad0eb868ff7069f80cab417
Writing data to raw/Shenzhen-20221104-1600.json
Fetching https://api.waqi.info/feed/Hongkong/?token=d5d5682f1442d5dcbad0eb868ff7069f80cab417
Writing data to raw/Hongkong-20221104-1600.json
Fetching https://api.waqi.info/feed/Shanghai/?token=d5d5682f1442d5dcbad0eb868ff7069f80cab417
Writing data to raw/Shanghai-20221104-1700.json
Fetching https://api.waqi.info/feed/Nanjing/?token=d5d5682f1442d5dcbad0eb868ff7069f80cab417
Writing data to raw/Nanjing-20221104-1700.json
Fetching https://api.waqi.info/feed/Shenzhen/?token=d5d5682f1442d5dcbad0eb868ff7069f80cab417
Writing data to raw/Shenzhen-20221104-1700.json
Fetching https://

KeyboardInterrupt: 