In [4]:
import requests
import json
from collections import OrderedDict
import pandas as pd

In [5]:
class weatherAcquirer:

    # constants
    ORIGIN = "https://api.weather.com/v1/location/{city}{suffix}/observations/historical.json?apiKey={key}&units=e&startDate={start}&endDate={end}"
    KEY = "e1f10a1e78da46f5b10a1e78da96f525"
    COUNTRY_SUFFIX = ":9:US"
    DATES = {}
    
    # cache
    log = {}
    temperatures = {}
    
    def __init__(self):
        ## dates
        mo31 = ["01","03","05","07","08","10","12"]
        mo30 = ["04","06","09","11"]
        mo28 = ["02"]
        for mo in mo31:
            self.DATES[mo] = "31"
        for mo in mo30:
            self.DATES[mo] = "30"
        for mo in mo28:
            self.DATES[mo] = "28"
    
    ## helper method: convert oF to oC
    def f2c(self, temp_f):
        return "%.1f" % ((temp_f - 32) / 1.8)
    
    # helper method: sort map
    def sortByTimestamp(self):
        self.temperatures = OrderedDict(sorted(self.temperatures.items()))
    
    ## helper method: merge two dictionaries.
    def merge(self, dict1, dict2):
        res = {**dict1, **dict2}
        return res

    ## Clean cache.
    def flush(self):
        self.log = {}
        self.temperatures = {}
    
    def acquireMonthlyTemp(self, city:str, year:str, month:str):
        errorlog = {}
        temperatures = {}
        start = year + month + "01"
        end = year + month + self.DATES[month]
        url = self.ORIGIN.format(city = city, suffix = self.COUNTRY_SUFFIX, key = self.KEY, start = start, end = end)
        r = requests.get(url)
        content = json.loads(r.content)
        if content['metadata']['status_code'] != 200:
            errorlog[mo] = "status code 400"
        # log temperatures
        for observation in content['observations']:
            temperatures[observation['expire_time_gmt']] = self.f2c(observation['temp'])
        print("{year}/{month} temperature for {city} complete".format(year = year, month = month, city = city))
        
        # cache.
        self.log = self.merge(self.log, errorlog)
        self.temperatures = self.merge(self.temperatures ,temperatures)
        return errorlog, temperatures
    
    
    def acquireAnnualTemp(self, city:str, year:str):
        errorlog = {}
        temperatures = {}
        for mo in self.DATES:
            prevErrorlog, prevTemps = self.acquireMonthlyTemp(city, year, mo)
            # merge errorlog and temperature map.
            errorlog = self.merge(errorlog, prevErrorlog)
            temperatures = self.merge(temperatures, prevTemps)
        print("{year} temperature for {city} complete".format(year = year, city = city))
        
        # cache.
        self.log = self.merge(self.log, errorlog)
        self.temperatures = self.merge(self.temperatures ,temperatures)
        return errorlog, temperatures

### take pittsburgh as an example:

### 2018年的数据不是严格从1月1号0点开始算的，所以要补充2017年12月的数据

In [6]:
ac = weatherAcquirer()
ac.acquireMonthlyTemp("KPIT", "2017", "12")
ac.acquireAnnualTemp("KPIT", "2018")

2017/12 temperature for KPIT complete
2018/01 temperature for KPIT complete
2018/03 temperature for KPIT complete
2018/05 temperature for KPIT complete
2018/07 temperature for KPIT complete
2018/08 temperature for KPIT complete
2018/10 temperature for KPIT complete
2018/12 temperature for KPIT complete
2018/04 temperature for KPIT complete
2018/06 temperature for KPIT complete
2018/09 temperature for KPIT complete
2018/11 temperature for KPIT complete
2018/02 temperature for KPIT complete
2018 temperature for KPIT complete


({},
 {1514793060: '-17.2',
  1514796660: '-16.1',
  1514800260: '-16.7',
  1514803860: '-18.3',
  1514807460: '-17.2',
  1514811060: '-18.3',
  1514814660: '-16.7',
  1514818260: '-16.7',
  1514821860: '-16.7',
  1514825460: '-14.4',
  1514829060: '-13.3',
  1514832660: '-12.2',
  1514834940: '-11.7',
  1514836260: '-11.7',
  1514839860: '-11.7',
  1514843460: '-12.2',
  1514846220: '-12.8',
  1514847060: '-12.8',
  1514850660: '-12.8',
  1514854260: '-12.8',
  1514857860: '-13.3',
  1514861460: '-13.9',
  1514865060: '-14.4',
  1514868660: '-14.4',
  1514872260: '-13.9',
  1514875860: '-13.9',
  1514879460: '-13.9',
  1514883060: '-13.9',
  1514886660: '-13.9',
  1514889240: '-13.9',
  1514890260: '-13.9',
  1514893860: '-13.9',
  1514897460: '-13.9',
  1514901060: '-13.9',
  1514904660: '-14.4',
  1514907540: '-14.4',
  1514908260: '-14.4',
  1514911860: '-13.9',
  1514915460: '-13.3',
  1514919060: '-13.3',
  1514920440: '-12.8',
  1514922660: '-12.2',
  1514926260: '-11.1',
  1514

### 根据timestamp把entry排序

In [7]:
ac.sortByTimestamp()

In [8]:
df = pd.DataFrame({'timestamp':ac.temperatures.keys(), 'pit':ac.temperatures.values()})

### API导出的数据时间和pit的实际时间有7小时时差，需调整timestamp

### 根据气象站所在时区设置timediff的值，eg：for pittsburgh, timediff = 7.

In [9]:
timediff = 7

In [10]:
df['timestamp'] = df['timestamp'] - 3600 * timediff

In [11]:
df['timestamp'] = pd.to_datetime(df['timestamp'],unit='s')

In [12]:
df.set_index('timestamp', inplace=True)

In [13]:
df

Unnamed: 0_level_0,pit
timestamp,Unnamed: 1_level_1
2017-12-01 00:51:00,3.9
2017-12-01 01:51:00,1.7
2017-12-01 02:51:00,2.2
2017-12-01 03:51:00,1.7
2017-12-01 04:05:00,2.2
...,...
2018-12-31 21:51:00,15.6
2018-12-31 22:06:00,15.6
2018-12-31 22:51:00,16.1
2018-12-31 23:08:00,16.1


### city codes reference: https://www.wunderground.com/history/daily/us/pa/imperial/KPIT
KPIT, KDCA, PANC...

### load temperature for selected range of date

In [14]:
import matplotlib.pyplot as plt

In [15]:
# plot fig size settings.
plt.rcParams['figure.figsize'] = [16, 9]

In [16]:
# convert object type to float.
df['pit']=df['pit'].astype(float)

In [17]:
(df['pit']['2018-2-1':'2018-2-2'])

timestamp
2018-02-01 00:51:00     6.7
2018-02-01 01:51:00     6.1
2018-02-01 02:51:00     6.7
2018-02-01 03:51:00     6.7
2018-02-01 04:51:00     6.7
                       ... 
2018-02-02 19:51:00    -9.4
2018-02-02 20:51:00    -9.4
2018-02-02 21:51:00    -9.4
2018-02-02 22:51:00    -9.4
2018-02-02 23:51:00   -10.0
Name: pit, Length: 64, dtype: float64