In [1]:
from bs4 import BeautifulSoup
import requests

In [2]:
import json

In [3]:
from collections import OrderedDict

In [4]:
import pandas as pd

In [13]:
class weatherAcquirer:

    # constants
    ORIGIN = "https://api.weather.com/v1/location/{city}{suffix}/observations/historical.json?apiKey={key}&units=e&startDate={start}&endDate={end}"
    KEY = "e1f10a1e78da46f5b10a1e78da96f525"
    COUNTRY_SUFFIX = ":9:US"
    DATES = {}
    
    # cache
    log = {}
    temperatures = {}
    
    def __init__(self):
        ## dates
        mo31 = ["01","03","05","07","08","10","12"]
        mo30 = ["04","06","09","11"]
        mo28 = ["02"]
        for mo in mo31:
            self.DATES[mo] = "31"
        for mo in mo30:
            self.DATES[mo] = "30"
        for mo in mo28:
            self.DATES[mo] = "28"
    
    ## helper method: convert oF to oC
    def f2c(self, temp_f):
        return "%.1f" % ((temp_f - 32) / 1.8)
    
    # helper method: sort map
    def sortByTimestamp(self):
        self.temperatures = OrderedDict(sorted(self.temperatures.items()))
    
    ## helper method: merge two dictionaries.
    def merge(self, dict1, dict2):
        res = {**dict1, **dict2}
        return res

    
    def acquireMonthlyTemp(self, city:str, year:str, month:str):
        errorlog = {}
        temperatures = {}
        start = year + month + "01"
        end = year + month + self.DATES[month]
        url = self.ORIGIN.format(city = city, suffix = self.COUNTRY_SUFFIX, key = self.KEY, start = start, end = end)
        r = requests.get(url)
        content = json.loads(r.content)
        if content['metadata']['status_code'] != 200:
            errorlog[mo] = "status code 400"
        # log temperatures
        for observation in content['observations']:
            temperatures[observation['expire_time_gmt']] = self.f2c(observation['temp'])
        print("{year}/{month} temperature for {city} complete".format(year = year, month = month, city = city))
        
        # cache.
        self.log = self.merge(self.log, errorlog)
        self.temperatures = self.merge(self.temperatures ,temperatures)
        return errorlog, temperatures
    
    
    def acquireAnnualTemp(self, city:str, year:str):
        errorlog = {}
        temperatures = {}
        for mo in self.DATES:
            prevErrorlog, prevTemps = self.acquireMonthlyTemp(city, year, mo)
            # merge errorlog and temperature map.
            errorlog = self.merge(errorlog, prevErrorlog)
            temperatures = self.merge(temperatures, prevTemps)
        print("{year} temperature for {city} complete".format(year = year, city = city))
        
        # cache.
        self.log = self.merge(self.log, errorlog)
        self.temperatures = self.merge(self.temperatures ,temperatures)
        return errorlog, temperatures

In [16]:
ac = weatherAcquirer()
ac.acquireMonthlyTemp("KPIT", "2017", "12")
ac.acquireAnnualTemp("KPIT", "2018")

2017/12 temperature for KPIT complete
2018/01 temperature for KPIT complete
2018/03 temperature for KPIT complete
2018/05 temperature for KPIT complete
2018/07 temperature for KPIT complete
2018/08 temperature for KPIT complete
2018/10 temperature for KPIT complete
2018/12 temperature for KPIT complete
2018/04 temperature for KPIT complete
2018/06 temperature for KPIT complete
2018/09 temperature for KPIT complete
2018/11 temperature for KPIT complete
2018/02 temperature for KPIT complete
2018 temperature for KPIT complete


In [None]:
ac.sortByTimestamp()

In [17]:
df = pd.DataFrame({'timestamp':ac.temperatures.keys(), 'pit':ac.temperatures.values()})

In [18]:
df['timestamp'] = pd.to_datetime(df['timestamp'],unit='s')

In [19]:
df.set_index('timestamp', inplace=True) # set column 'date' to index

In [20]:
df

Unnamed: 0_level_0,pit
timestamp,Unnamed: 1_level_1
2017-12-01 07:51:00,3.9
2017-12-01 08:51:00,1.7
2017-12-01 09:51:00,2.2
2017-12-01 10:51:00,1.7
2017-12-01 11:05:00,2.2
...,...
2019-01-01 04:51:00,15.6
2019-01-01 05:06:00,15.6
2019-01-01 05:51:00,16.1
2019-01-01 06:08:00,16.1


In [57]:
# KDCA:9:US
# PANC:9:US

### load temperature for selected range of date