This notebook loops through the weather stations associated with each load zone in New York State, and downloads minute-by-minute data from the Weather Underground API

In [None]:
import pandas as pd
import numpy as np
import time
import random
import joblib
import os

Import the dictionary of weather stations pickled in `02_get_iso_data`

In [5]:
weather_dict = joblib.load('weather_dict.pkl')

In [6]:
weather_dict

{'CAPITL': ('kalb', 'Capital', 'Albany'),
 'CENTRL': ('ksyr', 'Central', 'Syracuse'),
 'DUNWOD': ('klga', 'Dunwoodie', 'Yonkers'),
 'GENESE': ('kroc', 'Genese', 'Rochester'),
 'HUD VL': ('kpou', 'Hudson Valley', 'Poughkeepsie'),
 'LONGIL': ('kjfk', 'Long Island', 'NYC'),
 'MHK VL': ('krme', 'Mohawk Valley', 'Utica'),
 'MILLWD': ('klga', 'Millwood', 'Yonkers'),
 'N.Y.C.': ('kjfk', 'NYC', 'NYC'),
 'NORTH': ('kpbg', 'North', 'Plattsburgh'),
 'WEST': ('kbuf', 'West', 'Buffalo')}

In [8]:
airports = [i[0] for i in weather_dict.values()]

In [13]:
#remove duplicates
airports = list(set(airports))

In [89]:
dates = pd.date_range(pd.to_datetime('2001-05-01'), \
                       pd.to_datetime('2016-03-11'), freq='D')

In [83]:
def write_daily_weather_data(airport, dates):
    '''list, list ---> CSV files
    
    Takes a list of airport codes (weather stations) and a list of dates (as pandas datetime objects), calls the Weather \
    Underground API, cleans the weather data for each of those days, and exports each day to a csv in the data/wunderground folder
    
    CSVs will have the following columns:
    
    timeest | temperaturef | dewpointf | humidity | sealevelpressurein | visibilitymph | winddirection | windspeedmph | gustspeedmph
    
        | precipitationin | events | conditions | winddirdegrees | dateutc
    '''
    for d in dates:

        df0 = pd.read_csv('https://www.wunderground.com/history/airport/{0}/{1}/{2}/{3}/DailyHistory.html?format=1'\
                             .format(airport, d.year, d.month, d.day))
        cols = df0.columns
        df0.columns = [col.lower().replace(' ','').replace('<br/>', '') for col in cols]

        df0.dateutc = df0.dateutc.apply(lambda x: pd.to_datetime(x.replace('<br />', '')))

        df0.gustspeedmph = df0.gustspeedmph.replace('-', 0)
        df0.windspeedmph = df0.windspeedmph.replace('Calm', 0)
        df0.precipitationin = df0.precipitationin.replace('NaN', 0)
        df0.events = df0.events.replace('NaN', 0)

        filepath = '../data/wunderground/'+ airport +'/' + str(d.date()).replace('-','') + '.csv'
        print filepath
        df0.to_csv(filepath, index=False)



        t = 3
        time.sleep(t)

        if type(df0.dateutc[0]) == pd.tslib.Timestamp:
            continue
        else:
            print "Something is wrong"
            break

    print "Files for %s have been written" % airport
    return

Loop over the airports and export the weather files

In [45]:
for a in airports:
    write_daily_weather_data(a, dates)

../data/wunderground/klga/20120101.csv
../data/wunderground/klga/20120102.csv
../data/wunderground/klga/20120103.csv
../data/wunderground/klga/20120104.csv
../data/wunderground/klga/20120105.csv
../data/wunderground/klga/20120106.csv
../data/wunderground/klga/20120107.csv
../data/wunderground/klga/20120108.csv
../data/wunderground/klga/20120109.csv
../data/wunderground/klga/20120110.csv
../data/wunderground/klga/20120111.csv
../data/wunderground/klga/20120112.csv
../data/wunderground/klga/20120113.csv
../data/wunderground/klga/20120114.csv
../data/wunderground/klga/20120115.csv
../data/wunderground/klga/20120116.csv
../data/wunderground/klga/20120117.csv
../data/wunderground/klga/20120118.csv
../data/wunderground/klga/20120119.csv
../data/wunderground/klga/20120120.csv
../data/wunderground/klga/20120121.csv
../data/wunderground/klga/20120122.csv
../data/wunderground/klga/20120123.csv
../data/wunderground/klga/20120124.csv
../data/wunderground/klga/20120125.csv
../data/wunderground/klga

URLError: <urlopen error [Errno 54] Connection reset by peer>

In [85]:

def combine_weather_data(airport):
    '''Combine the weather data for each day at an airport into one combined csv'''
    csvs = []
    for file in os.listdir("../data/wunderground/"+airport+"/"):
        if file.endswith(".csv"):
            csvs.append(file)

    fout=open("../data/wunderground/"+airport+"_all.csv","a")

    # write the entire first file:
    for line in open("../data/wunderground/"+airport+"/"+csvs[0]):
        fout.write(line)
    # now the rest, skipping the headers:    
    for file in csvs[1:]:
        f = open("../data/wunderground/"+airport+"/"+file)
        f.next() # skip the header
        for line in f:
             fout.write(line)
        f.close() # not really needed
    fout.close()
    print "Files for %s have been combined" % airport

In [None]:
for a in airports:
    combine_weather_data(a)