In [19]:
import xlrd
import os
import csv
from zipfile import ZipFile
import pprint

datafile = "2013_ERCOT_Hourly_Load_Data.xls"
outfile = "2013_Max_Loads.csv"

In [23]:
# -*- coding: utf-8 -*-
'''
Find the time and value of max load for each of the regions
COAST, EAST, FAR_WEST, NORTH, NORTH_C, SOUTHERN, SOUTH_C, WEST
and write the result out in a csv file, using pipe character | as the delimiter.

An example output can be seen in the "example.csv" file.
'''


def parse_file(datafile):
    workbook = xlrd.open_workbook(datafile)
    sheet = workbook.sheet_by_index(0)
    data = []
    
    maxvalues = {} #structure : {label: [maxload(0), time(1)]}
    
    for col in range(1,sheet.ncols - 1):
        label = sheet.cell_value(0,col) #row0 is the first row of labels
        column = sheet.col_values(col,start_rowx = 1, end_rowx = sheet.nrows) #specific row , not index from 0
        maxvalues[label] = [max(column)] # pay attention to list means a label maps to a list instead of a number
        # a label is a list includes max value and time
        maxrownum = column.index(max(column)) + 1 # because "column" list is from start_rowx = 1 index 0 == row 1
        maxvalues[label].append(sheet.cell_value(maxrownum,0)) # so we need to +1 
        print(maxvalues) # {COAST:[1234,2013-1-1], EAST:[],...}
        
        
    stations = ['COAST', 'EAST', 'FAR_WEST', 'NORTH', 'NORTH_C', 'SOUTHERN', 'SOUTH_C', 'WEST']
    
    # Remember that you can use xlrd.xldate_as_tuple(sometime, 0) to convert
    # Excel date to Python tuple of (year, month, day, hour, minute, second)
    for station in stations:
        row = [station] # each row is a list and starts with a station name
        time = xlrd.xldate_as_tuple(maxvalues[station][1],0) # 0 is datemode return (y,month,day,hour,0,0)
        for index in range(len(time[:4])):  # review: time[:4] =(index from 0 to 3) len() == 4 range(4) == 0,1,2,3
            row.append(time[index]) # row == [station,year,month,day,hour]
        row.append(maxvalues[station][0])
        data.append(row) # is append() not append[]!!!!
        
    pprint.pprint(data)
    return data

def save_file(data, filename):
    # YOUR CODE HERE
    with open(filename,'w') as csvfile: # open empty file and load data to it
        datawriter = csv.writer(csvfile, delimiter = '|') # do it with | to divide each data
        datawriter.writerow(['Station','Year','Month','Day','Hour','Max Load']) #only one list row
        datawriter.writerows(data) # write row by row from data list (each row is a list [name,1234,2,..])
    
def test():

    data = parse_file(datafile)
    save_file(data, outfile)

    number_of_rows = 0
    stations = []

    ans = {'FAR_WEST': {'Max Load': '2281.2722140000024',
                        'Year': '2013',
                        'Month': '6',
                        'Day': '26',
                        'Hour': '17'}}
    correct_stations = ['COAST', 'EAST', 'FAR_WEST', 'NORTH',
                        'NORTH_C', 'SOUTHERN', 'SOUTH_C', 'WEST']
    fields = ['Year', 'Month', 'Day', 'Hour', 'Max Load']
    
    #check
    with open(outfile) as of:
        csvfile = csv.DictReader(of, delimiter="|")
        for line in csvfile:
            station = line['Station']
            if station == 'FAR_WEST':
                for field in fields:
                    # Check if 'Max Load' is within .1 of answer
                    if field == 'Max Load':
                        max_answer = round(float(ans[station][field]), 1)
                        max_line = round(float(line[field]), 1)
                        assert max_answer == max_line

                    # Otherwise check for equality
                    else:
                        assert ans[station][field] == line[field]

            number_of_rows += 1
            stations.append(station)

        # Output should be 8 lines not including header
        assert number_of_rows == 8

        # Check Station Names
        assert set(stations) == set(correct_stations)

        
if __name__ == "__main__":
    test()


{'COAST': [18779.025510000003, 41499.708333333336]}
{'COAST': [18779.025510000003, 41499.708333333336], 'EAST': [2380.1654089999956, 41491.708333333336]}
{'COAST': [18779.025510000003, 41499.708333333336], 'EAST': [2380.1654089999956, 41491.708333333336], 'FAR_WEST': [2281.2722140000024, 41451.708333333336]}
{'COAST': [18779.025510000003, 41499.708333333336], 'EAST': [2380.1654089999956, 41491.708333333336], 'FAR_WEST': [2281.2722140000024, 41451.708333333336], 'NORTH': [1544.7707140000005, 41493.708333333336]}
{'COAST': [18779.025510000003, 41499.708333333336], 'EAST': [2380.1654089999956, 41491.708333333336], 'FAR_WEST': [2281.2722140000024, 41451.708333333336], 'NORTH': [1544.7707140000005, 41493.708333333336], 'NORTH_C': [24415.570226999993, 41493.75]}
{'COAST': [18779.025510000003, 41499.708333333336], 'EAST': [2380.1654089999956, 41491.708333333336], 'FAR_WEST': [2281.2722140000024, 41451.708333333336], 'NORTH': [1544.7707140000005, 41493.708333333336], 'NORTH_C': [24415.57022699

In [11]:

workbook = xlrd.open_workbook(datafile)
sheet = workbook.sheet_by_index(0)
data = []

maxvalues = {} #structure : {label: [maxload(0), time(1)]}

for col in range(1,sheet.ncols - 1):
    label = sheet.cell_value(0,col) #row0 is the first row of labels
    column = sheet.col_values(col,start_rowx = 1, end_rowx = sheet.nrows) #specific row , not index from 0
    maxvalues[label] = [max(column)] # pay attention to list means a label maps to a list instead of a number
    # a label is a list includes max value and time
    maxrownum = column.index(max(column)) + 1 # because "column" list is from start_rowx = 1 index 0 == row 1
    maxvalues[label].append(sheet.cell_value(maxrownum,0)) # so we need to +1 
    print(maxvalues) # {COAST:[1234,2013-1-1], EAST:[],...}


stations = ['COAST', 'EAST', 'FAR_WEST', 'NORTH', 'NORTH_C', 'SOUTHERN', 'SOUTH_C', 'WEST']

# Remember that you can use xlrd.xldate_as_tuple(sometime, 0) to convert
# Excel date to Python tuple of (year, month, day, hour, minute, second)
for station in stations:
    row = [station] # each row is a list and starts with a station name
    time = xlrd.xldate_as_tuple(maxvalues[station][1],0) # 0 is datemode return (y,month,day,hour,0,0)
    for index in range(len(time[:4])):  # review: time[:4] =(index from 0 to 3) len() == 4 range(4) == 0,1,2,3
        row.append(time[index]) # row == [station,year,month,day,hour]
    row.append(maxvalues[station][0])
    data.append(row) # is append() not append[]!!!!
pprint.pprint(data)

{'COAST': [18779.025510000003, 41499.708333333336]}
{'COAST': [18779.025510000003, 41499.708333333336], 'EAST': [2380.1654089999956, 41491.708333333336]}
{'COAST': [18779.025510000003, 41499.708333333336], 'EAST': [2380.1654089999956, 41491.708333333336], 'FAR_WEST': [2281.2722140000024, 41451.708333333336]}
{'COAST': [18779.025510000003, 41499.708333333336], 'EAST': [2380.1654089999956, 41491.708333333336], 'FAR_WEST': [2281.2722140000024, 41451.708333333336], 'NORTH': [1544.7707140000005, 41493.708333333336]}
{'COAST': [18779.025510000003, 41499.708333333336], 'EAST': [2380.1654089999956, 41491.708333333336], 'FAR_WEST': [2281.2722140000024, 41451.708333333336], 'NORTH': [1544.7707140000005, 41493.708333333336], 'NORTH_C': [24415.570226999993, 41493.75]}
{'COAST': [18779.025510000003, 41499.708333333336], 'EAST': [2380.1654089999956, 41491.708333333336], 'FAR_WEST': [2281.2722140000024, 41451.708333333336], 'NORTH': [1544.7707140000005, 41493.708333333336], 'NORTH_C': [24415.57022699