# Sum generation by unit over baseline periods. Also get average capacity factor over the same periods.

In [1]:
import getpass
import os
import time

import matplotlib.pyplot as plt
import mysql.connector
import numpy as np
import pandas as pd
import seaborn as sns
# Apply seaborn's default theme to every matplotlib figure in this notebook.
sns.set()
# Use the larger "talk" scaling preset, overriding the base font, title, and axis-label sizes.
sns.set_context("talk", rc={"font.size":16,"axes.titlesize":24,"axes.labelsize":20})

#### Fetch Data

In [2]:
# Connection settings are read from the environment so that credentials are never
# stored in the notebook (or its history). Set WIDAP_DB_PASSWORD before starting the
# kernel; if it is missing, the password is requested interactively instead.
# WIDAP_DB_USER / WIDAP_DB_HOST are optional overrides of the original defaults.
server = mysql.connector.connect(user=os.environ.get("WIDAP_DB_USER", "apark2"),
                                 password=(os.environ.get("WIDAP_DB_PASSWORD")
                                           or getpass.getpass("MySQL password for widap: ")),
                                 host=os.environ.get("WIDAP_DB_HOST", "127.0.0.1"),
                                 database="widap")
# Seems to only want 1 connection at a time, for one user. Otherwise, Interface Error
query = server.cursor(buffered=True)

In [3]:
def getSumGen(orispl, unit, startYear, stopYear):
    """
    Sum generation (gload * op_time) for one plant/unit over an inclusive range of years.

    orispl = plant code matched against ORISPL_CODE (anything int() accepts).
    unit = unit identifier matched against UNITID (converted with str()).
    startYear, stopYear = first and last year of the period, both inclusive; the year
        is taken from the first four characters of OP_DATE.

    Returns the DataFrame produced by pd.read_sql on the module-level `server`
    connection, with columns ORISPL_CODE, UNITID and gen_<yy>_<yy>, where <yy> are
    the last two digits of startYear and stopYear. Callers read its first row.
    """
    # Cast up front: this validates the inputs, keeps the column alias (which cannot be
    # a bound parameter) free of anything but digits, and turns numpy integers coming
    # from a DataFrame into plain ints that the database driver can bind.
    plant_code = int(orispl)
    first_year = int(startYear)
    last_year = int(stopYear)
    gen_col = 'gen_' + str(first_year)[-2:] + '_' + str(last_year)[-2:]

    # Values are passed as bound parameters (%s) instead of being concatenated into the
    # statement, so unit ids containing quotes or other SQL syntax cannot alter the query.
    sql = """
            SELECT
                ORISPL_CODE,
                UNITID,
                SUM(gload * op_time) as `{col}`
            FROM
                widap.data
            WHERE ORISPL_CODE = %s
                AND UNITID = %s
                AND SUBSTR(OP_DATE, 1, 4) >= %s
                AND SUBSTR(OP_DATE, 1, 4) <= %s
            """.format(col=gen_col)
    params = (plant_code, str(unit), first_year, last_year)
    return pd.read_sql(sql, server, params=params)

In [4]:
getSumGen(55322, 'CTG-1', 2006, 2008).iloc[0]

ORISPL_CODE          55322
UNITID               CTG-1
gen_06_08      4.42315e+06
Name: 0, dtype: object

In [5]:
getSumGen(55322, 'CTG-1', 2006, 2008).iloc[0, -1]

4423153.720000002

In [6]:
# Prototype of one output row: summed generation for a single unit over both baseline periods.
# .copy() gives an independent Series so adding the second period's value is safe.
row = getSumGen(55322, 'CTG-1', 2006, 2008).iloc[0].copy()
row['gen_15_17'] = getSumGen(55322, 'CTG-1', 2015, 2017).iloc[0, -1]
# Build the frame from the finished row in one step. DataFrame.append was removed in
# pandas 2.0, and its result was never assigned here, so `df` used to stay empty.
df = pd.DataFrame([row], columns=['ORISPL_CODE', 'UNITID', 'gen_06_08', 'gen_15_17'])
df

Unnamed: 0,ORISPL_CODE,UNITID,gen_06_08,gen_15_17
0,55322,CTG-1,4423153.72,4828646.72


#### Calculate average capacity factor (CF) for all plants and units 

Read in all (ORISPL_CODE, UNITID) plant–unit pairs.

In [7]:
# Table of plant/unit pairs (columns ORISPL_CODE, UNITID) to process.
# NOTE(review): absolute, user-specific path — this cell only runs on a machine with this exact file layout.
WI_units = pd.read_csv('C:/Users/apark2/Documents/Year 2/Research/Analysis/Heat Rate Work/ORISPL_UNITID.csv')

In [8]:
WI_units.head()

Unnamed: 0,ORISPL_CODE,UNITID
0,9,CTG-1
1,87,1
2,113,1
3,113,2
4,113,3


In [13]:
def saveAllSumGen(orispl_unit, BL1_start, BL1_stop, BL2_start, BL2_stop):
    """
    This function sums generation over two separate baseline periods (BL) for each Western
    Interconnection (WI) generating unit and saves the resulting table.

    orispl_unit = DataFrame of plant_unit pairs in the WI; the first column is the plant
        code (ORISPL_CODE) and the second column is the unit id (UNITID).
    BL1_start, BL1_stop = Starting and stopping year (inclusive) of the first baseline period.
    BL2_start, BL2_stop = Starting and stopping year (inclusive) of the second baseline period.

    Returns a DataFrame with one row per input pair and columns ORISPL_CODE, UNITID,
    gen_<yy>_<yy> for BL1 and gen_<yy>_<yy> for BL2. A generation value is missing when
    the query returns no total for that unit and period. The same table is written to
    'sumGen.csv' in the current working directory.
    """
    BL1 = 'gen_' + str(BL1_start)[-2:] + '_' + str(BL1_stop)[-2:]
    BL2 = 'gen_' + str(BL2_start)[-2:] + '_' + str(BL2_stop)[-2:]

    # Collect plain records and build the frame once at the end: appending to a
    # DataFrame inside the loop copies it every iteration, and DataFrame.append was
    # removed in pandas 2.0.
    records = []
    # Iterate the frame that was passed in (not a notebook global), taking plant and
    # unit by column position as documented above.
    for plant, unit in orispl_unit.iloc[:, :2].itertuples(index=False, name=None):
        records.append({
            # Identify the row from the input pair so it stays labelled even when the
            # query finds no data for this unit.
            'ORISPL_CODE': plant,
            'UNITID': unit,
            # The summed generation is the last column of the single-row query result.
            BL1: getSumGen(plant, unit, BL1_start, BL1_stop).iloc[0, -1],
            BL2: getSumGen(plant, unit, BL2_start, BL2_stop).iloc[0, -1],
        })
        print(plant, unit)  # progress indicator; each unit costs two database queries

    sumGen = pd.DataFrame(records, columns=['ORISPL_CODE', 'UNITID', BL1, BL2])
    sumGen.to_csv('sumGen.csv', index=False)
    return sumGen

In [14]:
# Build the summed-generation table for the WI units over the 2006-2008 and 2015-2017 baselines.
# Slow: runs two database queries per unit, prints each plant/unit as it goes, and writes sumGen.csv.
sumGen = saveAllSumGen(WI_units, 2006, 2008, 2015, 2017)

9 CTG-1
87 1
113 1
113 2
113 3
113 4
116 1
116 2
117 CC4
117 CC5A
117 CC5B
118 1
118 2
118 CT3
120 1
120 CT5
120 CT6
124 GT1
126 1
126 2
126 3
126 4
141 1
141 2
141 3
147 K-1
147 K-2
147 K-7
160 1
160 2
160 3
160 4
228 10
228 9
246 1
246 2
247 3
247 4
247 5
247 6
247 7
259 1
259 2
259 3
259 4
260 1A
260 2A
260 3A
260 4A
271 1
271 2
271 3
271 4
271 5
271 6
271 7
302 1
302 2
302 3
302 4
302 5
310 1
310 2
310 3
310 4
315 1
315 2
315 3
315 4
315 5
315 6
329 1
329 2
329 31
329 32
329 41
329 42
330 1
330 2
330 3
330 4
330 5
330 7
331 1
331 2
331 3
331 4
334 1
334 2
334 3
334 4
335 1
335 2
335 3A
335 4A
341 1
341 2
341 3
341 4
345 1
345 2
350 1
350 2
356 17
356 5
356 6
356 7
356 8
358 1
358 2
375 M4
377 4
377 5
377 9
389 3
389 4
399 **10A
399 **10B
399 10
399 11
399 12
399 13
399 14
400 1
400 10
400 11
400 12
400 13
400 14
400 15
400 16
400 2
400 3
400 4
400 5
400 6
400 9
404 1
404 2
404 3
404 4
404 6
404 7
408 3
408 4
408 5
408 6
408 7
420 B1
420 B2
420 B3
422 GT3
422 GT4
422 GT5
465 1
465 2

In [15]:
# Reload the table from the CSV written by saveAllSumGen, so the cells below can be
# re-run from the saved file without repeating the database queries.
sumGen = pd.read_csv('sumGen.csv')
sumGen

Unnamed: 0,ORISPL_CODE,UNITID,gen_06_08,gen_15_17
0,9.0,CTG-1,19878.69,71174.62
1,87.0,1,5398410.18,3656639.48
2,113.0,1,2765392.93,1143207.63
3,113.0,2,6683196.25,1210852.47
4,113.0,3,6785225.42,3096860.17
5,113.0,4,8938559.84,6122842.37
6,116.0,1,196178.32,209858.49
7,116.0,2,203546.14,172074.09
8,117.0,CC4,503796.10,455439.92
9,117.0,CC5A,1915665.16,2556854.10


How many units have generation data in both the before and after baseline periods?

In [16]:
# Keep only rows with no missing values, i.e. units that have a generation total in both periods.
sumGen_full = sumGen.dropna()
# (rows, columns) of the filtered table; the row count is the number of such units.
sumGen_full.shape

(470, 4)

In [17]:
sumGen_full.head()

Unnamed: 0,ORISPL_CODE,UNITID,gen_06_08,gen_15_17
0,9.0,CTG-1,19878.69,71174.62
1,87.0,1,5398410.18,3656639.48
2,113.0,1,2765392.93,1143207.63
3,113.0,2,6683196.25,1210852.47
4,113.0,3,6785225.42,3096860.17


In [16]:
sumGen_full.isna().any()

ORISPL_CODE    False
UNITID         False
CF_06_08       False
CF_15_17       False
dtype: bool

__470__ units (plant–unit pairs) reported generation in both baseline periods! That's plenty for a meaningful comparison. Score!

Idea: Plot average capacity factor for all coal and all gas units over the 17 years. See what's changed.