In [1]:
import pandas as pd
from sqlalchemy import create_engine
from datetime import datetime as dt
print('Libraries imported')

# Create connection string to SQL DB with meter readings

ENGINE = create_engine('postgresql://postgres:4raxeGo5xgB$@localhost:5432/eyedro_meters')
print('SQL Connection String Created')

# Set power factor and load assumptions
FIXED_PF = 0.8
FIXED_LOAD = 0.75
MIN_FIXED_LOAD_FACTOR = .3
print(f'Power factor and load assumptions set at PF = {FIXED_PF}; Load = {FIXED_LOAD}; Minimum Fixed Load Factor = {MIN_FIXED_LOAD_FACTOR}')

# Pull list of generator meter info from Excel
gen_info = 'generator_info.xlsx'
df_gen_info = pd.read_excel(gen_info)

# Add cleaned, non-dash serial number to pair with meter readings later
df_gen_info.insert(df_gen_info.columns.get_loc('Meter Serial No.') + 1, 'DeviceSerial_NoDash', df_gen_info['Meter Serial No.'].str.replace('-', ''))
print('Meter-Generator info file imported and cleaned')

# Create functions for use in the script

def calc_eor(kva,pf,load,kw):
    return ((kva*pf*load)-kw)/kw

print('Function created: calc_eor')

def calc_lor(kva,pf,load,kw):
    if kva*pf <= MIN_FIXED_LOAD_FACTOR:
        return ((kva*pf*load)-kw)/kw
    else:
        pass

print('Function created: calc_lor')


Libraries imported
SQL Connection String Created
Power factor and load assumptions set at PF = 0.8; Load = 0.75; Minimum Fixed Load Factor = 0.3
Meter-Generator info file imported and cleaned
Function created: calc_eor
Function created: calc_lor


In [4]:
meter_serials = df_gen_info['DeviceSerial_NoDash'].to_list()

for serial in meter_serials:
    
    rt_st = dt.now()
    
    try:
        # Call PG database to pull in meter readings for a sample meter
        df_meter_readings = pd.read_sql_query(f'select * from "{serial}"',con=ENGINE)

        # Create a new "kWH" column by dividing "Wh" by 1000
        df_meter_readings.insert(df_meter_readings.columns.get_loc("Wh") + 1, "kWH", df_meter_readings['Wh'] / 1000)

        # Create a new "KW" column by parsing from the "kWH" column
        df_meter_readings.insert(df_meter_readings.columns.get_loc("kWH") + 1, "KW", df_meter_readings['kWH'] * 4)

        # Remove 0-value readings from the dataset
        df_meter_readings = df_meter_readings[df_meter_readings['Wh'] != 0]

        # Create a boolean column 'Wh_Outlier' based being 2 std dev above or below the timeslot mean
        df_meter_readings['Wh_Std'] = df_meter_readings.groupby(['day_of_week', 'time'])['Wh'].transform('std')
        df_meter_readings['Wh_Outlier'] = ((df_meter_readings['Wh'] > df_meter_readings['timeslot_mean'] + 2 * df_meter_readings['Wh_Std']) | (df_meter_readings['Wh'] < df_meter_readings['timeslot_mean'] - 2 * df_meter_readings['Wh_Std']))

        # Pull in the KVA Rating for the given meter/generator
        df_meter_readings = df_meter_readings.merge(df_gen_info[['DeviceSerial_NoDash', 'KVA Rating']], 
                                                    left_on='DeviceSerial', 
                                                    right_on='DeviceSerial_NoDash', 
                                                    how='left')

        # Drop the extra "DeviceSerial_NoDash" column if needed
        df_meter_readings = df_meter_readings.drop(columns=['DeviceSerial_NoDash'])

        # Calculate EOR
        df_meter_readings['EOR'] = df_meter_readings.apply(lambda row: calc_eor(row['KVA Rating'], FIXED_PF, FIXED_LOAD, row['KW']), axis=1)

        # Calculate LOR
        df_meter_readings['LOR'] = df_meter_readings.apply(lambda row: calc_lor(row['KVA Rating'], FIXED_PF, FIXED_LOAD, row['KW']), axis=1)

        # Trim off columns
        cols_to_keep = ['index',
                         'DeviceSerial',
                         'Timestamp',
                         'Wh',
                         'kWH',
                         'KW',
                         'gmt_timestamp',
                         'month',
                         'week',
                         'day_of_month',
                         'day_of_week',
                         'time',
                         'timeslot_mean',
                         'timeslot_median',
                         'Wh_Outlier',
                         'KVA Rating',
                         'EOR',
                         'LOR']

        df_meter_readings = df_meter_readings[cols_to_keep]

        # Dump the result to an excel file named for the serial number
        df_meter_readings.to_csv(f'data/{serial}.csv')
    
        # Print status message
        rt_et = dt.now()
        print(f"{serial} | {rt_et-rt_st} elapsed | success")
        
    except Exception as e:
        rt_et = dt.now()
        
        # Capture the exception and print the error message
        print(f"{serial} | {rt_et-rt_st} elapsed | failure | error: {e}")
        

00980845 | 0:00:02.541002 elapsed | success
00980E1F | 0:00:00.104355 elapsed | success
00980DCD | 0:00:00.114887 elapsed | success
00980E2A | 0:00:00.084970 elapsed | success
00980E29 | 0:00:00.083954 elapsed | success
00980B76 | 0:00:00.134370 elapsed | failure | error: can't multiply sequence by non-int of type 'float'
00980B1E | 0:00:00.555114 elapsed | success
00980B1C | 0:00:01.133349 elapsed | success
00980B13 | 0:00:01.125420 elapsed | success


In [5]:
df_meter_readings

Unnamed: 0,index,DeviceSerial,Timestamp,Wh,kWH,KW,gmt_timestamp,month,week,day_of_month,day_of_week,time,timeslot_mean,timeslot_median,Wh_Outlier,KVA Rating,EOR,LOR
0,0,00980B13,1671426000,7564,7.564,30.256,2022-12-19T05:00:00+00:00,12,51,19,monday,05:00,2202.265306,2270.0,True,60,0.189847,
1,1,00980B13,1671426900,8179,8.179,32.716,2022-12-19T05:15:00+00:00,12,51,19,monday,05:15,1665.960000,82.5,True,60,0.100379,
2,2,00980B13,1671427800,2140,2.140,8.560,2022-12-19T05:30:00+00:00,12,51,19,monday,05:30,1386.826923,83.0,False,60,3.205607,
3,3,00980B13,1671428700,76,0.076,0.304,2022-12-19T05:45:00+00:00,12,51,19,monday,05:45,1254.961538,81.0,False,60,117.421053,
4,4,00980B13,1671429600,72,0.072,0.288,2022-12-19T06:00:00+00:00,12,51,19,monday,06:00,1159.846154,81.0,False,60,124.000000,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33631,35073,00980B13,1702991700,80,0.080,0.320,2023-12-19T12:15:00+00:00,12,51,19,tuesday,12:15,846.960784,80.0,False,60,111.500000,
33632,35074,00980B13,1702992600,82,0.082,0.328,2023-12-19T12:30:00+00:00,12,51,19,tuesday,12:30,952.431373,81.0,False,60,108.756098,
33633,35075,00980B13,1702993500,81,0.081,0.324,2023-12-19T12:45:00+00:00,12,51,19,tuesday,12:45,888.254902,81.0,False,60,110.111111,
33634,35076,00980B13,1702994400,76,0.076,0.304,2023-12-19T13:00:00+00:00,12,51,19,tuesday,13:00,933.529412,82.0,False,60,117.421053,


In [None]:
# Disregard outliers
# Disregard 0's
# Parse exact date from timestamp
# Week of year from timestamp
# Parse out month from timestamp

# Option to display all four values aggregated on different bases (day, month, week, etc.) (KWH, KW, EOR, LOR)

# Ignore/skip 0's and outliers and see what resulting dataset looks like for each meter; if needed, try plugging

0 - 11:00 
0 - 11:15
0 - 11:30
0 - 11:45
    

- if we have only one reading within an hour, we also mark to be ignored
- only consider hourly total in cases where we have at least 2 readings within that hour
--> keep track of % missing/outlier values
--> Display as total 

    
5 - 12:00
95 - outlier 6 - 12:15
7 - 12:30
0 - zero 6 - 12:45