In [16]:
import pandas as pd
from sqlalchemy import create_engine
from datetime import datetime as dt
print('Libraries imported')

# Create connection string to SQL DB with meter readings

#######ENGINE = create_engine('postgresql://postgres:4raxeGo5xgB$@localhost:5432/eyedro_meters')
ENGINE = create_engine('mysql://lor__eor:lor__eor@db4free.net/eyedro_meters') # connect to server

print('SQL Connection String Created')

# Set power factor and load assumptions
FIXED_PF = 0.8
FIXED_LOAD = 0.75
MIN_FIXED_LOAD_FACTOR = .3
print(f'Power factor and load assumptions set at PF = {FIXED_PF}; Load = {FIXED_LOAD}; Minimum Fixed Load Factor = {MIN_FIXED_LOAD_FACTOR}')

# Pull list of generator meter info from Excel
gen_info = 'generator_info.xlsx'
df_gen_info = pd.read_excel(gen_info)

# Add cleaned, non-dash serial number to pair with meter readings later
df_gen_info.insert(df_gen_info.columns.get_loc('Meter Serial No.') + 1, 'DeviceSerial_NoDash', df_gen_info['Meter Serial No.'].str.replace('-', ''))
print('Meter-Generator info file imported and cleaned')

# Create functions for use in the script

def calc_eor(kva,pf,load,kw):
    return ((kva*pf*load)-kw)/kw

print('Function created: calc_eor')

def calc_lor(kva,pf,load,kw):
    if kva*pf <= MIN_FIXED_LOAD_FACTOR:
        return ((kva*pf*load)-kw)/kw
    else:
        pass

print('Function created: calc_lor')


Libraries imported
SQL Connection String Created
Power factor and load assumptions set at PF = 0.8; Load = 0.75; Minimum Fixed Load Factor = 0.3
Meter-Generator info file imported and cleaned
Function created: calc_eor
Function created: calc_lor


In [21]:
meter_serials = df_gen_info['DeviceSerial_NoDash'].to_list()
print('XXXXXXXXX',meter_serials)
for serial in meter_serials:
    
    rt_st = dt.now()
    
    try:
        # Call PG database to pull in meter readings for a sample meter
        df_meter_readings = pd.read_sql_query(f'select * from `{serial}`',con=ENGINE)
        print("df_meter_readings",serial)
        continue

        # Create a new "kWH" column by dividing "Wh" by 1000
        df_meter_readings.insert(df_meter_readings.columns.get_loc("Wh") + 1, "kWH", df_meter_readings['Wh'] / 1000)

        # Create a new "KW" column by parsing from the "kWH" column
        df_meter_readings.insert(df_meter_readings.columns.get_loc("kWH") + 1, "KW", df_meter_readings['kWH'] * 4)

        # Remove 0-value readings from the dataset
        df_meter_readings = df_meter_readings[df_meter_readings['Wh'] != 0]

        # Create a boolean column 'Wh_Outlier' based being 2 std dev above or below the timeslot mean
        df_meter_readings['Wh_Std'] = df_meter_readings.groupby(['day_of_week', 'time'])['Wh'].transform('std')
        df_meter_readings['Wh_Outlier'] = ((df_meter_readings['Wh'] > df_meter_readings['timeslot_mean'] + 2 * df_meter_readings['Wh_Std']) | (df_meter_readings['Wh'] < df_meter_readings['timeslot_mean'] - 2 * df_meter_readings['Wh_Std']))

        # Pull in the KVA Rating for the given meter/generator
        df_meter_readings = df_meter_readings.merge(df_gen_info[['DeviceSerial_NoDash', 'KVA Rating']], 
                                                    left_on='DeviceSerial', 
                                                    right_on='DeviceSerial_NoDash', 
                                                    how='left')

        # Drop the extra "DeviceSerial_NoDash" column if needed
        df_meter_readings = df_meter_readings.drop(columns=['DeviceSerial_NoDash'])

        # Calculate EOR
        df_meter_readings['EOR'] = df_meter_readings.apply(lambda row: calc_eor(row['KVA Rating'], FIXED_PF, FIXED_LOAD, row['KW']), axis=1)

        # Calculate LOR
        df_meter_readings['LOR'] = df_meter_readings.apply(lambda row: calc_lor(row['KVA Rating'], FIXED_PF, FIXED_LOAD, row['KW']), axis=1)

        # Trim off columns
        cols_to_keep = ['index',
                         'DeviceSerial',
                         'Timestamp',
                         'Wh',
                         'kWH',
                         'KW',
                         'gmt_timestamp',
                         'month',
                         'week',
                         'day_of_month',
                         'day_of_week',
                         'time',
                         'timeslot_mean',
                         'timeslot_median',
                         'Wh_Outlier',
                         'KVA Rating',
                         'EOR',
                         'LOR']

        df_meter_readings = df_meter_readings[cols_to_keep]

        # Dump the result to an excel file named for the serial number
        df_meter_readings.to_csv(f'data/{serial}.csv')
    
        # Print status message
        rt_et = dt.now()
        print(f"{serial} | {rt_et-rt_st} elapsed | success")
        
    except Exception as e:
        rt_et = dt.now()
        
        # Capture the exception and print the error message
        print(f"{serial} | {rt_et-rt_st} elapsed | failure | error: {e}")
        

XXXXXXXXX ['00980845', '00980E1F', '00980DCD', '00980E2A', '00980E29', '00980B76', '00980B1E', '00980B1C', '00980B13']
df_meter_readings 00980845
df_meter_readings 00980E1F
df_meter_readings 00980DCD
df_meter_readings 00980E2A
00980E29 | 0:00:00.578684 elapsed | failure | error: (MySQLdb.ProgrammingError) (1146, "Table 'eyedro_meters.00980E29' doesn't exist")
[SQL: select * from `00980E29`]
(Background on this error at: https://sqlalche.me/e/20/f405)
00980B76 | 0:00:00.638723 elapsed | failure | error: (MySQLdb.ProgrammingError) (1146, "Table 'eyedro_meters.00980B76' doesn't exist")
[SQL: select * from `00980B76`]
(Background on this error at: https://sqlalche.me/e/20/f405)
df_meter_readings 00980B1E
df_meter_readings 00980B1C
df_meter_readings 00980B13


In [None]:
df_meter_readings

In [None]:
# Disregard outliers
# Disregard 0's
# Parse exact date from timestamp
# Week of year from timestamp
# Parse out month from timestamp

# Option to display all four values aggregated on different bases (day, month, week, etc.) (KWH, KW, EOR, LOR)

# Ignore/skip 0's and outliers and see what resulting dataset looks like for each meter; if needed, try plugging

# 0 - 11:00 
# 0 - 11:15
# 0 - 11:30
# 0 - 11:45
    

# - if we have only one reading within an hour, we also mark to be ignored
# - only consider hourly total in cases where we have at least 2 readings within that hour
# --> keep track of % missing/outlier values
# --> Display as total 

    
# 5 - 12:00
# 95 - outlier 6 - 12:15
# 7 - 12:30
# 0 - zero 6 - 12:45