In [22]:
import pandas as pd
import json as js
import requests
from sqlalchemy import create_engine
from datetime import datetime as dt, timedelta, timezone
import time
import pytz  # for timezone handling
import traceback
print('Libraries Imported')

### ENGINE = create_engine('postgresql://postgres:4raxeGo5xgB@localhost:5432/eyedro_meters')
ENGINE = create_engine('postgresql://avnadmin:AVNS_zSrniBsHGVQSqhqunlJ@pg-unhcr-unhcr-007.a.aivencloud.com:15602/defaultdb', 
                       connect_args={'options': '-csearch_path=gb_2024'})

print('SQL Connection String Created')

API_BASE_URL = "https://api.eyedro.com/customcmd"
USER_KEY = "UNHCRMHiYgbHda9cRv4DuPp28DnAnfeV8s6umP5R"
USER_KEY_GET_DATA = "UNHCRp28DnAV8s6uHdMHiYgba95RcRv4DnfeuPmP"
print('EyeDro Endpoint and Key Set')


# Reference the below view which calls the SQL database to list the table names for the meter tables
# This is our list of meters which already have data which we will then update

'''
create or replace view vw_table_list
as 
select "table_name"
from information_schema.tables
where "table_catalog" = 'eyedro_meters'
and "table_name" like '009%'
and "table_type" = 'BASE TABLE'
;
'''

# Function to call meter inventory API method
def get_device_inventory_list():
    url = f"{API_BASE_URL}?&Cmd=Unhcr.GetDeviceInventoryList&UserKey={USER_KEY}"
    response = requests.get(url)
    data = response.json()
    serials = [(item['Serial']) for item in data['List']]
    return serials

print('function created: get_device_inventory_list')

# Function to call meter GetData API method
def eyedro_getdata(serial, timestamp):
    
    '''
    This function takes as its input a meter serial number and an epoch timestamp and calls the GetData API to 
    retrieve the prior day's readings (96 steps at 15-minute intervals). It returns the response as JSON text
    '''
    meter_url = "https://api.eyedro.com/customcmd?Cmd=Unhcr.GetData&DeviceSerial=" + str(serial) + "&DateStartSecUtc=" + str(timestamp) + f"&DateNumSteps=96&UserKey={USER_KEY_GET_DATA}"
    response = requests.get(meter_url, timeout=600)
    return js.loads(response.text)

print('function created: eyedro_getdata')

def get_midnight_epoch_timestamps(dt):
    try:
        # Get the current date
        current_date = dt.utcnow().date()
        
        # Calculate the start date, which is 'past_months' months before the current date
        # Assuming 30 days per month for simplicity
        ###start_date = current_date - timedelta(days=30.437 * past_months) # average 30.437 days per month
        
        start_date =  dt
        
        # List to store the midnight epoch timestamps
        midnight_timestamps = []

        # Loop over each day from the start date to the current date
        while start_date <= current_date:
            # Create a datetime object for midnight of the current date
            midnight = dt(start_date.year, start_date.month, start_date.day)
            
            # Convert the datetime object to an epoch timestamp and add it to the list
            epoch_timestamp = int(time.mktime(midnight.timetuple()))
            midnight_timestamps.append(epoch_timestamp)
            
            # Move to the next day
            start_date += timedelta(days=1)
        print('ZZZZZZ')
        return midnight_timestamps
    except Exception as e:
        print("MD EEEEE")
        traceback.print_exc()
        exit(123)

print('function created: get_midnight_epoch_timestamps')

def parse_timestamp(timestamp):
    '''
    Function to parse out date and time information from timestamp for later use (feature engineering)
    '''
    ts = dt.utcfromtimestamp(timestamp).replace(tzinfo=pytz.utc)

    # Extract various components
    gmt_timestamp = ts.isoformat()
    year = ts.year
    month = ts.month
    week = ts.isocalendar()[1]  # Week number of the year
    day_of_month = ts.day
    day_of_week = ts.strftime('%A').lower()  # Full weekday name in lowercase
    hour = ts.hour
    minute = ts.minute
    time = ts.strftime('%H:%M')

    return {
        'gmt_timestamp': gmt_timestamp,
        'year': year,
        'month': month,
        'week': week,
        'day_of_month': day_of_month,
        'day_of_week': day_of_week,
        'hour': hour,
        'minute': minute,
        'time': time
    }

print('function created: parse_timestamp')

def fill_missing_timestamps(df):
    '''
    Function to scan for missing timestamps and synthetically create 0-value Wh readings to fill these gaps
    '''
    # Convert the "Timestamp" column to a datetime object
    df['Timestamp'] = pd.to_datetime(df['Timestamp'], unit='s')

    # Find the minimum and maximum timestamps in the dataframe
    min_timestamp = df['Timestamp'].min()
    max_timestamp = df['Timestamp'].max()

    # Generate a list of expected timestamps at 15-minute intervals
    expected_timestamps = pd.date_range(start=min_timestamp, end=max_timestamp, freq='15T')

    # Identify missing timestamps
    missing_timestamps = expected_timestamps[~expected_timestamps.isin(df['Timestamp'])]

    # Create new rows for missing timestamps with 0 in the "Wh" column
    missing_data = pd.DataFrame({
        'Timestamp': missing_timestamps,
        'DeviceSerial': df['DeviceSerial'].iloc[0],  # Assuming all rows have the same serial number
        'Wh': 0
    })

    # Concatenate the missing data with the original dataframe
    df = pd.concat([df, missing_data])

    # Sort the dataframe by timestamp
    df.sort_values(by='Timestamp', inplace=True)
    
    # Convert the timestamp back to epoch format
    df['Timestamp'] = df['Timestamp'].astype(int) // 10**9

    return df

print('function created: fill_missing_timestamps')

def impute_and_summarize(df):
    # Calculate the mean Wh value for each timeslot of each day of the week (only non-zero values)
    df['timeslot_mean'] = df.groupby(['day_of_week', 'time'])['Wh'].transform(lambda x: x[x > 0].mean())

    # Calculate the median Wh value for each timeslot of each day of the week (only non-zero values)
    df['timeslot_median'] = df.groupby(['day_of_week', 'time'])['Wh'].transform(lambda x: x[x > 0].median())

    # Create "imputed_mean" column based on conditions
    df['imputed_mean'] = df.apply(lambda row: row['timeslot_mean'] if row['Wh'] == 0 else row['Wh'], axis=1)

    # Create "imputed_median" column based on conditions
    df['imputed_median'] = df.apply(lambda row: row['timeslot_median'] if row['Wh'] == 0 else row['Wh'], axis=1)

    # Create a boolean column to indicate when the calculated value was used
    df['calculated_used'] = df['Wh'] == 0

    return df

print('function created: impute_and_summarize')

def trim_dataframe(input_df):
    # Sort the DataFrame by the "Timestamp" column
    input_df.sort_values(by='Timestamp', inplace=True)
    
    # Reset the index
    input_df.reset_index(drop=True, inplace=True)

    # Find the index of the first non-zero value in the "Wh" column
    first_non_zero_index = input_df['Wh'].gt(0).idxmax()

    # Extract the trimmed DataFrame
    trimmed_df = input_df.loc[first_non_zero_index:]

    return trimmed_df

print('function created: trim_dataframe')


Libraries Imported
SQL Connection String Created
EyeDro Endpoint and Key Set
function created: get_device_inventory_list
function created: eyedro_getdata
function created: get_midnight_epoch_timestamps
function created: parse_timestamp
function created: fill_missing_timestamps
function created: impute_and_summarize
function created: trim_dataframe


In [23]:
# Gather list of existing meters in SQL database
s_sql_serials = set()
try:
    s_sql_serials = set(pd.read_sql_query("select * from vw_table_list;",con=ENGINE).serial_num.to_list())
except Exception as e:
    print('EEEEEEEEEEEEEE',e)
    pass

# Gather list of meters from inventory API
s_api_serials = set(get_device_inventory_list())
print('A')
# Create set of meters to be called which are not already in SQL database
serials_to_call = s_api_serials - s_sql_serials
print('B')
# Convert the set back to a list in case we need to slice it
SERIALS_TO_CALL = list(serials_to_call)
print('C')
print(SERIALS_TO_CALL,'!!!!!!!!')




for serial in SERIALS_TO_CALL:
    
    rt_st = dt.now()
    
    try:
        
        # Generate the list of midnight epoch timestamps for the past 12 months
        ###midnight_timestamps = get_midnight_epoch_timestamps(12)

        # Generate the list of midnight epoch timestamps for the past 12 months
        midnight_timestamps = get_midnight_epoch_timestamps(dt(year=2024, month=1, day=1, hour=0, minute=0, second=0).date())
        print('AAAAAAA')

        # Create list to hold responses to the API calls, storing each response as an element in a list
        li_responses = []

        # Call the API to fetch data, skipping if a fatal error is encountered
        for timestamp in midnight_timestamps:
            try:
                li_responses.append(eyedro_getdata(serial, timestamp))
            except:
                pass

        # Prepare an empty list to hold all rows of the final DataFrame
        all_rows = []
        print('BBBB')
        # Iterate over each response in the list of responses and format into a dataframe
        for data in li_responses:
            header_info = {
                'DeviceSerial': data['DeviceSerial']
            }

            for reading in data['Data']['Wh'][0]:
                timestamp, meter_reading = reading
                row = {**header_info, 'Timestamp': timestamp, 'Wh': meter_reading}

                # Add the combined information to the list
                all_rows.append(row)

        print('CCCCCC')
        # Create a DataFrame from the API response which we will add to the existing data
        df_new_data = pd.DataFrame(all_rows)
        print('DDDDD')
        # Scan for missing 15-minute increments and fill gaps in the data with 0-Wh readings
        df_new_data = fill_missing_timestamps(df_new_data)
        print('EEEEE')
        # Parse date and time info out of the timestamps
        parsed_timestamps = df_new_data['Timestamp'].apply(parse_timestamp)
        print('FFFF')
        # Add the parsed data back into the DataFrame
        df_new_data = df_new_data.join(pd.json_normalize(parsed_timestamps))

        # Sort the DataFrame by 'Wh' in descending order to put non-zero Wh values first (for use later in dropping duplicates)
        df_new_data = df_new_data.sort_values(by=['Wh'], ascending=False)

        # Drop duplicates based on 'Timestamp' and keep the first occurrence
        df_new_data = df_new_data.drop_duplicates(subset=['Timestamp'], keep='first')

        # Re-sort the dataframe by timestamp
        df_new_data = df_new_data.sort_values(by=['Timestamp'], ascending=True)

        # Impute means and medians and create imputed value columns for later use
        df_new_data = impute_and_summarize(df_new_data)
        
        # Trim resulting dataframe such that any unnecessary 0's at beginning of dataset are removed 
        # This is necessary since not all meters have been online for full 12 months of data period covered by update
        df_new_data = trim_dataframe(df_new_data)
        
        # Load resulting update to SQL
        df_new_data.to_sql(f"{serial}", ENGINE, if_exists='replace')
        
        # Print status message
        rt_et = dt.now()
        print(f"{serial} | {rt_et-rt_st} elapsed | success | Rows Loaded: {len(df_new_data)}")
    
    except Exception as e:
        rt_et = dt.now()
        
        # Capture the exception and print the error message
        print(f"{serial} | {rt_et-rt_st} elapsed | failure | error: {e}")
        print("EEEEE")
        traceback.print_exc()
        break
    

EEEEEEEEEEEEEE 'DataFrame' object has no attribute 'serial_num'
A
B
C
['00980B8C', '0098095F', '00980923', '00980A13', '00980B5E', '00980E10', '00980901', '00980857', '00980873', '009809FC', '00980B75', '00980E13', '00980891', '00980B97', '009807D6', '00980861', '00980B7F', '009809B5', '00980A00', '0098089D', '00980A57', '00980A66', '009808F0', '00980B90', '00980864', '00980937', '009809B6', '00980989', '00980A39', '00980DA7', '0098090C', '009809FE', '00980879', '009807D7', '00980AE3', '009004E6', '00980E14', '0098092F', '00980833', '0098090D', '009004E2', '00980A01', '009808BE', '00980A02', '00980AC8', '00980B6E', '00980E2A', '00980842', '00980B2F', '00980741', '0098075B', '00980A21', '00980902', '00980835', '00980DA8', '00980A99', '00980DAA', '009809AC', '00980A22', '009808EA', '00980742', '00980B13', '00980757', '0098089E', '00980988', '009808DE', '00980962', '0098079A', '00980830', '00980A2C', '00980929', '00980B80', '009808AF', '009809F6', '0098087B', '00980DEC', '009808FB', '0098

Traceback (most recent call last):
  File "C:\Users\steve\AppData\Local\Temp\ipykernel_7756\2978491743.py", line 63, in get_midnight_epoch_timestamps
    current_date = dt.utcnow().date()
AttributeError: 'datetime.date' object has no attribute 'utcnow'
Traceback (most recent call last):
  File "C:\Users\steve\AppData\Local\Temp\ipykernel_7756\818045206.py", line 40, in <module>
    for timestamp in midnight_timestamps:
TypeError: 'NoneType' object is not iterable
