# Set up

In [1]:
import getpass
import pandas as pd
import numpy as np
# import matplotlib as mplot
import urllib.parse
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.display import Markdown as md
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import pandas as pd
import plotly.express as px
import pytz

pd.set_option('display.max_columns', None)

**Connection**

Enter the EDM server address and the login credentials provided by Awesense. If you do not have the credentials, or have any trouble connecting, please contact api@awesense.com.
<span style='color:red'> **Please do NOT store the credentials in the notebook, nor share them with anyone.** </span>

In [2]:
# Input creditials by code (need to remove your private info before you pushed it)
edm_address = ""
edm_name = ""
edm_password = ""

# User input the creditials manually 
edm_address = getpass.getpass(prompt='EDM server address: ')

print('\nEDM login information')
edm_name = getpass.getpass(prompt='Username: ')
edm_password = getpass.getpass(prompt='Password: ')

# edm_password = urllib.parse.quote(edm_password)

%load_ext sql
%sql postgresql://$edm_name:$edm_password@$edm_address/edm
%config SqlMagic.displaycon = False
%config SqlMagic.feedback = False

# Delete the credential variables for security purposes.

del edm_name, edm_password

#### Input Parameters
Input the grid name to find all the high-level transformers in this grid. 

In [3]:
# User input for the grid.
# grid_id = input('Enter grid ID: ') # awefice

# For simplitiy, set grid_id = awefice by code
grid_id = 'awefice'#'North Central Zone'

In [4]:
result = %sql SELECT grid_element_id, \
                        is_producer,\
                        meta, \
                        phases\
                FROM grid_element\
                WHERE grid_id = '{grid_id}'\
                    AND is_producer = True\
                    AND type = 'Transformer';

# Convert the results to a data frame.
df_transformers = result.DataFrame()

# Pull out the information from `meta` column saved as JSONB.
df_transformers = pd.concat([df_transformers.drop(['meta'], axis=1),
                             df_transformers['meta'].apply(pd.Series)], axis=1)

print(df_transformers.columns)

# Choose the relevant columns to display transformers. 
df_transformers = df_transformers[['grid_element_id', 'is_producer', 'ownership', 'rating_kva', 
                                   'phases', 'voltage_level', 'hvmv_parent_element', 
                                   'primary_voltage', 'secondary_voltage']]

# Display the results.
df_transformers

Index(['grid_element_id', 'is_producer', 'phases', 'name', 'label', 'latitude',
       'load_loss', 'longitude', 'mv_feeder', 'ownership', 'make_model',
       'rating_kva', 'enclosure_id', 'no_load_loss', 'voltage_level',
       'commission_date', 'primary_voltage', 'maintenance_type',
       'manufacture_date', 'installation_type', 'secondary_voltage',
       'hvmv_parent_element', 'is_operation_status', 'parent_transformer_id',
       'winding_configuration'],
      dtype='object')


Unnamed: 0,grid_element_id,is_producer,ownership,rating_kva,phases,voltage_level,hvmv_parent_element,primary_voltage,secondary_voltage
0,transformer_2,True,utility,20000.0,ABC,HV/MV,,200000,12500
1,transformer_6,True,utility,20000.0,ABC,HV/MV,,200000,12500


---

### Define the grid id and grid element that you would like to investigate

In [5]:
grid_id = 'North Central Zone'
grid_element_id = '12373_hvmv'

### Create View grid_element_metric to grab meters and other grid elements info

In [6]:
%%sql
    
CREATE OR REPLACE TEMPORARY VIEW grid_element_metric AS
    SELECT grid_id,
            grid_element_id,
            phases,
            type,
            provider,
            direction,
            friendly_id,
            metric_key AS metric,
            valid,
            timestamp,
            value
    FROM grid_element_data_source geds
    JOIN UNNEST(geds.metrics::TEXT[]) AS metric_key
        ON true
    LEFT JOIN ts_data_source_select(grid_element_data_source_id, metric_key) AS ts
        ON true;

[]

### Create View meter_data_source

In [7]:
%%sql

CREATE OR REPLACE TEMPORARY VIEW meter_data_source2 AS
    SELECT meter.grid_id,
            meter.grid_element_id,
            geds.grid_element_data_source_id,
            geds.friendly_id,
            geds.phases,
            geds.provider,
            metric_key as metric,
            lower(geds.valid) as start_time,
            upper(geds.valid) as end_time
    FROM grid_get_downstream('{grid_id}', '{grid_element_id}') AS meter
    LEFT JOIN grid_element_data_source geds
        ON meter.grid_element_id = geds.grid_element_id
        AND meter.grid_id = geds.grid_id
        AND geds.type = 'CONSUMER'
    JOIN UNNEST(geds.metrics::TEXT[]) AS metric_key
        ON true
    WHERE meter.type = 'Meter';

[]

### Create View meter_consumption

In [8]:
%%sql

CREATE OR REPLACE TEMPORARY VIEW meter_consumption2 AS
SELECT meter.grid_id,
        meter.grid_element_id,
        meter.friendly_id,
        meter.phases,
        timestamp,
        value AS kWh
FROM meter_data_source2 meter
LEFT JOIN grid_element_metric gem
    ON gem.grid_id = meter.grid_id
    AND gem.grid_element_id = meter.grid_element_id
WHERE gem.metric = 'kWh'
   AND gem.type = 'CONSUMER';

[]

In [9]:
# %%sql

# WITH ts_stats AS (
#     SELECT SUM(kWh) AS kWh, MIN(timestamp) AS start_timerange, MAX(timestamp) AS end_timerange
#     FROM meter_consumption2
# )
# SELECT name, value FROM (
#     SELECT 1 AS idx, 'Meters Found' AS name, (SELECT COUNT(DISTINCT grid_element_id) FROM meter_data_source2)::text AS value
#     UNION
#     SELECT 2, 'Meters w/ Datasources', (SELECT COUNT(DISTINCT grid_element_id) FROM meter_data_source2 WHERE grid_element_data_source_id IS NOT NULL)::text
#     UNION
#     SELECT 3, 'Common DS Timerange', (SELECT CONCAT(MAX(start_time), ' - ',  MIN(end_time)) FROM meter_data_source2)::text
#     UNION
#     SELECT 4, 'Common Timeseries Timerange', (SELECT CONCAT(start_timerange, ' - ', end_timerange) FROM ts_stats)::text
#     UNION
#     SELECT 5, 'Total Consumption', (SELECT kwh FROM ts_stats)::text
# ) x ORDER BY idx
# ;

In [10]:
# grab all information from sql and save to a python varable
# the information is restricted to timestamp >= '2024-01-01 00:00:00+00:00'
meter_consumption = %sql SELECT grid_id,\
                grid_element_id,\
                friendly_id,\
                phases,\
                timestamp,\
                kWh\
            FROM meter_consumption2;
                    
# Sort the data by date saved as `month`.
meter_consumption_df = meter_consumption.DataFrame()
meter_consumption_df.head()

Unnamed: 0,grid_id,grid_element_id,friendly_id,phases,timestamp,kwh
0,North Central Zone,17AY10-1,17AY10-1,B,2021-01-01 05:00:00+00:00,1.69211
1,North Central Zone,17AY10-1,17AY10-1,B,2021-01-01 06:00:00+00:00,1.745985
2,North Central Zone,17AY10-1,17AY10-1,B,2021-01-01 07:00:00+00:00,1.346091
3,North Central Zone,17AY10-1,17AY10-1,B,2021-01-01 08:00:00+00:00,1.506344
4,North Central Zone,17AY10-1,17AY10-1,B,2021-01-01 09:00:00+00:00,1.537056


In [11]:
# Create columns to indicate the month, day and hour information
meter_consumption_df['month'] = meter_consumption_df['timestamp'].dt.to_period('M').dt.to_timestamp().dt.date
meter_consumption_df['day'] = meter_consumption_df['timestamp'].dt.to_period('D').dt.to_timestamp().dt.date
meter_consumption_df['hour'] = meter_consumption_df['timestamp']

  meter_consumption_df['month'] = meter_consumption_df['timestamp'].dt.to_period('M').dt.to_timestamp().dt.date
  meter_consumption_df['day'] = meter_consumption_df['timestamp'].dt.to_period('D').dt.to_timestamp().dt.date


In [12]:
meter_consumption_df

Unnamed: 0,grid_id,grid_element_id,friendly_id,phases,timestamp,kwh,month,day,hour
0,North Central Zone,17AY10-1,17AY10-1,B,2021-01-01 05:00:00+00:00,1.692110,2021-01-01,2021-01-01,2021-01-01 05:00:00+00:00
1,North Central Zone,17AY10-1,17AY10-1,B,2021-01-01 06:00:00+00:00,1.745985,2021-01-01,2021-01-01,2021-01-01 06:00:00+00:00
2,North Central Zone,17AY10-1,17AY10-1,B,2021-01-01 07:00:00+00:00,1.346091,2021-01-01,2021-01-01,2021-01-01 07:00:00+00:00
3,North Central Zone,17AY10-1,17AY10-1,B,2021-01-01 08:00:00+00:00,1.506344,2021-01-01,2021-01-01,2021-01-01 08:00:00+00:00
4,North Central Zone,17AY10-1,17AY10-1,B,2021-01-01 09:00:00+00:00,1.537056,2021-01-01,2021-01-01,2021-01-01 09:00:00+00:00
...,...,...,...,...,...,...,...,...,...
19210899,North Central Zone,17Y95-1,17Y95-1,A,2024-06-20 13:00:00+00:00,2.910039,2024-06-01,2024-06-20,2024-06-20 13:00:00+00:00
19210900,North Central Zone,17Y95-1,17Y95-1,A,2024-06-20 14:00:00+00:00,2.900143,2024-06-01,2024-06-20,2024-06-20 14:00:00+00:00
19210901,North Central Zone,17Y95-1,17Y95-1,A,2024-06-20 15:00:00+00:00,3.243275,2024-06-01,2024-06-20,2024-06-20 15:00:00+00:00
19210902,North Central Zone,17Y95-1,17Y95-1,A,2024-06-20 16:00:00+00:00,2.561676,2024-06-01,2024-06-20,2024-06-20 16:00:00+00:00


In [13]:
element_downstream = %sql SELECT grid_id, grid_element_id, phases, terminal1_cn, terminal2_cn FROM grid_get_downstream('{grid_id}', '{grid_element_id}')
element_downstream = element_downstream.DataFrame()
element_downstream

#Merging elements that come one following the other
BB = element_downstream.merge(element_downstream, left_on='terminal2_cn', right_on='terminal1_cn',suffixes=('_upper', '_lower'))

#Tracing elements where the phases change from ABC to non-ABC
bif = BB[(BB['phases_upper']=='ABC') & (BB['phases_lower']!='ABC')]

meters_under_junctions = pd.DataFrame(columns=['grid_element_id', 'junction'])

#Finding meters under each junction
for junction in bif['grid_element_id_lower']:
    # Find meters under each junction
    meters_under_junction = %sql SELECT grid_element_id FROM grid_get_downstream('{grid_id}', '{junction}')\
                            WHERE type='Meter';
                            
    meters_under_junction = meters_under_junction.DataFrame()
    meters_under_junction['junction'] = junction
    
    meters_under_junctions = pd.concat([meters_under_junctions, meters_under_junction], axis=0)

meter_consumption_df = meter_consumption_df.merge(meters_under_junctions, on='grid_element_id')
meter_consumption_df

Unnamed: 0,grid_id,grid_element_id,friendly_id,phases,timestamp,kwh,month,day,hour,junction
0,North Central Zone,17AY10-1,17AY10-1,B,2021-01-01 05:00:00+00:00,1.692110,2021-01-01,2021-01-01,2021-01-01 05:00:00+00:00,L17-57719
1,North Central Zone,17AY10-1,17AY10-1,B,2021-01-01 06:00:00+00:00,1.745985,2021-01-01,2021-01-01,2021-01-01 06:00:00+00:00,L17-57719
2,North Central Zone,17AY10-1,17AY10-1,B,2021-01-01 07:00:00+00:00,1.346091,2021-01-01,2021-01-01,2021-01-01 07:00:00+00:00,L17-57719
3,North Central Zone,17AY10-1,17AY10-1,B,2021-01-01 08:00:00+00:00,1.506344,2021-01-01,2021-01-01,2021-01-01 08:00:00+00:00,L17-57719
4,North Central Zone,17AY10-1,17AY10-1,B,2021-01-01 09:00:00+00:00,1.537056,2021-01-01,2021-01-01,2021-01-01 09:00:00+00:00,L17-57719
...,...,...,...,...,...,...,...,...,...,...
19271693,North Central Zone,17Y95-1,17Y95-1,A,2024-06-20 13:00:00+00:00,2.910039,2024-06-01,2024-06-20,2024-06-20 13:00:00+00:00,L17-58341
19271694,North Central Zone,17Y95-1,17Y95-1,A,2024-06-20 14:00:00+00:00,2.900143,2024-06-01,2024-06-20,2024-06-20 14:00:00+00:00,L17-58341
19271695,North Central Zone,17Y95-1,17Y95-1,A,2024-06-20 15:00:00+00:00,3.243275,2024-06-01,2024-06-20,2024-06-20 15:00:00+00:00,L17-58341
19271696,North Central Zone,17Y95-1,17Y95-1,A,2024-06-20 16:00:00+00:00,2.561676,2024-06-01,2024-06-20,2024-06-20 16:00:00+00:00,L17-58341


In [14]:
#grouping power consumption each junction
meter_consumption_df.groupby(['phases', 'junction']).agg({'kwh': 'sum'}).reset_index()

Unnamed: 0,phases,junction,kwh
0,A,104479,68896.1
1,A,11628,116541.7
2,A,118021,222289.7
3,A,118184,82215.37
4,A,118861,114026.7
5,A,L17-136410,267426.2
6,A,L17-137897,82646.16
7,A,L17-138359,163286.9
8,A,L17-140870,76194.39
9,A,L17-58026,3357338.0


In [15]:
# # Algorithm for grouping the meters according to its closest junction (root of subtree that starts )

# meter_list = meter_consumption_df[meter_consumption_df['phases'].apply(lambda x: len(x)==1)]['grid_element_id'].unique()

# meter_terminals = []

# for meter in meter_list:
#     trace_element = element_downstream[element_downstream['grid_element_id']==meter]

#     while len(trace_element['phases'].iloc[0])==1:
#         last_trace_element = trace_element
#         terminal = trace_element['terminal1_cn'].iloc[0]
#         trace_element = element_downstream[element_downstream['terminal2_cn']==terminal]
    
#     # junction = trace_element['grid_element_id'].iloc[0]
#     junction = last_trace_element['grid_element_id'].iloc[0]
#     # print(meter, last_trace_element)
        
#     meter_terminals.append([meter, junction, terminal])
    
# meter_group_df = pd.DataFrame(data=meter_terminals, columns=['grid_element_id', 'junction', 'junction_terminal1'])
# meter_group_df


# Group the data 

### From this point, we can group data according to your needs and do some filtering if necessary

### Use Case 1: by meter and month

In [17]:
meter_month_df = meter_consumption_df.groupby(['friendly_id', 'month'])['kwh'].mean().reset_index()
meter_month_df

Unnamed: 0,friendly_id,month,kwh
0,17AY10-1,2021-01-01,2.143684
1,17AY10-1,2021-02-01,2.291655
2,17AY10-1,2021-03-01,1.916410
3,17AY10-1,2021-04-01,2.023651
4,17AY10-1,2021-05-01,2.407175
...,...,...,...
13267,17Y95-1,2024-02-01,1.528928
13268,17Y95-1,2024-03-01,1.354950
13269,17Y95-1,2024-04-01,1.390599
13270,17Y95-1,2024-05-01,1.721543


In [18]:
# Plot graph
px.line(meter_month_df, x='month', y='kwh', 
        title='Average Hourly Consumption in each Month by Meter',
        color='friendly_id')

### Use Case 2: by phase and month

In [19]:
phases_month_df = meter_consumption_df.groupby(['phases', 'month'])['kwh'].mean().reset_index()
phases_month_df

Unnamed: 0,phases,month,kwh
0,A,2021-01-01,1.566452
1,A,2021-02-01,1.787051
2,A,2021-03-01,1.332542
3,A,2021-04-01,1.418628
4,A,2021-05-01,1.651791
...,...,...,...
121,C,2024-02-01,1.361080
122,C,2024-03-01,1.210070
123,C,2024-04-01,1.237503
124,C,2024-05-01,1.549370


In [20]:
# Plot graph
px.line(phases_month_df, x='month', y='kwh', 
        title='Average Hourly Consumption in each Month by Meter',
        color='phases')

### Use Case 3: by phase, cluster and month

In [21]:
phases_cluster_month_df = meter_consumption_df.groupby(['junction', 'phases', 'month'])['kwh'].mean().reset_index()
phases_cluster_month_df

Unnamed: 0,junction,phases,month,kwh
0,104479,A,2021-01-01,1.006964
1,104479,A,2021-02-01,1.169960
2,104479,A,2021-03-01,0.694370
3,104479,A,2021-04-01,0.858829
4,104479,A,2021-05-01,1.094979
...,...,...,...,...
1675,L17-84944,B,2024-02-01,1.885265
1676,L17-84944,B,2024-03-01,1.638827
1677,L17-84944,B,2024-04-01,1.720648
1678,L17-84944,B,2024-05-01,2.150993


In [22]:
cluster_month_df = meter_consumption_df.groupby(['phases', 'junction', 'month'])['kwh'].mean().reset_index()

In [None]:
# Plot graph
px.line(cluster_month_df, x='month', y='kwh', 
        title='Average Hourly Consumption in each Month by Cluster',
        color='phases')