### SET UP

In [2]:
import getpass
import pandas as pd
import numpy as np
# import matplotlib as mplot
import urllib.parse
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.display import Markdown as md
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta
import pandas as pd
import plotly.express as px
import pytz

# Extra import
import copy
import random

pd.set_option('display.max_columns', None)

#### Connection

In [3]:
# Option 1: set the credentials in code (remove your private info before pushing).
# edm_address = ""
# edm_name = ""
# edm_password = ""

# Option 2: host and user are set here, the password is typed interactively.
edm_address = 'sandpit-edm.awesense.com'
edm_name = 'PIMS-Marco'
# getpass reads the password from a prompt instead of storing it in the notebook.
edm_password = getpass.getpass(prompt='Password: ')

# Percent-encode the password so that special characters do not break the connection URL.
# NOTE(review): quote() leaves '/' unescaped by default; consider safe='' if
# passwords may contain a slash.
edm_password = urllib.parse.quote(edm_password)

# Load the SQL magic, open the connection to the 'edm' database and silence the
# connection string / feedback messages that the magic would otherwise display.
%load_ext sql
%sql postgresql://$edm_name:$edm_password@$edm_address/edm
%config SqlMagic.displaycon = False
%config SqlMagic.feedback = False

# Delete the credential variables for security purposes.
# NOTE(review): edm_address is left defined; only the user name and password are removed.

del edm_name, edm_password

### UPLOAD DATA

#### Select parameters

In [4]:
# Grid and grid element to analyse. Both names are substituted into the SQL that
# defines the meter_data_source2 view, which selects everything downstream of
# this element. The commented-out values are an alternative configuration.
# grid_id = 'awefice'
grid_id = 'North Central Zone'
# grid_element_id = 'transformer_2'
grid_element_id = '12373_hvmv'

#### Create Temporary Views

In [5]:
%%sql 

CREATE OR REPLACE TEMPORARY VIEW grid_element_metric AS
    -- One row per data source, metric and reading.
    -- Each data source is expanded to one row per entry of its metrics array and
    -- then joined to the rows returned by ts_data_source_select for that data
    -- source and metric.
    -- NOTE(review) timestamp and value are unqualified; presumably they are the
    -- columns returned by ts_data_source_select. Confirm against the EDM schema.
    SELECT grid_id,
            grid_element_id,
            phases,
            type,
            provider,
            direction,
            friendly_id,
            metric_key AS metric,
            valid,
            timestamp,
            value
    FROM grid_element_data_source AS geds
    JOIN UNNEST(geds.metrics::TEXT[]) AS metric_key
        ON true
    -- LEFT JOIN keeps a metric even when the function returns no rows for it
    -- (timestamp and value are then NULL).
    LEFT JOIN ts_data_source_select(grid_element_data_source_id, metric_key) AS ts
        ON true;


[]

In [6]:
%%sql

CREATE OR REPLACE TEMPORARY VIEW meter_data_source2 AS
    -- One row per metric of every CONSUMER data source attached to an element of
    -- type Meter that grid_get_downstream returns for the selected grid element.
    -- The grid and element names are substituted from the notebook variables
    -- grid_id and grid_element_id before the statement is sent to the database.
    SELECT meter.grid_id,
            meter.grid_element_id,
            geds.grid_element_data_source_id,
            geds.friendly_id,
            geds.phases,
            geds.provider,
            metric_key as metric,
            -- valid is split into its lower and upper bound
            lower(geds.valid) as start_time,
            upper(geds.valid) as end_time
    FROM grid_get_downstream('{grid_id}', '{grid_element_id}') AS meter
    LEFT JOIN grid_element_data_source AS geds
        ON meter.grid_element_id = geds.grid_element_id
        AND meter.grid_id = geds.grid_id
        AND geds.type = 'CONSUMER'
    -- NOTE(review) this is an inner join on the unnested metrics, so a meter
    -- without a matching data source (NULL metrics) presumably yields no row in
    -- spite of the LEFT JOIN above. Confirm that this is intended.
    JOIN UNNEST(geds.metrics::TEXT[]) AS metric_key
        ON true
    WHERE meter.type = 'Meter';

[]

In [7]:
%%sql

CREATE OR REPLACE TEMPORARY VIEW meter_consumption2 AS
    -- kWh readings of the CONSUMER data sources of the downstream meters.
    -- The unquoted alias kWh is folded to lower case by PostgreSQL, which is why
    -- later queries select the column as kwh.
    SELECT meter.grid_id,
           meter.grid_element_id,
           meter.phases,
           timestamp,
           value AS kWh
    FROM meter_data_source2 AS meter
    -- NOTE(review) meter_data_source2 holds one row per metric of a meter and this
    -- join does not match on the metric, so every kWh reading appears to be
    -- repeated once per metric of its meter. Verify and, if confirmed, restrict
    -- the join to the kWh row or deduplicate downstream.
    LEFT JOIN grid_element_metric AS gem
        ON gem.grid_id = meter.grid_id
        AND gem.grid_element_id = meter.grid_element_id
    -- Filtering on gem columns discards the NULL rows produced by the LEFT JOIN,
    -- so the join effectively behaves as an inner join.
    WHERE gem.metric = 'kWh'
    AND gem.type = 'CONSUMER';

[]

#### Last Query

In [8]:
# Readings of the downstream meters for the first week of January 2024
# (both bounds of the timestamp filter are inclusive).
query = """
SELECT grid_element_id, phases, timestamp, kwh
FROM meter_consumption2
WHERE timestamp >= '2024-01-01 00:00:00'
AND timestamp <= '2024-01-7 23:59:59';
"""

# Execute the query using %sql magic command.
# NOTE(review): 'result' is reused later in the notebook for the imbalance DataFrame.
result = %sql $query

# Convert the result set to a DataFrame and give it an explicit 0..n-1 index named
# 'index'; that index is written to the CSV and read back with index_col = 0.
meter_consumption_2024_df = result.DataFrame()
meter_consumption_2024_df['index'] = [i for i in range(len(meter_consumption_2024_df))]
meter_consumption_2024_df = meter_consumption_2024_df.set_index('index')

#### Save the data in a CSV

In [108]:
# Cache the extracted readings on disk so that the second part of the notebook can
# start from the CSV file without querying the database again.
meter_consumption_2024_df.to_csv('2024_data.csv')

## SECOND PART: Work on an algorithm

#### Load data

In [9]:
# Reload the readings saved above; the first CSV column is the 'index' column that
# was written together with the data. Timestamps are read back as plain strings
# because no date parsing is requested.
df_2024 = pd.read_csv('2024_data.csv', index_col = 0)
# Show the first and last rows as a quick sanity check of the loaded data.
print('Head: ', df_2024.head(), '\n')
print('Tail: ', df_2024.tail())

Head:        grid_element_id phases                  timestamp       kwh
index                                                            
0            17AY10-1      B  2024-01-01 00:00:00+00:00  1.366261
1            17AY10-1      B  2024-01-01 01:00:00+00:00  2.120388
2            17AY10-1      B  2024-01-01 02:00:00+00:00  1.567344
3            17AY10-1      B  2024-01-01 03:00:00+00:00  1.822171
4            17AY10-1      B  2024-01-01 04:00:00+00:00  1.434942 

Tail:         grid_element_id phases                  timestamp       kwh
index                                                             
106171         17Y95-1      A  2024-01-07 19:00:00+00:00  1.854647
106172         17Y95-1      A  2024-01-07 20:00:00+00:00  1.602400
106173         17Y95-1      A  2024-01-07 21:00:00+00:00  2.463776
106174         17Y95-1      A  2024-01-07 22:00:00+00:00  1.955457
106175         17Y95-1      A  2024-01-07 23:00:00+00:00  2.018963


### Split the meters
Create a dictionary keyed by the meter name (`grid_element_id`). Each value is a list composed of:
 - 'meter', a DataFrame indexed by 'timestamp' with a single column 'kwh'
 - 'current_phase', a string containing the current phase of the meter
 - 'ave_consumption', the average 'kwh' of the meter over the loaded period

In [10]:
# Build 'meters': a dictionary that maps each grid_element_id to the list
# [meter, current_phase, ave_consumption].
# It may be useful to create an object instead of having this positional list.
meters = {}
for m in df_2024.grid_element_id.unique():
    # NOTE(review): meter_name is assigned but never used; the dictionary key is m.
    meter_name = m
    # Extract the data of a meter
    meter = df_2024[df_2024['grid_element_id'] == m]
    # Identify the current phase of the meter (taken from its first row)
    current_phase = meter.iloc[0].phases
    # Remove the columns that are not used
    meter = meter[['timestamp', 'kwh']]
    # Set 'timestamp' as the index of the DataFrame
    meter = meter.set_index('timestamp')
    # Average consumption of the meter over all of its rows
    ave_consumption = meter.kwh.mean()
    meters[m] = [meter, current_phase, ave_consumption]


### Aggregate the consumption of phases A, B, C
In the dataset there are no AB, AC, or BC meters. The consumption of ABC meters is distributed uniformly between A, B, and C. Hence, it is assumed not to influence the phase balance of the grid and is left out of the aggregation.


In [11]:
def aggregate_consumption(meters):
    """
    Sum the consumption of the single-phase meters separately for phases A, B and C.

    INPUT:
     - 'meters', a dictionary mapping a meter id to a list whose first element is a
       DataFrame indexed by timestamp with a 'kwh' column and whose second element
       is the phase currently assigned to the meter. Further elements are ignored.
    OUTPUT:
     - three DataFrames (phase A, phase B, phase C), each indexed by 'timestamp' and
       holding a single 'kwh' column with the total consumption of the meters
       assigned to that phase.
    Notes:
     - Meters whose phase is not exactly 'A', 'B' or 'C' (for example 'ABC') do not
       contribute to any total, but their timestamps are part of the output index.
     - The timestamps are taken from the meters themselves, in order of first
       appearance, so the function does not rely on any notebook-level variable.
     - When a meter has several rows for the same timestamp only the first one is
       used. A meter with no row for a timestamp contributes 0 at that timestamp.
    """
    phase_labels = ('A', 'B', 'C')

    # Keep a single reading per timestamp for every meter, remembering its phase.
    readings = []
    for record in meters.values():
        kwh = record[0]['kwh']
        readings.append((record[1], kwh[~kwh.index.duplicated(keep='first')]))

    # Union of all timestamps, preserving the order in which they first appear.
    if readings:
        first_index = readings[0][1].index
        timestamps = first_index.append([kwh.index for _, kwh in readings[1:]]).unique()
    else:
        timestamps = pd.Index([])
    timestamps = timestamps.rename('timestamp')

    # Accumulate whole series at once instead of one lookup per meter and timestamp.
    totals = {phase: np.zeros(len(timestamps)) for phase in phase_labels}
    for phase, kwh in readings:
        if phase in totals:
            totals[phase] = totals[phase] + kwh.reindex(timestamps, fill_value=0).to_numpy()

    consumption_A, consumption_B, consumption_C = (
        pd.DataFrame({'kwh': totals[phase]}, index=timestamps) for phase in phase_labels
    )
    return consumption_A, consumption_B, consumption_C


### Compute phase imbalance

Define the function to measure imbalance

In [12]:
def phase_imbalance(p_A,p_B,p_C):
    ''' 
    INPUT:
        'p_A', 'p_B', 'p_C', the consumption on phases A,B,C at a certain time.
    OUTPUT:
        'p_imbalance', a real number quantifying the phase imbalance of the considered time.
        It is computed by the following formula:
        p_imbalance = ( max_deviation ) * 100 / P_ave,
        The quantity P_ave is the average of the three consumptions and
        max_deviation is the largest absolute difference between a phase and P_ave.
    Special case:
        When P_ave is zero the formula is undefined. The function then returns 0.0
        if the three consumptions are equal (no deviation) and infinity otherwise,
        instead of dividing by zero.
    '''
    # Average
    p_ave = (p_A + p_B + p_C)/3
    # Largest absolute deviation of a phase from the average
    max_deviation = max([ abs(p_A-p_ave), abs(p_B-p_ave), abs(p_C-p_ave)] )
    # Guard against the division by zero
    if p_ave == 0:
        return 0.0 if max_deviation == 0 else float('inf')
    # Apply formula
    p_imbalance = max_deviation * 100 / p_ave
    return p_imbalance

In [13]:
def compute_imbalances(consumption_A, consumption_B, consumption_C):
    """
    Compute the phase imbalance for every timestamp of the aggregated consumptions.

    INPUT:
     - 'consumption_A', 'consumption_B', 'consumption_C', DataFrames indexed by
       timestamp whose first column holds the total consumption of the phase
       (the output of aggregate_consumption).
    OUTPUT:
     - a DataFrame indexed by 'timestamp' with a single column 'imbalance', the
       value of phase_imbalance at that timestamp.
    Notes:
     - The timestamps are those of 'consumption_A'; a KeyError is raised if one of
       them is missing from the other two frames.
    """
    # Use the timestamps of the inputs instead of a notebook-level DataFrame.
    timestamps = consumption_A.index
    values_A = consumption_A.iloc[:, 0].to_numpy()
    # Align phases B and C on the timestamps of phase A.
    values_B = consumption_B.iloc[:, 0].loc[timestamps].to_numpy()
    values_C = consumption_C.iloc[:, 0].loc[timestamps].to_numpy()
    # Compute imbalance at every timestamp
    phase_imbalances = [
        [t, phase_imbalance(p_A, p_B, p_C)]
        for t, p_A, p_B, p_C in zip(timestamps, values_A, values_B, values_C)
    ]
    phase_imbalances = pd.DataFrame( phase_imbalances, columns = ['timestamp', 'imbalance']).set_index('timestamp')
    return phase_imbalances


### Analyze Imbalances

In [14]:
def plot_and_stats_imbalances(phase_imbalances, title='Plot of the Imbalances in January'):
    """
    Print summary statistics of the imbalances and plot them over time.

    INPUT:
     - 'phase_imbalances', a DataFrame indexed by timestamp with an 'imbalance'
       column (the output of compute_imbalances).
     - 'title', the title of the plot. The default keeps the original title.
    OUTPUT:
     - None. The head of the DataFrame, the sum and the mean of the imbalances are
       printed and a line plot is displayed.
    """
    print('Head of the DataFrame: ')
    print(phase_imbalances.head())
    print()
    print(f"Sum imbalances: {phase_imbalances.imbalance.sum()}")
    print(f"Average imbalances: {phase_imbalances.imbalance.mean()}")
    fig = px.line(x=phase_imbalances.index, y=phase_imbalances.imbalance, title=title)
    fig.show()

In [15]:
# Quick test: aggregate the current phase assignment, plot the three phase
# totals in one figure and report the resulting imbalances.
a,b,c = aggregate_consumption(meters)
fig_a = px.line(a, title = 'Test A')
fig_b = px.line(b)
fig_c = px.line(c)

# Customize the color and the legend name of the phase A line
for trace in fig_a.data:
    trace.line.color = 'red'
    trace.name = 'Phase A'

# Customize the color and the legend name of the phase B line
for trace in fig_b.data:
    trace.line.color = 'green'
    trace.name = 'Phase B'

# Customize the legend name of the phase C line (its color is left at the default)
for trace in fig_c.data:
    #trace.line.color = 'blue'
    trace.name = 'Phase C'

# Combine the traces of the three figures; the title given to fig_a is not carried
# over because only the traces are reused.
fig = go.Figure(data = fig_a.data + fig_b.data + fig_c.data)
fig.update_layout(
    xaxis_title='timestamp',
    yaxis_title='consumption'
)
fig.show()
# NOTE(review): 'result' previously held the SQL result set and is overwritten here.
result = compute_imbalances( a,b,c )
plot_and_stats_imbalances(result)

Head of the DataFrame: 
                           imbalance
timestamp                           
2024-01-01 00:00:00+00:00  66.360334
2024-01-01 01:00:00+00:00  63.629532
2024-01-01 02:00:00+00:00  60.809710
2024-01-01 03:00:00+00:00  63.521132
2024-01-01 04:00:00+00:00  60.798972

Sum imbalances: 10704.066126362059
Average imbalances: 63.71467932358368


## Working towards an algorithm

### Find the best change in the subgrid

In [29]:
def best_phase_change(meters, improvement = 1.4):
    """
    INPUT: 
     - 'meters', a dictionary mapping a meter id to a list whose second element is
       the phase currently assigned to the meter.
     - 'improvement', a percentage. The search stops at the first change that
       lowers the score by more than this percentage of the initial score.
    OUTPUT:
     - the tuple (id of a meter, phase), or False when no single change lowers
       the score.
    Output description:
    The algorithm looks for a one-change (moving one meter to another phase) that
    lowers the mean of the imbalances. Meters are visited in random order. The
    first change that exceeds 'improvement' is returned immediately; otherwise the
    best change found over all meters is returned.
    Only meters whose phase is exactly 'A', 'B' or 'C' are candidates; any other
    phase label (for example 'ABC') is skipped.
    'meters' is modified while candidates are evaluated, but every phase is
    restored before the function returns, also if the evaluation raises.
    """
    single_phases = ('A', 'B', 'C')
    best_meter = None
    best_phase = None

    # Compute original score
    # -- Compute aggregation
    tmp_cons_A,  tmp_cons_B, tmp_cons_C = aggregate_consumption(meters)
    # -- Compute imbalances
    tmp_imbalances = compute_imbalances(tmp_cons_A, tmp_cons_B, tmp_cons_C)
    # -- Mean of the imbalances
    initial_imbalance = tmp_imbalances.imbalance.mean()
    min_sum_imbalances = initial_imbalance
    print('Initial Score: ', min_sum_imbalances)

    # Visit the meters in random order so that the early exit does not always
    # favour the same meters.
    shuffled_keys = list(meters.keys())
    random.shuffle( shuffled_keys )
    
    for id_m in shuffled_keys:
        original_phase = meters[id_m][1]
        # Skip three-phase meters and any other label that is not a single phase
        if original_phase not in single_phases:
            continue
        other_phases = [p for p in single_phases if p != original_phase]
        try:
            for p in other_phases:
                # Modify the phase of the meter m to p
                meters[id_m][1] = p
                # Compute new aggregation
                tmp_cons_A,  tmp_cons_B, tmp_cons_C = aggregate_consumption(meters)
                # Compute imbalances
                tmp_imbalances = compute_imbalances(tmp_cons_A, tmp_cons_B, tmp_cons_C)
                # Mean of the imbalances
                tmp_score = tmp_imbalances.imbalance.mean()
                # Compare with the optimum so far
                if tmp_score < min_sum_imbalances:
                    best_meter = id_m
                    best_phase = p
                    min_sum_imbalances = tmp_score
                    tmp_improvement = 100 - (tmp_score / initial_imbalance) * 100
                    if tmp_improvement > improvement:
                        # Return the result (the input is fixed by the finally clause)
                        print(f"Change in meter {id_m}: from {original_phase} to {p}")
                        print('Improvement: ', tmp_improvement)
                        return best_meter, best_phase
        finally:
            # Reverse the change
            meters[id_m][1] = original_phase
    
    # If best_meter is None, it means that it is not possible to improve the phase balance
    # by changing only the phase of one meter
    if best_meter is None:
        return False
    
    return best_meter, best_phase

def multistep_best_phase_change(meters, improvement = 1.4, n_steps = 2):
    """
    Apply best_phase_change repeatedly and collect the changes it proposes.

    INPUT:
     - 'meters', the meters structure accepted by best_phase_change. It is deep
       copied, so the caller's structure is left untouched.
     - 'improvement', forwarded to best_phase_change at every step.
     - 'n_steps', the maximum number of changes to look for.
    OUTPUT:
     - the list of changes, in the order in which they were applied to the copy.
       The list is shorter than 'n_steps' when best_phase_change reports that no
       further single change helps.
    """
    working_meters = copy.deepcopy(meters)
    changes = []
    # One change is recorded per completed step, so the length of the list is the
    # number of the current step.
    while len(changes) < n_steps:
        print('Step ', len(changes))
        change = best_phase_change(working_meters, improvement)
        if change == False:
            break
        target_meter, target_phase = change[0], change[1]
        # Apply the change to the working copy before searching for the next one
        working_meters[target_meter][1] = target_phase
        changes.append(change)
    return changes

# Look for up to 15 successive single-meter phase changes. At each step the search
# returns early on the first candidate that improves the score by more than 1
# percent, otherwise it returns the best candidate found.
changes = multistep_best_phase_change(meters, 1, 15)
print(changes)
            



Step  0
Initial Score:  63.71467932358368
Change in meter 17AY43-14: from B to C
Improvement:  1.3416543538132544
Step  1
Initial Score:  62.85984855442067
Change in meter 17FY15: from A to C
Improvement:  1.6104698005982812
Step  2
Initial Score:  61.847509676749915
Change in meter 17FY13: from A to C
Improvement:  1.1787257893238916
Step  3
Initial Score:  61.118497130135474
Change in meter 17AY23-8-8-10ZA: from B to C
Improvement:  1.129642798908435
Step  4
Initial Score:  60.428076428503836
Change in meter 17Y124-3-6: from A to C
Improvement:  1.7676283076890087
Step  5
Initial Score:  59.35993264376165
Change in meter 17GY11-ZB: from A to C
Improvement:  1.170267411429748
Step  6
Initial Score:  58.66526269658506
Change in meter 17Y124-3ZA: from A to C
Improvement:  1.1077736853180085
Step  7
Initial Score:  58.015384354009605
Change in meter 17Y145-1: from A to C
Improvement:  1.4828474762712176
Step  8
Initial Score:  57.15510469126713
Change in meter 17F1Y19-1: from A to C
Impr

### Work in progress algorithm

In [None]:
def best_double_phase_change(meters):
    """
    INPUT: 
     - 'meters', either a dictionary mapping a meter id to a list whose second
       element is the phase of the meter, or a sequence of such lists whose third
       element is the name of the meter.
    OUTPUT:
     - the tuple (id_1, meter_1, phase_1, id_2, meter_2, phase_2) describing TWO
       meters and the TWO phases they should be moved to. 'id' is the dictionary
       key (or the position in the sequence) and 'meter' is the name of the meter:
       the key itself for a dictionary, the third element of the entry for a
       sequence. All six values are None when no pair of changes lowers the score.
    Output description:
    The algorithm looks for the best two-change that can be done in the grid in
    order to minimize the maximum of the imbalances. Every pair of distinct
    single-phase meters and every combination of new phases is evaluated, so the
    number of evaluations grows with the square of the number of meters.
    Meters whose phase is not exactly 'A', 'B' or 'C' (for example 'ABC') are skipped.
    'meters' is modified during the search and restored before returning.
    """
    single_phases = ('A', 'B', 'C')

    # A dictionary is addressed by its keys, a sequence by its positions.
    is_mapping = isinstance(meters, dict)
    meter_ids = list(meters.keys()) if is_mapping else list(range(len(meters)))

    def meter_label(meter_id):
        # In a dictionary the key is the name of the meter; the third element of
        # an entry is only a name in the sequence layout.
        return meter_id if is_mapping else meters[meter_id][2]

    best_id_1 = None
    best_id_2 = None
    best_meter_1 = None
    best_meter_2 = None
    best_phase_1 = None
    best_phase_2 = None

    # Compute original score
    # -- Compute aggregation
    tmp_cons_A,  tmp_cons_B, tmp_cons_C = aggregate_consumption(meters)
    # -- Compute imbalances
    tmp_imbalances = compute_imbalances(tmp_cons_A, tmp_cons_B, tmp_cons_C)
    # -- Max of the imbalances
    min_sum_imbalances = tmp_imbalances.imbalance.max()
    print('Initial Score: ', min_sum_imbalances)

    for pos_1, i_1 in enumerate(meter_ids):
        original_phase_1 = meters[i_1][1]
        if original_phase_1 not in single_phases:
            continue
        other_phases_1 = [p for p in single_phases if p != original_phase_1]
        for p_1 in other_phases_1:
            # Modify the phase of the first meter to p_1
            meters[i_1][1] = p_1
            # Second meter: only meters after the first one, so each pair is visited once
            for i_2 in meter_ids[pos_1 + 1:]:
                original_phase_2 = meters[i_2][1]
                if original_phase_2 not in single_phases:
                    continue
                other_phases_2 = [p for p in single_phases if p != original_phase_2]
                for p_2 in other_phases_2:
                    # Modify the phase of the second meter to p_2
                    meters[i_2][1] = p_2
                    # Compute new aggregation
                    tmp_cons_A,  tmp_cons_B, tmp_cons_C = aggregate_consumption(meters)
                    # Compute imbalances
                    tmp_imbalances = compute_imbalances(tmp_cons_A, tmp_cons_B, tmp_cons_C)
                    # Max of the imbalances
                    tmp_score = tmp_imbalances.imbalance.max()
                    print(tmp_score)
                    # Compare with the optimum so far
                    if tmp_score < min_sum_imbalances:
                        best_id_1 = i_1
                        best_meter_1 = meter_label(i_1)
                        best_phase_1 = p_1
                        best_id_2 = i_2
                        best_meter_2 = meter_label(i_2)
                        best_phase_2 = p_2
                        min_sum_imbalances = tmp_score
                        print(tmp_score, i_1, p_1, i_2, p_2)
                        print(f"Change in meter {best_meter_1}: from {original_phase_1} to {p_1}")
                        print(f"Change in meter {best_meter_2}: from {original_phase_2} to {p_2}")
                # Reverse the change of the second meter
                meters[i_2][1] = original_phase_2
        # Reverse the change of the first meter
        meters[i_1][1] = original_phase_1
    
    # If best_id_1 is None, it means that it is not possible to improve the phase
    # balance by changing the phases of two meters

    return best_id_1, best_meter_1, best_phase_1, best_id_2, best_meter_2, best_phase_2


# Run the exhaustive two-meter search on the current phase assignment.
# NOTE(review): this cell has no execution count, so it was apparently never run;
# the search evaluates every pair of meters and may take very long.
id_meter_1, best_meter_1, best_phase_1, id_meter_2, best_meter_2, best_phase_2 = best_double_phase_change(meters)
print('Round 1, ended')


## Compare the original grid with the grid after applying the proposed phase changes


In [30]:
# Original Data
original_cons_A, original_cons_B, original_cons_C = aggregate_consumption(meters)
original_imbalances = compute_imbalances(original_cons_A, original_cons_B, original_cons_C)
fig_original = px.line(original_imbalances)
# Modify Data
new_meters = copy.deepcopy(meters)
for c in changes:
    id_m = c[0]
    phase_m = c[1]
    new_meters[id_m][1] = phase_m
updated_cons_A, updated_cons_B, updated_cons_C = aggregate_consumption(new_meters)
updated_imbalances = compute_imbalances(updated_cons_A, updated_cons_B, updated_cons_C)
fig_updated = px.line(updated_imbalances)
# Change colors# Customize the colors of the scatter plot
for trace in fig_original.data:
    print(type(trace))
    trace.marker.color = 'blue'
    trace.name = 'Original Imbalance'

# Customize the colors of the line plot
for trace in fig_updated.data:
    trace.line.color = 'green'
    trace.name = 'Updated Imbalance'
fig = go.Figure(data = fig_original.data + fig_updated.data)
fig.update_layout(
    xaxis_title='timestamp',
    yaxis_title='imbalance'
)
fig.show()


<class 'plotly.graph_objs._scatter.Scatter'>
