In [1]:
# imports: pandas for the carbon-trace dataframes, numpy for the trace matrix
import pandas as pd
import numpy as np

In [2]:
# AWS regions
names = [
    "us-east-1",      # US East (N. Virginia)
    "us-west-1",      # US West (N. California)
    "us-west-2",      # US West (Oregon)
    "af-south-1",     # Africa (Cape Town)
    "ap-south-2",     # Asia Pacific (Hyderabad)
    "ap-northeast-2", # Asia Pacific (Seoul)
    "ap-southeast-2", # Asia Pacific (Sydney)
    "ca-central-1",   # Canada (Central)
    "eu-central-1",   # Europe (Frankfurt)
    "eu-west-2",      # Europe (London)
    "eu-west-3",      # Europe (Paris)
    "eu-north-1",     # Europe (Stockholm)
    "sa-east-1",       # South America (São Paulo)
    "il-central-1"    # Israel (Tel Aviv)
]

In [12]:
# define a dict of dataframes
dfs = {}
for name in names:
    df = pd.read_csv(f"carbon-data/{name}.csv", parse_dates=["datetime"])
    # keep only the columns we need
    df = df[["datetime", "carbon_intensity_avg"]]
    dfs[name] = df
    # print(len(df))

# print the first few rows of the first dataframe
print(dfs["us-east-1"].head())

                   datetime  carbon_intensity_avg
0 2020-01-01 00:00:00+00:00                382.14
1 2020-01-01 01:00:00+00:00                377.11
2 2020-01-01 02:00:00+00:00                375.74
3 2020-01-01 03:00:00+00:00                373.20
4 2020-01-01 04:00:00+00:00                360.48


In [13]:
# load the metric space
import metric
m = metric.MetricSpace()

# get the "column names" of the vectors for the tree embedding in the metric space
name_vector = m.name_vector

c_vector: [0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0]


In [14]:
print(name_vector)

['root', ['us-east-1', 'eu-north-1', 'af-south-1', 'il-central-1', 'ap-south-2', 'us-west-1', 'ap-southeast-2', 'ca-central-1', 'ap-northeast-2', 'us-west-2', 'eu-west-2', 'sa-east-1', 'eu-central-1', 'eu-west-3'], ['us-east-1', 'eu-north-1', 'af-south-1', 'il-central-1', 'ap-south-2', 'us-west-1', 'ap-southeast-2', 'ca-central-1', 'ap-northeast-2', 'us-west-2', 'eu-west-2', 'sa-east-1', 'eu-central-1', 'eu-west-3'], ['us-east-1', 'eu-north-1', 'af-south-1', 'il-central-1', 'ap-south-2', 'us-west-1', 'ap-southeast-2', 'ca-central-1', 'ap-northeast-2', 'us-west-2', 'eu-west-2', 'sa-east-1', 'eu-central-1', 'eu-west-3'], ['us-east-1', 'eu-north-1', 'af-south-1', 'il-central-1', 'ap-south-2', 'us-west-1', 'ap-southeast-2', 'ca-central-1', 'ap-northeast-2', 'us-west-2', 'eu-west-2', 'sa-east-1', 'eu-central-1', 'eu-west-3'], 'us-west-2', 'us-west-2 OFF', ['eu-north-1', 'af-south-1', 'il-central-1', 'ap-southeast-2', 'sa-east-1', 'eu-west-2', 'eu-central-1', 'eu-west-3'], 'eu-north-1', 'eu-

In [18]:
for i, name in enumerate(name_vector):
    if "[" in name: 
        name_vector[i] = "none" 
    if "OFF" in name:
        name_vector[i] = "none"
    if name == 'root':
        name_vector[i] = "none" 

print(name_vector)

['none', ['us-east-1', 'eu-north-1', 'af-south-1', 'il-central-1', 'ap-south-2', 'us-west-1', 'ap-southeast-2', 'ca-central-1', 'ap-northeast-2', 'us-west-2', 'eu-west-2', 'sa-east-1', 'eu-central-1', 'eu-west-3'], ['us-east-1', 'eu-north-1', 'af-south-1', 'il-central-1', 'ap-south-2', 'us-west-1', 'ap-southeast-2', 'ca-central-1', 'ap-northeast-2', 'us-west-2', 'eu-west-2', 'sa-east-1', 'eu-central-1', 'eu-west-3'], ['us-east-1', 'eu-north-1', 'af-south-1', 'il-central-1', 'ap-south-2', 'us-west-1', 'ap-southeast-2', 'ca-central-1', 'ap-northeast-2', 'us-west-2', 'eu-west-2', 'sa-east-1', 'eu-central-1', 'eu-west-3'], ['us-east-1', 'eu-north-1', 'af-south-1', 'il-central-1', 'ap-south-2', 'us-west-1', 'ap-southeast-2', 'ca-central-1', 'ap-northeast-2', 'us-west-2', 'eu-west-2', 'sa-east-1', 'eu-central-1', 'eu-west-3'], 'us-west-2', 'none', ['eu-north-1', 'af-south-1', 'il-central-1', 'ap-southeast-2', 'sa-east-1', 'eu-west-2', 'eu-central-1', 'eu-west-3'], 'eu-north-1', 'eu-west-3', 

In [19]:
# create a numpy matrix where each column is the corresponding carbon intensity trace for that region
X = np.zeros((len(dfs["us-east-1"]), len(name_vector)))

for i, name in enumerate(name_vector):
    print(name)
    if name == "none":
        continue
    X[:, i] = np.array(dfs[name]["carbon_intensity_avg"])

# print the first few rows of the matrix
print(X[:5, :])

# save the datetimes to a separate pandas series
datetimes = dfs["us-east-1"]["datetime"]

print(datetimes.head())


none
['us-east-1', 'eu-north-1', 'af-south-1', 'il-central-1', 'ap-south-2', 'us-west-1', 'ap-southeast-2', 'ca-central-1', 'ap-northeast-2', 'us-west-2', 'eu-west-2', 'sa-east-1', 'eu-central-1', 'eu-west-3']


TypeError: unhashable type: 'list'

In [8]:
import carbonTraces

In [12]:
# get the numpy array of the carbon traces
datetimes, carbonMat = carbonTraces.get_numpy(m)

In [13]:
print(datetimes[:5])

0   2020-01-01 00:00:00+00:00
1   2020-01-01 01:00:00+00:00
2   2020-01-01 02:00:00+00:00
3   2020-01-01 03:00:00+00:00
4   2020-01-01 04:00:00+00:00
Name: datetime, dtype: datetime64[ns, UTC]


In [14]:
print(carbonMat[:5, :])

[[  0.     0.     0.     0.     0.   101.48 741.99 369.98 589.31   0.
   28.23 382.14 669.14 325.75 755.7    0.     0.     0.     0.     0.
  452.69 477.98   0.     0.     0.     0.     0.    22.9   65.6  221.52
    0.     0.     0.     0.     0.  ]
 [  0.     0.     0.     0.     0.   106.59 742.1  403.58 589.31   0.
   28.37 377.11 665.54 339.59 758.61   0.     0.     0.     0.     0.
  448.76 470.83   0.     0.     0.     0.     0.    22.87  56.1  222.48
    0.     0.     0.     0.     0.  ]
 [  0.     0.     0.     0.     0.   109.89 734.37 397.43 589.31   0.
   29.2  375.74 671.44 348.48 761.52   0.     0.     0.     0.     0.
  445.49 467.67   0.     0.     0.     0.     0.    22.83  43.01 213.51
    0.     0.     0.     0.     0.  ]
 [  0.     0.     0.     0.     0.   113.49 713.62 392.26 589.31   0.
   28.69 373.2  675.39 340.05 756.49   0.     0.     0.     0.     0.
  442.58 466.59   0.     0.     0.     0.     0.    23.26  43.82 210.23
    0.     0.     0.     0.     0.  ]
