In [1]:
# load carbon traces into a pandas dataframe
import pandas as pd
import numpy as np

In [2]:
# AWS region codes whose carbon traces are loaded below; each trailing comment
# gives the region's AWS display name.
names = [
    # United States
    "us-east-1",       # US East (N. Virginia)
    "us-west-1",       # US West (N. California)
    "us-west-2",       # US West (Oregon)
    # Africa
    "af-south-1",      # Africa (Cape Town)
    # Asia Pacific
    "ap-south-2",      # Asia Pacific (Hyderabad)
    "ap-northeast-2",  # Asia Pacific (Seoul)
    "ap-southeast-2",  # Asia Pacific (Sydney)
    # Canada
    "ca-central-1",    # Canada (Central)
    # Europe
    "eu-central-1",    # Europe (Frankfurt)
    "eu-west-2",       # Europe (London)
    "eu-west-3",       # Europe (Paris)
    "eu-north-1",      # Europe (Stockholm)
    # South America
    "sa-east-1",       # South America (São Paulo)
    # Middle East
    "il-central-1",    # Israel (Tel Aviv)
]

In [3]:
# Read one CSV per region into a dict of dataframes keyed by region code.
wanted_columns = ["datetime", "carbon_intensity_avg"]
dfs = {}
for name in names:
    csv_path = f"carbon-data/{name}.csv"
    # parse the timestamp column on load, then drop everything except the
    # timestamp and the average carbon intensity
    df = pd.read_csv(csv_path, parse_dates=["datetime"]).loc[:, wanted_columns]
    dfs[name] = df

# preview the first rows of the us-east-1 dataframe
print(dfs["us-east-1"].head())

                   datetime  carbon_intensity_avg
0 2020-01-01 00:00:00+00:00                382.14
1 2020-01-01 01:00:00+00:00                377.11
2 2020-01-01 02:00:00+00:00                375.74
3 2020-01-01 03:00:00+00:00                373.20
4 2020-01-01 04:00:00+00:00                360.48


In [4]:
# load the metric space
import metric
m = metric.MetricSpace()

# get the "column names" of the vectors for the tree embedding in the metric space
# NOTE(review): this binds name_vector to the attribute itself, not a copy. If
# m.name_vector is a plain list, any later in-place edit of name_vector also
# changes the list held by m — confirm that is intended.
name_vector = m.name_vector

In [5]:
# show the raw labels before any clean-up; the output below includes 'root',
# stringified lists of region codes, and plain region codes
print(name_vector)

['root', "['ap-south-2', 'eu-north-1', 'us-west-2', 'eu-west-2', 'af-south-1', 'eu-west-3', 'us-east-1', 'ap-southeast-2', 'ca-central-1', 'eu-central-1', 'us-west-1', 'ap-northeast-2', 'il-central-1', 'sa-east-1']", "['ap-south-2', 'eu-north-1', 'us-west-2', 'eu-west-2', 'af-south-1', 'eu-west-3', 'us-east-1', 'ap-southeast-2', 'ca-central-1', 'eu-central-1', 'us-west-1', 'ap-northeast-2', 'il-central-1', 'sa-east-1']", "['ap-south-2', 'eu-north-1', 'us-west-2', 'eu-west-2', 'af-south-1', 'eu-west-3', 'us-east-1', 'ap-southeast-2', 'ca-central-1', 'eu-central-1', 'us-west-1', 'ap-northeast-2', 'il-central-1', 'sa-east-1']", 'sa-east-1', 'ap-south-2', 'ca-central-1', "['eu-north-1', 'us-west-2', 'eu-west-2', 'af-south-1', 'eu-west-3', 'us-east-1', 'ca-central-1', 'eu-central-1', 'us-west-1']", "['eu-north-1', 'us-west-2', 'eu-west-2', 'af-south-1', 'eu-west-3', 'us-east-1', 'ca-central-1', 'eu-central-1', 'us-west-1']", 'us-east-1', 'af-south-1', "['eu-north-1', 'us-west-2', 'eu-west-2

In [6]:
# Replace every label that is not a plain region code with the placeholder
# "none": labels containing "[" or "OFF", and the label 'root'.
# The list is edited in place, one position at a time.
for i, name in enumerate(name_vector):
    if any(marker in name for marker in ("[", "OFF")) or name == 'root':
        name_vector[i] = "none"

print(name_vector)

['none', 'none', 'none', 'none', 'sa-east-1', 'ap-south-2', 'ca-central-1', 'none', 'none', 'us-east-1', 'af-south-1', 'none', 'ap-northeast-2', 'ap-southeast-2', 'il-central-1', 'none', 'none', 'none', 'us-west-2', 'us-west-1', 'none', 'none', 'none', 'eu-west-2', 'eu-central-1', 'none', 'none', 'none', 'none', 'none', 'eu-west-3', 'eu-north-1', 'none', 'none', 'none', 'none']


In [7]:
# Build a matrix with one row per row of the us-east-1 dataframe and one column
# per entry of name_vector. Column i holds the carbon-intensity trace of the
# region named at name_vector[i]; columns whose entry is "none" stay all-zero.
X = np.zeros((len(dfs["us-east-1"]), len(name_vector)))

for i, name in enumerate(name_vector):
    print(name)
    if name == "none":
        continue
    trace = dfs[name]["carbon_intensity_avg"].to_numpy()
    # Every trace must supply exactly one value per row of X. Without this
    # check a one-row trace would be silently broadcast down the whole column,
    # and any other mismatch would fail with an opaque broadcasting error.
    if len(trace) != X.shape[0]:
        raise ValueError(
            f"trace for {name} has {len(trace)} rows, expected {X.shape[0]}"
        )
    X[:, i] = trace

# print the first few rows of the matrix
print(X[:5, :])

# save the datetimes to a separate pandas series
# (taken from us-east-1; the length check above guarantees the same row count
# for every region, but the timestamps themselves are not compared)
datetimes = dfs["us-east-1"]["datetime"]

print(datetimes.head())


none
none
none
none
sa-east-1
ap-south-2
ca-central-1
none
none
us-east-1
af-south-1
none
ap-northeast-2
ap-southeast-2
il-central-1
none
none
none
us-west-2
us-west-1
none
none
none
eu-west-2
eu-central-1
none
none
none
none
none
eu-west-3
eu-north-1
none
none
none
none
[[  0.     0.     0.     0.   101.48 741.99  28.23   0.     0.   382.14
  755.7    0.   477.98 669.14 589.31   0.     0.     0.   325.75 369.98
    0.     0.     0.   221.52 452.69   0.     0.     0.     0.     0.
   65.6   22.9    0.     0.     0.     0.  ]
 [  0.     0.     0.     0.   106.59 742.1   28.37   0.     0.   377.11
  758.61   0.   470.83 665.54 589.31   0.     0.     0.   339.59 403.58
    0.     0.     0.   222.48 448.76   0.     0.     0.     0.     0.
   56.1   22.87   0.     0.     0.     0.  ]
 [  0.     0.     0.     0.   109.89 734.37  29.2    0.     0.   375.74
  761.52   0.   467.67 671.44 589.31   0.     0.     0.   348.48 397.43
    0.     0.     0.   213.51 445.49   0.     0.     0.     0.    

In [8]:
import carbonTraces

In [9]:
# get the numpy array of the carbon traces
# NOTE: this rebinds `datetimes`, which an earlier cell set from dfs["us-east-1"].
# NOTE(review): presumably get_numpy repeats the manual construction of X above —
# the rows printed below match the visible rows of X; confirm in carbonTraces.
datetimes, carbonMat = carbonTraces.get_numpy(m)

In [10]:
# peek at the first five timestamps returned by carbonTraces.get_numpy
print(datetimes[:5])

0   2020-01-01 00:00:00+00:00
1   2020-01-01 01:00:00+00:00
2   2020-01-01 02:00:00+00:00
3   2020-01-01 03:00:00+00:00
4   2020-01-01 04:00:00+00:00
Name: datetime, dtype: datetime64[ns, UTC]


In [11]:
# peek at the first five rows (all columns) of the matrix returned by get_numpy
print(carbonMat[:5, :])

[[  0.     0.     0.     0.   101.48 741.99  28.23   0.     0.   382.14
  755.7    0.   477.98 669.14 589.31   0.     0.     0.   325.75 369.98
    0.     0.     0.   221.52 452.69   0.     0.     0.     0.     0.
   65.6   22.9    0.     0.     0.     0.  ]
 [  0.     0.     0.     0.   106.59 742.1   28.37   0.     0.   377.11
  758.61   0.   470.83 665.54 589.31   0.     0.     0.   339.59 403.58
    0.     0.     0.   222.48 448.76   0.     0.     0.     0.     0.
   56.1   22.87   0.     0.     0.     0.  ]
 [  0.     0.     0.     0.   109.89 734.37  29.2    0.     0.   375.74
  761.52   0.   467.67 671.44 589.31   0.     0.     0.   348.48 397.43
    0.     0.     0.   213.51 445.49   0.     0.     0.     0.     0.
   43.01  22.83   0.     0.     0.     0.  ]
 [  0.     0.     0.     0.   113.49 713.62  28.69   0.     0.   373.2
  756.49   0.   466.59 675.39 589.31   0.     0.     0.   340.05 392.26
    0.     0.     0.   210.23 442.58   0.     0.     0.     0.     0.
   43.82  