In [2]:
%pip install powerlaw 
import eikon as ek
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from plotly.subplots import make_subplots
import powerlaw
import networkx as nx
import plotly.graph_objects as go
from scipy.optimize import curve_fit
import warnings
import plotly.io as pio




Note: you may need to restart the kernel to use updated packages.


In [3]:
df_close = pd.read_csv("../data/euro50_prices.csv")
df_esg = pd.read_csv("../data/euro50_esg.csv")

In [4]:
#Use daily close for correlation

if 'Date' in df_close.columns:
    df_close.set_index('Date', inplace=True)
df_close.index = pd.to_datetime(df_close.index)

df_pct = df_close.pct_change().dropna()
df_returns = df_close
print(df_returns.head())

# Compute the correlation matrix
corr_matrix = df_returns.corr()

# Create heatmap using plotly
fig = go.Figure(data=go.Heatmap(
    z=corr_matrix,
    x=corr_matrix.columns,
    y=corr_matrix.columns,
    colorscale='RdBu_r',
    zmid=0,  # Center the colorscale at 0
))

# Update layout
fig.update_layout(
    title="Correlation Matrix of Euro Stoxx50 Stocks (Daily Close)",
    width=1000,
    height=1000,
    xaxis_tickangle=-45
)

fig.show()

# 3) (Optional) threshold the correlation matrix by some cutoff rho
rho = 0.9612
adj_matrix = (corr_matrix.abs() > rho).astype(int)
np.fill_diagonal(adj_matrix.values, 0)

# adj_matrix is now a 0/1 adjacency matrix, where edges exist if |c_ij| > rho
print("Correlation Matrix:\n", corr_matrix)
print("Adjacency Matrix (|corr| > {}):\n".format(rho), adj_matrix)

#save correlatin  matrix to csv
corr_matrix.to_csv("../data/corr_matrix_close.csv")

  df_pct = df_close.pct_change().dropna()


            DHLn.DE  PERP.PA  NDAFI.HE     AIRP.PA     IBE.MC  SIEGn.DE  \
Date                                                                      
2023-01-02   36.005   184.90    10.190  122.727285  10.047895    129.82   
2023-01-03   36.265   184.45    10.250  122.418194  10.015953    130.94   
2023-01-04   36.680   186.40    10.448  125.709103  10.139157    135.28   
2023-01-05   36.955   184.30    10.636  125.090922  10.088963    134.84   
2023-01-06   36.980   186.30       NaN  128.436376  10.117934    136.14   

            SAN.MC  VOWG_p.DE  SAPG.DE  CRDI.MI  ...  RACE.MI  LVMH.PA  \
Date                                             ...                     
2023-01-02  2.8495     120.04    97.42   13.446  ...    202.5    694.1   
2023-01-03  2.8570     122.06    98.51   13.910  ...    203.8    702.8   
2023-01-04  2.9630     125.88   100.70   14.430  ...    206.1    738.0   
2023-01-05  2.9740     127.12   100.84   14.534  ...    205.9    728.4   
2023-01-06  3.0465     128.16 

Correlation Matrix:
             DHLn.DE   PERP.PA  NDAFI.HE   AIRP.PA    IBE.MC  SIEGn.DE  \
DHLn.DE    1.000000  0.627080 -0.112090 -0.251772 -0.466473 -0.310887   
PERP.PA    0.627080  1.000000 -0.414047 -0.750212 -0.735812 -0.713815   
NDAFI.HE  -0.112090 -0.414047  1.000000  0.332543  0.115259  0.427448   
AIRP.PA   -0.251772 -0.750212  0.332543  1.000000  0.632983  0.783291   
IBE.MC    -0.466473 -0.735812  0.115259  0.632983  1.000000  0.729903   
SIEGn.DE  -0.310887 -0.713815  0.427448  0.783291  0.729903  1.000000   
SAN.MC    -0.496217 -0.850572  0.488334  0.834092  0.736822  0.797504   
VOWG_p.DE  0.621640  0.791413  0.029294 -0.497887 -0.775638 -0.419691   
SAPG.DE   -0.582870 -0.912598  0.287030  0.802697  0.864196  0.852058   
CRDI.MI   -0.538818 -0.918688  0.366937  0.875169  0.809740  0.830581   
AD.AS     -0.228691  0.016941 -0.377130 -0.054766  0.488421  0.114268   
BNPP.PA   -0.162947 -0.373446  0.589226  0.459372  0.315493  0.420846   
DTEGn.DE  -0.589259 -0.774903 

In [37]:
def plot_network_graph(adj_matrix, sparse=0.1, largest_cc=False, drop_isolates=False, title = "Financial Network Graph of EuroStoxx 50 Stocks"):
    # Create a graph from the adjacency matrix
    G = nx.from_pandas_adjacency(adj_matrix)
    
    # Remove self-loops
    G.remove_edges_from(nx.selfloop_edges(G))

    if largest_cc:
        cc = max(nx.connected_components(G), key=len)
        G = G.subgraph(cc).copy()

    if drop_isolates:
        G = nx.subgraph(G, [node for node, degree in dict(G.degree()).items() if degree > 0]).copy()

    # Generate positions for all nodes
    communities = nx.community.greedy_modularity_communities(G)

    supergraph = nx.cycle_graph(len(communities))
    superpos = nx.spring_layout(supergraph, k=sparse/np.sqrt(len(G.nodes())),scale=4, seed=429)

    # Use the "supernode" positions as the center of each node cluster
    centers = list(superpos.values())
    pos = {}
    for center, comm in zip(centers, communities):
        pos.update(nx.spring_layout(nx.subgraph(G, comm), center=center))


    # Create edges
    edge_x = []
    edge_y = []
    for edge in G.edges():
        x0, y0 = pos[edge[0]]
        x1, y1 = pos[edge[1]]
        edge_x.append(x0)
        edge_x.append(x1)
        edge_x.append(None)
        edge_y.append(y0)
        edge_y.append(y1)
        edge_y.append(None)

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='none',
        mode='lines'
    )


    # Create nodes and name them by their labels
    node_x = []
    node_y = []
    for node in G.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)



    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode="markers+text",
        hoverinfo='text',
        textposition="bottom center",
        text=list(G.nodes()),
        marker=dict(
            showscale=True,
            colorscale='YlGnBu',
            reversescale=True,
            color=[],
            size=10,
            colorbar=dict(
                thickness=15,
                title='Node Connections',
                xanchor='left',
                titleside='right'
            ),
            line_width=2
        )
    )

    # Color node points by the number of connections
    node_adjacencies = []
    node_text = []
    for node, adjacencies in enumerate(G.adjacency()):
        node_adjacencies.append(len(adjacencies[1]))
        node_text.append(f'{list(G.nodes())[node]}<br>C: ' + str(len(adjacencies[1])))
    node_trace.marker.color = node_adjacencies
    node_trace.text = node_text

    # Create network graph
    fig = go.Figure(data=[edge_trace, node_trace],
                    layout=go.Layout(
                        #include rho in title to two decimal places
                        title=title,
                        titlefont_size=16,
                        showlegend=False,
                        hovermode='closest',
                        margin=dict(b=20, l=5, r=5, t=40),
                        annotations=[dict(
                            text="Python code: <a href='https://plotly.com/ipython-notebooks/network-graphs/'> https://plotly.com/ipython-notebooks/network-graphs/</a>",
                            showarrow=False,
                            xref="paper", yref="paper",
                            x=0.005, y=-0.002
                        )],
                        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
                    )
    )

    fig.update_layout(
    # update width and height of the figure
    width=1000,
    height=800,
    # update the layout margin
)

    fig.show()

# Example usage
plot_network_graph(adj_matrix, sparse=1, largest_cc=False, drop_isolates=True)

In [6]:

# fit using linear regression with scipy


# Flatten adjacency to degrees
G_price = nx.from_pandas_adjacency(adj_matrix)
G_price.remove_edges_from(nx.selfloop_edges(G_price))

degrees = [deg for (_, deg) in G_price.degree()]


# Max degree so we know how far to compute the histogram
max_degree = max(degrees)

# Histogram of degrees: hist[k] = number of nodes with degree k
hist = np.bincount(degrees)

# Probability p(k) for each degree k
p_k = hist / hist.sum()

print(f"p(k) for each degree k: {p_k}")

def exponential_degree(k, alpha, gamma):
    return alpha * np.exp(-gamma * k)

k_values = np.arange(1, max_degree + 1)   # degrees from 1..max_degree
p_values = p_k[1:]  # skip p_k[0] if needed


# Filter out any zero-probability bins so curve_fit doesn’t choke
nonzero_indices = np.where(p_values > 0)
k_fit = k_values[nonzero_indices]
p_fit = p_values[nonzero_indices]

# Now do the fit
popt, pcov = curve_fit(exponential_degree, k_fit, p_fit, p0=(1.0, 0.1))
alpha_fit, gamma_fit = popt
print("Fitted alpha =", alpha_fit)
print("Fitted gamma =", gamma_fit)

# Evaluate the fitted exponential at each k
p_exp_fit = exponential_degree(k_values, alpha_fit, gamma_fit)

# Compute sum of absolute deviations
e_fitting = np.sum(np.abs(p_k[1:] - p_exp_fit))  # ignoring k=0 if you like
print("Fitting error =", e_fitting)

# Create plotly figure
fig = go.Figure()

# Add empirical data points
fig.add_trace(go.Scatter(
    x=k_fit,
    y=p_fit,
    mode='markers+lines',
    opacity=0.7,
    name='Empirical p(k)',
    line=dict(color='blue')
))

# Add fitted curve
fig.add_trace(go.Scatter(
    x=k_fit,
    y=exponential_degree(k_fit, alpha_fit, gamma_fit),
    mode='lines',
    name=r'αe^(-γk) (fit)',
    line=dict(color='red')
))

# Update layout
fig.update_layout(
    title=f'Degree Distribution with Exponential Fit (α={alpha_fit:.2f}, γ={gamma_fit:.2f})',
    xaxis_title='Degree k',
    yaxis_title='p(k)',
    xaxis_type='log',
    yaxis_type='log',
    
)

fig.show()





p(k) for each degree k: [0.74 0.08 0.06 0.04 0.04 0.02 0.   0.02]
Fitted alpha = 0.10416868791698984
Fitted gamma = 0.2785246065426848
Fitting error = 0.04309719168181543


In [26]:



max_ccs = []
errors = []
nodes = []


def exponential_degree(k, alpha, gamma):
    return alpha * np.exp(-gamma * k)

def make_graph(rho):
    # 1. Threshold the correlation matrix
    adj_matrix = (corr_matrix.abs() > rho).astype(int)
    np.fill_diagonal(adj_matrix.values, 0)

    # 2. Create a graph from the adjacency matrix
    G = nx.from_pandas_adjacency(adj_matrix)
    G.remove_edges_from(nx.selfloop_edges(G))

    cc = max(nx.connected_components(G), key=len)
    max_ccs.append(len(cc))

    return G

def fitting_error_for_threshold(rho):
    # 1. Build the graph at threshold rho
    G = make_graph(rho)

    #Count the number of connected components and store it in the nodes list
    nodes.append(len(list(nx.connected_components(G))))


    max_cc = max(nx.connected_components(G), key=len)
    

    # 2. Degree distribution
    degrees = [deg for _, deg in G.degree()]
    hist = np.bincount(degrees)
    p_k = hist / hist.sum()

    # 3. Fit alpha, gamma
    k_vals = np.arange(1, len(p_k))
    p_vals = p_k[1:]
    nonzero = p_vals > 0
    k_fit = k_vals[nonzero]
    p_fit = p_vals[nonzero]
    popt, _ = curve_fit(exponential_degree, k_fit, p_fit, p0=(1.0, 0.1))
    alpha_fit, gamma_fit = popt

    # 4. Compute fitting error
    p_exp_fit = exponential_degree(k_vals, alpha_fit, gamma_fit)
    e_fit = np.sum(np.abs(p_k[1:] - p_exp_fit))

    errors.append(e_fit)

    #make sure there are at least 10 nodes
    if len(max_cc) < 10:
        raise Warning(f"Graph has less than 10 nodes at rho = {rho}")
    return e_fit, alpha_fit, gamma_fit, max_cc

import warnings

print(nodes)

# Loop over candidate rho’s
rhos = np.linspace(0.8, 0.99, 100)
best_rho = None
best_err = np.inf
rhos_run = []
problems = []
for rho in rhos:
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            err, alpha_fit, gamma_fit, max_cc = fitting_error_for_threshold(rho)
        rhos_run.append(rho)
        
        if err < best_err:
            best_rho = rho
            best_err = err
    except:
        problems.append(rho)
        continue
    
# Plot the error as a function of rho
fig = go.Figure()
fig.add_trace(go.Scatter(x=rhos, y=errors, mode='lines', name='Fitting Error'))
fig.update_layout(
    title='Fitting Error vs. Threshold rho in Financial Network',
    xaxis_title='Threshold rho',
    yaxis_title='Fitting Error',
    width=1000,
    height=400
)
fig.show()

# Plot max connected component size
fig = go.Figure()
fig.add_trace(go.Scatter(x=rhos, y=max_ccs[:93], mode='lines', name='Max CC Size'))
fig.update_layout(
    title='Max Connected Component Size vs. Threshold rho in Financial Network',
    xaxis_title='Threshold rho',
    yaxis_title='Max CC Size',
    width=1000,
    height=400
)
fig.show()


print(f"Problems at thresholds: {problems}")

print(f"Best threshold = {best_rho}, with error = {best_err}")


[]


Problems at thresholds: [np.float64(0.9631313131313132), np.float64(0.965050505050505), np.float64(0.966969696969697), np.float64(0.9688888888888889), np.float64(0.9708080808080808), np.float64(0.9727272727272728), np.float64(0.9746464646464646), np.float64(0.9765656565656566), np.float64(0.9784848484848485), np.float64(0.9804040404040404), np.float64(0.9823232323232323), np.float64(0.9842424242424243), np.float64(0.9861616161616161), np.float64(0.9880808080808081), np.float64(0.99)]
Best threshold = 0.9612121212121212, with error = 0.04309719168181543


In [8]:
G = nx.from_pandas_adjacency(adj_matrix)    
G.remove_edges_from(nx.selfloop_edges(G))
cc = max(nx.connected_components(G), key=len)
G = G.subgraph(cc).copy()

# Calculate metrics
number_of_nodes = nx.number_of_nodes(G)
number_of_edges = nx.number_of_edges(G)
average_shortest_path_length = nx.average_shortest_path_length(G) if nx.is_connected(G) else None
diameter = nx.diameter(G) if nx.is_connected(G) else None
average_clustering = nx.average_clustering(G)
average_degree = sum(dict(G.degree()).values()) / number_of_nodes
mean_fitting_error = best_err

# Display results
results = {
    "Number of Nodes": number_of_nodes,
    "Number of Edges": number_of_edges,
    "Average Shortest Path Length": average_shortest_path_length,
    "Diameter": diameter,
    "Average Clustering Coefficient": average_clustering,
    "Average Degree": average_degree,
    "Mean Fitting Error": mean_fitting_error,
    "Gradient": gamma_fit,
}

for key, value in results.items():
    print(f"{key}: {value}")
    

Number of Nodes: 11
Number of Edges: 17
Average Shortest Path Length: 1.9090909090909092
Diameter: 4
Average Clustering Coefficient: 0.483982683982684
Average Degree: 3.090909090909091
Mean Fitting Error: 0.006386405540035268
Gradient: 0.49858359367037575


In [9]:
# Create subplot figure
rho_values = [0, 0.35, 0.7, 0.85, 0.961, 0.978485]

fig = make_subplots(rows=2, cols=3, subplot_titles=[f"rho = {rho}" for rho in rho_values])

# Generate subplots
for i, rho in enumerate(rho_values):
    adj_matrix = (corr_matrix.abs() > rho).astype(int)
    G = nx.from_pandas_adjacency(adj_matrix)
    G.remove_edges_from(nx.selfloop_edges(G))
    G.remove_nodes_from(list(nx.isolates(G)))
    communities = nx.community.greedy_modularity_communities(G)
    supergraph = nx.cycle_graph(len(communities))
    superpos = nx.spring_layout(G, scale=10, seed=429)
    centers = list(superpos.values())
    pos = {}
    for center, comm in zip(centers, communities):
        pos.update(nx.spring_layout(nx.subgraph(G, comm), center=center))
    # pos = nx.spring_layout(G)
    
    edge_x = []
    edge_y = []
    for edge in G.edges():
        x0, y0 = pos[edge[0]]
        x1, y1 = pos[edge[1]]
        edge_x.append(x0)
        edge_x.append(x1)
        edge_x.append(None)
        edge_y.append(y0)
        edge_y.append(y1)
        edge_y.append(None)

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        #set the length of edge to be inverse to correlation between the two nodes
        line=dict(width=0.5, color='#888'),
        hoverinfo='text',
        mode='lines',
        text=[f"Correlation: {corr_matrix.loc[edge[0], edge[1]]}" for edge in G.edges()]
    )

    node_x = []
    node_y = []
    for node in G.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers',
        hoverinfo='text',
        marker=dict(
            showscale=(i == 0),  # Only show colorbar in the first subplot
            colorscale='Viridis',
            reversescale=True,
            color=[],
            size=10,
            cmin=0,
            cmax=49,
            colorbar=dict(
                thickness=15,
                title='Node Connections',
                xanchor='left',
                titleside='right',
                x=1.05  # Adjust this to position the colorbar correctly
            ),
            line_width=2
        )
    )

    node_adjacencies = []
    node_text = []
    for node, adjacencies in enumerate(G.adjacency()):
        node_adjacencies.append(len(adjacencies[1]))
        node_text.append(f'{list(G.nodes())[node]}<br>connections: ' + str(len(adjacencies[1])))
    node_trace.marker.color = node_adjacencies
    node_trace.text = node_text

    row = i // 3 + 1
    col = i % 3 + 1
    fig.add_trace(edge_trace, row=row, col=col)
    fig.add_trace(node_trace, row=row, col=col)

fig.update_layout(
    height=800, width=1400, 
    title_text="Financial Network Graphs with Different rho Values",
    showlegend=False  # Hide the legend
)
fig.show()


In [10]:
# Normalize the daily close prices p_j(t) to [0, 1] Using the formula:  p_j(t) = x1_j(t) / max_t x1_j(t).


def normalize_prices(df_prices):
    """Normalize each column (stock) by dividing by its column max."""
    df_norm = df_prices.div(df_prices.max(axis=0), axis=1)
    return df_norm


# 3) Normalize the ESG scores to [0.1, 1] 


def normalize_esg(esg_series):
    """
    If esg_series is a pd.Series for a single stock, shape=(N,) 
    or a pd.DataFrame shape=(N,1).
    We'll assume we just have one ESG score per stock, i.e. esg_series is shape=(N,)
    keyed by stock ticker. 
    """
    esg_min = esg_series.min()
    esg_max = esg_series.max()

    norm_esg = 1.0 - ((esg_max - esg_series) / (esg_max - esg_min)) * 0.9
    return norm_esg


# 4) Compute the composite indicator: 


def compute_composite_indicator(df_prices, esg_series):


    esg_series_aligned = esg_series.reindex(df_prices.columns)
    
    # broadcast ESG across each row => same ESG each day
    esg_matrix = np.tile(esg_series_aligned.values, (df_prices.shape[0], 1))
    esg_df = pd.DataFrame(esg_matrix, index=df_prices.index, columns=df_prices.columns)

    # CI_j(t) = sqrt( p_j(t) * esg_j )
    ci_df = np.sqrt(df_prices * esg_df)
    
    return ci_df


# 5) Putting it all together


def incorporate_esg_and_prices(df_prices, df_esg_single):
    """
    df_prices: raw daily close prices (T x N)
    df_esg_single: single ESG value per stock j. Index=stock, shape=(N,).
    
    Steps:
      1) normalize the close prices
      2) normalize the ESG
      3) compute composite indicator
    """
    # 1) Price normalization
    df_prices_norm = normalize_prices(df_prices)
    
    # 2) ESG normalization
    esg_norm = normalize_esg(df_esg_single)
    
    # 3) Composite
    CI = compute_composite_indicator(df_prices_norm, esg_norm)
    
    return CI

def convert_esg_data_to_series(df_esg):
    """
    Convert the retrieved ESG data to a pd.Series format.
    
    Parameters:
    df_esg (pd.DataFrame): DataFrame containing ESG data with columns ['Instrument', 'ESG Score']
    
    Returns:
    pd.Series: Series with stock tickers as index and ESG scores as values
    """
    esg_series = df_esg.set_index('Instrument')['ESG_Score']
    return esg_series




In [28]:
df_esg = pd.read_csv("../data/euro50_esg.csv")

CI_price = incorporate_esg_and_prices(df_returns, convert_esg_data_to_series(df_esg))
    
print("Normalized composite indicator (first 5 rows):")
print(CI_price.head())

Normalized composite indicator (first 5 rows):
             DHLn.DE   PERP.PA  NDAFI.HE   AIRP.PA    IBE.MC  SIEGn.DE  \
Date                                                                     
2023-01-02  0.615244  0.530460  0.705153  0.736366  0.706548  0.719197   
2023-01-03  0.617461  0.529815  0.707226  0.735438  0.705424  0.722293   
2023-01-04  0.620984  0.532608  0.714024  0.745257  0.709749  0.734165   
2023-01-05  0.623307  0.529599  0.720419  0.743423  0.707990  0.732970   
2023-01-06  0.623518  0.532465       NaN  0.753298  0.709006  0.736495   

              SAN.MC  VOWG_p.DE   SAPG.DE   CRDI.MI  ...   RACE.MI   LVMH.PA  \
Date                                                 ...                       
2023-01-02  0.704178   0.808827  0.600453  0.458825  ...  0.218572  0.727680   
2023-01-03  0.705104   0.815604  0.603803  0.466675  ...  0.219272  0.732226   
2023-01-04  0.718065   0.828268  0.610477  0.475317  ...  0.220506  0.750339   
2023-01-05  0.719397   0.832337  0

In [29]:
# calculate the correlation matrix of the composite indicator
corr_matrix_ci = CI_price.corr()

# Create heatmap using plotly


fig = go.Figure(data=go.Heatmap(
    z=corr_matrix_ci,
    x=corr_matrix_ci.columns,
    y=corr_matrix_ci.columns,
    colorscale='RdBu_r',
    zmid=0,  # Center the colorscale at 0
))

# Update layout
fig.update_layout(
    title="Correlation Matrix of Euro Stoxx50 Stocks (close prices)",
    width=1000,
    height=1000,
    xaxis_tickangle=-45
)

fig.show()

# 3) (Optional) threshold the correlation matrix by some cutoff rho
rho = 0.9689
adj_matrix_ci = (corr_matrix_ci.abs() > rho).astype(int)
np.fill_diagonal(adj_matrix_ci.values, 0)

# adj_matrix is now a 0/1 adjacency matrix, where edges exist if |c_ij| > rho
print("Correlation Matrix:\n", corr_matrix_ci)
print("Adjacency Matrix (|corr| > {}):\n".format(rho), adj_matrix_ci)


Correlation Matrix:
             DHLn.DE   PERP.PA  NDAFI.HE   AIRP.PA    IBE.MC  SIEGn.DE  \
DHLn.DE    1.000000  0.643302 -0.115838 -0.246132 -0.465005 -0.299775   
PERP.PA    0.643302  1.000000 -0.407700 -0.741878 -0.755174 -0.714418   
NDAFI.HE  -0.115838 -0.407700  1.000000  0.330533  0.120408  0.430190   
AIRP.PA   -0.246132 -0.741878  0.330533  1.000000  0.641026  0.780469   
IBE.MC    -0.465005 -0.755174  0.120408  0.641026  1.000000  0.732861   
SIEGn.DE  -0.299775 -0.714418  0.430190  0.780469  0.732861  1.000000   
SAN.MC    -0.489491 -0.853097  0.499823  0.836396  0.736059  0.789735   
VOWG_p.DE  0.637809  0.803724  0.018046 -0.494678 -0.777655 -0.415045   
SAPG.DE   -0.567183 -0.925629  0.299756  0.828007  0.855972  0.850156   
CRDI.MI   -0.516643 -0.919764  0.377823  0.888499  0.794529  0.818709   
AD.AS     -0.235061 -0.017140 -0.378923 -0.052195  0.481836  0.107379   
BNPP.PA   -0.157521 -0.365259  0.591255  0.453305  0.321422  0.421602   
DTEGn.DE  -0.590992 -0.800197 

In [64]:
max_ccs = []
errors = []
num_nodes_without_isolates = []


def exponential_degree(k, alpha, gamma):
    return alpha * np.exp(-gamma * k)

def make_graph(rho):
    # 1. Threshold the correlation matrix
    adj_matrix_ci = (corr_matrix_ci.abs() > rho).astype(int)
    np.fill_diagonal(adj_matrix_ci.values, 0)

    # 2. Create a graph from the adjacency matrix
    G = nx.from_pandas_adjacency(adj_matrix_ci)
    G.remove_edges_from(nx.selfloop_edges(G))

    count_non_isolates = 0
    for node, degree in dict(G.degree()).items():
        if degree > 0:
            count_non_isolates += 1

    num_nodes_without_isolates.append(count_non_isolates)

    cc = max(nx.connected_components(G), key=len)
    max_ccs.append(len(cc))

    return G

def fitting_error_for_threshold(rho):
    # 1. Build the graph at threshold rho
    G = make_graph(rho)

    max_cc = max(nx.connected_components(G), key=len)

    # 2. Degree distribution
    degrees = [deg for _, deg in G.degree()]
    hist = np.bincount(degrees)
    p_k = hist / hist.sum()

    # 3. Fit alpha, gamma
    k_vals = np.arange(1, len(p_k))
    p_vals = p_k[1:]
    nonzero = p_vals > 0
    k_fit = k_vals[nonzero]
    p_fit = p_vals[nonzero]
    popt, _ = curve_fit(exponential_degree, k_fit, p_fit, p0=(1.0, 0.1))
    alpha_fit, gamma_fit = popt

    # 4. Compute fitting error
    p_exp_fit = exponential_degree(k_vals, alpha_fit, gamma_fit)
    e_fit = np.sum(np.abs(p_k[1:] - p_exp_fit))

    errors.append(e_fit)

    if len(max_cc) < 10:
        raise Warning(f"Graph has less than 10 nodes at rho = {rho}")
    return e_fit, alpha_fit, gamma_fit, max_cc

import warnings

# Loop over candidate rho’s
rhos = np.linspace(0.8, 0.99, 100)
best_rho = None
best_err = float('inf')
rhos_run = []
problems = []
for rho in rhos:
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            err, alpha_fit, gamma_fit, max_cc = fitting_error_for_threshold(rho)
        rhos_run.append(rho)
        
        if err < best_err:
            best_rho = rho
            best_err = err
    except:
        problems.append(rho)
        continue
    
# Plot the error as a function of rho
fig = go.Figure()
fig.add_trace(go.Scatter(x=rhos, y=errors, mode='lines', name='Fitting Error'))
fig.update_layout(
    title='Fitting Error vs. Threshold rho',
    xaxis_title='Threshold rho',
    yaxis_title='Fitting Error',
    width=1000,
    height=400
)
fig.show()

# Plot max connected component size
fig = go.Figure()
fig.add_trace(go.Scatter(x=rhos, y=max_ccs[:93], mode='lines', name='Max CC Size'))
fig.update_layout(
    title='Max Connected Component Size vs. Threshold rho',
    xaxis_title='Threshold rho',
    yaxis_title='Max CC Size',
    width=1000,
    height=400
)
fig.show()

# Plot number of nodes without isolates
fig = go.Figure()
fig.add_trace(go.Scatter(x=rhos, y=num_nodes_without_isolates, mode='lines', name='Nodes without Isolates'))
fig.update_layout(
    title='Number of Nodes without Isolates vs. Threshold rho',
    xaxis_title='Threshold rho',
    yaxis_title='Number of Nodes without Isolates',
    width=1000,
    height=400
)
fig.show()


print(f"Problems at thresholds: {problems}")

print(f"Best threshold = {best_rho}, with error = {best_err}")


Problems at thresholds: [np.float64(0.9631313131313132), np.float64(0.965050505050505), np.float64(0.966969696969697), np.float64(0.9688888888888889), np.float64(0.9708080808080808), np.float64(0.9727272727272728), np.float64(0.9746464646464646), np.float64(0.9765656565656566), np.float64(0.9784848484848485), np.float64(0.9804040404040404), np.float64(0.9823232323232323), np.float64(0.9842424242424243), np.float64(0.9861616161616161), np.float64(0.9880808080808081), np.float64(0.99)]
Best threshold = 0.9592929292929293, with error = 0.12943104030381483


In [65]:
rho = 0.9439
adj_matrix_ci = (corr_matrix_ci.abs() > rho).astype(int)

# Flatten adjacency to degrees
CI_price = nx.from_pandas_adjacency(adj_matrix_ci)
CI_price.remove_edges_from(nx.selfloop_edges(CI_price))

degrees = [deg for (_, deg) in CI_price.degree()]


# Max degree so we know how far to compute the histogram
max_degree = max(degrees)

# Histogram of degrees: hist[k] = number of nodes with degree k
hist = np.bincount(degrees)

# Probability p(k) for each degree k
p_k = hist / hist.sum()

print(f"p(k) for each degree k: {p_k}")

def exponential_degree(k, alpha, gamma):
    return alpha * np.exp(-gamma * k)

k_values = np.arange(1, max_degree + 1)   
p_values = p_k[1:]  


# Filter out any zero-probability bins so curve_fit doesn’t choke
nonzero_indices = np.where(p_values > 0)
k_fit = k_values[nonzero_indices]
p_fit = p_values[nonzero_indices]

# Now do the fit
popt, pcov = curve_fit(exponential_degree, k_fit, p_fit, p0=(1.0, 0.1))
alpha_fit, gamma_fit = popt
print("Fitted alpha =", alpha_fit)
print("Fitted gamma =", gamma_fit)

# Evaluate the fitted exponential at each k
p_exp_fit = exponential_degree(k_values, alpha_fit, gamma_fit)

# Compute sum of absolute deviations
e_fitting = np.sum(np.abs(p_k[1:] - p_exp_fit))  
print("Fitting error =", e_fitting)


# Create plotly figure
fig = go.Figure()

# Add empirical data points
fig.add_trace(go.Scatter(
    x=k_fit,
    y=p_fit,
    mode='markers',
    name='Empirical p(k)',
))

# Add fitted curve
fig.add_trace(go.Scatter(
    x=k_fit,
    y=exponential_degree(k_fit, alpha_fit, gamma_fit),
    mode='lines',
    name=r'αe^(-γk) (fit)',
    line=dict(color='red')
))

# Update layout
fig.update_layout(
    title='Degree Distribution with Exponential Fit (α={:.2f}, γ={:.2f}), (rho={:.4f})'.format(alpha_fit, gamma_fit, rho),
    xaxis_title='Degree k',
    yaxis_title='p(k)',
    xaxis_type='log',
    yaxis_type='log',
    
)

fig.show()

p(k) for each degree k: [0.66 0.06 0.04 0.02 0.   0.   0.   0.08 0.02 0.04 0.04 0.04]
Fitted alpha = 0.0454733124857367
Fitted gamma = 0.010725152208759744
Fitting error = 0.2349561858735204


In [32]:

G = nx.from_pandas_adjacency(adj_matrix_ci)
G.remove_edges_from(nx.selfloop_edges(G))
cc = max(nx.connected_components(G), key=len)
G = G.subgraph(cc).copy()

# Calculate metrics
number_of_nodes = nx.number_of_nodes(G)
number_of_edges = nx.number_of_edges(G)
average_shortest_path_length = nx.average_shortest_path_length(G) if nx.is_connected(G) else None
diameter = nx.diameter(G) if nx.is_connected(G) else None
average_clustering = nx.average_clustering(G)
average_degree = sum(dict(G.degree()).values()) / number_of_nodes
mean_fitting_error = best_err

# Display results
results = {
    "Number of Nodes": number_of_nodes,
    "Number of Edges": number_of_edges,
    "Average Shortest Path Length": average_shortest_path_length,
    "Diameter": diameter,
    "Average Clustering Coefficient": average_clustering,
    "Average Degree": average_degree,
    "Mean Fitting Error": mean_fitting_error,
    "Modularity": nx.community.modularity(G, nx.community.greedy_modularity_communities(G)),
    "Power Law Exponent": gamma_fit,
    "Gradient": alpha_fit,
    "Clustering Coefficient": nx.average_clustering(G)
}

for key, value in results.items():
    print(f"{key}: {value}")
    

Number of Nodes: 11
Number of Edges: 21
Average Shortest Path Length: 1.8181818181818181
Diameter: 4
Average Clustering Coefficient: 0.5385281385281385
Average Degree: 3.8181818181818183
Mean Fitting Error: 0.004692595510491233
Modularity: 0.18367346938775508
Power Law Exponent: 0.08033316755725417
Gradient: 0.058162958759046965
Clustering Coefficient: 0.5385281385281385


In [33]:
#Plot a graph of clustering coefficient vs degree
clustering = nx.clustering(G)
degree = dict(G.degree())
clustering_values = [clustering[node] for node in G.nodes()]
degree_values = [degree[node] for node in G.nodes()]

fig = go.Figure()
fig.add_trace(go.Scatter(x=degree_values, y=clustering_values, mode='markers', text=list(G.nodes()), ))
fig.update_layout(
    title='Clustering Coefficient vs. Degree',
    xaxis_title='Degree',
    yaxis_title='Clustering Coefficient',
    width=800,
    height=600
)
fig.show()

#Add a fitter power law line to the graph
def power_law(x, a, b):
    return a * x ** b

# Fit the power law
popt, _ = curve_fit(power_law, degree_values, clustering_values, p0=(1, -1))
a_fit, b_fit = popt
print("Fitted a =", a_fit)
print("Fitted b =", b_fit)

# Evaluate the fitted power law at each x
y_fit = power_law(degree_values, a_fit, b_fit)

# Compute sum of absolute deviations
e_fitting = np.sum(np.abs(clustering_values - y_fit))
print("Fitting error =", e_fitting)

# Create plotly figure
fig = go.Figure()

Fitted a = 0.497357085447794
Fitted b = 0.06482899249114113
Fitting error = 2.0018182333883825


In [41]:
rho = 0.9612
adj_matrix = (corr_matrix.abs() > rho).astype(int)
plot_network_graph(adj_matrix, sparse=0.7, largest_cc=False, drop_isolates=True, title=f"Financial Network Graph (rho={rho:.4f})")

G_SelectedFN = nx.from_pandas_adjacency(adj_matrix)
G_SelectedFN.remove_edges_from(nx.selfloop_edges(G_SelectedFN))
G_SelectedFN.remove_nodes_from(list(nx.isolates(G_SelectedFN)))

# print nodes names
print(G_SelectedFN.nodes())
print(len(G_SelectedFN.nodes()))

['PERP.PA', 'SAPG.DE', 'CRDI.MI', 'ALVG.DE', 'SAF.PA', 'WLSNc.AS', 'PRTP.PA', 'SCHN.PA', 'MUVGn.DE', 'RACE.MI', 'ITX.MC', 'ISP.MI', 'SGOB.PA']
13


In [40]:
rho = 0.9439
adj_matrix_ci = (corr_matrix.abs() > rho).astype(int)

plot_network_graph(adj_matrix_ci, sparse=0.7, largest_cc=False, drop_isolates=True, title=f"Financial Network Graph with Composite Indicator (rho={rho:.4f})")

G_SelectedESG = nx.from_pandas_adjacency(adj_matrix_ci)
G_SelectedESG.remove_edges_from(nx.selfloop_edges(G_SelectedESG))
G_SelectedESG.remove_nodes_from(list(nx.isolates(G_SelectedESG)))

# print nodes names
print(G_SelectedESG.nodes())
print(len(G_SelectedESG.nodes()))

['PERP.PA', 'SAN.MC', 'SAPG.DE', 'CRDI.MI', 'ESLX.PA', 'ALVG.DE', 'SAF.PA', 'WLSNc.AS', 'PRTP.PA', 'SCHN.PA', 'MUVGn.DE', 'RACE.MI', 'AXAF.PA', 'ITX.MC', 'ISP.MI', 'SGOB.PA']
16


## Index Calculation

In [19]:
market_cap = pd.read_csv("../data/euro50_marketcap.csv")
if 'Date' in market_cap.columns:
    market_cap.set_index('Date', inplace=True)
market_cap.index = pd.to_datetime(market_cap.index)
market_cap.head()

Unnamed: 0_level_0,ABI.BR,AD.AS,ADSGn.DE,ADYEN.AS,AIR.PA,AIRP.PA,ALVG.DE,ASML.AS,AXAF.PA,BASFn.DE,...,SAPG.DE,SASY.PA,SCHN.PA,SGEF.PA,SGOB.PA,SIEGn.DE,STLAM.MI,TTEF.PA,VOWG_p.DE,WLSNc.AS
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-01-02,98846180000.0,26870320000.0,22986000000.0,40474760000.0,89745020000.0,70662000000.0,81892910000.0,208381300000.0,62285870000.0,42891630000.0,...,119680900000.0,116203400000.0,75806870000.0,55962330000.0,24219980000.0,110347000000.0,43856100000.0,157200300000.0,70226240000.0,25288090000.0
2023-01-03,98741950000.0,27198250000.0,23738400000.0,40524340000.0,90281000000.0,70484040000.0,82760030000.0,209752000000.0,62697420000.0,43700570000.0,...,121020000000.0,117027300000.0,76469340000.0,55897490000.0,24854370000.0,111299000000.0,44428080000.0,154476400000.0,71011640000.0,25344740000.0
2023-01-04,100791800000.0,27471530000.0,24908400000.0,41075440000.0,90943090000.0,72378820000.0,85300910000.0,216484400000.0,63602840000.0,45550850000.0,...,123710400000.0,116951200000.0,79838790000.0,57217720000.0,25963250000.0,114988000000.0,45604180000.0,150862000000.0,72817400000.0,25561050000.0
2023-01-05,99263110000.0,27183350000.0,24937200000.0,39792610000.0,90801220000.0,72026760000.0,84514450000.0,217653500000.0,62897320000.0,46140790000.0,...,123882400000.0,114517600000.0,79735990000.0,57412220000.0,26494480000.0,114614000000.0,46079760000.0,151752500000.0,73323930000.0,25123280000.0
2023-01-06,99784270000.0,27203220000.0,25322400000.0,40623040000.0,91857410000.0,73953050000.0,85421900000.0,222329900000.0,64309180000.0,47204480000.0,...,125725100000.0,115493600000.0,81038090000.0,58042860000.0,27015390000.0,115719000000.0,46420380000.0,153690600000.0,73671170000.0,25396240000.0


In [20]:
# Calculate the daily index based on percentage changes in market cap, sum in axis 1
market_cap_sum = market_cap.sum(axis=1)
market_index_pct = market_cap_sum.pct_change().add(1)
market_index_pct.head()

Date
2023-01-02         NaN
2023-01-03    1.007654
2023-01-04    1.027035
2023-01-05    0.997806
2023-01-06    1.015240
dtype: float64

In [45]:
# Plot the daily change in market cap
fig = go.Figure()

fig.add_trace(go.Scatter(x=market_index_pct.index, y=market_index_pct, mode='lines', 
                        name='Market Index',
                        line=dict(color='#f8c471 ', dash='solid')))

# selected rics
FNrics = ['PERP.PA', 'SAPG.DE', 'CRDI.MI', 'ALVG.DE', 'SAF.PA', 'WLSNc.AS', 'PRTP.PA', 'SCHN.PA', 'MUVGn.DE', 'RACE.MI', 'ITX.MC', 'ISP.MI', 'SGOB.PA']


#print number of selected rics
print(len(FNrics))
FN_esg_rics = ['PERP.PA', 'SAN.MC', 'SAPG.DE', 'CRDI.MI', 'ESLX.PA', 'ALVG.DE', 'SAF.PA', 'WLSNc.AS', 'PRTP.PA', 'SCHN.PA', 'MUVGn.DE', 'RACE.MI', 'AXAF.PA', 'ITX.MC', 'ISP.MI', 'SGOB.PA']




market_cap_selected = market_cap[FNrics]
market_prices_selected = df_returns[FNrics]
esg_cap_selected = market_cap[FN_esg_rics]
esg_prices_selected = df_returns[FN_esg_rics]

market_prices_selected_sum = market_prices_selected.sum(axis=1)
esg_prices_selected_sum = esg_prices_selected.sum(axis=1)

market_cap_selected_sum = market_cap_selected.sum(axis=1)
esg_cap_selected_sum = esg_cap_selected.sum(axis=1)
market_index_selected = market_cap_selected_sum.pct_change().add(1)
esg_index_selected = esg_cap_selected_sum.pct_change().add(1)



fig.add_trace(go.Scatter(x=market_index_selected.index, y=market_index_selected, 
                        mode='lines',
                        name='Selected Stocks',
                        opacity=0.5,
                        line=dict(color='black', dash='solid')))

fig.add_trace(go.Scatter(x=esg_index_selected.index, y=esg_index_selected, 
                        mode='lines',
                        name='Selected Stocks',
                        opacity=0.5,
                        line=dict(color='red', dash='dash')))

fig.update_layout(
    title='Euro Stoxx50 Market Index',
    xaxis_title='Date',
    yaxis_title='Daily Return',
    width=1200,
    height=400,
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01
    )
)

fig.show()

# Plot the index 



13


In [49]:
#Calculate and plot the index for the selected stocks and the market index with the first stock as the base
base = market_cap_sum.iloc[0]
market_cap_sum = market_cap.sum(axis=1)
market_index = market_cap_sum / base

base = market_cap_selected_sum.iloc[0]
market_index_selected = market_cap_selected_sum / base
base = esg_cap_selected_sum.iloc[0]
esg_index_selected = esg_cap_selected_sum / base

fig = go.Figure()

fig.add_trace(go.Scatter(x=market_index.index, y=market_index, mode='lines',
                        name='Market Index',
                        opacity=0.5,
                        line=dict(color='black', dash='solid')))
fig.add_trace(go.Scatter(x=market_index_selected.index, y=market_index_selected, mode='lines',
                        name='Selected Stocks',
                        opacity=0.5,
                        line=dict(color='blue', dash='dash')))
fig.add_trace(go.Scatter(x=esg_index_selected.index, y=esg_index_selected, mode='lines',
                        name='ESG Network Stocks',
                        opacity=0.5,
                        line=dict(color='red', dash='solid')))
fig.update_layout(
    title='Euro Stoxx50 Market Index',
    xaxis_title='Date',
    yaxis_title='Daily Return',
    width=1200,
    height=400,
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01
    )
)


In [50]:
#plot the differences between the market index and the selected stocks index
fig = go.Figure()

fig.add_trace(go.Scatter(x=market_index_selected.index, y=market_index_selected - market_index, mode='lines',
                        name='Selected Stocks',
                        opacity=0.5,
                        line=dict(color='blue', dash='dash')))

fig.add_trace(go.Scatter(x=esg_index_selected.index, y=esg_index_selected - market_index, mode='lines',
                        name='ESG Network Stocks',
                        opacity=0.5,
                        line=dict(color='red', dash='solid')))

fig.update_layout(
    title='Euro Stoxx50 Market Index',
    xaxis_title='Date',
    yaxis_title='Daily Return Error',
    width=1200,
    height=400,
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01
    )
)

fig.show()


In [62]:
#Calculate and plot the index for the selected stocks and the market index with the first stock as the base
df_returns_sum = df_returns.sum(axis=1)
base = df_returns_sum.iloc[0]
market_index_price = df_returns_sum / base

base = market_prices_selected_sum.iloc[0]
market_price_index_selected = market_prices_selected_sum / base
base = esg_prices_selected_sum.iloc[0]
esg_price_index_selected = esg_prices_selected_sum / base



fig = go.Figure()

fig.add_trace(go.Scatter(x=market_index_price.index, y=market_index_price, mode='lines',
                        name='Market Index',
                        opacity=0.5,
                        line=dict(color='black', dash='solid')))
fig.add_trace(go.Scatter(x=market_price_index_selected.index, y=market_price_index_selected, mode='lines',
                        name='Selected Stocks',
                        opacity=0.5,
                        line=dict(color='blue', dash='dash')))
fig.add_trace(go.Scatter(x=esg_price_index_selected.index, y=esg_price_index_selected, mode='lines',
                        name='ESG Network Stocks',
                        opacity=0.5,
                        line=dict(color='red', dash='solid')))
fig.update_layout(
    title='Euro Stoxx50 Market Index',
    xaxis_title='Date',
    yaxis_title='Daily Return',
    width=1200,
    height=400,
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01
    )
)

fig.show()


In [63]:
#plot the differences between the market index and the selected stocks index
fig = go.Figure()

fig.add_trace(go.Scatter(x=market_price_index_selected.index, y=market_price_index_selected - market_index_price, mode='lines',
                        name='Selected Stocks',
                        opacity=0.5,
                        line=dict(color='blue', dash='dash')))

fig.add_trace(go.Scatter(x=esg_price_index_selected.index, y=esg_price_index_selected - market_index_price, mode='lines',
                        name='ESG Network Stocks',
                        opacity=0.5,
                        line=dict(color='red', dash='solid')))

fig.update_layout(
    title='Euro Stoxx50 Market Index',
    xaxis_title='Date',
    yaxis_title='Daily Return Error',
    width=1200,
    height=400,
    legend=dict(
        yanchor="top",
        y=0.99,
        xanchor="left",
        x=0.01
    )
)

fig.show()