In [11]:
from setup.initialize_django import initialize_django
initialize_django()
import pandas as pd
from eulytix_tools.networks.graph_manipulation import *

Django already configured..


In [12]:
from datetime import date
from analytics.models import *

from django.db.models import Subquery, OuterRef

In [13]:
import os
print(os.environ["DB_NAME"])

eulytix_complete


In [14]:
cycle_start = LegislativeCycle.objects.filter(legislative_cycle_num=10).values_list('period__startswith', flat=True).first()

cycle_start

datetime.date(2024, 7, 1)

In [15]:
node_list = pd.DataFrame(
    #PersonEPGroup.objects.filter(period__contains=date(2024, 7, 16))
    PersonEPGroup.objects.filter(period__contains=date(2024, 7, 1))
    .distinct("person")
    .values_list("person__person_full_name", "person__person_given_name", "person__person_family_name", "epgroup__epgroup_abbr", "person__country__country_name"),
    columns=["person_full_name", "person_given_name", "person_family_name", "epgroup_abbr", "country_name"]
)

In [45]:
node_list.head()

Unnamed: 0,person_full_name,person_given_name,person_family_name,epgroup_abbr,country_name
0,Loucas Fourlas,Loucas,Fourlas,EPP,Cyprus
1,Łukasz Kohut,Łukasz,Kohut,EPP,Poland
2,Karlo Ressler,Karlo,Ressler,EPP,Croatia
3,Andrey Novakov,Andrey,Novakov,EPP,Bulgaria
4,Jadwiga Wiśniewska,Jadwiga,Wiśniewska,ECR,Poland


In [17]:
def generate_label_name(row):
    if pd.notna(row["person_given_name"]) and row["person_given_name"]:
        return f"{row['person_given_name'][:1].upper()}. {row['person_family_name']}"
    else:
        return row["person_family_name"] if pd.notna(row["person_family_name"]) else ""


In [18]:
node_list["person_label_name"] = node_list.apply(generate_label_name, axis=1)

In [19]:
actual_epgroup = (
        PersonEPGroup.objects.filter(person=OuterRef("person"))
        .filter(period__contains=OuterRef("amendment__document__document_date"))
        .values("epgroup__epgroup_abbr")[:1]
    )

In [20]:
edge_list = pd.DataFrame(
    PersonAmendment.objects.annotate(epgroup_abbr=Subquery(actual_epgroup))
    .filter(person__person_full_name__in=node_list["person_full_name"])
    .filter(amendment__document__document_date__gte=date(2024, 7, 16))
    #.filter(amendment__document__document_date__lte=date(2024, 7, 1))
    .values_list("person__person_full_name", "amendment_id", "epgroup_abbr"),
    columns=["person_full_name", "amendment_id", "epgroup_abbr"]
)

In [13]:
edge_list.head()

Unnamed: 0,person_full_name,amendment_id,epgroup_abbr
0,Gwendoline Delbos-Corfield,16,Greens/EFA
1,Gwendoline Delbos-Corfield,18,Greens/EFA
2,Nikolaj Villumsen,33,GUE/NGL
3,László Trócsányi,37,NI
4,Gwendoline Delbos-Corfield,50,Greens/EFA


In [16]:
#G = onestep_projection(edge_list, "person_full_name", "amendment_id", "collaboration", "actor")
#G = onestep_projection(edge_list, "person_full_name", "amendment_id", "collaboration", "actor")
G = onestep_projection(edge_list, "person_full_name", "amendment_id", "simple_weighted", "actor")

In [17]:
G = fill_node_attributes(G, node_list, "person_full_name", ["epgroup_abbr", "country_name"])

In [18]:
import networkx as nx

In [19]:
nx.write_gexf(G,"./networks/tests/by_mep_network_10_simple_weighted.gexf")

## Natparty Graph

In [5]:
actual_natparty = (
        PersonNationalParty.objects.filter(person=OuterRef("person"))
        .filter(period__contains=OuterRef("amendment__document__document_date"))
        .values("national_party__national_party_name")[:1]
    )

In [None]:
node_list = pd.DataFrame(
    PersonEPGroup.objects.filter(period__contains=date(2024, 7, 16))
    #PersonEPGroup.objects.filter(period__contains=date(2024, 7, 1))
    .distinct("person")
    .values_list("person__personnationalparty__national_party__national_party_name",
                 "person__person_full_name", 
                 "person__person_given_name", 
                 "person__person_family_name", 
                 "epgroup__epgroup_abbr", 
                 "person__country__country_name"
    ),
    columns=["national_party_name", "person_full_name", "person_given_name", "person_family_name", "epgroup_abbr", "country_name"]
)

node_list.head()

Unnamed: 0,national_party_name,person_full_name,person_given_name,person_family_name,epgroup_abbr,country_name
0,Democratic Rally,Loucas Fourlas,Loucas,Fourlas,EPP,Cyprus
1,Independent,Łukasz Kohut,Łukasz,Kohut,EPP,Poland
2,Hrvatska demokratska zajednica,Karlo Ressler,Karlo,Ressler,EPP,Croatia
3,Citizens for European Development of Bulgaria,Andrey Novakov,Andrey,Novakov,EPP,Bulgaria
4,Prawo i Sprawiedliwość,Jadwiga Wiśniewska,Jadwiga,Wiśniewska,ECR,Poland


In [7]:
edge_list = pd.DataFrame(
    PersonAmendment.objects.annotate(national_party_name=Subquery(actual_natparty))
    .filter(national_party_name__in=node_list["national_party_name"])
    .filter(amendment__document__document_date__gte=date(2024, 7, 16))
    #.filter(amendment__document__document_date__lte=date(2024, 7, 1))
    .values_list("national_party_name", "amendment_id"),
    columns=["national_party_name", "amendment_id"]
).drop_duplicates(keep="first")

In [8]:
edge_list.head()

Unnamed: 0,national_party_name,amendment_id
0,Partei Mensch Umwelt Tierschutz,1
1,Partij voor de Dieren,1
2,Miljöpartiet de gröna,2
3,Les Écologistes,2
4,Socialistisk Folkeparti,2


In [24]:
edge_list.drop_duplicates(subset=["amendment_id", "national_party_name"], inplace=True)

In [25]:
G = onestep_projection(edge_list, "national_party_name", "amendment_id", "collaboration", "actor")

In [26]:
G = fill_node_attributes(G, node_list, "national_party_name", ["epgroup_abbr", "country_name"])

In [27]:
nx.write_gexf(G,"./networks/by_natparty_network_10.gexf")

In [9]:
node_list.to_csv("./networks/by_natparty_network_10.csv", index=False)

# EPGroup Graph

In [31]:
node_list = pd.DataFrame(
    PersonEPGroup.objects.filter(period__contains=date(2024, 7, 16))
    #PersonEPGroup.objects.filter(period__contains=date(2024, 7, 1))
    .distinct("person")
    .values_list("person__personnationalparty__national_party__national_party_name",
                 "person__person_full_name", 
                 "person__person_given_name", 
                 "person__person_family_name", 
                 "epgroup__epgroup_abbr", 
                 "person__country__country_name"
    ),
    columns=["national_party_name", "person_full_name", "person_given_name", "person_family_name", "epgroup_abbr", "country_name"]
)

node_list.head()

Unnamed: 0,national_party_name,person_full_name,person_given_name,person_family_name,epgroup_abbr,country_name
0,Democratic Rally,Loucas Fourlas,Loucas,Fourlas,EPP,Cyprus
1,Independent,Łukasz Kohut,Łukasz,Kohut,EPP,Poland
2,Hrvatska demokratska zajednica,Karlo Ressler,Karlo,Ressler,EPP,Croatia
3,Citizens for European Development of Bulgaria,Andrey Novakov,Andrey,Novakov,EPP,Bulgaria
4,Prawo i Sprawiedliwość,Jadwiga Wiśniewska,Jadwiga,Wiśniewska,ECR,Poland


In [32]:
node_list["epgroup_abbr"].value_counts()

epgroup_abbr
EPP           188
S&D           136
PfE            84
ECR            77
RE             77
Greens/EFA     53
GUE/NGL        46
NI             32
ESN            25
Name: count, dtype: int64

In [33]:
actual_epgroup = (
        PersonEPGroup.objects.filter(person=OuterRef("person"))
        .filter(period__contains=OuterRef("amendment__document__document_date"))
        .values("epgroup__epgroup_abbr")[:1]
    )

In [34]:
edge_list = pd.DataFrame(
    PersonAmendment.objects.annotate(epgroup_abbr=Subquery(actual_epgroup))
    .filter(epgroup_abbr__in=node_list["epgroup_abbr"])
    .filter(amendment__document__document_date__gte=date(2024, 7, 16))
    #.filter(amendment__document__document_date__lte=date(2024, 7, 1))
    .values_list("epgroup_abbr", "amendment_id"),
    columns=["ep_group", "amendment_id"]
).drop_duplicates(keep="first")

In [35]:
edge_list.drop_duplicates(subset=["amendment_id", "ep_group"], inplace=True)

In [36]:
edge_list.head()

Unnamed: 0,ep_group,amendment_id
0,GUE/NGL,1
2,Greens/EFA,2
5,ESN,3
6,EPP,4
8,GUE/NGL,5


In [37]:
G = onestep_projection(edge_list, "ep_group", "amendment_id", "collaboration", "actor")

In [38]:
G = fill_node_attributes(G, node_list, "epgroup_abbr", ["epgroup_abbr", "country_name"])

In [39]:
nx.write_gexf(G,"./networks/by_epgroup_network_10.gexf")

# Extract MEP Pairs with Co-sponsorship Data

In [None]:
node_list = pd.DataFrame(
    #
    PersonEPGroup.objects.filter(period__contains=date(2024, 7, 16))
    #PersonEPGroup.objects.filter(period__contains=date(2024, 7, 1))
    .distinct("person")
    .values_list("person__person_full_name", "person__person_given_name", "person__person_family_name", "epgroup__epgroup_abbr", "person__country__country_name"),
    columns=["person_full_name", "person_given_name", "person_family_name", "epgroup_abbr", "country_name"]
)

In [21]:
actual_epgroup = (
        PersonEPGroup.objects.filter(person=OuterRef("person"))
        .filter(period__contains=OuterRef("amendment__document__document_date"))
        .values("epgroup__epgroup_abbr")[:1]
    )

In [22]:
edge_list_to_check = pd.DataFrame(
    PersonAmendment.objects.annotate(epgroup_abbr=Subquery(actual_epgroup))
    .filter(person__person_full_name__in=node_list["person_full_name"])
    .filter(amendment__document__document_date__gte=date(2024, 7, 16))
    #.filter(amendment__document__document_date__lte=date(2024, 7, 1))
    .values_list("person__person_full_name", "amendment_id", "epgroup_abbr"),
    columns=["person_full_name", "amendment_id", "epgroup_abbr"]
)

In [23]:
#edge_list_for_pairs = edge_list_to_check.drop_duplicates(subset=["amendment_id", "epgroup_abbr"])
edge_list_for_pairs = edge_list_to_check.copy()

edge_list_for_pairs.head()

Unnamed: 0,person_full_name,amendment_id,epgroup_abbr
0,Anja Hazekamp,1,GUE/NGL
1,Anja Hazekamp,7,GUE/NGL
2,Gabriel Mato,13,EPP
3,Francisco José Millán Mon,13,EPP
4,France Jamet,16,PfE


In [None]:
# Group amendments by amendment_id to find all MEPs per amendment
#amendments_by_mep = edge_list_for_pairs.groupby("amendment_id")["person_full_name"].apply(list).reset_index()
amendments_by_mep = edge_list_for_pairs.groupby("amendment_id")[["person_full_name", "epgroup_abbr"]].apply(lambda x: x.values.tolist()).reset_index(name="person_epgroup_list")
amendments_by_mep.columns = ["amendment_id", "mep_list"]

# Generate all pairs within each amendment
mep_pairs = []
for _, row in amendments_by_mep.iterrows():
    mep_list = row["mep_list"]
    # Create all combinations of pairs from the mep_list
    for i in range(len(mep_list)):
        for j in range(i + 1, len(mep_list)):
            mep_pairs.append({
                "mep1": mep_list[i],
                "mep2": mep_list[j],
                "amendment_id": row["amendment_id"]
            })

mep_pairs_df = pd.DataFrame(mep_pairs)

mep_pairs_df[["mep1", "epgroup1"]] = mep_pairs_df["mep1"].apply(pd.Series)
mep_pairs_df[["mep2", "epgroup2"]] = mep_pairs_df["mep2"].apply(pd.Series)

# Count joint amendments for each pair
pair_counts = mep_pairs_df.groupby(["mep1", "mep2"])["amendment_id"].count().reset_index()
pair_counts.columns = ["mep1", "mep2", "joint_amendment_count"]

# Sort to ensure consistent ordering (smaller name first)
pair_counts["pair_sorted"] = pair_counts.apply(
    lambda row: tuple(sorted([row["mep1"], row["mep2"]])), axis=1
)
pair_counts[["mep1", "mep2"]] = pd.DataFrame(
    pair_counts["pair_sorted"].tolist(), index=pair_counts.index
)
pair_counts = pair_counts.drop("pair_sorted", axis=1).drop_duplicates()

print(f"Total unique pairs: {len(pair_counts)}")
pair_counts.head()

Total unique pairs: 31106


Unnamed: 0,mep1,mep2,joint_amendment_count
0,Abir Al-Sahlani,Alice Kuhnke,20
1,Abir Al-Sahlani,Alin Mituța,15
2,Abir Al-Sahlani,Andreas Glück,9
3,Abir Al-Sahlani,Andrus Ansip,6
4,Abir Al-Sahlani,Anna Júlia Donáth,150


In [None]:
# Merge with node_list to get MEP1 attributes
result = pair_counts.merge(
    node_list[["person_full_name", "country_name", "epgroup_abbr"]],
    left_on="mep1",
    right_on="person_full_name",
    how="left"
).drop("person_full_name", axis=1)

result.columns = ["mep1", "mep2", "joint_amendment_count", "mep1_country", "mep1_epgroup"]

# Merge with node_list to get MEP2 attributes
result = result.merge(
    node_list[["person_full_name", "country_name", "epgroup_abbr"]],
    left_on="mep2",
    right_on="person_full_name",
    how="left"
).drop("person_full_name", axis=1)

result.columns = ["mep1", "mep2", "joint_amendment_count", "mep1_country", "mep1_epgroup", "mep2_country", "mep2_epgroup"]

# Add serial number and reorder columns
result.insert(0, "serial_number", range(1, len(result) + 1))

# Reorder to match requested format:
# Serial number; MEP1 name; MEP1 Member State; MEP1 EP group; MEP2; MEP2 Member State, MEP2 EP group; number of joint amendments
result = result[["serial_number", "mep1", "mep1_country", "mep1_epgroup", "mep2", "mep2_country", "mep2_epgroup", "joint_amendment_count"]]

print(f"Total pairs extracted: {len(result)}")
print("\nFirst 10 pairs:")
result.head(10)

Total pairs extracted: 31106

First 10 pairs:


Unnamed: 0,serial_number,mep1,mep1_country,mep1_epgroup,mep2,mep2_country,mep2_epgroup,joint_amendment_count
0,1,Abir Al-Sahlani,Sweden,RE,Alice Kuhnke,Sweden,Greens/EFA,20
1,2,Abir Al-Sahlani,Sweden,RE,Alin Mituța,Romania,RE,15
2,3,Abir Al-Sahlani,Sweden,RE,Andreas Glück,Germany,RE,9
3,4,Abir Al-Sahlani,Sweden,RE,Andrus Ansip,Estonia,RE,6
4,5,Abir Al-Sahlani,Sweden,RE,Anna Júlia Donáth,Hungary,RE,150
5,6,Abir Al-Sahlani,Sweden,RE,Arba Kokalari,Sweden,EPP,12
6,7,Abir Al-Sahlani,Sweden,RE,Asger Christensen,Denmark,RE,15
7,8,Abir Al-Sahlani,Sweden,RE,Bart Groothuis,Netherlands,RE,41
8,9,Abir Al-Sahlani,Sweden,RE,Caroline Nagtegaal,Netherlands,RE,27
9,10,Abir Al-Sahlani,Sweden,RE,Catherine Amalric,France,RE,3


In [None]:
result.to_csv("./networks/mep_joint_amendment_pairs_cycle_9.csv", index=False)
result.to_excel("./networks/mep_joint_amendment_pairs_cycle_9.xlsx", index=False)

In [24]:
import numpy as np
from scipy.sparse import lil_matrix

# Create MEP to index mapping
unique_meps = edge_list_for_pairs["person_full_name"].unique()
mep_to_idx = {mep: idx for idx, mep in enumerate(unique_meps)}
idx_to_mep = {idx: mep for mep, idx in mep_to_idx.items()}
n_meps = len(unique_meps)

print(f"Total unique MEPs: {n_meps}")

# Initialize sparse adjacency matrix (more memory efficient for large graphs)
adj_matrix = lil_matrix((n_meps, n_meps), dtype=np.int32)

# Group by amendment and populate adjacency matrix
print("Building adjacency matrix...")
amendments_grouped = edge_list_for_pairs.groupby("amendment_id")["person_full_name"].apply(list)

for amendment_id, mep_list in amendments_grouped.items():
    # Get indices for all MEPs in this amendment
    indices = [mep_to_idx[mep] for mep in mep_list]
    
    # Increment count for all pairs (including both directions)
    for i in indices:
        for j in indices:
            if i != j:  # Don't count self-loops
                adj_matrix[i, j] += 1

print("Converting to edge list format...")

# Convert adjacency matrix to edge list (keeping both directions)
adj_matrix_coo = adj_matrix.tocoo()

pairs_data = []
for i, j, count in zip(adj_matrix_coo.row, adj_matrix_coo.col, adj_matrix_coo.data):
    pairs_data.append({
        "mep1": idx_to_mep[i],
        "mep2": idx_to_mep[j],
        "joint_amendment_count": int(count)
    })

mep_pairs_full = pd.DataFrame(pairs_data)

print(f"\nTotal directed pairs (with both directions): {len(mep_pairs_full)}")
print(f"Total undirected pairs (unique): {len(mep_pairs_full) // 2}")

# Add MEP attributes
mep_pairs_full = mep_pairs_full.merge(
    node_list[["person_full_name", "country_name", "epgroup_abbr"]],
    left_on="mep1",
    right_on="person_full_name",
    how="left"
).drop("person_full_name", axis=1)

mep_pairs_full.columns = ["mep1", "mep2", "joint_amendment_count", "mep1_country", "mep1_epgroup"]

mep_pairs_full = mep_pairs_full.merge(
    node_list[["person_full_name", "country_name", "epgroup_abbr"]],
    left_on="mep2",
    right_on="person_full_name",
    how="left"
).drop("person_full_name", axis=1)

mep_pairs_full.columns = ["mep1", "mep2", "joint_amendment_count", "mep1_country", "mep1_epgroup", "mep2_country", "mep2_epgroup"]

# Add serial number
mep_pairs_full.insert(0, "serial_number", range(1, len(mep_pairs_full) + 1))

# Reorder columns
mep_pairs_full = mep_pairs_full[["serial_number", "mep1", "mep1_country", "mep1_epgroup", "mep2", "mep2_country", "mep2_epgroup", "joint_amendment_count"]]

print("\nFirst 10 pairs:")
mep_pairs_full.head(10)


Total unique MEPs: 321
Building adjacency matrix...
Converting to edge list format...

Total directed pairs (with both directions): 5986
Total undirected pairs (unique): 2993

First 10 pairs:


Unnamed: 0,serial_number,mep1,mep1_country,mep1_epgroup,mep2,mep2_country,mep2_epgroup,joint_amendment_count
0,1,Anja Hazekamp,Netherlands,GUE/NGL,Giorgos Georgiou,Cyprus,GUE/NGL,110
1,2,Gabriel Mato,Spain,EPP,Francisco José Millán Mon,Spain,EPP,260
2,3,Gabriel Mato,Spain,EPP,Norbert Lins,Germany,EPP,5
3,4,Gabriel Mato,Spain,EPP,Christine Schneider,Germany,EPP,36
4,5,Gabriel Mato,Spain,EPP,Daniel Buda,Romania,EPP,298
5,6,Gabriel Mato,Spain,EPP,Dan-Ştefan Motreanu,Romania,EPP,31
6,7,Gabriel Mato,Spain,EPP,Manolis Kefalogiannis,Greece,EPP,36
7,8,Gabriel Mato,Spain,EPP,Alexander Bernhuber,Austria,EPP,3
8,9,Gabriel Mato,Spain,EPP,Herbert Dorfmann,Italy,EPP,31
9,10,Gabriel Mato,Spain,EPP,Isabel Benjumea Benjumea,Spain,EPP,46


In [25]:
# Save the full adjacency matrix (with both directions)
# mep_pairs_full.to_csv("./networks/mep_joint_amendment_pairs_cycle_10_full_adjacency.csv", index=False)
# mep_pairs_full.to_excel("./networks/mep_joint_amendment_pairs_cycle_10_full_adjacency.xlsx", index=False)

mep_pairs_full.to_excel("test.xlsx")

print(f"Saved full adjacency matrix with {len(mep_pairs_full)} directed edges")
print(f"Approximately {len(mep_pairs_full) // 2} unique undirected pairs")


Saved full adjacency matrix with 5986 directed edges
Approximately 2993 unique undirected pairs
