In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df_edges = pd.read_csv('/Users/wassim/threejs-cd-preview/ressources/Dark_GD_Contest_Edges.csv')
df_events= pd.read_csv('/Users/wassim/threejs-cd-preview/ressources/Dark_GD_Contest_Events.csv')


In [4]:
## Entrypoints
# Get unique values from Source and Target columns
source_nodes = set(df_edges['Source'])
target_nodes = set(df_edges['Target'])
comparison_df = pd.DataFrame({
    'Source': sorted(list(source_nodes)),
    'Is_Target': [node in target_nodes for node in sorted(list(source_nodes))]
})
#show only nodes that are not in Target
source_only_df = comparison_df[~comparison_df['Is_Target']]

print("\nNodes that appear only as Source (not as Target):")
print(source_only_df[['Source']].to_string(index=False))
print(f"\nTotal count: {len(source_only_df)}")


Nodes that appear only as Source (not as Target):
 Source
    1.0
   36.0
   39.0
    NaN
  107.0
  162.0
  171.0
  188.0
  230.0
  235.0
  236.0
  323.0
  351.0
  388.0
  392.0
  395.0
  397.0
  400.0
  404.0
  410.0
  417.0
  423.0
  439.0
  470.0
  498.0
  543.0
    NaN

Total count: 27


In [2]:
## Count nodes with degree 2 (one target and one source)
# Count occurrences of each node in Source and Target columns
source_counts = df_edges['Source'].value_counts()
target_counts = df_edges['Target'].value_counts()

all_nodes = pd.DataFrame(index=sorted(set(df_edges['Source'].unique()) | set(df_edges['Target'].unique())))
all_nodes['source_count'] = all_nodes.index.map(lambda x: source_counts.get(x, 0))
all_nodes['target_count'] = all_nodes.index.map(lambda x: target_counts.get(x, 0))
all_nodes['total_degree'] = all_nodes['source_count'] + all_nodes['target_count']

#  nodes with degree 2
degree_2_nodes = all_nodes[all_nodes['total_degree'] == 2]

# only nodes with exactly one source and one target
degree_2_nodes = degree_2_nodes[
    (degree_2_nodes['source_count'] == 1) & 
    (degree_2_nodes['target_count'] == 1)
]

print("\nNodes with exactly one source and one target:")
print(degree_2_nodes[['source_count', 'target_count']].to_string())
print(f"\nTotal count: {len(degree_2_nodes)}")


Nodes with exactly one source and one target:
       source_count  target_count
2.0               1             1
3.0               1             1
4.0               1             1
5.0               1             1
6.0               1             1
7.0               1             1
8.0               1             1
10.0              1             1
13.0              1             1
15.0              1             1
16.0              1             1
17.0              1             1
18.0              1             1
19.0              1             1
20.0              1             1
21.0              1             1
23.0              1             1
24.0              1             1
25.0              1             1
26.0              1             1
28.0              1             1
29.0              1             1
30.0              1             1
31.0              1             1
32.0              1             1
33.0              1             1
34.0              1             1
3

In [4]:
## High degree nodes : total count is equal or higher than 3 
high_degree_nodes = all_nodes[all_nodes['total_degree'] >= 3]
print("\nHigh degree nodes (degree >= 3):")
print(high_degree_nodes[['source_count', 'target_count', 'total_degree']].to_string())
print(f"\nTotal count: {len(high_degree_nodes)}")


High degree nodes (degree >= 3):
       source_count  target_count  total_degree
11.0              2             1             3
22.0              2             1             3
38.0              2             2             4
41.0              4             1             5
42.0              1             2             3
43.0              1             2             3
46.0              2             1             3
50.0              2             1             3
53.0              1             2             3
57.0              1             2             3
62.0              1             2             3
67.0              1             2             3
78.0              1             2             3
79.0              2             1             3
84.0              1             2             3
91.0              2             1             3
92.0              1             2             3
95.0              1             2             3
103.0             2             1             3
113.0 

In [3]:
from collections import defaultdict


def build_graph(edges):
    graph = defaultdict(list)
    for _, row in edges.iterrows():
        graph[row['Source']].append(row['Target'])
    return graph

def get_all_paths(graph, start, end, path=[]):
    path = path + [start]
    if start == end:
        return [path]
    if start not in graph:
        return []
    paths = []
    for node in graph[start]:
        if node not in path:
            newpaths = get_all_paths(graph, node, end, path)
            paths.extend(newpaths)
    return paths

df_events['Date'] = pd.to_datetime(df_events['Date'], format='%d-%m-%Y')
event_dates = df_events.set_index('ID')['Date'].to_dict()

back_in_time_count = 0
for _, row in df_edges.iterrows():
    source_date = event_dates.get(row['Source'])
    target_date = event_dates.get(row['Target'])
    if source_date and target_date and target_date < source_date:
        back_in_time_count += 1

print(f'Number of back-in-time edges: {back_in_time_count}')



Number of back-in-time edges: 61


In [None]:
# def get_all_paths(df_edges):
#     # Create a dictionary to store edges for each node
#     edges_dict = {}
#     for _, row in df_edges.iterrows():
#         source = row['Source']
#         target = row['Target']
#         if source not in edges_dict:
#             edges_dict[source] = []
#         edges_dict[source].append(target)
    
#     def find_paths(current_node, current_path, all_paths):
#         # If current node is already in path, stop to avoid cycles
#         if current_node in current_path:
#             return
        
#         # Add current node to path
#         current_path.append(current_node)
        
#         # If current node has no outgoing edges, save the path
#         if current_node not in edges_dict:
#             all_paths.append(current_path.copy())
#             current_path.pop()
#             return
        
#         # Explore all possible next nodes
#         for next_node in edges_dict[current_node]:
#             find_paths(next_node, current_path, all_paths)
        
#         # Remove current node from path before backtracking
#         current_path.pop()
    
#     # Find all paths starting from each node
#     all_paths = []
#     for start_node in edges_dict.keys():
#         find_paths(start_node, [], all_paths)
    
#     return all_paths

# # Get all possible paths
# all_paths = get_all_paths(df_edges)

# # Print results
# print("\nAll possible paths (avoiding cycles):")
# for i, path in enumerate(all_paths, 1):
#     print(f"Path {i}: {' -> '.join(map(str, path))}")
# print(f"\nTotal number of paths: {len(all_paths)}")

In [9]:
import pandas as pd
from collections import defaultdict

# Load the edges CSV
edges = pd.read_csv('Dark_GD_Contest_Edges.csv')

# Build adjacency list
graph = defaultdict(list)
for _, row in edges.iterrows():
    src = row['Source']
    tgt = row['Target']
    graph[src].append(tgt)

# Find all back-in-time edges (edges that close a cycle)
def find_back_in_time_edges(graph):
    back_in_time_edges = set()
    visited = set()

    def dfs(node, path):
        if node in path:
            # Cycle detected, but don't mark here (we want to mark the edge that closes the cycle)
            return
        path.add(node)
        for neighbor in graph[node]:
            if neighbor in path:
                # This edge closes a cycle
                back_in_time_edges.add((node, neighbor))
            else:
                dfs(neighbor, path)
        path.remove(node)

    for node in graph:
        dfs(node, set())
    return back_in_time_edges

back_in_time_edges = find_back_in_time_edges(graph)

print(f"Number of back-in-time (cycle-closing) edges: {len(back_in_time_edges)}")
if back_in_time_edges:
    print("Back-in-time edges (Source, Target):")
    for edge in back_in_time_edges:
        print(edge)
else:
    print("No back-in-time edges found.")


RuntimeError: dictionary changed size during iteration