In [1]:
# Notebook setup: enable module auto-reloading and configure plotting defaults.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline, directly below the cell that produces them.
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

# Ask the inline backend for high-resolution ('retina') figure output.
%config InlineBackend.figure_format = 'retina'
# Default figure size, base font size, and the default format used when saving figures.
plt.rcParams['figure.figsize'] = 8, 5
plt.rcParams['font.size'] = 12
plt.rcParams['savefig.format'] = 'pdf'
# Apply seaborn's 'darkgrid' style to subsequent plots.
sns.set_style('darkgrid')

In [32]:
import networkx as nx

# Read the edge list file into a directed graph, then display the
# (number of edges, number of nodes) pair as the cell output.
G: nx.DiGraph = nx.read_edgelist(
    'yekaterinburg_speed_history.edgelist',
    create_using=nx.DiGraph(),
)
G.number_of_edges(), G.number_of_nodes()

(4347, 2434)

Проверим, что каждой паре координат соответствует ровно одна вершина графа

In [3]:
# Verify that every (x, y) coordinate pair is attached to exactly one node id.
# Both endpoints of every edge go through the same check, and all mismatches
# are collected so that a failure reports every offending pair at once.

# Maps each (x, y) coordinate pair to the first node id seen at that position.
coords_unique_points = {}
# One (coords, already_registered_node, conflicting_node) entry per mismatch.
coords_conflicts = []

for u, v, data in G.edges(data=True):
    assert u != v, 'There are loops'
    endpoints = (
        (u, (data['x_coordinate_start'], data['y_coordinate_start'])),
        (v, (data['x_coordinate_end'], data['y_coordinate_end'])),
    )
    for node, coords in endpoints:
        # setdefault registers the node the first time these coordinates are
        # seen; otherwise it returns the node that already owns them.
        known_node = coords_unique_points.setdefault(coords, node)
        if known_node != node:
            coords_conflicts.append((coords, known_node, node))

assert not coords_conflicts, (
    f'Coordinates shared by several nodes: {coords_conflicts}'
)

Проверим, что в графе нет повторяющихся рёбер — ни по паре вершин, ни по координатам концов

In [26]:
# Check that no edge is repeated, neither as a (source, target) node pair nor
# as a (start x, start y, end x, end y) coordinate tuple.
unique_edges_coords, unique_edges = set(), set()

for u, v, data in G.edges(data=True):
    edge_coords = tuple(
        data[key]
        for key in (
            'x_coordinate_start',
            'y_coordinate_start',
            'x_coordinate_end',
            'y_coordinate_end',
        )
    )
    edge = (u, v)

    # Both checks run before anything is recorded for the current edge.
    assert edge not in unique_edges
    assert edge_coords not in unique_edges_coords

    unique_edges.add(edge)
    unique_edges_coords.add(edge_coords)

Проверим, что граф действительно ориентированный: в нём должно быть много пар вершин, соединённых двумя рёбрами (туда и обратно)

In [29]:
from collections import defaultdict

# For every unordered node pair, count how many directed edges connect it
# (1 = one direction only, 2 = both directions).
unique_non_directed_edges = defaultdict(int)

for u, v, data in G.edges(data=True):
    assert u != v, 'There are loops'
    # Order the pair so that (a, b) and (b, a) share one dictionary key.
    edge = (u, v)
    if not u < v:
        edge = (v, u)
    unique_non_directed_edges[edge] += 1

# Histogram: how many node pairs are connected by 1 edge, by 2 edges, ...
value_counts = defaultdict(int)
for multiplicity in unique_non_directed_edges.values():
    value_counts[multiplicity] += 1
value_counts

defaultdict(int, {2: 1723, 1: 901})

Визуализируем координаты на карте

In [33]:
import folium

# Draw every edge of G as a blue segment between its start and end
# coordinates on a map created at a fixed location.
# The y coordinate is passed first in each location pair
# (presumably y = latitude, x = longitude -- confirm against the data).
m = folium.Map(location=[56.896876, 60.589692])

for u, v, data in G.edges(data=True):
    start_point = (data['y_coordinate_start'], data['x_coordinate_start'])
    end_point = (data['y_coordinate_end'], data['x_coordinate_end'])
    folium.PolyLine(
        locations=[start_point, end_point],
        color='blue',
        weight=5,
        opacity=0.8,
    ).add_to(m)
m

Найдём компоненты слабой связности (направление рёбер не учитывается)

In [6]:
# Weakly connected components of G (edge direction is ignored), stored as a
# list of node sets sorted largest first. networkx does not document any size
# ordering for the generator, so sorting explicitly guarantees that wcc[0]
# is the biggest component.
wcc = sorted(nx.weakly_connected_components(G), key=len, reverse=True)
len(wcc)

10

Визуализируем их

In [None]:
# For each weakly connected component: print its (edge count, node count) and
# display a separate map with the component's edges drawn as blue segments.
for component_nodes in wcc:
    component = G.subgraph(component_nodes)
    print(component.number_of_edges(), component.number_of_nodes())

    m = folium.Map(location=[56.896876, 60.589692])
    for u, v, data in component.edges(data=True):
        # The y coordinate is passed first in each location pair.
        start_point = (data['y_coordinate_start'], data['x_coordinate_start'])
        end_point = (data['y_coordinate_end'], data['x_coordinate_end'])
        folium.PolyLine(
            locations=[start_point, end_point],
            color='blue',
            weight=5,
            opacity=0.8,
        ).add_to(m)
    display(m)

Возьмём самую большую компоненту связности

In [30]:
# Keep only the largest weakly connected component and save it as a new
# edge list. The component is selected explicitly by node count: the order in
# which networkx yields components is not documented to be by size, so
# wcc[0] is not guaranteed to be the largest one.
largest_wcc = max(wcc, key=len)
neq_G = G.subgraph(largest_wcc)
nx.write_edgelist(neq_G, 'yekaterinburg_speed_history_cleared.edgelist')

In [2]:
import networkx as nx

# Reload the graph from the cleaned edge list as a directed graph, then
# display the (number of edges, number of nodes) pair as the cell output.
G: nx.DiGraph = nx.read_edgelist(
    'yekaterinburg_speed_history_cleared.edgelist',
    create_using=nx.DiGraph(),
)
G.number_of_edges(), G.number_of_nodes()

(3552, 2043)