In [17]:
import numpy as np
import pandas as pd
import json
from pypinyin import pinyin, lazy_pinyin, Style
# Render floats with two decimal places when pandas objects are displayed.
pd.options.display.precision = 2

In [18]:
# Load the timetable CSV from the working directory and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='train_time_table.csv')
df.head(n=5)

Unnamed: 0,车次,起点,终点,出发时间,到达时间,类别,服务
0,C1001,长春,延吉西,5:47,8:04,动车,2
1,C1002,延吉西,长春,6:20,8:25,动车,2
2,C1002,延吉西,长春,6:15,8:30,动车,2
3,C1003,长春,珲春,6:30,9:40,动车,2
4,C1004,延吉西,长春,7:42,9:59,动车,2


In [42]:
# Romanize Chinese station names (pinyin) for use as node labels.
# cache_map memoizes: original station name -> romanized name.
cache_map = dict()
# Module-level placeholders; the helper functions below never rebind them.
translate_start = translate_target = ""

def create_cache(arr):
    """Store the romanized form of both endpoint stations of one timetable row.

    Parameters
    ----------
    arr : row-like object indexable by the keys '起点' and '终点'.

    Names already present in the module-level ``cache_map`` are left untouched.
    A new name is stored as its ``lazy_pinyin`` syllables, each capitalized and
    joined without a separator. Returns ``None``; the only effect is on
    ``cache_map``.
    """
    # Read both endpoints up front, before anything is written to the cache.
    endpoints = (arr['起点'], arr['终点'])
    for name in endpoints:
        if name in cache_map:
            continue
        syllables = lazy_pinyin(name)
        cache_map[name] = "".join(part.capitalize() for part in syllables)
    
def translate_name(arr, col_name):
    """Return the cached romanized name for one endpoint of a timetable row.

    Parameters
    ----------
    arr : row-like object indexable by the keys '起点' and '终点'.
    col_name : 'start' selects the '起点' field; any other value selects '终点'.

    Raises ``KeyError`` if the selected name is not in ``cache_map``.
    """
    source_column = '起点' if col_name == 'start' else '终点'
    return cache_map[arr[source_column]]
    

In [43]:
# Container for the exported graph; the statements below fill every field in place.
station_statistic = {
    "#nodes": 0,          # number of stations kept as nodes
    "#links": 0,          # number of merged station-pair links
    "links": [],          # one {'source', 'target'} dict per link (station indices)
    "link_weights": {},   # "<source>-<target>" -> link weight
    "node_list": [],      # station names, position == node index
    "node_size": [],      # one accumulated count per node
    "pie_list": [],       # one list of {'x', 'y'} entries per node
}


# Fill cache_map with the romanized form of every station name in the timetable.
# The row-wise apply is used only for this side effect; its return value is discarded.
df.apply(create_cache, axis=1)

# Every name in both columns is now a key of cache_map, so a column-wise dictionary
# lookup gives the same values as calling translate_name on each row, without
# building a Series per row, and it stays well-defined on an empty frame.
df['start'] = df['起点'].map(cache_map)
df['target'] = df['终点'].map(cache_map)
# print(df.head(10))
# print(cache_map)

# stat[start][end] = number of timetable rows that run from `start` to `end`.
# Iterating the two columns directly avoids iterrows(), which builds a full Series
# for every row; insertion order of both dict levels is still first-appearance order.
stat = {}
for start, end in zip(df["start"], df["target"]):
    destinations = stat.setdefault(start, {})
    destinations[end] = destinations.get(end, 0) + 1

# Keep the origins whose departures, summed over all destinations, exceed 60,
# in the order they appear in `stat`.
stations = [
    origin
    for origin, destinations in stat.items()
    if sum(destinations.values()) > 60
]

#stations = ['北京南', '天津', '上海南', '金山卫', '武汉', '成都东', '贵阳北', '深圳北', '深圳', '广州东', '广州南', '哈尔滨西', '上海虹桥', '福州', '西安北', '上海', '重庆北', '北京西', '长沙南', '合肥南']
# Record the node set: count, names, and a zeroed size slot per station.
station_statistic.update({
    '#nodes': len(stations),
    'node_list': stations,
    'node_size': [0] * len(stations),
})
# One separate, initially empty pie-entry list per station.
station_statistic['pie_list'].extend([] for _ in range(len(stations)))
print(stations, len(stations))

# Map each station name to its position in `stations`; links refer to stations by
# this index. A comprehension is used so that no loop variable named `id` is left
# behind to shadow the builtin id() for the rest of the kernel session.
station_mapping = {station: position for position, station in enumerate(stations)}
# Merged edges, each a {'source', 'target', 'weight'} dict; filled by the loop below.
links_weighted = []
    
# Index of the edges emitted so far, keyed by (source index, target index), so the
# reverse direction of a station pair is found with one dictionary lookup instead of
# rescanning links_weighted for every route.
link_lookup = {}
for link in links_weighted:
    link_lookup.setdefault((link['source'], link['target']), link)

for start, targets in stat.items():
    # Origins that were not kept as nodes contribute nothing.
    if start not in station_mapping:
        continue
    source_id = station_mapping[start]

    weight = 0
    for target, num in targets.items():
        # Destinations that were not kept as nodes are skipped as well.
        if target not in station_mapping:
            continue
        target_id = station_mapping[target]

        station_statistic['pie_list'][source_id].append({'x': target, 'y': num})
        weight += num

        # If the reverse direction (target -> start) is already a link, add this
        # direction's count to it, so each station pair yields a single link.
        reverse_link = link_lookup.get((target_id, source_id))
        if reverse_link is not None:
            reverse_link['weight'] += num
        else:
            edge = {
                'source': source_id,
                'target': target_id,
                'weight': num
            }
            links_weighted.append(edge)
            link_lookup.setdefault((source_id, target_id), edge)
            station_statistic['#links'] += 1

    # Node size counts only the trains leaving `start` for other kept stations.
    station_statistic['node_size'][source_id] += weight

# Split each merged edge into the two exported structures: the bare endpoint pair
# goes to 'links', and its weight is stored under the key "<source>-<target>".
for link in links_weighted:
    src, dst = link['source'], link['target']
    station_statistic['links'].append({'source': src, 'target': dst})
    weight_key = '-'.join((str(src), str(dst)))
    station_statistic['link_weights'][weight_key] = link['weight']

#stations = ['Beijing South', 'Tianjing', 'Shanghai South', 'Jinshanwei', 'Wuhan', 'Chengdu East', 'Guiyang North', 'Shenzhen North', 'Shenzhen', 'Guangzhou East', 'Guangzhou South', 'Harbin West', 'Shanghai Hongqiao', 'Fuzhou', 'Xian North', 'Shanghai', 'Chongqing North', 'Beijing West', 'Changsha South', 'Hefei South']
print(station_statistic)

# ensure_ascii=False writes non-ASCII characters verbatim, so the file encoding is
# pinned to UTF-8 rather than left to the platform default.
with open("train_data_noweight.json", "w", encoding="utf-8") as f:
    json.dump(station_statistic, f, ensure_ascii=False, indent=4, separators=(',', ':'))

['BeiJingNan', 'TianJin', 'ShangHaiNan', 'JinShanWei', 'WuHan', 'ChengDuDong', 'GuiYangBei', 'ShenZhenBei', 'ShenZhen', 'GuangZhouDong', 'GuangZhouNan', 'HaErBinXi', 'ShangHaiHongQiao', 'FuZhou', 'XiAnBei', 'ShangHai', 'ChongQingBei', 'BeiJingXi', 'ChangShaNan', 'HeFeiNan'] 20
{'#nodes': 20, '#links': 55, 'links': [{'source': 0, 'target': 1}, {'source': 0, 'target': 15}, {'source': 0, 'target': 12}, {'source': 0, 'target': 13}, {'source': 0, 'target': 19}, {'source': 0, 'target': 11}, {'source': 1, 'target': 18}, {'source': 2, 'target': 3}, {'source': 4, 'target': 5}, {'source': 4, 'target': 16}, {'source': 4, 'target': 12}, {'source': 4, 'target': 13}, {'source': 4, 'target': 7}, {'source': 4, 'target': 17}, {'source': 4, 'target': 14}, {'source': 4, 'target': 10}, {'source': 4, 'target': 11}, {'source': 4, 'target': 6}, {'source': 5, 'target': 12}, {'source': 5, 'target': 13}, {'source': 5, 'target': 16}, {'source': 5, 'target': 17}, {'source': 5, 'target': 10}, {'source': 6, 'target