# Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import pickle
from collections import defaultdict
import matplotlib.colors as mcolors
import plotly.graph_objects as go

# Use Times New Roman for every matplotlib text element drawn in this notebook.
# NOTE(review): the font must be installed locally; presumably matplotlib
# falls back to its default family otherwise -- confirm on the target machine.
plt.rcParams.update({'font.family': 'Times New Roman'})

# Path

In [None]:
# Placeholder locations -- replace with real folders before running.
# Both values must end with '/' because file names are appended to them by
# plain string concatenation further down.
# Folder that contains the pickled flow table.
data_path = '/path/to/your/data/folder/Data for visulization/'
# Folder that receives the exported HTML figure.
figure_path = '/path/to/your/data/folder/Folder for outputs/'

# Data

In [None]:
# Load the precomputed flow table. The later cells treat it as a mapping from
# (source keyword, target keyword) pairs to a dict with "NSF" and "NSFC" counts.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open files that come from a trusted source.
flows_file = data_path + 'SPTFlows_NSF_NSFC.pkl'
with open(flows_file, 'rb') as handle:
    flow_counts = pickle.load(handle)

# Visualization

In [None]:
# Three-stop diverging palette: value 0 maps to C0 (orange), the midpoint to
# light grey and value 1 to C1 (blue).
C0 = '#ff7f0e'
middle_color = "#d3d3d3"
C1 = '#1f77b4'

# Ordered low -> mid -> high; the stops are spread evenly over [0, 1].
colors = [C0, middle_color, C1]
cmap_custom = mcolors.LinearSegmentedColormap.from_list(
    name="coolwarm_custom",
    colors=colors,
)

In [None]:
# Build the Sankey node list and link arrays from ``flow_counts``.
# Each key of ``flow_counts`` is a (source keyword, target keyword) pair and
# each value is a dict that holds at least the "NSF" and "NSFC" counts.

# Collect the keywords in first-seen order. ``dict.fromkeys`` de-duplicates
# like a set but keeps insertion order, so node indices (and therefore the
# rendered diagram) are identical on every run instead of depending on
# per-process string hash randomisation.
keywords = list(dict.fromkeys(kw for pair in flow_counts for kw in pair))
keyword_index = {kw: idx for idx, kw in enumerate(keywords)}

sources = []   # node index of each link's origin
targets = []   # node index of each link's destination
values = []    # link widths
colors = []    # link colours, filled by the colouring loop further down
node_flow_counts = defaultdict(lambda: {"NSF": 0, "NSFC": 0})

for (kw1, kw2), count_dict in flow_counts.items():
    sources.append(keyword_index[kw1])
    targets.append(keyword_index[kw2])
    # A link's width is the sum of every count stored for the pair.
    values.append(sum(count_dict.values()))

    # Each flow contributes to the running totals of both of its endpoints.
    for kw in (kw1, kw2):
        node_flow_counts[kw]["NSF"] += count_dict["NSF"]
        node_flow_counts[kw]["NSFC"] += count_dict["NSFC"]

In [None]:
# Default opacity given to every colour returned by get_gradient_color.
alpha = 0.825
# Colormap that get_gradient_color reads at call time.
cmap = cmap_custom

def get_gradient_color(val, alpha=alpha):
    """Look ``val`` up in the module-level ``cmap`` and return an RGBA tuple.

    The first three components come straight from the colormap; the
    colormap's own fourth component is dropped and replaced by ``alpha``.
    The default for ``alpha`` is the module-level ``alpha`` as it was when
    this function was defined.
    """
    rgba = cmap(val)
    red, green, blue = rgba[0], rgba[1], rgba[2]
    return (red, green, blue, alpha)

def remove_prefix(keyword):
    """Return ``keyword`` without a leading 's_', 'p_' or 't_' marker.

    Only one marker is stripped; keywords that start with none of the three
    markers are returned unchanged.
    """
    for marker in ('s_', 'p_', 't_'):
        if keyword.startswith(marker):
            return keyword[len(marker):]
    return keyword

# Colour every link by the mean "NSF" share of its two endpoint nodes.
# A share of 1 means the node's flows are all counted under "NSF", a share of
# 0 means they are all counted under "NSFC".


def _nsf_share(node_counts):
    """Return NSF / (NSF + NSFC) for one node's totals, or 0 when both are 0."""
    total = node_counts["NSF"] + node_counts["NSFC"]
    # NOTE(review): an empty node falls back to 0, i.e. the "NSFC" end of the
    # colormap rather than the neutral midpoint -- confirm this is intended.
    return node_counts["NSF"] / total if total != 0 else 0


# Rebuild the list from scratch so that re-running this cell cannot append a
# second copy of the colours (the list is otherwise only reset in an earlier
# cell).
colors = []
for kw1, kw2 in flow_counts:
    avg_ratio = (
        _nsf_share(node_flow_counts[kw1]) + _nsf_share(node_flow_counts[kw2])
    ) / 2

    color_rgba = get_gradient_color(avg_ratio)
    # The colormap yields components in [0, 1]; the rgba() string uses the
    # 0-255 scale for the colour channels and keeps alpha as a fraction.
    colors.append(f'rgba({color_rgba[0] * 255}, {color_rgba[1] * 255}, {color_rgba[2] * 255}, {color_rgba[3]})')

# Node labels are the keywords with their 's_' / 'p_' / 't_' marker removed,
# in the same order as ``keywords`` so they line up with the node indices.
nodes_with_group = [remove_prefix(name) for name in keywords]
# Every node is drawn in the same translucent mid-grey.
node_colors = ["rgba(128, 128, 128, 0.75)"] * len(keywords)

### ***Sankey diagram***

In [None]:
# Node appearance: labels and colours are index-aligned with ``keywords``;
# the outline width of 0 hides the node border.
node_spec = {
    "pad": 15,
    "thickness": 30,
    "line": {"color": "black", "width": 0},
    "label": nodes_with_group,
    "color": node_colors,
}

# Link definition: the four lists are parallel, one entry per flow.
link_spec = {
    "source": sources,
    "target": targets,
    "value": values,
    "color": colors,
}

fig = go.Figure(go.Sankey(node=node_spec, link=link_spec))

# Column headings placed just above the diagram (paper coordinates), at the
# left edge, the centre and the right edge respectively.
# NOTE(review): presumably these correspond to the 's_', 'p_' and 't_'
# keyword groups -- confirm against the data.
column_titles = (
    ("Urban Science", 0.025),
    ("Real-World Problems", 0.5),
    ("Urban Technology", 0.975),
)

for title_text, x_position in column_titles:
    fig.add_annotation(
        text=title_text,
        xref="paper", yref="paper",
        x=x_position, y=1.0475,
        showarrow=False,
        font=dict(size=45, color="black"),
    )

# Figure-wide styling: a 2000 x 2000 px canvas, Times New Roman text, hidden
# axis tick labels and fully transparent paper/plot backgrounds.
layout_options = {
    "width": 2000,
    "height": 2000,
    "font": {"family": "Times New Roman", "size": 36, "color": "black"},
    "xaxis": {"showticklabels": False},
    "yaxis": {"showticklabels": False},
    "paper_bgcolor": 'rgba(0,0,0,0)',
    "plot_bgcolor": 'rgba(0,0,0,0)',
}
fig.update_layout(**layout_options)

# Export the interactive figure as a standalone HTML file in ``figure_path``.
output_file = figure_path + 'Figure_name.html'
fig.write_html(output_file)