In [15]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import json
import sys
from io import StringIO

In [16]:
from viscom_metrics.util import get_metric_df_from_file, get_all_metrics_from_dir, split_camel_case

In [17]:
# Folder that holds the exported metric files for the sorting comparison
data_dir = "./latexData/sortings"

# Show the folder contents up front so it is clear which exports get loaded
files = os.listdir(data_dir)
print("Files in directory:", files)

Files in directory: ['metrics_SortingComparison_2025-04-11T10-11-48.json']


In [18]:
# Load the metrics stored under data_dir through the project helper.
# NOTE(review): the rendered output suggests a pandas DataFrame with one row per
# sorting variant — confirm against viscom_metrics.util.get_all_metrics_from_dir.
df_sortings_full = get_all_metrics_from_dir(data_dir)
df_sortings_full

Unnamed: 0,name,type,aspectRatio,edgeCrossings,totalEdgeCrossings,pathEfficiencyRatio,pathEfficiencyRatioNormalized,stress,pathContinuity,weightedPathContinuity,pathAngularPrediction,nodeEdgeOverlaps,nodeNodeOverlaps,totalPathLength,title,nodes,connections
0,viscom_byId,viscomDefault,0.983013,0.263217,234,0.998427,0.998096,0.361775,0.683235,0.595966,0.386638,1,1,21599.593366,SortingComparison,26,452
1,viscom_Random,viscomDefault,0.998197,0.289089,257,1.0,0.998628,0.344949,0.72787,0.76034,0.395,1,1,22817.778794,SortingComparison,26,452
2,viscom_Topological,viscomDefault,0.998405,0.218223,194,1.0,0.996096,0.354641,0.633578,0.637051,0.36728,1,1,19721.968841,SortingComparison,26,452
3,viscom_WeightedTopological,viscomDefault,0.985856,0.210349,187,1.0,0.998431,0.327972,0.601972,0.595782,0.334279,1,1,19844.639133,SortingComparison,26,452
4,viscom_BreadthFirst,viscomDefault,0.98897,0.248594,221,1.0,0.999175,0.338754,0.653632,0.711286,0.423873,1,1,18654.416891,SortingComparison,26,452
5,viscom_DepthFirst,viscomDefault,0.990434,0.107987,96,1.0,0.998435,0.309516,0.626105,0.604438,0.383813,1,1,16764.850515,SortingComparison,26,452
6,viscom_NodeScore,viscomDefault,0.989342,0.278965,248,1.0,0.997767,0.362099,0.756623,0.788301,0.39162,1,1,22829.41525,SortingComparison,26,452
7,viscom_SourceScoreWeighted,viscomDefault,0.986849,0.329584,293,1.0,0.999117,0.344551,0.684593,0.715065,0.411592,1,1,22115.804146,SortingComparison,26,452
8,viscom_SourceScoreDiff,viscomDefault,0.986849,0.329584,293,1.0,0.999117,0.344551,0.684593,0.715065,0.411592,1,1,22115.804146,SortingComparison,26,452
9,viscom_ChildCount,viscomDefault,0.984183,0.337458,300,0.996751,0.9967,0.36187,0.70112,0.65745,0.322786,1,1,22903.701582,SortingComparison,26,452


In [19]:
# Columns needed for the comparison table: the variant label plus the
# edge-crossing and path-length metrics.
relevant_sorting_cols = ["name", "edgeCrossings", "totalEdgeCrossings", "totalPathLength"]

# Narrow the full metrics frame down to exactly those columns
df_sortings = df_sortings_full.loc[:, relevant_sorting_cols]
df_sortings

Unnamed: 0,name,edgeCrossings,totalEdgeCrossings,totalPathLength
0,viscom_byId,0.263217,234,21599.593366
1,viscom_Random,0.289089,257,22817.778794
2,viscom_Topological,0.218223,194,19721.968841
3,viscom_WeightedTopological,0.210349,187,19844.639133
4,viscom_BreadthFirst,0.248594,221,18654.416891
5,viscom_DepthFirst,0.107987,96,16764.850515
6,viscom_NodeScore,0.278965,248,22829.41525
7,viscom_SourceScoreWeighted,0.329584,293,22115.804146
8,viscom_SourceScoreDiff,0.329584,293,22115.804146
9,viscom_ChildCount,0.337458,300,22903.701582


In [20]:
# "totalPathLengthNormalized" is the "totalPathLength" column divided by the MINIMUM
# of that column: the shortest layout gets 1.0 and every other row reads as a
# multiple of the best result.
# assign() returns a new frame, so df_sortings itself is left unchanged.
df_sortings_norm = df_sortings.assign(
    totalPathLengthNormalized=lambda frame: frame["totalPathLength"] / frame["totalPathLength"].min()
)

In [21]:
# Order the rows from fewest to most total edge crossings ("totalEdgeCrossings")
df_sortings_norm = df_sortings_norm.sort_values(by="totalEdgeCrossings")
df_sortings_norm

Unnamed: 0,name,edgeCrossings,totalEdgeCrossings,totalPathLength,totalPathLengthNormalized
12,viscom_WeightedFlow,0.088864,79,14165.64517,1.0
5,viscom_DepthFirst,0.107987,96,16764.850515,1.183487
11,viscom_Flow,0.107987,96,17215.748627,1.215317
3,viscom_WeightedTopological,0.210349,187,19844.639133,1.400899
13,viscom_WeightedTopologicalUnadapted,0.214848,191,19982.603521,1.410638
2,viscom_Topological,0.218223,194,19721.968841,1.392239
4,viscom_BreadthFirst,0.248594,221,18654.416891,1.316877
0,viscom_byId,0.263217,234,21599.593366,1.524787
6,viscom_NodeScore,0.278965,248,22829.41525,1.611604
1,viscom_Random,0.289089,257,22817.778794,1.610783


In [22]:
# Algorithms that are left out of the final table
names_to_ignore = [
    "Source Score Weighted",
    "Source Score Diff"
]

# Human-readable headers. The normalised path length takes the place of the raw
# one and is presented as "Path Length Ratio".
column_labels = {
    "name": "Sorting Algorithm",
    "edgeCrossings": "Edge Crossings",
    "totalEdgeCrossings": "Total Edge Crossings",
    "totalPathLengthNormalized": "Path Length Ratio",
}

df_sortings_norm = (
    df_sortings_norm
    # The raw path length is no longer needed once the normalised column exists
    .drop(columns=["totalPathLength"], errors="ignore")
    # Strip "viscom_" from the labels, then split the camel-case remainder into words
    .assign(name=lambda frame: frame["name"].str.replace("viscom_", "", regex=False).apply(split_camel_case))
    .rename(columns=column_labels)
    # Drop the ignored algorithms by their display name
    .loc[lambda frame: ~frame["Sorting Algorithm"].isin(names_to_ignore)]
)
df_sortings_norm


Unnamed: 0,Sorting Algorithm,Edge Crossings,Total Edge Crossings,Path Length Ratio
12,Weighted Flow,0.088864,79,1.0
5,Depth First,0.107987,96,1.183487
11,Flow,0.107987,96,1.215317
3,Weighted Topological,0.210349,187,1.400899
13,Weighted Topological Unadapted,0.214848,191,1.410638
2,Topological,0.218223,194,1.392239
4,Breadth First,0.248594,221,1.316877
0,By Id,0.263217,234,1.524787
6,Node Score,0.278965,248,1.611604
1,Random,0.289089,257,1.610783


In [23]:
# Presentation copy of the table; its numeric cells are turned into LaTeX strings
# (minimum of each column wrapped in \textbf{}) further below.
df_sortings_format = df_sortings_norm.copy()

# Express the path length ratio as a percentage (1.0 -> 100)
df_sortings_format["Path Length Ratio"] = df_sortings_format["Path Length Ratio"].mul(100)

# [column name, str.format template] pairs for the columns that get formatted
value_cols = [
    ["Edge Crossings", "{:.2f}"],
    ["Total Edge Crossings", "{:.0f}"],
    ["Path Length Ratio", "{:.1f}\\%"],
]

# Function to bold the min or max values
def bold_extremes(col, mode='min', formatter = None):
    """Format a numeric column as strings and wrap its extreme value(s) in ``\\textbf{}``.

    Args:
        col: pandas Series of numeric values.
        mode: ``'min'`` to highlight the smallest value, ``'max'`` to highlight
            the largest one.
        formatter: ``str.format`` template applied to every value; falls back
            to ``"{:.2f}"`` when omitted or empty.

    Returns:
        A Series of formatted strings with the same index as ``col``. Every
        entry equal to the extreme value is bolded, so ties are all highlighted.

    Raises:
        ValueError: if ``mode`` is neither ``'min'`` nor ``'max'``.
    """
    formatter = formatter or "{:.2f}"

    if mode == 'min':
        extreme_val = col.min()
    elif mode == 'max':
        extreme_val = col.max()
    else:
        # Fail with a clear message instead of hitting an unbound `extreme_val` below
        raise ValueError(f"mode must be 'min' or 'max', got {mode!r}")

    def render(x):
        text = formatter.format(x)
        # The comparison uses the raw value, not the rounded text
        return f"\\textbf{{{text}}}" if x == extreme_val else text

    return col.apply(render)

# Replace each metric column by its formatted strings, with the column minimum in bold
for col, formatter in value_cols:
    df_sortings_format[col] = bold_extremes(df_sortings_format[col], mode='min', formatter=formatter)  # use mode='max' to highlight maxima instead
df_sortings_format

Unnamed: 0,Sorting Algorithm,Edge Crossings,Total Edge Crossings,Path Length Ratio
12,Weighted Flow,\textbf{0.09},\textbf{79},\textbf{100.0\%}
5,Depth First,0.11,96,118.3\%
11,Flow,0.11,96,121.5\%
3,Weighted Topological,0.21,187,140.1\%
13,Weighted Topological Unadapted,0.21,191,141.1\%
2,Topological,0.22,194,139.2\%
4,Breadth First,0.25,221,131.7\%
0,By Id,0.26,234,152.5\%
6,Node Score,0.28,248,161.2\%
1,Random,0.29,257,161.1\%


In [24]:
# Display-name overrides for the algorithm column. Only cells that match a key
# exactly are replaced: "Weighted Flow" is set in bold via \textbf{}, and the
# listed variants get a trailing asterisk (one of them also a shortened label).
display_name_overrides = {
    "Weighted Flow": "\\textbf{Weighted Flow*}",
    "Flow": "Flow*",
    "Weighted Topological Unadapted": "Weight. Topo. Unadapted*",
    "Weighted Topological": "Weighted Topological*",
}
df_sortings_format["Sorting Algorithm"] = df_sortings_format["Sorting Algorithm"].replace(display_name_overrides, regex=False)


In [25]:

# df_sortings_format.rename(columns={"Edge Crossings": "EC", "Total Edge Crossings": "TEC", "Path Length Ratio": "PLR"}, inplace=True)


In [26]:
# Multi-word column headers are stacked vertically with the LaTeX \makecell command:
# \makecell[c]{word1 \\ word2 ...}
def makecell(text: str):
    """Wrap ``text`` in a centred ``\\makecell`` with one whitespace-separated word per line."""
    stacked_words = r" \\ ".join(text.split())
    return rf"\makecell[c]{{{stacked_words}}}"

# Convert every column header into its stacked \makecell form
df_sortings_format = df_sortings_format.rename(columns=makecell)
df_sortings_format

Unnamed: 0,\makecell[c]{Sorting \\ Algorithm},\makecell[c]{Edge \\ Crossings},\makecell[c]{Total \\ Edge \\ Crossings},\makecell[c]{Path \\ Length \\ Ratio}
12,\textbf{Weighted Flow*},\textbf{0.09},\textbf{79},\textbf{100.0\%}
5,Depth First,0.11,96,118.3\%
11,Flow*,0.11,96,121.5\%
3,Weighted Topological*,0.21,187,140.1\%
13,Weight. Topo. Unadapted*,0.21,191,141.1\%
2,Topological,0.22,194,139.2\%
4,Breadth First,0.25,221,131.7\%
0,By Id,0.26,234,152.5\%
6,Node Score,0.28,248,161.2\%
1,Random,0.29,257,161.1\%


In [27]:
# Create a latex table from the dataframe
# escape=False keeps the \textbf{...} and \makecell{...} markup in the cells and headers intact
latex_table = df_sortings_format.to_latex(
    index=False,
    escape=False,
    column_format="lccc",
    float_format="%.2f",
)
print(latex_table)


\begin{tabular}{lccc}
\toprule
\makecell[c]{Sorting \\ Algorithm} & \makecell[c]{Edge \\ Crossings} & \makecell[c]{Total \\ Edge \\ Crossings} & \makecell[c]{Path \\ Length \\ Ratio} \\
\midrule
\textbf{Weighted Flow*} & \textbf{0.09} & \textbf{79} & \textbf{100.0\%} \\
Depth First & 0.11 & 96 & 118.3\% \\
Flow* & 0.11 & 96 & 121.5\% \\
Weighted Topological* & 0.21 & 187 & 140.1\% \\
Weight. Topo. Unadapted* & 0.21 & 191 & 141.1\% \\
Topological & 0.22 & 194 & 139.2\% \\
Breadth First & 0.25 & 221 & 131.7\% \\
By Id & 0.26 & 234 & 152.5\% \\
Node Score & 0.28 & 248 & 161.2\% \\
Random & 0.29 & 257 & 161.1\% \\
Child Count & 0.34 & 300 & 161.7\% \\
Degree & 0.37 & 333 & 172.0\% \\
\bottomrule
\end{tabular}

