# Converting the Modified Swiss Dwellings Dataset into TopologicPy Graphs
## Step 2
The Modified Swiss Dwellings is a machine learning-ready Floor Plan Dataset of Residential Building Complexes.
It was created at Delft University. The owner of the dataset is: Casper van Engelenburg.
The license for the dataset as well as this entire work and the dataset it creates is governed by:
the CC BY-SA 4.0 license (https://creativecommons.org/licenses/by-sa/4.0/).

This script converts the graphs found in the dataset into TopologicPy graphs. It preserves the semantic information by storing dictionary values at the graph, edge, and vertex levels. It also saves the polygons of the rooms in the dictionary of the vertices under the "geometry" key. However, it does not import structure, doors, or windows.
 

## Instructions

1. Pip install kagglehub: <code>pip install kagglehub</code>
2. Run the cell below <em>ONLY ONCE</em>
3. Make note of the printed <code>folder_path</code>

In [None]:
# Install dependencies as needed:
# pip install kagglehub[pandas-datasets]
import kagglehub
from kagglehub import KaggleDatasetAdapter

# Download the latest version of the dataset through kagglehub and keep the
# returned location so it can be noted down for the later cells.
dataset_handle = "caspervanengelenburg/modified-swiss-dwellings"
folder_path = kagglehub.dataset_download(dataset_handle)

print("Path to dataset files:", folder_path)
print("\nDone")

## Import the needed libraries

In [None]:
# You don't need these two lines if you have pip installed topologicpy. Remove!
import sys
sys.path.append("C:/Users/sarwj/OneDrive - Cardiff University/Documents/GitHub/topologicpy/src")

from topologicpy.Vertex import Vertex
from topologicpy.Wire import Wire
from topologicpy.Face import Face
from topologicpy.Cluster import Cluster
from topologicpy.Topology import Topology
from topologicpy.Dictionary import Dictionary
from topologicpy.Graph import Graph
from topologicpy.Plotly import Plotly
from topologicpy.Helper import Helper

from pathlib import Path
import os
import warnings
from tqdm.auto import tqdm
import pandas as pd

# Silence the FutureWarning that torch emits about `torch.load` being called
# with weights_only=False; it showed up on the author's machine and is only noise here.
torch_load_warning_pattern = r"You are using `torch\.load`.*weights_only=False"
warnings.filterwarnings("ignore", message=torch_load_warning_pattern, category=FutureWarning)

print("\nDone")

## Check Software Versions

In [None]:
import sys

# Print the reference versions this notebook was verified with, followed by
# the versions that are actually in use.
print("This workflow was designed and verified to work with python 3.11 and TopologicPy 0.8.61.\n")
py_version = sys.version.split()[0]
print("Python version:", py_version)

# Helper.Version is queried twice: first with check=False, then with check=True.
unchecked_version = Helper.Version(check=False)
print("TopologicPy Version:", unchecked_version)
print(" ")
checked_version = Helper.Version(check=True)
print("TopologicPy Version:", checked_version)
print("\nDONE")

## Set some constants

In [None]:
# Palette used to color apartments. get_faces picks the entry at
# (apartment_number - 1) and clamps to the last color when there are more
# apartments than entries.
APARTMENT_COLORS = [
    "#FF0000",
    "#FF7F00",
    "#FFBF00",
    "#FFFF00",
    "#BFFF00",
    "#7FFF00",
    "#00FF00",
    "#00FF7F",
    "#00FFFF",
    "#007FFF",
    "#0000FF",
    "#7F00FF",
]

# One color per room type; the conversion cell indexes this list with the
# integer "room_type" of a node, in parallel with ROOM_NAMES.
ROOM_COLORS = [
    "#1f77b4",
    "#e6550d",
    "#fd8d3c",
    "#fdae6b",
    "#fdd0a2",
    "#72246c",
    "#5254a3",
    "#6b6ecf",
    "#2ca02c",
    "#000000",
    "#ffc000",
    "#98df8a",
    "#d62728",
]

# Room name -> integer room type. The numbers match the positions of the same
# names in ROOM_NAMES. The last key used to be misspelled 'Blacony', which made
# a lookup by the correctly spelled name fail.
ROOM_NUMBERS = {'Bedroom': 0,
                'Livingroom': 1,
                'Kitchen': 2,
                'Dining': 3,
                'Corridor': 4,
                'Stairs': 5,
                'Storeroom': 6,
                'Bathroom': 7,
                'Balcony': 8}

# Human-readable name of each room type; indexed by the integer "room_type"
# stored on the graph nodes.
ROOM_NAMES = [
    "Bedroom",
    "Livingroom",
    "Kitchen",
    "Dining",
    "Corridor",
    "Stairs",
    "Storeroom",
    "Bathroom",
    "Balcony",
    "Structure",
    "Door",
    "Entrance Door",
    "Window",
]

# Room name -> zone number. The zone number is also used as an index into ZONE_COLORS.
ZONE_NUMBERS = {
    "Bedroom": 1,
    "Livingroom": 2,
    "Kitchen": 2,
    "Dining": 2,
    "Corridor": 2,
    "Stairs": 3,
    "Storeroom": 3,
    "Bathroom": 3,
    "Balcony": 4,
}

# Room name -> zone label, parallel to ZONE_NUMBERS.
ZONE_NAMES = {
    "Bedroom": "Zone1",
    "Livingroom": "Zone2",
    "Kitchen": "Zone2",
    "Dining": "Zone2",
    "Corridor": "Zone2",
    "Stairs": "Zone3",
    "Storeroom": "Zone3",
    "Bathroom": "Zone3",
    "Balcony": "Zone4",
}

# Zone palette, indexed by the zone number taken from ZONE_NUMBERS.
ZONE_COLORS = [
    "#1f77b4",
    "#ff7f0e",
    "#72246c",
    "#2ca02c",
    "#000000",
    "#ffc000",
    "#98df8a",
    "#d62728",
]

# CSV columns that are copied into the dictionary of every exported graph.
META_KEYS = [
    "site_id",
    "building_id",
    "plan_id",
    "floor_id",
    "elevation",
    "height",
    "unit_usage",
]

print("\nDone")

## Set File Paths (Change according to your folder location)

In [None]:
import os

# Location of the downloaded dataset on the author's machine; change it to the
# folder_path printed by the download cell on yours.
folder_path = r"C:\Users\sarwj\.cache\kagglehub\datasets\caspervanengelenburg\modified-swiss-dwellings\versions\6"

# The CSV sits directly inside folder_path; the pickled graphs live under
# modified-swiss-dwellings-v2/<train|test>/<graph_out|graph_in>.
csv_file_path = os.path.join(folder_path, "mds_V2_5.372k.csv")
dataset_splits = ("train", "test")
graph_out_folders = [
    os.path.join(folder_path, "modified-swiss-dwellings-v2", split, "graph_out")
    for split in dataset_splits
]
graph_in_folders = [
    os.path.join(folder_path, "modified-swiss-dwellings-v2", split, "graph_in")
    for split in dataset_splits
]

# The desired output folder for your JSON files.
json_output_folder = r"C:\Users\sarwj\OneDrive - Cardiff University\Desktop\json_graphs"
print("\nDone")

## Define some general helper functions

In [None]:
def filenames_without_ext(folder: str) -> list[str]:
    """Return the sorted stems (names without the last suffix) of the files directly inside `folder`.

    Sub-directories are ignored; only entries for which `is_file()` is true are listed.
    """
    stems = [entry.stem for entry in Path(folder).iterdir() if entry.is_file()]
    stems.sort()
    return stems

import pickle

def load_pickle(filename):
    """Load and return the object stored in the pickle file at `filename`.

    NOTE: unpickling can execute arbitrary code contained in the file, so only
    use this on files from a source you trust.
    """
    # The `with` block closes the file on exit; no explicit close() is needed.
    with open(filename, 'rb') as f:
        return pickle.load(f)

import re

def extract_xy(wkt: str):
    """
    Parse the coordinate pairs out of a WKT-like string such as:
    "POLYGON ((3.2 -0.94, 3.85 2.22, 6.41 1.65))"

    Only the text between the first '(' and the last ')' is scanned; when no
    such pair of parentheses exists the whole string is scanned instead.

    Returns: a list of (x, y) float tuples, in the order they appear.
    """
    open_at = wkt.find('(')
    close_at = wkt.rfind(')')
    # find/rfind return -1 when nothing is found, so this single chained
    # comparison also covers the "missing parenthesis" cases.
    if 0 <= open_at < close_at:
        body = wkt[open_at + 1:close_at]
    else:
        body = wkt

    # A float, optionally signed, optionally in scientific notation
    num = r'[-+]?(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][-+]?\d+)?'

    # Two numbers separated by whitespace form one "x y" pair
    matches = re.findall(rf'({num})\s+({num})', body)

    return [(float(x_text), float(y_text)) for x_text, y_text in matches]

def coords_to_face(coords):
    """Build a Face from a sequence of coordinates.

    The coordinates are turned into vertices, fused, and joined into a closed
    wire. Returns the Face made from that wire, or None when no wire could be
    built or the wire is not closed.
    """
    fused_vertices = Vertex.Fuse([Vertex.ByCoordinates(coord) for coord in coords])
    boundary = Wire.ByVertices(fused_vertices, close=True, silent=True)
    if not Topology.IsInstance(boundary, "wire"):
        return None
    if not Wire.IsClosed(boundary):
        return None
    return Face.ByWire(boundary, silent=True)

def str_to_face(geom):
    """Convert a WKT-like polygon string into a Face (None if coords_to_face cannot build one)."""
    return coords_to_face(extract_xy(geom))

def get_faces(result):
    """Build one Face per row of `result` and attach the row's values to it.

    Parameters
    ----------
    result : pandas.DataFrame
        Rows to convert. Each row must have a "geom" column holding a WKT-like
        polygon string; an "apartment_id" column is used when present.

    Returns
    -------
    list
        The faces that could be built. Each face carries its row as a
        dictionary, extended with "apartment_number" and "apartment_color".
        Rows whose geometry does not yield a face are skipped.

    Notes
    -----
    Apartments are numbered from 1 in order of first appearance in `result`,
    so the numbering (and therefore the colors) is the same on every run.
    Rows whose apartment id is not a string get number 0 and a grey color.
    """
    row_dicts = [row.to_dict() for _, row in result.iterrows()]

    # dict.fromkeys de-duplicates while keeping first-appearance order; a plain
    # set would give string ids a different order from one run to the next.
    apartment_ids = list(dict.fromkeys(
        apartment_id
        for apartment_id in (row_dict.get("apartment_id", None) for row_dict in row_dicts)
        if isinstance(apartment_id, str)
    ))
    apartment_numbers = {apartment_id: number
                         for number, apartment_id in enumerate(apartment_ids, start=1)}
    last_color_index = len(APARTMENT_COLORS) - 1

    faces = []
    for row_dict in row_dicts:
        apartment_id = row_dict.get("apartment_id", None)
        if isinstance(apartment_id, str):
            apartment_number = apartment_numbers[apartment_id]
            # More apartments than palette entries: reuse the last color.
            apartment_color = APARTMENT_COLORS[min(apartment_number - 1, last_color_index)]
        else:
            apartment_number = 0
            apartment_color = "#aaaaaa"
        row_dict['apartment_number'] = apartment_number
        row_dict['apartment_color'] = apartment_color
        face = str_to_face(row_dict['geom'])
        if face:
            face = Topology.SetDictionary(face, row_dict)
            faces.append(face)
    return faces

def match_vertex_to_face(v, faces):
    """Return the first face in `faces` for which Vertex.IsInternal(v, face) holds, or None."""
    return next((candidate for candidate in faces if Vertex.IsInternal(v, candidate)), None)

## How many graphs do you want to export? Set max_n to None if you want all graphs to be exported (thousands).

In [None]:
# Upper limit on the number of graphs exported by the conversion cell below.
# None means "no limit"; use a small integer for a quick trial run.
max_n = None
print("\nDone")

In [None]:
# Convert the pickled NetworkX graphs of the dataset into TopologicPy graphs and
# export, per floor plan, one JSON file with the graph and one with the room faces.

# 1) Load CSV (the "main" dataframe shipped with the dataset)
df = pd.read_csv(csv_file_path)

# Create the export folder up front so the JSON exports do not depend on it existing.
os.makedirs(json_output_folder, exist_ok=True)

graph_paths = []
geom_paths = []

num_graphs = 0

for graph_out_folder in graph_out_folders:
    # max_n is a budget for the whole export (train + test together); None
    # means no limit. It is deliberately never overwritten, so every folder is
    # processed and the cell can be re-run with the same setting.
    remaining = None if max_n is None else max_n - num_graphs
    if remaining is not None and remaining <= 0:
        break

    # Decide train/test from the name of the folder that contains "graph_out",
    # not from the whole path, which may contain "test" for unrelated reasons.
    split_name = os.path.basename(os.path.dirname(os.path.normpath(graph_out_folder)))
    if "test" in split_name:
        label = "Test Dataset"
        ml_type = "test"
    else:
        label = "Train Dataset"
        ml_type = "train"

    file_names = filenames_without_ext(graph_out_folder)
    if remaining is not None:
        file_names = file_names[:remaining]

    for file_name in tqdm(file_names, desc=label, total=len(file_names), leave=False):
        # The file name (without extension) is the floor id used in the CSV.
        floor_id = int(file_name)
        # get metadata from CSV file
        result = df.query('floor_id == @floor_id and entity_type == "area"')
        faces = get_faces(result)
        # All rows have the same meta key values, so the first row is enough.
        # A floor without any matching row gets None for every meta key.
        first_row_dict = result.iloc[0].to_dict() if len(result) > 0 else {}
        meta_dict_py = {"ml_type": ml_type}
        for meta_key in META_KEYS:
            meta_dict_py[meta_key] = first_row_dict.get(meta_key, None)
        meta_dict = Dictionary.ByPythonDictionary(meta_dict_py)

        # NOTE: unpickling can execute code stored in the file; only run this
        # on dataset files you trust.
        ng = load_pickle(os.path.join(graph_out_folder, file_name + ".pickle"))
        g = Graph.ByNetworkXGraph(ng, coordsKey="centroid")
        vertices = Graph.Vertices(g)
        mesh_data = Graph.MeshData(g)
        edges = mesh_data['edges']
        edge_dicts = mesh_data['edgeDictionaries']

        verts = []
        new_v_dicts = []
        for v in vertices:
            d = Topology.Dictionary(v)
            # Replace the numeric room type by readable names, colors and zones.
            node_type = Dictionary.ValueAtKey(d, "room_type")
            node_name = ROOM_NAMES[int(node_type)]
            node_color = ROOM_COLORS[int(node_type)]
            zone_type = ZONE_NUMBERS[node_name]
            zone_name = ZONE_NAMES[node_name]
            zone_color = ZONE_COLORS[zone_type]
            d = Dictionary.RemoveKey(d, "room_type")
            d = Dictionary.RemoveKey(d, "centroid")
            d = Dictionary.SetValuesAtKeys(d, ["node_type", "node_name", "node_color", "zone_type", "zone_name", "zone_color"], [node_type, node_name, node_color, zone_type, zone_name, zone_color])
            v = Topology.SetDictionary(v, d)

            # Rebuild the room polygon stored under "geometry".
            coords = Dictionary.ValueAtKey(d, "geometry")
            geo_face = coords_to_face(coords)
            if Topology.IsInstance(geo_face, "face"):
                geometry = [Vertex.Coordinates(face_vertex, outputType="xy", mantissa=6) for face_vertex in Topology.Vertices(geo_face)]
                d = Dictionary.SetValueAtKey(d, "geometry", geometry)
                # Find the CSV face that contains a point inside this room and
                # merge its values (apartment, etc.) into the node dictionary.
                iv = Face.InternalVertex(geo_face)
                parent_face = match_vertex_to_face(iv, faces)
                if Topology.IsInstance(parent_face, "face"):
                    parent_dict = Topology.Dictionary(parent_face)
                    d = Dictionary.ByMergedDictionaries(d, parent_dict)
                    d = Dictionary.SetValueAtKey(d, "area", Face.Area(geo_face))
                    geo_face = Topology.SetDictionary(geo_face, d)
                    faces.append(geo_face)
            verts.append(Vertex.Coordinates(v))
            new_v_dicts.append(Dictionary.PythonDictionary(d))

        # Rebuild the graph from the raw mesh data with the enriched vertex dictionaries.
        new_g = Graph.ByMeshData(verts, edges, vertexDictionaries=new_v_dicts, edgeDictionaries=edge_dicts)
        new_g = Topology.SetDictionary(new_g, meta_dict)
        edges = Graph.Edges(new_g)
        for e in edges:
            d = Topology.Dictionary(e)
            d = Dictionary.SetValueAtKey(d, "edge_width", 4)
            e = Topology.SetDictionary(e, d)

        json_graph_file_name = file_name + "_graph.json"
        json_geom_file_name = file_name + "_geom.json"
        json_graph_path = os.path.join(json_output_folder, json_graph_file_name)
        json_geom_path = os.path.join(json_output_folder, json_geom_file_name)
        graph_paths.append(json_graph_path)
        geom_paths.append(json_geom_path)
        _ = Graph.ExportToJSON(new_g, json_graph_path, overwrite=True)
        _ = Topology.ExportToJSON(faces, json_geom_path, overwrite=True)
        num_graphs += 1

print(num_graphs)
print("\nDone")

## Reload the graphs and the faces from the file system

In [None]:
# Reload every exported graph together with its geometry file and build two
# figures per floor plan: one colored by room type and one colored by apartment.
# NOTE(review): the conversion cell writes its JSON files directly into
# json_output_folder, while this cell reads from the "graphs" and "geometries"
# sub-folders of msd_output_folder — confirm the files have been placed there.
msd_output_folder = r"C:\Users\sarwj\OneDrive - Cardiff University\Desktop\msd_json"
graph_file_names = filenames_without_ext(os.path.join(msd_output_folder, "graphs"))
geom_file_names = filenames_without_ext(os.path.join(msd_output_folder, "geometries"))

graph_stems = [f for f in graph_file_names if "graph" in f]
geom_stems = [f for f in geom_file_names if "geom" in f]

graph_names = [os.path.join(msd_output_folder, "graphs", f) + ".json" for f in graph_stems]
geom_names = [os.path.join(msd_output_folder, "geometries", f) + ".json" for f in geom_stems]
# The id is the part of the FILE NAME before the first underscore. Splitting
# the full path instead would cut at the underscore in the folder name and give
# every graph the same id.
id_names = [f.split("_")[0] for f in graph_stems]
# Pair graphs and geometries by id rather than by list position, so a missing
# file cannot silently shift the pairing.
geom_path_by_id = {f.split("_")[0]: path for f, path in zip(geom_stems, geom_names)}


def _show_plan(faces, graph, color_key):
    """Return (without displaying) a 1200x1200 figure of `faces` and `graph`.

    `color_key` is the dictionary key used to color both the faces and the
    graph vertices. The camera is placed at [0, 0, 9] with +Y as the up direction.
    """
    return Topology.Show(faces,
              graph,
              faceColorKey=color_key,
              faceOpacity=1,
              vertexSize=1,
              edgeWidthKey="graphEdgeWidth",
              edgeColorKey="graphEdgeColor",
              vertexSizeKey="graphVertexSize",
              vertexBorderWidthKey="graphVertexBorderWidth",
              vertexColorKey=color_key,
              showVertexLabel=False,
              vertexLabelKey="label",
              camera = [0,0,9],
              up = [0,1,0],
              backgroundColor="white",
              width=1200,
              height=1200,
              showFigure=False)


enumerator = tqdm(zip(graph_names, id_names), desc="Loading Data", total=len(graph_names), leave=False)

figures = [] # list to hold the Plotly figures.
png_names = [] # list to hold the PNG file names
for graph_path, id_name in enumerator:
    geom_path = geom_path_by_id.get(id_name)
    if geom_path is None:
        print(f"No geometry file found for graph {id_name}; skipping.")
        continue

    graph = Graph.ByJSONPath(path = graph_path)
    faces = Topology.ByJSONPath(path = geom_path)
    # Push the faces slightly below z=0 so that the graph is drawn on top of them.
    faces = [Topology.Translate(f, 0, 0, -0.1) for f in faces]
    # Create a bounding rectangle
    cluster = Cluster.ByTopologies(faces)
    centroid = Topology.Centroid(cluster)
    br = Face.Rectangle(origin=centroid, width=80, length=80)
    br = Topology.Translate(br, 0, 0, -0.1)
    faces.append(br)
    vertices = Graph.Vertices(graph)

    # Give every vertex a label and the display settings read by Topology.Show.
    for v in vertices:
        d = Topology.Dictionary(v)
        room_name = Dictionary.ValueAtKey(d, "node_name", "Untitled")
        ap_number = Dictionary.ValueAtKey(d, "apartment_number", 0)
        if ap_number > 0:
            node_label = f"{room_name} (Apt: {ap_number})"
        else:
            node_label = f"{room_name}"
        d = Dictionary.SetValueAtKey(d, "label", node_label)
        d = Dictionary.SetValueAtKey(d, "graphVertexSize", 7)
        d = Dictionary.SetValueAtKey(d, "graphVertexBorderWidth", 1)
        v = Topology.SetDictionary(v, d)
    edges = Graph.Edges(graph)
    for e in edges:
        d = Topology.Dictionary(e)
        d = Dictionary.SetValueAtKey(d, "graphEdgeWidth", 2)
        d = Dictionary.SetValueAtKey(d, "graphEdgeColor", "black")
        e = Topology.SetDictionary(e, d)

    # Figures are appended in pairs (room types first, apartments second); the
    # PNG export cell relies on this even/odd order.
    figure = _show_plan(faces, graph, "node_color")
    figures.append(figure)
    png_names.append(id_name+"_roomtypes.png")
    figure = _show_plan(faces, graph, "apartment_color")
    figures.append(figure)
    png_names.append(id_name+"_apartments.png")

print("\nDone")


## Export the figures to PNG files

In [None]:
from topologicpy.Plotly import Plotly

# Figures alternate between the room-type view (even index) and the apartment
# view (odd index); each kind goes into its own sub-folder of json_output_folder.
png_folders = [os.path.join(json_output_folder, "images", "room_types"),
               os.path.join(json_output_folder, "images", "units")]
# Make sure both target folders exist before the first image is written.
for png_folder in png_folders:
    os.makedirs(png_folder, exist_ok=True)

enumerator = tqdm(enumerate(figures), desc="Exporting PNG", total=len(figures), leave=False)
for i, figure in enumerator:
    png_path = os.path.join(png_folders[i % 2], png_names[i])
    # NOTE(review): width/height are passed as strings, exactly as before — confirm
    # this is the type Plotly.ExportToImage expects.
    Plotly.ExportToImage(figure, png_path, format="png", width="1200", height="1200")
print("\nDone")

In [None]:
# Preview one plan, colored by apartment and with vertex labels switched on.
# `faces` and `graph` are whatever the reload cell left behind after its last iteration.
preview_options = {
    "faceColorKey": "apartment_color",
    "faceOpacity": 1,
    "vertexSize": 1,
    "edgeWidthKey": "graphEdgeWidth",
    "vertexSizeKey": "graphVertexSize",
    "vertexBorderWidthKey": "graphVertexBorderWidth",
    "vertexColorKey": "apartment_color",
    "showVertexLabel": True,
    "vertexLabelKey": "label",
    "camera": [0, 0, 5.2],
    "up": [0, 1, 0],
    "backgroundColor": "white",
    "width": 1024,
    "height": 900,
    "showFigure": True,
    "renderer": "png",
}
figure = Topology.Show(faces, graph, **preview_options)
print(figure)
print("\nDone")