# Deep Learning-Based WWR and Floor Count Extraction from Façade Images to Improve UBEM

CISBAT 2025

[Ayca Duran](https://systems.arch.ethz.ch/ayca-duran), [Panagiotis Karapiperis](https://www.linkedin.com/in/panagiotis-karapiperis-ethz/), [Christoph Waibel](https://systems.arch.ethz.ch/christoph-waibel), [Arno Schlueter](https://systems.arch.ethz.ch/arno-schlueter)

### FLOOR COUNT Extraction Workflow

This notebook performs the floor count extraction using the rectified images and feature masks.

In [1]:
# Import Libraries
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import PIL.Image
import matplotlib.patches as patches
from scipy.cluster.hierarchy import fcluster, linkage

In [21]:
### Set Paths
# Input folders. The main loop lists the files in images_path and, for each one,
# reads a .png with the same base name from each of the three prediction folders.
images_path = "example/rectified/images"
facade_predictions_path = "example/rectified/facades"
# Two window masks are read per image; the main loop merges them with a logical OR.
window_predictions_path_original = "example/predictions/fcn_resnet50_rectified"
window_predictions_path_gsam = "example/predictions/gsam_rectified"

# Save Paths
# Folder receiving one diagnostic figure per processed image.
figs_save_path = "example/FLOORS/floors_detection"
# CSV written at the end of the main loop (columns: filename, FLOORS).
floors_path = "example/FLOORS/floors.csv"

# Create the figure folder up front; this also creates its parent "example/FLOORS",
# which is the directory floors_path points into.
os.makedirs(figs_save_path, exist_ok=True)

In [17]:
def count_white_rows_top_bottom(image):
    """
    Measure the fully white margins above and below the image content.

    A row is white when every pixel in it equals 255 (for a 3-D array, when
    every channel of every pixel equals 255).

    Parameters
    ----------
    image : np.ndarray
        Array of shape (H, W) or (H, W, C).

    Returns
    -------
    tuple of int
        (top_count, bottom_count): the number of consecutive white rows
        starting at the first row and at the last row, respectively. For an
        image that is white everywhere both counts equal H.
    """
    pixel_is_white = image == 255
    if image.ndim == 3:
        # Collapse the channel axis: a pixel is white only if all channels are 255
        pixel_is_white = np.all(pixel_is_white, axis=2)

    def _leading_white_rows(rows):
        # Walk the rows in the given order and stop at the first non-white one
        n_white = 0
        for row in rows:
            if not np.all(row):
                break
            n_white += 1
        return n_white

    top_count = _leading_white_rows(pixel_is_white)
    # Flipping the row order turns "trailing rows" into "leading rows"
    bottom_count = _leading_white_rows(pixel_is_white[::-1])
    return top_count, bottom_count

In [6]:
def replace_black_with_white(image, threshold=8):
    """
    Turn (near-)black pixels white, modifying `image` in place.

    For a 3-D array (H, W, C) a pixel is treated as black when every one of
    its channels is <= `threshold`; for any other array (e.g. a grayscale
    (H, W) image) each element is tested on its own. Matching pixels are set
    to 255 in all channels.

    Parameters
    ----------
    image : np.ndarray
        Image to clean. It is modified in place, so if it is a view of another
        array that array changes too; pass a copy to keep the original intact.
    threshold : int, optional
        Largest channel value still considered black. Defaults to 8.

    Returns
    -------
    np.ndarray
        The same array object that was passed in.
    """
    # Create a mask for black pixels
    black_pixels = image <= threshold
    if image.ndim == 3:
        # A colour pixel is black only if all of its channels are dark
        black_pixels = np.all(black_pixels, axis=-1)

    # Replace black pixels with white (255 is broadcast over the channels)
    image[black_pixels] = 255

    return image

In [8]:
### Function to fix the clustering of the windows to go from top to bottom
def reassign_cluster_labels(clusters):
    """
    Renumber cluster labels as 1, 2, 3, ... in order of first appearance.

    The first element gets label 1 and the label is incremented every time
    the input value differs from its predecessor. Consecutive runs of equal
    values therefore share one label.

    NOTE: only *adjacent* elements are compared. If the same input label
    occurs in two separate runs (e.g. [1, 2, 1]) the runs receive different
    output labels, so the input is expected to list the members of each
    cluster contiguously.

    Parameters
    ----------
    clusters : array-like of int
        1-D sequence of cluster ids, e.g. the output of
        scipy.cluster.hierarchy.fcluster.

    Returns
    -------
    np.ndarray
        Integer array of the same length as `clusters` (empty for empty input).
    """
    clusters = np.asarray(clusters)
    if clusters.size == 0:
        # Nothing to label; do not invent a cluster for empty input
        return np.array([], dtype=int)

    # True wherever an element starts a new run of labels
    starts_new_run = clusters[1:] != clusters[:-1]
    # Running count of run starts gives the 1-based run index of each element
    return np.concatenate(([1], 1 + np.cumsum(starts_new_run)))

## Loop FLOOR Count Estimation

In [None]:
# Floor count estimation for every rectified image.
# Per image: load the image and its facade / window masks, crop to the facade,
# split the window mask into individual windows, cluster the windows by the row
# of their uppermost pixel, and report the number of clusters as the floor count.
# A diagnostic figure is saved per processed image and all counts go to one CSV.

# Create an empty DataFrame with the defined columns
columns = ['filename'] + ['FLOORS']
df = pd.DataFrame(columns=columns)

# Kernel for removing small blobs from the window masks (size can be adjusted)
kernel1 = np.ones((5, 5), np.uint8)

# Iterate (sorted, so that the order of the CSV rows is reproducible)
for filename in sorted(os.listdir(images_path)):

    # Load img & masks (masks share the image's base name but are stored as .png)
    mask_name = filename.split(".")[0] + ".png"
    img = cv2.imread(os.path.join(images_path, filename))
    facade_prediction = cv2.imread(os.path.join(facade_predictions_path, mask_name), cv2.IMREAD_GRAYSCALE)
    window_prediction_fcn = cv2.imread(os.path.join(window_predictions_path_original, mask_name), cv2.IMREAD_GRAYSCALE)
    window_prediction_gsam = cv2.imread(os.path.join(window_predictions_path_gsam, mask_name), cv2.IMREAD_GRAYSCALE)

    # cv2.imread returns None (it does not raise) for unreadable or missing files,
    # e.g. hidden files in the image folder or an image without predictions.
    if any(layer is None for layer in (img, facade_prediction, window_prediction_fcn, window_prediction_gsam)):
        print(f"Skipping {filename}: image or one of its masks could not be read")
        continue

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # A pixel is a window if either of the two window predictions marks it
    window_prediction = np.logical_or(window_prediction_fcn, window_prediction_gsam).astype(np.uint8)

    # Crop left & right to the horizontal extent of the facade mask
    x_indices = np.where(facade_prediction > 0)[1]
    if x_indices.size == 0:
        print(f"Skipping {filename}: facade mask is empty")
        continue
    x_min, x_max = x_indices.min(), x_indices.max()

    cropped_img = img[:, x_min:x_max+1]
    cropped_facade = facade_prediction[:, x_min:x_max+1]
    cropped_windows = window_prediction[:, x_min:x_max+1]

    # Remove top & bottom white rows
    # (near-black pixels are whitened first so that they count as margin too)
    cropped_img = replace_black_with_white(cropped_img)
    num_white_top, num_white_bottom = count_white_rows_top_bottom(cropped_img)
    if num_white_top >= cropped_img.shape[0]:
        # Every row is white: cropping would leave empty arrays
        print(f"Skipping {filename}: no image content left after cropping")
        continue
    cropped_img = cropped_img[num_white_top:cropped_img.shape[0] - num_white_bottom,:]
    cropped_facade = cropped_facade[num_white_top:cropped_facade.shape[0] - num_white_bottom,:]
    cropped_windows = cropped_windows[num_white_top:cropped_windows.shape[0] - num_white_bottom,:]

    # Prepare
    # Remove small blobs from rectified window predictions
    cropped_windows = cv2.morphologyEx(cropped_windows, cv2.MORPH_OPEN, kernel1)

    ### Identify instances of windows with connected components
    # Label 0 is the background, so it is dropped from the list of window labels
    _, labels_per = cv2.connectedComponents(cropped_windows)
    unique_labels_per = np.unique(labels_per)
    unique_labels_per = unique_labels_per[unique_labels_per != 0]

    ### Find uppermost Point for each window
    windows_uppermost_points = {}
    for label in unique_labels_per:
        class_positions = np.argwhere(labels_per == label)  # (row, col) pairs
        uppermost_pixel = class_positions[np.argmin(class_positions[:, 0])]
        windows_uppermost_points[label] = tuple(uppermost_pixel)

    # Clustering needs at least two windows. This check runs before the figure
    # is created so that skipped images do not leave an open figure behind.
    if len(windows_uppermost_points) <= 1:
        print(f"Skipping {filename}: fewer than two windows detected")
        continue

    # Extract points as a NumPy array
    points_per = np.array(list(windows_uppermost_points.values()))
    # Use only the first coordinate (row) for clustering
    rows_per = points_per[:, 0].reshape(-1, 1)
    # Perform hierarchical clustering
    linked_per = linkage(rows_per, method='ward')
    clusters_per = fcluster(linked_per, t=70, criterion='distance')  # t=70 is the distance at which the dendrogram is cut
    ## Renumber clusters in order of appearance so that they run from top to bottom
    # NOTE(review): this relies on the window labels being ordered top to bottom,
    # so that windows of one cluster are adjacent in clusters_per - confirm.
    clusters_per = reassign_cluster_labels(clusters_per)
    num_floors = len(np.unique(clusters_per))

    # Group windows with clusters
    # Initialize a new mask with the same shape as the `labels` array
    clustered_mask_per = np.zeros_like(labels_per)
    # Map each instance label in `labels` to its cluster index (0 stays background)
    for instance_label, cluster_index in zip(unique_labels_per, clusters_per):
        clustered_mask_per[labels_per == instance_label] = cluster_index

    # Visualize rectified image and facade / windows predictions
    fig, axes = plt.subplots(1, 3, figsize=(20, 10), sharex=True)
    axes[0].imshow(cropped_img)
    axes[0].set_title(f"Image {filename}")
    axes[0].grid(False)

    axes[1].imshow(cropped_facade, cmap="rainbow", alpha= 0.5)
    axes[1].imshow(clustered_mask_per, cmap="rainbow", alpha= 0.5)
    axes[1].set_title(f"Windows Clustered in {num_floors} Floors")
    axes[1].grid(False)

    # Plot the uppermost points
    for label, uppermost_pixel in windows_uppermost_points.items():
        y, x = uppermost_pixel
        axes[1].plot(x, y, "bo")
        axes[1].text(x + 5, y, f"Window {label}", color="white", fontsize=10, bbox=dict(facecolor='black', alpha=0.5))

    # Visualize Floors
    mask = clustered_mask_per.astype(np.uint8)
    output = np.zeros_like(mask, dtype=np.uint8)

    # Loop over each cluster
    for cluster_id in np.unique(mask):
        if cluster_id == 0:
            continue  # skip background

        # Extract only this cluster
        cluster_mask = (mask == cluster_id).astype(np.uint8)
        # Label connected blobs within this cluster
        num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(cluster_mask, connectivity=8)
        # Skip background (label 0)
        centroids = centroids[1:]
        # Sort centroids left-to-right by x coordinate
        centroids = sorted(centroids, key=lambda c: c[0])
        axes[2].scatter(*zip(*centroids))
        # Convert to int points in the (N, 1, 2) layout used for cv2.polylines
        pts = np.array(centroids, dtype=np.int32).reshape(-1, 1, 2)
        # Draw polyline connecting the blobs for this cluster
        if len(pts) > 1:
            cv2.polylines(output, [pts], isClosed=False, color=int(cluster_id), thickness=5)

        # Put the floor label next to the middle blob of the row
        mid_idx = len(centroids) // 2
        mid_x, mid_y = map(int, centroids[mid_idx])
        floor_label = f"Floor {cluster_id}"
        axes[2].text(mid_x + 10, mid_y - 10, floor_label, color='white', fontsize=16, bbox=dict(facecolor='black', alpha=0.2))

    axes[2].imshow(cropped_img)
    axes[2].imshow(cropped_windows, cmap="gray", alpha=0.5)
    axes[2].imshow(output, cmap="rainbow", alpha=0.6)
    axes[2].set_title("Floors Visualization")
    plt.savefig(os.path.join(figs_save_path, filename), dpi=300, bbox_inches='tight') # Save clustered windows fig
    plt.close(fig)

    # Store results in DataFrame
    row = {'filename': filename, 'FLOORS': num_floors}
    df.loc[len(df)] = row
    #break

# Save Results to CSV
df.to_csv(floors_path, index=False)