The method is described in [Intro2Robotics: Connected Components in a Binary Image](https://www.youtube.com/watch?v=ticZclUYy88), and a Python implementation is illustrated [here](https://zhuanlan.zhihu.com/p/97689424). Basically, we need to examine each pixel in each row, giving pixels different labels if they are not connected, and do this row by row. Eventually, we can update the graph with an equivalency list, which shows which components are essentially the same.

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Read the input as a table; header=None means no line is used as column
# names, so the columns get integer labels.
raw_image = pd.read_table("input_question_4", header=None)
nrow_total = raw_image.shape[0]
ncol_total = raw_image.shape[1]
raw_image

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1
1,1,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0
2,0,1,1,1,1,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0
3,0,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,1,0,1,1
4,1,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1
5,1,1,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1
6,1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1
7,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,1
8,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,1,0
9,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,0,0,0,1


We can then collect all pixels labelled "1" into a set, called the **pixel set**.

In [4]:
# pixel value in pd
def pixel_value(x, y):
    """Look up one pixel of the global ``raw_image``.

    The lookup is ``raw_image[y][x]``: ``y`` picks the column label and ``x``
    the index label inside that column.

    Returns a two-item list ``[(x, y), value]``.
    """
    coord = (x, y)
    value = raw_image[y][x]
    return [coord, value]

# Map the coordinate of every pixel whose value is 1 to that value, visiting
# each column y in turn and every row x inside it.
pixel_set = {}
for y in range(ncol_total):
    for x in range(nrow_total):
        coord, value = pixel_value(x, y)
        if value == 1:
            pixel_set[coord] = value

We then scan each row from the very left to the very right, and do this row by row. We should know that:

1. For each pixel, we consider all adjacent pixels, which form an adjacent pixel set that is continually updated (the **adjacency set**).
2. We check whether there are recognisable pixels in the **adjacency set**, and: 1) give them the same label if they are not yet labelled; 2) if they are already labelled, add the two labels to an **equivalency set**.
3. Finally, after all pixels are labelled, we update the whole image using our **equivalency set**, which produces the desired output.

In [5]:
# 4-connectivity adjacency_set
def adjacency_set(coord, data):
    """Return the 4-connected neighbourhood of ``coord`` that exists in ``data``.

    ``data`` maps ``(x, y)`` coordinates to pixel values. When
    ``data[coord]`` equals 1 the result is a list holding ``coord`` itself
    followed by those of its left/right/up/down neighbours that are keys of
    ``data``, in the fixed order ``(x-1, y)``, ``(x+1, y)``, ``(x, y-1)``,
    ``(x, y+1)``. For any other value the function returns ``None``.

    Raises ``KeyError`` when ``coord`` is not a key of ``data``.
    """
    if data[coord] != 1:
        return None

    x, y = coord[0], coord[1]
    # The pixel itself comes first, then its four axis-aligned neighbours.
    candidates = ((x, y), (x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1))
    return [candidate for candidate in candidates if candidate in data]

In [6]:
# Two dictionaries over the same pixel coordinates: data_set is the pixel
# dict itself (used for reading), label_set has the same keys with every
# value initialised to 0 (used for labelling).
data_set = pixel_set
label_set = dict.fromkeys(data_set, 0)

We will read from label_set to see whether a label is needed and make all changes to the label set.

In [7]:
label_n = 0

# One neighbourhood list per coordinate, in label_set iteration order.
adjacency_set_all = [adjacency_set(coord, data_set) for coord in label_set]

# adjacency_set_all

In [None]:
def compare_merge(list1, list2, dataset):
    """Merge ``list2`` into ``list1`` when the two share at least one element.

    Args:
        list1: List that absorbs the merge. It is extended in place with the
            elements of ``list2`` it does not already contain.
        list2: Candidate list to fold into ``list1``. It is never modified.
        dataset: List of lists. On a merge, the entry that *is* ``list2``
            (same object) is deleted from it; if no such entry exists the
            dataset is left untouched.

    Returns:
        The elements of ``list1`` as a set, after any merge.
    """
    # Pairing a list with itself leaves nothing to merge, and appending to a
    # list while iterating over it would never terminate.
    if list1 is list2:
        return set(list1)

    members = set(list1)
    # Merge only on real overlap. Comparing a list against None is always
    # true, which would fold every pair together.
    if any(item in members for item in list2):
        for item in list2:
            if item not in members:
                members.add(item)
                list1.append(item)
        # Delete by identity: list.remove() matches by equality and could
        # drop a different list with the same contents, list1 included.
        for index, candidate in enumerate(dataset):
            if candidate is list2:
                del dataset[index]
                break
    return members

## Takes too long to run; could not be completed in a Jupyter Notebook.
# Offer every pair of neighbourhood lists in adjacency_set_all to
# compare_merge().
# NOTE(review): both loops iterate adjacency_set_all while compare_merge()
# removes entries from that same list, and the inner loop also pairs each
# list with itself (list1 is list2) -- confirm this is intended.
# NOTE(review): connected_set is not used in this cell and the return value
# of compare_merge() is discarded.
connected_set=[]
for list1 in adjacency_set_all:
    for list2 in adjacency_set_all:
        compare_merge(list1, list2, adjacency_set_all)

It turns out we don't actually need two separate steps: if we keep running this step repeatedly, we will get the desired result. Nevertheless, we have included the second pass here:

The basic idea here is: starting from 1, ask which labels in the whole list 1 is equivalent to.

In [None]:
# Second pass: build label_dict, which maps each label to the value of i
# under which its group was collected.
# NOTE(review): `labels` is not defined in the visible cells; presumably a
# list of groups of equivalent labels -- confirm.
i=0
label_dict={0:0}  # label 0 maps to itself from the start
while i < len(labels):
    equals=[ ]
    for x in labels:
        for y in x:
            # Collect the whole group x for each of its elements that equals
            # i, unless that value is already a key of label_dict.
            if y == i and y not in label_dict:
                equals+=x
    # Every member of the collected groups is mapped to i (an existing entry
    # for that member is overwritten).
    for z in equals:
        label_dict[z]=i
    i+=1
len(label_dict)
label_dict

In [None]:
# Start from an array copy of the input and overwrite every coordinate in
# label_set with the label stored for it.
updated_image = np.array(raw_image)
for label, value in label_set.items():
    updated_image[label[0], label[1]] = value
pd.DataFrame(updated_image)

In [None]:
# Writing the output: one line per image row, every value followed by a tab.
# The with-block closes the file even if a write fails.
# NOTE(review): mode "a" appends, so running this cell again adds the rows to
# the existing file a second time -- confirm that is intended.
with open("output_question_4.txt", "a") as output:
    for row in updated_image:
        allinrow = "".join(str(i) + "\t" for i in row)
        output.write(allinrow + "\n")

In [None]:
# Group points into tuples of visited points, consuming point_unvisited and
# list_unvisited along the way.
# NOTE(review): point0, list0, point_unvisited, list_unvisited and
# list_visited are not defined in the visible cells; presumably collections
# of points / lists of points prepared earlier -- confirm.
for pointx in point0:
    
    # Only handle points that are still in point_unvisited
    if pointx in point_unvisited:
        point_visited=set()
        for listy in list0:
            
            # Consume each list that contains this point and is still in list_unvisited
            if pointx in listy and listy in list_unvisited:
                list_unvisited.remove(listy)
                for pointy in listy:
                    # Move every still-unvisited member of the list into point_visited
                    if pointy in point_unvisited:
                        point_visited.add(pointy)
                        point_unvisited.remove(pointy)
                        
    # NOTE(review): point_visited is only assigned inside the branch above but
    # is read here for every pointx. When the branch is skipped this reuses
    # the set from an earlier iteration (or fails with NameError if none
    # exists yet, unless it is defined in an earlier cell).
    if len(point_visited) > 0:
        list_visited.append(tuple(point_visited))
        # De-duplicate the collected tuples; going through set() does not
        # keep the list order.
        list_visited=list(set(list_visited))

print(len(point_unvisited), len(list_unvisited), len(list_visited))