The method is described in [Intro2Robotics: Connected Components in a Binary Image](https://www.youtube.com/watch?v=ticZclUYy88) and its Python implementation has been illustrated [here](https://zhuanlan.zhihu.com/p/97689424). Basically, we need to interrogate each Px in each row and label them different number if they are not connected and do this row by row. Eventually, we are able to update the graph with an equivalency list, which should which components are essentially the same.

In [22]:
import pandas as pd
import numpy as np

In [23]:
RawImage = pd.read_table("input_question_4", header = None) 
# Remove auto-assigned col names
RawImage

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,0,0,0,0,1,1,0,0,0,1,0,1,0,1,1,1,0,0,1,1
1,1,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,1,0,0
2,0,1,1,1,1,1,1,0,1,0,1,0,1,0,0,0,0,0,1,0
3,0,0,0,0,1,1,0,0,0,0,1,0,0,0,1,1,1,0,1,1
4,1,0,0,1,0,0,0,0,0,0,1,1,1,0,1,1,1,0,1,1
5,1,1,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,1,0,1
6,1,1,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,1
7,0,1,0,0,0,1,1,0,0,0,0,1,1,0,0,0,0,1,1,1
8,0,0,1,1,1,0,0,1,0,1,0,0,1,0,0,0,1,1,1,0
9,1,0,1,0,1,0,1,1,1,1,0,0,1,0,1,0,0,0,0,1


And then we can make all Px labelled "1" a set, called **Px set**.

In [225]:
# Px value in pd
def Px_value(x,y, pd):
    # return an array with (coord, value) 
    return [tuple([x,y]),pd[y][x]] 

def AllColouredPx(image):
    AllColouredPx=[]
    nrow_total, ncol_total = image.shape
    for y in range (ncol_total):
        for x in range (nrow_total):
            if Px_value(x,y, RawImage)[1]==1:
                AllColouredPx+=tuple([(x,y)])
    return AllColouredPx
AllColouredPx(RawImage) 

[(1, 0),
 (4, 0),
 (5, 0),
 (6, 0),
 (9, 0),
 (2, 1),
 (5, 1),
 (6, 1),
 (7, 1),
 (1, 2),
 (2, 2),
 (5, 2),
 (8, 2),
 (9, 2),
 (2, 3),
 (4, 3),
 (5, 3),
 (8, 3),
 (0, 4),
 (2, 4),
 (3, 4),
 (8, 4),
 (9, 4),
 (0, 5),
 (2, 5),
 (3, 5),
 (5, 5),
 (7, 5),
 (2, 6),
 (6, 6),
 (7, 6),
 (9, 6),
 (8, 7),
 (9, 7),
 (1, 8),
 (2, 8),
 (9, 8),
 (0, 9),
 (1, 9),
 (8, 9),
 (9, 9),
 (2, 10),
 (3, 10),
 (4, 10),
 (0, 11),
 (4, 11),
 (6, 11),
 (7, 11),
 (2, 12),
 (4, 12),
 (6, 12),
 (7, 12),
 (8, 12),
 (9, 12),
 (0, 13),
 (5, 13),
 (0, 14),
 (1, 14),
 (3, 14),
 (4, 14),
 (9, 14),
 (0, 15),
 (3, 15),
 (4, 15),
 (3, 16),
 (4, 16),
 (5, 16),
 (8, 16),
 (1, 17),
 (5, 17),
 (6, 17),
 (7, 17),
 (8, 17),
 (0, 18),
 (2, 18),
 (3, 18),
 (4, 18),
 (7, 18),
 (8, 18),
 (0, 19),
 (3, 19),
 (4, 19),
 (5, 19),
 (6, 19),
 (7, 19),
 (9, 19)]

And then we need to start from the very left to the very right, and do this row by row. We should know that:

1. for each Px, we should consider all adjacent Pxs, which makes an adjacent Px set that is always updating. (**Adj set**)
2. we will check whether there are recognisable Pxs in the **Adj set**, and: 1) label them the same number if they are not labelled; 2) if they are already labelled, we add the two numbers to an **equivalency set**.
3. Finally, after all Pxs are labelled, we will then update the whole image with our **equivalency set**, which produces the desired output.

In [116]:
def AdjPoints(coord, image):
    AllColouredPx0=AllColouredPx(image)
    x=coord[0]
    y=coord[1]
    Adj_unconfirmed = [(x,y),(x-1,y),(x+1,y),(x,y-1),(x,y+1)]
    Adj_confirmed=[]
    for Px in Adj_unconfirmed:
        if Px in AllColouredPx0:
            Adj_confirmed.append(tuple(Px))
    return Adj_confirmed

In [224]:
def AllAdjSets(image):
    # determine legal range
    AllColouredPx0=AllColouredPx(image)
    
    i=0
    AllAdjSets=[]
    while i< len(AllColouredPx0):
        xy=list(AllColouredPx0[i])
        AdjPoints(xy, image)
        AllAdjSets.append(set(AdjPoints(xy, image)))
        i+=1
    return AllAdjSets
AllAdjSets(RawImage) 

[{(1, 0)},
 {(4, 0), (5, 0)},
 {(4, 0), (5, 0), (5, 1), (6, 0)},
 {(5, 0), (6, 0), (6, 1)},
 {(9, 0)},
 {(2, 1), (2, 2)},
 {(5, 0), (5, 1), (5, 2), (6, 1)},
 {(5, 1), (6, 0), (6, 1), (7, 1)},
 {(6, 1), (7, 1)},
 {(1, 2), (2, 2)},
 {(1, 2), (2, 1), (2, 2), (2, 3)},
 {(5, 1), (5, 2), (5, 3)},
 {(8, 2), (8, 3), (9, 2)},
 {(8, 2), (9, 2)},
 {(2, 2), (2, 3), (2, 4)},
 {(4, 3), (5, 3)},
 {(4, 3), (5, 2), (5, 3)},
 {(8, 2), (8, 3), (8, 4)},
 {(0, 4), (0, 5)},
 {(2, 3), (2, 4), (2, 5), (3, 4)},
 {(2, 4), (3, 4), (3, 5)},
 {(8, 3), (8, 4), (9, 4)},
 {(8, 4), (9, 4)},
 {(0, 4), (0, 5)},
 {(2, 4), (2, 5), (2, 6), (3, 5)},
 {(2, 5), (3, 4), (3, 5)},
 {(5, 5)},
 {(7, 5), (7, 6)},
 {(2, 5), (2, 6)},
 {(6, 6), (7, 6)},
 {(6, 6), (7, 5), (7, 6)},
 {(9, 6), (9, 7)},
 {(8, 7), (9, 7)},
 {(8, 7), (9, 6), (9, 7), (9, 8)},
 {(1, 8), (1, 9), (2, 8)},
 {(1, 8), (2, 8)},
 {(9, 7), (9, 8), (9, 9)},
 {(0, 9), (1, 9)},
 {(0, 9), (1, 8), (1, 9)},
 {(8, 9), (9, 9)},
 {(8, 9), (9, 8), (9, 9)},
 {(2, 10), (3, 10)},


For each point, if they appear in multiple lists in AllAdjSets, 
1. we add all points in the set to a trimed set and removed them from the point_unvisited list
2. we remove the lists from the AllAdjSets, and then add a new set to the list

In [227]:

point0= AllColouredPx(RawImage)  
point_unvisited=AllColouredPx(RawImage)  
list0 =AllAdjSets(RawImage) 
list_unvisited =AllAdjSets(RawImage) 
list_visited=[]

# Think from a point-wise perspective

for pointx in point0:
    
    # Ask whether this is a visited point
    if pointx in point_unvisited:
        point_visited=set()
        for listy in list0:
            
            # Ask whether this is a visited area
            if pointx in listy and listy in list_unvisited:
                list_unvisited.remove(listy)
                for pointy in listy:
                    # Add all visited 
                    if pointy in point_unvisited:
                        point_visited.add(pointy)
                        point_unvisited.remove(pointy)
                        
    if len(point_visited) > 0:
        list_visited.append(tuple(point_visited))
        list_visited=list(set(list_visited))

print(list_visited,list_unvisited)

[((5, 3), (4, 3)), ((6, 19), (4, 19), (5, 19)), ((8, 2), (8, 3), (8, 4), (9, 2)), ((0, 4), (0, 5)), ((7, 19),), ((8, 7), (9, 6), (9, 7), (9, 8)), ((9, 19),), ((2, 12),), ((1, 0),), ((9, 14),), ((3, 18), (3, 19), (4, 18), (2, 18)), ((7, 18), (6, 17)), ((4, 0), (5, 0), (5, 1), (6, 0)), ((9, 4),), ((6, 12), (7, 11), (7, 12), (6, 11)), ((5, 13),), ((1, 8), (0, 9), (1, 9), (2, 8)), ((1, 17),), ((4, 16), (5, 16), (5, 17)), ((9, 0),), ((6, 6), (7, 5), (7, 6)), ((2, 4), (3, 4), (2, 6), (2, 5), (3, 5)), ((4, 10), (2, 10), (3, 10)), ((0, 18), (0, 19)), ((0, 11),), ((3, 14), (3, 16), (4, 15), (4, 14), (3, 15)), ((4, 11), (4, 12)), ((2, 3), (1, 2), (2, 1), (2, 2)), ((8, 9), (9, 9)), ((5, 5),), ((1, 14), (0, 13), (0, 14), (0, 15)), ((8, 18), (7, 17), (8, 16), (8, 17)), ((9, 12), (8, 12)), ((6, 1), (7, 1), (5, 2))] [{(1, 2), (2, 2)}, {(5, 3), (5, 1), (5, 2)}, {(2, 5), (3, 4), (3, 5)}, {(2, 5), (2, 6)}, {(6, 6), (7, 6)}, {(8, 7), (9, 7)}, {(9, 7), (9, 8), (9, 9)}, {(0, 9), (1, 9)}, {(0, 14), (1, 14)}

In [228]:
# Think from a list perspective, not a point's one.
# We find that when we use up our points, there are still 11 lists not going through,
# which means that the 11 lists contain points that are used by other lists.
# Thus, we ask what which lists of the 34 intersects with the 11. 
# For those who intersect with the 11, they will be removed from the result list and 
# and merge with other lists.

result_list=list_visited
print(result_list)
for list0 in list_unvisited:
    for list1 in list_visited:
        # print(list0, list1)
        # print(type(list0), type(list1))
        if len(list0&set(list1))>0:
            result_list.remove(list1)
            list0=list0|set(list1)
    result_list.append(list0)
    
print(len(result_list))
print(result_list)

[((5, 3), (4, 3)), ((6, 19), (4, 19), (5, 19)), ((8, 2), (8, 3), (8, 4), (9, 2)), ((0, 4), (0, 5)), ((7, 19),), ((8, 7), (9, 6), (9, 7), (9, 8)), ((9, 19),), ((2, 12),), ((1, 0),), ((9, 14),), ((3, 18), (3, 19), (4, 18), (2, 18)), ((7, 18), (6, 17)), ((4, 0), (5, 0), (5, 1), (6, 0)), ((9, 4),), ((6, 12), (7, 11), (7, 12), (6, 11)), ((5, 13),), ((1, 8), (0, 9), (1, 9), (2, 8)), ((1, 17),), ((4, 16), (5, 16), (5, 17)), ((9, 0),), ((6, 6), (7, 5), (7, 6)), ((2, 4), (3, 4), (2, 6), (2, 5), (3, 5)), ((4, 10), (2, 10), (3, 10)), ((0, 18), (0, 19)), ((0, 11),), ((3, 14), (3, 16), (4, 15), (4, 14), (3, 15)), ((4, 11), (4, 12)), ((2, 3), (1, 2), (2, 1), (2, 2)), ((8, 9), (9, 9)), ((5, 5),), ((1, 14), (0, 13), (0, 14), (0, 15)), ((8, 18), (7, 17), (8, 16), (8, 17)), ((9, 12), (8, 12)), ((6, 1), (7, 1), (5, 2))]
30
[((6, 19), (4, 19), (5, 19)), ((8, 2), (8, 3), (8, 4), (9, 2)), ((0, 4), (0, 5)), ((7, 19),), ((9, 19),), ((2, 12),), ((1, 0),), ((9, 14),), ((3, 18), (3, 19), (4, 18), (2, 18)), ((9, 

In [230]:
updated_image=np.array(RawImage) 
# we will use the original pic, which may helps us to see whether anything goes wrong

Nlabel=0
for area in result_list:
    Nlabel+=1
    for point in area:
        updated_image[point[0],point[1]]=Nlabel
pd.DataFrame(updated_image)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,0,0,0,0,3,3,0,0,0,28,0,18,0,29,29,29,0,0,17,17
1,7,0,23,0,0,0,0,0,28,28,0,0,0,0,29,0,0,13,0,0
2,0,23,23,23,25,25,25,0,28,0,16,0,6,0,0,0,0,0,9,0
3,0,0,0,0,25,25,0,0,0,0,16,0,0,0,19,19,19,0,9,9
4,24,0,0,24,0,0,0,0,0,0,16,20,20,0,19,19,14,0,9,1
5,24,24,24,24,0,21,0,0,0,0,0,0,0,12,0,0,14,14,0,1
6,24,24,0,0,0,0,26,0,0,0,0,11,11,0,0,0,0,30,0,1
7,0,24,0,0,0,26,26,0,0,0,0,11,11,0,0,0,0,30,30,4
8,0,0,2,2,2,0,0,27,0,27,0,0,22,0,0,0,30,30,30,0
9,15,0,2,0,10,0,27,27,27,27,0,0,22,0,8,0,0,0,0,5


In [192]:
# Writing the output
output= open("output_question_4.txt","a")
for row in updated_image:
    allinrow=""
    for i in row:
        allinrow+=str(i)+"	"
    output.write (allinrow)
    output.write ("\n")
output.close()