In [1]:
# Import packages
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from os import listdir
import cv2
import math
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [2]:
# Function that counts the number of floors
def count_level(img):
    # Convert img to grayscale
    gray=cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    
    # Get shape
    shape = gray.shape
    
    # Get width of the image
    width = shape[1]
    
    # Detect edges
    edges = cv2.Canny(gray, 80, 120)
    
    #HoughLinesP returns an array of (rho, theta) values. 
    #rho is measured in pixels and theta is measured in radians
    
    # Detect lines representing the floors which are longer than 80% of the width of the building
    lines = cv2.HoughLinesP(edges, rho = 1, theta = math.pi/2, minLineLength = 0.8*width, threshold = 1, maxLineGap = 3)
    lines.tolist()
    
#     for i in lines:
#         for j in lines:
#             if j != i and abs(i[0][1]-j[0][1]) < 15:
#                 lines.remove(j)
    
    # Delete repeated lines and line detected from the roof (we only need floor lines to count the number of floors)
    
    flags = [0]*len(lines)  # flags will mark the redundant lines as 1, lines we need as 0
    for i in range(len(lines)):
        for j in range(len(lines)):
            if j < i and (abs(lines[i][0][1]-lines[j][0][1]) < 15):  # detect lines very close to each other 
                flags[j] = 1
        if abs(lines[i][0][1]-0) < 10:  # roof lines: y Coordinate -> 0
            flags[i] = 1
    counter = 0
    for i in range(len(flags)):
        if flags[i] == 0:
            counter += 1                
    return counter

In [3]:
# Function that calculates proportion of sum of all windows' widths (on all floors) 
# to the overall length of building

def fraction_width(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    height = gray.shape[0]
    width = gray.shape[1]
    
    ret, thresh = cv2.threshold(gray, 200, 255, 0)
    
    # Contours is a tree of lists of points which describe each contour
    contours, h = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    
    # Create a list storing quadrilaterals that represent openings
    quadrilaterals = []
    for i in range(len(contours)):
        
        # Contour approximation will mark four vertices of a quadrilateral
        polygon = cv2.approxPolyDP(contours[i],0.01*cv2.arcLength(contours[i],True),True)
        if len(polygon) == 4:
            quadrilaterals.append(polygon) 
            
    redflag = [0]*len(quadrilaterals)
    
    # Detect and delete small quadrilaterals adjacent to windows/doors which should not be counted as openings
    for i in range(len(quadrilaterals)):
        q = quadrilaterals[i]
        for j in range(4):
            if abs(q[j][0][0] - gray.shape[1]) < 5 or abs(q[j][0][0]) < 5:
                redflag[i] = 1
    
    # Get a blank canvas for drawing width of a side of each quadrilateral
    detection_series = np.zeros(gray.shape[1], dtype = 'uint8')
    
    # The width of a side should be the larger x cordinate of the right vertics 
    # minus the x cordinate of the left vertics
    for i in range(len(quadrilaterals)):
        q = quadrilaterals[i]
        if redflag[i]!=1:
            x_min = np.min(q[:,0,0])
            x_max = np.max(q[:,0,0])
            detection_series[x_min:x_max] = np.ones(x_max-x_min, dtype = 'uint8')
    
    # Return percent of sum of all windows' widths (on all floors) to the overall length of building
    return np.sum(detection_series)/width

In [4]:
def count_openings(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    height = gray.shape[0]
    width = gray.shape[1]
    
    ret,thresh = cv2.threshold(gray, 200, 255, 0)
    contours, h = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    
    quadrilaterals = []
    for i in range(len(contours)):
        polygon = cv2.approxPolyDP(contours[i],0.01*cv2.arcLength(contours[i],True),True)
        if len(polygon) == 4:
            quadrilaterals.append(polygon) 
            
    redflag = [0]*len(quadrilaterals)
    
    for i in range(len(quadrilaterals)):
        q = quadrilaterals[i]
        for j in range(4):
            if abs(q[j][0][0] - gray.shape[1]) < 5 or abs(q[j][0][0]) < 5:
                redflag[i] = 1
    
    return (len(quadrilaterals) - np.sum(redflag))

In [5]:
# Set path for training images
filepaths_go = [f for f in listdir("../GO_noGO Data Set_Images/TestGo") if f.endswith('.png')]
filepaths_nogo = [f for f in listdir("../GO_noGO Data Set_Images/TestNoGo") if f.endswith('.png')]

path_generated_go = '../GO_noGO Data Set_Images/TestGo/'
path_generated_nogo = '../GO_noGO Data Set_Images/TestNoGo/'

In [6]:
# Read all GO and NoGO images into a list
list_img = []
for i in filepaths_go:
    list_img.append(cv2.imread(path_generated_go + i))
    
for i in filepaths_nogo:
    list_img.append(cv2.imread(path_generated_nogo + i))

In [9]:
n = len(list_img)

In [10]:
n

8370

In [11]:
# Create a list called 'levels' to store number of floors for each building
levels = []
for i in range(n):
    n_level = count_level(list_img[i])
    levels.append(n_level)

In [12]:
set(levels)

{1, 2, 3, 4}

In [13]:
# Create a list called 'fractions' to store proportion of sum of all windows' widths (on all floor) 
# to the overall length of building
fractions = []
fractions1 = []
for i in range(len(list_img)):
    frac = fraction_width(list_img[i])
    # divide by number of floor can show average number of openings on each floor
    frac1 = frac/levels[i]  
    fractions.append(frac)
    fractions1.append(frac1)

In [14]:
# Create a list called 'openings' to store number of openings
openings = []
for i in range(n):
    opens = count_openings(list_img[i])
    openings.append(opens)

In [15]:
# Extract image index (four digit number)
files_go_idx = []
for file in filepaths_go:
    files_go_idx.append(int(file.split("Img")[1].split(".")[0]))
files_go_idx[:10]

[6081, 1936, 6095, 6903, 8090, 8084, 333, 6917, 6724, 6042]

In [16]:
files_nogo_idx = []
for file in filepaths_nogo:
    if "Img" in file:
        files_nogo_idx.append(int(file.split("Img")[1].split(".")[0]))
files_nogo_idx[:10]

[1088, 3847, 2581, 4696, 5588, 469, 1922, 4682, 3853, 2595]

In [17]:
files = files_go_idx + files_nogo_idx

In [18]:
# Create a a dataframe with  three features and image index as columns
dic = {"openings":openings, "fractions":fractions1, "levels":levels, "filename":files}
df = pd.DataFrame(dic)

In [19]:
df.head()

Unnamed: 0,openings,fractions,levels,filename
0,2,0.182203,1,6081
1,2,0.049906,2,1936
2,4,0.151601,2,6095
3,1,0.088136,1,6903
4,2,0.055932,2,8090


In [20]:
# Add GO/NoGo column to label each image
df['Go/NoGo']=df['filename'].apply(lambda x: 1 if x in files_go_idx else 0)

In [21]:
df.head()

Unnamed: 0,openings,fractions,levels,filename,Go/NoGo
0,2,0.182203,1,6081,1
1,2,0.049906,2,1936,1
2,4,0.151601,2,6095,1
3,1,0.088136,1,6903,1
4,2,0.055932,2,8090,1


In [22]:
# Fitting in logistic model and evaluate the performance(accuracy)
X_train, X_test, y_train, y_test = train_test_split(df[['openings', 'fractions', 'levels']], df['Go/NoGo'], test_size=0.3, random_state=0)
logreg = LogisticRegression()
logreg.fit(X_train, y_train)

y_pred = logreg.predict(X_test)
print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(logreg.score(X_test, y_test)))



Accuracy of logistic regression classifier on test set: 0.80
