In [1]:
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np
import math
import pandas as pd

In [21]:
from sklearn import preprocessing
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [2]:
def count_level(img):
    gray=cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    shape = gray.shape
    width = shape[1]
    edges = cv2.Canny(gray, 80, 120)
    lines = cv2.HoughLinesP(edges, rho = 1, theta = math.pi/2, minLineLength = 0.8*width, threshold = 1, maxLineGap = 3)
    lines = lines.tolist()
#     for i in lines:
#         for j in lines:
#             if j != i and abs(i[0][1]-j[0][1]) < 15:
#                 lines.remove(j)
    flags = [0]*len(lines)
    for i in range(len(lines)):
        for j in range(len(lines)):
            if j < i and (abs(lines[i][0][1]-lines[j][0][1]) < 15):
                flags[j] = 1
        if abs(lines[i][0][1]-0) < 10:
            flags[i] = 1
    counter = 0
    for i in range(len(flags)):
        if flags[i] == 0:
            counter += 1                
    return counter

In [3]:
def fraction_width(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    height = gray.shape[0]
    width = gray.shape[1]
    
    ret, thresh = cv2.threshold(gray, 200, 255, 0)
    _, contours, h = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    
    quadrilaterals = []
    for i in range(len(contours)):
        polygon = cv2.approxPolyDP(contours[i],0.01*cv2.arcLength(contours[i],True),True)
        if len(polygon) == 4:
            quadrilaterals.append(polygon) 
            
    redflag = [0]*len(quadrilaterals)
    
    for i in range(len(quadrilaterals)):
        q = quadrilaterals[i]
        for j in range(4):
            if abs(q[j][0][0] - gray.shape[1]) < 5 or abs(q[j][0][0]) < 5:
                redflag[i] = 1
    
    detection_series = np.zeros(gray.shape[1], dtype = 'uint8')
    
    for i in range(len(quadrilaterals)):
        q = quadrilaterals[i]
        if redflag[i]!=1:
            x_min = np.min(q[:,0,0])
            x_max = np.max(q[:,0,0])
            detection_series[x_min:x_max] = np.ones(x_max-x_min, dtype = 'uint8')
    
    return np.sum(detection_series)/gray.shape[1]

In [4]:
def count_openings(img):
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    height = gray.shape[0]
    width = gray.shape[1]
    
    ret,thresh = cv2.threshold(gray, 200, 255, 0)
    _, contours, h = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    
    quadrilaterals = []
    for i in range(len(contours)):
        polygon = cv2.approxPolyDP(contours[i],0.01*cv2.arcLength(contours[i],True),True)
        if len(polygon) == 4:
            quadrilaterals.append(polygon) 
            
    redflag = [0]*len(quadrilaterals)
    
    for i in range(len(quadrilaterals)):
        q = quadrilaterals[i]
        for j in range(4):
            if abs(q[j][0][0] - gray.shape[1]) < 5 or abs(q[j][0][0]) < 5:
                redflag[i] = 1
    
    return (len(quadrilaterals) - np.sum(redflag))

In [5]:
path_generated_go = r'GO_noGO Data Set_Images/TestGo//'
path_generated_nogo = r'GO_noGO Data Set_Images/TestNoGo//'

In [6]:
files_go = list(os.walk(path_generated_go))[0][2]
files_nogo = list(os.walk(path_generated_nogo))[0][2]
files_1 = files_go + files_nogo
files = [int(i.split("Img")[1].split(".")[0]) for i in files_1]

In [7]:
list_img = []
for i in files_go:
    list_img.append(cv2.imread(path_generated_go + i))
for i in files_nogo:
    list_img.append(cv2.imread(path_generated_nogo + i))

In [8]:
levels = []
for i in range(len(list_img)):
    n_level = count_level(list_img[i])
    levels.append(n_level)

In [9]:
set(levels)

{1, 2, 3, 4}

In [10]:
fractions = []
for i in range(len(list_img)):
    frac = fraction_width(list_img[i])
    fractions.append(frac)

In [11]:
openings = []
for i in range(len(list_img)):
    opens = count_openings(list_img[i])
    openings.append(opens)

In [12]:
files_go_idx = []
for file in files_go:
    files_go_idx.append(int(file.split("Img")[1].split(".")[0]))
files_go_idx[:10]

[6081, 1936, 6095, 6903, 8090, 8084, 333, 6917, 6724, 6042]

In [13]:
files_nogo_idx = []
for file in files_nogo:
    if "Img" in file:
        files_nogo_idx.append(int(file.split("Img")[1].split(".")[0]))
files_nogo_idx[:10]

[1088, 3847, 2581, 4696, 5588, 469, 1922, 4682, 3853, 2595]

In [14]:
dic = {"openings":openings, "fractions":fractions, "levels":levels, "filename":files}
df = pd.DataFrame(dic)

In [15]:
df.head()

Unnamed: 0,openings,fractions,levels,filename
0,2,0.182203,1,6081
1,2,0.099812,2,1936
2,4,0.303202,2,6095
3,1,0.088136,1,6903
4,2,0.111864,2,8090


In [16]:
df['Go/NoGo']=df['filename'].apply(lambda x: 1 if x in files_go_idx else 0)

In [17]:
df.head()

Unnamed: 0,openings,fractions,levels,filename,Go/NoGo
0,2,0.182203,1,6081,1
1,2,0.099812,2,1936,1
2,4,0.303202,2,6095,1
3,1,0.088136,1,6903,1
4,2,0.111864,2,8090,1


In [23]:
X_train, X_test, y_train, y_test = train_test_split(df[['openings', 'fractions', 'levels']], df['Go/NoGo'], test_size=0.3, random_state=0)
logreg = LogisticRegression()
logreg.fit(X_train, y_train)

y_pred = logreg.predict(X_test)
print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(logreg.score(X_test, y_test)))

Accuracy of logistic regression classifier on test set: 0.80


