# ・[Find the best scenes with tropical fish in your videos with machine learning.]

### ・Training a tropical fish classifier.
   - Manually sort the photos into two folders: 150 photos with fish in `fish/` and 150 photos without fish in `nofish/`.

In [4]:
import cv2
import os, glob
from sklearn.model_selection import train_test_split
from sklearn import datasets, metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.externals import joblib

#! Training configuration: classifier input size, dataset folders, sample arrays.
#! Every training image is resized to this size before it is flattened.
image_size = (64, 32)
#! abspath("__file__") resolves the literal name "__file__" against the current
#! working directory, so its dirname is the working directory itself.
path = os.path.dirname(os.path.abspath("__file__"))
path_fish, path_nofish = path + "/fish", path + "/nofish"
#! Accumulators filled by read_dir(): x holds flattened pixels, y the labels.
x, y = [], []

#! Read image data and add to array.
def read_dir(path, label):
    """Load every ``*.jpg`` in *path* and append it to the training arrays.

    Each readable image is resized to ``image_size``, flattened to a 1-D
    pixel vector and appended to the module-level list ``x``; ``label`` is
    appended to ``y`` at the same position. Files that OpenCV cannot decode
    are reported and skipped.

    Args:
        path: Directory searched (non-recursively) for ``*.jpg`` files.
        label: Class label stored for every image read from *path*.
    """
    for f in glob.glob(path + "/*.jpg"):
        img = cv2.imread(f)
        #! cv2.imread returns None instead of raising when a file cannot be
        #! decoded; skip it rather than crash inside cv2.resize.
        if img is None:
            print("skipped unreadable image:", f)
            continue
        img = cv2.resize(img, image_size)
        x.append(img.reshape(-1))
        y.append(label)
#! Load the dataset: label 0 for images without fish, label 1 for images with fish.
read_dir(path_nofish, 0)
read_dir(path_fish, 1)

#! Hold out 20% of the samples for testing; no seed is set, so the split
#! (and therefore the printed accuracy) varies between runs.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

#! Train a random forest of 100 trees; fit() returns the estimator itself.
clf = RandomForestClassifier(n_estimators=100).fit(x_train, y_train)

#! Report accuracy on the held-out samples.
y_pred = clf.predict(x_test)
print(accuracy_score(y_true=y_test, y_pred=y_pred))

#! Persist the trained classifier so the video-analysis step can reload it.
joblib.dump(clf, "fish.pkl")



0.9333333333333333


['fish.pkl']

### ・Video analysis.
   - Each moving region of the video is classified with the fish model that was saved to the PKL file during training.

In [1]:
import cv2, os, copy
from sklearn.externals import joblib

#! Load the classifier saved by the training step.
#! NOTE: joblib.load unpickles the file, so only load a fish.pkl you created yourself.
clf = joblib.load("fish.pkl")
output_dir = "./bestshot"
#! Binarized image of the previous frame (None until the first frame is processed).
img_last = None
#! A frame is saved only when more than this many fish regions are detected in it.
fish_th = 3
#! count: frames saved so far; frame_count: frames read so far.
count = 0
frame_count = 0
#! Create the output folder; exist_ok avoids the check-then-create race.
os.makedirs(output_dir, exist_ok=True)

#! Start input from video file.
cap = cv2.VideoCapture("fish.mp4")
#! try/finally releases the capture and closes the preview window even when
#! processing a frame raises.
try:
    while True:
        #! Get image; stop when no further frame can be read.
        is_ok, frame = cap.read()
        if not is_ok:
            break
        frame = cv2.resize(frame, (640, 360))
        #! Draw detections on a copy so the saved frames stay free of rectangles.
        frame2 = copy.copy(frame)
        frame_count += 1
        #! Convert to a blurred, binarized black-and-white image.
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (15, 15), 0)
        img_b = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)[1]
        #! Check the difference against the previous frame (skipped for the first frame).
        if img_last is not None:
            frame_diff = cv2.absdiff(img_last, img_b)
            #! findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
            #! (image, contours, hierarchy) in 3.x; index -2 is the contour list
            #! in every version, whereas index 0 is the image on 3.x.
            cnts = cv2.findContours(frame_diff,
                                    cv2.RETR_EXTERNAL,
                                    cv2.CHAIN_APPROX_SIMPLE)[-2]
            #! Classify each changed area and count the ones predicted as fish.
            fish_count = 0
            for pt in cnts:
                #! Not named x / y so the training arrays are not overwritten
                #! when both cells run in the same kernel.
                left, top, width, height = cv2.boundingRect(pt)
                #! Noise removal: ignore regions that are too narrow or too wide.
                if width < 100 or width > 500:
                    continue
                #! Crop the region and shape it like the training samples.
                imgex = frame[top:top + height, left:left + width]
                imagex = cv2.resize(imgex, (64, 32))
                image_data = imagex.reshape(-1)
                pred_y = clf.predict([image_data])
                if pred_y[0] == 1:
                    fish_count += 1
                    cv2.rectangle(frame2, (left, top),
                                  (left + width, top + height), (0, 255, 0), 2)
            #! Save the untouched frame when enough fish were detected.
            if fish_count > fish_th:
                fname = output_dir + "/fish" + str(count) + ".jpg"
                cv2.imwrite(fname, frame)
                count += 1
        cv2.imshow("FISH!", frame2)
        #! Press ESC or Enter to exit the loop.
        k = cv2.waitKey(1)
        if k == 27 or k == 13:
            break
        img_last = img_b
finally:
    cap.release()
    cv2.destroyAllWindows()
print("OK", count, "/", frame_count)




OK 85 / 1987
