In [1]:
import cv2
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.cluster import KMeans
import csv
import pickle

This notebook contains the code for training the non-DL (non-deep-learning) model. The approach is:
1. extract all the keypoints and corresponding descriptors in the image
2. determine whether each keypoint lies inside any bounding box (bbox); label the keypoint 1 if it does, and 0 otherwise.
3. use the descriptors as features to train an SVM model.

I considered using the bboxes themselves as labels. The challenge is that each bbox contains a different number of keypoints and thus a different number of descriptors. If I regarded each bbox as a separate label, there would be too many classes, each with only a few observations, which would make the model too complex and inefficient.

The output of the current approach is an image in which the keypoints predicted to be wheat are marked. To reduce the complexity and improve the efficiency of this model, I used the resized pictures from the training of the DL model, capped the total number of bboxes read at 30000, and limited ORB's `nfeatures` to 100. I did not choose SURF because of the restrictions on patented functions.

In [1]:
from zipfile import ZipFile

# Unpack the image archive into the current working directory.
# (prepare_data reads images from "resized/", presumably created by this step.)
file_name = 'resized.zip'

with ZipFile(file_name, mode='r') as zipped:
  zipped.extractall()

# Reached only when extraction finished without raising.
print('Done')

Done


In [None]:
def _parse_scaled_bbox(bbox, scale=0.25):
  """Parse a bbox string such as '[1.0, 2.0, 3.0, 4.0]' into four scaled ints.

  The first and last characters (the brackets) are dropped, the remainder is
  split on commas, and each of the first four values is multiplied by `scale`
  and truncated with int() so the box matches the resized images.
  Whitespace around the string or around individual numbers is tolerated.
  """
  parts = bbox.strip()[1:-1].split(',')
  # float() ignores surrounding whitespace, so ', ' and ',' separators both work.
  return tuple(int(float(parts[i]) * scale) for i in range(4))


def prepare_data(csv_path='train.csv', image_dir='resized', max_rows=30000, nfeatures=100, scale=0.25):
  """Build a keypoint-level dataset of ORB descriptors and in/out-of-bbox labels.

  Args:
    csv_path: CSV file with at least the columns 'image_id' and 'bbox'.
    image_dir: directory containing the images as '<image_id>.jpg'.
    max_rows: maximum number of CSV rows (i.e. bboxes) to read; None reads all.
      The cap keeps training time manageable.
    nfeatures: maximum number of ORB keypoints kept per image.
    scale: factor applied to the bbox coordinates to match the resized images.

  Returns:
    (features, labels): two parallel lists. `features` holds one descriptor
    row per detected keypoint; `labels` holds 1 when the keypoint lies inside
    at least one bbox of its image (borders included) and 0 otherwise.

  Images that cannot be read are skipped and reported; images in which ORB
  finds no keypoints contribute nothing.
  """
  features = []
  labels = []

  # nfeatures caps the keypoints per image to limit the size of the dataset.
  orb = cv2.ORB_create(scoreType=cv2.ORB_FAST_SCORE, nlevels=1, nfeatures=nfeatures)

  # image_bbox maps image_id -> list of (x, y, w, h) boxes in resized-image pixels.
  # NOTE(review): if max_rows cuts an image's rows short, that image keeps only
  # part of its bboxes and some of its keypoints may be labelled 0 -- confirm
  # this is acceptable.
  image_bbox = {}
  with open(csv_path, 'r', newline='') as f:
    # read the rows as dicts line by line
    reader = csv.DictReader(f)
    for row_idx, row in enumerate(reader):
      if max_rows is not None and row_idx >= max_rows:
        break
      image_bbox.setdefault(row['image_id'], []).append(_parse_scaled_bbox(row['bbox'], scale))

  unreadable = []
  for img, bbox_list in image_bbox.items():
    # descriptor extractors can only take grayscale images. ref: https://answers.opencv.org/question/155/do-inputs-of-descriptor-extractors-are-required-to-be-grayscale/
    image_file = cv2.imread(f"{image_dir}/{img}.jpg", cv2.IMREAD_GRAYSCALE)
    if image_file is None:
      # cv2.imread signals a missing/unreadable file by returning None.
      unreadable.append(img)
      continue

    keypoints, descriptors = orb.detectAndCompute(image_file, None)
    if descriptors is None:
      # No keypoints were detected, so there is nothing to iterate over.
      continue

    for kpt, des in zip(keypoints, descriptors):
      x, y = kpt.pt
      # Generator form lets any() stop at the first enclosing bbox.
      inside_bbox = any(
          bx <= x <= bx + bw and by <= y <= by + bh
          for bx, by, bw, bh in bbox_list)
      features.append(des)
      labels.append(1 if inside_bbox else 0)

  if unreadable:
    print(f'skipped {len(unreadable)} unreadable image(s), e.g. {unreadable[0]}')

  return features, labels

In [None]:
def train_and_save_model(features, labels, model_path='svm_model.pkl', test_size=0.2, random_state=0):
  """Fit an SVC on a train split, report validation accuracy, and pickle the model.

  Args:
    features: sequence of feature vectors, one per sample.
    labels: sequence of class labels, parallel to `features`.
    model_path: file the fitted model is pickled to.
    test_size: fraction of the samples held out for validation.
    random_state: seed for the train/validation split, for reproducibility.

  Raises:
    ValueError: if `features` is empty.

  Side effects:
    Prints the validation accuracy and writes the pickled model to `model_path`.
  """
  if len(features) == 0:
    raise ValueError('no features to train on')

  X_train, X_val, y_train, y_val = train_test_split(
      features, labels, test_size=test_size, random_state=random_state)

  svm = SVC()
  svm.fit(X_train, y_train)
  y_pred = svm.predict(X_val)

  accuracy = accuracy_score(y_val, y_pred)
  print(f'accuracy: {accuracy}')

  # NOTE: pickle files must only be loaded back from trusted sources.
  with open(model_path, 'wb') as file:
    pickle.dump(svm, file)
# The final accuracy is ~72%

In [8]:
# Build the keypoint-descriptor dataset, then fit the SVM and save it to svm_model.pkl.
features, labels = prepare_data()
train_and_save_model(features, labels)