In [2]:
import json
from tqdm import tqdm
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import cv2
import pandas as pd
import warnings

from sklearn.preprocessing import LabelEncoder
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

pd.options.display.max_columns = None  # Отображение всех столбцов без сокращения
pd.options.display.max_rows = 100 

warnings.filterwarnings("ignore")

## Цель эксперимента:

Проверка возможности классификации объектов исключительно на основе их формы (без учёта цвета). Основной задачей было определить, насколько снижается качество классификации, когда основным признаком является только форма объекта.  
Используемые шаги в эксперименте:
1. Изображения были преобразованы к черно-белому формату и изменены до размера 64 на 64 пикселя.
2. После этого, с использованием фильтра, пиксели были отмечены значением 1, если их интенсивность была больше нуля, иначе - ноль.
3. В качестве модели использовался метод опорных векторов (SVM).

Результаты оказались не очень хорошими:

- Точность (Accuracy): 0.3587692991225147

| Класс               | Точность (precision) | Полнота (recall) | F1-мера | Поддержка (support) |
|---------------------|-----------------------|------------------|---------|---------------------|
| Bridge              | 0.15                  | 0.31             | 0.20    | 478                 |
| Ground_Track_Field  | 0.25                  | 0.37             | 0.30    | 158                 |
| Harbor              | 0.68                  | 0.43             | 0.53    | 1000                |
| Helicopter          | 0.04                  | 0.08             | 0.05    | 86                  |
| Large_Vehicle       | 0.37                  | 0.23             | 0.28    | 1000                |
| Roundabout          | 0.16                  | 0.36             | 0.22    | 188                 |
| Small_Vehicle       | 0.44                  | 0.36             | 0.40    | 1000                |
| Soccer_ball_field   | 0.17                  | 0.37             | 0.23    | 188                 |
| Swimming_pool       | 0.14                  | 0.17             | 0.15    | 743                 |
| baseball_diamond    | 0.24                  | 0.32             | 0.27    | 220                 |
| basketball_court    | 0.11                  | 0.27             | 0.16    | 154                 |
| plane               | 0.76                  | 0.42             | 0.54    | 1000                |
| ship                | 0.22                  | 0.18             | 0.20    | 1000                |
| storage_tank        | 0.55                  | 0.51             | 0.53    | 1000                |
| tennis_court        | 0.64                  | 0.64             | 0.64    | 788                 |



## PREPARE RAW DATA

In [3]:
# Функция Влада
def img_data_create(data, cat, im_path):
    '''Функция, создающая словарь типа {image_id:{'image_size':...,'polygons':[[...],...,[...]],
    'bboxes':[[...],...,[...]]},...} для категории cat, где image_id - id изображения, image_size - размер изображения,
    polygons - массив координат для построения масок объектов на изображении, bboxes - массив ограничивающих рамок объектов на 
    изображении
    Входные параметры: data - словарь с аннотациями для объектов
                       cat - категория
                       im_path - путь к изображениям
    Выходные данные: словарь'''
    cat_id = [x for x in data['categories'] if x['name'] == cat][0]['id']
    anns = [ann for ann in data['annotations'] if ann['category_id'] == cat_id]
    img_data = {}
    for ann in anns:
        img_data[ann['image_id']] = {}
        img_data[ann['image_id']]['polygons'] = [] 
        img_data[ann['image_id']]['bboxes'] = [] 
    for ann in anns:
        img_data[ann['image_id']]['polygons'].append(ann['segmentation'][0])
        img_data[ann['image_id']]['bboxes'].append([ann['bbox'][0],ann['bbox'][1],ann['bbox'][0]+ann['bbox'][2]-1,ann['bbox'][1]+ann['bbox'][3]-1])
    for im_id in tqdm(img_data):
        im_fn = [x for x in data['images'] if x['id'] == im_id][0]['seg_file_name']
        im = Image.open(f'{im_path}/{im_fn}')
        img_data[im_id]['img_size']=im.size
    return img_data

In [15]:
with open('data/TrainData/train/Annotations/iSAID_train.json', 'r', encoding='Utf-8') as json_data:
    tmd=json.load(json_data)

categories = list() 
for i in range(0, 15):
    categories.append(tmd['categories'][i]['name'])
print('categories: ', categories)

categories:  ['storage_tank', 'Large_Vehicle', 'Small_Vehicle', 'plane', 'ship', 'Swimming_pool', 'Harbor', 'tennis_court', 'Ground_Track_Field', 'Soccer_ball_field', 'baseball_diamond', 'Bridge', 'basketball_court', 'Roundabout', 'Helicopter']


In [16]:
def get_data(im_path):
    img_data = dict.fromkeys(categories)

    for cat in categories: 
        img_data[cat] = img_data_create(tmd, cat, im_path)

    for cat in categories:
        for key in img_data[cat].keys(): 
            img_data[cat][key]['seg_file_name'] = im_path + '/' + tmd['images'][key]['seg_file_name']

    return img_data

In [17]:
train_img_data = get_data(im_path='data/TrainData/train/Semantic_masks/images/images')

dfs = []
for cat in categories:
    df = pd.DataFrame(data = train_img_data[cat]).T.explode(column = 'bboxes').drop(columns='polygons')
    df['category'] = cat
    dfs.append(df)
train_df = pd.concat(dfs, ignore_index=True)

train_df.head(10)

train_df.to_pickle('obj/train_df.pkl')
del train_df

100%|██████████| 245/245 [00:00<00:00, 1135.81it/s]
100%|██████████| 770/770 [00:00<00:00, 1487.96it/s]
100%|██████████| 1099/1099 [00:00<00:00, 2111.10it/s]
100%|██████████| 198/198 [00:00<00:00, 3324.16it/s]
100%|██████████| 434/434 [00:00<00:00, 2947.04it/s]
100%|██████████| 259/259 [00:00<00:00, 4544.72it/s]
100%|██████████| 339/339 [00:00<00:00, 4585.99it/s]
100%|██████████| 310/310 [00:00<00:00, 3868.08it/s]
100%|██████████| 197/197 [00:00<00:00, 3690.11it/s]
100%|██████████| 184/184 [00:00<00:00, 5007.34it/s]
100%|██████████| 146/146 [00:00<00:00, 3025.03it/s]
100%|██████████| 225/225 [00:00<00:00, 3303.13it/s]
100%|██████████| 119/119 [00:00<00:00, 5271.62it/s]
100%|██████████| 182/182 [00:00<00:00, 4732.07it/s]
100%|██████████| 38/38 [00:00<00:00, 4338.50it/s]


In [18]:
with open('data/ValidationData/val/Annotations/iSAID_val.json', 'r', encoding='Utf-8') as json_data:
    tmd=json.load(json_data)

categories = list() 
for i in range(0, 15):
    categories.append(tmd['categories'][i]['name'])
print('categories: ', categories)

categories:  ['storage_tank', 'Large_Vehicle', 'Small_Vehicle', 'ship', 'Harbor', 'baseball_diamond', 'Ground_Track_Field', 'Soccer_ball_field', 'Swimming_pool', 'Roundabout', 'tennis_court', 'basketball_court', 'plane', 'Helicopter', 'Bridge']


In [19]:
val_img_data = get_data(im_path='data/ValidationData/val/Semantic_masks/images/images')

dfs = []
for cat in categories:
    df = pd.DataFrame(data = val_img_data[cat]).T.explode(column = 'bboxes').drop(columns='polygons')
    df['category'] = cat
    dfs.append(df)
val_df = pd.concat(dfs, ignore_index=True)

val_df.head(10)

val_df.to_pickle('obj/val_df.pkl')
del val_df

100%|██████████| 71/71 [00:00<00:00, 1271.85it/s]
100%|██████████| 227/227 [00:00<00:00, 979.49it/s]
100%|██████████| 339/339 [00:00<00:00, 1937.63it/s]
100%|██████████| 150/150 [00:00<00:00, 493.65it/s]
100%|██████████| 111/111 [00:00<00:00, 3724.69it/s]
100%|██████████| 57/57 [00:00<00:00, 2567.31it/s]
100%|██████████| 75/75 [00:00<00:00, 2180.49it/s]
100%|██████████| 72/72 [00:00<00:00, 2344.99it/s]
100%|██████████| 77/77 [00:00<00:00, 2476.58it/s]
100%|██████████| 64/64 [00:00<00:00, 3591.11it/s]
100%|██████████| 94/94 [00:00<00:00, 978.01it/s]
100%|██████████| 46/46 [00:00<00:00, 7039.99it/s]
100%|██████████| 72/72 [00:00<00:00, 2491.69it/s]
100%|██████████| 15/15 [00:00<00:00, 4582.94it/s]
100%|██████████| 79/79 [00:00<00:00, 4421.12it/s]


In [20]:
def process_image(row, size = (64, 64)):
    image = cv2.imread(row['seg_file_name'])
    
    x0, y0, x1, y1 = map(int, row['bboxes'])
    object_image = image[y0:y1,x0:x1]

    gray_object_image = cv2.cvtColor(object_image, cv2.COLOR_BGR2GRAY)
    gray_object_image[gray_object_image > 0] = 1
    
    resized_object_image = cv2.resize(gray_object_image, size)
    
    return resized_object_image


def process_df(original_df, n_sample_per_class = 10):

    df = pd.DataFrame()

    df['processed_image'] = original_df.groupby('category').apply(lambda x: x.head(n_sample_per_class)).reset_index(drop=True).apply(process_image, axis=1)
    df['category'] = original_df.groupby('category').apply(lambda x: x.head(n_sample_per_class)).reset_index(drop=True)['category']

    return df.sample(frac=1, random_state=42)


## TRAIN & TEST

In [26]:
train_df = pd.read_pickle('obj/train_df.pkl')
processed_train_df = process_df(train_df, 1000)
del train_df

val_df = pd.read_pickle('obj/val_df.pkl')
processed_val_df = process_df(val_df, 1000)
del val_df

In [27]:
label_encoder = LabelEncoder()

y_train = label_encoder.fit_transform(processed_train_df['category'])
X_train = pd.DataFrame(processed_train_df['processed_image'].apply(lambda x: x.ravel()).tolist())

del processed_train_df


y_val = label_encoder.transform(processed_val_df['category'])
X_val = pd.DataFrame(processed_val_df['processed_image'].apply(lambda x: x.ravel()).tolist())

del processed_val_df

In [28]:
clf = LinearSVC(dual="auto", random_state=0, tol=1e-5)
clf.fit(X_train, y_train)

In [31]:
import pickle
with open('obj/model.pkl', 'wb') as f: 
    pickle.dump(clf, f)

In [29]:
def evaluate_multiclass_classification(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    print("Accuracy:", accuracy)

    report = classification_report(y_true, y_pred, target_names=list(label_encoder.classes_))
    print("Classification Report:")
    print(report)
    
y_pred = clf.predict(X_val)
evaluate_multiclass_classification(y_val, y_pred)


Accuracy: 0.3587692991225147
Classification Report:
                    precision    recall  f1-score   support

            Bridge       0.15      0.31      0.20       478
Ground_Track_Field       0.25      0.37      0.30       158
            Harbor       0.68      0.43      0.53      1000
        Helicopter       0.04      0.08      0.05        86
     Large_Vehicle       0.37      0.23      0.28      1000
        Roundabout       0.16      0.36      0.22       188
     Small_Vehicle       0.44      0.36      0.40      1000
 Soccer_ball_field       0.17      0.37      0.23       188
     Swimming_pool       0.14      0.17      0.15       743
  baseball_diamond       0.24      0.32      0.27       220
  basketball_court       0.11      0.27      0.16       154
             plane       0.76      0.42      0.54      1000
              ship       0.22      0.18      0.20      1000
      storage_tank       0.55      0.51      0.53      1000
      tennis_court       0.64      0.64      0.