In [None]:
# Mount Google Drive into the Colab runtime; the dataset paths used below live under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
import pandas as pd

import os
import cv2
import glob
import json

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings(action='ignore')

## Making lists of dataset files

In [None]:
# Collect every entry of the training label directory, then order the paths alphabetically.
train_jsons = glob.glob('/content/drive/Shareddrives/해커톤/1_data/위성영상_객체판독/Training/train_objects_labeling/*')
train_jsons.sort()

## Loading training metadata (JSON)

In [None]:
# Flatten every feature of every label file into one metadata row per object.
train_meta = []
for j in train_jsons:
    # Use a context manager so each file is closed immediately instead of
    # leaking a handle per label file. Reading in binary mode lets the json
    # module detect the encoding (UTF-8/16/32, with or without BOM) rather
    # than depending on the platform's default text encoding.
    with open(j, 'rb') as fp:
        json_obj = json.load(fp)

    for f in json_obj['features']:
        geometry = f['geometry']
        props = f['properties']
        train_meta.append([
            props['image_id'],           # image ID
            props['type_id'],            # class id
            props['type_name'],          # class name
            geometry['coordinates'],
            geometry['type'],
            props['object_angle'],
            props['object_imcoords'],
            props['building_imcoords'],
            props['road_imcoords'],
            props['ingest_time'],        # image creation time
        ])

# Make dataframe (column order matches the row layout built above)
df_train = pd.DataFrame(train_meta,
                        columns = ['image_id','type_id','type_name','geo_coord','geo_type','object_angle',
                                   'object_imcoords','building_imcoords','road_imcoords','ingest_time'])

In [None]:
# Preview the first five flattened object rows (five is the default for head()).
df_train.head()

In [None]:
# (rows, columns) of the flattened metadata: one row per labelled object.
df_train.shape

### 동일한 값을 갖는 컬럼들 확인

In [None]:
# Count distinct geo_coord values by their string form.
# NOTE(review): presumably the raw values are unhashable lists, hence the str() step — confirm.
df_train['geo_coord'].map(str).value_counts()

In [None]:
# Frequency of each distinct geo_type value, to see whether the column carries any information.
df_train['geo_type'].value_counts()

In [None]:
# Frequency of each distinct building_imcoords value, to see whether the column carries any information.
df_train['building_imcoords'].value_counts()

In [None]:
# Frequency of each distinct road_imcoords value, to see whether the column carries any information.
df_train['road_imcoords'].value_counts()

### 불필요 컬럼 제거 : 동일한 항목 4개와 수집시간 

In [None]:
# Drop geo_coord, geo_type, building_imcoords, road_imcoords and ingest_time,
# keeping only the columns the analysis uses.
# type_id is cast to int BEFORE sorting: if the labels store the id as a string,
# a plain sort would be lexicographic ('10' ahead of '2'). The cast is a no-op
# when the ids are already integers.
df_train = (
    df_train[['image_id','type_id','type_name','object_angle','object_imcoords']]
    .astype({'type_id': int})
    .sort_values(by=['image_id','type_id'])
    .reset_index(drop=True)
)
df_train.head(10)

## Object distribution per training image (800 images)

In [None]:
# Per-image summary table, indexed by image_id.
# obj_count: number of labelled objects in the image.
df_image = df_train['image_id'].value_counts().to_frame(name='obj_count')

# obj_nunique: number of distinct object types in the image.
# Computed with groupby/nunique and named explicitly. The previous
# value_counts()+rename approach relied on the counted Series being named after
# its source column; pandas >= 2.0 names it 'count', so the 'obj_nunique'
# column was silently never created.
df_image = df_image.join(
    df_train.groupby('image_id')['type_name'].nunique().rename('obj_nunique')
)

# Append one count column per object type, ordered from most to least frequent type overall.
df_img2obj = df_image.join(
    pd.crosstab(df_train['image_id'], df_train['type_name'])[df_train['type_name'].value_counts().index]
)
df_img2obj.head(3)

In [None]:
# Last three rows of the per-image table.
df_img2obj.tail(3)

### Training Image들의 객체 분포
* X축 : 이미지 800장을 포함된 객체 수가 많은 순으로 정렬
* Y축 : 빈도가 높은 객체유형 순으로 정렬

<br>
#### Sparse & Imbalanced => Data Augmentation이 중요한 상황 

In [None]:
# Heatmap of per-type object counts: rows are object types, columns are images.
# The first two columns (obj_count, obj_nunique) are summary fields and are skipped.
plt.figure(figsize=(25, 5))
ax = sns.heatmap(
    data=df_img2obj.iloc[:, 2:].T,
    cmap='Reds',
    cbar=False,
    xticklabels=False,
)

### Objects per single Image
* 사진1장에 포함된 객체수의 분포
* 전체 분포와 'small car' 분포가 별 차이가 없음 ...

In [None]:
# Distribution of objects per image: all objects, 'small car' only, 'military aircraft' only.
fig, axs = plt.subplots(1, 3, figsize=(18, 4))
for panel, column in zip(axs, ['obj_count', 'small car', 'military aircraft']):
    # sns.distplot is deprecated and scheduled for removal from seaborn; a
    # density-normalised histplot with a KDE overlay (cut=3) is the documented
    # replacement and draws the same kind of figure.
    sns.histplot(df_img2obj[column], kde=True, stat='density', kde_kws=dict(cut=3), ax=panel)
plt.show()

### Image에 포함된 고유한 객체유형수 1~10
* 고유한 객체유형(type_id)은 10개

In [None]:
# Bar chart: how many images contain 1, 2, 3, ... distinct object types.
(
    df_img2obj['obj_nunique']
    .value_counts()
    .sort_index()
    .plot(kind='bar', figsize=(12, 4))
)
plt.show()

## 객체유형별 전체 객체수(obj_count), 객체를 포함하는 이미지수(image_count)

In [None]:
# Per-type summary table, indexed by type_name.
# obj_count: total number of labelled objects of the type.
df_object = df_train['type_name'].value_counts().to_frame(name='obj_count')

# image_count: number of distinct images that contain the type.
# Computed with groupby/nunique and named explicitly. The previous
# value_counts()+rename approach relied on the counted Series being named after
# its source column; pandas >= 2.0 names it 'count', so the 'image_count'
# column was silently never created.
df_object = df_object.join(
    df_train.groupby('type_name')['image_id'].nunique().rename('image_count')
)

In [None]:
# Show the per-type summary table.
df_object

In [None]:
# Side-by-side pies: share of objects per type (left) and share of images containing each type (right).
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(16, 8))
df_object['obj_count'].plot(kind='pie', ax=axs[0])
df_object['image_count'].plot(kind='pie', ax=axs[1])

### Split Object Coordinates

In [None]:
# Column names for the four polygon corners, interleaved as p1_x, p1_y, ..., p4_x, p4_y.
imcoords_columns = [
    'p{}_{}'.format(corner, axis)
    for corner in range(1, 5)
    for axis in ('x', 'y')
]

# Create the float columns up front so the multi-column assignment below has targets.
for coord_col in imcoords_columns:
    df_train[coord_col] = 0.0

# Parse 'object_imcoords': split the comma-separated string into floats, one value per column.
df_train[imcoords_columns] = df_train.apply(
    lambda row: [float(value) for value in row['object_imcoords'].split(',')],
    axis=1,
    result_type='expand',
)
df_train.head(3)

In [None]:
# Class ids must be integers so they can serve as dictionary keys and plot labels.
df_train['type_id'] = df_train['type_id'].astype(int)

# One row per class, ordered by id, with a 1-based row index for display.
df_types = df_train[['type_id','type_name']].drop_duplicates().sort_values(by='type_id').reset_index(drop=True)
df_types.index = df_types.index + 1

# type_id : type_name
# Key by the actual type_id column, not the 1-based row position: the two only
# coincide when the ids present in the data are exactly 1..N with no gaps.
type_name_dict = df_types.set_index('type_id')['type_name'].to_dict()

# type_id : color (plt.cm.tab20). Colours are assigned in ascending type_id
# order and wrap around the palette if there are more types than colours.
palette = plt.cm.tab20.colors
type_colors = {
    type_id: palette[position % len(palette)]
    for position, type_id in enumerate(type_name_dict)
}

### Display Image & Objects
* 사진과 객체를 출력해주는 유틸리티 함수 

In [None]:
def show_n_mask(image_id, obj_type=None, gray_mode=True, image_dir='/content/drive/MyDrive/sia/'):
    """Display one image with its labelled object polygons drawn on top.

    Args:
        image_id: Image file name; also matched against ``df_train.image_id``
            to select the objects to draw.
        obj_type: When given, only objects whose ``type_name`` equals this
            value are drawn. ``None`` (default) draws every object.
        gray_mode: Show the image in grayscale when True, in colour otherwise.
        image_dir: Directory that contains the image files.

    Raises:
        FileNotFoundError: If the image cannot be read from ``image_dir``.
    """
    image_path = os.path.join(image_dir, image_id)

    # The second argument of cv2.imread is an IMREAD_* flag, not a COLOR_*
    # conversion code; IMREAD_COLOR always yields a 3-channel BGR array.
    img = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('Could not read image: {}'.format(image_path))

    plt.figure(figsize=(18, 18))
    if gray_mode:
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), cmap='gray')
    else:
        # OpenCV loads BGR while matplotlib expects RGB; convert so colours are not swapped.
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    # All objects of this image, or only the selected type (obj_type).
    objects = df_train.loc[df_train.image_id == image_id, ['type_id', 'type_name'] + imcoords_columns]
    if obj_type is not None:
        objects = objects[objects.type_name == obj_type]

    for _, row in objects.iterrows():
        corners = row[imcoords_columns].to_numpy(dtype=float)
        xs, ys = corners[::2], corners[1::2]
        color = type_colors[row.type_id]
        # Repeat the first corner at the end so the outline is explicitly closed.
        plt.fill(np.append(xs, xs[0]), np.append(ys, ys[0]),
                 facecolor=color, edgecolor=color, linewidth=2, alpha=0.5)
        # Label the polygon with its type_id at the mean of its corner coordinates.
        plt.text(xs.mean(), ys.mean(), str(row.type_id), color='white', fontsize=12, fontweight='semibold')
    plt.show()

In [None]:
# Overlay every labelled object on the sample image (obj_type defaults to None).
show_n_mask('OBJ00028_PS3_K3_NIA0078.png')

In [None]:
# Same image, restricted to objects whose type_name is 'small car'.
show_n_mask('OBJ00028_PS3_K3_NIA0078.png', 'small car')

#### Sample : 1 Object (1 bridge)

In [None]:
# NOTE(review): this is the same image id and arguments as the earlier all-objects call,
# while the heading above describes a single-bridge sample — confirm the intended image.
show_n_mask('OBJ00028_PS3_K3_NIA0078.png')

In [None]:
# EOF