In [1]:
# helper library
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import os, re
import json, shutil
import collections

# Matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8*" and
# 3.8 removed the old "seaborn" alias, so use whichever name this install has.
plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")
# Show at most 50 rows / 50 columns when a DataFrame is displayed.
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns',50)

# Image
from PIL import Image,ImageDraw
import cv2 as cv

In [2]:
# Root directory of the full AIMMO dataset.
# Set the AIMMO_DATA_PATH environment variable to point at another copy of the
# data; when it is unset the original local path below is used.
data_path = os.environ.get('AIMMO_DATA_PATH', 'C:/Users/Lee/Desktop/AIFFEL/Aimmo_dataset')

In [3]:
# Count the images, annotations and total files stored under a dataset directory
def number_of_data(data_path,number_of_image = 0 ,number_of_annotation = 0,total_data = 0):
    '''
    Walk data_path and every directory below it, counting files.

    data_path: directory to scan
    number_of_image: starting value for the count of files ending in 'png'
    number_of_annotation: starting value for the count of files ending in 'json'
    total_data: starting value for the count of all regular files

    returns: (number_of_image, number_of_annotation, total_data)
    '''
    # Directories still waiting to be listed; replaces the recursive descent.
    pending = [data_path]
    while pending:
        current_dir = pending.pop()
        for entry in os.listdir(current_dir):
            entry_path = os.path.join(current_dir+'/',entry)
            if os.path.isdir(entry_path):
                pending.append(entry_path)
                continue
            if not os.path.isfile(entry_path):
                # neither a regular file nor a directory: not counted
                continue
            total_data += 1
            if entry.endswith('json'):
                number_of_annotation += 1
            elif entry.endswith('png'):
                number_of_image += 1
    return number_of_image,number_of_annotation,total_data

In [4]:
# Collect only the annotations (JSON files) under a directory into a list
def get_annotation(data_path,annotation_list=None):
    '''
    Recursively load every file ending in 'json' found under data_path.

    data_path: directory to scan (sub-directories are scanned too)
    annotation_list: optional list to append the parsed annotations to;
        a new list is created when omitted

    returns: the list holding one parsed JSON object per annotation file
    '''
    # Create the list per call. The previous `[]` default was a single object
    # shared by every call, so calling the function twice returned the
    # annotations of both calls.
    if annotation_list is None:
        annotation_list = []
    for file in os.listdir(data_path):
        path = os.path.join(data_path+'/',file)
        if os.path.isfile(path):
            if file.endswith('json'):
                with open(path,'r',encoding="UTF-8") as annotation:
                    annotation_list.append(json.load(annotation))
        elif os.path.isdir(path) :
            # pass the same list down so nested results land in one place
            get_annotation(path,annotation_list)
    return annotation_list

In [5]:
def annotation_summary(annotations):
    '''
    Print summary statistics for a list of loaded annotation dicts.

    annotations: iterable of dicts. 'size', 'weather' and 'time' are read from
        every entry; 'data_purpose' and 'annotations' (items with 'label' and
        'attribute') are read only from entries whose weather is 'sunny' and
        whose time is 'day'.

    The train / validation / test counts and the label / attribute lists
    therefore cover the sunny-day entries only, while sizes cover all entries.

    returns: (size, label, attribute) lists
    '''
    size = []
    sun_day = 0
    train_annotation = 0
    validation_annotation = 0
    test_annotation = 0
    attribute = []
    label = []
    for annotation in annotations:
        size.append(annotation['size'])
        if annotation['weather'] == 'sunny' and annotation['time'] == 'day':
            sun_day +=1

            if annotation['data_purpose'] == 'train':
                train_annotation += 1
            elif annotation['data_purpose'] == 'validation':
                # was misspelled `validation_annotaion`, which raised
                # UnboundLocalError on the first validation entry
                validation_annotation += 1
            else :
                # any purpose other than train / validation counts as test
                test_annotation += 1

            for annotaion_ in annotation['annotations']:
                label.append(annotaion_['label'])
                attribute.append(annotaion_ ['attribute'])

    # JSON arrays are loaded as lists, which cannot be put in a set; turn them
    # into tuples for the distinct-size display only (the returned list keeps
    # the raw values).
    unique_size = {tuple(s) if isinstance(s, list) else s for s in size}

    print('size of image',unique_size)
    print('number of label',len(set(label)))
    print('number of class',len(set(attribute)))
    print('number of sunny and day image:', sun_day)
    print('number of train annotation:', train_annotation)
    print('number of validation annotation:', validation_annotation)
    print('number of test annotation:', test_annotation)

    return size,label,attribute

In [6]:
def feature_summary(df,feature):
    '''
    Print a short text summary of one column.

    df: DataFrame holding the column
    feature: column name (formatted to a string before the lookup)

    Prints the number of distinct values, the distinct values themselves,
    and the frequency of each value.
    '''
    column = df[f'{feature}']
    print('number of feature uniqe :', column.nunique())
    print('feature factor', column.unique())
    print(column.value_counts())

In [7]:
def feature_visualization(df,feature):
    '''
    Draw a count plot and a pie chart of one column side by side.

    df: DataFrame holding the column
    feature: column name (formatted to a string before the lookup)
    '''
    column = f'{feature}'
    # one pastel colour per distinct value, for the pie chart
    pie_colors = sns.color_palette('pastel')[0:df[column].nunique()]
    counts = df[column].value_counts()
    slices = dict(counts)

    plt.figure(figsize=(13 , 7))
    plt.subplots_adjust(wspace=0.8)

    # left panel: one bar per value, ordered as value_counts() returns them
    plt.subplot(1,2,1)
    bar_ax = sns.countplot(x = column,data =df , order = counts.index, palette = sns.color_palette('coolwarm') )
    plt.title('countplot')
    plt.xticks(rotation=45)
    # write each bar's height just above its top edge
    for bar in bar_ax.patches:
        height = bar.get_height()
        center_x = bar.get_x() + bar.get_width() / 2.
        bar_ax.annotate(format(height), (center_x, height), ha = 'center', va = 'center', xytext = (0, 5), textcoords = 'offset points')

    # right panel: share of each value
    plt.subplot(1,2,2)
    plt.pie(slices.values(), labels = slices.keys(),autopct='%.1f%%',colors=pie_colors)
    plt.title('pieplot')

    plt.show()

In [8]:
def get_annotataion_df(annotations):
    '''
    Build one DataFrame row per object annotation of the sunny/day entries.

    annotations: iterable of dicts with 'filename', 'weather', 'time' and
        'annotations' keys

    returns: DataFrame made from the items of each kept entry's 'annotations',
        plus an 'ano_name' column holding the entry's 'filename'
    '''
    # (annotation item, owning filename) pairs, in input order
    rows = []
    for annotation in annotations:
        filename = annotation['filename']
        if annotation['weather'] != 'sunny' or annotation['time'] != 'day':
            continue
        for item in annotation['annotations']:
            rows.append((item, filename))

    anno_df = pd.DataFrame([item for item, _ in rows])
    anno_df['ano_name'] = [filename for _, filename in rows]
    return anno_df

def feature_summary(df,feature):
    '''
    Print the number of distinct values of one column, a blank gap, and the
    frequency of each value.

    df: DataFrame holding the column
    feature: column name (formatted to a string before the lookup)

    NOTE: this redefines the feature_summary from an earlier cell; once this
    cell has run, this version (without the 'feature factor' line) is the one
    that gets called.
    '''
    column = df[f'{feature}']
    print('number of feature uniqe :', column.nunique())
    print('\n')
    print(column.value_counts())

In [9]:
# Move every sunny/day annotation file and its image into a single directory
def move_file(data_path,move_list=None,move_path = 'C:/Users/Lee/Desktop/AIFFEL/Aimmo_dataset'):
    '''
    Find the annotation files under data_path whose 'weather' is 'sunny' and
    whose 'time' is 'day', and move them with their images into move_path.

    data_path: directory to scan (sub-directories are scanned too)
    move_list: optional list of paths to move; the paths found are appended to
        it. A new list is created when omitted.
    move_path: destination directory (created if it does not exist)

    returns: the list of source paths that were scheduled for moving

    raises: whatever shutil.move raises, e.g. when a file of the same name
        already exists in move_path
    '''
    # Create the list per call: the previous `[]` default was shared between
    # calls, so a second call tried to move the first call's files again.
    if move_list is None:
        move_list = []

    # Collect everything first and move once. Previously the move loop ran at
    # every recursion level (and nested calls ignored move_path), so a parent
    # directory re-moved files its sub-directory call had already moved.
    _collect_sunny_day_files(data_path, move_list)

    if move_list:
        # without an existing directory shutil.move would rename the first
        # file to `move_path` instead of placing it inside
        os.makedirs(move_path, exist_ok=True)
    destination = os.path.normcase(os.path.abspath(move_path))
    for source in move_list:
        source_dir = os.path.normcase(os.path.dirname(os.path.abspath(source)))
        if source_dir == destination:
            # already in the destination directory; moving it would raise
            continue
        shutil.move(source,move_path)

    return move_list


def _collect_sunny_day_files(data_path, move_list):
    '''Append the paths of sunny/day annotation files under data_path, and of their images, to move_list.'''
    for file in os.listdir(data_path):
        path = os.path.join(data_path+'/',file)
        if os.path.isfile(path):
            if file.endswith('json'):
                with open(path,'r',encoding="UTF-8") as annotation:
                    anno = json.load(annotation)
                if anno['weather'] == 'sunny' and anno['time'] == 'day':
                    move_list.append(path)
                    # the image sits next to the annotation, named without the
                    # '_Bbox_GT.json' suffix
                    image_path = path.replace('_Bbox_GT.json','.png')
                    # skip it when the suffix did not match (same path again)
                    # or when no such image file exists
                    if image_path != path and os.path.isfile(image_path):
                        move_list.append(image_path)
        elif os.path.isdir(path) :
            _collect_sunny_day_files(path, move_list)

#move_list = move_file(data_path)

In [10]:
def sunny_day_annotation(move_path,annotation_list=None):
    '''
    Load every file ending in 'json' that sits directly inside move_path.

    move_path: directory to read (sub-directories are not scanned)
    annotation_list: optional list to append the parsed annotations to;
        a new list is created when omitted

    No weather/time filtering happens here: every JSON file in the directory
    is loaded.

    returns: the list holding one parsed JSON object per file
    '''
    # Create the list per call: the previous `[]` default was shared between
    # calls, so repeated calls returned duplicated annotations.
    if annotation_list is None:
        annotation_list = []
    for file in os.listdir(move_path):
        if file.endswith('json'):
            with open(move_path+'/'+file,'r',encoding="UTF-8") as annotation:
                annotation_list.append(json.load(annotation))
    return annotation_list