In [None]:
import numpy as np
import pandas as pd
import os
import cv2
from icc.data_loaderB import ImageDataLoader

### Extracting dense image crops and collecting per-window densities

In [None]:
# Cut-off on a window's density (window density sum divided by window area):
# only windows strictly above this value are written out as dense crops.
threshold = 1.5e-3

In [None]:
# Source datasets: images and their ground-truth density maps (read as CSV).
train_path, train_gt_path = 'data/train/images', 'data/train/ground_truth_csv'
val_path, val_gt_path = 'data/val/images', 'data/val/ground_truth_csv'

# Destinations for the crops extracted from the train / val sets.
dense_train_img_path, dense_train_gt_path = (
    'data/dense/train/images',
    'data/dense/train/ground_truth_csv',
)
dense_val_img_path, dense_val_gt_path = (
    'data/dense/val/images',
    'data/dense/val/ground_truth_csv',
)

In [None]:
# Create the output directories up front. exist_ok=True keeps the cell safe to
# re-run and avoids the check-then-create race of os.path.exists + os.makedirs.
for out_dir in (dense_train_img_path, dense_train_gt_path,
                dense_val_img_path, dense_val_gt_path):
    os.makedirs(out_dir, exist_ok=True)

In [None]:
def _extract_from_image(img, gt_density, img_save_path, gt_save_path, stride, density_threshold, idx):
    """Slide the crop window over one image and save the crops that are dense enough.

    Returns a ``(densities, idx)`` tuple: the density of every window whose
    density is positive, and the running crop counter after this image.
    """
    h, w = img.shape[:2]

    # Window is a third of the image side, rounded down to a multiple of 4.
    th = int(h/3.0 - ((h/3.0) % 4))
    tw = int(w/3.0 - ((w/3.0) % 4))
    if th == 0 or tw == 0:
        # Sides shorter than 12 px round down to an empty window; dividing by
        # its zero area would raise, so there is nothing to extract here.
        return [], idx

    # Raises ValueError if the density map does not have h*w entries.
    density = gt_density.reshape((h, w))
    area = th*tw

    # Clamp to 1 so a stride larger than the window cannot produce a zero step.
    row_step = max(1, int(th/stride))
    col_step = max(1, int(tw/stride))

    densities = []
    # NOTE(review): the upper bounds are exclusive, so the window that sits
    # flush with the bottom/right edge is never visited. Kept as in the
    # original so the generated dataset does not change -- confirm intent.
    for x in range(0, h - th, row_step):
        for y in range(0, w - tw, col_step):
            window = density[x:x+th, y:y+tw]
            # The sum is rounded to 2 decimals *before* normalising by area
            # (original behaviour, preserved).
            den = round(np.sum(window).item(), 2) / area
            if den > 0:
                densities.append(den)

            if den > density_threshold:
                idx = idx+1
                cv2.imwrite(os.path.join(img_save_path, str(idx) + '.jpg'), img[x:x+th, y:y+tw])
                # np.save appends the '.npy' extension itself.
                np.save(os.path.join(gt_save_path, str(idx)), window)

    return densities, idx


def extract_dense_images(img_data_path, gt_data_path, img_save_path, gt_save_path, stride = 3,
                         density_threshold = None):
    """Cut overlapping crops out of every image and keep the dense ones.

    For each file in ``img_data_path`` the matching ``<stem>.csv`` density map
    is read from ``gt_data_path``. A window of about one third of the image
    (each side rounded down to a multiple of 4) is slid over the image in
    steps of ``window_side / stride``. Each window's density is its density
    sum divided by the window area.

    Parameters
    ----------
    img_data_path : str
        Directory holding the input images.
    gt_data_path : str
        Directory holding one header-less, comma-separated CSV per image.
    img_save_path : str
        Directory receiving the saved crops as ``<idx>.jpg``.
    gt_save_path : str
        Directory receiving the matching density crops as ``<idx>.npy``.
    stride : number, default 3
        Divisor applied to the window side to obtain the sliding step.
    density_threshold : float, optional
        Windows with a density strictly above this value are saved. When
        omitted, the notebook-level ``threshold`` variable is used.

    Returns
    -------
    list of float
        Density of every window whose density is greater than zero, whether
        or not it passed the threshold.

    Raises
    ------
    ValueError
        If ``stride`` is not positive.

    Notes
    -----
    ``idx`` starts at 1 on every call, so calling again with the same output
    directories overwrites earlier crops. Files that OpenCV cannot decode and
    images under 12 px on a side are skipped.
    """
    if stride <= 0:
        raise ValueError('stride must be positive, got {}'.format(stride))
    if density_threshold is None:
        # Fall back to the notebook-level setting so existing calls keep working.
        density_threshold = threshold

    # Sorted so the numbering of the saved crops is reproducible between runs
    # (os.listdir returns entries in arbitrary order).
    img_files = sorted(filename for filename in os.listdir(img_data_path)
                       if os.path.isfile(os.path.join(img_data_path, filename)))

    TD_List = []
    idx = 0
    for i, fname in enumerate(img_files):

        img = cv2.imread(os.path.join(img_data_path, fname))
        if img is None:
            # cv2.imread reports failure by returning None instead of raising.
            print('Skipping unreadable image:', fname)
        else:
            gt_file = os.path.join(gt_data_path, os.path.splitext(fname)[0] + '.csv')
            # DataFrame.as_matrix() was removed in pandas 1.0; .values is
            # available in every pandas version.
            gt_density = pd.read_csv(gt_file, sep=',', header=None).values
            found, idx = _extract_from_image(img, gt_density, img_save_path, gt_save_path,
                                             stride, density_threshold, idx)
            TD_List.extend(found)

        if ((i+1) % 100 == 0) or (i+1 == len(img_files)):
            print ('Processed ', i+1, '/', len(img_files), 'files')

    return TD_List

In [None]:
# Crop the training set and keep the per-window densities it returns.
Train_TD_List = extract_dense_images(
    img_data_path=train_path,
    gt_data_path=train_gt_path,
    img_save_path=dense_train_img_path,
    gt_save_path=dense_train_gt_path,
)

In [None]:
# Crop the validation set and keep the per-window densities it returns.
Val_TD_List = extract_dense_images(
    img_data_path=val_path,
    gt_data_path=val_gt_path,
    img_save_path=dense_val_img_path,
    gt_save_path=dense_val_gt_path,
)

### Visualizing data for thresholding

In [None]:
import matplotlib.pyplot as plt

In [None]:
def hist(data, title, bins = 50):
    """Draw a histogram of ``data`` on the current axes with a log-scaled y axis.

    data  -- values to bin
    title -- text placed above the plot
    bins  -- forwarded as the histogram's bins argument (default 50)
    """
    axes = plt.gca()
    axes.hist(data, bins)
    # Counts are shown on a logarithmic scale.
    axes.set_yscale('log')
    axes.set_xlabel('Total Density')
    axes.set_ylabel('Frequency')
    axes.set_title(title)

In [None]:
# Distribution of the window densities collected from the training set.
hist(data=Train_TD_List, title='Training Data Density histogram')

In [None]:
# How many of the non-zero training windows exceed the threshold.
densities = np.array(Train_TD_List)
print("Selecting {}/{} based on threshold".format(
    np.count_nonzero(densities > threshold),
    densities.size,
))

In [None]:
# Distribution of the window densities collected from the validation set.
hist(data=Val_TD_List, title='Val Data Density histogram')

In [None]:
# How many of the non-zero validation windows exceed the threshold.
densities = np.array(Val_TD_List)
print("Selecting {}/{} based on threshold".format(
    np.count_nonzero(densities > threshold),
    densities.size,
))