In [None]:
import numpy as np
import pickle
from icc.data_loaderB import ImageDataLoader

### Collecting sliding windows whose density exceeds a threshold

In [None]:
# Minimum window density (density sum inside the window divided by the window
# area) that a window must exceed to be selected by extract_dense_images.
threshold = 1.5e-3

In [None]:
# Image directory and ground-truth directory for each split, given as paths
# relative to the notebook's working directory.
train_path, train_gt_path = 'data/train/images', 'data/train/ground_truth_csv'
val_path, val_gt_path = 'data/val/images', 'data/val/ground_truth_csv'

In [None]:
# One loader per split. Both are built with identical options: every optional
# flag (shuffle, gt_downsample, pre_load, sr_mode) is switched off.
train_data_loader = ImageDataLoader(
    train_path, train_gt_path,
    shuffle=False, gt_downsample=False, pre_load=False, sr_mode=False,
)

val_data_loader = ImageDataLoader(
    val_path, val_gt_path,
    shuffle=False, gt_downsample=False, pre_load=False, sr_mode=False,
)

In [None]:
def extract_dense_images(data_loader, stride = 3, density_threshold = None):
    """Slide a window over every image and record the windows that are dense enough.

    The window spans roughly one third of the image along each axis (rounded
    down to a multiple of 4) and advances by ``window_size / stride`` pixels,
    but never by less than one pixel. A window's density is the sum of the
    ground-truth density map inside it (rounded to two decimals) divided by
    the window area.

    Parameters
    ----------
    data_loader : iterable of dict
        Yields blobs with the keys ``'fname'``, ``'data'`` (array whose first
        two dimensions are taken as rows and columns) and ``'gt_density'``
        (array that can be reshaped to ``(rows, columns)``). It must also
        provide ``get_num_samples()``, which is used for progress reporting.
    stride : positive number, optional
        Divisor applied to the window size to obtain the step between
        consecutive windows. Defaults to 3.
    density_threshold : float, optional
        Windows whose density is strictly greater than this value are
        selected. When omitted, the notebook-level ``threshold`` is used.

    Returns
    -------
    TD_Dict : dict
        Maps each file name to a list of ``[x, x + th, y, y + tw]`` boxes
        (row start, row end, column start, column end) of the selected windows.
    candidates : int
        Number of windows, over all images, with a density greater than zero.
    selected : int
        Number of windows, over all images, with a density above the threshold.

    Raises
    ------
    ValueError
        If ``stride`` is not positive.
    """
    if stride <= 0:
        raise ValueError("stride must be positive, got {!r}".format(stride))
    if density_threshold is None:
        # Fall back to the notebook-level setting so existing calls keep working.
        density_threshold = threshold

    TD_Dict = {}
    candidates = 0
    selected = 0

    for idx, blob in enumerate(data_loader, start=1):

        fname = blob['fname']
        img = blob['data']
        gt_density = blob['gt_density']

        h = img.shape[0]
        w = img.shape[1]

        # Window size: a third of the image, rounded down to a multiple of 4.
        th = int(h/3.0 - ((h/3.0) % 4))
        tw = int(w/3.0 - ((w/3.0) % 4))

        density = gt_density.reshape((h, w))
        area = th*tw

        TD_Dict[fname] = []

        # Images smaller than 12 px along an axis yield an empty window
        # (area == 0); they get an empty list instead of a division by zero.
        if area > 0:
            # A step of 0 (window smaller than `stride`) would never advance,
            # so the step is clamped to at least one pixel.
            step_x = max(1, int(th/stride))
            step_y = max(1, int(tw/stride))

            # Window origins stop strictly before h - th / w - tw.
            for x in range(0, h-th, step_x):
                for y in range(0, w-tw, step_y):
                    den = round(np.sum(density[x:x+th, y:y+tw]).item(), 2) / area
                    if den > 0:
                        candidates += 1
                    if den > density_threshold:
                        selected += 1
                        TD_Dict[fname].append([x, x+th, y, y+tw])

        total = data_loader.get_num_samples()
        if (idx % 100 == 0) or (idx == total):
            print ('Processed ', idx, '/', total, 'files')

    return TD_Dict, candidates, selected

In [None]:
# Scan the training split. `selected` counts the windows whose density exceeded
# the threshold; `candidates` counts the windows with any non-zero density.
Train_TD_Dict, candidates, selected = extract_dense_images(train_data_loader)

print("Selecting {}/{} based on threshold".format(selected, candidates))

In [None]:
# Scan the validation split. Note that `candidates` and `selected` are rebound
# here, so any counts previously stored under those names are replaced.
Val_TD_Dict, candidates, selected = extract_dense_images(val_data_loader)

print("Selecting {}/{} based on threshold".format(selected, candidates))

In [None]:
def save(file, data):
    """Serialize `data` with pickle into the file at path `file`.

    The file is opened in binary write mode, so existing content is
    overwritten, and the highest pickle protocol available is used.
    """
    with open(file, 'wb') as sink:
        pickler = pickle.Pickler(sink, protocol=pickle.HIGHEST_PROTOCOL)
        pickler.dump(data)
        
# Persist the per-file window boxes of both splits as pickle files under data/.
save('data/train_dense_data.pkl', Train_TD_Dict)
save('data/val_dense_data.pkl', Val_TD_Dict)

### Visualizing data for thresholding

In [None]:
# Flatten the per-file lists into one list of entries per split.
# NOTE(review): each entry is a window box [x, x+th, y, y+tw] as appended by
# extract_dense_images, not a density value, so the histograms built from
# these lists show box coordinates rather than densities. Confirm whether
# the densities themselves were meant to be collected and plotted.
Train_TD_List = [item for sublist in Train_TD_Dict.values() for item in sublist]
Val_TD_List = [item for sublist in Val_TD_Dict.values() for item in sublist]

In [None]:
import matplotlib.pyplot as plt

In [None]:
def hist(data, title, bins = 50):
    """Draw a histogram of `data` with a logarithmic frequency axis.

    data:  values handed to `plt.hist`.
    title: text shown above the plot.
    bins:  bin specification handed to `plt.hist` (50 by default).

    Drawing goes through the pyplot state machine; nothing is returned.
    """
    plt.hist(data, bins=bins)
    plt.title(title)
    plt.ylabel('Frequency')
    plt.xlabel('Total Density')
    # Frequencies are displayed on a log scale.
    plt.yscale('log')

In [None]:
# Histogram of the flattened training-split entries.
hist(Train_TD_List, 'Training Data Density histogram')

In [None]:
# Histogram of the flattened validation-split entries.
hist(Val_TD_List, 'Val Data Density histogram')