### Load the data, extract originals and labels

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import patch
import dataset

In [None]:
# Time the ROI processing step. perf_counter() is a monotonic,
# high-resolution clock, so the reported duration cannot be distorted by
# system clock adjustments the way a time.time() difference can.
start_time = time.perf_counter()

# NOTE(review): presumably reads the ROI list from the CSV file and the images
# from the given directory; the meaning of the trailing 0 is defined inside
# patch.process_rois and is not visible here -- confirm before changing it.
originals, labels, count = patch.process_rois('CS8_ROIs.csv', 'cellsuspensionED8/', 0)
print('count = ', count)

print("--- %s seconds ---" % (time.perf_counter() - start_time))

In [None]:
# Keep only the first `count` rows of both arrays.
# NOTE(review): presumably process_rois over-allocates its outputs and `count`
# is the number of rows actually filled -- confirm against patch.process_rois.
labels, originals = labels[:count, :], originals[:count, :, :]

print('labels shape: ', labels.shape)
print('originals shape: ', originals.shape)

In [None]:
# Pass column 0 of the labels for rows 252720..275519 (22800 rows) to the
# project's skeleton plotting helper.
# Arithmetic check: 22800 == (172 - 23 + 1) * (174 - 23 + 1).
# NOTE(review): presumably 172 x 174 is the image size, 23 the patch size, and
# the slice covers all patches of a single image -- confirm against
# patch.plot_skeleton.
patch.plot_skeleton(labels[252720:275520, 0], 172, 174, 23)

In [None]:
# Pass the original patches for rows 252720..275519 (22800 rows) to the
# project's patch plotting helper.
# Arithmetic check: 22800 == (172 - 23 + 1) * (174 - 23 + 1).
# NOTE(review): presumably 172 x 174 is the image size, 23 the patch size, and
# the slice covers all patches of a single image -- confirm against
# patch.plot_patches.
patch.plot_patches(originals[252720:275520, : , :], 172, 174, 23)

### Evaluate the data

In [None]:
# Show two of the skeleton images side by side for a visual check.
# NOTE(review): `path` is an absolute, machine-specific location; the cell
# only runs where that directory exists.
path = '/Users/vladarozova/Dropbox/PhD/angiogenesis/Anna\'s results/cellsuspensionED8/'
filename1, filename2 = 'egg2_1_blurred_skeletone.jpg', 'egg3_2_blurred_skeletone.jpg'
img1, img2 = plt.imread(path + filename1), plt.imread(path + filename2)

# 1 x 2 subplot grid: slot 121 is the left panel, slot 122 the right one.
for _slot, _img in ((121, img1), (122, img2)):
    plt.subplot(_slot)
    plt.imshow(_img)
plt.show()

In [None]:
# Histogram of the raw label values, used to pick a binarisation threshold.
# NOTE(review): if the labels are 8-bit intensities (0..255, i.e. 256 distinct
# values), 255 bins will not give one bin per value -- confirm the intended
# bin count.
plt.hist(labels, bins = 255)
plt.show()

### Create binary labels

In [None]:
# Convert the raw labels into binary labels with the project helper.
# NOTE(review): 128 is presumably the intensity threshold; whether the
# comparison is > or >= is decided inside dataset.binarize_labels -- confirm.
binary_labels = dataset.binarize_labels(labels, 128)
# Plot column 0 of the binary labels for the same 22800-row slice and the same
# 172 / 174 / 23 arguments as the raw-label plot earlier in the notebook.
patch.plot_skeleton(binary_labels[252720:275520, 0], 172, 174, 23)

### Subsample negatives to make the negative-to-positive ratio 1:1

In [None]:
# Count the positive samples with a vectorised reduction. The built-in sum()
# iterates over the array row by row in Python and, for a 2-D label array,
# yields a 1-element array whose conversion with int() is deprecated in
# recent NumPy releases. np.sum reduces over all elements and returns a scalar.
# Assumes binary_labels holds 0/1 values in a single column -- TODO confirm.
n_pos = int(np.sum(binary_labels))
n_neg = count - n_pos
print('Number of negative samples: ', n_neg)
print('Number of positive samples: ', n_pos)
print('Fraction of positive samples: ', n_pos / count * 100, '%')

# Target number of negatives per positive after subsampling.
new_ratio = 1
n_neg_new = int(new_ratio * n_pos)
print('Need to randomly choose ', n_neg_new, ' negative samples to make it', new_ratio, ': 1')

In [None]:
# Row indices of every sample whose binary label equals 0.
neg_indeces = (np.where(binary_labels == 0))[0]
# Shuffle in place using NumPy's global RNG; no seed is set in this cell, so
# the selection differs between runs unless a seed was set earlier.
np.random.shuffle(neg_indeces)
# Keep the first n_neg_new shuffled indices as the sampled negatives (the
# slice is silently shorter if fewer negatives are available).
neg_indeces_new = neg_indeces[:n_neg_new]

### Create stratified training and test sets

In [None]:
# Fraction of each class that goes into the training set.
train_ratio = 0.8
# Size of the balanced dataset: all positives plus the sampled negatives.
n_samples = n_pos + n_neg_new

print(n_samples, n_pos, n_neg_new)

# Preview of the expected split sizes. Every figure is rounded independently,
# so the totals can differ by a sample from the sum of the per-class counts.
print(
    'train: ', round(train_ratio * n_samples),
    ', test: ', round((1 - train_ratio) * n_samples),
)
print(
    'Positive. y_train: ', round(train_ratio * n_pos),
    ', y_test: ', round((1 - train_ratio) * n_pos),
)
print(
    'Negative. y_train: ', round(train_ratio * n_neg_new),
    ', y_test: ', round((1 - train_ratio) * n_neg_new),
)

In [None]:
# Row indices of every positive sample (label == 1), shuffled in place with
# NumPy's global RNG.
pos_indeces = np.where(binary_labels == 1)[0]
np.random.shuffle(pos_indeces)

# Cut each class at round(train_ratio * class_size): the leading part goes to
# the training set, the remainder to the test set.
pos_indeces_train, pos_indeces_test = np.split(
    pos_indeces, [round(train_ratio * n_pos)]
)

# The sampled negatives were already shuffled when they were drawn, so they
# are only split here.
neg_indeces_train, neg_indeces_test = np.split(
    neg_indeces_new, [round(train_ratio * n_neg_new)]
)

In [None]:
# Merge the per-class index arrays into one index array per split.
train_indeces = np.append(pos_indeces_train, neg_indeces_train, axis=0)
test_indeces = np.append(pos_indeces_test, neg_indeces_test, axis=0)

# Shuffle in place so the two classes are interleaved. The training indices
# are shuffled before the test indices, which keeps the RNG call order fixed.
np.random.shuffle(train_indeces)
np.random.shuffle(test_indeces)

# Gather the patches and their binary labels for each split.
X_train, y_train = originals[train_indeces, :, :], binary_labels[train_indeces]
X_test, y_test = originals[test_indeces, :, :], binary_labels[test_indeces]

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)
# Bare expression: shown as the cell output when run in a notebook.
X_train.dtype

### Calculate stats on training set

In [None]:
# Global statistics over every element of the training patches. Only the
# training set is used, and the same values are applied to the test set later.
train_mean = X_train.mean()
# ddof=1 gives the sample (n - 1) standard deviation.
train_sd = X_train.std(ddof=1)

In [None]:
# Standardise the training patches with the training-set mean and SD.
# NOTE(review): presumably computes (X - mean) / sd -- confirm against
# dataset.standardize.
X_train_std = dataset.standardize(X_train, train_mean, train_sd)

In [None]:
# Standardise the test patches with the *training* statistics, so no
# information from the test set enters the preprocessing.
# NOTE(review): presumably computes (X - mean) / sd -- confirm against
# dataset.standardize.
X_test_std = dataset.standardize(X_test, train_mean, train_sd)

### Create a small dataset

In [None]:
# Take the first 100 standardised training samples and their labels as a
# small subset (the training indices were shuffled before the gather).
X_train_small, y_train_small = X_train_std[:100, :, :], y_train[:100, :]

print(X_train_small.shape, y_train_small.shape)
# Sum of the subset's labels; for 0/1 labels this is the number of positives.
print(y_train_small.sum())