# Baseline 1: manually-extracted features

Training script for a simple breast keypoint detection solution.

This script was made as an example for the VISUM challenge.

## Initialization

First, load the data:

In [None]:
import numpy as np
import pickle
from matplotlib import pyplot as plt
from shapely.geometry import LineString, Point
import baseline1.process_image as proc
import baseline1.config as config
from utils import scoring
from utils import dists


# Load training dataset
def _read_pickle(path):
    """Unpickle and return the object stored at `path`."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


X_train = _read_pickle('/data/X_train.pickle')
y_train = _read_pickle('/data/y_train.pickle')

# Shuffle inputs and targets with the same seeded permutation so that they
# stay aligned and the order is reproducible between runs.
np.random.seed(config.seed)
indexes = np.random.permutation(np.arange(config.DATA_SET_SIZE))
X_train, y_train = X_train[indexes], y_train[indexes]

# Per-side accumulators for the nipple features of every training image.
# Once filled, they are used to compute a probabilistic model.
left_nipple_values, right_nipple_values = [], []

# Mean model is used if the algorithm fails to present a solution
# (i.e. no initial keypoints are found)
mean_model = []

## Auxiliary functions
Auxiliary functions used during training:

### compute_prob_dist
Given a list of values, computes the mean and standard deviation.

### get_angle
Given the breast contour points and the nipple position computes an angle value for the position of the nipple

### get_dist
Given the breast contour points and the nipple position computes the euclidean distance between nipple and breast contour.

### get_color
Given the image, breast contour points and the nipple position, returns the RGB color of the nipple relative to the mean breast color (nipple color minus mean breast color).

In [None]:
def compute_prob_dist(nipple_values):
    """Return the per-column mean and standard deviation of the samples.

    Parameters
    ----------
    nipple_values : sequence of feature rows (one row per sample).

    Returns
    -------
    tuple
        ``(means, stds)``, both computed along axis 0.
    """
    samples = np.asarray(nipple_values)
    column_means = np.mean(samples, axis=0)
    column_stds = np.std(samples, axis=0)
    return column_means, column_stds


def get_angle(breast, nipple):
    """Return an angle describing where the nipple sits relative to the contour.

    The reference point is the midpoint between the first and the last
    contour point. The result is ``arctan2`` of the first component of the
    nipple offset over its second component, in radians.

    Parameters
    ----------
    breast : array of contour points; only the first and last are used.
    nipple : array indexed as ``nipple[0, k]`` for component ``k``.
    """
    first_pt, last_pt = breast[0], breast[-1]
    midpoint = (first_pt + last_pt) / 2
    offset = nipple - midpoint
    return np.arctan2(offset[0, 0], offset[0, 1])


def get_dist(breast, nipple):
    """Return the distance from the nipple to the breast contour curve.

    The contour is resampled with ``dists.spline`` (1000 points) and turned
    into a polyline; the result is the shapely point-to-line distance.

    Parameters
    ----------
    breast : contour points handed to ``dists.spline``.
    nipple : nipple position; squeezed before building the point.
    """
    xs, ys = dists.spline(breast, n_points=1000)
    contour = LineString(np.stack([xs, ys], axis=1))
    nipple_point = Point(np.squeeze(nipple))
    return nipple_point.distance(contour)


def get_color(img,breast,nipple):
    """Return the nipple color relative to the mean breast color.

    Parameters
    ----------
    img : image array indexed as ``img[i, j, channel]``; the first three
        channels are used for the breast mean.
    breast : breast contour points, forwarded to ``proc.get_breast_mask``.
    nipple : nipple position; after rounding, ``nipple[0, 0]`` and
        ``nipple[0, 1]`` index the first and second image axes.

    Returns
    -------
    list
        Per-channel difference between the mean color of the 5x5 window
        centred on the nipple and the mask-weighted mean color of the image.
    """
    shape = [*img.shape[0:2]]
    mask = proc.get_breast_mask(shape, breast)

    # Get the mean color in the nipple area [5x5] square
    nipple = np.round(nipple).astype(int)
    row, col = nipple[0, 0], nipple[0, 1]
    # Clamp the window start at 0. A negative slice start is interpreted as
    # an offset from the end of the axis, which makes the window empty for a
    # nipple within two pixels of the border and turns the average into NaN.
    # The slice end needs no clamping: slicing already stops at the axis end.
    window = img[max(row - 2, 0):row + 3, max(col - 2, 0):col + 3]
    nipple_color = np.average(window, axis=(0, 1))

    # Get the mean color of the breast (each channel weighted by the mask)
    mean_color = np.asarray(
        [np.average(img[:, :, channel], weights=mask) for channel in range(3)]
    )
    # Return the nipple color as an offset from the mean breast color
    return list(nipple_color - mean_color)

## "Training"

Finding features from the images.

In [None]:
print("Training started (%d images)" % config.n_images_in_train)
print("\tCollecting patient features")

# Keypoint names and destination list for each side; both sides go through
# exactly the same feature extraction.
feature_targets = (
    ("left_boundary", "left_nipple", left_nipple_values),
    ("right_boundary", "right_nipple", right_nipple_values),
)

# For each patient, breast shapes and nipple features are captured.
# NOTE(review): iteration starts at index 110, so fewer images are used than
# the count printed above — confirm this offset is intentional.
for i in range(110, config.n_images_in_train):
    img, y = proc.preprocess_img(X_train[i], y_train[i])
    mean_model.append(y)

    # Angle, distance and color features of the nipple, left side first
    for boundary_key, nipple_key, collected in feature_targets:
        contour = dists.get_keypoints(y, boundary_key)
        nipple_pos = dists.get_keypoints(y, nipple_key)
        angle = get_angle(contour, nipple_pos)
        rel_distance = get_dist(contour, nipple_pos)
        color = get_color(img, contour, nipple_pos)
        collected.append([angle, rel_distance, *color])

print("\tComputing nipple probability distributions")

# Each feature is modelled as a gaussian: keep its mean and standard
# deviation, separately for the left and the right nipple.
left_nipple_params = compute_prob_dist(left_nipple_values)
right_nipple_params = compute_prob_dist(right_nipple_values)

print("\tSaving models")

# Persist the per-side gaussian parameters, then the averaged keypoints
np.save('models/left_nipple_params.npy', left_nipple_params)
np.save('models/right_nipple_params.npy', right_nipple_params)
mean_model = np.mean(mean_model, axis=0)
np.save('models/mean_model.npy', mean_model)

print("\tTraining finished")

## Validation

Run this cell to get an idea of how well the model trained above performs.

In [None]:
# Toggle for showing the prediction images during validation.
# NOTE(review): the validation loop in this file reads config.debug_final and
# nothing visible here reads DEBUG_FINAL — confirm which flag is intended.
DEBUG_FINAL = True  # check the prediction images

In [None]:
import model
from utils import scoring

# Per-image results collected over the validation range
detections = []
ground_truths = []
original_shapes = []

# Choose the first image to validate on: either only the images after the
# training range, or the whole data set.
if config.validate == "remaining":
    init_image = config.n_images_in_train
elif config.validate == "all":
    init_image = 0
else:
    # Without this branch init_image would be unbound (NameError), or, when
    # the cell is re-run in a notebook, silently keep a stale value.
    raise ValueError(
        "config.validate must be 'remaining' or 'all', got %r" % (config.validate,)
    )

print("Validation started (%d images)" % (config.DATA_SET_SIZE - init_image))

for i in range(init_image, config.DATA_SET_SIZE):
    # Report the index the image had before shuffling
    print("Image: ", indexes[i])
    det = model.test(X_train[i], debug_verbose=config.debug_final, suffix=str(i))
    detections.append(det)
    ground_truths.append(y_train[i])
    original_shapes.append(X_train[i].shape)
    if config.debug_final:
        # Overlay detections (red) and ground truth (blue) on the image;
        # both are viewed as 37 rows of two coordinates.
        plt.clf()
        y = y_train[i].reshape([37,2])
        detection = det.reshape([37,2])
        plt.imshow(X_train[i])
        plt.scatter(detection[:,0], detection[:,1], c="r", s=4)
        plt.scatter(y[:,0], y[:,1], c="b", s=4)
        plt.show()

# Persist the raw validation outputs so they can be re-scored later
with open('models/val_detections.pkl', 'wb') as f:
    pickle.dump(detections, f)
with open('models/val_ground_truths.pkl', 'wb') as f:
    pickle.dump(ground_truths, f)
with open('models/val_original_shapes.pkl', 'wb') as f:
    pickle.dump(original_shapes, f)

scores = scoring.generate_scores(detections,ground_truths,original_shapes)
with open('models/val_scores.pkl', 'wb') as f:
    pickle.dump(scores, f)
print("\tFinished validation")

# Average the scores over all validated images, rounded to 4 decimals
score_mean = list(np.round(np.asarray(scores).mean(axis=0), decimals=4))

print("Task:  ", "BrstB ", "nipple", "juggul")
print("Scores:", *score_mean)