In [1]:
import cv2
import numpy as np
import pandas as pd
import pickle
import xgboost as xgb
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import skimage
#%matplotlib inline
%pylab inline
import os
import sys
from time import time
from glob import glob
sys.path.append(os.environ['REPO_DIR'])
from extractPatches import patch_extractor
from lib.utils import configuration

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Build the section-index -> filename lookup for the chosen stack from its
# "<stack>_sorted_filenames.txt" listing (one "<filename> <index>" pair per line).
stack = 'MD594'
fp = os.path.join(os.environ['ROOT_DIR'], 'CSHL_data_processed', stack, stack + '_sorted_filenames.txt')
with open(fp, 'r') as f:
    # Skip empty lines (e.g. a trailing newline at the end of the listing);
    # they would otherwise split into [] and break the two-value unpacking below.
    fn_idx_tuples = [line.split() for line in f if line.strip()]
section_to_filename = {int(idx): fn for fn, idx in fn_idx_tuples}

In [3]:
# Load the pickled patch locations for this stack. Later cells index the result
# as all_patch_locations[structure_name][section] and read columns 0 and 1 of
# each entry as x / y patch centres.
fname = os.path.join(os.environ['ROOT_DIR'], 'CSHL_data_processed', stack, 'All_patch_locations.pkl')
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
# encoding='latin1' is kept from the original call (commonly needed for pickles
# written by Python 2 -- TODO confirm the file's origin).
with open(fname, 'rb') as f:
    all_patch_locations = pickle.load(f, encoding='latin1')

In [4]:
def CDF(x):
    """Return the sorted samples and their empirical cumulative fractions.

    Parameters
    ----------
    x : array-like
        Sample values.

    Returns
    -------
    (ordered, fractions)
        ``ordered`` is ``np.sort(x)``; ``fractions[i]`` is ``i / n`` where ``n``
        is the length of the first axis, so the fractions run from 0 up to
        (but not including) 1.
    """
    ordered = np.sort(x)
    n_samples = ordered.shape[0]
    fractions = np.arange(n_samples) / n_samples
    return ordered, fractions

In [5]:
def features_extractor(patch,params):
    """Summarise one grayscale patch as a flat list of cell-shape features.

    Parameters
    ----------
    patch : numpy.ndarray
        Image tile to analyse.
    params : dict
        Configuration; reads ``params['preprocessing']['polarity']`` (``-1``
        inverts the tile as ``255 - tile``) and
        ``params['preprocessing']['min_std']`` (tiles whose pixel standard
        deviation is below this are treated as blank).

    Returns
    -------
    list
        For a blank tile: 201 zeros. Otherwise, for every feature column, the
        ten sample values whose empirical CDF position is closest to
        0.1, 0.2, ..., 1.0, followed by ``number_of_cells / 100``.
    """
    # Width of the placeholder returned for blank tiles (value kept from the
    # original code; presumably 20 feature columns x 10 quantiles + 1 count --
    # TODO confirm against the extractor's output).
    n_blank_features = 201

    tile = patch
    if params['preprocessing']['polarity'] == -1:
        tile = 255 - tile
    min_std = params['preprocessing']['min_std']
    _std = np.std(tile.flatten())

    if _std < min_std:
        # The original branch referenced undefined names (patches, i, features)
        # and therefore always raised NameError instead of returning zeros.
        print('image std=', _std, 'too blank')
        return [0] * n_blank_features

    # Only build the extractor once we know the tile will actually be segmented.
    extractor = patch_extractor(patch, params)
    Stats = extractor.segment_cells(tile)
    cells = pd.DataFrame(extractor.extract_blobs(Stats, tile))
    cells = cells[cells['padded_patch'].notnull()]
    # Keyword form: the positional axis argument is not accepted by newer pandas.
    cells = cells.drop(columns=['padded_patch', 'left', 'top'])
    cells = np.asarray(cells)
    # The first remaining column holds a sequence per cell; keep its first 10 entries.
    for k in range(len(cells)):
        cells[k][0] = cells[k][0][:10]
    # Expand that sequence into individual columns and append the other columns.
    origin = np.concatenate((np.array(list(cells[:,0])), cells[:,1:]), axis=1)

    extracted = []
    for k in range(origin.shape[1]):
        x, y = CDF(origin[:,k])
        # Sample value nearest to each decile of the empirical CDF.
        ten = [x[np.argmin(np.absolute(y - 0.1*(j+1)))] for j in range(10)]
        extracted.extend(ten)
    extracted.append(cells.shape[0]/100)
    return extracted

In [6]:
# XGBoost booster settings and number of boosting rounds used by xgb.train below.
param = {
    'max_depth': 3,                  # depth of each tree
    'eta': 0.2,                      # shrinkage parameter
    'silent': 1,                     # legacy flag: 1 requests quiet output (see XGBoost parameter docs)
    'objective': 'binary:logistic',  # alternative noted by the author: 'multi:softmax'
    'nthread': 7,                    # number of threads used
    'num_class': 1,
}
num_round = 100

In [7]:
# Read the shape-extraction settings from the repository's YAML file and echo them.
yamlfile = '{}/shape_params-aws.yaml'.format(os.environ['REPO_DIR'])
params = configuration(yamlfile).getParams()
print(params)

{'name': 'aws', 'paths': {'scripts_dir': '/data/Github/shapeology_code/scripts', 'exec_dir': '/home/ubuntu/Datajoint_Interface/project_schemas/atlas_schema_python_v3/Cell_Extractor/', 'data_dir': '/data/BstemAtlasDataBackup/ucsd_brain/', 'DiffusionMap': '/data/Github/shapeology_code/notebooks/diffusionMap'}, 'preprocessing': {'polarity': -1, 'min_std': 10, 'offset': -20, 'min_area': 10}, 'normalization': {'size_thresholds': [15, 51, 201]}}


  self.D=yaml.load(open(yamlFile,'r'))


In [15]:
# Structure names to process.
paired_structures = ['5N', '6N', '7n', 'Amb', 'LC', 'LRt', 'Pn', 'Tz', 'VLL', 'RMC',
                     'SNC', 'SNR', '3N', '4N', 'Sp5I', 'Sp5O', 'Sp5C', 'PBG', '10N', 'VCA', 'VCP', 'DC']
singular_structures = ['AP', '12N', 'RtTg', 'SC', 'IC']

all_structures = paired_structures + singular_structures
stack = 'MD594'

# All paths are built with os.path.join so they are valid whether or not
# ROOT_DIR ends with a separator (the original cell_dir had no separator at
# all). The trailing '' keeps a final separator because later cells append
# file names by plain string concatenation.
root_dir = os.environ['ROOT_DIR']
# NOTE(review): training features are read from MD589 while `stack` is MD594 --
# presumably intentional (train on one brain, render another); confirm.
cell_dir = os.path.join(root_dir, 'CSHL_patches_features_less', 'MD589', '')
raw_images_root = os.path.join(root_dir, 'CSHL_data_processed', stack, stack + '_prep2_lossless_gray', '')
savepath = os.path.join(root_dir, 'CSHL_hsv', stack, '')
# Creates CSHL_hsv/ and CSHL_hsv/<stack>/ in one call; a no-op when they exist.
os.makedirs(savepath, exist_ok=True)

resol = 0.46
half_size = 112   # half the patch edge length in pixels (patches are 2*half_size square)

In [None]:
# For every structure: train an XGBoost classifier on the stored patch
# features, score up to 20 randomly chosen patches per class on (at most) the
# ten middle sections, and save an HSV-encoded overlay per section
# (value = patch intensity, saturation = |score|, hue = sign of the score).
t0 = time()
for structure in all_structures:
    t1 = time()
    subpath = savepath + structure + '/'
    # An existing output directory marks the structure as already done and it
    # is skipped (this also applies if an earlier run was interrupted midway).
    if os.path.exists(subpath):
        continue
    os.mkdir(subpath)

    # NOTE(review): glob() returns entries in arbitrary order, yet the labels
    # below assume fp[0] holds the positive class and fp[1] the negative one --
    # confirm the file naming before relying on this.
    fp = glob(cell_dir + structure + '/*')
    features = []
    labels = []
    for state in range(2):
        with open(fp[state], 'rb') as f:
            clouds = pickle.load(f)
        features.extend(np.array(clouds))
        labels.extend([1 - state] * len(clouds))
    features = np.array(features)
    labels = np.array(labels)
    # Only the training split is used; the validation split is not evaluated here.
    X_train, X_valid, y_train, y_valid = train_test_split(features, labels, test_size=0.30, random_state=6)
    dtrain = xgb.DMatrix(X_train, label=y_train)
    bst = xgb.train(param, dtrain, num_round, verbose_eval=False)

    # Keep at most the ten sections around the middle of the structure.
    section_indices = np.sort(list(all_patch_locations[structure].keys()))
    if len(section_indices) > 10:
        half = len(section_indices) // 2
        section_indices = section_indices[half-5:half+5]

    negative = structure + '_surround_500um_noclass'
    structures = [structure, negative]

    for section in section_indices:
        img = cv2.imread(raw_images_root + section_to_filename[section] + '_prep2_lossless_gray.tif', 2)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            print('section', section, ': image could not be read, skipped')
            continue
        m, n = img.shape

        # Bounding box of the surround patches, padded by half a patch and
        # clipped to the image.
        surround = all_patch_locations[negative][section]
        left = int(max(min(surround[:,0]) - half_size, 0))
        right = int(min(np.ceil(max(surround[:,0]) + half_size), n - 1))
        up = int(max(min(surround[:,1]) - half_size, 0))
        down = int(min(np.ceil(max(surround[:,1]) + half_size), m - 1))

        hsv = np.zeros([down-up+1, right-left+1, 3])
        hsv[:,:,2] = 1
        n_skipped = 0
        last_error = None
        # A separate loop name avoids rebinding the outer `structure` variable.
        for group in structures:
            locations = all_patch_locations[group][section]
            n_choose = min(len(locations), 20)
            indices_choose = np.random.choice(range(len(locations)), n_choose, replace=False)
            patches_choose = locations[indices_choose,:]
            for index in range(n_choose):
                try:
                    x = int(float(patches_choose[index][0]))
                    y = int(float(patches_choose[index][1]))
                    patch = img[y-half_size:y+half_size, x-half_size:x+half_size]
                    extracted = features_extractor(patch, params)
                    # One sample -> explicit 1 x n_features matrix, the same
                    # dense layout the model was trained on.
                    xtest = xgb.DMatrix(np.asarray(extracted, dtype=float).reshape(1, -1))
                    # NOTE(review): ntree_limit / best_ntree_limit belong to the
                    # older XGBoost API -- confirm against the installed version.
                    score = bst.predict(xtest, output_margin=True, ntree_limit=bst.best_ntree_limit)
                    rows = slice(y-half_size-up, y+half_size-up)
                    cols = slice(x-half_size-left, x+half_size-left)
                    value_img = patch/255
                    hsv[rows, cols, 2] = value_img
                    satua_img = np.zeros_like(value_img) + score
                    origin = hsv[rows, cols, 1]
                    # Where patches overlap keep the score with the larger
                    # magnitude; on a tie keep the existing value (the original
                    # mask arithmetic reset tied pixels to 0).
                    hsv[rows, cols, 1] = np.where(np.absolute(satua_img) > np.absolute(origin), satua_img, origin)
                except Exception as exc:
                    # Best effort: a failing patch is skipped, but the failure
                    # is counted and reported instead of silently discarded.
                    n_skipped += 1
                    last_error = exc
                    continue
        if n_skipped:
            print('section', section, ':', n_skipped, 'patches skipped, last error:', repr(last_error))

        hsv[:,:,0] = (hsv[:,:,1]>0)*0.66 + (hsv[:,:,1]<0)*1.0
        hsv[:,:,1] = np.absolute(hsv[:,:,1])
        peak = hsv[:,:,1].max()
        if peak > 0:
            # Guard against 0/0 (NaN image) when no patch received a non-zero score.
            hsv[:,:,1] = hsv[:,:,1] / peak
        rgb = skimage.color.hsv2rgb(hsv)
        rgb = rgb*255
        rgb = rgb.astype(np.uint8)
        filename = subpath + structures[0] + '_' + str(section) + '.tif'
        # NOTE(review): cv2.imwrite interprets 3-channel arrays as BGR, so the
        # red and blue channels of this RGB array end up swapped in the file.
        # Left unchanged to stay consistent with images already generated --
        # confirm which colour convention is intended.
        cv2.imwrite(filename, rgb)
    print(structures[0] + ' finished in %5.1f seconds' % (time() - t1))
print('Finished in %5.1f seconds' % (time() - t0))
    

Amb finished in 183.3 seconds
LC finished in 197.7 seconds
LRt finished in 189.0 seconds
Pn finished in 201.6 seconds
Tz finished in 180.0 seconds
