In [8]:
def Generate_Merfish_Code(num_bits, on_bits, hamming_distance, 
                          num_regs=None, randomize=True, repeat=500, verbose=True):
    '''Generate a MERFISH encoding scheme with a given number of bits, on-bits and
    minimum pairwise hamming distance.

    Codes are picked greedily: starting from the first combination, each candidate
    (drawn in random or lexicographic order) is kept only if it is at least
    `hamming_distance` away from every code already kept. With randomize=True the
    search is restarted `repeat` extra times and the best run is retained
    (more codes, with a per-bit usage variance that is not worse).

    Inputs:
        num_bits: total number of bits for MERFISH code, int
        on_bits: number of on-bits for each code, int (<= num_bits)
        hamming_distance: minimum hamming distance allowed between codes, int (<= num_bits)
        num_regs: maximum number of codes (regions) to generate;
            None, 0 or a negative value (e.g. -1) means generate as many as possible,
            int or None (default: None)
        randomize: whether to randomly choose from candidate codes, bool (default: True)
        repeat: number of extra randomized restarts (ignored if randomize is False),
            int (default: 500)
        verbose: say something!, bool (default: True)
    Output:
        hyb_matrix: encoding scheme, num_bits by n_codes 0-1 integer array where
            hyb_matrix[b, c] == 1 iff bit b is on in code c (n_codes <= num_regs).
            Use hyb_matrix.T for a one-row-per-code layout.
    Raises:
        ValueError: if on_bits or hamming_distance exceeds num_bits.
        '''
    import numpy as np
    from itertools import combinations
    from random import randint
    from time import time
    _start = time()
    # convert inputs into int (np.int was removed in NumPy 1.24; builtin int is equivalent)
    _num_bits = int(num_bits)
    _on_bits = int(on_bits)
    _d = int(hamming_distance)
    if verbose:
        print("- Generate encoding scheme of "+str(_num_bits)+" bits with "+str(_on_bits)+" on-bits")
        print("-- hamming distance is", _d)
    # check inputs
    if _num_bits < _on_bits:
        raise ValueError('on-bits is larger than total_bits!')
    if _num_bits < _d:
        raise ValueError('hamming_distance is larger than total_bits!')
    # None / 0 / negative all mean "no cap on the number of codes"
    if num_regs is None or num_regs <= 0:
        _max_codes = np.inf
    else:
        _max_codes = num_regs

    def code_distance(_code1, _code2):
        '''given two codes (collections of on-bit indices), calculate hamming distance'''
        # bits that are on in exactly one of the two codes
        return len(set(_code1) ^ set(_code2))

    # number of repeats
    _repeat_num = 1 + int(randomize) * repeat
    if verbose:
        print("-- number of repeats:", _repeat_num)
    # the candidate pool is identical for every repeat, so enumerate it only once
    _all_codes = list(combinations(range(_num_bits), _on_bits))
    _best_coding, _best_var = [], np.inf
    for _i in range(_repeat_num):
        # fresh copy of the candidates for this repeat
        _cand_codes = list(_all_codes)
        # initialize chosen codes with the first candidate
        _chosen_codes = [_cand_codes.pop(0)]
        # loop through candidate codes until they run out or enough codes are chosen
        while len(_cand_codes) > 0 and len(_chosen_codes) < _max_codes:
            if randomize:
                _c = _cand_codes.pop(randint(0, len(_cand_codes)-1))
            else:
                _c = _cand_codes.pop(0)
            # keep the candidate only if it is far enough from every chosen code
            if all(code_distance(_c, _chosen_c) >= _d for _chosen_c in _chosen_codes):
                _chosen_codes.append(_c)
        # how many chosen codes use each (used) bit; low variance = balanced bit usage
        _, _ucount = np.unique(np.array(_chosen_codes), return_counts=True)
        _var = np.var(_ucount)

        if len(_chosen_codes) > len(_best_coding) and _var <= _best_var:
            _best_coding = _chosen_codes
            _best_var = _var
            if verbose:
                print("-- length of all possible code", len(_best_coding), ", variance", _best_var)
    # the cap is already enforced while choosing, so the best coding is the final set;
    # convert the lists of on-bit indices into a 0-1 matrix (bits x codes)
    hyb_matrix = np.zeros([_num_bits, len(_best_coding)], dtype=int)
    for _col, _code in enumerate(_best_coding):
        hyb_matrix[list(_code), _col] = 1

    _end = time()
    if verbose:
        print("-- Duration: ", _end-_start)
    return hyb_matrix

# Demo run: 16-bit codes with 4 on-bits and minimum hamming distance 4,
# executed only when this code is the entry point.
if __name__ == '__main__':
    Generate_Merfish_Code(num_bits=16, on_bits=4, hamming_distance=4, randomize=True)

- Generate encoding scheme of 16 bits with 4 on-bits
-- hamming distance is 4
-- number of repeats: 501
-- length of all possible code 102 , variance 2.0
-- length of all possible code 103 , variance 1.6875
-- length of all possible code 105 , variance 1.3125
-- length of all possible code 107 , variance 1.1875
-- length of all possible code 109 , variance 1.1875
-- Duration:  29.721194982528687


In [1]:
%run "E:\Users\puzheng\Documents\Startup_py3.py"
sys.path.append(r"E:\Users\puzheng\Documents")

import ImageAnalysis3 as ia
%matplotlib notebook

from ImageAnalysis3 import get_img_info, visual_tools, corrections

# save codebook

In [2]:
import csv

In [3]:
# Full path of the reference codebook CSV (folder, then file name).
codebook_file = (
    r'E:\Users\puzheng\Documents\Libraries\CTP-05\Ref_Codebook'
    r'\L26E1_codebook.csv'
)

In [16]:
# Parse the codebook CSV into a list of {column name: value} dicts, one per data row.
codebook = []
# newline='' lets the csv module do its own newline handling, as its docs require
with open(codebook_file, 'r', newline='') as csvfile:
    # 'strip' dialect: ignore whitespace that directly follows a delimiter
    csv.register_dialect('strip', skipinitialspace=True)
    reader = csv.reader(csvfile, delimiter=',', dialect='strip')
    # skip the three lines that precede the header row
    for i in range(3):
        next(reader,None)
    header = next(reader,None)
    if header is None:
        # file ended before the header row: fail with a clear message
        raise ValueError('no header row found in codebook file: ' + str(codebook_file))
    print('header',header)
    for row in reader:
        if not row:
            # blank line: would otherwise become an empty dict without a 'barcode' key
            continue
        codebook.append(dict(zip(header, row)))

header ['name', 'id', 'barcode']


In [17]:
# Display the parsed codebook entries (list of dicts keyed by the CSV header).
codebook

[{'name': 'Blank-1', 'id': '', 'barcode': '0011100000001000'},
 {'name': 'Blank-10', 'id': '', 'barcode': '1001100100000000'},
 {'name': 'Blank-2', 'id': '', 'barcode': '0011000100100000'},
 {'name': 'Blank-3', 'id': '', 'barcode': '0011010001000000'},
 {'name': 'Blank-4', 'id': '', 'barcode': '0010010000001100'},
 {'name': 'Blank-5', 'id': '', 'barcode': '0010001000101000'},
 {'name': 'Blank-6', 'id': '', 'barcode': '1000101000001000'},
 {'name': 'Blank-7', 'id': '', 'barcode': '0101100000000010'},
 {'name': 'Blank-8', 'id': '', 'barcode': '0001000001001100'},
 {'name': 'Blank-9', 'id': '', 'barcode': '0000100100101000'},
 {'name': 'AKAP11', 'id': 'ENST00000025301.3', 'barcode': '0110100010000000'},
 {'name': 'CBX5', 'id': 'ENST00000209875.8', 'barcode': '0110000000001010'},
 {'name': 'CCDC113', 'id': 'ENST00000219299.8', 'barcode': '0000110001001000'},
 {'name': 'CEMIP', 'id': 'ENST00000220244.7', 'barcode': '0000100000001101'},
 {'name': 'SIPA1L3',
  'id': 'ENST00000222345.10',
  'b

In [26]:
# Sanity check: digits of the first entry's barcode string as an integer array.
np.array([int(bit) for bit in codebook[0]['barcode']], dtype=int)

array([0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0])

In [27]:
# Number of entries parsed into the codebook list.
len(codebook)

140

In [32]:
# Allocate a zero matrix with one row per codebook entry and one column per
# barcode digit (column count taken from the first entry's barcode).
hyb_matrix = np.zeros(
    (len(codebook), np.array(list(codebook[0]['barcode']), dtype=int).size)
)

In [33]:
# Fill row i with the digits of the i-th entry's barcode string.
for i, species in enumerate(codebook):
    hyb_matrix[i, :] = [int(bit) for bit in species['barcode']]

In [37]:
# Write hyb_matrix to disk in the Ref_Codebook folder.
np.save(r'E:\Users\puzheng\Documents\Libraries\CTP-05\Ref_Codebook\hyb_matrix', hyb_matrix)

In [42]:
# Shape of the sub-matrix holding only the rows whose last column is 0.
hyb_matrix[hyb_matrix[:, -1] == 0].shape

(105, 16)