# Analysis of DNA-MERFISH for CTP11

by Pu Zheng

2022.06.15

analysis for dataset:

dna_data_folder = r'\\10.245.74.158\Chromatin_NAS_5\20220419-P_brain_CTP11-13_from_0415storm65'

rna_data_folder = r'\\10.245.74.158\Chromatin_NAS_0\20220415-P_brain_M1_noclear_storm65'

save_folder = r'\\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65'

In [1]:
%run "..\..\Startup_py3.py"
sys.path.append(r"..\..\..\..\Documents")

import ImageAnalysis3 as ia
%matplotlib notebook

from ImageAnalysis3 import *
print(os.getpid())

import h5py
from ImageAnalysis3.classes import _allowed_kwds
import ast

29704


## 0.1 Folders

In [4]:
save_folder = r'\\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65'
save_filenames = [os.path.join(save_folder, _fl) for _fl in os.listdir(save_folder)
                  if _fl.split(os.extsep)[-1]=='hdf5']
# extract fov_id
save_fov_ids = [int(os.path.basename(_fl).split('.hdf5')[0].split('_')[-1]) for _fl in save_filenames]

debug = True

print(f"{len(save_filenames)} fovs detected")

segmentation_folder = os.path.join(save_folder, 'Segmentation')
if not os.path.exists(segmentation_folder):
    os.makedirs(segmentation_folder)
    print(f"Creating segmentation_folder: {segmentation_folder}")
else:
    print(f"Use segmentation_folder: {segmentation_folder}")

cand_spot_folder = os.path.join(save_folder, 'CandSpots')
if not os.path.exists(cand_spot_folder):
    os.makedirs(cand_spot_folder)
    print(f"Creating cand_spot_folder: {cand_spot_folder}")
else:
    print(f"Use cand_spot_folder: {cand_spot_folder}")

decoder_folder = cand_spot_folder.replace('CandSpots', 'Decoder')
if debug:
    _version = 0
    while os.path.exists(os.path.join(decoder_folder, f'v{_version}')):
        _version += 1
    decoder_folder = os.path.join(decoder_folder, f'v{_version}')
if not os.path.exists(decoder_folder):
    os.makedirs(decoder_folder)
    print(f"Creating decoder_folder: {decoder_folder}")
else:
    print(f"Use decoder_folder: {decoder_folder}")

217 fovs detected
Use segmentation_folder: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Segmentation
Use cand_spot_folder: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\CandSpots
Creating decoder_folder: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Decoder\v0


In [3]:
pixel_sizes = np.array([250,108,108])
single_im_size = np.array([50,2048,2048])


# 1. Decoding of DNA-MERFISH

In [6]:
# load two codebooks
import pandas as pd
codebook_gn = pd.read_csv(r'\\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-11_brain\Summary_tables\CTP11-mouse-genome-1000_codebook.csv', header=0)
codebook_se = pd.read_csv(r'\\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-13_brain-super-enhancers\Summary_tables\CTP13-super-enhancers-1000_codebook.csv', header=0)
codebook_se['id'] = codebook_se['id'] + np.max(codebook_gn['id'])

In [47]:
# combine region_ids
merged_codebook = pd.concat([codebook_gn, codebook_se], axis=0, join='outer',ignore_index=True).fillna(0)

merged_codebook['reg_start'] = [int(_name.split(':')[1].split('-')[0]) for _name in merged_codebook['name']]
merged_codebook['reg_end'] = [int(_name.split(':')[1].split('-')[1]) for _name in merged_codebook['name']]
merged_codebook['reg_mid'] = (merged_codebook['reg_start'] + merged_codebook['reg_end'])/2

for _chr in np.unique(merged_codebook['chr']):
    _chr_codebook = merged_codebook[merged_codebook['chr']==_chr]
    _reg_order = np.argsort(merged_codebook.loc[merged_codebook['chr']==_chr, 'reg_mid'])
    merged_codebook.loc[_chr_codebook.index[_reg_order], 'chr_order'] = np.arange(len(_chr_codebook))
    
# cleanup 
codebook_df = merged_codebook[[_c for _c in merged_codebook.columns if 'reg_' not in _c]]

In [77]:
print(list(codebook_df.columns))

['name', 'id', 'NDB_784', 'NDB_755', 'NDB_826', 'NDB_713', 'NDB_865', 'NDB_725', 'NDB_817', 'NDB_710', 'NDB_652', 'NDB_971', 'NDB_718', 'NDB_743', 'NDB_847', 'NDB_683', 'NDB_643', 'NDB_815', 'NDB_760', 'NDB_965', 'NDB_790', 'NDB_917', 'NDB_778', 'NDB_728', 'NDB_844', 'NDB_974', 'NDB_961', 'NDB_746', 'NDB_661', 'NDB_875', 'NDB_901', 'NDB_773', 'NDB_868', 'NDB_812', 'NDB_1027', 'NDB_716', 'NDB_754', 'NDB_860', 'NDB_856', 'NDB_680', 'NDB_634', 'NDB_737', 'NDB_715', 'NDB_698', 'NDB_883', 'NDB_863', 'NDB_1033', 'NDB_878', 'NDB_631', 'NDB_758', 'NDB_1075', 'NDB_653', 'NDB_1060', 'NDB_905', 'NDB_805', 'NDB_866', 'NDB_730', 'NDB_887', 'NDB_880', 'NDB_752', 'NDB_832', 'NDB_896', 'NDB_835', 'NDB_989', 'NDB_946', 'NDB_641', 'NDB_721', 'NDB_899', 'NDB_853', 'NDB_941', 'NDB_838', 'NDB_647', 'NDB_994', 'NDB_881', 'NDB_1066', 'NDB_665', 'NDB_637', 'NDB_734', 'NDB_706', 'NDB_935', 'NDB_889', 'NDB_788', 'NDB_862', 'NDB_779', 'NDB_694', 'NDB_977', 'NDB_751', 'NDB_836', 'NDB_742', 'NDB_929', 'NDB_958', '

## Load bit_2_name

In [19]:
readout_df_gn = pd.read_csv(r'\\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-11_brain\Summary_tables\CTP11-mouse-genome-1000_combo_readouts.csv')
readout_df_se = pd.read_csv(r'\\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-13_brain-super-enhancers\Summary_tables\CTP13-mouse-SE_combo_readouts.csv')

# merge
readout_df_se['Bit']+=100
readout_df = pd.concat([readout_df_gn, readout_df_se])

In [24]:
readout_df

Unnamed: 0,Bit,ReadoutName
0,1,NDB_784
1,2,NDB_755
2,3,NDB_826
3,4,NDB_713
4,5,NDB_865
...,...,...
90,191,NDB_592
91,192,NDB_368
92,193,NDB_436
93,194,NDB_629


## 3.3 prepare decoding args

In [49]:
with h5py.File(save_filenames[0], "r", libver='latest') as _f:
    _grp = _f['combo']
    combo_channels = [_ch.decode() for _ch in _grp['channels'][:]]
    combo_ids = _grp['ids'][:]
    
bit_2_channel = {_b:_ch for _b,_ch in zip(combo_ids, combo_channels)}

In [50]:
%%time
from tqdm import tqdm
from ImageAnalysis3.classes import decode
reload(decode)

overwrite_decoder = False
return_decoder = False
skip_exist = True
load_exist = True

pair_search_radius = 300

decode_args = []
for _fov_id, _save_filename in zip(save_fov_ids, save_filenames):
    print(f"- Preparing decoding args for fov: {_fov_id}")
    # load candidate spots for the fov
    cand_spot_filename = os.path.join(cand_spot_folder,
                                      os.path.basename(_save_filename).replace('.hdf5', f'_CandSpots.csv') )
    if os.path.isfile(cand_spot_filename):
        _fov_spots_df = pd.read_csv(cand_spot_filename)
    else:
        continue # skip if fov doesn't exist
        
    for _cell_id in np.unique(_fov_spots_df['cell_id']):
        # get decoder filename
        _decoder_filename = os.path.join(decoder_folder, f'Fov-{_fov_id}_Cell-{_cell_id}_Decoder.hdf5')
        if os.path.exists(_decoder_filename) and skip_exist:
            continue
        # get cell_df
        _cell_spots_df =_fov_spots_df[_fov_spots_df['cell_id']==_cell_id]

        _args = (_cell_spots_df, codebook_df, _decoder_filename, combo_ids,
                 False, True, bit_2_channel,
                 pixel_sizes, 2, 0.1, 
                 pair_search_radius, -1, 1, 5, 0, -25, 
                 load_exist, overwrite_decoder, return_decoder, False) 
        # append
        decode_args.append(_args)
    break
print(len(decode_args))

- Preparing decoding args for fov: 0
102
Wall time: 5.09 s


In [259]:
from ImageAnalysis3.classes.preprocess import SpotTuple,Spots3D
from ImageAnalysis3.io_tools.spots import CellSpotsDf_2_CandSpots, SpotTuple_2_Dict
from scipy.spatial import KDTree
from itertools import combinations
from copy import copy
default_radius = 250
default_eps = 0.25
default_weights = np.array([1,1,1,1,1,])

In [258]:
from ImageAnalysis3.classes.decode import generate_score_metrics,generate_scores,summarize_score


class SpotDecoder():
    """Class to decode one-codebook"""
    def __init__(self, 
                 candSpotDf:pd.DataFrame, # candidate spots
                 codebook:pd.DataFrame, # Codebook, ReadoutName+binaryTable
                 readoutDf:pd.DataFrame, # Bit+ReadoutName
                 saveFile:str=None, # save filename
                 search_radius:float=default_radius,
                 search_eps:float=default_eps,
                 autoRun:bool=True,
                 preLoad:bool=True,
                 overwrite:bool=False,
                 verbose:bool=True,
                 ):
        # input dataframes
        self.candSpotDf = candSpotDf
        self.candSpots = CellSpotsDf_2_CandSpots(self.candSpotDf) # convert into cand_spots
        self.codebook = codebook
        self.readoutDf = readoutDf
        self.saveFile = saveFile
        # parameters
        self.search_radius = search_radius
        self.search_eps = search_eps
        self.overwrite = overwrite
        self.verbose = verbose
        # Load from exist
        if preLoad:
            self._load()
        if autoRun:
            # step1: summarize bit_codebook
            self._match_bit_2_codebook()
            # step2: find valid bit pairs
            self._process_codebook_2_pairs()
            # step3: search pairs
            self._search_candidate_pairs()
            # step4: select tuples
            self._select_spot_tuples()

    # combine codebook to readout into ID
    def _match_bit_2_codebook(self):
        if self.verbose:
            print(f"- Matching {len(self.readoutDf)} bits to {self.codebook.shape} codebook")
        self.default_cols = ['name', 'id', 'chr']
        self.bit_codebook = pd.DataFrame(self.codebook[self.default_cols])
        self.bits = []
        for _col in self.codebook.columns:
            if _col in default_cols:
                continue
            else:
                _matched_bit = self.readoutDf.loc[self.readoutDf['ReadoutName']==_col, 'Bit'].values
                if len(_matched_bit) > 0:
                    self.bit_codebook[_matched_bit[0]] = self.codebook[_col].copy()
                    self.bits.append(_matched_bit[0])
        #[_b for _b in bit_codebook.columns if isinstance(_b, np.int64)]
        # summarize
        self.bits = np.array(self.bits, dtype=np.int32)
        if self.verbose:
            print(f"-- {len(self.bits)} bits matched")
        return
    # Process codebook to find valid bit pairs/tuples
    def _process_codebook_2_pairs(self):
        if self.verbose:
            print(f"- Process {self.bit_codebook.shape} codebook into valid pairs")
        if not hasattr(self, 'bit_codebook') or not hasattr(self, 'bits'):
            self._match_bit_2_codebook()
        codebook_matrix = self.bit_codebook[self.bits].values
        self.ValidBitPair_2_RegionId = {}
        self.RegionId_2_Bits = {}
        for _icode, _code in enumerate(codebook_matrix):
            # pairs
            for _p in combinations(np.where(_code > 0)[0], 2):
                _bs = tuple(np.sort(self.bits[np.array(_p)]))
                if _bs not in self.ValidBitPair_2_RegionId:
                    self.ValidBitPair_2_RegionId[_bs] = self.bit_codebook.loc[_icode, 'id']
            # tuples
            self.RegionId_2_Bits[self.bit_codebook.loc[_icode, 'id']] = self.bits[np.where(_code > 0)[0]]
        if self.verbose:
            print(f"-- {len(self.ValidBitPair_2_RegionId)} valid pairs detected.")
        # return
        return self.ValidBitPair_2_RegionId
    # search spot_pairs by KDtree
    def _search_candidate_pairs(self,):
        if self.verbose:
            print(f"- Searching for spot-pairs within {self.search_radius}nm.")
        if hasattr(self, 'candSpotPairInds_list') and hasattr(self, 'candSpotPair_list') and not self.overwrite:
            return self.candSpotPair_list
        else:
            self.candSpotPairInds_list = []
            self.candSpotPair_list = []
        # extract all coordinates
        _cand_positions = self.candSpots.to_positions()
        # build kd-tree
        if not hasattr(self, 'kdtree'):
            if self.verbose:
                print(f"-- find candidate pairs by KDTree")
            self.kdtree = KDTree(_cand_positions)
        _candSpotPairInds_list = list(self.kdtree.query_pairs(self.search_radius, eps=self.search_eps))
        # loop through all pairs
        if self.verbose:
            print(f"-- filter candidate pairs by codebook")
        for _inds in _candSpotPairInds_list:
            # only keep the valid pairs
            _pair_bits = tuple( np.sort(self.candSpots.bits[np.array(_inds)]) )
            if _pair_bits in self.ValidBitPair_2_RegionId:
                self.candSpotPairInds_list.append(_inds)
        # Convert into spotPairs
        self.candSpotPair_list = [SpotTuple(self.candSpots[np.array(_inds)], 
                                            spots_inds=np.array(_inds), 
                                            tuple_id=self.ValidBitPair_2_RegionId\
                                                [tuple(self.candSpots[np.array(_inds)].bits)])
                                  for _inds in self.candSpotPairInds_list]
        if self.verbose:
            print(f"-- {len(self.candSpotPair_list)} pairs selected.")
    # According to spotPair scoring, select candidate pairs
    def _select_spot_tuples(self, _maxSpotUsage=1, 
                            _weights=default_weights, 
                            _errorCorrection=True):
        """Function to select spot tuples given self.candSpotPair_list found previously"""
        if self.verbose:
            print(f"- Select spot_groups from candidate pairs with radius:{self.search_radius}, max_usage={_maxSpotUsage}")
        # initialize _spot_usage and tuples
        _spotUsage = np.zeros(len(self.candSpots))
        if hasattr(self, 'spotGroups') and not overwrite:
            for _g in self.spotGroups:
                _spotUsage[_g.spots_inds] += 1
            # check if this is valid
            if np.max(_spotUsage) <= _maxSpotUsage:
                # save spot_usage
                setattr(self, 'spotUsage', _spotUsage)
                if self.verbose:
                    print(f"-- directly return {len(self.spotGroups)} spot_groups.")
                return
                #return self.spotGroups, self.spotUsage
            # otherwise clear spot_usage
            else:
                _spotUsage = np.zeros(len(self.candSpots))
        # otherwise continue
        self.spotGroups = []
        # 1. if no candidate pairs detected, do the previous step
        if not hasattr(self, 'candSpotPair_list'):
            self._search_candidate_pairs
        # 2. scoring all pairs
        if self.verbose:
            print(f"-- calculate scores for candSpotPairs")
        _pair_ref_metrics = generate_score_metrics(self.candSpotPair_list,)
        _pair_ref_metrics = np.concatenate(_pair_ref_metrics, axis=0)
        _pair_scores, _ = generate_scores(self.candSpotPair_list, _pair_ref_metrics,)
        _final_scores = summarize_score(self.candSpotPair_list, weights=_weights)
        # 3. First iteration: complete tuples given high-valued spot-pairs
        for _pair in tqdm(sorted(self.candSpotPair_list, key=lambda _p:-_p.final_score)): 
            # start from highest scored pairs
            # search neighborhood whether the 3rd point exist
            _nb_spot_inds = self.kdtree.query_ball_point(_pair.centroid_spot().to_positions()[0],
                                                         self.search_radius, eps=self.search_eps)
            # skip for now if no neighboring spots detected
            if len(_nb_spot_inds) == 0:
                #print("--- no neighbors detected, skip")
                continue
            # skip if spots exceeds max_usage
            if (_spotUsage[_pair.spots_inds] >= _maxSpotUsage).any():
                #print("--- spot used, skip")
                continue
            ## get neighboring spots information:
            # extract bits
            _nb_spot_bits = self.candSpots.bits[np.array(_nb_spot_inds)]
            # the region_id
            _reg_id = _pair.tuple_id
            ## check if any spots exists in valid_bits
            # on-bit tuple for this
            _on_bits = self.RegionId_2_Bits[_reg_id]
            # find related bits
            _related_bits = list(set(_on_bits).difference(set(_pair.bits)))
            ## generate tentative tuples
            _temp_tuples = []
            ### TODO: Currently only allow 3-on-bits
            for _ind, _b in zip(_nb_spot_inds, _nb_spot_bits):
                if _b in _related_bits and _spotUsage[_ind] <= _maxSpotUsage:
                    _merged_inds = np.concatenate([_pair.spots_inds, [_ind]])
                    # assemble tentative tuples
                    _merged_bits = np.concatenate([_pair.bits, [_b]])
                    _merged_spots = Spots3D(np.concatenate([_pair.spots, 
                                                            self.candSpots[_ind][np.newaxis,:]], axis=0),
                                            bits=_merged_bits,
                                            pixel_sizes=_pair.pixel_sizes,)
                    _merged_tuple = SpotTuple(_merged_spots,
                                            bits=_merged_bits,
                                            pixel_sizes=_pair.pixel_sizes,
                                            spots_inds=_merged_inds,
                                            tuple_id=_pair.tuple_id,
                                            )
                    _temp_tuples.append(_merged_tuple)
            # if temp_tuple exists, pick the best one
            if len(_temp_tuples) > 0:
                _temp_metrics = generate_score_metrics(_temp_tuples,)
                _temp_scores,_ = generate_scores(_temp_tuples, _pair_ref_metrics,)
                _temp_final_scores = summarize_score(_temp_tuples)
                _max_tp_ind, _max_ihomo = np.unravel_index(np.argmax(_temp_final_scores), np.shape(_temp_final_scores))
                # append the best match
                self.spotGroups.append( copy(_temp_tuples[_max_tp_ind]) )
                _spotUsage[_temp_tuples[_max_tp_ind].spots_inds] += 1
        if self.verbose:
            print(f"-- {len(self.spotGroups)} full tuples selected")
        # 4. select pairs if allowing error_correction
        if _errorCorrection:
            _num_corrected = 0
            for _pair in tqdm(sorted(self.candSpotPair_list, key=lambda _p:-_p.final_score)):
                # skip if spots are used
                if (_spotUsage[_pair.spots_inds] >= _maxSpotUsage).any():
                    #print("--- spot used, skip")
                    continue
                # append the pair
                self.spotGroups.append(copy(_pair))
                _spotUsage[_pair.spots_inds] += 1 
                _num_corrected += 1
        if self.verbose:
            print(f"-- {_num_corrected} pairs selected")
        if self.verbose:
            print(f"-- in total {len(self.spotGroups)} spot_groups detected")
        # add select orders as attribute
        for _i, _g in enumerate(self.spotGroups):
            _g.sel_ind = _i
        # save spot_usage
        setattr(self, 'spotUsage', _spotUsage)
        #return self.spotGroups, self.spotUsage
        return
    # Load from savefile:
    def _load(self,):
        pass
    # save into savefile:
    def _save(self, _forceSaving=True):
        if self.verbose:
            print(f"- Save decoder into file: {self.saveFile}")
        if self.saveFile is None:
            print(f"saveFile not given, skip saving!")
            if _forceSaving:
                raise ValueError(f"saveFile not given, cannot save anything!")
        # list of saving:
        #self.bit_codebook, self.bits, self.candSpotDf, self.kdtree, self.spotGroups,
        
        
        
        

4298.571033333334

In [265]:
from ImageAnalysis3.io_tools.spots import Axis3D_infos

In [276]:
def spotTuple_2_positionDict(spot_tuple, axes_infos=Axis3D_infos):
    _posDict = {f"center_{_name}":_pos
                for _name, _pos in zip(axes_infos, spot_tuple.centroid_spot().to_positions()[0])}
    _posDict["center_intensity"] = np.mean(spot_tuple.intensities())
    _posDict["center_intensity_var"] = np.std(spot_tuple.intensities())/np.mean(spot_tuple.intensities())
    _posDict["center_internal_dist"] = np.median(spot_tuple.dist_internal())
    return _posDict

In [277]:
spotTuple_2_positionDict(_decoder.spotGroups[0])

{'center_z': 3468.7524999999996,
 'center_x': 29667.850200000004,
 'center_y': 53553.425760000006,
 'center_intensity': 4298.571033333334,
 'center_intensity_var': 0.15099112137992352,
 'center_internal_dist': 177.60173417116894}

In [278]:
SpotTuple_2_Dict(_decoder.spotGroups[0])

{'fov_id': None,
 'cell_id': None,
 'uid': None,
 'homolog': None,
 'sel_index': None,
 'height_0': 4769.1533,
 'z_0': 13.691415,
 'x_0': 274.81702,
 'y_0': 496.01575,
 'background_0': 4162.5376,
 'sigma_z_0': 1.230856,
 'sigma_x_0': 1.3226978,
 'sigma_y_0': 1.5703299,
 'sin_t_0': 0.25757262,
 'sin_p_0': -0.7650679,
 'eps_0': 204.29805,
 'bit_0': 26,
 'channel_0': None,
 'cand_spot_ind_0': 1535,
 'height_1': 4745.7793,
 'z_1': 13.558079,
 'x_1': 274.7035,
 'y_1': 495.94214,
 'background_1': 4233.427,
 'sigma_z_1': 1.4328941,
 'sigma_x_1': 1.3192182,
 'sigma_y_1': 1.5174807,
 'sin_t_1': 0.07596842,
 'sin_p_1': 1.0,
 'eps_1': 244.79645,
 'bit_1': 28,
 'channel_1': None,
 'cand_spot_ind_1': 1652,
 'height_2': 3380.7805,
 'z_2': 14.375536,
 'x_2': 274.58643,
 'y_2': 495.63727,
 'background_2': 6228.7534,
 'sigma_z_2': 1.7582834,
 'sigma_x_2': 1.704653,
 'sigma_y_2': 1.8261583,
 'sin_t_2': -0.53320223,
 'sin_p_2': -1.0,
 'eps_2': 233.90347,
 'bit_2': 51,
 'channel_2': None,
 'cand_spot_ind_

In [None]:
def SpotTupleList_2_DataFrame(spot_groups, )

In [None]:
SpotTuple_2_Dict[]

In [246]:
print(decoder_folder)

\\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Decoder\v0


In [249]:
decode_args[10][2]

'\\\\mendel\\Mendel_SSD4\\Pu_Temp\\20220419-CTP11-13_from_0415storm65\\Decoder\\v0\\Fov-0_Cell-11_Decoder.hdf5'

In [253]:
_cell_id = 11
_saveFile = os.path.join(decoder_folder, 'Decoded', f"Decoded_fov-{_fov_id}_cell-{_cell_id}.hdf5")

In [255]:
_decoder._save()

- Save decoder into file: \\mendel\Mendel_SSD4\Pu_Temp\20220419-CTP11-13_from_0415storm65\Decoder\v0\Decoded\Decoded_fov-0_cell-11.hdf5


In [257]:
_decoder = SpotDecoder(decode_args[10][0], codebook_gn, readout_df, saveFile=_saveFile, search_radius=250)

- Matching 194 bits to (988, 103) codebook
-- 99 bits matched
- Process (988, 102) codebook into valid pairs
-- 2964 valid pairs detected.
- Searching for spot-pairs within 250nm.
-- find candidate pairs by KDTree
-- filter candidate pairs by codebook


  self.bit_codebook[_matched_bit[0]] = self.codebook[_col].copy()


-- 5268 pairs selected.
- Select spot_groups from candidate pairs with radius:250, max_usage=1
-- calculate scores for candSpotPairs


100%|████████████████████████████████████████████████████████████████████████████| 5268/5268 [00:00<00:00, 6301.45it/s]


-- 678 full tuples selected


100%|███████████████████████████████████████████████████████████████████████████| 5268/5268 [00:00<00:00, 95013.15it/s]

-- 828 pairs selected
-- in total 1506 spot_groups detected





In [230]:
len(_decoder.spotGroups)

678

In [163]:
_decoder.SpotPair_list[0]

Spots3D([[ 1.8436116e+03,  2.3323305e+01,  2.9774316e+02,  4.9768063e+02,
           6.4360340e+03,  2.0489990e+00,  2.0662942e+00,  1.9769346e+00,
          -1.0000000e+00, -8.5118973e-01,  1.7233060e+02],
         [ 2.5633948e+03,  2.3572094e+01,  2.9736280e+02,  4.9559784e+02,
           2.2769778e+03,  1.5970697e+00,  1.1038125e+00,  1.2192842e+00,
           6.8197070e-02,  7.6454070e-02,  1.4488187e+02]])

In [135]:
_process_codebook_2_pairs(_decoder)

- Process (988, 102) codebook into valid pairs


{(37, 63): 1,
 (37, 78): 1,
 (63, 78): 1,
 (18, 43): 2,
 (18, 87): 2,
 (43, 87): 2,
 (35, 73): 3,
 (35, 83): 3,
 (73, 83): 3,
 (58, 59): 4,
 (58, 95): 4,
 (59, 95): 4,
 (42, 79): 5,
 (42, 94): 5,
 (79, 94): 5,
 (62, 66): 6,
 (62, 81): 6,
 (66, 81): 6,
 (67, 75): 7,
 (67, 84): 7,
 (75, 84): 7,
 (24, 45): 8,
 (24, 55): 8,
 (45, 55): 8,
 (5, 46): 9,
 (5, 91): 9,
 (46, 91): 9,
 (52, 65): 10,
 (52, 76): 10,
 (65, 76): 10,
 (13, 68): 11,
 (13, 70): 11,
 (68, 70): 11,
 (16, 44): 12,
 (16, 69): 12,
 (44, 69): 12,
 (31, 32): 13,
 (31, 63): 13,
 (32, 63): 13,
 (11, 23): 14,
 (11, 48): 14,
 (23, 48): 14,
 (6, 7): 15,
 (6, 14): 15,
 (7, 14): 15,
 (4, 36): 16,
 (4, 50): 16,
 (36, 50): 16,
 (40, 85): 17,
 (40, 89): 17,
 (85, 89): 17,
 (9, 21): 18,
 (9, 57): 18,
 (21, 57): 18,
 (25, 33): 19,
 (25, 56): 19,
 (33, 56): 19,
 (19, 30): 20,
 (19, 74): 20,
 (30, 74): 20,
 (54, 90): 21,
 (54, 92): 21,
 (90, 92): 21,
 (8, 15): 22,
 (8, 64): 22,
 (15, 64): 22,
 (1, 38): 23,
 (1, 82): 23,
 (38, 82): 23,
 (71, 

In [112]:
_decoder.bit_codebook

Unnamed: 0,name,id,chr,1,2,3,4,5,6,7,...,90,91,92,93,94,95,96,97,98,99
0,1:3740000-3760000,1,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1:6240000-6260000,2,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1:8740000-8760000,3,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1:11240000-11260000,4,1,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
4,1:13740000-13760000,5,1,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
983,X:161240000-161260000,1057,X,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
984,X:163740000-163760000,1058,X,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,0
985,X:166240000-166260000,1059,X,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
986,X:168740000-168760000,1060,X,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [80]:
_decoder._match_bit_2_codebook()

  self.bit_codebook[_matched_bit[0]] = self.codebook[_col].copy()


In [81]:
_decoder.bits

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
       35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51,
       52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68,
       69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85,
       86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [35]:
len()/200

  bit_codebook[_matched_bit[0]] = codebook_gn[_col].copy()


In [34]:
pd.concat?

In [None]:
np.isinst

[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99]