<a id='0.1'></a>
## 0.1 load required packages

[back to top](#top)

In [1]:
# Execute the shared startup script inside this kernel's namespace.
# NOTE(review): `sys` and `os` are used below without a visible import, so they
# presumably come from this script — confirm before reordering this cell.
%run "..\..\Startup_py3.py"
# Extend the module search path with a Documents folder given relative to the notebook
# (presumably where ImageAnalysis3 is checked out — TODO confirm).
sys.path.append(r"..\..\..\..\Documents")

import ImageAnalysis3 as ia
# Select the interactive "notebook" matplotlib backend
%matplotlib notebook

# NOTE(review): the star-import places every public ImageAnalysis3 name in the
# notebook namespace; later cells may rely on names it provides (e.g. `np`) — verify.
from ImageAnalysis3 import *
# Print the kernel's process id (the bare number shown as this cell's output)
print(os.getpid())

# library design specific tools
from ImageAnalysis3.library_tools import LibraryDesigner as ld
from ImageAnalysis3.library_tools import LibraryTools as lt
# biopython imports
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Blast.Applications import NcbiblastnCommandline
from Bio.Blast import NCBIXML

28992


<a id='0.2'></a>
## 0.2 required parameters for Genome

[back to top](#top)

In [2]:
# New mouse genome assembly used throughout this notebook
genome_assembly = 'GRCm39'
# Reference data for an assembly sit in "<assembly>_ensembl" under the mouse genomes share
reference_folder = os.path.join(
    r'\\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\Genomes\mouse',
    f'{genome_assembly}_ensembl',
)
# Genome and transcriptome data are sibling sub-folders of the reference folder
genome_folder, transcriptome_folder = (
    os.path.join(reference_folder, _subdir) for _subdir in ('Genome', 'Transcriptome')
)

In [3]:
class OTTable(dict):
    '''Off-target table implemented on top of a python dictionary.

    Keys that were never added read as 0; reading them does not store them.
    '''

    def __missing__(self, key):
        # called by dict.__getitem__ for unknown keys: report a zero count
        return 0

    def add_seq(self, seq, weight):
        '''Increase the value stored under `seq` by `weight`.'''
        current = self[seq]
        self[seq] = current + weight

    def save_pkl(self, file_name):
        '''Pickle this table into the file `file_name`.'''
        with open(file_name, 'wb') as handle:
            pickle.dump(self, handle, pickle.HIGHEST_PROTOCOL)
        print(f'Wrote the OTTable to {file_name}.')

    @staticmethod
    def load_pkl(file_name):
        '''Unpickle the object stored in `file_name` and return it.'''
        with open(file_name, 'rb') as handle:
            loaded = pickle.load(handle)
        print(f'Load the OTTable from {file_name}.')
        return loaded

In [4]:
from seqint import seq2Int,seq2Int_rc
from tqdm import tqdm
import pickle
# NOTE(review): presumably the default thread count for later parallel steps;
# it is not referenced in the cells visible here — confirm before changing.
Default_Num_Threads=12

class countTable(dict):
    '''A python dictionary based k-mer count table.

    Keys are the values returned by `seq2Int` / `seq2Int_rc` for a window of
    `word_len` characters (encoded to bytes); values are accumulated weights.
    Keys that were never added read as 0 and are not inserted by the lookup.

    NOTE: `get` below shadows `dict.get`; it takes a query *sequence* and
    returns a summed count, not the value stored under a single key.
    '''
    def __init__(self, word_len=17, save_file=None, verbose=True):
        '''Create an empty table.

        word_len:  default window (k-mer) length used by `load_sequences` and `get`.
        save_file: default path used by `save_pkl` / `load_pkl` when none is given.
        verbose:   whether `load_sequences` prints progress messages.
        '''
        super().__init__()
        # internalize parameters
        self.word_len = word_len
        self.save_file = save_file
        self.verbose = verbose

    def __missing__ (self, key):
        # unknown keys count as zero; nothing is stored
        return 0

    # function to append sequence
    def add_seq(self, seq, weight):
        '''Increase the value stored under key `seq` by `weight`.'''
        self[seq] = self[seq] + weight

    # function to save the table
    def save_pkl(self, file_name=None):
        '''Save the table as a pickle file (defaults to `self.save_file`).'''
        if file_name is None:
            file_name = self.save_file
        with open(file_name, 'wb') as f:
            pickle.dump(self, f, pickle.HIGHEST_PROTOCOL)
        print(f'Wrote the OTTable to {file_name}.')

    def load_sequences(
        self, sequences:list, word_len:int=0, weights:list=None,
        ):
        '''Count every window of `word_len` characters of every sequence.

        sequences: sized collection of strings; entries shorter than the word
                   length are skipped.
        word_len:  window length; None or a value <= 0 selects `self.word_len`.
                   Caveat: `get` always queries with `self.word_len`, so loading
                   with a different length yields keys `get` will not look up.
        weights:   one weight per sequence, added for each of its windows;
                   None or empty means a weight of 1 for every sequence.

        Raises AssertionError if `weights` and `sequences` differ in length.
        '''
        if self.verbose:
            print(f"Loading {len(sequences)} sequences into count_table.")
        # test for None first: `None <= 0` would raise a TypeError
        if word_len is None or word_len <= 0:
            word_len = self.word_len
            if self.verbose:
                print(f"- Use reference word length: {word_len}")
        else:
            if self.verbose:
                print(f"- Use given word length: {word_len}")
        # Use uniform weights if no weights are given
        if weights is None or len(weights) == 0:
            weights = [1] * len(sequences)
        assert len(weights) == len(sequences), \
            "weights must contain exactly one entry per sequence"
        # zip() has no length, so hand the total to tqdm explicitly
        for _seq, _w in tqdm(zip(sequences, weights), total=len(sequences)):
            if len(_seq) < word_len:
                continue
            # Find all K-mers and add them to the table
            for j in range(len(_seq) - word_len + 1):
                self.add_seq(seq2Int(_seq[j:j+word_len].encode()), _w)

    def load_pkl(self, save_file=None):
        '''Merge the table pickled in `save_file` (default `self.save_file`) into this one.

        Entries are merged with `dict.update`, so values for keys already
        present are overwritten rather than summed.
        '''
        if save_file is None:
            save_file = self.save_file
        # SECURITY: pickle.load can execute arbitrary code; only load trusted files.
        with open(save_file, 'rb') as f:
            _saved_table = pickle.load(f)
        print(f'Load the OTTable from {save_file}.')
        # update
        self.update(_saved_table)
        print(f'updated the OTTable from {save_file}.')

    def get(self,
            query_seq:str, # input sequences as string
            rc:bool=False, # whether reverse-complement
           ):
        """Sum the stored counts of all `self.word_len`-long windows of `query_seq`.

        With `rc=True` each window is keyed through `seq2Int_rc` instead of
        `seq2Int`. Returns 0 when the query is shorter than `self.word_len`.
        """
        if len(query_seq) < self.word_len:
            return 0
        # pick the key function once instead of duplicating the comprehension
        _to_key = seq2Int_rc if rc else seq2Int
        _seq_counts = [self[_to_key(query_seq[_i:_i+self.word_len].encode())]
                       for _i in range(len(query_seq)-self.word_len+1)]
        return np.sum(_seq_counts)

In [5]:
# Load all reference sequences found under the transcriptome folder
from ImageAnalysis3 import library_tools
seq_rd = library_tools.sequences.sequence_reader(
    transcriptome_folder,
    auto_load_ref=True,
)
# keep the plain-string form of each loaded record's sequence
transcriptome_seqs = list(map(lambda _rec: str(_rec.seq), seq_rd.ref_seq_dict.values()))

-- load sequence: ENSMUST00000178537.2, size=12
-- load sequence: ENSMUST00000178862.2, size=14
-- load sequence: ENSMUST00000196221.2, size=9
-- load sequence: ENSMUST00000179664.2, size=11
-- load sequence: ENSMUST00000177564.2, size=16
-- load sequence: ENSMUST00000179520.2, size=11
-- load sequence: ENSMUST00000179883.2, size=16
-- load sequence: ENSMUST00000195858.2, size=10
-- load sequence: ENSMUST00000179932.2, size=12
-- load sequence: ENSMUST00000180001.2, size=17
-- load sequence: ENSMUST00000178815.2, size=10
-- load sequence: ENSMUST00000177965.2, size=17
-- load sequence: ENSMUST00000178909.2, size=29
-- load sequence: ENSMUST00000177646.2, size=10
-- load sequence: ENSMUST00000178230.2, size=17
-- load sequence: ENSMUST00000178483.2, size=29
-- load sequence: ENSMUST00000179262.2, size=10
-- load sequence: ENSMUST00000178549.2, size=17
-- load sequence: ENSMUST00000193012.2, size=29
-- load sequence: ENSMUST00000179166.2, size=10
-- load sequence: ENSMUST00000179560.2, s

-- load sequence: ENSMUST00000099761.10, size=8794
-- load sequence: ENSMUST00000187185.2, size=500
-- load sequence: ENSMUST00000145951.8, size=634
-- load sequence: ENSMUST00000133853.3, size=456
-- load sequence: ENSMUST00000177768.3, size=3922
-- load sequence: ENSMUST00000177289.9, size=3066
-- load sequence: ENSMUST00000110249.3, size=2682
-- load sequence: ENSMUST00000191324.2, size=1593
-- load sequence: ENSMUST00000221003.2, size=728
-- load sequence: ENSMUST00000167565.3, size=3968
-- load sequence: ENSMUST00000165561.4, size=4483
-- load sequence: ENSMUST00000221549.2, size=502
-- load sequence: ENSMUST00000170060.4, size=3723
-- load sequence: ENSMUST00000222460.2, size=2385
-- load sequence: ENSMUST00000074072.5, size=561
-- load sequence: ENSMUST00000163881.2, size=72
-- load sequence: ENSMUST00000133454.8, size=4845
-- load sequence: ENSMUST00000147759.8, size=4972
-- load sequence: ENSMUST00000130671.8, size=4945
-- load sequence: ENSMUST00000100167.10, size=6792
-- loa

-- load sequence: ENSMUST00000163869.2, size=1467
-- load sequence: ENSMUST00000228092.2, size=2790
-- load sequence: ENSMUST00000072972.5, size=1048
-- load sequence: ENSMUST00000228802.2, size=7448
-- load sequence: ENSMUST00000228389.2, size=10213
-- load sequence: ENSMUST00000227317.2, size=8838
-- load sequence: ENSMUST00000080253.5, size=1059
-- load sequence: ENSMUST00000072113.6, size=3747
-- load sequence: ENSMUST00000226578.2, size=3247
-- load sequence: ENSMUST00000228656.2, size=9767
-- load sequence: ENSMUST00000227110.2, size=9888
-- load sequence: ENSMUST00000091721.3, size=897
-- load sequence: ENSMUST00000199318.2, size=2255
-- load sequence: ENSMUST00000204980.3, size=401
-- load sequence: ENSMUST00000204913.3, size=689
-- load sequence: ENSMUST00000199678.4, size=935
-- load sequence: ENSMUST00000203883.3, size=881
-- load sequence: ENSMUST00000204548.2, size=787
-- load sequence: ENSMUST00000196799.2, size=1648
-- load sequence: ENSMUST00000191506.8, size=1132
-- lo

-- load sequence: ENSMUST00000226299.2, size=3729
-- load sequence: ENSMUST00000227971.2, size=496
-- load sequence: ENSMUST00000228784.2, size=376
-- load sequence: ENSMUST00000042988.7, size=3450
-- load sequence: ENSMUST00000091701.3, size=534
-- load sequence: ENSMUST00000055770.4, size=741
-- load sequence: ENSMUST00000038523.15, size=3069
-- load sequence: ENSMUST00000214693.2, size=834
-- load sequence: ENSMUST00000095385.5, size=2909
-- load sequence: ENSMUST00000217175.2, size=2512
-- load sequence: ENSMUST00000214820.2, size=3516
-- load sequence: ENSMUST00000168033.3, size=8228
-- load sequence: ENSMUST00000234051.2, size=13307
-- load sequence: ENSMUST00000234738.2, size=9798
-- load sequence: ENSMUST00000234493.2, size=11539
-- load sequence: ENSMUST00000054425.7, size=481
-- load sequence: ENSMUST00000226039.2, size=1999
-- load sequence: ENSMUST00000223911.2, size=1457
-- load sequence: ENSMUST00000074067.4, size=1480
-- load sequence: ENSMUST00000113251.10, size=2077
--

-- load sequence: ENSMUST00000143937.2, size=485
-- load sequence: ENSMUST00000125572.2, size=600
-- load sequence: ENSMUST00000151389.2, size=547
-- load sequence: ENSMUST00000147306.2, size=639
-- load sequence: ENSMUST00000135355.2, size=417
-- load sequence: ENSMUST00000115188.9, size=1053
-- load sequence: ENSMUST00000123851.8, size=655
-- load sequence: ENSMUST00000151387.2, size=477
-- load sequence: ENSMUST00000115189.10, size=648
-- load sequence: ENSMUST00000099153.10, size=9661
-- load sequence: ENSMUST00000199523.2, size=696
-- load sequence: ENSMUST00000200559.2, size=680
-- load sequence: ENSMUST00000196139.5, size=740
-- load sequence: ENSMUST00000200271.5, size=2724
-- load sequence: ENSMUST00000198529.5, size=1294
-- load sequence: ENSMUST00000199222.5, size=2438
-- load sequence: ENSMUST00000117915.8, size=4913
-- load sequence: ENSMUST00000108210.9, size=2684
-- load sequence: ENSMUST00000196975.5, size=1711
-- load sequence: ENSMUST00000196369.2, size=1666
-- load s

-- load sequence: ENSMUST00000216965.2, size=3691
-- load sequence: ENSMUST00000054683.8, size=1122
-- load sequence: ENSMUST00000108821.8, size=998
-- load sequence: ENSMUST00000126695.2, size=651
-- load sequence: ENSMUST00000048801.8, size=876
-- load sequence: ENSMUST00000150429.8, size=1398
-- load sequence: ENSMUST00000056773.15, size=1551
-- load sequence: ENSMUST00000133305.8, size=2971
-- load sequence: ENSMUST00000027785.15, size=2776
-- load sequence: ENSMUST00000123409.8, size=2891
-- load sequence: ENSMUST00000139589.2, size=659
-- load sequence: ENSMUST00000232448.2, size=2447
-- load sequence: ENSMUST00000097175.5, size=1948
-- load sequence: ENSMUST00000212524.2, size=9945
-- load sequence: ENSMUST00000047425.5, size=3225
-- load sequence: ENSMUST00000212298.2, size=792
-- load sequence: ENSMUST00000102525.11, size=899
-- load sequence: ENSMUST00000148913.2, size=856
-- load sequence: ENSMUST00000141395.2, size=409
-- load sequence: ENSMUST00000031421.12, size=736
-- lo

-- load sequence: ENSMUST00000210524.2, size=638
-- load sequence: ENSMUST00000209800.2, size=3361
-- load sequence: ENSMUST00000224045.2, size=429
-- load sequence: ENSMUST00000021813.5, size=1399
-- load sequence: ENSMUST00000155917.8, size=641
-- load sequence: ENSMUST00000138521.8, size=341
-- load sequence: ENSMUST00000035991.8, size=3313
-- load sequence: ENSMUST00000186280.2, size=405
-- load sequence: ENSMUST00000114709.3, size=3570
-- load sequence: ENSMUST00000129068.2, size=361
-- load sequence: ENSMUST00000200097.5, size=3777
-- load sequence: ENSMUST00000197833.5, size=1598
-- load sequence: ENSMUST00000197387.5, size=576
-- load sequence: ENSMUST00000196203.2, size=937
-- load sequence: ENSMUST00000198733.2, size=491
-- load sequence: ENSMUST00000195902.2, size=465
-- load sequence: ENSMUST00000196343.2, size=4047
-- load sequence: ENSMUST00000197464.5, size=1742
-- load sequence: ENSMUST00000198399.5, size=654
-- load sequence: ENSMUST00000199026.5, size=478
-- load sequ

-- load sequence: ENSMUST00000094339.3, size=2640
-- load sequence: ENSMUST00000120115.3, size=939
-- load sequence: ENSMUST00000170808.3, size=2652
-- load sequence: ENSMUST00000232854.2, size=5957
-- load sequence: ENSMUST00000079439.10, size=877
-- load sequence: ENSMUST00000150883.8, size=314
-- load sequence: ENSMUST00000108404.8, size=485
-- load sequence: ENSMUST00000108405.2, size=550
-- load sequence: ENSMUST00000150306.2, size=912
-- load sequence: ENSMUST00000126983.2, size=2547
-- load sequence: ENSMUST00000039620.7, size=1170
-- load sequence: ENSMUST00000231302.2, size=703
-- load sequence: ENSMUST00000066747.14, size=3128
-- load sequence: ENSMUST00000205054.2, size=1950
-- load sequence: ENSMUST00000172321.3, size=2975
-- load sequence: ENSMUST00000219856.2, size=653
-- load sequence: ENSMUST00000219230.2, size=3591
-- load sequence: ENSMUST00000020258.10, size=3898
-- load sequence: ENSMUST00000219577.2, size=3914
-- load sequence: ENSMUST00000219177.2, size=1775
-- lo

-- load sequence: ENSMUST00000191653.2, size=1491
-- load sequence: ENSMUST00000050239.16, size=4442
-- load sequence: ENSMUST00000082287.3, size=925
-- load sequence: ENSMUST00000079817.8, size=2676
-- load sequence: ENSMUST00000227273.2, size=759
-- load sequence: ENSMUST00000226161.2, size=497
-- load sequence: ENSMUST00000227917.2, size=346
-- load sequence: ENSMUST00000226158.2, size=2661
-- load sequence: ENSMUST00000074018.4, size=928
-- load sequence: ENSMUST00000023660.9, size=1531
-- load sequence: ENSMUST00000088386.8, size=3121
-- load sequence: ENSMUST00000227372.2, size=3229
-- load sequence: ENSMUST00000107472.8, size=916
-- load sequence: ENSMUST00000084531.11, size=938
-- load sequence: ENSMUST00000107488.4, size=984
-- load sequence: ENSMUST00000019608.7, size=4694
-- load sequence: ENSMUST00000159935.2, size=3474
-- load sequence: ENSMUST00000212519.2, size=884
-- load sequence: ENSMUST00000113917.8, size=805
-- load sequence: ENSMUST00000113910.8, size=966
-- load s

-- load sequence: ENSMUST00000163619.8, size=1812
-- load sequence: ENSMUST00000168883.2, size=478
-- load sequence: ENSMUST00000191516.7, size=2566
-- load sequence: ENSMUST00000213545.2, size=2585
-- load sequence: ENSMUST00000099818.2, size=927
-- load sequence: ENSMUST00000030815.3, size=677
-- load sequence: ENSMUST00000237637.2, size=2925
-- load sequence: ENSMUST00000025685.8, size=2711
-- load sequence: ENSMUST00000235239.2, size=620
-- load sequence: ENSMUST00000063204.9, size=745
-- load sequence: ENSMUST00000139743.8, size=679
-- load sequence: ENSMUST00000137132.8, size=663
-- load sequence: ENSMUST00000134279.8, size=700
-- load sequence: ENSMUST00000149949.8, size=736
-- load sequence: ENSMUST00000124941.8, size=721
-- load sequence: ENSMUST00000136546.8, size=592
-- load sequence: ENSMUST00000135785.8, size=674
-- load sequence: ENSMUST00000125042.8, size=707
-- load sequence: ENSMUST00000154981.2, size=2884
-- load sequence: ENSMUST00000137751.2, size=446
-- load sequen

-- load sequence: ENSMUST00000230535.2, size=930
-- load sequence: ENSMUST00000229246.2, size=851
-- load sequence: ENSMUST00000133863.2, size=957
-- load sequence: ENSMUST00000034552.8, size=875
-- load sequence: ENSMUST00000214486.2, size=596
-- load sequence: ENSMUST00000214013.2, size=2759
-- load sequence: ENSMUST00000193559.6, size=1819
-- load sequence: ENSMUST00000192595.6, size=1274
-- load sequence: ENSMUST00000195471.6, size=1713
-- load sequence: ENSMUST00000195640.2, size=6162
-- load sequence: ENSMUST00000194622.2, size=578
-- load sequence: ENSMUST00000195793.2, size=277
-- load sequence: ENSMUST00000091014.10, size=6218
-- load sequence: ENSMUST00000107539.8, size=5912
-- load sequence: ENSMUST00000003203.14, size=4837
-- load sequence: ENSMUST00000107538.2, size=5143
-- load sequence: ENSMUST00000145113.2, size=3648
-- load sequence: ENSMUST00000147441.8, size=2605
-- load sequence: ENSMUST00000125152.2, size=443
-- load sequence: ENSMUST00000153858.8, size=1543
-- loa

-- load sequence: ENSMUST00000128557.3, size=3977
-- load sequence: ENSMUST00000143014.3, size=834
-- load sequence: ENSMUST00000109331.9, size=6497
-- load sequence: ENSMUST00000229966.2, size=860
-- load sequence: ENSMUST00000151418.2, size=732
-- load sequence: ENSMUST00000230393.2, size=825
-- load sequence: ENSMUST00000139372.2, size=426
-- load sequence: ENSMUST00000131062.2, size=605
-- load sequence: ENSMUST00000060808.10, size=6394
-- load sequence: ENSMUST00000005601.9, size=2664
-- load sequence: ENSMUST00000210245.2, size=2931
-- load sequence: ENSMUST00000210790.2, size=527
-- load sequence: ENSMUST00000191829.2, size=10783
-- load sequence: ENSMUST00000052366.5, size=2588
-- load sequence: ENSMUST00000056522.5, size=2813
-- load sequence: ENSMUST00000193137.2, size=511
-- load sequence: ENSMUST00000200883.2, size=2494
-- load sequence: ENSMUST00000073528.4, size=1429
-- load sequence: ENSMUST00000202174.2, size=758
-- load sequence: ENSMUST00000051754.2, size=3787
-- load

-- load sequence: ENSMUST00000055935.11, size=4741
-- load sequence: ENSMUST00000193890.2, size=444
-- load sequence: ENSMUST00000193941.2, size=593
-- load sequence: ENSMUST00000192875.2, size=5777
-- load sequence: ENSMUST00000039480.7, size=4625
-- load sequence: ENSMUST00000085455.6, size=871
-- load sequence: ENSMUST00000070316.12, size=4855
-- load sequence: ENSMUST00000137840.2, size=998
-- load sequence: ENSMUST00000033427.7, size=2677
-- load sequence: ENSMUST00000077528.7, size=855
-- load sequence: ENSMUST00000044113.12, size=13800
-- load sequence: ENSMUST00000230158.2, size=616
-- load sequence: ENSMUST00000230464.2, size=617
-- load sequence: ENSMUST00000040344.7, size=3855
-- load sequence: ENSMUST00000219847.2, size=1358
-- load sequence: ENSMUST00000219249.2, size=651
-- load sequence: ENSMUST00000219851.2, size=1056
-- load sequence: ENSMUST00000219505.2, size=802
-- load sequence: ENSMUST00000220350.2, size=373
-- load sequence: ENSMUST00000219531.2, size=3501
-- loa

-- load sequence: ENSMUST00000224208.2, size=6480
-- load sequence: ENSMUST00000224303.2, size=4672
-- load sequence: ENSMUST00000224504.2, size=745
-- load sequence: ENSMUST00000065766.7, size=4822
-- load sequence: ENSMUST00000112329.3, size=2895
-- load sequence: ENSMUST00000025956.13, size=2586
-- load sequence: ENSMUST00000038665.6, size=2768
-- load sequence: ENSMUST00000170236.2, size=1814
-- load sequence: ENSMUST00000141356.2, size=681
-- load sequence: ENSMUST00000052999.13, size=1307
-- load sequence: ENSMUST00000130942.8, size=1319
-- load sequence: ENSMUST00000125647.8, size=1186
-- load sequence: ENSMUST00000116307.8, size=893
-- load sequence: ENSMUST00000135718.8, size=556
-- load sequence: ENSMUST00000138701.8, size=499
-- load sequence: ENSMUST00000133049.9, size=1150
-- load sequence: ENSMUST00000123280.8, size=439
-- load sequence: ENSMUST00000126900.8, size=728
-- load sequence: ENSMUST00000150487.2, size=582
-- load sequence: ENSMUST00000127392.2, size=737
-- load

-- load sequence: ENSMUST00000037275.6, size=3525
-- load sequence: ENSMUST00000233789.2, size=11330
-- load sequence: ENSMUST00000232720.2, size=12623
-- load sequence: ENSMUST00000168730.3, size=11925
-- load sequence: ENSMUST00000233734.2, size=11862
-- load sequence: ENSMUST00000233656.2, size=11393
-- load sequence: ENSMUST00000188323.2, size=957
-- load sequence: ENSMUST00000222567.2, size=1015
-- load sequence: ENSMUST00000193762.2, size=1093
-- load sequence: ENSMUST00000188005.2, size=992
-- load sequence: ENSMUST00000027351.13, size=1417
-- load sequence: ENSMUST00000113437.9, size=794
-- load sequence: ENSMUST00000186832.2, size=772
-- load sequence: ENSMUST00000232921.2, size=4066
-- load sequence: ENSMUST00000233062.2, size=3687
-- load sequence: ENSMUST00000061074.10, size=3015
-- load sequence: ENSMUST00000040361.8, size=1789
-- load sequence: ENSMUST00000204277.2, size=857
-- load sequence: ENSMUST00000203011.2, size=1612
-- load sequence: ENSMUST00000043577.3, size=141

-- load sequence: ENSMUST00000102738.4, size=1629
-- load sequence: ENSMUST00000030047.3, size=3807
-- load sequence: ENSMUST00000035648.6, size=4765
-- load sequence: ENSMUST00000234430.2, size=2289
-- load sequence: ENSMUST00000234830.2, size=1274
-- load sequence: ENSMUST00000038614.12, size=973
-- load sequence: ENSMUST00000106359.2, size=469
-- load sequence: ENSMUST00000156891.3, size=409
-- load sequence: ENSMUST00000206143.2, size=829
-- load sequence: ENSMUST00000106357.8, size=1594
-- load sequence: ENSMUST00000145762.8, size=574
-- load sequence: ENSMUST00000132643.2, size=161
-- load sequence: ENSMUST00000106356.2, size=785
-- load sequence: ENSMUST00000150760.2, size=760
-- load sequence: ENSMUST00000143078.2, size=613
-- load sequence: ENSMUST00000170882.8, size=1050
-- load sequence: ENSMUST00000137963.8, size=3104
-- load sequence: ENSMUST00000023271.8, size=3583
-- load sequence: ENSMUST00000176592.2, size=799
-- load sequence: ENSMUST00000177179.2, size=475
-- load se

-- load sequence: ENSMUST00000203353.3, size=1949
-- load sequence: ENSMUST00000204702.3, size=9479
-- load sequence: ENSMUST00000203496.3, size=669
-- load sequence: ENSMUST00000203267.2, size=1997
-- load sequence: ENSMUST00000108086.7, size=2744
-- load sequence: ENSMUST00000203425.2, size=480
-- load sequence: ENSMUST00000204174.2, size=670
-- load sequence: ENSMUST00000048096.12, size=4077
-- load sequence: ENSMUST00000143580.2, size=504
-- load sequence: ENSMUST00000147008.2, size=646
-- load sequence: ENSMUST00000107365.3, size=1866
-- load sequence: ENSMUST00000234308.2, size=2084
-- load sequence: ENSMUST00000234260.2, size=968
-- load sequence: ENSMUST00000047611.4, size=1079
-- load sequence: ENSMUST00000234635.2, size=893
-- load sequence: ENSMUST00000234888.2, size=583
-- load sequence: ENSMUST00000224596.2, size=4762
-- load sequence: ENSMUST00000207801.2, size=3730
-- load sequence: ENSMUST00000207165.2, size=818
-- load sequence: ENSMUST00000207863.2, size=361
-- load s

-- load sequence: ENSMUST00000148374.8, size=1058
-- load sequence: ENSMUST00000068755.14, size=2823
-- load sequence: ENSMUST00000151989.2, size=397
-- load sequence: ENSMUST00000180025.8, size=2812
-- load sequence: ENSMUST00000141324.8, size=430
-- load sequence: ENSMUST00000144285.2, size=553
-- load sequence: ENSMUST00000129451.2, size=834
-- load sequence: ENSMUST00000045602.9, size=732
-- load sequence: ENSMUST00000135708.2, size=526
-- load sequence: ENSMUST00000147648.2, size=516
-- load sequence: ENSMUST00000075837.8, size=2064
-- load sequence: ENSMUST00000066475.11, size=3250
-- load sequence: ENSMUST00000208521.2, size=1159
-- load sequence: ENSMUST00000062002.6, size=2739
-- load sequence: ENSMUST00000100538.4, size=3362
-- load sequence: ENSMUST00000229970.2, size=809
-- load sequence: ENSMUST00000229000.2, size=572
-- load sequence: ENSMUST00000204978.3, size=610
-- load sequence: ENSMUST00000204084.3, size=409
-- load sequence: ENSMUST00000203801.2, size=1232
-- load s

-- load sequence: ENSMUST00000215065.3, size=3578
-- load sequence: ENSMUST00000216316.3, size=1078
-- load sequence: ENSMUST00000077757.7, size=1071
-- load sequence: ENSMUST00000020212.6, size=3639
-- load sequence: ENSMUST00000220052.2, size=3910
-- load sequence: ENSMUST00000220352.2, size=1842
-- load sequence: ENSMUST00000218076.2, size=1692
-- load sequence: ENSMUST00000007280.9, size=1565
-- load sequence: ENSMUST00000216177.2, size=1726
-- load sequence: ENSMUST00000051653.2, size=936
-- load sequence: ENSMUST00000215505.2, size=3514
-- load sequence: ENSMUST00000217227.2, size=3831
-- load sequence: ENSMUST00000062545.3, size=942
-- load sequence: ENSMUST00000216298.2, size=5616
-- load sequence: ENSMUST00000215194.2, size=4339
-- load sequence: ENSMUST00000079178.3, size=1055
-- load sequence: ENSMUST00000161660.8, size=754
-- load sequence: ENSMUST00000067505.15, size=2164
-- load sequence: ENSMUST00000161059.8, size=823
-- load sequence: ENSMUST00000111619.10, size=1256
--

-- load sequence: ENSMUST00000160394.8, size=1527
-- load sequence: ENSMUST00000161957.2, size=2520
-- load sequence: ENSMUST00000161547.2, size=548
-- load sequence: ENSMUST00000220791.2, size=1449
-- load sequence: ENSMUST00000221282.2, size=819
-- load sequence: ENSMUST00000161284.3, size=3817
-- load sequence: ENSMUST00000231096.2, size=1134
-- load sequence: ENSMUST00000023105.5, size=2408
-- load sequence: ENSMUST00000230430.2, size=1668
-- load sequence: ENSMUST00000100249.10, size=1467
-- load sequence: ENSMUST00000026188.10, size=4112
-- load sequence: ENSMUST00000160107.2, size=950
-- load sequence: ENSMUST00000160893.2, size=635
-- load sequence: ENSMUST00000161422.2, size=741
-- load sequence: ENSMUST00000162238.2, size=3129
-- load sequence: ENSMUST00000205041.2, size=2647
-- load sequence: ENSMUST00000032122.11, size=5021
-- load sequence: ENSMUST00000203775.2, size=4933
-- load sequence: ENSMUST00000086960.3, size=72
-- load sequence: ENSMUST00000021941.8, size=1327
-- l

-- load sequence: ENSMUST00000054629.7, size=954
-- load sequence: ENSMUST00000030709.9, size=2006
-- load sequence: ENSMUST00000026999.10, size=4278
-- load sequence: ENSMUST00000174843.8, size=793
-- load sequence: ENSMUST00000174411.2, size=769
-- load sequence: ENSMUST00000172718.2, size=392
-- load sequence: ENSMUST00000168918.8, size=1882
-- load sequence: ENSMUST00000069692.10, size=2438
-- load sequence: ENSMUST00000069637.15, size=2188
-- load sequence: ENSMUST00000221604.2, size=3080
-- load sequence: ENSMUST00000222394.2, size=4504
-- load sequence: ENSMUST00000221847.2, size=2382
-- load sequence: ENSMUST00000200948.2, size=1430
-- load sequence: ENSMUST00000162069.3, size=422
-- load sequence: ENSMUST00000206416.2, size=558
-- load sequence: ENSMUST00000156876.8, size=4932
-- load sequence: ENSMUST00000147758.8, size=3716
-- load sequence: ENSMUST00000188126.2, size=2864
-- load sequence: ENSMUST00000087701.4, size=3493
-- load sequence: ENSMUST00000149887.2, size=629
-- l

-- load sequence: ENSMUST00000124707.2, size=648
-- load sequence: ENSMUST00000208719.2, size=756
-- load sequence: ENSMUST00000209077.2, size=340
-- load sequence: ENSMUST00000066921.10, size=2784
-- load sequence: ENSMUST00000199817.5, size=1481
-- load sequence: ENSMUST00000196727.5, size=2749
-- load sequence: ENSMUST00000197006.2, size=4467
-- load sequence: ENSMUST00000170496.6, size=2602
-- load sequence: ENSMUST00000198792.2, size=453
-- load sequence: ENSMUST00000199532.2, size=422
-- load sequence: ENSMUST00000198119.2, size=2639
-- load sequence: ENSMUST00000036972.14, size=8831
-- load sequence: ENSMUST00000118129.2, size=3100
-- load sequence: ENSMUST00000177321.2, size=569
-- load sequence: ENSMUST00000035931.13, size=5168
-- load sequence: ENSMUST00000191794.2, size=5146
-- load sequence: ENSMUST00000193600.2, size=545
-- load sequence: ENSMUST00000194603.2, size=1515
-- load sequence: ENSMUST00000195086.2, size=890
-- load sequence: ENSMUST00000075363.10, size=1457
-- l

-- load sequence: ENSMUST00000186701.2, size=534
-- load sequence: ENSMUST00000188987.7, size=350
-- load sequence: ENSMUST00000186541.2, size=410
-- load sequence: ENSMUST00000190986.2, size=384
-- load sequence: ENSMUST00000191535.2, size=323
-- load sequence: ENSMUST00000225367.2, size=1178
-- load sequence: ENSMUST00000052514.6, size=8910
-- load sequence: ENSMUST00000223839.2, size=980
-- load sequence: ENSMUST00000223581.2, size=681
-- load sequence: ENSMUST00000223674.2, size=2063
-- load sequence: ENSMUST00000224703.2, size=1768
-- load sequence: ENSMUST00000224252.2, size=2847
-- load sequence: ENSMUST00000225649.2, size=1011
-- load sequence: ENSMUST00000105907.9, size=1484
-- load sequence: ENSMUST00000137622.8, size=537
-- load sequence: ENSMUST00000139030.2, size=546
-- load sequence: ENSMUST00000134781.8, size=613
-- load sequence: ENSMUST00000132713.2, size=587
-- load sequence: ENSMUST00000153059.2, size=373
-- load sequence: ENSMUST00000026236.11, size=2065
-- load seq

-- load sequence: ENSMUST00000171545.9, size=2745
-- load sequence: ENSMUST00000228823.2, size=486
-- load sequence: ENSMUST00000227483.2, size=2977
-- load sequence: ENSMUST00000228222.2, size=728
-- load sequence: ENSMUST00000227057.2, size=525
-- load sequence: ENSMUST00000226339.2, size=482
-- load sequence: ENSMUST00000021443.7, size=3241
-- load sequence: ENSMUST00000220046.2, size=3040
-- load sequence: ENSMUST00000218110.2, size=642
-- load sequence: ENSMUST00000218331.2, size=359
-- load sequence: ENSMUST00000218010.2, size=674
-- load sequence: ENSMUST00000218513.2, size=406
-- load sequence: ENSMUST00000220321.2, size=738
-- load sequence: ENSMUST00000030669.8, size=4676
-- load sequence: ENSMUST00000140681.2, size=725
-- load sequence: ENSMUST00000156079.2, size=760
-- load sequence: ENSMUST00000132864.2, size=579
-- load sequence: ENSMUST00000141658.2, size=928
-- load sequence: ENSMUST00000169922.9, size=9085
-- load sequence: ENSMUST00000199809.5, size=5289
-- load seque

-- load sequence: ENSMUST00000203043.2, size=15841
-- load sequence: ENSMUST00000050160.6, size=1368
-- load sequence: ENSMUST00000144270.8, size=5241
-- load sequence: ENSMUST00000005815.7, size=5214
-- load sequence: ENSMUST00000143221.2, size=648
-- load sequence: ENSMUST00000151357.2, size=354
-- load sequence: ENSMUST00000136002.2, size=392
-- load sequence: ENSMUST00000148993.5, size=842
-- load sequence: ENSMUST00000202167.2, size=3175
-- load sequence: ENSMUST00000201240.2, size=3104
-- load sequence: ENSMUST00000044976.12, size=2822
-- load sequence: ENSMUST00000155846.8, size=778
-- load sequence: ENSMUST00000157069.8, size=803
-- load sequence: ENSMUST00000119960.2, size=3296
-- load sequence: ENSMUST00000123173.2, size=690
-- load sequence: ENSMUST00000218464.2, size=1597
-- load sequence: ENSMUST00000188930.7, size=748
-- load sequence: ENSMUST00000187119.7, size=1524
-- load sequence: ENSMUST00000188736.7, size=1709
-- load sequence: ENSMUST00000191336.7, size=831
-- load

-- load sequence: ENSMUST00000034090.8, size=5265
-- load sequence: ENSMUST00000160737.8, size=2338
-- load sequence: ENSMUST00000005014.9, size=1711
-- load sequence: ENSMUST00000162352.2, size=521
-- load sequence: ENSMUST00000160150.2, size=658
-- load sequence: ENSMUST00000017622.12, size=3810
-- load sequence: ENSMUST00000093073.12, size=3705
-- load sequence: ENSMUST00000176699.2, size=512
-- load sequence: ENSMUST00000177386.2, size=2895
-- load sequence: ENSMUST00000176629.8, size=3672
-- load sequence: ENSMUST00000177195.2, size=492
-- load sequence: ENSMUST00000175819.2, size=518
-- load sequence: ENSMUST00000177123.2, size=4789
-- load sequence: ENSMUST00000175798.2, size=375
-- load sequence: ENSMUST00000177049.2, size=837
-- load sequence: ENSMUST00000174525.8, size=1991
-- load sequence: ENSMUST00000174589.2, size=4328
-- load sequence: ENSMUST00000068291.7, size=1473
-- load sequence: ENSMUST00000072040.7, size=1509
-- load sequence: ENSMUST00000221312.2, size=2520
-- lo

-- load sequence: ENSMUST00000135938.8, size=2895
-- load sequence: ENSMUST00000233173.2, size=3496
-- load sequence: ENSMUST00000233283.2, size=2014
-- load sequence: ENSMUST00000133717.9, size=2445
-- load sequence: ENSMUST00000156946.2, size=2765
-- load sequence: ENSMUST00000232998.2, size=447
-- load sequence: ENSMUST00000126801.8, size=1835
-- load sequence: ENSMUST00000148486.8, size=5276
-- load sequence: ENSMUST00000140728.8, size=2173
-- load sequence: ENSMUST00000150260.2, size=2170
-- load sequence: ENSMUST00000097288.4, size=2861
-- load sequence: ENSMUST00000233780.2, size=1738
-- load sequence: ENSMUST00000081684.3, size=702
-- load sequence: ENSMUST00000065504.17, size=2851
-- load sequence: ENSMUST00000100844.6, size=2904
-- load sequence: ENSMUST00000224975.2, size=2727
-- load sequence: ENSMUST00000225941.2, size=739
-- load sequence: ENSMUST00000226001.2, size=673
-- load sequence: ENSMUST00000224410.2, size=2653
-- load sequence: ENSMUST00000223960.2, size=387
-- l

-- load sequence: ENSMUST00000229015.2, size=616
-- load sequence: ENSMUST00000229797.2, size=710
-- load sequence: ENSMUST00000229447.2, size=609
-- load sequence: ENSMUST00000049038.4, size=2997
-- load sequence: ENSMUST00000236437.2, size=2432
-- load sequence: ENSMUST00000039500.4, size=4073
-- load sequence: ENSMUST00000235605.2, size=1645
-- load sequence: ENSMUST00000235546.2, size=2393
-- load sequence: ENSMUST00000001565.15, size=1679
-- load sequence: ENSMUST00000162927.2, size=920
-- load sequence: ENSMUST00000160535.8, size=731
-- load sequence: ENSMUST00000160734.8, size=1569
-- load sequence: ENSMUST00000160752.9, size=1569
-- load sequence: ENSMUST00000162894.8, size=925
-- load sequence: ENSMUST00000160039.8, size=660
-- load sequence: ENSMUST00000162266.8, size=367
-- load sequence: ENSMUST00000159671.8, size=620
-- load sequence: ENSMUST00000162604.2, size=827
-- load sequence: ENSMUST00000160711.2, size=271
-- load sequence: ENSMUST00000159852.2, size=331
-- load seq

-- load sequence: ENSMUST00000040217.6, size=1283
-- load sequence: ENSMUST00000213847.2, size=1146
-- load sequence: ENSMUST00000053253.10, size=5716
-- load sequence: ENSMUST00000180792.8, size=2667
-- load sequence: ENSMUST00000096273.9, size=1622
-- load sequence: ENSMUST00000122168.8, size=2096
-- load sequence: ENSMUST00000150993.2, size=2942
-- load sequence: ENSMUST00000151478.8, size=633
-- load sequence: ENSMUST00000134941.2, size=1515
-- load sequence: ENSMUST00000115814.4, size=1125
-- load sequence: ENSMUST00000008907.14, size=7969
-- load sequence: ENSMUST00000133642.2, size=592
-- load sequence: ENSMUST00000130066.2, size=740
-- load sequence: ENSMUST00000196250.2, size=744
-- load sequence: ENSMUST00000123442.2, size=508
-- load sequence: ENSMUST00000146818.2, size=352
-- load sequence: ENSMUST00000032261.9, size=1521
-- load sequence: ENSMUST00000112082.9, size=1335
-- load sequence: ENSMUST00000030985.10, size=2135
-- load sequence: ENSMUST00000202573.2, size=434
-- l

-- load sequence: ENSMUST00000105590.8, size=6339
-- load sequence: ENSMUST00000133985.2, size=2822
-- load sequence: ENSMUST00000138828.8, size=2106
-- load sequence: ENSMUST00000067086.14, size=4344
-- load sequence: ENSMUST00000105588.8, size=2733
-- load sequence: ENSMUST00000105589.2, size=2612
-- load sequence: ENSMUST00000127145.2, size=1356
-- load sequence: ENSMUST00000127934.8, size=719
-- load sequence: ENSMUST00000137012.2, size=1019
-- load sequence: ENSMUST00000102694.4, size=5902
-- load sequence: ENSMUST00000198037.2, size=660
-- load sequence: ENSMUST00000200065.2, size=4786
-- load sequence: ENSMUST00000168329.7, size=1125
-- load sequence: ENSMUST00000170301.3, size=1092
-- load sequence: ENSMUST00000038287.7, size=2473
-- load sequence: ENSMUST00000234512.2, size=428
-- load sequence: ENSMUST00000047762.10, size=3234
-- load sequence: ENSMUST00000231831.2, size=3784
-- load sequence: ENSMUST00000235081.2, size=852
-- load sequence: ENSMUST00000234722.2, size=451
-- 

-- load sequence: ENSMUST00000089752.11, size=8190
-- load sequence: ENSMUST00000054096.13, size=4715
-- load sequence: ENSMUST00000176606.2, size=2767
-- load sequence: ENSMUST00000038628.4, size=4285
-- load sequence: ENSMUST00000197995.2, size=1212
-- load sequence: ENSMUST00000130560.8, size=2074
-- load sequence: ENSMUST00000115230.2, size=1070
-- load sequence: ENSMUST00000023207.15, size=979
-- load sequence: ENSMUST00000156456.8, size=371
-- load sequence: ENSMUST00000155682.2, size=422
-- load sequence: ENSMUST00000145837.2, size=362
-- load sequence: ENSMUST00000152792.8, size=5621
-- load sequence: ENSMUST00000124505.8, size=4962
-- load sequence: ENSMUST00000152065.8, size=4728
-- load sequence: ENSMUST00000127756.8, size=4672
-- load sequence: ENSMUST00000134680.2, size=484
-- load sequence: ENSMUST00000150944.2, size=426
-- load sequence: ENSMUST00000127110.2, size=532
-- load sequence: ENSMUST00000020984.9, size=4348
-- load sequence: ENSMUST00000196384.2, size=1167
-- l

-- load sequence: ENSMUST00000201452.4, size=3140
-- load sequence: ENSMUST00000202361.4, size=3802
-- load sequence: ENSMUST00000074846.14, size=3240
-- load sequence: ENSMUST00000202089.4, size=3054
-- load sequence: ENSMUST00000201877.2, size=751
-- load sequence: ENSMUST00000202137.2, size=752
-- load sequence: ENSMUST00000201431.4, size=4764
-- load sequence: ENSMUST00000201559.4, size=661
-- load sequence: ENSMUST00000200805.4, size=587
-- load sequence: ENSMUST00000200825.2, size=416
-- load sequence: ENSMUST00000201666.2, size=254
-- load sequence: ENSMUST00000061052.12, size=1452
-- load sequence: ENSMUST00000077494.13, size=1340
-- load sequence: ENSMUST00000149277.8, size=1388
-- load sequence: ENSMUST00000143114.8, size=539
-- load sequence: ENSMUST00000126744.8, size=882
-- load sequence: ENSMUST00000141662.8, size=2034
-- load sequence: ENSMUST00000056034.13, size=1930
-- load sequence: ENSMUST00000140292.8, size=731
-- load sequence: ENSMUST00000151009.8, size=762
-- loa

-- load sequence: ENSMUST00000109202.3, size=1773
-- load sequence: ENSMUST00000152914.2, size=703
-- load sequence: ENSMUST00000214804.2, size=737
-- load sequence: ENSMUST00000213728.2, size=1678
-- load sequence: ENSMUST00000046704.7, size=1690
-- load sequence: ENSMUST00000224982.2, size=5325
-- load sequence: ENSMUST00000066510.8, size=4261
-- load sequence: ENSMUST00000224469.2, size=1616
-- load sequence: ENSMUST00000225362.2, size=556
-- load sequence: ENSMUST00000165133.3, size=4287
-- load sequence: ENSMUST00000224373.2, size=874
-- load sequence: ENSMUST00000022293.14, size=1954
-- load sequence: ENSMUST00000153320.8, size=893
-- load sequence: ENSMUST00000173456.8, size=956
-- load sequence: ENSMUST00000152562.8, size=377
-- load sequence: ENSMUST00000172727.2, size=1291
-- load sequence: ENSMUST00000147715.2, size=407
-- load sequence: ENSMUST00000132364.2, size=663
-- load sequence: ENSMUST00000130224.2, size=595
-- load sequence: ENSMUST00000142084.3, size=2039
-- load s

-- load sequence: ENSMUST00000035027.13, size=4365
-- load sequence: ENSMUST00000162295.2, size=4174
-- load sequence: ENSMUST00000096367.5, size=4173
-- load sequence: ENSMUST00000021728.12, size=924
-- load sequence: ENSMUST00000109755.5, size=333
-- load sequence: ENSMUST00000222353.2, size=568
-- load sequence: ENSMUST00000222368.2, size=401
-- load sequence: ENSMUST00000222729.2, size=1317
-- load sequence: ENSMUST00000118535.9, size=1419
-- load sequence: ENSMUST00000117702.2, size=1080
-- load sequence: ENSMUST00000212846.2, size=640
-- load sequence: ENSMUST00000178196.3, size=510
-- load sequence: ENSMUST00000161058.8, size=1430
-- load sequence: ENSMUST00000159342.8, size=5456
-- load sequence: ENSMUST00000162039.2, size=599
-- load sequence: ENSMUST00000097992.10, size=5791
-- load sequence: ENSMUST00000129967.2, size=590
-- load sequence: ENSMUST00000107148.8, size=5988
-- load sequence: ENSMUST00000107147.2, size=1177
-- load sequence: ENSMUST00000147527.2, size=466
-- loa

-- load sequence: ENSMUST00000216794.2, size=3199
-- load sequence: ENSMUST00000217219.2, size=3314
-- load sequence: ENSMUST00000204717.3, size=313
-- load sequence: ENSMUST00000078876.4, size=960
-- load sequence: ENSMUST00000109521.4, size=1580
-- load sequence: ENSMUST00000212744.2, size=376
-- load sequence: ENSMUST00000211939.2, size=3881
-- load sequence: ENSMUST00000212120.2, size=530
-- load sequence: ENSMUST00000212124.2, size=528
-- load sequence: ENSMUST00000211957.2, size=855
-- load sequence: ENSMUST00000212848.2, size=830
-- load sequence: ENSMUST00000213239.2, size=3724
-- load sequence: ENSMUST00000215142.3, size=3108
-- load sequence: ENSMUST00000095245.2, size=942
-- load sequence: ENSMUST00000217106.2, size=1788
-- load sequence: ENSMUST00000213379.2, size=1866
-- load sequence: ENSMUST00000059038.3, size=997
-- load sequence: ENSMUST00000135643.8, size=1192
-- load sequence: ENSMUST00000178511.3, size=954
-- load sequence: ENSMUST00000203236.3, size=4523
-- load se

-- load sequence: ENSMUST00000139700.3, size=382
-- load sequence: ENSMUST00000166968.9, size=4011
-- load sequence: ENSMUST00000168810.9, size=2175
-- load sequence: ENSMUST00000173780.2, size=2137
-- load sequence: ENSMUST00000176122.2, size=3607
-- load sequence: ENSMUST00000066774.6, size=1374
-- load sequence: ENSMUST00000019649.4, size=1499
-- load sequence: ENSMUST00000136938.2, size=890
-- load sequence: ENSMUST00000197841.2, size=2476
-- load sequence: ENSMUST00000065220.13, size=1672
-- load sequence: ENSMUST00000196081.5, size=1615
-- load sequence: ENSMUST00000091112.6, size=1795
-- load sequence: ENSMUST00000197220.2, size=1684
-- load sequence: ENSMUST00000200358.2, size=669
-- load sequence: ENSMUST00000198838.2, size=557
-- load sequence: ENSMUST00000200673.2, size=339
-- load sequence: ENSMUST00000196716.2, size=7795
-- load sequence: ENSMUST00000165128.9, size=1836
-- load sequence: ENSMUST00000077500.6, size=1426
-- load sequence: ENSMUST00000130660.2, size=618
-- lo

-- load sequence: ENSMUST00000139347.8, size=822
-- load sequence: ENSMUST00000156591.8, size=798
-- load sequence: ENSMUST00000139666.8, size=7732
-- load sequence: ENSMUST00000137708.8, size=5153
-- load sequence: ENSMUST00000137784.8, size=5684
-- load sequence: ENSMUST00000150848.8, size=5418
-- load sequence: ENSMUST00000137495.8, size=3246
-- load sequence: ENSMUST00000137503.2, size=593
-- load sequence: ENSMUST00000141640.2, size=779
-- load sequence: ENSMUST00000239324.2, size=327
-- load sequence: ENSMUST00000136087.2, size=241
-- load sequence: ENSMUST00000123030.2, size=427
-- load sequence: ENSMUST00000137531.3, size=1210
-- load sequence: ENSMUST00000124188.2, size=610
-- load sequence: ENSMUST00000133422.8, size=1167
-- load sequence: ENSMUST00000124922.2, size=2662
-- load sequence: ENSMUST00000170827.9, size=7338
-- load sequence: ENSMUST00000110893.10, size=1242
-- load sequence: ENSMUST00000085633.12, size=1089
-- load sequence: ENSMUST00000110891.8, size=1214
-- loa

-- load sequence: ENSMUST00000210998.2, size=488
-- load sequence: ENSMUST00000210668.2, size=542
-- load sequence: ENSMUST00000026018.4, size=845
-- load sequence: ENSMUST00000144912.8, size=362
-- load sequence: ENSMUST00000156336.2, size=2858
-- load sequence: ENSMUST00000019503.14, size=2495
-- load sequence: ENSMUST00000113744.2, size=2427
-- load sequence: ENSMUST00000151572.3, size=1421
-- load sequence: ENSMUST00000174143.2, size=1571
-- load sequence: ENSMUST00000087656.5, size=925
-- load sequence: ENSMUST00000217223.2, size=3733
-- load sequence: ENSMUST00000076331.3, size=1095
-- load sequence: ENSMUST00000019944.9, size=1466
-- load sequence: ENSMUST00000094836.6, size=3458
-- load sequence: ENSMUST00000216440.2, size=3644
-- load sequence: ENSMUST00000080231.5, size=1050
-- load sequence: ENSMUST00000032074.5, size=986
-- load sequence: ENSMUST00000179539.9, size=752
-- load sequence: ENSMUST00000232299.2, size=2253
-- load sequence: ENSMUST00000097400.5, size=747
-- load

-- load sequence: ENSMUST00000030951.2, size=596
-- load sequence: ENSMUST00000167994.10, size=3438
-- load sequence: ENSMUST00000128267.8, size=1082
-- load sequence: ENSMUST00000130297.2, size=708
-- load sequence: ENSMUST00000147532.8, size=1447
-- load sequence: ENSMUST00000139911.2, size=666
-- load sequence: ENSMUST00000132675.2, size=474
-- load sequence: ENSMUST00000233295.2, size=3460
-- load sequence: ENSMUST00000002101.12, size=1023
-- load sequence: ENSMUST00000159019.3, size=2539
-- load sequence: ENSMUST00000160886.2, size=412
-- load sequence: ENSMUST00000098971.11, size=4307
-- load sequence: ENSMUST00000146273.8, size=1966
-- load sequence: ENSMUST00000139608.8, size=4448
-- load sequence: ENSMUST00000054622.15, size=4401
-- load sequence: ENSMUST00000108814.8, size=4322
-- load sequence: ENSMUST00000048608.16, size=4135
-- load sequence: ENSMUST00000108815.8, size=4243
-- load sequence: ENSMUST00000153112.2, size=719
-- load sequence: ENSMUST00000148252.2, size=3505
-

-- load sequence: ENSMUST00000213046.2, size=550
-- load sequence: ENSMUST00000211937.2, size=500
-- load sequence: ENSMUST00000212228.2, size=2925
-- load sequence: ENSMUST00000212556.2, size=521
-- load sequence: ENSMUST00000212323.2, size=3277
-- load sequence: ENSMUST00000098473.11, size=8390
-- load sequence: ENSMUST00000120268.2, size=1131
-- load sequence: ENSMUST00000166327.3, size=11130
-- load sequence: ENSMUST00000232464.2, size=10873
-- load sequence: ENSMUST00000232090.2, size=11145
-- load sequence: ENSMUST00000092646.13, size=5098
-- load sequence: ENSMUST00000177522.8, size=2192
-- load sequence: ENSMUST00000177232.8, size=3953
-- load sequence: ENSMUST00000020159.15, size=4798
-- load sequence: ENSMUST00000176827.8, size=1635
-- load sequence: ENSMUST00000176313.2, size=770
-- load sequence: ENSMUST00000177175.2, size=419
-- load sequence: ENSMUST00000176502.2, size=704
-- load sequence: ENSMUST00000176285.2, size=3226
-- load sequence: ENSMUST00000175786.2, size=368
-

-- load sequence: ENSMUST00000124934.8, size=2817
-- load sequence: ENSMUST00000077730.7, size=7928
-- load sequence: ENSMUST00000239467.2, size=808
-- load sequence: ENSMUST00000063871.13, size=4866
-- load sequence: ENSMUST00000172378.2, size=1438
-- load sequence: ENSMUST00000026506.5, size=1504
-- load sequence: ENSMUST00000148125.2, size=393
-- load sequence: ENSMUST00000205667.2, size=3002
-- load sequence: ENSMUST00000022831.5, size=3898
-- load sequence: ENSMUST00000227462.2, size=4075
-- load sequence: ENSMUST00000228462.2, size=5882
-- load sequence: ENSMUST00000113602.2, size=420
-- load sequence: ENSMUST00000211116.2, size=386
-- load sequence: ENSMUST00000113089.8, size=1720
-- load sequence: ENSMUST00000032416.11, size=1703
-- load sequence: ENSMUST00000113091.8, size=785
-- load sequence: ENSMUST00000133348.2, size=376
-- load sequence: ENSMUST00000031367.15, size=2732
-- load sequence: ENSMUST00000124569.8, size=2370
-- load sequence: ENSMUST00000153832.8, size=2673
-- 

-- load sequence: ENSMUST00000114836.9, size=1752
-- load sequence: ENSMUST00000129046.9, size=1948
-- load sequence: ENSMUST00000233150.2, size=1420
-- load sequence: ENSMUST00000233665.2, size=1033
-- load sequence: ENSMUST00000233961.2, size=1507
-- load sequence: ENSMUST00000233587.2, size=1978
-- load sequence: ENSMUST00000232728.2, size=1566
-- load sequence: ENSMUST00000233898.2, size=1705
-- load sequence: ENSMUST00000232798.2, size=1825
-- load sequence: ENSMUST00000233615.2, size=1722
-- load sequence: ENSMUST00000233009.2, size=1619
-- load sequence: ENSMUST00000043925.16, size=1743
-- load sequence: ENSMUST00000123628.2, size=587
-- load sequence: ENSMUST00000233878.2, size=83
-- load sequence: ENSMUST00000108681.9, size=1941
-- load sequence: ENSMUST00000136796.9, size=1118
-- load sequence: ENSMUST00000041611.12, size=2968
-- load sequence: ENSMUST00000129801.2, size=785
-- load sequence: ENSMUST00000145496.2, size=2628
-- load sequence: ENSMUST00000108680.2, size=1466
--

-- load sequence: ENSMUST00000171784.8, size=1759
-- load sequence: ENSMUST00000077780.14, size=2480
-- load sequence: ENSMUST00000138924.2, size=944
-- load sequence: ENSMUST00000145131.8, size=902
-- load sequence: ENSMUST00000137342.2, size=668
-- load sequence: ENSMUST00000138619.8, size=457
-- load sequence: ENSMUST00000139811.8, size=1188
-- load sequence: ENSMUST00000128421.2, size=747
-- load sequence: ENSMUST00000206535.2, size=1542
-- load sequence: ENSMUST00000165241.8, size=1668
-- load sequence: ENSMUST00000164408.8, size=1745
-- load sequence: ENSMUST00000100900.4, size=1089
-- load sequence: ENSMUST00000024734.8, size=645
-- load sequence: ENSMUST00000149040.2, size=531
-- load sequence: ENSMUST00000148537.2, size=510
-- load sequence: ENSMUST00000030510.14, size=3047
-- load sequence: ENSMUST00000166773.2, size=2524
-- load sequence: ENSMUST00000112737.10, size=984
-- load sequence: ENSMUST00000170923.9, size=1761
-- load sequence: ENSMUST00000233108.2, size=518
-- load

-- load sequence: ENSMUST00000078752.10, size=4215
-- load sequence: ENSMUST00000127475.2, size=1814
-- load sequence: ENSMUST00000135416.8, size=928
-- load sequence: ENSMUST00000089912.12, size=3602
-- load sequence: ENSMUST00000089915.10, size=3475
-- load sequence: ENSMUST00000126256.2, size=1282
-- load sequence: ENSMUST00000136023.2, size=918
-- load sequence: ENSMUST00000110586.10, size=4032
-- load sequence: ENSMUST00000107105.9, size=1861
-- load sequence: ENSMUST00000107102.8, size=1837
-- load sequence: ENSMUST00000107103.8, size=1822
-- load sequence: ENSMUST00000006750.8, size=2046
-- load sequence: ENSMUST00000146330.8, size=1878
-- load sequence: ENSMUST00000128331.2, size=635
-- load sequence: ENSMUST00000150982.8, size=1017
-- load sequence: ENSMUST00000142157.8, size=866
-- load sequence: ENSMUST00000131219.2, size=462
-- load sequence: ENSMUST00000132876.2, size=313
-- load sequence: ENSMUST00000167804.9, size=2637
-- load sequence: ENSMUST00000077385.15, size=2546
-

-- load sequence: ENSMUST00000129096.3, size=1830
-- load sequence: ENSMUST00000133786.3, size=815
-- load sequence: ENSMUST00000233109.2, size=825
-- load sequence: ENSMUST00000233051.2, size=864
-- load sequence: ENSMUST00000233676.2, size=1127
-- load sequence: ENSMUST00000004986.14, size=1526
-- load sequence: ENSMUST00000034527.14, size=3366
-- load sequence: ENSMUST00000135457.8, size=1433
-- load sequence: ENSMUST00000137976.2, size=629
-- load sequence: ENSMUST00000149185.8, size=1037
-- load sequence: ENSMUST00000142667.2, size=517
-- load sequence: ENSMUST00000088785.6, size=1709
-- load sequence: ENSMUST00000182248.2, size=382
-- load sequence: ENSMUST00000098278.4, size=2371
-- load sequence: ENSMUST00000090569.10, size=1239
-- load sequence: ENSMUST00000129708.3, size=387
-- load sequence: ENSMUST00000000194.4, size=537
-- load sequence: ENSMUST00000124916.2, size=481
-- load sequence: ENSMUST00000123095.8, size=363
-- load sequence: ENSMUST00000050735.12, size=4006
-- loa

-- load sequence: ENSMUST00000130972.8, size=1923
-- load sequence: ENSMUST00000128241.8, size=1938
-- load sequence: ENSMUST00000143548.8, size=1265
-- load sequence: ENSMUST00000131028.8, size=2966
-- load sequence: ENSMUST00000155021.8, size=1737
-- load sequence: ENSMUST00000140636.8, size=1615
-- load sequence: ENSMUST00000148469.8, size=1618
-- load sequence: ENSMUST00000019257.15, size=1926
-- load sequence: ENSMUST00000105395.9, size=1630
-- load sequence: ENSMUST00000156417.8, size=1627
-- load sequence: ENSMUST00000105396.9, size=1749
-- load sequence: ENSMUST00000143735.8, size=1936
-- load sequence: ENSMUST00000154374.2, size=1712
-- load sequence: ENSMUST00000143452.2, size=632
-- load sequence: ENSMUST00000141255.2, size=1077
-- load sequence: ENSMUST00000033975.9, size=4246
-- load sequence: ENSMUST00000210777.2, size=734
-- load sequence: ENSMUST00000211688.4, size=2266
-- load sequence: ENSMUST00000211009.3, size=2509
-- load sequence: ENSMUST00000211751.3, size=451
--

-- load sequence: ENSMUST00000136445.2, size=569
-- load sequence: ENSMUST00000216942.2, size=3370
-- load sequence: ENSMUST00000051540.5, size=1061
-- load sequence: ENSMUST00000053686.9, size=4731
-- load sequence: ENSMUST00000192702.6, size=1194
-- load sequence: ENSMUST00000195443.6, size=738
-- load sequence: ENSMUST00000027839.14, size=803
-- load sequence: ENSMUST00000192269.3, size=560
-- load sequence: ENSMUST00000191745.2, size=732
-- load sequence: ENSMUST00000193579.2, size=480
-- load sequence: ENSMUST00000101491.11, size=3747
-- load sequence: ENSMUST00000031975.7, size=1745
-- load sequence: ENSMUST00000129948.9, size=858
-- load sequence: ENSMUST00000177178.2, size=665
-- load sequence: ENSMUST00000182750.10, size=1758
-- load sequence: ENSMUST00000107742.11, size=1574
-- load sequence: ENSMUST00000183120.4, size=1491
-- load sequence: ENSMUST00000205100.3, size=3023
-- load sequence: ENSMUST00000061289.7, size=963
-- load sequence: ENSMUST00000156671.8, size=752
-- loa

-- load sequence: ENSMUST00000151104.8, size=2452
-- load sequence: ENSMUST00000127135.3, size=488
-- load sequence: ENSMUST00000075571.16, size=1193
-- load sequence: ENSMUST00000209739.2, size=1858
-- load sequence: ENSMUST00000210754.2, size=1242
-- load sequence: ENSMUST00000210147.2, size=513
-- load sequence: ENSMUST00000209435.2, size=422
-- load sequence: ENSMUST00000209464.2, size=431
-- load sequence: ENSMUST00000210021.2, size=1690
-- load sequence: ENSMUST00000211779.2, size=1427
-- load sequence: ENSMUST00000238192.2, size=3115
-- load sequence: ENSMUST00000237527.2, size=1195
-- load sequence: ENSMUST00000133308.3, size=1479
-- load sequence: ENSMUST00000236321.2, size=1480
-- load sequence: ENSMUST00000138172.2, size=387
-- load sequence: ENSMUST00000002145.12, size=2867
-- load sequence: ENSMUST00000128085.2, size=774
-- load sequence: ENSMUST00000041621.5, size=1485
-- load sequence: ENSMUST00000142670.2, size=812
-- load sequence: ENSMUST00000144440.2, size=775
-- loa

-- load sequence: ENSMUST00000147330.8, size=1671
-- load sequence: ENSMUST00000137952.8, size=571
-- load sequence: ENSMUST00000148245.8, size=651
-- load sequence: ENSMUST00000146387.8, size=2335
-- load sequence: ENSMUST00000141101.5, size=6794
-- load sequence: ENSMUST00000051704.15, size=9514
-- load sequence: ENSMUST00000112008.9, size=6087
-- load sequence: ENSMUST00000102560.7, size=6213
-- load sequence: ENSMUST00000153407.2, size=293
-- load sequence: ENSMUST00000127976.3, size=561
-- load sequence: ENSMUST00000202734.2, size=1184
-- load sequence: ENSMUST00000128702.2, size=756
-- load sequence: ENSMUST00000145363.8, size=5974
-- load sequence: ENSMUST00000147479.8, size=5016
-- load sequence: ENSMUST00000139881.8, size=699
-- load sequence: ENSMUST00000122877.8, size=4398
-- load sequence: ENSMUST00000136780.2, size=287
-- load sequence: ENSMUST00000123736.2, size=3487
-- load sequence: ENSMUST00000134609.2, size=3456
-- load sequence: ENSMUST00000150821.8, size=7676
-- loa

-- load sequence: ENSMUST00000138316.2, size=1653
-- load sequence: ENSMUST00000011285.11, size=2534
-- load sequence: ENSMUST00000102585.2, size=3039
-- load sequence: ENSMUST00000138502.2, size=3706
-- load sequence: ENSMUST00000103009.5, size=582
-- load sequence: ENSMUST00000130128.2, size=512
-- load sequence: ENSMUST00000125261.2, size=907
-- load sequence: ENSMUST00000048128.15, size=5938
-- load sequence: ENSMUST00000119606.8, size=1841
-- load sequence: ENSMUST00000146895.2, size=362
-- load sequence: ENSMUST00000121840.8, size=1676
-- load sequence: ENSMUST00000117956.2, size=1863
-- load sequence: ENSMUST00000086535.12, size=1049
-- load sequence: ENSMUST00000194746.6, size=703
-- load sequence: ENSMUST00000178079.8, size=487
-- load sequence: ENSMUST00000179954.8, size=630
-- load sequence: ENSMUST00000195123.6, size=356
-- load sequence: ENSMUST00000191855.2, size=468
-- load sequence: ENSMUST00000166457.8, size=1891
-- load sequence: ENSMUST00000168295.2, size=428
-- load

-- load sequence: ENSMUST00000048309.12, size=7823
-- load sequence: ENSMUST00000080348.12, size=2591
-- load sequence: ENSMUST00000120220.3, size=2195
-- load sequence: ENSMUST00000123877.2, size=375
-- load sequence: ENSMUST00000061328.6, size=3900
-- load sequence: ENSMUST00000059889.4, size=632
-- load sequence: ENSMUST00000109484.2, size=488
-- load sequence: ENSMUST00000148187.2, size=323
-- load sequence: ENSMUST00000202447.4, size=3959
-- load sequence: ENSMUST00000201427.4, size=1971
-- load sequence: ENSMUST00000035579.10, size=3883
-- load sequence: ENSMUST00000201611.2, size=1878
-- load sequence: ENSMUST00000200780.4, size=387
-- load sequence: ENSMUST00000201889.2, size=303
-- load sequence: ENSMUST00000040853.11, size=4647
-- load sequence: ENSMUST00000128880.8, size=2433
-- load sequence: ENSMUST00000136902.2, size=780
-- load sequence: ENSMUST00000143728.2, size=507
-- load sequence: ENSMUST00000082122.14, size=1020
-- load sequence: ENSMUST00000051605.9, size=1950
-- 

-- load sequence: ENSMUST00000217553.2, size=458
-- load sequence: ENSMUST00000007139.6, size=1035
-- load sequence: ENSMUST00000186017.7, size=5258
-- load sequence: ENSMUST00000187407.7, size=4336
-- load sequence: ENSMUST00000027642.5, size=3737
-- load sequence: ENSMUST00000191549.3, size=2648
-- load sequence: ENSMUST00000047687.9, size=3453
-- load sequence: ENSMUST00000143579.2, size=843
-- load sequence: ENSMUST00000189714.2, size=1066
-- load sequence: ENSMUST00000191292.2, size=412
-- load sequence: ENSMUST00000054316.9, size=364
-- load sequence: ENSMUST00000111280.3, size=416
-- load sequence: ENSMUST00000031058.15, size=2021
-- load sequence: ENSMUST00000201347.4, size=2442
-- load sequence: ENSMUST00000200692.4, size=1816
-- load sequence: ENSMUST00000202256.2, size=910
-- load sequence: ENSMUST00000202501.2, size=692
-- load sequence: ENSMUST00000202025.2, size=488
-- load sequence: ENSMUST00000218429.2, size=1462
-- load sequence: ENSMUST00000219877.2, size=1160
-- load

-- load sequence: ENSMUST00000202082.4, size=4506
-- load sequence: ENSMUST00000202601.4, size=890
-- load sequence: ENSMUST00000200946.3, size=473
-- load sequence: ENSMUST00000201383.2, size=1553
-- load sequence: ENSMUST00000202721.2, size=2778
-- load sequence: ENSMUST00000202572.2, size=780
-- load sequence: ENSMUST00000143073.8, size=594
-- load sequence: ENSMUST00000030181.12, size=817
-- load sequence: ENSMUST00000127324.2, size=363
-- load sequence: ENSMUST00000107922.3, size=865
-- load sequence: ENSMUST00000140005.8, size=942
-- load sequence: ENSMUST00000137113.2, size=863
-- load sequence: ENSMUST00000135192.8, size=4622
-- load sequence: ENSMUST00000138407.8, size=2973
-- load sequence: ENSMUST00000124289.8, size=1627
-- load sequence: ENSMUST00000126634.8, size=2534
-- load sequence: ENSMUST00000027833.12, size=4351
-- load sequence: ENSMUST00000150298.2, size=457
-- load sequence: ENSMUST00000152403.2, size=657
-- load sequence: ENSMUST00000133909.2, size=2505
-- load s

-- load sequence: ENSMUST00000207270.2, size=5845
-- load sequence: ENSMUST00000208424.2, size=888
-- load sequence: ENSMUST00000208202.2, size=936
-- load sequence: ENSMUST00000054440.11, size=752
-- load sequence: ENSMUST00000106517.9, size=1279
-- load sequence: ENSMUST00000106516.2, size=1069
-- load sequence: ENSMUST00000208411.2, size=456
-- load sequence: ENSMUST00000207680.2, size=1294
-- load sequence: ENSMUST00000030028.5, size=2624
-- load sequence: ENSMUST00000138743.2, size=442
-- load sequence: ENSMUST00000132927.8, size=384
-- load sequence: ENSMUST00000148947.2, size=586
-- load sequence: ENSMUST00000215683.2, size=2421
-- load sequence: ENSMUST00000034499.10, size=1122
-- load sequence: ENSMUST00000215818.2, size=734
-- load sequence: ENSMUST00000216770.2, size=413
-- load sequence: ENSMUST00000102626.10, size=3137
-- load sequence: ENSMUST00000209161.2, size=3296
-- load sequence: ENSMUST00000033142.13, size=3145
-- load sequence: ENSMUST00000208324.2, size=2165
-- lo

-- load sequence: ENSMUST00000211769.2, size=817
-- load sequence: ENSMUST00000211598.2, size=867
-- load sequence: ENSMUST00000202622.4, size=2726
-- load sequence: ENSMUST00000036125.10, size=2367
-- load sequence: ENSMUST00000202289.4, size=1057
-- load sequence: ENSMUST00000200874.2, size=896
-- load sequence: ENSMUST00000201263.2, size=1849
-- load sequence: ENSMUST00000160436.2, size=2565
-- load sequence: ENSMUST00000161213.3, size=779
-- load sequence: ENSMUST00000162222.2, size=2489
-- load sequence: ENSMUST00000052915.14, size=4260
-- load sequence: ENSMUST00000140821.2, size=826
-- load sequence: ENSMUST00000152909.2, size=477
-- load sequence: ENSMUST00000144511.2, size=692
-- load sequence: ENSMUST00000106711.2, size=2551
-- load sequence: ENSMUST00000144130.2, size=1051
-- load sequence: ENSMUST00000139682.2, size=2045
-- load sequence: ENSMUST00000015137.10, size=3331
-- load sequence: ENSMUST00000111233.8, size=3109
-- load sequence: ENSMUST00000134093.2, size=3473
-- l

-- load sequence: ENSMUST00000211005.2, size=3961
-- load sequence: ENSMUST00000211022.2, size=3132
-- load sequence: ENSMUST00000211764.2, size=689
-- load sequence: ENSMUST00000209242.2, size=3198
-- load sequence: ENSMUST00000211375.2, size=2823
-- load sequence: ENSMUST00000011055.7, size=919
-- load sequence: ENSMUST00000124540.8, size=837
-- load sequence: ENSMUST00000155004.2, size=502
-- load sequence: ENSMUST00000147131.2, size=278
-- load sequence: ENSMUST00000000579.3, size=4135
-- load sequence: ENSMUST00000052708.7, size=3137
-- load sequence: ENSMUST00000149611.2, size=303
-- load sequence: ENSMUST00000107859.8, size=1738
-- load sequence: ENSMUST00000107861.8, size=2613
-- load sequence: ENSMUST00000042943.13, size=3293
-- load sequence: ENSMUST00000092780.9, size=1885
-- load sequence: ENSMUST00000107863.4, size=3334
-- load sequence: ENSMUST00000107858.9, size=2742
-- load sequence: ENSMUST00000028882.2, size=1974
-- load sequence: ENSMUST00000028881.14, size=1356
-- l

-- load sequence: ENSMUST00000133366.2, size=370
-- load sequence: ENSMUST00000147723.2, size=377
-- load sequence: ENSMUST00000041589.6, size=2288
-- load sequence: ENSMUST00000061469.4, size=3516
-- load sequence: ENSMUST00000131352.2, size=444
-- load sequence: ENSMUST00000208510.3, size=775
-- load sequence: ENSMUST00000178891.4, size=327
-- load sequence: ENSMUST00000140592.8, size=1412
-- load sequence: ENSMUST00000028151.7, size=869
-- load sequence: ENSMUST00000128039.2, size=655
-- load sequence: ENSMUST00000150419.2, size=851
-- load sequence: ENSMUST00000239512.1, size=4451
-- load sequence: ENSMUST00000239513.1, size=4587
-- load sequence: ENSMUST00000239514.1, size=3865
-- load sequence: ENSMUST00000239515.1, size=1791
-- load sequence: ENSMUST00000148168.8, size=3000
-- load sequence: ENSMUST00000108531.8, size=2453
-- load sequence: ENSMUST00000123192.2, size=2730
-- load sequence: ENSMUST00000035283.11, size=2450
-- load sequence: ENSMUST00000108530.2, size=2389
-- load

-- load sequence: ENSMUST00000154830.2, size=643
-- load sequence: ENSMUST00000146693.3, size=610
-- load sequence: ENSMUST00000174178.2, size=450
-- load sequence: ENSMUST00000210419.2, size=339
-- load sequence: ENSMUST00000211789.2, size=878
-- load sequence: ENSMUST00000168194.3, size=1076
-- load sequence: ENSMUST00000210882.2, size=1757
-- load sequence: ENSMUST00000210089.2, size=710
-- load sequence: ENSMUST00000210936.2, size=533
-- load sequence: ENSMUST00000055353.9, size=2356
-- load sequence: ENSMUST00000172775.4, size=2070
-- load sequence: ENSMUST00000026541.15, size=1220
-- load sequence: ENSMUST00000211044.2, size=1029
-- load sequence: ENSMUST00000211283.2, size=975
-- load sequence: ENSMUST00000166758.4, size=1220
-- load sequence: ENSMUST00000211681.2, size=1192
-- load sequence: ENSMUST00000026540.9, size=620
-- load sequence: ENSMUST00000120034.8, size=680
-- load sequence: ENSMUST00000148716.8, size=1685
-- load sequence: ENSMUST00000142105.8, size=536
-- load se

-- load sequence: ENSMUST00000145694.8, size=3577
-- load sequence: ENSMUST00000028822.14, size=3179
-- load sequence: ENSMUST00000124632.2, size=581
-- load sequence: ENSMUST00000124107.8, size=710
-- load sequence: ENSMUST00000110135.3, size=603
-- load sequence: ENSMUST00000140867.8, size=459
-- load sequence: ENSMUST00000131617.2, size=334
-- load sequence: ENSMUST00000110142.8, size=3703
-- load sequence: ENSMUST00000214895.3, size=2222
-- load sequence: ENSMUST00000213749.2, size=2150
-- load sequence: ENSMUST00000215617.2, size=476
-- load sequence: ENSMUST00000099927.2, size=930
-- load sequence: ENSMUST00000099926.2, size=945
-- load sequence: ENSMUST00000151511.8, size=674
-- load sequence: ENSMUST00000116375.2, size=3451
-- load sequence: ENSMUST00000215758.2, size=1658
-- load sequence: ENSMUST00000214679.2, size=1648
-- load sequence: ENSMUST00000217218.3, size=1460
-- load sequence: ENSMUST00000099925.2, size=945
-- load sequence: ENSMUST00000216933.2, size=1706
-- load s

-- load sequence: ENSMUST00000090940.6, size=3635
-- load sequence: ENSMUST00000028175.7, size=6110
-- load sequence: ENSMUST00000148764.8, size=941
-- load sequence: ENSMUST00000153052.2, size=378
-- load sequence: ENSMUST00000151169.2, size=387
-- load sequence: ENSMUST00000151785.2, size=686
-- load sequence: ENSMUST00000144117.8, size=856
-- load sequence: ENSMUST00000146545.8, size=856
-- load sequence: ENSMUST00000131443.8, size=641
-- load sequence: ENSMUST00000028178.14, size=9162
-- load sequence: ENSMUST00000100085.8, size=4289
-- load sequence: ENSMUST00000112608.9, size=1497
-- load sequence: ENSMUST00000112607.3, size=1266
-- load sequence: ENSMUST00000154453.2, size=392
-- load sequence: ENSMUST00000112599.8, size=2891
-- load sequence: ENSMUST00000145495.8, size=2599
-- load sequence: ENSMUST00000090935.9, size=3293
-- load sequence: ENSMUST00000056376.12, size=2394
-- load sequence: ENSMUST00000126407.2, size=612
-- load sequence: ENSMUST00000112601.9, size=3050
-- load

-- load sequence: ENSMUST00000209386.2, size=247
-- load sequence: ENSMUST00000218975.2, size=549
-- load sequence: ENSMUST00000210391.2, size=416
-- load sequence: ENSMUST00000219945.2, size=1498
-- load sequence: ENSMUST00000218683.2, size=1177
-- load sequence: ENSMUST00000219868.2, size=1319
-- load sequence: ENSMUST00000218144.2, size=2246
-- load sequence: ENSMUST00000218279.2, size=982
-- load sequence: ENSMUST00000218353.2, size=1445
-- load sequence: ENSMUST00000222635.2, size=330
-- load sequence: ENSMUST00000185884.2, size=400
-- load sequence: ENSMUST00000140995.2, size=400
-- load sequence: ENSMUST00000210189.2, size=493
-- load sequence: ENSMUST00000119768.2, size=1281
-- load sequence: ENSMUST00000189740.2, size=1276
-- load sequence: ENSMUST00000119877.2, size=240
-- load sequence: ENSMUST00000191876.2, size=198
-- load sequence: ENSMUST00000188825.2, size=474
-- load sequence: ENSMUST00000236132.2, size=466
-- load sequence: ENSMUST00000151704.2, size=340
-- load seque

-- load sequence: ENSMUST00000226708.2, size=571
-- load sequence: ENSMUST00000059065.10, size=321
-- load sequence: ENSMUST00000185805.2, size=1426
-- load sequence: ENSMUST00000213253.2, size=622
-- load sequence: ENSMUST00000189789.2, size=630
-- load sequence: ENSMUST00000190858.2, size=372
-- load sequence: ENSMUST00000172918.2, size=646
-- load sequence: ENSMUST00000231787.2, size=880
-- load sequence: ENSMUST00000186994.2, size=398
-- load sequence: ENSMUST00000119769.2, size=382
-- load sequence: ENSMUST00000207236.2, size=3835
-- load sequence: ENSMUST00000211669.2, size=1312
-- load sequence: ENSMUST00000210122.2, size=547
-- load sequence: ENSMUST00000211304.2, size=595
-- load sequence: ENSMUST00000209227.2, size=784
-- load sequence: ENSMUST00000122422.3, size=591
-- load sequence: ENSMUST00000119249.2, size=520
-- load sequence: ENSMUST00000118143.3, size=3130
-- load sequence: ENSMUST00000119098.3, size=593
-- load sequence: ENSMUST00000091031.6, size=435
-- load sequenc

-- load sequence: ENSMUST00000086596.2, size=378
-- load sequence: ENSMUST00000118618.2, size=1258
-- load sequence: ENSMUST00000190086.2, size=409
-- load sequence: ENSMUST00000117631.2, size=309
-- load sequence: ENSMUST00000119653.2, size=893
-- load sequence: ENSMUST00000223297.2, size=348
-- load sequence: ENSMUST00000120138.2, size=718
-- load sequence: ENSMUST00000223458.2, size=339
-- load sequence: ENSMUST00000096279.6, size=333
-- load sequence: ENSMUST00000122362.2, size=686
-- load sequence: ENSMUST00000117744.2, size=255
-- load sequence: ENSMUST00000119743.2, size=439
-- load sequence: ENSMUST00000121933.2, size=984
-- load sequence: ENSMUST00000117494.2, size=528
-- load sequence: ENSMUST00000119715.2, size=643
-- load sequence: ENSMUST00000172123.3, size=351
-- load sequence: ENSMUST00000230459.2, size=658
-- load sequence: ENSMUST00000235761.2, size=476
-- load sequence: ENSMUST00000229230.2, size=1103
-- load sequence: ENSMUST00000198755.2, size=466
-- load sequence: 

-- load sequence: ENSMUST00000119471.2, size=1293
-- load sequence: ENSMUST00000122207.2, size=1006
-- load sequence: ENSMUST00000207804.2, size=542
-- load sequence: ENSMUST00000208020.2, size=547
-- load sequence: ENSMUST00000207467.2, size=420
-- load sequence: ENSMUST00000162296.2, size=297
-- load sequence: ENSMUST00000152628.2, size=486
-- load sequence: ENSMUST00000195036.2, size=290
-- load sequence: ENSMUST00000233551.2, size=245
-- load sequence: ENSMUST00000118086.2, size=375
-- load sequence: ENSMUST00000200912.2, size=799
-- load sequence: ENSMUST00000199513.2, size=367
-- load sequence: ENSMUST00000198953.2, size=493
-- load sequence: ENSMUST00000117478.2, size=868
-- load sequence: ENSMUST00000208529.2, size=568
-- load sequence: ENSMUST00000208272.2, size=452
-- load sequence: ENSMUST00000212919.2, size=1085
-- load sequence: ENSMUST00000233285.2, size=609
-- load sequence: ENSMUST00000212776.2, size=1238
-- load sequence: ENSMUST00000210757.2, size=309
-- load sequence

-- load sequence: ENSMUST00000207138.2, size=805
-- load sequence: ENSMUST00000188953.2, size=139
-- load sequence: ENSMUST00000185244.2, size=463
-- load sequence: ENSMUST00000209088.2, size=484
-- load sequence: ENSMUST00000208144.2, size=484
-- load sequence: ENSMUST00000122307.2, size=319
-- load sequence: ENSMUST00000117187.2, size=264
-- load sequence: ENSMUST00000122119.2, size=321
-- load sequence: ENSMUST00000119857.2, size=282
-- load sequence: ENSMUST00000187161.2, size=955
-- load sequence: ENSMUST00000207666.2, size=1158
-- load sequence: ENSMUST00000188246.2, size=163
-- load sequence: ENSMUST00000191583.2, size=463
-- load sequence: ENSMUST00000182254.2, size=1006
-- load sequence: ENSMUST00000187732.2, size=463
-- load sequence: ENSMUST00000182864.2, size=1009
-- load sequence: ENSMUST00000205872.2, size=876
-- load sequence: ENSMUST00000210220.2, size=318
-- load sequence: ENSMUST00000182724.2, size=1007
-- load sequence: ENSMUST00000189023.2, size=436
-- load sequence

-- load sequence: ENSMUST00000238950.2, size=970
-- load sequence: ENSMUST00000235519.2, size=262
-- load sequence: ENSMUST00000239209.2, size=539
-- load sequence: ENSMUST00000219562.3, size=850
-- load sequence: ENSMUST00000220144.3, size=847
-- load sequence: ENSMUST00000231182.2, size=780
-- load sequence: ENSMUST00000164528.4, size=600
-- load sequence: ENSMUST00000217756.2, size=367
-- load sequence: ENSMUST00000178170.2, size=639
-- load sequence: ENSMUST00000179940.2, size=639
-- load sequence: ENSMUST00000091710.6, size=352
-- load sequence: ENSMUST00000177664.2, size=639
-- load sequence: ENSMUST00000126961.2, size=640
-- load sequence: ENSMUST00000145953.2, size=498
-- load sequence: ENSMUST00000223746.2, size=1123
-- load sequence: ENSMUST00000238661.2, size=818
-- load sequence: ENSMUST00000154752.2, size=347
-- load sequence: ENSMUST00000118560.2, size=638
-- load sequence: ENSMUST00000120775.2, size=517
-- load sequence: ENSMUST00000117599.2, size=432
-- load sequence: E

-- load sequence: ENSMUST00000122729.3, size=183
-- load sequence: ENSMUST00000158482.3, size=108
-- load sequence: ENSMUST00000122626.3, size=112
-- load sequence: ENSMUST00000122804.3, size=104
-- load sequence: ENSMUST00020182294.1, size=97
-- load sequence: ENSMUST00000240313.1, size=107
-- load sequence: ENSMUST00020182619.1, size=84
-- load sequence: ENSMUST00000240302.1, size=107
-- load sequence: ENSMUST00000122716.3, size=157
-- load sequence: ENSMUST00000122803.3, size=151
-- load sequence: ENSMUST00000083310.4, size=103
-- load sequence: ENSMUST00000104323.3, size=107
-- load sequence: ENSMUST00020182785.1, size=91
-- load sequence: ENSMUST00000122652.4, size=105
-- load sequence: ENSMUST00000183508.3, size=62
-- load sequence: ENSMUST00000180129.3, size=107
-- load sequence: ENSMUST00000157248.3, size=109
-- load sequence: ENSMUST00000122765.4, size=81
-- load sequence: ENSMUST00000083722.4, size=104
-- load sequence: ENSMUST00000083242.3, size=151
-- load sequence: ENSMUST

-- load sequence: ENSMUST00020182403.1, size=91
-- load sequence: ENSMUST00000157479.3, size=95
-- load sequence: ENSMUST00000116966.3, size=125
-- load sequence: ENSMUST00000158563.4, size=124
-- load sequence: ENSMUST00000082854.3, size=137
-- load sequence: ENSMUST00000082996.3, size=132
-- load sequence: ENSMUST00000104058.3, size=135
-- load sequence: ENSMUST00020182830.1, size=131
-- load sequence: ENSMUST00000157412.3, size=130
-- load sequence: ENSMUST00000177209.3, size=76
-- load sequence: ENSMUST00000177097.3, size=54
-- load sequence: ENSMUST00000158876.3, size=122
-- load sequence: ENSMUST00020181784.1, size=134
-- load sequence: ENSMUST00000104033.3, size=111
-- load sequence: ENSMUST00000158251.4, size=109
-- load sequence: ENSMUST00000104509.3, size=128
-- load sequence: ENSMUST00000158899.3, size=108
-- load sequence: ENSMUST00000103987.3, size=129
-- load sequence: ENSMUST00000104151.3, size=126
-- load sequence: ENSMUST00000082966.3, size=131
-- load sequence: ENSMUS

-- load sequence: ENSMUST00000186679.2, size=537
-- load sequence: ENSMUST00000189444.2, size=174
-- load sequence: ENSMUST00000178692.2, size=498
-- load sequence: ENSMUST00000188058.7, size=182
-- load sequence: ENSMUST00000185217.2, size=374
-- load sequence: ENSMUST00000186151.7, size=622
-- load sequence: ENSMUST00000185585.2, size=437
-- load sequence: ENSMUST00000187288.2, size=175
-- load sequence: ENSMUST00000179646.2, size=498
-- load sequence: ENSMUST00000189623.7, size=901
-- load sequence: ENSMUST00000187878.2, size=297
-- load sequence: ENSMUST00000191193.2, size=1499
-- load sequence: ENSMUST00000189418.2, size=438
-- load sequence: ENSMUST00000188322.7, size=622
-- load sequence: ENSMUST00000190203.2, size=437
-- load sequence: ENSMUST00000185311.2, size=174
-- load sequence: ENSMUST00000177552.2, size=498
-- load sequence: ENSMUST00000186892.7, size=622
-- load sequence: ENSMUST00000187851.2, size=437
-- load sequence: ENSMUST00000191194.7, size=1498
-- load sequence: 

-- load sequence: ENSMUST00000186880.2, size=632
-- load sequence: ENSMUST00000215539.2, size=519
-- load sequence: ENSMUST00000237596.2, size=391
-- load sequence: ENSMUST00000235159.2, size=1197
-- load sequence: ENSMUST00000181921.2, size=1642
-- load sequence: ENSMUST00000235809.2, size=1655
-- load sequence: ENSMUST00000218077.2, size=559
-- load sequence: ENSMUST00000132223.2, size=3321
-- load sequence: ENSMUST00000227935.2, size=3717
-- load sequence: ENSMUST00000228435.2, size=2203
-- load sequence: ENSMUST00000228477.2, size=2116
-- load sequence: ENSMUST00000228036.2, size=2663
-- load sequence: ENSMUST00000198718.2, size=1263
-- load sequence: ENSMUST00000181291.8, size=1798
-- load sequence: ENSMUST00000181605.2, size=3162
-- load sequence: ENSMUST00000180547.2, size=1709
-- load sequence: ENSMUST00000219485.2, size=2209
-- load sequence: ENSMUST00000234989.2, size=978
-- load sequence: ENSMUST00000234513.2, size=801
-- load sequence: ENSMUST00000234973.2, size=815
-- load

-- load sequence: ENSMUST00000123388.2, size=344
-- load sequence: ENSMUST00000137442.8, size=1414
-- load sequence: ENSMUST00000132226.2, size=3184
-- load sequence: ENSMUST00000130570.2, size=575
-- load sequence: ENSMUST00000229807.2, size=1106
-- load sequence: ENSMUST00000229045.2, size=922
-- load sequence: ENSMUST00000229509.2, size=759
-- load sequence: ENSMUST00000181794.3, size=729
-- load sequence: ENSMUST00000236110.2, size=334
-- load sequence: ENSMUST00000236935.2, size=4900
-- load sequence: ENSMUST00000236603.2, size=1410
-- load sequence: ENSMUST00000235580.2, size=484
-- load sequence: ENSMUST00000230426.2, size=447
-- load sequence: ENSMUST00000131044.2, size=944
-- load sequence: ENSMUST00000141222.2, size=440
-- load sequence: ENSMUST00000136489.2, size=416
-- load sequence: ENSMUST00000071522.4, size=1887
-- load sequence: ENSMUST00000225384.2, size=512
-- load sequence: ENSMUST00000235402.2, size=395
-- load sequence: ENSMUST00000237763.2, size=475
-- load sequen

-- load sequence: ENSMUST00000150623.2, size=743
-- load sequence: ENSMUST00000204449.3, size=1740
-- load sequence: ENSMUST00000181121.8, size=2142
-- load sequence: ENSMUST00000205093.3, size=873
-- load sequence: ENSMUST00000181840.4, size=995
-- load sequence: ENSMUST00000181145.8, size=2968
-- load sequence: ENSMUST00000204609.2, size=1294
-- load sequence: ENSMUST00000160992.8, size=614
-- load sequence: ENSMUST00000162651.2, size=420
-- load sequence: ENSMUST00000162829.2, size=435
-- load sequence: ENSMUST00000207056.2, size=504
-- load sequence: ENSMUST00000203711.2, size=172
-- load sequence: ENSMUST00000147939.2, size=625
-- load sequence: ENSMUST00000162276.10, size=392
-- load sequence: ENSMUST00000161923.2, size=2425
-- load sequence: ENSMUST00000161282.2, size=184
-- load sequence: ENSMUST00000187956.2, size=775
-- load sequence: ENSMUST00000161449.2, size=2405
-- load sequence: ENSMUST00000133752.2, size=1493
-- load sequence: ENSMUST00000211790.2, size=1382
-- load seq

-- load sequence: ENSMUST00000181369.2, size=501
-- load sequence: ENSMUST00000238088.2, size=520
-- load sequence: ENSMUST00000226503.2, size=1412
-- load sequence: ENSMUST00000174283.2, size=2210
-- load sequence: ENSMUST00000181951.2, size=3467
-- load sequence: ENSMUST00000220061.2, size=654
-- load sequence: ENSMUST00000219459.2, size=1303
-- load sequence: ENSMUST00000197430.2, size=424
-- load sequence: ENSMUST00000200648.2, size=1957
-- load sequence: ENSMUST00000141700.2, size=2231
-- load sequence: ENSMUST00000196770.2, size=4352
-- load sequence: ENSMUST00000197309.2, size=3463
-- load sequence: ENSMUST00000153208.2, size=948
-- load sequence: ENSMUST00000237543.2, size=1053
-- load sequence: ENSMUST00000236278.2, size=971
-- load sequence: ENSMUST00000237768.2, size=589
-- load sequence: ENSMUST00000237231.2, size=1771
-- load sequence: ENSMUST00000235456.2, size=1130
-- load sequence: ENSMUST00000235897.2, size=661
-- load sequence: ENSMUST00000236069.2, size=969
-- load s

-- load sequence: ENSMUST00000182497.2, size=2929
-- load sequence: ENSMUST00000230620.2, size=344
-- load sequence: ENSMUST00000218979.2, size=267
-- load sequence: ENSMUST00000177492.2, size=1131
-- load sequence: ENSMUST00000227115.2, size=568
-- load sequence: ENSMUST00000226371.2, size=1072
-- load sequence: ENSMUST00000228838.2, size=1418
-- load sequence: ENSMUST00000227517.2, size=811
-- load sequence: ENSMUST00000177065.2, size=698
-- load sequence: ENSMUST00000217771.2, size=3607
-- load sequence: ENSMUST00000176406.2, size=587
-- load sequence: ENSMUST00000218793.2, size=529
-- load sequence: ENSMUST00000220015.2, size=172
-- load sequence: ENSMUST00000219252.2, size=338
-- load sequence: ENSMUST00000210723.2, size=459
-- load sequence: ENSMUST00000211274.2, size=483
-- load sequence: ENSMUST00000209312.2, size=1621
-- load sequence: ENSMUST00000218657.2, size=1864
-- load sequence: ENSMUST00000144990.2, size=469
-- load sequence: ENSMUST00000227020.2, size=741
-- load seque

-- load sequence: ENSMUST00000201375.2, size=1133
-- load sequence: ENSMUST00000124811.2, size=384
-- load sequence: ENSMUST00000123644.3, size=1172
-- load sequence: ENSMUST00000145534.2, size=350
-- load sequence: ENSMUST00000215849.2, size=798
-- load sequence: ENSMUST00000181712.3, size=1581
-- load sequence: ENSMUST00000215242.2, size=2102
-- load sequence: ENSMUST00000217385.2, size=1782
-- load sequence: ENSMUST00000215349.2, size=299
-- load sequence: ENSMUST00000213885.2, size=619
-- load sequence: ENSMUST00000214913.2, size=914
-- load sequence: ENSMUST00000217145.2, size=373
-- load sequence: ENSMUST00000213920.2, size=999
-- load sequence: ENSMUST00000214085.2, size=721
-- load sequence: ENSMUST00000144193.2, size=271
-- load sequence: ENSMUST00000217323.2, size=269
-- load sequence: ENSMUST00000214727.2, size=538
-- load sequence: ENSMUST00000191396.2, size=1009
-- load sequence: ENSMUST00000217349.2, size=3869
-- load sequence: ENSMUST00000213995.2, size=722
-- load seque

-- load sequence: ENSMUST00000207045.2, size=2094
-- load sequence: ENSMUST00000207829.2, size=549
-- load sequence: ENSMUST00000149300.8, size=1492
-- load sequence: ENSMUST00000156387.2, size=749
-- load sequence: ENSMUST00000144728.2, size=992
-- load sequence: ENSMUST00000145972.8, size=676
-- load sequence: ENSMUST00000067618.5, size=1780
-- load sequence: ENSMUST00000166653.2, size=309
-- load sequence: ENSMUST00000131099.2, size=2959
-- load sequence: ENSMUST00000149285.2, size=733
-- load sequence: ENSMUST00000143202.8, size=1928
-- load sequence: ENSMUST00000135739.2, size=1854
-- load sequence: ENSMUST00000141452.2, size=3387
-- load sequence: ENSMUST00000141473.2, size=626
-- load sequence: ENSMUST00000129485.2, size=126
-- load sequence: ENSMUST00000150616.2, size=403
-- load sequence: ENSMUST00000201816.2, size=1292
-- load sequence: ENSMUST00000210851.2, size=1148
-- load sequence: ENSMUST00000209947.2, size=1032
-- load sequence: ENSMUST00000211430.2, size=1193
-- load s

In [6]:
# Load-or-build the transcriptome count table, using a pickle on local disk as cache.
transcriptome_savefile = os.path.join(r'D:\References', f"{genome_assembly}_transcriptome_table.pkl")
if not os.path.isfile(transcriptome_savefile):
    # Cache miss: count words from the transcriptome sequences, then persist the result.
    transcriptome_table = countTable()
    transcriptome_table.load_sequences(transcriptome_seqs)
    print(f"Save transcriptome table to file: {transcriptome_savefile}")
    transcriptome_table.save_pkl(transcriptome_savefile)
else:
    # Cache hit: fill a fresh table from the existing pickle.
    print("Loading transcriptome table file")
    transcriptome_table = countTable()
    transcriptome_table.load_pkl(transcriptome_savefile)

Loading transcriptome table file
Load the OTTable from D:\References\GRCm39_transcriptome_table.pkl.
updated the OTTable from D:\References\GRCm39_transcriptome_table.pkl.


In [7]:
# json
import json

In [15]:
%%time
# Experiment: write the count table with np.save instead of pickle.
# NOTE(review): this rebinds `transcriptome_savefile` to the .npy path, so any
# later cell reusing the name sees this path rather than the .pkl one.
# NOTE(review): the table is a dict subclass, so np.save presumably stores it
# as a pickled object array — confirm before relying on this format.
transcriptome_savefile = os.path.join(r'D:\References', f"{genome_assembly}_transcriptome_table.npy")
np.save(transcriptome_savefile, transcriptome_table)

Wall time: 11.6 s


In [16]:
%%time
# Read the .npy file back with pickle support enabled.
# NOTE(review): presumably returns an object array wrapping the table rather
# than the table itself — confirm before using `test_ct` as a countTable.
test_ct = np.load(transcriptome_savefile, allow_pickle=True)

Wall time: 55.8 s


In [19]:
%%time
# Flatten the table into a 2-row uint64 array: row 0 = keys, row 1 = values.
test_array = np.array([list(transcriptome_table.keys()), list(transcriptome_table.values())], dtype=np.uint64)

Wall time: 12.1 s


In [22]:
%%time
# Save the dense key/value array to a separate "_test.npy" file.
# NOTE(review): rebinds `transcriptome_savefile` again; later cells that use
# the name depend on which of these cells ran last.
transcriptome_savefile = os.path.join(r'D:\References', f"{genome_assembly}_transcriptome_table_test.npy")
np.save(transcriptome_savefile, test_array)

Wall time: 4.58 s


In [26]:
%%time
# Reload the dense uint64 key/value array written by the previous cell.
# A plain numeric array needs no pickle support, so allow_pickle is left at
# its default (False); that also refuses files that would unpickle arbitrary objects.
test_ct = np.load(transcriptome_savefile)

Wall time: 538 ms


In [30]:
# Spot check: last entry of row 0 in the reloaded array vs. the in-memory array.
test_ct[0,-1] == test_array[0,-1]

True

In [34]:
# Rebuild a countTable from the reloaded 2-row array (row 0 = keys, row 1 = values).
test_table = countTable()
# zip() has no len(), so pass the column count explicitly; tqdm can then show
# a percentage and ETA instead of a bare iteration counter.
for _k, _v in tqdm(zip(test_ct[0], test_ct[1]), total=test_ct.shape[1]):
    # Cast the numpy scalars back to plain Python ints before storing.
    test_table[int(_k)] = int(_v)

110282344it [02:15, 812678.35it/s]


In [32]:
# Element-wise difference of the two key rows; a single unique value of 0
# means every key survived the save/load round trip unchanged.
np.unique(test_ct[0] - test_array[0])

array([0], dtype=uint64)

In [24]:
# Shape of the reloaded array.
test_ct.shape

(2, 110282344)

In [7]:
# Build the transcriptome count table from scratch (no cache lookup here).
transcriptome_table = countTable()
transcriptome_table.load_sequences(transcriptome_seqs)

Loading 139145 sequences into count_table.
- Use reference word length: 17


139145it [03:40, 629.93it/s] 


In [9]:
# Persist the transcriptome table as a pickle; an existing file is never overwritten.
transcriptome_savefile = os.path.join(r'D:\References', f"{genome_assembly}_transcriptome_table.pkl")
if not os.path.isfile(transcriptome_savefile):
    print(f"Save transcriptome table to file: {transcriptome_savefile}")
    transcriptome_table.save_pkl(transcriptome_savefile)
else:
    print("transcriptome table file already exist, skip.")

Save transcriptome table to file: D:\References\GRCm39_transcriptome_table.pkl
Wrote the OTTable to D:\References\GRCm39_transcriptome_table.pkl.


In [50]:
# Path of the pickled genome count table for the current assembly.
genome_savefile = os.path.join(r'D:\References', f"{genome_assembly}_genome_table.pkl")

In [None]:
# Persist the genome table as a pickle; an existing file is never overwritten.
genome_savefile = os.path.join(r'D:\References', f"{genome_assembly}_genome_table.pkl")
if not os.path.isfile(genome_savefile):
    print(f"Save genome table to file: {genome_savefile}")
    genome_table.save_pkl(genome_savefile)
else:
    print("genome table file already exist, skip.")

In [10]:
%%time
# Time a reload of the pickled table into a fresh countTable.
# NOTE(review): which file is read depends on the current value of
# `transcriptome_savefile`, which several cells rebind.
test_ct = countTable()
test_ct.load_pkl(transcriptome_savefile)

Load the OTTable from D:\References\GRCm39_transcriptome_table.pkl.
updated the OTTable from D:\References\GRCm39_transcriptome_table.pkl.
Wall time: 1min 7s


In [12]:
# Confirm the class of the reloaded object.
type(test_ct)

__main__.countTable

In [14]:
%%timeit
# Benchmark one .get() query on the first 17 characters of transcriptome_seqs[50].
test_ct.get(transcriptome_seqs[50][:17], )

7.61 µs ± 127 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [13]:
# IPython introspection of the get method; interactive leftover, not valid plain Python.
test_ct.get?

In [51]:
%%time
# Time a reload of the pickled genome table into a fresh countTable.
genome_ct = countTable()
genome_ct.load_pkl(genome_savefile)

Load the OTTable from D:\References\GRCm39_genome_table.pkl.
updated the OTTable from D:\References\GRCm39_genome_table.pkl.
Wall time: 1h 21min 39s


In [15]:
%%time
# Time a direct pickle.load of the genome table, bypassing countTable.load_pkl.
genome_savefile = os.path.join(r'D:\References', f"{genome_assembly}_genome_table.pkl")
# Use a context manager so the file handle is closed even if unpickling fails.
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
with open(genome_savefile, 'rb') as _genome_pkl_fp:
    test_genome_ct = pickle.load(_genome_pkl_fp)

Wall time: 27min 38s


In [None]:
# NOTE(review): truncated, never-executed cell; `test_ge` is not defined in the
# visible part of the notebook and would presumably raise NameError if run.
test_ge

In [48]:
# Load a count table through the library's ld.countTable for comparison.
# NOTE(review): `ctp11_folder` is assigned but not used in this cell; the table
# file path is built from `reference_folder` instead — confirm which is intended.
ctp11_folder = r'\\10.245.74.212\Chromatin_NAS_2\Chromatin_Libraries\CTP-11_brain\mouse_genome_1000'

ctp11_table_file = os.path.join(reference_folder, 'ctp11_genome_17w.npy')
# NOTE(review): this rebinds `test_ct`, replacing whatever an earlier cell stored there.
test_ct = ld.countTable(sparse=False, save_file=ctp11_table_file)
test_ct.load()

In [49]:
%%timeit
# Same single-query benchmark as before, now against the ld.countTable instance.
test_ct.get(transcriptome_seqs[50][:17], )

9.07 µs ± 14.7 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [41]:
def get(self,
        query_seq:str, # input sequences as string
        rc:bool=False, # whether reverse-complement
       ):
    """Sum the table entries of every word_len-long window of query_seq.

    Debugging variant: the per-window counts are printed before the sum is
    returned.

    Args:
        self: table object exposing `word_len` and item lookup by integer key.
        query_seq: sequence to query.
        rc: if True, encode each window with seq2Int_rc instead of seq2Int.

    Returns:
        0 when query_seq is shorter than word_len, otherwise np.sum of the
        per-window counts.
    """
    word_len = self.word_len
    # Too short to contain even one full word.
    if len(query_seq) < word_len:
        return 0
    # Choose the encoder once rather than duplicating the window loop.
    encode = seq2Int_rc if rc else seq2Int
    _seq_counts = []
    for _start in range(len(query_seq) - word_len + 1):
        _word = query_seq[_start:_start + word_len]
        _seq_counts.append(self[encode(_word.encode())])
    print(_seq_counts)
    return np.sum(_seq_counts)

In [42]:
# Call the standalone debug `get` with the table passed explicitly as `self`.
# NOTE(review): `test_seq` is not defined in the visible cells — confirm its origin.
get(test_ct, test_seq)

[2]


2

In [17]:
# Direct lookup: encode the sequence with seq2Int and index the table with the result.
transcriptome_table[seq2Int(test_seq.encode())]

2

In [18]:
%%timeit
# Benchmark the direct encode-and-index lookup for comparison with .get().
transcriptome_table[seq2Int(test_seq.encode())]

213 ns ± 2.79 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


## Genome

In [35]:
# load all ref sequences
from ImageAnalysis3 import library_tools
# Reader over the reference sequences stored in genome_folder.
seq_rd = library_tools.sequences.sequence_reader(genome_folder, auto_load_ref=True)
# Keep only the sequence text of each record, in the reader's dict order.
genome_seqs = [
    str(_record.seq)
    for _record in seq_rd.ref_seq_dict.values()
]

-- load sequence: 1, size=195154279
-- load sequence: 10, size=130530862
-- load sequence: 11, size=121973369
-- load sequence: 12, size=120092757
-- load sequence: 13, size=120883175
-- load sequence: 14, size=125139656
-- load sequence: 15, size=104073951
-- load sequence: 16, size=98008968
-- load sequence: 17, size=95294699
-- load sequence: 18, size=90720763
-- load sequence: 19, size=61420004
-- load sequence: 2, size=181755017
-- load sequence: 3, size=159745316
-- load sequence: 4, size=156860686
-- load sequence: 5, size=151758149
-- load sequence: 6, size=149588044
-- load sequence: 7, size=144995196
-- load sequence: 8, size=130127694
-- load sequence: 9, size=124359700
-- load sequence: MT, size=16299
-- load sequence: X, size=169476592
-- load sequence: Y, size=91455967
-- load sequence: JH584299.1, size=953012
-- load sequence: GL456233.2, size=559103
-- load sequence: JH584301.1, size=259875
-- load sequence: GL456211.1, size=241735
-- load sequence: GL456221.1, size=206

In [36]:
%%time
# Build the genome count table from scratch from genome_seqs.
genome_table = countTable()
genome_table.load_sequences(genome_seqs)

Loading 61 sequences into count_table.
- Use reference word length: 17


61it [1:22:33, 81.20s/it] 


Wall time: 1h 22min 34s


In [37]:
%%timeit
# Benchmark one .get() query on the genome table with the same 17-character probe.
genome_table.get(transcriptome_seqs[50][:17], )

7.79 µs ± 84 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
Compiler time: 0.23 s


In [38]:
%%time
# Write the genome table with np.save; an existing file is never overwritten.
genome_savefile = os.path.join(r'D:\References', f"{genome_assembly}_genome_table.npy")
if not os.path.isfile(genome_savefile):
    print(f"Save genome table to file: {genome_savefile}")
    np.save(genome_savefile, genome_table)
else:
    print("genome table file already exist, skip.")

Save genome table to file: D:\References\GRCm39_genome_table.npy
Wall time: 4min 40s


In [43]:
%%time
# Direct pickle.load of the genome table.
genome_savefile = os.path.join(r'D:\References', f"{genome_assembly}_genome_table.pkl")
# Use a context manager so the file handle is released even when loading fails
# (the recorded run of this cell ended in MemoryError).
# NOTE: pickle.load can execute arbitrary code; only load files written by this notebook.
with open(genome_savefile, 'rb') as _genome_pkl_fp:
    test_ct = pickle.load(_genome_pkl_fp)

MemoryError: 

In [39]:
%%time
# Persist the genome table as a pickle; an existing file is never overwritten.
genome_savefile = os.path.join(r'D:\References', f"{genome_assembly}_genome_table.pkl")
if not os.path.isfile(genome_savefile):
    print(f"Save genome table to file: {genome_savefile}")
    genome_table.save_pkl(genome_savefile)
else:
    print("genome table file already exist, skip.")

Save genome table to file: D:\References\GRCm39_genome_table.pkl
Wrote the OTTable to D:\References\GRCm39_genome_table.pkl.
Wall time: 2min 55s
