## CHOPCHOP

## Sequence alignments & Epigenomic feature

In [1]:
import numpy as np
import pandas as pd

import pyBigWig
from pyfaidx import Fasta

In [2]:
trans_base_dict={'A':'T','C':'G','G':'C','T':'A','N':'N'}
chr_list = [f'chr{i}' for i in range(1,23)]+['chrX']

def reverse_DNA(seq):
    ### 用于反转DNA
    return ''.join([trans_base_dict[c] for c in seq])[::-1]

def gc_content(seq_list):
    ### 计算GC含量
    content = []
    for seq in seq_list:
        if len(seq)==0:
            content.append(0.0)
        else:
            content.append( (seq.upper().count('C')+seq.upper().count('G')) / len(seq) )
    return content

def find_contextual_single(data,fa=None,col_name = 'sequence',expand_flank=30,upstream=4,spacer_len=20,downstream=6):
    
    # 自动寻找上下文并且把补全上下文之后的序列整理成有30bp的格式
    ## 因为参考基因组并不会随着细胞系的改变而改变，所以这里可以这么做

    p1,p2=[],[];e1,e2=[],[]
    adj_s1,adj_s2=[],[]
    merge_table=data
    
    for _,row in merge_table.iterrows():
    
        s1 = row[col_name].upper().replace(' ','') # 因为输出是ChopChop的,非常稳定，就是20 bp protospacer + 3 bp PAM

        try:
            c,s,e = row['chr'],int(row['start']),int(row['end'])
            if not c in chr_list:
                p1.append('-1');e1.append('N'*total_len);adj_s1.append(-1)
                continue
        except:
            p1.append('-1');e1.append('N'*upstream+s1+'N'*downstream);adj_s1.append(-1)
            continue

        sequence=fa[c][s-expand_flank:e+expand_flank].seq.upper()

        l1=sequence.find(s1)

        if l1<0: # 没有抓取到位置信息的情况,自动认为是负向的
            l1=sequence.find(reverse_DNA(s1))
            p1.append('0');e1.append( reverse_DNA( fa[c][s-expand_flank+l1-downstream:s-expand_flank+l1+spacer_len+upstream].seq.upper() ) )
        else:
            p1.append('1');e1.append( fa[c][s-expand_flank+l1-upstream:s-expand_flank+l1+spacer_len+downstream].seq.upper() )
        adj_s1.append(s-expand_flank+l1)
    
    return p1,e1,adj_s1

def gain_epivalue_TPM(region,bw=None):
    
    chromo=region[0]
    start=min(region[1],region[2])
    end=max(region[1],region[2])
    
    if start==end:
        end+=1
        
    delta=end-start
    
    try:
        values = bw.stats(chromo, start, end, type='sum')[0]
        tpm = (values/delta)/(bw.header()['sumData']/bw.header()['nBasesCovered'])*1000
    except Exception as e:
        tpm = 0
    return tpm

def gain_series_TPM(data,epi='DNase',cellline='A549',genome='hg38',lo=None):
    
    epi_value_series=[]
    
    if genome=='hg38': 
        bw=pyBigWig.open(f"/cluster2/huanglab/liquan/data/{cellline}/{epi}.bigWig")
    elif genome=='hg19':
        bw=pyBigWig.open(f"/cluster2/huanglab/liquan/data/{cellline}/hg19/{epi}.bigWig")
    
    for i in range(len(data)):
        c,s,e = data.at[i,'chr'],data.at[i,'start'],data.at[i,'end']
        if s==e:
            e+=1
        else:
            s,e = min(s,e),max(s,e)
        if not lo is None:
            c,s,e = convert_loci([c,s,e],lo=lo)
        
        try:
            epi_value_series.append(gain_epivalue_TPM([c,s,e],bw=bw))
        except:
            epi_value_series.append(0.0)
    
    bw.close()
    
    return epi_value_series

In [3]:
test_data = pd.read_csv('data/test.csv')

In [4]:
test_data.head(n=10)

Unnamed: 0,chr,start,end,seq1,seq2,label
0,chr2,47783271,47783391,ATGCGGCCTGGAGCGAGGCT,CACTCAGCGCCGGAGACTTG,1
1,chr2,47783271,47783391,ATGCGGCCTGGAGCGAGGCT,CACTCAGCGCCGGAGACTTG,1
2,chr2,47783160,47783296,GGCGCTGAGTGATGCCAACA,ATCTACCGCGCGGCTCCTGC,1
3,chr2,47783160,47783296,GGCGCTGAGTGATGCCAACA,ATCTACCGCGCGGCTCCTGC,1
4,chr2,47783307,47783454,AGGCGAAGAACCTCAACGGA,ATGCCAACAAGGCCTCGGCC,1
5,chr2,47783307,47783454,AGGCGAAGAACCTCAACGGA,ATGCCAACAAGGCCTCGGCC,1
6,chr2,47783271,47783438,TCCGTTGAGGTTCTTCGCCT,CACTCAGCGCCGGAGACTTG,1
7,chr2,47783271,47783438,TCCGTTGAGGTTCTTCGCCT,CACTCAGCGCCGGAGACTTG,1
8,chr2,47783296,47783438,TCCGTTGAGGTTCTTCGCCT,GGCGCTGAGTGATGCCAACA,1
9,chr2,47783296,47783438,TCCGTTGAGGTTCTTCGCCT,GGCGCTGAGTGATGCCAACA,1


In [5]:
with Fasta("./genome_ref/hg38.fa") as hg38:
    _,test_data['seq1'],_ = find_contextual_single(test_data,col_name='seq1',fa=hg38,upstream=4,spacer_len=20,downstream=6)
    _,test_data['seq2'],_ = find_contextual_single(test_data,col_name='seq2',fa=hg38,upstream=4,spacer_len=20,downstream=6)
    test_data['Median_sequence']=test_data.apply( lambda x: hg38[x['chr']][ x['start']:x['end'] ].seq.upper() , axis=1 )
test_data['GC']=gc_content(test_data['Median_sequence'])
test_data['length'] = abs(test_data['end'] - test_data['start'])

In [6]:
test_data['DNase']=gain_series_TPM( test_data, epi='DNase'  , cellline='A549', genome='hg38', lo=None)
test_data['ATAC'] =gain_series_TPM( test_data, epi='ATAC'   , cellline='A549', genome='hg38', lo=None)
test_data['H3K27ac']=gain_series_TPM( test_data, epi='H3K27ac', cellline='A549', genome='hg38', lo=None)
test_data['H3K4me3']=gain_series_TPM( test_data, epi='H3K4me3', cellline='A549', genome='hg38', lo=None)

In [7]:
test_data.head(n=10)

Unnamed: 0,chr,start,end,seq1,seq2,label,Median_sequence,GC,length,DNase,ATAC,H3K27ac,H3K4me3
0,chr2,47783271,47783391,GGGGATGCGGCCTGGAGCGAGGCTGGGCCT,GCATCACTCAGCGCCGGAGACTTGGGGAAG,1,GTCTCCGGCGCTGAGTGATGCCAACAAGGCCTCGGCCAGGGCCTCA...,0.758333,120,41062.843073,145.828758,60124.443387,38035.304341
1,chr2,47783271,47783391,GGGGATGCGGCCTGGAGCGAGGCTGGGCCT,GCATCACTCAGCGCCGGAGACTTGGGGAAG,1,GTCTCCGGCGCTGAGTGATGCCAACAAGGCCTCGGCCAGGGCCTCA...,0.758333,120,41062.843073,145.828758,60124.443387,38035.304341
2,chr2,47783160,47783296,CTCCGGCGCTGAGTGATGCCAACAAGGCCT,CCGCATCTACCGCGCGGCTCCTGCTGGCGG,1,GGAGCCGCGCGGTAGATGCGGTGCTTTTAGGAGCTCCGTCCGACAG...,0.625,136,80228.226721,216.315853,39310.515606,28072.128794
3,chr2,47783160,47783296,CTCCGGCGCTGAGTGATGCCAACAAGGCCT,CCGCATCTACCGCGCGGCTCCTGCTGGCGG,1,GGAGCCGCGCGGTAGATGCGGTGCTTTTAGGAGCTCCGTCCGACAG...,0.625,136,80228.226721,216.315853,39310.515606,28072.128794
4,chr2,47783307,47783454,CCCAAGGCGAAGAACCTCAACGGAGGGCTG,AGTGATGCCAACAAGGCCTCGGCCAGGGCC,1,CAGGGCCTCACGCGAAGGCGGCCGTGCCGCCGCTGCCCCCGGGGCC...,0.768707,147,40296.065744,221.978233,66933.388549,38933.887624
5,chr2,47783307,47783454,CCCAAGGCGAAGAACCTCAACGGAGGGCTG,AGTGATGCCAACAAGGCCTCGGCCAGGGCC,1,CAGGGCCTCACGCGAAGGCGGCCGTGCCGCCGCTGCCCCCGGGGCC...,0.768707,147,40296.065744,221.978233,66933.388549,38933.887624
6,chr2,47783271,47783438,GCCCTCCGTTGAGGTTCTTCGCCTTGGGCG,GCATCACTCAGCGCCGGAGACTTGGGGAAG,1,GTCTCCGGCGCTGAGTGATGCCAACAAGGCCTCGGCCAGGGCCTCA...,0.766467,167,40957.151453,172.349073,63526.299546,38096.511774
7,chr2,47783271,47783438,GCCCTCCGTTGAGGTTCTTCGCCTTGGGCG,GCATCACTCAGCGCCGGAGACTTGGGGAAG,1,GTCTCCGGCGCTGAGTGATGCCAACAAGGCCTCGGCCAGGGCCTCA...,0.766467,167,40957.151453,172.349073,63526.299546,38096.511774
8,chr2,47783296,47783438,GCCCTCCGTTGAGGTTCTTCGCCTTGGGCG,CTCCGGCGCTGAGTGATGCCAACAAGGCCT,1,AAGGCCTCGGCCAGGGCCTCACGCGAAGGCGGCCGTGCCGCCGCTG...,0.788732,142,40891.703951,154.081396,65818.102592,38881.936602
9,chr2,47783296,47783438,GCCCTCCGTTGAGGTTCTTCGCCTTGGGCG,CTCCGGCGCTGAGTGATGCCAACAAGGCCT,1,AAGGCCTCGGCCAGGGCCTCACGCGAAGGCGGCCGTGCCGCCGCTG...,0.788732,142,40891.703951,154.081396,65818.102592,38881.936602


## Single Guide Prediction

In [8]:
import utils.DeepSpCas9_main as DeepSpCas9
import utils.DeepCRISPR_main as DeepCRISPR
import utils.DistillatedRuleSet2 as Ruleset2
import utils.DistillatedCRISPRedict as CRISPRedict



2025-10-16 11:22:56.173108: I tensorflow/stream_executor/platform/default/dso_loader.cc:50] Successfully opened dynamic library libcudart.so.12


TensorFlow version: 1.15.5
Running in TensorFlow 1.x compatibility mode.






In [9]:
test_data['DeepSpCas9_s1'] = DeepSpCas9.predict_sequence(test_data['seq1'])
test_data['DeepSpCas9_s2'] = DeepSpCas9.predict_sequence(test_data['seq2'])
test_data['DeepCRISPR_s1'] = DeepCRISPR.predict_sequence([seq[4:27] for seq in test_data['seq1']])
test_data['DeepCRISPR_s2'] = DeepCRISPR.predict_sequence([seq[4:27] for seq in test_data['seq2']])
test_data['Ruleset2_s1']   = Ruleset2.predict(test_data['seq1'],model_path='models/DistillatedRuleSet2.pth')
test_data['Ruleset2_s2']   = Ruleset2.predict(test_data['seq2'],model_path='models/DistillatedRuleSet2.pth')
test_data['CRISPRedit_s1'] = CRISPRedict.predict(test_data['seq1'],model_path='models/DistillatedCRISPRedict.pth')
test_data['CRISPRedit_s2'] = CRISPRedict.predict(test_data['seq2'],model_path='models/DistillatedCRISPRedict.pth')






















2025-10-16 11:23:28.791276: I tensorflow/core/platform/profile_utils/cpu_utils.cc:109] CPU Frequency: 2700000000 Hz
2025-10-16 11:23:28.809515: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fc59c20de80 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2025-10-16 11:23:28.809559: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2025-10-16 11:23:28.811188: I tensorflow/stream_executor/platform/default/dso_loader.cc:50] Successfully opened dynamic library libcuda.so.1
2025-10-16 11:23:28.905048: E tensorflow/stream_executor/cuda/cuda_driver.cc:282] failed call to cuInit: CUDA_ERROR_UNKNOWN: unknown error
2025-10-16 11:23:28.905143: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (node1): /proc/driver/nvidia/version does not exist













































INFO:tensorflow:Restoring parameters from ./DeepCas9_Final/PreTrain-Final-False-3-5-7-100-70-40-0.001-550-True-80-60


INFO:tensorflow:Restoring parameters from ./DeepCas9_Final/PreTrain-Final-False-3-5-7-100-70-40-0.001-550-True-80-60


INFO:tensorflow:Restoring parameters from ./DeepCas9_Final/PreTrain-Final-False-3-5-7-100-70-40-0.001-550-True-80-60


INFO:tensorflow:Restoring parameters from ./DeepCas9_Final/PreTrain-Final-False-3-5-7-100-70-40-0.001-550-True-80-60


























/cluster2/huanglab/liquan/pycode/dual/20250306_demo/DeepCRISPR_Seq/model.ckpt-seq loaded
/cluster2/huanglab/liquan/pycode/dual/20250306_demo/DeepCRISPR_Seq/model.ckpt-seq loaded


In [10]:
def norm(x):
    return (x-x.min())/(x.max()-x.min())

if not test_data['DeepSpCas9_s1'].max() - test_data['DeepSpCas9_s1'].min() == 0:
    test_data['DeepSpCas9_s1'] = norm(test_data['DeepSpCas9_s1'])*10
if not test_data['DeepSpCas9_s2'].max() - test_data['DeepSpCas9_s2'].min() == 0:
    test_data['DeepSpCas9_s2'] = norm(test_data['DeepSpCas9_s2'])*10
    
if not test_data['DeepCRISPR_s1'].max() - test_data['DeepCRISPR_s1'].min() == 0:
    test_data['DeepSpCas9_s1'] = norm(test_data['DeepCRISPR_s1'])*10
if not test_data['DeepCRISPR_s2'].max() - test_data['DeepCRISPR_s2'].min() == 0:
    test_data['DeepSpCas9_s2'] = norm(test_data['DeepCRISPR_s2'])*10

if not test_data['CRISPRedit_s1'].max() - test_data['CRISPRedit_s1'].min() == 0:
    test_data['CRISPRedit_s1'] = norm(test_data['CRISPRedit_s1'])*10
if not test_data['CRISPRedit_s2'].max() - test_data['CRISPRedit_s2'].min() == 0:
    test_data['CRISPRedit_s2'] = norm(test_data['CRISPRedit_s2'])*10

if not test_data['Ruleset2_s1'].max() - test_data['Ruleset2_s1'].min() == 0:
    test_data['Ruleset2_s1'] = norm(test_data['Ruleset2_s1'])*10
if not test_data['Ruleset2_s2'].max() - test_data['Ruleset2_s2'].min() == 0:
    test_data['Ruleset2_s2'] = norm(test_data['Ruleset2_s2'])*10

In [11]:
test_data.head(n=10)

Unnamed: 0,chr,start,end,seq1,seq2,label,Median_sequence,GC,length,DNase,...,H3K27ac,H3K4me3,DeepSpCas9_s1,DeepSpCas9_s2,DeepCRISPR_s1,DeepCRISPR_s2,Ruleset2_s1,Ruleset2_s2,CRISPRedit_s1,CRISPRedit_s2
0,chr2,47783271,47783391,GGGGATGCGGCCTGGAGCGAGGCTGGGCCT,GCATCACTCAGCGCCGGAGACTTGGGGAAG,1,GTCTCCGGCGCTGAGTGATGCCAACAAGGCCTCGGCCAGGGCCTCA...,0.758333,120,41062.843073,...,60124.443387,38035.304341,6.229809,7.765127,0.371992,0.412088,6.192562,4.815709,5.077215,4.178747
1,chr2,47783271,47783391,GGGGATGCGGCCTGGAGCGAGGCTGGGCCT,GCATCACTCAGCGCCGGAGACTTGGGGAAG,1,GTCTCCGGCGCTGAGTGATGCCAACAAGGCCTCGGCCAGGGCCTCA...,0.758333,120,41062.843073,...,60124.443387,38035.304341,6.229809,7.765127,0.371992,0.412088,6.192562,4.815709,5.077215,4.178747
2,chr2,47783160,47783296,CTCCGGCGCTGAGTGATGCCAACAAGGCCT,CCGCATCTACCGCGCGGCTCCTGCTGGCGG,1,GGAGCCGCGCGGTAGATGCGGTGCTTTTAGGAGCTCCGTCCGACAG...,0.625,136,80228.226721,...,39310.515606,28072.128794,3.572263,1.446835,0.221542,0.136312,7.538323,5.371348,6.927964,3.534453
3,chr2,47783160,47783296,CTCCGGCGCTGAGTGATGCCAACAAGGCCT,CCGCATCTACCGCGCGGCTCCTGCTGGCGG,1,GGAGCCGCGCGGTAGATGCGGTGCTTTTAGGAGCTCCGTCCGACAG...,0.625,136,80228.226721,...,39310.515606,28072.128794,3.572263,1.446835,0.221542,0.136312,7.538323,5.371348,6.927964,3.534453
4,chr2,47783307,47783454,CCCAAGGCGAAGAACCTCAACGGAGGGCTG,AGTGATGCCAACAAGGCCTCGGCCAGGGCC,1,CAGGGCCTCACGCGAAGGCGGCCGTGCCGCCGCTGCCCCCGGGGCC...,0.768707,147,40296.065744,...,66933.388549,38933.887624,2.688676,1.426041,0.17152,0.135405,4.634717,6.168975,6.839151,3.273399
5,chr2,47783307,47783454,CCCAAGGCGAAGAACCTCAACGGAGGGCTG,AGTGATGCCAACAAGGCCTCGGCCAGGGCC,1,CAGGGCCTCACGCGAAGGCGGCCGTGCCGCCGCTGCCCCCGGGGCC...,0.768707,147,40296.065744,...,66933.388549,38933.887624,2.688676,1.426041,0.17152,0.135405,4.634717,6.168975,6.839151,3.273399
6,chr2,47783271,47783438,GCCCTCCGTTGAGGTTCTTCGCCTTGGGCG,GCATCACTCAGCGCCGGAGACTTGGGGAAG,1,GTCTCCGGCGCTGAGTGATGCCAACAAGGCCTCGGCCAGGGCCTCA...,0.766467,167,40957.151453,...,63526.299546,38096.511774,1.748763,7.765127,0.11831,0.412088,6.762706,4.815709,3.011926,4.178747
7,chr2,47783271,47783438,GCCCTCCGTTGAGGTTCTTCGCCTTGGGCG,GCATCACTCAGCGCCGGAGACTTGGGGAAG,1,GTCTCCGGCGCTGAGTGATGCCAACAAGGCCTCGGCCAGGGCCTCA...,0.766467,167,40957.151453,...,63526.299546,38096.511774,1.748763,7.765127,0.11831,0.412088,6.762706,4.815709,3.011926,4.178747
8,chr2,47783296,47783438,GCCCTCCGTTGAGGTTCTTCGCCTTGGGCG,CTCCGGCGCTGAGTGATGCCAACAAGGCCT,1,AAGGCCTCGGCCAGGGCCTCACGCGAAGGCGGCCGTGCCGCCGCTG...,0.788732,142,40891.703951,...,65818.102592,38881.936602,1.748763,3.399537,0.11831,0.221542,6.762706,7.306734,3.011926,6.978117
9,chr2,47783296,47783438,GCCCTCCGTTGAGGTTCTTCGCCTTGGGCG,CTCCGGCGCTGAGTGATGCCAACAAGGCCT,1,AAGGCCTCGGCCAGGGCCTCACGCGAAGGCGGCCGTGCCGCCGCTG...,0.788732,142,40891.703951,...,65818.102592,38881.936602,1.748763,3.399537,0.11831,0.221542,6.762706,7.306734,3.011926,6.978117


## HyenaDNA Prediction

In [12]:
import utils.HyenaDNA_main as Hyena

Loaded pretrained weights ok!


In [13]:
test_data['DeepSpCas9_harmony'] = 1/(1/(test_data['DeepSpCas9_s1']+1)+1/(test_data['DeepSpCas9_s2']+1))
test_data['DeepCRISPR_harmony'] = 1/(1/(test_data['DeepCRISPR_s1']+1)+1/(test_data['DeepCRISPR_s2']+1))
test_data['CRISPRedict_harmony']= 1/(1/(test_data['CRISPRedit_s1']+1)+1/(test_data['CRISPRedit_s2']+1))
test_data['Ruleset2_harmony']   = 1/(1/(test_data['Ruleset2_s1']+1)+1/(test_data['Ruleset2_s2']+1))
test_data = test_data.sort_values(
    by=['DeepSpCas9_harmony','DeepCRISPR_harmony','CRISPRedict_harmony','Ruleset2_harmony','DNase','ATAC','H3K27ac','H3K4me3'],
    ascending=False
)
test_data['hyena_score']=np.array([Hyena.hyena_inference(s[:16384]) for s in list(test_data['Median_sequence'].fillna('N'))])

In [14]:
(
test_data
.drop(['DeepSpCas9_s1','DeepSpCas9_s2','DeepCRISPR_s1','DeepCRISPR_s2',
       'CRISPRedit_s1','CRISPRedit_s2','Ruleset2_s1','Ruleset2_s2'],axis=1)
).to_csv('data/test_data_annot.csv',index=False)

In [15]:
test_data.head(n=10)

Unnamed: 0,chr,start,end,seq1,seq2,label,Median_sequence,GC,length,DNase,...,DeepCRISPR_s2,Ruleset2_s1,Ruleset2_s2,CRISPRedit_s1,CRISPRedit_s2,DeepSpCas9_harmony,DeepCRISPR_harmony,CRISPRedict_harmony,Ruleset2_harmony,hyena_score
841,chr2,48440797,48440893,CGCCGCGCCTCCTCCTCCACTTCCGGGTTC,TTCCCGGGAGCGTGTCTGGGTTTGGGGGCG,1,AGTGGAGGAGGAGGCGCGGCGGCGGCGGCGGCGGCGGCTGCGGTGG...,0.729167,96,123332.744881,...,0.461988,4.899137,7.82967,2.875407,2.147491,4.431776,0.719472,1.736863,3.536433,-2.889551
902,chr7,5072216,5072322,GTGACTGGTCCGTGGGCTAACTCAGGGCTG,GCAGAGAGGAAAGGGCGTGAAGAGGGGGCG,1,AGGGCTGCTCGGGAGACATAGGCGGGCCGTGGGGGCAGGTGAAGGG...,0.669811,106,1910.800738,...,0.471034,8.456694,6.12788,7.456377,6.263837,4.398179,0.718165,3.907437,4.06439,-3.844674
620,chr7,5072216,5072322,GCAGAGAGGAAAGGGCGTGAAGAGGGGGCG,GTGACTGGTCCGTGGGCTAACTCAGGGCTG,0,AGGGCTGCTCGGGAGACATAGGCGGGCCGTGGGGGCAGGTGAAGGG...,0.669811,106,1910.800738,...,0.403225,6.322104,8.196891,6.201829,7.497902,4.382854,0.718165,3.898184,4.076555,-3.844674
779,chr2,46541807,46541911,AGATGGGTGTGTTCCGGCATCGCCAGGCAG,ACGGAATACCTTTGAGGAACATTGGGGACC,1,CAGGCAGAGGGGCAGGGGTTGCCGCCTCGAGCACAGGCCAAGTTTC...,0.538462,104,30556.754856,...,0.436788,4.828207,7.326189,3.122491,3.204861,4.380051,0.716701,2.081635,3.428387,-3.699923
214,chr2,46541807,46541911,ACGGAATACCTTTGAGGAACATTGGGGACC,AGATGGGTGTGTTCCGGCATCGCCAGGCAG,0,CAGGCAGAGGGGCAGGGGTTGCCGCCTCGAGCACAGGCCAAGTTTC...,0.538462,104,30556.754856,...,0.430032,7.558394,4.679877,3.092086,3.23477,4.378488,0.716701,2.081103,3.414082,-3.699923
215,chr2,46541807,46541911,ACGGAATACCTTTGAGGAACATTGGGGACC,AGATGGGTGTGTTCCGGCATCGCCAGGCAG,0,CAGGCAGAGGGGCAGGGGTTGCCGCCTCGAGCACAGGCCAAGTTTC...,0.538462,104,30556.754856,...,0.430032,7.558394,4.679877,3.092086,3.23477,4.378488,0.716701,2.081103,3.414082,-3.699923
158,chr7,5607353,5607456,GATAGCAGCTGCGAGAAGACTGAGGGGAGG,CTGTCCCTGCAGCCTAGGATGCAAAGGGAG,1,CATCCTAGGCTGCAGGGACAGGGGCTGGGCAGAGGCTCGCGGACCT...,0.621359,103,3816.656758,...,0.419748,6.588349,3.007007,8.193186,3.034554,4.311299,0.713438,2.803987,2.622306,-3.937159
159,chr7,5607353,5607456,GATAGCAGCTGCGAGAAGACTGAGGGGAGG,CTGTCCCTGCAGCCTAGGATGCAAAGGGAG,1,CATCCTAGGCTGCAGGGACAGGGGCTGGGCAGAGGCTCGCGGACCT...,0.621359,103,3816.656758,...,0.419748,6.588349,3.007007,8.193186,3.034554,4.311299,0.713438,2.803987,2.622306,-3.937159
686,chr3,46372159,46372259,ATTGGCATCCAGTATGTGCCCTCGAGGCCT,AGACTGAGTTGCAGCCGGGCATGGTGGCTC,0,GAGGCCTCTTAATTATTACTGGCTTGCTCATAGTGCATGTTCTTTG...,0.44,100,301.381396,...,0.467145,6.489313,5.357066,6.31023,7.231717,4.285287,0.712808,3.871828,3.438448,-3.522516
160,chr7,5607305,5607456,GATAGCAGCTGCGAGAAGACTGAGGGGAGG,CCTGACTGTGGCCTTGGTAAAGTCTGGAAA,1,TTTACCAAGGCCACAGTCAGGGCGACCTGGGTTCTGAGCCTCCCTT...,0.609272,151,3745.180017,...,0.402188,6.588349,3.427755,8.193186,3.360199,4.215512,0.708976,2.957499,2.796193,-3.006579
