In [1]:
import numpy as np
import pandas as pd
import subprocess
import tempfile
import multiprocessing

from primo.tools.barcoder import Barcoder

In [2]:
# Fixed sequence used as the anchor when extracting barcodes: decode_index keeps
# the 30 bases immediately preceding each occurrence of this sequence in a read.
IP = "AGCACTCAGTATTTGTCCG"

In [3]:
# Barcode table: its row count sets the number of count bins in decode_index and
# its ImageID column labels the columns of the frame built in decode_run.
# Presumably row i corresponds to decoded barcode number i — verify against the
# code that generated this file.
# NOTE(review): absolute path; the notebook only runs where this path exists.
barcode_order = pd.read_csv('/tf/primo/data/metadata/target_barcode_order.csv.gz')

In [4]:
# Shared Barcoder instance used by decode_barcode.
# (4 data + 2 check) symbols x 5 bases per symbol = 30 bases, which matches the
# 30-base window that decode_index extracts — presumably by design; confirm
# against primo.tools.barcoder before changing any of these values.
barcoder = Barcoder(
    n_data_symbols   = 4,
    n_check_symbols  = 2,
    bits_per_symbol  = 6,
    bases_per_symbol = 5,
    seed = 42
)
def decode_barcode(barcode):
    """Decode one extracted barcode sequence with the shared ``barcoder``.

    Leading and trailing whitespace (e.g. the newline left on lines read from
    a file) is stripped before the sequence is handed to
    ``barcoder.barcode_seq_to_num``; that call's result is returned unchanged.
    Kept as a module-level function so it can be passed to ``Pool.map``.
    """
    cleaned = barcode.strip()
    return barcoder.barcode_seq_to_num(cleaned)

In [5]:
def decode_index(path_glob):
    """Count the decoded barcodes found in the read file(s) matching ``path_glob``.

    The files are streamed through a shell pipeline (``zcat`` when
    ``path_glob`` ends in ``.gz``, ``cat`` otherwise) that keeps the 30 bases
    immediately preceding every occurrence of ``IP``. Each extracted barcode
    is decoded with ``decode_barcode`` in a process pool, and barcodes that
    decode to ``None`` are discarded.

    Parameters
    ----------
    path_glob : str
        Path or shell glob of the read file(s). It is passed to the shell
        unquoted so that wildcards are expanded, and must therefore come from
        a trusted source.

    Returns
    -------
    numpy.ndarray
        Integer array of length ``len(barcode_order)``; element ``i`` is the
        number of barcodes that decoded to ``i``. Decoded values greater than
        or equal to ``len(barcode_order)`` are not counted.
    """
    n_targets = len(barcode_order)

    # zcat for gzip-compressed reads, plain cat otherwise
    reader = "zcat" if path_glob.endswith(".gz") else "cat"

    with tempfile.NamedTemporaryFile() as temp:
        # Only the fixed tail of the pipeline is %-formatted. The path is added
        # by plain concatenation, so a literal '%' in path_glob can no longer
        # break (or silently corrupt) the command string.
        extract_cmd = "| egrep -o '[ATCGN]{30}%s' | cut -b 1-30 > %s" % (
            IP,
            temp.name
        )

        # extract barcodes
        # NOTE: shell=True with an unquoted path is deliberate (glob expansion).
        # The exit status is that of the last pipeline stage only, so a failing
        # zcat/cat is not detected here and simply yields no barcodes.
        subprocess.call(reader + " " + path_glob + extract_cmd, shell = True)

        # the shell wrote through temp.name; read it back via our own handle
        barcodes = temp.readlines()

    # decode
    results = []
    if barcodes:  # do not spawn worker processes when there is nothing to decode
        pool = multiprocessing.Pool()
        try:
            results = pool.map(decode_barcode, barcodes)
        except BaseException:
            # kill outstanding work instead of waiting for it, then re-raise
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            # always reap the workers so they do not linger after each call
            pool.join()

    # explicit None filter: independent of how NumPy would infer the dtype
    decoded = np.array([r for r in results if r is not None], dtype=int)

    return np.bincount(decoded, minlength=n_targets)[:n_targets]

In [6]:
def decode_run(run_id):
    """Decode every sequencing index of one run and persist the count table.

    Reads ``index.csv`` and the first line of ``location`` from the run's
    directory. That line is used as a ``%``-format template: filling in a
    sequencing index gives the path glob handed to ``decode_index``.

    Parameters
    ----------
    run_id : str
        Name of the run directory under ``/tf/primo/data/sequencing/``.

    Returns
    -------
    pandas.DataFrame
        One row per sequencing index and one column per
        ``barcode_order.ImageID``. The same frame is also pickled to
        ``decoded.pkl.gz`` in the run directory.
    """
    run_dir = '/tf/primo/data/sequencing/%s/' % run_id

    # run metadata and the path template for this run's read files
    meta = pd.read_csv(run_dir + 'index.csv')
    with open(run_dir + 'location') as location_file:
        template = location_file.readline().strip()

    seq_indices = meta.sequencing_index

    # one count vector per sequencing index, in metadata order
    per_index_counts = []
    for seq_index in seq_indices:
        print (run_id, seq_index)
        per_index_counts.append(decode_index(template % seq_index))

    # assemble the table and save it next to the run's inputs
    table = pd.DataFrame(
        np.array(per_index_counts),
        index = seq_indices,
        columns = barcode_order.ImageID
    )
    table.to_pickle(run_dir + 'decoded.pkl.gz')

    return table

In [7]:
# Decode every sequencing index of run "Run_92"; decode_run also writes the
# resulting frame to decoded.pkl.gz inside that run's directory.
df = decode_run("Run_92")

('Run_92', 'E4')
('Run_92', 'E6')
('Run_92', 'E7')
('Run_92', 'E8')
('Run_92', 'E9')
('Run_92', 'D4')
('Run_92', 'D8')
('Run_92', 'D9')
('Run_92', 'D12')
('Run_92', 'E1')
