Skip to content

Commit

Permalink
Finished updating Caper/Pygr bridge and tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
voidptr committed Jul 8, 2010
1 parent 54fa2a6 commit 8b8fa1d
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 146 deletions.
2 changes: 1 addition & 1 deletion Caper/MappingFile.h
Expand Up @@ -364,7 +364,7 @@ class MappingFile
lSource = (Bytef *) calloc( lSourceLength, 1 ); // allocate the memory on the fly.
assert( lSource != NULL ); // temporary. TODO handle this better, clean up after myself.
strcpy( (char*) lSource, aBlock.c_str() ); // this only works because the string is null terminated. :/ TODO, fix this.
lSource[aBlock.size()] = NULL; // set null termination manually.
lSource[aBlock.size()] = 0; // set null termination manually.

Bytef *lCompressed;
unsigned long lCompressedLength = compressBound( lSourceLength );
Expand Down
3 changes: 0 additions & 3 deletions NOTES
Expand Up @@ -9,9 +9,6 @@ add mapquery stuff
finish implementing iterators in python wrapper
- tests

finish reimplementing pygr-bridge
- tests

do explicit recasting to remove the huge pile of warnings between signed/unsigned

beef up the testing suite to cover:
Expand Down
48 changes: 27 additions & 21 deletions Pyrex/caper_pygr_bridge.py
Expand Up @@ -19,19 +19,20 @@ def __init__(self, bridge, ival, m):

def __iter__(self):
db = self.bridge.reads_db
for ival_start, ival_stop, match_name, start, stop, o in self.m:
if ival_start == ival_stop:
continue
for ival_start, ival_stop, start, stop, matches in self.m:
for index, match_name, sequence, orientation in matches:
if ival_start == ival_stop:
continue

if o == -1:
read = -db[match_name]
else:
read = db[match_name]
if orientation == -1:
read = -db[match_name]
else:
read = db[match_name]

if start > stop:
start, stop = stop - 1, start - 1
if start > stop:
start, stop = stop - 1, start - 1

yield read[start:stop]
yield read[start:stop]


def keys(self):
Expand All @@ -43,19 +44,24 @@ def iterkeys(self):
def edges(self):
ival = self.ival
db = self.bridge.reads_db
for ival_start, ival_stop, match_name, start, stop, o in self.m:
if ival_start == ival_stop:
continue
for ival_start, ival_stop, start, stop, matches in self.m:
for index, match_name, sequence, orientation in matches:
if ival_start == ival_stop:
continue

if o == -1:
read = -db[match_name]
else:
read = db[match_name]
if orientation == -1:
read = -db[match_name]
else:
read = db[match_name]

if start > stop:
start, stop = stop - 1, start - 1
if start > stop:
start, stop = stop - 1, start - 1

yield ival, read[start:stop], None
yield ival, read[start:stop], None

def __len__(self):
return len(self.m)
#return len(self.m)
count = 0;
for index in self.m:
count = count + len(index[4])
return count
97 changes: 48 additions & 49 deletions Pyrex/foo.py
Expand Up @@ -2,7 +2,7 @@
#bundle_path = '/scratch2/rose/capertestdata/out/s_1_sequence.map.trim3-20.bundle'
#contig = 'Contig0'

bundle_path = 'data/cache/REL606-maq-map_7entries.txt.bundle'
bundle_path = 'data/cache/REL606-maq-map.txt.bundle'
contig = 'rel606'
engine = caper.mapping_container(bundle_path)

Expand Down Expand Up @@ -31,56 +31,55 @@

print "0-5"
slice = engine.get_slice(contig,0, 5)
print "1001-1005"
slice3 = engine.get_slice(contig, 1001, 1005)
print "0-5 again"
slice2 = engine.get_slice(contig,0, 5)

print "FETCHING FROM the original 0-5"
print
print "## ROOT"
print "======================================================"
val = slice[0]
print ("VAL: ", val)
print "------------------------------------------------------"
print ("SLICE[0]: ", slice[0])
print "------------------------------------------------------"

print
print
print "##0th"
print "======================================================"

print ("VAL[0]: ",val[0])
print "------------------------------------------------------"
print ("SLICE[0][0]: ",slice[0][0])
print "------------------------------------------------------"

print
print
print "##1st"
print "======================================================"

print ("VAL[1]: ",val[1])
print "------------------------------------------------------"
print ("SLICE[0][1]: ",slice[0][1])
print "------------------------------------------------------"

print
print
print "##2nd"
print "======================================================"

print ("VAL[2]: ",val[2])
print "------------------------------------------------------"
print ("SLICE[0][2]: ",slice[0][2])
print "------------------------------------------------------"
# print "1001-1005"
# slice3 = engine.get_slice(contig, 1001, 1005)
# print "0-5 again"
# slice2 = engine.get_slice(contig,0, 5)
#
# print "FETCHING FROM the original 0-5"
# print
# print "## ROOT"
# print "======================================================"
# val = slice[0]
# print ("VAL: ", val)
# print "------------------------------------------------------"
# print ("SLICE[0]: ", slice[0])
# print "------------------------------------------------------"
#
# print
# print
# print "##0th"
# print "======================================================"
#
# print ("VAL[0]: ",val[0])
# print "------------------------------------------------------"
# print ("SLICE[0][0]: ",slice[0][0])
# print "------------------------------------------------------"
#
# print
# print
# print "##1st"
# print "======================================================"
#
# print ("VAL[1]: ",val[1])
# print "------------------------------------------------------"
# print ("SLICE[0][1]: ",slice[0][1])
# print "------------------------------------------------------"
#
# print
# print
# print "##2nd"
# print "======================================================"
#
# print ("VAL[2]: ",val[2])
# print "------------------------------------------------------"
# print ("SLICE[0][2]: ",slice[0][2])
# print "------------------------------------------------------"


#for item in slice:
# print "array", item
# for read in item[4]:
# print read
for ival_start, ival_stop, start, stop, matches in slice:
for index, name, sequence, orientation in matches:
print index, name, sequence, orientation



Expand Down
144 changes: 72 additions & 72 deletions Pyrex/tests.py
Expand Up @@ -3,17 +3,20 @@
# lib code.
import os.path

#try:
# import screed
#except ImportError:
# raise Exception, "you need to install screed!"
try:
import screed
except ImportError:
raise Exception, "you need to install screed!"

import caper
#from caper_pygr_bridge import CaperBridge
#from screed.pygr_api import ScreedSequenceDB
from caper_pygr_bridge import CaperBridge
from screed.pygr_api import ScreedSequenceDB
from pygr import cnestedlist, seqdb

#sequence_path = 'data/REL606.gmc.fa'
sequence_path = 'data/REL606.gmc.fa'
sequence_fastq = 'data/REL606-seqs.fastq'

contig = 'rel606'

#genome_index = 'data/cache/REL606.gmc.fa.genomeindex'
#sequence_index = 'data/cache/REL606.gmc.fa.indexed'
Expand All @@ -33,11 +36,11 @@ def setup():
class MappingContainer_Test(object):
def setup(self):
self.cont = caper.mapping_container(map_bundle)
self.db = seqdb.SequenceFileDB('data/REL606.gmc.fa')
self.seq = self.db['rel606']
self.db = seqdb.SequenceFileDB(sequence_path)
self.seq = self.db[contig]

def test_retrieve(self):
matches = self.cont.get_slice('rel606', 0, 5)
matches = self.cont.get_slice(contig, 0, 5)

x = list()
for item in matches:
Expand All @@ -56,11 +59,11 @@ def test_retrieve(self):
assert tstop - tstart == bstop - bstart

def test_empty_retrieve(self):
matches = self.cont.get_slice('rel606', 14000, 14999)
matches = self.cont.get_slice(contig, 14000, 14999)
assert len(matches) == 0

def test_was_broken_boundary(self):
matches = self.cont.get_slice('rel606', 0, 20000)
matches = self.cont.get_slice(contig, 0, 20000)

count = 0;
for index in matches:
Expand All @@ -69,7 +72,7 @@ def test_was_broken_boundary(self):
assert count == 10000

def test_retrieve_all(self):
matches = self.cont.get_slice('rel606', 0, len(self.seq))
matches = self.cont.get_slice(contig, 0, len(self.seq))

count = 0;
for index in matches:
Expand All @@ -78,62 +81,59 @@ def test_retrieve_all(self):
assert count == 10000


# class PygrBridge_Test(object):
# def setup(self):
# self.cont = caper.mapping_container(map_bundle)
# self.db = seqdb.SequenceFileDB('data/REL606.gmc.fa') #ScreedSequenceDB('data/REL606.gmc.fa')
# #self.db = ScreedSequenceDB('data/REL606.gmc.fa')
# self.seq = self.db['rel606']
# self.reads_db = ScreedSequenceDB('data/REL606-seqs.fastq')
#
# self.al = CaperBridge(self.cont, self.db, self.reads_db)
#
# def test_basic_slice(self):
# ival = self.seq[0:50]
# slice = self.al[ival]
# assert len(slice) == 22
#
# for src, dest, _ in slice.edges():
# print src
# print dest
# print ''
#
# def test_bridge_nlmsa_equiv(self):
# nlmsa = cnestedlist.NLMSA('foo', 'memory', pairwiseMode=True)
# nlmsa += self.seq
#
# # select across a wider window then we'll actually slice, to get
# # an inclusive set of reads
# m = self.cont.get_slice('rel606', 0, 200)
#
# # construct nlmsa
# N=0
# for ival_start, ival_stop, read_name, read_start, read_stop, o in m:
# N += 1
# if ival_start == ival_stop:
# continue
#
# ival = self.seq[ival_start:ival_stop]
# if o == -1:
# match_seq = -self.reads_db[read_name]
# else:
# match_seq = self.reads_db[read_name]
#
# match_ival = match_seq[read_start:read_stop]
# nlmsa[ival] += match_ival
#
# nlmsa.build()
#
# ### now, slice!
# ival = self.seq[75:80]
# nlmsa_slice = nlmsa[ival]
# caper_slice = self.al[ival]
#
# s = set(nlmsa_slice.keys())
# t = set(caper_slice.keys())
#
# assert s == t
#
# for k, v, e in nlmsa[ival].edges():
# print repr(k), repr(v), e.pIdentity(), v.orientation
# assert e.pIdentity() >= 0.8, (repr(k), repr(v), e.pIdentity())
class PygrBridge_Test(object):
def setup(self):
self.cont = caper.mapping_container(map_bundle)
self.db = seqdb.SequenceFileDB(sequence_path) #ScreedSequenceDB('data/REL606.gmc.fa')
self.db = ScreedSequenceDB(sequence_path)
self.seq = self.db[contig]
self.reads_db = ScreedSequenceDB(sequence_fastq)

self.al = CaperBridge(self.cont, self.db, self.reads_db)

def test_basic_slice(self):
ival = self.seq[0:50]
slice = self.al[ival]
assert len(slice) == 22

def test_bridge_nlmsa_equiv(self):
nlmsa = cnestedlist.NLMSA('foo', 'memory', pairwiseMode=True)
nlmsa += self.seq

# select across a wider window then we'll actually slice, to get
# an inclusive set of reads
m = self.cont.get_slice('rel606', 0, 200)

# construct nlmsa
N=0
for ival_start, ival_stop, read_start, read_stop, match in m:
for index, read_name, sequence, orientation in match:
N += 1
if ival_start == ival_stop:
continue

ival = self.seq[ival_start:ival_stop]
if orientation == -1:
match_seq = -self.reads_db[read_name]
else:
match_seq = self.reads_db[read_name]

match_ival = match_seq[read_start:read_stop]

nlmsa[ival] += match_ival

nlmsa.build()

### now, slice!
ival = self.seq[75:80]
nlmsa_slice = nlmsa[ival]
caper_slice = self.al[ival]

s = set(nlmsa_slice.keys())
t = set(caper_slice.keys())

assert s == t

for k, v, e in nlmsa[ival].edges():
print repr(k), repr(v), e.pIdentity(), v.orientation
assert e.pIdentity() >= 0.8, (repr(k), repr(v), e.pIdentity())

0 comments on commit 8b8fa1d

Please sign in to comment.