Finished updating Caper/Pygr bridge and tests.

voidptr · Jul 8, 2010 · 8b8fa1d · 8b8fa1d
1 parent 54fa2a6
commit 8b8fa1d
Show file tree

Hide file tree

Showing 5 changed files with 148 additions and 146 deletions.
diff --git a/Caper/MappingFile.h b/Caper/MappingFile.h
@@ -364,7 +364,7 @@ class MappingFile
     lSource = (Bytef *) calloc( lSourceLength, 1 ); // allocate the memory on the fly.
     assert( lSource != NULL ); // temporary. TODO handle this better, clean up after myself.
     strcpy( (char*) lSource, aBlock.c_str() ); // this only works because the string is null terminated. :/ TODO, fix this.
-    lSource[aBlock.size()] = NULL; // set null termination manually.
+    lSource[aBlock.size()] = 0; // set null termination manually.
 
     Bytef *lCompressed;
     unsigned long lCompressedLength = compressBound( lSourceLength );

diff --git a/NOTES b/NOTES
@@ -9,9 +9,6 @@ add mapquery stuff
 finish implementing iterators in python wrapper
   - tests
 
-finish reimplementing pygr-bridge
-  - tests
-
 do explicit recasting to remove the huge pile of warnings between signed/unsigned
 
 beef up the testing suite to cover:

diff --git a/Pyrex/caper_pygr_bridge.py b/Pyrex/caper_pygr_bridge.py
@@ -19,19 +19,20 @@ def __init__(self, bridge, ival, m):
 
     def __iter__(self):
         db = self.bridge.reads_db
-        for ival_start, ival_stop, match_name, start, stop, o in self.m:
-            if ival_start == ival_stop:
-                continue
+        for ival_start, ival_stop, start, stop, matches in self.m:
+            for index, match_name, sequence, orientation in matches:
+                if ival_start == ival_stop:
+                    continue
 
-            if o == -1:
-                read = -db[match_name]
-            else:
-                read = db[match_name]
+                if orientation == -1:
+                    read = -db[match_name]
+                else:
+                    read = db[match_name]
 
-            if start > stop:
-                start, stop = stop - 1, start - 1
+                if start > stop:
+                    start, stop = stop - 1, start - 1
 
-            yield read[start:stop]
+                yield read[start:stop]
 
 
     def keys(self):
@@ -43,19 +44,24 @@ def iterkeys(self):
     def edges(self):
         ival = self.ival
         db = self.bridge.reads_db
-        for ival_start, ival_stop, match_name, start, stop, o in self.m:
-            if ival_start == ival_stop:
-                continue
+        for ival_start, ival_stop, start, stop, matches in self.m:
+            for index, match_name, sequence, orientation in matches:
+                if ival_start == ival_stop:
+                    continue
 
-            if o == -1:
-                read = -db[match_name]
-            else:
-                read = db[match_name]
+                if orientation == -1:
+                    read = -db[match_name]
+                else:
+                    read = db[match_name]
 
-            if start > stop:
-                start, stop = stop - 1, start - 1
+                if start > stop:
+                    start, stop = stop - 1, start - 1
 
-            yield ival, read[start:stop], None
+                yield ival, read[start:stop], None
 
     def __len__(self):
-        return len(self.m)
+        #return len(self.m)
+        count = 0;
+        for index in self.m:
+            count = count + len(index[4])
+        return count
diff --git a/Pyrex/foo.py b/Pyrex/foo.py
@@ -2,7 +2,7 @@
 #bundle_path = '/scratch2/rose/capertestdata/out/s_1_sequence.map.trim3-20.bundle'
 #contig = 'Contig0'
 
-bundle_path = 'data/cache/REL606-maq-map_7entries.txt.bundle'
+bundle_path = 'data/cache/REL606-maq-map.txt.bundle'
 contig = 'rel606'
 engine = caper.mapping_container(bundle_path)
 
@@ -31,56 +31,55 @@
 
 print "0-5"
 slice = engine.get_slice(contig,0, 5)
-print "1001-1005"
-slice3 = engine.get_slice(contig, 1001, 1005)
-print "0-5 again"
-slice2 = engine.get_slice(contig,0, 5)
-
-print "FETCHING FROM the original 0-5"
-print
-print "## ROOT"
-print "======================================================"
-val = slice[0]
-print ("VAL: ", val)
-print "------------------------------------------------------"
-print ("SLICE[0]: ", slice[0])
-print "------------------------------------------------------"
-
-print
-print
-print "##0th"
-print "======================================================"
-
-print ("VAL[0]: ",val[0])
-print "------------------------------------------------------"
-print ("SLICE[0][0]: ",slice[0][0])
-print "------------------------------------------------------"
-
-print
-print
-print "##1st"
-print "======================================================"
-
-print ("VAL[1]: ",val[1])
-print "------------------------------------------------------"
-print ("SLICE[0][1]: ",slice[0][1])
-print "------------------------------------------------------"
-
-print
-print
-print "##2nd"
-print "======================================================"
-
-print ("VAL[2]: ",val[2])
-print "------------------------------------------------------"
-print ("SLICE[0][2]: ",slice[0][2])
-print "------------------------------------------------------"
+# print "1001-1005"
+# slice3 = engine.get_slice(contig, 1001, 1005)
+# print "0-5 again"
+# slice2 = engine.get_slice(contig,0, 5)
+#
+# print "FETCHING FROM the original 0-5"
+# print
+# print "## ROOT"
+# print "======================================================"
+# val = slice[0]
+# print ("VAL: ", val)
+# print "------------------------------------------------------"
+# print ("SLICE[0]: ", slice[0])
+# print "------------------------------------------------------"
+#
+# print
+# print
+# print "##0th"
+# print "======================================================"
+#
+# print ("VAL[0]: ",val[0])
+# print "------------------------------------------------------"
+# print ("SLICE[0][0]: ",slice[0][0])
+# print "------------------------------------------------------"
+#
+# print
+# print
+# print "##1st"
+# print "======================================================"
+#
+# print ("VAL[1]: ",val[1])
+# print "------------------------------------------------------"
+# print ("SLICE[0][1]: ",slice[0][1])
+# print "------------------------------------------------------"
+#
+# print
+# print
+# print "##2nd"
+# print "======================================================"
+#
+# print ("VAL[2]: ",val[2])
+# print "------------------------------------------------------"
+# print ("SLICE[0][2]: ",slice[0][2])
+# print "------------------------------------------------------"
 
 
-#for item in slice:
-#    print "array", item
-#    for read in item[4]:
-#        print read
+for ival_start, ival_stop, start, stop, matches in slice:
+    for index, name, sequence, orientation in matches:
+        print index, name, sequence, orientation
 
 
 

diff --git a/Pyrex/tests.py b/Pyrex/tests.py
@@ -3,17 +3,20 @@
 # lib code.
 import os.path
 
-#try:
-#    import screed
-#except ImportError:
-#    raise Exception, "you need to install screed!"
+try:
+    import screed
+except ImportError:
+    raise Exception, "you need to install screed!"
 
 import caper
-#from caper_pygr_bridge import CaperBridge
-#from screed.pygr_api import ScreedSequenceDB
+from caper_pygr_bridge import CaperBridge
+from screed.pygr_api import ScreedSequenceDB
 from pygr import cnestedlist, seqdb
 
-#sequence_path = 'data/REL606.gmc.fa'
+sequence_path = 'data/REL606.gmc.fa'
+sequence_fastq = 'data/REL606-seqs.fastq'
+
+contig = 'rel606'
 
 #genome_index = 'data/cache/REL606.gmc.fa.genomeindex'
 #sequence_index = 'data/cache/REL606.gmc.fa.indexed'
@@ -33,11 +36,11 @@ def setup():
 class MappingContainer_Test(object):
     def setup(self):
         self.cont = caper.mapping_container(map_bundle)
-        self.db = seqdb.SequenceFileDB('data/REL606.gmc.fa')
-        self.seq = self.db['rel606']
+        self.db = seqdb.SequenceFileDB(sequence_path)
+        self.seq = self.db[contig]
 
     def test_retrieve(self):
-        matches = self.cont.get_slice('rel606', 0, 5)
+        matches = self.cont.get_slice(contig, 0, 5)
 
         x = list()
         for item in matches:
@@ -56,11 +59,11 @@ def test_retrieve(self):
             assert tstop - tstart == bstop - bstart
 
     def test_empty_retrieve(self):
-        matches = self.cont.get_slice('rel606', 14000, 14999)
+        matches = self.cont.get_slice(contig, 14000, 14999)
         assert len(matches) == 0
 
     def test_was_broken_boundary(self):
-        matches = self.cont.get_slice('rel606', 0, 20000)
+        matches = self.cont.get_slice(contig, 0, 20000)
 
         count = 0;
         for index in matches:
@@ -69,7 +72,7 @@ def test_was_broken_boundary(self):
         assert count == 10000
 
     def test_retrieve_all(self):
-        matches = self.cont.get_slice('rel606', 0, len(self.seq))
+        matches = self.cont.get_slice(contig, 0, len(self.seq))
 
         count = 0;
         for index in matches:
@@ -78,62 +81,59 @@ def test_retrieve_all(self):
         assert count == 10000
 
 
-# class PygrBridge_Test(object):
-#     def setup(self):
-#         self.cont = caper.mapping_container(map_bundle)
-#         self.db = seqdb.SequenceFileDB('data/REL606.gmc.fa') #ScreedSequenceDB('data/REL606.gmc.fa')
-#         #self.db = ScreedSequenceDB('data/REL606.gmc.fa')
-#         self.seq = self.db['rel606']
-#         self.reads_db = ScreedSequenceDB('data/REL606-seqs.fastq')
-#
-#         self.al = CaperBridge(self.cont, self.db, self.reads_db)
-#
-#     def test_basic_slice(self):
-#         ival = self.seq[0:50]
-#         slice = self.al[ival]
-#         assert len(slice) == 22
-#
-#         for src, dest, _ in slice.edges():
-#             print src
-#             print dest
-#             print ''
-#
-#     def test_bridge_nlmsa_equiv(self):
-#         nlmsa = cnestedlist.NLMSA('foo', 'memory', pairwiseMode=True)
-#         nlmsa += self.seq
-#
-#         # select across a wider window then we'll actually slice, to get
-#         # an inclusive set of reads
-#         m = self.cont.get_slice('rel606', 0, 200)
-#
-#         # construct nlmsa
-#         N=0
-#         for ival_start, ival_stop, read_name, read_start, read_stop, o in m:
-#             N += 1
-#             if ival_start == ival_stop:
-#                 continue
-#
-#             ival = self.seq[ival_start:ival_stop]
-#             if o == -1:
-#                 match_seq = -self.reads_db[read_name]
-#             else:
-#                 match_seq = self.reads_db[read_name]
-#
-#             match_ival = match_seq[read_start:read_stop]
-#             nlmsa[ival] += match_ival
-#
-#         nlmsa.build()
-#
-#         ### now, slice!
-#         ival = self.seq[75:80]
-#         nlmsa_slice = nlmsa[ival]
-#         caper_slice = self.al[ival]
-#
-#         s = set(nlmsa_slice.keys())
-#         t = set(caper_slice.keys())
-#
-#         assert s == t
-#
-#         for k, v, e in nlmsa[ival].edges():
-#             print repr(k), repr(v), e.pIdentity(), v.orientation
-#             assert e.pIdentity() >= 0.8, (repr(k), repr(v), e.pIdentity())
+class PygrBridge_Test(object):
+    def setup(self):
+        self.cont = caper.mapping_container(map_bundle)
+        self.db = seqdb.SequenceFileDB(sequence_path) #ScreedSequenceDB('data/REL606.gmc.fa')
+        self.db = ScreedSequenceDB(sequence_path)
+        self.seq = self.db[contig]
+        self.reads_db = ScreedSequenceDB(sequence_fastq)
+
+        self.al = CaperBridge(self.cont, self.db, self.reads_db)
+
+    def test_basic_slice(self):
+        ival = self.seq[0:50]
+        slice = self.al[ival]
+        assert len(slice) == 22
+
+    def test_bridge_nlmsa_equiv(self):
+        nlmsa = cnestedlist.NLMSA('foo', 'memory', pairwiseMode=True)
+        nlmsa += self.seq
+
+        # select across a wider window then we'll actually slice, to get
+        # an inclusive set of reads
+        m = self.cont.get_slice('rel606', 0, 200)
+
+        # construct nlmsa
+        N=0
+        for ival_start, ival_stop, read_start, read_stop, match in m:
+            for index, read_name, sequence, orientation in match:
+                N += 1
+                if ival_start == ival_stop:
+                    continue
+
+                ival = self.seq[ival_start:ival_stop]
+                if orientation == -1:
+                    match_seq = -self.reads_db[read_name]
+                else:
+                    match_seq = self.reads_db[read_name]
+
+                match_ival = match_seq[read_start:read_stop]
+
+                nlmsa[ival] += match_ival
+
+        nlmsa.build()
+
+        ### now, slice!
+        ival = self.seq[75:80]
+        nlmsa_slice = nlmsa[ival]
+        caper_slice = self.al[ival]
+
+        s = set(nlmsa_slice.keys())
+        t = set(caper_slice.keys())
+
+        assert s == t
+
+        for k, v, e in nlmsa[ival].edges():
+            print repr(k), repr(v), e.pIdentity(), v.orientation
+            assert e.pIdentity() >= 0.8, (repr(k), repr(v), e.pIdentity())