Skip to content

Commit

Permalink
fixed unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeremiah Wala committed Sep 23, 2016
1 parent c0186da commit 480f831
Show file tree
Hide file tree
Showing 7 changed files with 118 additions and 304 deletions.
4 changes: 2 additions & 2 deletions .travis.scripts/publish-doxygen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
cd ${HOME};
wget ftp://gsapubftp-anonymous@ftp.broadinstitute.org/travis/doxygen_1.8.8-1_amd64.deb
sudo dpkg --install doxygen_1.8.8-1_amd64.deb
cd ${HOME}/build/jwalabroad/SeqLib;
cd ${HOME}/build/walaj/SeqLib;
doxygen

echo -e "Publishing doxygen...\n";
git config --global user.email "travis@travis-ci.org";
git config --global user.name "travis-ci";
git clone --branch=gh-pages https://${GH_TOKEN}@github.com/jwalabroad/SeqLib gh-pages;
git clone --branch=gh-pages https://${GH_TOKEN}@github.com/walaj/SeqLib gh-pages;
cd gh-pages;
rm -rf doxygen/;
mv ../docs/html doxygen/;
Expand Down
188 changes: 90 additions & 98 deletions SeqLib/GenomicRegionCollection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,6 @@ namespace SeqLib {
}
}

static bool header_has_chr_string = false;

template<class T>
void GenomicRegionCollection<T>::CoordinateSort() {

Expand Down Expand Up @@ -169,54 +167,65 @@ template<class T>
bool GenomicRegionCollection<T>::ReadBED(const std::string & file, const BamHeader& hdr) {

m_sorted = false;
std::ifstream iss(file.c_str());
if (!iss || file.length() == 0) {
std::cerr << "BED file does not exist: " << file << std::endl;
idx = 0;

gzFile fp = NULL;
fp = strcmp(file.c_str(), "-")? gzopen(file.c_str(), "r") : gzdopen(fileno(stdin), "r");

if (file.empty() || !fp) {
std::cerr << "BED file not readable: " << file << std::endl;
return false;
}

std::string line;
std::string curr_chr = "-1";
while (std::getline(iss, line, '\n')) {
// http://www.lemoda.net/c/gzfile-read/
size_t c = 0;
while (1) {

int err;
char buffer[GZBUFFER];
gzgets(fp, buffer, GZBUFFER);
int bytes_read = strlen(buffer);

// get one line
if (bytes_read < GZBUFFER - 1) {
if (gzeof (fp)) break;
else {
const char * error_string;
error_string = gzerror (fp, &err);
if (err) {
fprintf (stderr, "Error: %s.\n", error_string);
exit (EXIT_FAILURE);
}
}
}

// prepare to loop through each field of BED line
size_t counter = 0;
std::string chr, pos1, pos2;
std::string line(buffer);
std::istringstream iss_line(line);
std::string val;
if (line.find("#") != std::string::npos)
continue;

if (line.find("#") == std::string::npos) {
while(std::getline(iss_line, val, '\t')) {
switch (counter) {
case 0 : chr = val; break;
case 1 : pos1 = val; break;
case 2 : pos2 = val; break;
}
if (counter >= 2)
break;
++counter;

if (chr != curr_chr) {
//std::cerr << "...reading from BED - chr" << chr << std::endl;
curr_chr = chr;
}

}

if (header_has_chr_string) {
//if (chr == "X" || chr == "Y" || std::stoi(chr) < 23)
chr = "chr" + chr;
// loop BED lines
while(std::getline(iss_line, val, '\t')) {
switch (counter) {
case 0 : chr = val; break;
case 1 : pos1 = val; break;
case 2 : pos2 = val; break;
}

// construct the GenomicRegion
T gr(chr, pos1, pos2, hdr);

if (gr.chr >= 0) {
m_grv->push_back(gr);
}
//}
} // end "keep" conditional
} // end main while
if (counter >= 2)
break;
++counter;
}

// construct the GenomicRegion
T gr(chr, pos1, pos2, hdr);
if (gr.chr >= 0)
m_grv->push_back(gr);
}

return true;
}
Expand All @@ -225,62 +234,14 @@ template<class T>
bool GenomicRegionCollection<T>::ReadVCF(const std::string & file, const BamHeader& hdr) {

m_sorted = false;
std::ifstream iss(file.c_str());
if (!iss || file.length() == 0) {
std::cerr << "VCF file does not exist: " << file << std::endl;
return false;
}

std::string line;

while (std::getline(iss, line, '\n')) {
if (line.length() > 0) {
if (line.at(0) != '#') { // its a valid line
std::istringstream iss_this(line);
int count = 0;
std::string val, chr, pos;

while (std::getline(iss_this, val, '\t')) {
switch (count) {
case 0 : chr = val;
case 1 : pos = val;
}
count++;
}
if (count < 3) {
std::cerr << "Didn't parse VCF line properly: " << line << std::endl;
return false;
} else {

// construct the GenomicRegion
T gr(chr, pos, pos, hdr);
if (gr.chr >= 0) {
m_grv->push_back(gr);
}
}

}
}
}

return true;

}

template<class T>
GenomicRegionCollection<T>::GenomicRegionCollection(const std::string &file, const BamHeader& hdr) {

__allocate_grc();

idx = 0;

//
gzFile fp = NULL;
fp = strcmp(file.c_str(), "-")? gzopen(file.c_str(), "r") : gzdopen(fileno(stdin), "r");

if (file.empty() || !fp) {
std::cerr << "Region file not readable: " << file << std::endl;
return;
std::cerr << "VCF file not readable: " << file << std::endl;
return false;
}

// http://www.lemoda.net/c/gzfile-read/
Expand All @@ -292,6 +253,7 @@ GenomicRegionCollection<T>::GenomicRegionCollection(const std::string &file, con
gzgets(fp, buffer, GZBUFFER);
int bytes_read = strlen(buffer);

// get one line
if (bytes_read < GZBUFFER - 1) {
if (gzeof (fp)) break;
else {
Expand All @@ -304,9 +266,42 @@ GenomicRegionCollection<T>::GenomicRegionCollection(const std::string &file, con
}
}

std::cerr << std::string(buffer) << " C " << c << std::endl;
// prepare to loop through each field of BED line
size_t counter = 0;
std::string chr, pos;
std::string line(buffer);
std::istringstream iss_line(line);
std::string val;
if (line.empty() || line.at(0) == '#')
continue;

// loop VCF lines
while(std::getline(iss_line, val, '\t')) {
switch (counter) {
case 0 : chr = val; break;
case 1 : pos = val; break;
}
if (counter >= 2)
break;
++counter;
}

// construct the GenomicRegion
T gr(chr, pos, pos, hdr);
if (gr.chr >= 0)
m_grv->push_back(gr);
}

return true;
}

template<class T>
GenomicRegionCollection<T>::GenomicRegionCollection(const std::string &file, const BamHeader& hdr) {

__allocate_grc();

idx = 0;

/*
// get the header line to check format
std::string header;
Expand All @@ -316,20 +311,17 @@ GenomicRegionCollection<T>::GenomicRegionCollection(const std::string &file, con
}
iss.close();
// MUTECT CALL STATS
if ((header.find("MuTect") != std::string::npos) ||
(file.find("call_stats") != std::string::npos) ||
(file.find("callstats") != std::string::npos))
std::cerr << "MuTect reading not currently available" << std::endl; //ReadMuTect(file, hdr);
*/

// BED file
else if (file.find(".bed") != std::string::npos)
if (file.find(".bed") != std::string::npos)
ReadBED(file, hdr);
// VCF file
else if (file.find(".vcf") != std::string::npos)
ReadVCF(file, hdr);
else // default is BED file
ReadBED(file, hdr);
*/

}

// reduce a set of GenomicRegions into the minium overlapping set (same as GenomicRanges "reduce")
Expand Down
2 changes: 1 addition & 1 deletion bwa
Submodule bwa updated from 88a5ca to fbd4db
64 changes: 17 additions & 47 deletions seq_test/seq_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "SeqLib/RefGenome.h"

#define GZBED "test_data/test.bed.gz"
#define GZVCF "test_data/test.vcf.gz"
#define SBAM "test_data/small.bam"
#define OBAM "test_data/small_out.bam"
#define OCRAM "test_data/small_out.cram"
Expand All @@ -36,7 +37,14 @@ BOOST_AUTO_TEST_CASE( read_gzbed ) {
br.Open("test_data/small.bam");

SeqLib::GRC g(GZBED, br.Header());
BOOST_CHECK_EQUAL(g.size(), 3);

BOOST_CHECK_EQUAL(g[2].chr, 22);

SeqLib::GRC v(GZVCF, br.Header());
BOOST_CHECK_EQUAL(v.size(), 31);

BOOST_CHECK_EQUAL(v[29].chr, 22);
}

BOOST_AUTO_TEST_CASE ( bfc ) {
Expand Down Expand Up @@ -405,15 +413,15 @@ BOOST_AUTO_TEST_CASE( bam_record ) {
size_t count = 0;
br.GetNextRecord(r);

BOOST_CHECK_EQUAL(r.asGenomicRegion().chr, 22);
BOOST_CHECK_EQUAL(r.asGenomicRegion().pos1,999901);
BOOST_CHECK_EQUAL(r.asGenomicRegion().pos2,1000002);
BOOST_CHECK_EQUAL(r.asGenomicRegion().strand,'+');
BOOST_CHECK_EQUAL(r.AsGenomicRegion().chr, 22);
BOOST_CHECK_EQUAL(r.AsGenomicRegion().pos1,999901);
BOOST_CHECK_EQUAL(r.AsGenomicRegion().pos2,1000002);
BOOST_CHECK_EQUAL(r.AsGenomicRegion().strand,'+');

BOOST_CHECK_EQUAL(r.asGenomicRegionMate().chr, 22);
BOOST_CHECK_EQUAL(r.asGenomicRegionMate().pos1,999993);
BOOST_CHECK_EQUAL(r.asGenomicRegionMate().pos2,1000094);
BOOST_CHECK_EQUAL(r.asGenomicRegionMate().strand,'-');
BOOST_CHECK_EQUAL(r.AsGenomicRegionMate().chr, 22);
BOOST_CHECK_EQUAL(r.AsGenomicRegionMate().pos1,999993);
BOOST_CHECK_EQUAL(r.AsGenomicRegionMate().pos2,1000094);
BOOST_CHECK_EQUAL(r.AsGenomicRegionMate().strand,'-');

BOOST_CHECK_EQUAL(std::floor(r.MeanPhred()), 34);

Expand Down Expand Up @@ -987,14 +995,6 @@ BOOST_AUTO_TEST_CASE( bam_write ) {
// empty constructor
SeqLib::BamWriter w;

// specify bam explicitly
//w = SeqLib::BamWriter(SeqLib::BAM);

//BOOST_CHECK_THROW(w.WriteHeader(), std::runtime_error);
//BOOST_CHECK_THROW(w.Close(), std::runtime_error);
//BOOST_CHECK_THROW(w.BuildIndex(), std::runtime_error);
//BOOST_CHECK_THROW(w.WriteRecord(rec), std::runtime_error);

BOOST_CHECK(!w.WriteHeader());
BOOST_CHECK(!w.Close());
BOOST_CHECK(!w.BuildIndex());
Expand All @@ -1004,8 +1004,6 @@ BOOST_AUTO_TEST_CASE( bam_write ) {

// check that set CRAM fails
BOOST_CHECK(!w.SetCramReference("dummy"));

//BOOST_CHECK_THROW(w.WriteHeader(), std::runtime_error);
BOOST_CHECK(!w.WriteHeader());

w.SetHeader(h);
Expand All @@ -1014,13 +1012,10 @@ BOOST_AUTO_TEST_CASE( bam_write ) {

size_t count = 0;

while(br.GetNextRecord(rec) && count++ < 10000) {
while(br.GetNextRecord(rec) && count++ < 10000)
w.WriteRecord(rec);
}


BOOST_CHECK(!w.BuildIndex());
//BOOST_CHECK_THROW(w.BuildIndex(), std::runtime_error);
w.Close();

w.BuildIndex();
Expand Down Expand Up @@ -1438,31 +1433,6 @@ BOOST_AUTO_TEST_CASE( plot_test ) {
// BOOST_CHECK_EQUAL(rec1.Qname(), rec2.Qname());
// }


BOOST_AUTO_TEST_CASE( bed_read ) {

SeqLib::BamReader r;
r.Open("test_data/small.bam");

SeqLib::GRC g(BEDFILE, r.Header());

BOOST_CHECK_EQUAL(g.size(), 3);
BOOST_CHECK_EQUAL(g[2].chr, 22);

}

BOOST_AUTO_TEST_CASE( vcf_read ) {

SeqLib::BamReader r;
r.Open("test_data/small.bam");

SeqLib::GRC g(VCFFILE, r.Header());

BOOST_CHECK_EQUAL(g.size(), 31);
BOOST_CHECK_EQUAL(g[29].chr, 22);

}

BOOST_AUTO_TEST_CASE (json_parse) {

SeqLib::BamReader r;
Expand Down
3 changes: 0 additions & 3 deletions seq_test/test_data/test.bed

This file was deleted.

Loading

0 comments on commit 480f831

Please sign in to comment.