Skip to content

Commit

Permalink
BAM indexing tested;
Browse files Browse the repository at this point in the history
renamed 'view' to 'merge' because 'bam' tools does most of the 'view'
functions.
  • Loading branch information
vadimzalunin committed Jan 21, 2013
1 parent 1c2cc7d commit e05f889
Show file tree
Hide file tree
Showing 10 changed files with 156 additions and 101 deletions.
4 changes: 2 additions & 2 deletions build.number
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
#Build Number for ANT. Do not edit!
#Fri Jan 11 16:47:26 GMT 2013
build.number=263
#Mon Jan 21 16:53:48 GMT 2013
build.number=272
Binary file modified cramtools-1.0.jar
Binary file not shown.
10 changes: 7 additions & 3 deletions src/main/java/net/sf/cram/Bam2Cram.java
Original file line number Diff line number Diff line change
Expand Up @@ -250,10 +250,11 @@ public static void main(String[] args) throws IOException,
{
String seqName = null;
SAMRecord samRecord = iterator.next();
if (samRecord == null) throw new RuntimeException("No records found.");
samRecords.add(samRecord);
if (samRecord == null)
throw new RuntimeException("No records found.");
seqName = samRecord.getReferenceName();
prevSeqId = samRecord.getReferenceIndex();
samRecords.add(samRecord);

if (samFileReader.getFileHeader().getReadGroups().isEmpty()
|| samFileReader.getFileHeader().getReadGroup(
Expand Down Expand Up @@ -314,7 +315,7 @@ public static void main(String[] args) throws IOException,
SAMRecord samRecord = iterator.next();
if (samRecord == null)
// no more records
break ;
break;
if (samRecord.getReferenceIndex() != prevSeqId
|| samRecords.size() >= params.maxContainerSize) {
if (!samRecords.isEmpty()) {
Expand Down Expand Up @@ -445,6 +446,9 @@ static class Params {
@Parameter(names = { "--preserve-read-names" }, description = "Preserve all read names.")
boolean preserveReadNames = false;

@Parameter(names = { "--lossless-quality-score", "-Q" }, description = "Preserve all quality scores. Overwrites '--lossless-quality-score'.")
boolean losslessQS = false;

@Parameter(names = { "--lossy-quality-score-spec", "-L" }, description = "A string specifying what quality scores should be preserved.")
String qsSpec = "";

Expand Down
15 changes: 6 additions & 9 deletions src/main/java/net/sf/cram/Cram2Bam.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,6 @@
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

Expand Down Expand Up @@ -118,7 +115,7 @@ public static void main(String[] args) throws IOException,
long offset = 0;
CountingInputStream cis = new CountingInputStream(is);
CramHeader cramHeader = ReadWrite.readCramHeader(cis);
offset = cis.getCount() ;
offset = cis.getCount();

List<Index.Entry> entries = null;
if (!params.locations.isEmpty() && params.cramFile != null) {
Expand Down Expand Up @@ -183,7 +180,7 @@ public static void main(String[] args) throws IOException,
SeekableStream ss = (SeekableStream) is;
Entry entry = entries.get(0);
ss.seek(entry.offset);
offset = entry.offset ;
offset = entry.offset;
} else
throw new RuntimeException(
"The input stream does not support random access.");
Expand All @@ -206,10 +203,10 @@ public static void main(String[] args) throws IOException,
Container c = null;
try {
time = System.nanoTime();
cis = new CountingInputStream(is) ;
cis = new CountingInputStream(is);
c = ReadWrite.readContainer(cramHeader.samFileHeader, cis);
c.offset = offset ;
offset += cis.getCount() ;
c.offset = offset;
offset += cis.getCount();
readTime += System.nanoTime() - time;
} catch (EOFException e) {
break;
Expand Down Expand Up @@ -267,7 +264,7 @@ public static void main(String[] args) throws IOException,
recordCount++;
continue;
}

if (ref != null)
Utils.calculateMdAndNmTags(s, ref, params.calculateMdTag,
params.calculateNmTag);
Expand Down
5 changes: 5 additions & 0 deletions src/main/java/net/sf/cram/CramTools.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ public class CramTools {
public static final String CRAM2BAM_COMMAND = "bam";
public static final String BAM2CRAM_COMMAND = "cram";
public static final String INDEX_COMMAND = "index";
public static final String MERGE_COMMAND = "merge";

private static Log log = Log.getInstance(CramTools.class);

Expand All @@ -39,10 +40,12 @@ public static void main(String[] args) throws Exception {
Cram2Bam.Params cram2BamParams = new Cram2Bam.Params();
Bam2Cram.Params bam2CramParams = new Bam2Cram.Params();
IndexCRAM.Params indexParams = new IndexCRAM.Params();
Merge.Params mergeParams = new Merge.Params();

jc.addCommand(CRAM2BAM_COMMAND, cram2BamParams);
jc.addCommand(BAM2CRAM_COMMAND, bam2CramParams);
jc.addCommand(INDEX_COMMAND, indexParams);
jc.addCommand(MERGE_COMMAND, mergeParams);

jc.parse(args);

Expand All @@ -68,6 +71,8 @@ else if (BAM2CRAM_COMMAND.equals(command))
Bam2Cram.main(commandArgs);
else if (INDEX_COMMAND.equals(command))
IndexCRAM.main(commandArgs);
else if (MERGE_COMMAND.equals(command))
Merge.main(commandArgs);

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.TreeMap;

import net.sf.picard.io.IoUtil;
import net.sf.samtools.BAMFileWriter;
Expand All @@ -26,15 +28,15 @@
import com.beust.jcommander.Parameters;
import com.beust.jcommander.converters.FileConverter;

public class View {
public class Merge {

public static void usage(JCommander jc) {
StringBuilder sb = new StringBuilder();
sb.append("\n");
jc.usage(sb);

System.out.println("Version "
+ View.class.getPackage().getImplementationVersion());
+ Merge.class.getPackage().getImplementationVersion());
System.out.println(sb.toString());
}

Expand Down Expand Up @@ -62,6 +64,7 @@ public static void main(String[] args) throws IOException {

List<SAMFileReader> readers = new ArrayList<SAMFileReader>(
params.files.size());
List<String> ids = new ArrayList<String>(params.files.size());
StringBuffer mergeComment = new StringBuffer("Merged from:");
for (File file : params.files) {
IoUtil.assertFileIsReadable(file);
Expand All @@ -75,11 +78,7 @@ public static void main(String[] args) throws IOException {
System.setProperty("reference",
params.reference.getAbsolutePath());
SAMFileReader reader = new SAMFileReader(file, index);

// reader = new CRAMFileReader(new SingleSeekableStreamFactory(new
// BufferedInputStream(new FileInputStream(
// file))), new FileInputStreamFactory(file), params.reference,
// index);
ids.add(file.getName());

readers.add(reader);

Expand Down Expand Up @@ -112,16 +111,17 @@ else if (!params.samFormat) {
writer = new SAMFileWriterFactory().makeSAMWriter(header, true,
new BufferedOutputStream(System.out));

List<SAMRecordIterator> iterators = new ArrayList<SAMRecordIterator>(
readers.size());
List<String> noCollisionIds = resolveCollisions(ids) ;
List<RecordSource> sources = new ArrayList<RecordSource>(readers.size());
int i = 0;
for (SAMFileReader reader : readers) {
SAMRecordIterator it = reader.query(query.sequence, query.start,
query.end, false);
iterators.add(it);
sources.add(new RecordSource(noCollisionIds.get(i++), it));
}

MergedSAMRecordIterator mergedIterator = new MergedSAMRecordIterator(
iterators, header);
sources, header);
while (mergedIterator.hasNext()) {
writer.addAlignment(mergedIterator.next());
}
Expand All @@ -143,10 +143,51 @@ else if (!params.samFormat) {
throw e;
}
}

private static List<String> resolveCollisions (List<String> list) {

ArrayList<String> result = new ArrayList<String>(list.size()) ;

// count list entries:
Map<String, Integer> idCountMap = new TreeMap<String, Integer>();
for (String id : list) {
if (idCountMap.containsKey(id)) {
idCountMap.put(id,
((Integer) idCountMap.get(id)).intValue() + 1);
} else
idCountMap.put(id, 1);
}

private static class MergedSAMRecordIterator implements SAMRecordIterator {
// append entries with their number of occurrence except for singletons:
for (int i = list.size() - 1; i >= 0; i--) {
String id = list.get(i);
int count = idCountMap.get(id);
if (count > 1) {
result.set(i, id + String.valueOf(count));
idCountMap.put(id, --count);
}
}

return result ;
}

private static class RecordSource {
private String id;
private SAMRecordIterator it;

private List<SAMRecordIterator> iterators;
public RecordSource() {
}

public RecordSource(String id, SAMRecordIterator it) {
this.id = id;
this.it = it;
}

}

private static class MergedSAMRecordIterator implements SAMRecordIterator {
private char delim = '.';
private List<RecordSource> sources;
private SAMRecord nextRecord;
private SAMFileHeader header;
private PriorityQueue<SAMRecord> queue = new PriorityQueue<SAMRecord>(
Expand All @@ -164,17 +205,17 @@ public int compare(SAMRecord o1, SAMRecord o2) {

});

public MergedSAMRecordIterator(List<SAMRecordIterator> iterators,
public MergedSAMRecordIterator(List<RecordSource> sources,
SAMFileHeader header) {
this.iterators = iterators;
this.sources = sources;
this.header = header;
nextRecord = doNext();
}

@Override
public void close() {
for (SAMRecordIterator it : iterators)
it.close();
for (RecordSource it : sources)
it.it.close();
}

@Override
Expand All @@ -185,18 +226,18 @@ public boolean hasNext() {
private boolean milk() {
boolean hasMore = false;
int counter = 0;
for (SAMRecordIterator it : iterators) {
for (RecordSource source : sources) {
counter++;
SAMRecordIterator it = source.it;
if (it.hasNext()) {
SAMRecord record;
try {
record = (SAMRecord) it.next().clone();
record.setReadName(String.valueOf(counter) + "_"
record.setReadName(source.id + delim
+ record.getReadName());
queue.add(record);
} catch (CloneNotSupportedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
throw new RuntimeException(e);
}
hasMore = true;
}
Expand Down Expand Up @@ -289,7 +330,7 @@ private static SAMFileHeader mergeHeaders(List<SAMFileReader> readers) {
return header;
}

@Parameters(commandDescription = "CRAM to BAM conversion. ")
@Parameters(commandDescription = "Tool to merge CRAM or BAM files. ")
static class Params {

@Parameter(names = { "--reference-fasta-file" }, converter = FileConverter.class, description = "Path to the reference fasta file, it must be uncompressed and indexed (use 'samtools faidx' for example).")
Expand Down
55 changes: 54 additions & 1 deletion src/main/java/net/sf/cram/Utils.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@

import java.io.File;
import java.io.FileNotFoundException;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
Expand All @@ -37,6 +40,8 @@
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import net.sf.samtools.SAMTag;

public class Utils {
Expand Down Expand Up @@ -389,7 +394,7 @@ public static void calculateMdAndNmTags(SAMRecord record, byte[] ref,
int nm = 0;
StringBuffer str = new StringBuffer();

int size = cigarElements.size();
int size = cigarElements.size();
for (i = y = 0, x = start; i < size; ++i) {
CigarElement ce = cigarElements.get(i);
int j, l = ce.getLength();
Expand Down Expand Up @@ -481,4 +486,52 @@ public int compare(int[] o1, int[] o2) {
return -(o1[1] - o2[1]);
}
};

public static void checkRefMD5(SAMSequenceDictionary d,
ReferenceSequenceFile refFile, boolean checkExistingMD5,
boolean failIfMD5Mismatch) throws NoSuchAlgorithmException {

for (SAMSequenceRecord r : d.getSequences()) {
ReferenceSequence sequence = refFile.getSequence(r
.getSequenceName());
if (!r.getAttributes().contains(SAMSequenceRecord.MD5_TAG)) {
String md5 = calculateMD5(sequence.getBases());
r.setAttribute(SAMSequenceRecord.MD5_TAG, md5);
} else {
if (checkExistingMD5) {
String existingMD5 = r.getAttribute(SAMSequenceRecord.MD5_TAG);
String md5 = calculateMD5(sequence.getBases());
if (!md5.equals(existingMD5)) {

String message = String
.format("For sequence %s the md5 %s does not match the actual md5 %s.",
r.getSequenceName(), existingMD5,
md5);

if (failIfMD5Mismatch)
throw new RuntimeException(message);
else
log.warn(message);
}
}
}
}
}


public static String calculateMD5(byte[] data)
throws NoSuchAlgorithmException {
MessageDigest md5_MessageDigest = MessageDigest.getInstance("MD5");
md5_MessageDigest.reset();
md5_MessageDigest.update(data);
return String.format("%032x",
new BigInteger(md5_MessageDigest.digest()));
}

public static void main(String[] args) throws NoSuchAlgorithmException {
System.out.println(calculateMD5("363".getBytes()));
System.out.println(calculateMD5("a".getBytes()));
System.out.println(calculateMD5("Ჾ蠇".getBytes()));
System.out.println(calculateMD5("jk8ssl".getBytes()));
}
}
Loading

0 comments on commit e05f889

Please sign in to comment.