Permalink
Browse files

basic indexing, briefly tested.

possibly fixed the 1-off insert size, not tested.
fixed bug with incorrectly read integer tags.
added insertions to the lossy spec which now needs extending.
unmaped reads get '*' for cigar now.
  • Loading branch information...
1 parent c87becd commit 903d3a7d6427d13c4142159535b7c5888b7f43b3 @vadimzalunin committed Nov 30, 2012
View
2 .classpath
@@ -5,7 +5,7 @@
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
<classpathentry kind="lib" path="lib/jcommander-1.7.jar"/>
<classpathentry kind="lib" path="lib/sam-1.79.jar" sourcepath="/picard/src/java"/>
- <classpathentry kind="lib" path="lib/picard-1.79.jar" sourcepath="/picard/trunk/src/java"/>
+ <classpathentry kind="lib" path="lib/picard-1.79.jar" sourcepath="/picard/src/java"/>
<classpathentry kind="lib" path="lib/ega-Cipher.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.junit.JUNIT_CONTAINER/4"/>
<classpathentry kind="output" path="bin"/>
View
BIN cramtools-1.0.jar
Binary file not shown.
View
38 src/main/java/net/sf/cram/Cram2Bam.java
@@ -13,6 +13,7 @@
import java.util.zip.GZIPInputStream;
import net.sf.cram.CramTools.LevelConverter;
+import net.sf.cram.Index.Entry;
import net.sf.cram.ReadWrite.CramHeader;
import net.sf.cram.structure.Container;
import net.sf.picard.reference.ReferenceSequence;
@@ -111,6 +112,9 @@ public static void main(String[] args) throws IOException,
List<Index.Entry> entries = null;
if (!params.locations.isEmpty() && params.cramFile != null) {
+ if (params.locations.size() > 1)
+ throw new RuntimeException("Only one location is supported.");
+
File indexFile = new File(params.cramFile.getAbsolutePath()
+ ".crai");
FileInputStream fis = new FileInputStream(indexFile);
@@ -158,15 +162,30 @@ public static void main(String[] args) throws IOException,
SAMFileWriter writer = createSAMFileWriter(params, cramHeader,
samFileWriterFactory);
- long recordCount = 0;
- while (true) {
+ if (!entries.isEmpty()) {
+ // position the stream for random access:
if (is instanceof SeekableStream) {
SeekableStream ss = (SeekableStream) is;
- }
+ Entry entry = entries.get(0) ;
+ ss.seek(entry.offset) ;
+ } else
+ throw new RuntimeException(
+ "The input stream does not support random access.");
+ }
+ long recordCount = 0;
+ long readTime = 0;
+ long parseTime = 0;
+ long normTime = 0;
+ long samTime = 0;
+ long writeTime = 0;
+ long time = 0;
+ while (true) {
Container c = null;
try {
+ time = System.nanoTime();
c = ReadWrite.readContainer(cramHeader.samFileHeader, is);
+ readTime += System.nanoTime() - time;
} catch (EOFException e) {
break;
}
@@ -179,8 +198,10 @@ public static void main(String[] args) throws IOException,
List<CramRecord> cramRecords = null;
try {
+ time = System.nanoTime();
cramRecords = BLOCK_PROTO.getRecords(c.h, c,
cramHeader.samFileHeader);
+ parseTime += System.nanoTime() - time;
} catch (EOFException e) {
throw e;
}
@@ -199,6 +220,7 @@ public static void main(String[] args) throws IOException,
ref, c.alignmentStart);
n.normalize(cramRecords, true);
long time2 = System.nanoTime();
+ normTime += time2 - time1;
Cram2BamRecordFactory c2sFactory = new Cram2BamRecordFactory(
cramHeader.samFileHeader);
@@ -207,12 +229,13 @@ public static void main(String[] args) throws IOException,
long sWriteTime = 0;
for (CramRecord r : cramRecords) {
- long time = System.nanoTime();
+ time = System.nanoTime();
SAMRecord s = c2sFactory.create(r);
if (ref != null)
Utils.calculateMdAndNmTags(s, ref, params.calculateMdTag,
params.calculateNmTag);
c2sTime += System.nanoTime() - time;
+ samTime += System.nanoTime() - time;
try {
if (params.requiredFlags != 0
@@ -229,6 +252,7 @@ public static void main(String[] args) throws IOException,
time = System.nanoTime();
writer.addAlignment(s);
sWriteTime += System.nanoTime() - time;
+ writeTime += System.nanoTime() - time;
if (params.outputFile == null && System.out.checkError())
break;
} catch (NullPointerException e) {
@@ -252,6 +276,12 @@ public static void main(String[] args) throws IOException,
System.out.println(recordCount);
writer.close();
+
+ log.warn(String
+ .format("TIMES: io %ds, parse %ds, norm %ds, convert %ds, BAM write %ds",
+ readTime / 1000000000, parseTime / 1000000000,
+ normTime / 1000000000, samTime / 1000000000,
+ writeTime / 1000000000));
}
private static SAMFileWriter createSAMFileWriter(Params params,
View
10 src/main/java/net/sf/cram/Cram2BamRecordFactory.java
@@ -35,8 +35,11 @@ public SAMRecord create(CramRecord cramRecord) {
samRecord.setReferenceIndex(cramRecord.sequenceId);
samRecord.setAlignmentStart(cramRecord.getAlignmentStart());
samRecord.setMappingQuality(cramRecord.getMappingQuality());
- samRecord.setCigar(getCigar2(cramRecord.getReadFeatures(),
- cramRecord.getReadLength()));
+ if (cramRecord.segmentUnmapped)
+ samRecord.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR);
+ else
+ samRecord.setCigar(getCigar2(cramRecord.getReadFeatures(),
+ cramRecord.getReadLength()));
if (samRecord.getReadPairedFlag()) {
samRecord.setMateReferenceIndex(cramRecord.mateSequnceID);
@@ -58,7 +61,8 @@ public SAMRecord create(CramRecord cramRecord) {
samRecord.setAttribute(tag.getKey(), tag.getValue());
if (cramRecord.getReadGroupID() < header.getReadGroups().size()) {
- SAMReadGroupRecord readGroupRecord = header.getReadGroups().get(cramRecord.getReadGroupID()) ;
+ SAMReadGroupRecord readGroupRecord = header.getReadGroups().get(
+ cramRecord.getReadGroupID());
samRecord.setAttribute("RG", readGroupRecord.getId());
}
View
8 src/main/java/net/sf/cram/CramRecord.java
@@ -26,7 +26,7 @@
import net.sf.cram.encoding.read_features.ReadFeature;
import net.sf.cram.encoding.read_features.SoftClipVariation;
-public class CramRecord implements Serializable{
+public class CramRecord implements Serializable {
public Collection<ReadTag> tags;
@@ -112,7 +112,7 @@ public int getFlags() {
public void setFlags(int value) {
int b = value;
-
+
duplicate = ((b & 1) == 0) ? false : true;
b >>>= 1;
vendorFiltered = ((b & 1) == 0) ? false : true;
@@ -407,7 +407,7 @@ public void setReadName(String readName) {
public int calcualteAlignmentEnd() {
if (readFeatures == null || readFeatures.isEmpty())
return alignmentStart + readLength;
-
+
int len = readLength;
for (ReadFeature f : readFeatures) {
switch (f.getOperator()) {
@@ -428,7 +428,7 @@ public int calcualteAlignmentEnd() {
break;
}
}
- return alignmentStart + len;
+ return alignmentStart + len - 1;
}
}
View
4 src/main/java/net/sf/cram/Index.java
@@ -119,8 +119,8 @@ protected Entry clone() throws CloneNotSupportedException {
if (index2 < 0)
index2 = -index2 - 1;
- if (index2 < index)
- index2 = index;
+ if (index2 <= index)
+ index2 = index+1;
return list.subList(index, index2);
}
View
1 src/main/java/net/sf/cram/ReadTag.java
@@ -184,6 +184,7 @@ public String getKeyAndType() {
public static Object restoreValueFromByteArray(char type, byte[] array) {
ByteBuffer buf = ByteBuffer.wrap(array) ;
+ buf.order(ByteOrder.LITTLE_ENDIAN) ;
return readSingleValue((byte) type, buf, null) ;
}
View
3 src/main/java/net/sf/cram/ReadWrite.java
@@ -2,6 +2,7 @@
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
+import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
@@ -539,7 +540,7 @@ private static long writeContainer(SAMFileHeader samFileHeader,
os.write(bytes);
os.write(baos.getBuffer(), 0, baos.size());
- return baos.size() ;
+ return bytes.length + baos.size() ;
}
private static SAMFileHeader readSAMFileHeader(String id, InputStream is)
View
8 src/main/java/net/sf/cram/Utils.java
@@ -288,9 +288,6 @@ public static int computeInsertSize(CramRecord firstEnd,
int adjustment = (secondEnd5PrimePosition >= firstEnd5PrimePosition) ? +1
: -1;
- // this seems to correlate with reality more, although Picard disagrees:
- adjustment = -adjustment ;
-
return secondEnd5PrimePosition - firstEnd5PrimePosition + adjustment;
}
@@ -381,8 +378,9 @@ public static ReferenceSequence getReferenceSequenceOrNull(
*/
public static void calculateMdAndNmTags(SAMRecord record, byte[] ref,
boolean calcMD, boolean calcNM) {
- if (!calcMD && !calcMD) return ;
-
+ if (!calcMD && !calcMD)
+ return;
+
Cigar cigar = record.getCigar();
List<CigarElement> cigarElements = cigar.getCigarElements();
byte[] seq = record.getReadBases();
View
4 src/main/java/net/sf/cram/lossy/BaseCategory.java
@@ -29,6 +29,10 @@ public static BaseCategory lower_than_coverage(int coverage) {
return new BaseCategory(BaseCategoryType.LOWER_COVERAGE, coverage);
};
+ public static BaseCategory insertion() {
+ return new BaseCategory(BaseCategoryType.INSERTION, -1);
+ };
+
@Override
public String toString() {
return String.format("[%s%d]", type.name(), param) ;
View
34 src/main/java/net/sf/cram/lossy/QualityScorePreservation.java
@@ -45,28 +45,18 @@ public int compare(PreservationPolicy o1, PreservationPolicy o2) {
}
});
}
+
+ public List<PreservationPolicy> getPreservationPolicies () {
+ return policyList ;
+ }
private static final int readParam(LinkedList<Character> list) {
int value = 0;
- while (!list.isEmpty() && Character.isDigit(list.getFirst())) {
+ while (!list.isEmpty() && Character.isDigit(list.getFirst()))
value = value * 10 + (list.removeFirst() - 48);
- }
return value;
- // char b1 = list.removeFirst();
- // if (!Character.isDigit(b1))
- // throw new RuntimeException("Expecting a digit but got: " + b1);
- //
- // if (list.isEmpty())
- // return b1 - 48;
- //
- // char b2 = list.getFirst();
- // if (!Character.isDigit(b2))
- // return b1 - 48;
- //
- // b2 = list.removeFirst();
- // return (b1 - 48) * 10 + (b2 - 48);
}
private static final QualityScoreTreatment readTreament(
@@ -128,9 +118,21 @@ private static final PreservationPolicy parseSinglePolicy(String spec) {
p.treatment = readTreament(list);
break;
case 'P':
- p.baseCategories.add(BaseCategory.pileup(3));
+ int mismatches = readParam(list) ;
+ p.baseCategories.add(BaseCategory.pileup(mismatches));
+ p.treatment = readTreament(list);
+ break;
+ case 'I':
+ p.baseCategories.add(BaseCategory.insertion());
p.treatment = readTreament(list);
break;
+// case '_':
+// p.treatment = readTreament(list);
+// break;
+// case '*':
+// p.readCategory = ReadCategory.all();
+// p.treatment = readTreament(list);
+// break;
default:
throw new RuntimeException("Uknown read or base category: "
View
4 src/main/java/net/sf/cram/lossy/ReadCategory.java
@@ -22,6 +22,10 @@ public static ReadCategory lower_than_mapping_score(int score) {
return new ReadCategory(ReadCategoryType.LOWER_MAPPING_SCORE, score);
};
+ public static ReadCategory all() {
+ return new ReadCategory(ReadCategoryType.ALL, -1);
+ };
+
@Override
public String toString() {
return String.format("[%s%d]", type.name(), param) ;
View
2 src/main/java/net/sf/cram/lossy/ReadCategoryType.java
@@ -1,7 +1,7 @@
package net.sf.cram.lossy;
public enum ReadCategoryType {
- UNPLACED('P'), HIGHER_MAPPING_SCORE('M'), LOWER_MAPPING_SCORE('m');
+ UNPLACED('P'), HIGHER_MAPPING_SCORE('M'), LOWER_MAPPING_SCORE('m'), ALL ('*');
public char code;

0 comments on commit 903d3a7

Please sign in to comment.