Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

fixed doc issues

  • Loading branch information...
commit f8ef833b270198594bc515e354d36210b9fe0bc6 1 parent bbfe398
Vince Buffalo authored March 19, 2012
5  .Rbuildignore
... ...
@@ -0,0 +1,5 @@
  1
+\.git.*
  2
+\.DS_Store
  3
+\.RData
  4
+\.Rbuildignore
  5
+\.Rhistory
4  DESCRIPTION
... ...
@@ -1,5 +1,5 @@
1 1
 Package: qrqc
2  
-Version: 1.9.4
  2
+Version: 1.9.5
3 3
 Date: 2012-02-20
4 4
 Title: Quick Read Quality Control
5 5
 Author: Vince Buffalo
@@ -13,5 +13,5 @@ Description: Quickly scans reads and gathers statistics on base and quality
13 13
   optional HTML quality report. S4 SequenceSummary objects allow specific tests 
14 14
   and functionality to be written around the data collected.
15 15
 License: GPL (>=2)
16  
-URL: http://bioinformatics.ucdavis.edu
  16
+URL: http://github.com/vsbuffalo/qrqc
17 17
 biocViews: Sequencing, QualityControl, DataImport, Preprocessing, Visualization, HighThroughputSequencing
2  R/AllClasses.R
@@ -6,7 +6,7 @@ setClass("SequenceSummary",
6 6
            filename='character',
7 7
            base.freqs='data.frame',
8 8
            seq.lengths='integer',
9  
-           hash='integer',
  9
+           hash='numeric',
10 10
            hash.prop='numeric',
11 11
            kmer='data.frame',
12 12
            k='integer',
6  R/main.R
@@ -80,8 +80,12 @@ function(filename, type=c("fastq", "fasta"), max.length=1000, quality=c("sanger"
80 80
     # hashed
81 81
     obj@kmer <- local({
82 82
       tmp <- unlist(out$kmer)
  83
+      if (!is.finite(tmp))
  84
+        warning(paste("Some k-mer counts are infinite, meaning there was",
  85
+                      "occurence of a k-mer over the maximum double size."))
83 86
       kmer <- do.call(rbind, strsplit(names(tmp), '-'))
84  
-      data.frame(kmer=kmer[, 1], position=as.integer(kmer[, 2]), count=tmp, row.names=NULL)
  87
+      data.frame(kmer=kmer[, 1], position=as.integer(kmer[, 2]),
  88
+                 count=as.numeric(tmp), row.names=NULL)
85 89
     })
86 90
     obj@hash.prop <- hash.prop
87 91
     obj@k <- as.integer(k)
4  man/basePlot-methods.Rd
@@ -19,7 +19,7 @@
19 19
 }
20 20
 
21 21
 \arguments{
22  
-  \item{x}{an S4 object that inherits from \code{SequenceSummary} from
  22
+  \item{x}{an S4 object that inherits from \code{\link[=SequenceSummary-class]{SequenceSummary}} from
23 23
     \code{readSeqFile}.}
24 24
   \item{geom}{Either "line", "bar", or "dodge" indicating
25 25
     the geom to use when plotting the bases. "line" will plot base
@@ -76,7 +76,7 @@
76 76
     geom_hline(yintercept=0.25, color="purple")
77 77
 }
78 78
 
79  
-\seealso{getBase,getBaseProp}
  79
+\seealso{\code{\link{getBase}}, \code{\link{getBaseProp}}}
80 80
 \keyword{methods}
81 81
 \keyword{graphics}
82 82
 
3  man/calcKL.Rd
@@ -8,7 +8,7 @@
8 8
 \description{
9 9
   
10 10
   \code{calcKL} takes in an object that inherits from
11  
-  \code{SequenceSummary} that has a kmers slot, and returns the terms of
  11
+  \code{\link[=SequenceSummary-class]{SequenceSummary}} that has a kmers slot, and returns the terms of
12 12
   the K-L divergence sum (which correspond to items in the sample space,
13 13
   in this case, k-mers). 
14 14
 
@@ -53,5 +53,6 @@
53 53
   p + scale_y_continuous("K-L divergence")
54 54
 }
55 55
 
  56
+\seealso{\code{\link{kmerKLPlot}}, \code{\link{getKmer}}}
56 57
 \keyword{methods}
57 58
 \keyword{graphics}
3  man/gcPlot-methods.Rd
@@ -50,7 +50,8 @@
50 50
   gcPlot(s.trimmed.fastq) + geom_hline(yintercept=0.5, color="purple")
51 51
 }
52 52
 
53  
-\seealso{getBase,getBaseProp}
  53
+\seealso{\code{\link{getBase}}, \code{\link{getBaseProp}}}
  54
+
54 55
 \keyword{methods}
55 56
 \keyword{graphics}
56 57
 
7  man/geom_qlinerange.Rd
@@ -3,8 +3,9 @@
3 3
 \title{Use Line Segments and Points to Plot Quality Statistics by Position in the Read}
4 4
 \description{
5 5
   \code{geom_qlinerange} uses multiple line segments and points to plot
6  
-  quality ranges. By default the 10\% and 90\% range in plotted in grey, the quartile
7  
-  range in orange, and the mean as a point in blue.
  6
+  quality ranges. By default the 10\% and 90\% range is plotted in grey,
  7
+  the quartile range in orange, and the mean as a point in blue. It is
  8
+  used in \code{\link{qualPlot}}.
8 9
 }
9 10
 
10 11
 \usage{
@@ -31,6 +32,6 @@ geom_qlinerange(extreme.color="grey", quartile.color="orange", mean.color="blue"
31 32
 }
32 33
 
33 34
 \author{Vince Buffalo <vsbuffalo@ucdavis.edu>}
34  
-\seealso{getQual}
  35
+\seealso{\code{\link{getQual}}, \code{\link{qualPlot}}}
35 36
 \keyword{methods}
36 37
 \keyword{graphics}
8  man/getBase-methods.Rd
@@ -5,14 +5,14 @@
5 5
 \alias{getBase,SequenceSummary-method}
6 6
 \title{Get a Data Frame of Base Frequency Data from a \code{SequenceSummary} Object}
7 7
 \description{
8  
-  An object that inherits from class \code{SequenceSummary} contains
9  
-  base frequency data by position gathered by \code{readSeqFile}. \code{getBase}
  8
+  An object that inherits from class \code{\link[=SequenceSummary-class]{SequenceSummary}} contains
  9
+  base frequency data by position gathered by \code{\link{readSeqFile}}. \code{\link{getBase}}
10 10
   is an accessor function that reshapes the base frequency data by position
11 11
   into a data frame.
12 12
 
13 13
   This accessor function is useful if you want to map variables to
14 14
   custom \code{ggplot2} aesthetics. Base proportions can be accessed
15  
-  with \code{getBaseProp}.
  15
+  with \code{\link{getBaseProp}}.
16 16
 }
17 17
 
18 18
 
@@ -57,6 +57,6 @@
57 57
 }
58 58
 
59 59
 
60  
-\seealso{getGC,getSeqlen,getBaseProp,getQual,getMCQual,basePlot}
  60
+\seealso{\code{\link{getGC}}, \code{\link{getSeqlen}}, \code{\link{getBaseProp}}, \code{\link{getQual}}, \code{\link{getMCQual}}, \code{\link{basePlot}}}
61 61
 \keyword{methods}
62 62
 \keyword{accessor}
3  man/getBaseProp-methods.Rd
@@ -56,6 +56,7 @@
56 56
     color=base)) + facet_grid(. ~ base) + scale_color_dna()
57 57
 }
58 58
 
59  
-\seealso{getGC,getSeqlen,getBase,getQual,getMCQual,basePlot}
  59
+\seealso{\code{\link{getGC}}, \code{\link{getSeqlen}}, \code{\link{getBase}}, \code{\link{getQual}}, \code{\link{getMCQual}}, \code{\link{basePlot}}}
  60
+
60 61
 \keyword{methods}
61 62
 \keyword{accessor}
13  man/getGC-methods.Rd
@@ -5,15 +5,16 @@
5 5
 \alias{getGC,SequenceSummary-method}
6 6
 \title{Get a Data Frame of GC Content from a \code{SequenceSummary} object}
7 7
 \description{
8  
-  An object that inherits from class \code{SequenceSummary} contains
9  
-  base frequency data by position gathered by \code{readSeqFile}. \code{getGC}
  8
+  An object that inherits from class \code{\link[=SequenceSummary-class]{SequenceSummary}} contains
  9
+  base frequency data by position gathered by
  10
+  \code{\link{readSeqFile}}. \code{\link{getGC}} 
10 11
   is an accessor function that reshapes the base frequency data into a
11 12
   data frame and returns the GC content by position.
12 13
 
13 14
   This accessor function is useful if you want to map variables to
14 15
   custom \code{ggplot2} aesthetics. Frequencies or proportions of all
15  
-  bases (not just GC) can be accessed with \code{getBase} and
16  
-  \code{getBaseProp} respectively.
  16
+  bases (not just GC) can be accessed with \code{\link{getBase}} and
  17
+  \code{\link{getBaseProp}} respectively.
17 18
 }
18 19
 
19 20
 \usage{
@@ -55,7 +56,9 @@
55 56
     high="blue") + scale_y_continuous("GC content")
56 57
   p
57 58
 }
  59
+\seealso{\code{\link{getSeqlen}}, \code{\link{getBase}},
  60
+  \code{\link{getBaseProp}}, \code{\link{getQual}},
  61
+  \code{\link{getMCQual}}, \code{\link{getKmer}}, \code{\link{gcPlot}}}
58 62
 
59  
-\seealso{getSeqlen,getBase,getBaseProp,getQual,getMCQual,cPlot}
60 63
 \keyword{methods}
61 64
 \keyword{accessor}
11  man/getKmer-methods.Rd
@@ -7,9 +7,9 @@
7 7
 \title{Get a Data Frame of k-mer Frequency by Position from a \code{SequenceSummary} Object}
8 8
 
9 9
 \description{
10  
-  An object that inherits from class \code{SequenceSummary} contains
11  
-  k-mer frequency data by position gathered by \code{readSeqFile} when
12  
-  \code{kmer=TRUE}. \code{getKmer} is an accessor function that is
  10
+  An object that inherits from class \code{\link[=SequenceSummary-class]{SequenceSummary}} contains
  11
+  k-mer frequency data by position gathered by \code{\link{readSeqFile}} when
  12
+  \code{kmer=TRUE}. \code{\link{getKmer}} is an accessor function that is
13 13
   useful for custom \code{ggplot2} aesthetics.
14 14
 }
15 15
 
@@ -51,6 +51,9 @@ getKmer(x)
51 51
     fill=kmer), stat="identity")
52 52
   p
53 53
 }
54  
-\seealso{getGC,getSeqlen,getBase,getBaseProp,getMCQual,kmerKLPlot,kmerEntropyPlot}
  54
+\seealso{\code{\link{getGC}}, \code{\link{getSeqlen}}, \code{\link{getBase}},
  55
+  \code{\link{getBaseProp}}, \code{\link{getQual}},
  56
+  \code{\link{getMCQual}}, \code{\link{kmerKLPlot}}, \code{\link{kmerEntropyPlot}}}
  57
+
55 58
 \keyword{methods}
56 59
 \keyword{accessor}
12  man/getMCQual-methods.Rd
@@ -5,10 +5,11 @@
5 5
 \alias{getMCQual,FASTQSummary-method}
6 6
 \title{Get a Data Frame of Simulated Qualities from a \code{FASTQSummary} object}
7 7
 \description{
8  
-  An object that inherits from class \code{FASTQSummary} contains
9  
-  base quality data by position gathered by \code{readSeqFile}. \code{getMCQual}
10  
-  generates simulated quality data for each base from this binned
11  
-  quality data that can be used for adding smoothed lines via lowess.  
  8
+  An object that inherits from class \code{\link[=FASTQSummary-class]{FASTQSummary}} contains
  9
+  base quality data by position gathered by
  10
+  \code{\link{readSeqFile}}. \code{\link{getMCQual}} generates simulated
  11
+  quality data for each base from this binned  quality data that can be
  12
+  used for adding smoothed lines via lowess.  
12 13
 
13 14
   This accessor function is useful if you want to map variables to
14 15
   custom \code{ggplot2} aesthetics.
@@ -53,8 +54,7 @@
53 54
     ymax=upper), color="grey") + geom_smooth(aes(x=position, y=quality),
54 55
     data=getMCQual(s.fastq), color="blue", se=FALSE)
55 56
 }
56  
-
57  
-\seealso{getGC,getSeqlen,getBase,getBasePropgetQual,qualPlot}
  57
+\seealso{\code{\link{getGC}}, \code{\link{getSeqlen}}, \code{\link{getBase}}, \code{\link{getBaseProp}}, \code{\link{getQual}}, \code{\link{qualPlot}}}
58 58
 
59 59
 \keyword{methods}
60 60
 \keyword{accessor}
10  man/getQual-methods.Rd
@@ -8,9 +8,8 @@
8 8
   object}
9 9
 
10 10
 \description{
11  
-  An object of class \code{FASTQSummary} contains quality
12  
-  data (binned by \code{readSeqFile}). \code{getQual} is an accessor
13  
-  function that reshapes the data into a data frame.
  11
+  An object of class \code{\link[=FASTQSummary-class]{FASTQSummary}} contains quality data (binned by \code{\link{readSeqFile}}). \code{\link{getQual}} is
  12
+  an accessor  function that reshapes the data into a data frame.  
14 13
 
15 14
   This accessor function is useful if you want to map variables to
16 15
   custom \code{ggplot2} aesthetics.
@@ -65,6 +64,9 @@ getQual(x)
65 64
   p <- p +  scale_color_gradient("mean quality", low="red", high="green")
66 65
   p + scale_y_continuous("quality")
67 66
 }
68  
-\seealso{getGC,getSeqlen,getBase,getBaseProp,getMCQual,qualPlot}
  67
+\seealso{\code{\link{getGC}}, \code{\link{getSeqlen}},
  68
+  \code{\link{getBase}}, \code{\link{getBaseProp}},
  69
+  \code{\link{getMCQual}}, \code{\link{qualPlot}}}
  70
+
69 71
 \keyword{methods}
70 72
 \keyword{accessor}
4  man/getSeqlen-methods.Rd
@@ -56,7 +56,9 @@
56 56
     color="blue") + scale_y_continuous("quality/count") + theme_bw()
57 57
 }
58 58
 
59  
-\seealso{getGC,getBase,getBaseProp,getQual,getMCQual,seqlenPlot}
  59
+\seealso{\code{\link{getGC}}, \code{\link{getBase}},
  60
+  \code{\link{getBaseProp}}, \code{\link{getQual}},
  61
+  \code{\link{getMCQual}}, \code{\link{seqlenPlot}}}
60 62
 
61 63
 \keyword{methods}
62 64
 \keyword{accessor}
1  man/kmerEntropyPlot.Rd
@@ -59,5 +59,6 @@
59 59
     contaminated"=s.fastq, "random"=s.random.fasta))
60 60
 }
61 61
 
  62
+\seealso{\code{\link{getKmer}}, \code{\link{calcKL}}, \code{\link{kmerKLPlot}}}
62 63
 \keyword{methods}
63 64
 \keyword{graphics}
5  man/kmerKLPlot.Rd
@@ -7,7 +7,7 @@
7 7
 \title{Plot K-L Divergence Components for a Subset of k-mers to Inspect for Contamination}
8 8
 \description{
9 9
   
10  
-  \code{kmerKLPlot} calls \code{calcKL}, which calculates the
  10
+  \code{kmerKLPlot} calls \code{\link{calcKL}}, which calculates the
11 11
   Kullback-Leibler divergence between the k-mer distribution at each
12 12
   position compared to the k-mer distribution across all
13 13
   positions. \code{kmerKLPlot} then plots each k-mer's contribution to
@@ -89,6 +89,7 @@
89 89
   suppressWarnings(kmerKLPlot(list("highly contaminated"=s.contam.fastq, "less
90 90
     contaminated"=s.fastq, "random"=s.random.fasta)))
91 91
 }
92  
-
  92
+\seealso{\code{\link{getKmer}}, \code{\link{calcKL}},
  93
+  \code{\link{kmerEntropyPlot}}}
93 94
 \keyword{methods}
94 95
 \keyword{graphics}
2  man/makeReport.Rd
@@ -4,7 +4,7 @@
4 4
 \alias{makeReport-methods}
5 5
 \title{Make an HTML report from a FASTASummary or FASTQSummary object}
6 6
 \description{
7  
-  \code{makeReport} takes a \code{FASTQSummary} or \code{FASTASummary}
  7
+  \code{makeReport} takes a \code{\link[=FASTQSummary-class]{FASTQSummary}} or \code{\link[=FASTASummary-class]{FASTASummary}}
8 8
   object, creates an HTML report, and writes it to a file within a
9 9
   directory. The directory naming is incremental so past reports will
10 10
   not be overwritten.
2  man/plotBases.Rd
@@ -49,5 +49,5 @@
49 49
   plotBases(s.fastq, type="prop")
50 50
 }
51 51
 }
52  
-\seealso{basePlot}
  52
+\seealso{\code{\link{basePlot}}}
53 53
 \keyword{graphics}
3  man/plotGC.Rd
... ...
@@ -1,3 +1,4 @@
  1
+
1 2
 \name{plotGC-methods}
2 3
 \docType{methods}
3 4
 \alias{plotGC}
@@ -24,5 +25,5 @@
24 25
   plotGC(s.fastq)
25 26
 }
26 27
 }
27  
-\seealso{gcPlot}
  28
+\seealso{\code{\link{gcPlot}}}
28 29
 \keyword{graphics}
2  man/plotQuals.Rd
@@ -44,6 +44,6 @@
44 44
   plotQuals(s.fastq)
45 45
 }
46 46
 }
47  
-\seealso{qualPlot}
  47
+\seealso{\code{\link{qualPlot}}}
48 48
 \keyword{graphics}
49 49
 \keyword{methods}
2  man/plotSeqLengths.Rd
@@ -24,5 +24,5 @@
24 24
   plotSeqLengths(s.fastq)
25 25
 }
26 26
 }
27  
-\seealso{seqlenPlot}
  27
+\seealso{\code{\link{seqlenPlot}}}
28 28
 \keyword{graphics}
2  man/qualPlot-methods.Rd
@@ -73,6 +73,6 @@
73 73
   qualPlot(list("not trimmed"=s.fastq, "trimmed"=s.trimmed.fastq))
74 74
 }
75 75
 
76  
-\seealso{getQual}
  76
+\seealso{\code{\link{getQual}}}
77 77
 \keyword{methods}
78 78
 \keyword{graphics}
10  man/readSeqFile.Rd
@@ -86,20 +86,22 @@
86 86
   \code{\link[=FASTASummary-class]{FASTASummary}} are the classes of the
87 87
   objects returned by \code{readSeqFile}.
88 88
 
89  
-  \code{\link{plotBases}} is a function that plots the distribution of
  89
+  \code{\link{basePlot}} is a function that plots the distribution of
90 90
   bases over sequence length for a particular \code{FASTASummary} or
91  
-  \code{FASTQSummary} object. \code{\link{plotGC}} combines and plots
  91
+  \code{FASTQSummary} object. \code{\link{gcPlot}} combines and plots
92 92
   the GC proportion.
93 93
   
94 94
     
95  
-  \code{\link{plotQuals}} is a function that plots the distribution of
  95
+  \code{\link{qualPlot}} is a function that plots the distribution of
96 96
   qualities over sequence length for a particular \code{FASTASummary}
97 97
   or \code{FASTQSummary} object.
98 98
 
99  
-  \code{\link{plotSeqLengths}} is a function that plots a histogram of
  99
+  \code{\link{seqlenPlot}} is a function that plots a histogram of
100 100
   sequence lengths for a particular \code{FASTASummary} or
101 101
   \code{FASTQSummary} object.
102 102
 
  103
+  \code{\link{kmerKLPlot}} is a function that plots K-L divergence
  104
+  of k-mers to look for possible biase in reads.
103 105
 }
104 106
 
105 107
 
2  man/scale_color_dna.Rd
@@ -20,5 +20,5 @@ scale_color_dna()
20 20
     color=base)) + scale_color_dna()
21 21
 }
22 22
 
23  
-\seealso{scale_color_iupac,plotBase}
  23
+\seealso{\code{\link{scale_color_iupac}}, \code{\link{basePlot}}}
24 24
 \keyword{graphics}
2  man/scale_color_iupac.Rd
@@ -20,5 +20,5 @@ scale_color_iupac()
20 20
     color=base)) + scale_color_iupac()
21 21
 }
22 22
 
23  
-\seealso{scale_color_dna,plotBase}
  23
+\seealso{\code{\link{scale_color_dna}}, \code{\link{basePlot}}}
24 24
 \keyword{graphics}
2  man/seqlenPlot-methods.Rd
@@ -46,7 +46,7 @@
46 46
   seqlenPlot(list("not trimmed"=s.fastq, "trimmed"=s.trimmed.fastq))
47 47
 }
48 48
 
49  
-\seealso{getSeqlen}
  49
+\seealso{\code{\link{getSeqlen}}}
50 50
 \keyword{methods}
51 51
 \keyword{graphics}
52 52
 
40  src/io.c
@@ -34,7 +34,7 @@ KSEQ_INIT(FILE_TYPE*, gzreadclone)
34 34
 KSEQ_INIT(gzFile, gzread)
35 35
 #endif
36 36
 
37  
-KHASH_MAP_INIT_STR(str, int)
  37
+KHASH_MAP_INIT_STR(str, double)
38 38
 
39 39
 #define INIT_MAX_SEQ 500
40 40
 #define NUM_BASES 16 /* includes all IUPAC codes. */
@@ -186,10 +186,14 @@ static void add_seq_to_khash(khash_t(str) *h, kseq_t *block, unsigned int *num_u
186 186
   is_missing = (k == kh_end(h));
187 187
   if (is_missing) {
188 188
     k = kh_put(str, h, strdup(block->seq.s), &ret);
189  
-    kh_value(h, k) = 1;
  189
+    kh_value(h, k) = 1.0;
190 190
     (*num_unique_seqs)++;
191  
-  } else
192  
-    kh_value(h, k) = kh_value(h, k) + 1;
  191
+  } else {
  192
+    if (!R_FINITE(kh_value(h, k)) || !R_FINITE(1 + kh_value(h, k)))
  193
+      kh_value(h, k) = R_PosInf;
  194
+    else
  195
+      kh_value(h, k) = kh_value(h, k) + 1.0;
  196
+  }
193 197
 }
194 198
 
195 199
 static void hash_seq_kmers(int k, khash_t(str) *h, kseq_t *block, unsigned int *num_unique_kmers) {
@@ -205,19 +209,14 @@ static void hash_seq_kmers(int k, khash_t(str) *h, kseq_t *block, unsigned int *
205 209
     when represented as a string (log10(x) + 1): this is where the
206 210
      expression in Calloc comes from.
207 211
 
208  
-
209 212
      # Memory
210 213
      
211  
-     This method uses an int to store a k-mer. There 4^k possible
212  
-     k-mers, and say each int is 4 bytes. For a read of length l,
  214
+     This method uses a double to store a k-mer count. There are 4^k possible
  215
+     k-mers, and say each double is 8 bytes. For a read of length l,
213 216
     there are l-k k-mer positions, with worst-case scenario being a
214  
-     different k-mer at each position. This would mean (l-k)*4^k*4
  217
+     different k-mer at each position. This would mean (l-k)*4^k*8
215 218
      bytes to hold the k-mers, not including hashing overhead.
216 219
 
217  
-     TODO:
218  
-      - if k-mer count is greater than SINT_MAX, Inf?
219  
-      - k-mer should have k > 2
220  
-      - if a genome is entirely AAAAA, k=5, how long before overrun?
221 220
   */
222 221
   char *a_kmer = Calloc(k + 2 + log10(SINT_MAX), char), *start_ptr;
223 222
   int i;
@@ -241,13 +240,13 @@ static void hash_seq_kmers(int k, khash_t(str) *h, kseq_t *block, unsigned int *
241 240
     is_missing = (key == kh_end(h));
242 241
     if (is_missing) {
243 242
       key = kh_put(str, h, strdup(a_kmer), &ret);
244  
-      kh_value(h, key) = 1;
  243
+      kh_value(h, key) = 1.0;
245 244
       (*num_unique_kmers)++;
246 245
     } else {
247  
-      /* if (kh_value(h, key) > 3 || !R_FINITE(kh_value(h, key))) //SINT_MAX) */
248  
-      /*   kh_value(h, key) = R_PosInf; */
249  
-      /* else */
250  
-      kh_value(h, key) = kh_value(h, key) + 1;
  246
+      if (!R_FINITE(kh_value(h, k)) || !R_FINITE(1 + kh_value(h, k)))
  247
+        kh_value(h, key) = R_PosInf;
  248
+      else
  249
+        kh_value(h, key) = kh_value(h, key) + 1.0;
251 250
     }
252 251
   }
253 252
 
@@ -268,7 +267,7 @@ static void seq_khash_to_VECSXP(khash_t(str) *h, SEXP seq_hash, SEXP seq_hash_na
268 267
     R_CheckUserInterrupt();
269 268
     if (kh_exist(h, k)) {
270 269
       SET_VECTOR_ELT(seq_hash_names, i, mkString(kh_key(h, k)));
271  
-      SET_VECTOR_ELT(seq_hash, i, ScalarInteger(kh_value(h, k)));
  270
+      SET_VECTOR_ELT(seq_hash, i, ScalarReal(kh_value(h, k)));
272 271
       /* per the comment here
273 272
          (http://attractivechaos.wordpress.com/2009/09/29/khash-h/),
274 273
          using character arrays keys with strdup must be freed during
@@ -337,7 +336,10 @@ extern SEXP summarize_file(SEXP filename, SEXP max_length, SEXP quality_type, SE
337 336
     if (LOGICAL(verbose)[0])
338 337
       Rprintf("initiating k-mer hash...");
339 338
     hkmer = kh_init(str);
340  
-    kh_resize(str, hkmer, (int) gammafn(kn + 1)); /* pre-allocate all possible k-mers */
  339
+    /*
  340
+      Pre-allocate for some possible k-mers.
  341
+    */
  342
+    kh_resize(str, hkmer, 1572869);
341 343
     size_out_list++;
342 344
   }
343 345
 

0 notes on commit f8ef833

Please sign in to comment.
Something went wrong with that request. Please try again.