Skip to content

Commit

Permalink
small optimisations to match functions & sum
Browse files Browse the repository at this point in the history
Managed to decrease runtime to ~75% of what it was with some simple
optimisations. md5sums of output appear the same.
  • Loading branch information
kdm9 committed Apr 14, 2014
1 parent 9b40df7 commit 6c6e6a6
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 13 deletions.
20 changes: 11 additions & 9 deletions src/match.c
Expand Up @@ -8,7 +8,7 @@
#include <string.h>
#include "scythe.h"

int *score_sequence(const char *seq, const char *pattern, int n) {
int *score_sequence(const char *seq, const char *pattern, size_t n) {
/*
Score a string using constant match and mismatch scores. Assumes
seq is longer than or of equal length to pattern. Only the first
Expand All @@ -20,10 +20,12 @@ int *score_sequence(const char *seq, const char *pattern, int n) {
|----pattern-----|
*/
int i;
size_t i;
int *matches;
/* These are addressed by the assert below
assert(strlen(seq) >= n);
assert(strlen(pattern) >= n);
*/
matches = xmalloc(sizeof(int) * n);
for (i = 0; i < n; i++) {
assert(seq[i] && pattern[i]); /* no string termination */
Expand All @@ -35,21 +37,21 @@ int *score_sequence(const char *seq, const char *pattern, int n) {
return matches;
}

match *find_best_match(const adapter_array *aa, const char *read,
match *find_best_match(const adapter_array *aa, const char *read,
float *p_quals, float prior, float p_match, int min_l) {
/*
/*
Take an adapter array, and check the read against all
adapters. Brute force string matching is used. This is to avoid
approximate matching algorithms which require an a priori
specified number of mismatches.
*/

match *best_match=NULL;
int i, shift, max_shift, found_contam=0;
int *best_arr=NULL, best_adapter=0, best_length=0, best_shift=0, best_score=INT_MIN;
int al, curr_score, *curr_arr=NULL;
int rl = strlen(read);
int curr_score, *curr_arr=NULL;
size_t al, rl = strlen(read);
posterior_set *ps=NULL;
float *best_p_quals=NULL;

Expand All @@ -61,11 +63,11 @@ match *find_best_match(const adapter_array *aa, const char *read,
"equal to length of adapter.\n");
exit(EXIT_FAILURE);
}
al = min(aa->adapters[i].length, strlen(&(read)[shift]));
al = min(aa->adapters[i].length, rl - shift);
curr_arr = score_sequence(&(read)[shift], (aa->adapters[i]).seq, al);
curr_score = sum(curr_arr, al);
if (curr_score > best_score) {
best_score = curr_score;
best_score = curr_score;
best_length = al;
best_shift = shift;
best_p_quals = &(p_quals)[shift];
Expand Down
4 changes: 2 additions & 2 deletions src/scythe.h
Expand Up @@ -100,12 +100,12 @@ void fprint_float_array(FILE *, const float *, int);
void print_int_array(const int *, int);
void print_uint_array(const unsigned int *, int);
void fprint_uint_array(FILE *, const unsigned int *, int);
int sum(const int *, int);
extern int sum(const int *, size_t);
void write_fastq(FILE *, kseq_t *, int, int, int);
void print_summary(adapter_array *, float, int, int, int);

/* match.c prototypes */
int *score_sequence(const char *, const char *, int);
int *score_sequence(const char *, const char *, size_t);
match *find_best_match(const adapter_array *, const char *, float *, float, float, int);
void print_match(kseq_t *, match *, FILE *, const adapter_array *, quality_type);
void destroy_match(match *);
Expand Down
4 changes: 2 additions & 2 deletions src/util.c
Expand Up @@ -161,8 +161,8 @@ void fprint_uint_array(FILE *fp, const unsigned int *array, int n) {
}
}

int sum(const int *x, int n) {
int i;
inline int sum(const int *x, size_t n) {
size_t i;
int s = 0;
for (i = 0; i < n; i++)
s += x[i];
Expand Down

0 comments on commit 6c6e6a6

Please sign in to comment.