Skip to content

Commit

Permalink
Implement --best-only feature; handle Ns; bump version number to 0.2b…
Browse files Browse the repository at this point in the history
…eta;
  • Loading branch information
Stefan Washietl committed Jun 26, 2009
1 parent 2a746e8 commit a6f291f
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 60 deletions.
2 changes: 1 addition & 1 deletion configure.ac
@@ -1,4 +1,4 @@
AC_INIT(RNAcode, 0.1, wash@tbi.univie.ac.at, RNAcode)
AC_INIT(RNAcode, 0.2beta, wash@tbi.univie.ac.at, RNAcode)
AM_INIT_AUTOMAKE

AC_PROG_CC
Expand Down
7 changes: 2 additions & 5 deletions src/RNAcode.c
Expand Up @@ -103,16 +103,13 @@ int main(int argc, char *argv[]){

counter++;

/* Currently repeat masked regions are ignored and Ns are converted to gaps */
/* Currently repeat masked regions are ignored*/
/* Fix this */
for (i=0; inputAln[i]!=NULL; i++){
tmpSeq=inputAln[i]->seq;
j=0;
while (tmpSeq[j]){
tmpSeq[j]=toupper(tmpSeq[j]);
if (tmpSeq[j]=='N'){
tmpSeq[j]='-';
}
j++;
}
}
Expand Down Expand Up @@ -227,7 +224,7 @@ void help(void){
printf("--outfile -o Output file (default: stdout)\n");
printf("--gtf -g Format output as GTF\n");
printf("--tabular -t Format output as tab delimited fields\n");
printf("--best-only -b Show only best hit for each input alignment\n");
printf("--best-only -b Show only best non-overlapping hits\n");
printf("--num-samples -n Number of samples to calculate p-value (default: 100)\n");
printf("--cutoff -p p-value cutoff (default: 1.0)\n");
printf("--pars -c Parameters as comma separated string (see README for details)\n");
Expand Down
159 changes: 106 additions & 53 deletions src/misc.c
Expand Up @@ -51,6 +51,33 @@ int compareScores(const void * a, const void * b){
return 0;
}

/*********************************************************************
compareLocation
compare function for qsort that compares start positions given as
pointer to a segmentStats structure.
*********************************************************************/

int compareLocation(const void * a, const void * b){

segmentStats* statsA;
segmentStats* statsB;

statsA=(segmentStats*)a;
statsB=(segmentStats*)b;

if ( statsA->startSite > statsB->startSite) {
return +1;
} else {
return -1;
}

return 0;
}





void reintroduceGaps(const struct aln* origAln[], struct aln* sampledAln[]){
Expand Down Expand Up @@ -303,9 +330,6 @@ void copyAln(struct aln *src[],struct aln *dest[]){






void printAlnClustal(FILE *out, const struct aln* AS[]){

int i;
Expand All @@ -323,60 +347,95 @@ void printAlnClustal(FILE *out, const struct aln* AS[]){

void printResults(FILE* outfile, int outputFormat, segmentStats results[]){

int i,k;
int i,k, hssCount, currHSS, nextHSS;
char c;
char name[1024]="";
char prefix[1024]="";
char suffix[1024]="";

if (outputFormat==0){
hssCount = 0;
while (results[hssCount].score > 0.0){
results[hssCount++].hide=0;
}

if (pars.bestOnly){

/* First sort by start position */
qsort((segmentStats*) results, hssCount,sizeof(segmentStats),compareLocation);
currHSS=0;
nextHSS=1;
while (nextHSS<=hssCount) {

/* the two HSS overlap */
if (!(results[currHSS].endSite <= results[nextHSS].startSite)){

/* Hide the HSS with lower score */
if (results[currHSS].score > results[nextHSS].score){
results[nextHSS].hide=1;
nextHSS++;
} else {
results[currHSS].hide=1;
currHSS=nextHSS;
nextHSS++;
}
} else {
currHSS=nextHSS;
nextHSS++;
}
}
}

if (results[0].score<0.0){
fprintf(outfile,"\nNo significant coding regions found.\n");
} else {
qsort((segmentStats*) results, hssCount,sizeof(segmentStats),compareScores);


if (results[0].score<0.0){
fprintf(outfile,"\nNo significant coding regions found.\n");
return;
}

if (outputFormat==0){
fprintf(outfile, "\n%5s%7s%6s%6s%12s%12s%12s%9s%9s\n",
"Frame","Length","From","To","Name","Start","End", "Score","P");
fprintf(outfile, "==============================================================================\n");
}

fprintf(outfile, "\n%5s%7s%6s%6s%12s%12s%12s%9s%9s\n",
"Frame","Length","From","To","Name","Start","End", "Score","P");

fprintf(outfile, "==============================================================================\n");

i=0;

while (results[i].score>0){

fprintf(outfile, "%4c%i%7i%6i%6i%12s%12i%12i%9.2f",
results[i].strand, results[i].frame+1,
results[i].endSite-results[i].startSite+1,
results[i].startSite+1,results[i].endSite+1,
results[i].name,
results[i].start,results[i].end,
results[i].score);

if (results[i].pvalue < 0.001){
/*Seems to be the minimum number I can get, don't know why
we don't get down to 1e-37 which should be the limit for
floats.
*/
if (results[i].pvalue < 10e-16){
fprintf(outfile, " <1e-16\n");
} else {
fprintf(outfile, "% 9.1e\n",results[i].pvalue);
}
i=0;
while (results[i].score>0){

if (results[i].hide) {
i++;
continue;
}

if (outputFormat==0){

fprintf(outfile, "%4c%i%7i%6i%6i%12s%12i%12i%9.2f",
results[i].strand, results[i].frame+1,
results[i].endSite-results[i].startSite+1,
results[i].startSite+1,results[i].endSite+1,
results[i].name,
results[i].start,results[i].end,
results[i].score);

if (results[i].pvalue < 0.001){
/*Seems to be the minimum number I can get, don't know why
we don't get down to 1e-37 which should be the limit for
floats.
*/
if (results[i].pvalue < 10e-16){
fprintf(outfile, " <1e-16\n");
} else {
fprintf(outfile, "% 9.3f\n",results[i].pvalue);
fprintf(outfile, "% 9.1e\n",results[i].pvalue);
}
i++;

if ((pars.bestOnly) && (i > 0)) break;

} else {
fprintf(outfile, "% 9.3f\n",results[i].pvalue);
}
i++;
}
}

if (outputFormat==1){
i=0;
while (results[i].score>0){
if (outputFormat==1){
/* if name is of the form hg18.chromX than only display chromX */
k=0;
while (1){
Expand All @@ -399,31 +458,25 @@ void printResults(FILE* outfile, int outputFormat, segmentStats results[]){
results[i].pvalue,
results[i].strand, '.',"gene_id \"Gene 0\"; transcript_id \"transcript 0\";");
i++;

if ((pars.bestOnly) && (i > 0)) break;

}
}

if (outputFormat==2){

i=0;

if (outputFormat==2){

while (results[i].score>0){
fprintf(outfile, "%c\t%i\t%i\t%i\t%i\t%s\t%i\t%i\t%7.3f\t",
results[i].strand, results[i].frame+1,
results[i].endSite-results[i].startSite+1,
results[i].startSite+1,results[i].endSite+1,
results[i].name,
results[i].start,results[i].end,
results[i].score);

if (results[i].pvalue < 0.001){
fprintf(outfile, "% 9.3e\n",results[i].pvalue);
} else {
fprintf(outfile, "% 9.3f\n",results[i].pvalue);
}
i++;
if ((pars.bestOnly) && (i > 0)) break;
}
i++;
}
}
8 changes: 7 additions & 1 deletion src/score.c
Expand Up @@ -366,9 +366,15 @@ float calculateSigma(char* block_0, char* block_k, int k, bgModel* models){

codonA[3]=codonB[3]='\0';

if (codonA[0] == 'N' || codonA[1] == 'N' || codonA[2] == 'N' ||
codonB[0] == 'N' || codonB[1] == 'N' || codonB[2] == 'N'
) {
return 0.0;
}

h=hDist(ntMap[codonA[0]], ntMap[codonA[1]], ntMap[codonA[2]],
ntMap[codonB[0]], ntMap[codonB[1]], ntMap[codonB[2]]);

if (h==0) return 0.0;

pepA=transcode[ntMap[codonA[0]]][ntMap[codonA[1]]][ntMap[codonA[2]]];
Expand Down
1 change: 1 addition & 0 deletions src/score.h
Expand Up @@ -54,6 +54,7 @@ struct _segmentStats {
char *name;
float score;
float pvalue;
int hide;

};

Expand Down

0 comments on commit a6f291f

Please sign in to comment.