Skip to content

Commit

Permalink
Automatically detect and convert fastq to fasta in the search query
Browse files Browse the repository at this point in the history
    Automatically detect and convert FASTQ in the query

    If a user provides valid FASTQ, it can be converted to FASTA,
    which can then be consumed by the BLAST algorithms. Currently BLAST
    errors out when given FASTQ.

    This should save users the time and effort of manually converting
    FASTQ to FASTA.

    If FASTQ is invalid, no modifications to user query will be made.

    As part of this change notifications have also been moved to
    the top of the screen, as we may have 2 notifications at once e.g.
    FASTQ conversion notification + detected query type notification.
  • Loading branch information
tadast committed Jan 25, 2024
1 parent b7c3884 commit 3e59445
Show file tree
Hide file tree
Showing 9 changed files with 146 additions and 28 deletions.
20 changes: 9 additions & 11 deletions public/css/sequenceserver.css
Original file line number Diff line number Diff line change
Expand Up @@ -372,24 +372,22 @@ td.nowrap-ellipsis {

.query-container {
margin-top: 35px;
margin-bottom: 0;
margin-bottom: 35px;
position: relative;
}

.notifications {
position: relative;
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 32px;

/**
* Notifications should appear over the bottom border of the textarea, high
* enough that they do not interfere with the database listing below and
* narrow enough not to cover the textarea resize handle on the right. */
top: -16px;
margin-right: 35px;
margin-left: 35px;
padding-right: 35px;
padding-left: 35px;
}

.notification {
height: 32px !important;
margin-top: 6px;
}

.notification .alert-info,
Expand Down
4 changes: 2 additions & 2 deletions public/css/sequenceserver.min.css

Large diffs are not rendered by default.

35 changes: 35 additions & 0 deletions public/js/fastq_to_fasta.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/**
 * Converts the lines of one FASTQ record into the matching FASTA lines.
 *
 * The first character of the header line is replaced with '>' and only the
 * header and the line following it are kept; the remaining lines (separator
 * and quality in a FASTQ record) are dropped. The input array is not
 * modified, so callers can keep using the original record.
 *
 * @param {string[]} fastqChunk - Lines of a single record, header line first.
 * @returns {string[]} New array holding the FASTA defline and the sequence line.
 */
const convertChunk = (fastqChunk) => {
  const [header, ...remaining] = fastqChunk;
  // Build a fresh array instead of overwriting fastqChunk[0] in place.
  return [`>${header.substring(1)}`, ...remaining.slice(0, 1)];
};

/**
 * Tells whether the given lines have the shape of a single FASTQ record.
 *
 * A record is accepted when it has exactly four lines, the first starts
 * with '@', the third starts with '+', and the second and fourth lines
 * have the same length.
 *
 * @param {string[]} fastqChunk - Candidate record lines.
 * @returns {boolean} true when the lines pass all of the checks above.
 */
const isValidFastq = (fastqChunk) => {
  if (fastqChunk.length !== 4) {
    return false;
  }

  const [header, residues, separator, quality] = fastqChunk;

  if (header[0] !== '@') {
    return false;
  }
  if (separator[0] !== '+') {
    return false;
  }
  return residues.length === quality.length;
};

/**
 * Converts FASTQ text to FASTA, leaving anything else untouched.
 *
 * The input is trimmed and split into lines, which are consumed four at a
 * time. Every group must pass isValidFastq; if any group fails, the original
 * string is returned unmodified so the user's query is never half-converted.
 * Both Unix (\n) and Windows (\r\n) line endings are accepted.
 *
 * @param {string} sequence - Raw text of the search query.
 * @returns {string} FASTA text joined with '\n' when the whole input is valid
 *   FASTQ; otherwise the original `sequence`, unchanged.
 */
export const fastqToFasta = (sequence) => {
  const trimmedSequence = sequence.trim();
  // return unmodified if sequence does not look like fastq
  if (!trimmedSequence.startsWith('@')) {
    return sequence;
  }

  // Split on \r\n as well as \n. Splitting on '\n' alone leaves a trailing
  // '\r' on every line except the (trimmed) last one, which makes the final
  // record's sequence and quality lengths differ and rejects valid input.
  const sequenceLines = trimmedSequence.split(/\r?\n/);
  const fastaChunks = [];

  for (let i = 0; i < sequenceLines.length; i += 4) {
    const fastqChunk = sequenceLines.slice(i, i + 4);
    if (!isValidFastq(fastqChunk)) {
      // return unmodified sequence if it does not look like valid fastq
      return sequence;
    }
    fastaChunks.push(...convertChunk(fastqChunk));
  }

  return fastaChunks.join('\n');
};
28 changes: 23 additions & 5 deletions public/js/form.js
Original file line number Diff line number Diff line change
Expand Up @@ -175,15 +175,18 @@ export class Form extends Component {
return (
<div className="container">
<div id="overlay" style={{ position: 'absolute', top: 0, left: 0, width: '100vw', height: '100vw', background: 'rgba(0, 0, 0, 0.2)', display: 'none', zIndex: 99 }} />

<div className="notifications" id="notifications">
<FastqNotification />
<NucleotideNotification />
<ProteinNotification />
<MixedNotification />
</div>

<form id="blast" ref={this.formRef} onSubmit={this.handleFormSubmission} className="form-horizontal">
<div className="form-group query-container">
<SearchQueryWidget ref="query" onSequenceTypeChanged={this.handleSequenceTypeChanged} />
</div>
<div className="notifications" id="notifications">
<NucleotideNotification />
<ProteinNotification />
<MixedNotification />
</div>
{this.useTreeWidget() ?
<DatabasesTree ref="databases"
databases={this.state.databases} tree={this.state.tree}
Expand Down Expand Up @@ -244,6 +247,21 @@ class NucleotideNotification extends Component {
}
}

class FastqNotification extends Component {
render() {
return (<div
className="notification row"
id="fastq-sequence-notification"
style={{ display: 'none' }}>
<div
className="alert-info col-md-6 col-md-offset-3">
Detected FASTQ and automatically converted to FASTA.
</div>
</div>
);
}
}

class MixedNotification extends Component {
render() {
return (
Expand Down
25 changes: 22 additions & 3 deletions public/js/query.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import _ from 'underscore';
import HitsOverview from './hits_overview';
import LengthDistribution from './length_distribution'; // length distribution of hits
import Utils from './utils';
import { fastqToFasta } from './fastq_to_fasta';

/**
* Query component displays query defline, graphical overview, length
Expand Down Expand Up @@ -103,6 +104,7 @@ export class SearchQueryWidget extends Component {
this.indicateNormal = this.indicateNormal.bind(this);
this.type = this.type.bind(this);
this.guessSequenceType = this.guessSequenceType.bind(this);
this.preProcessSequence = this.preProcessSequence.bind(this);
this.notify = this.notify.bind(this);

this.textareaRef = createRef()
Expand All @@ -120,6 +122,8 @@ export class SearchQueryWidget extends Component {

componentDidUpdate() {
this.hideShowButton();
this.preProcessSequence();

var type = this.type();
if (!type || type !== this._type) {
this._type = type;
Expand Down Expand Up @@ -240,7 +244,12 @@ export class SearchQueryWidget extends Component {
* of directly calling this method.
*/
type() {
var sequences = this.value().split(/>.*/);
let sequence = this.value().trim();
// FASTQ detected, but we don't know if conversion has succeeded yet
// will notify separately if it does
if (sequence.startsWith('@') ) { return undefined; }

var sequences = sequence.split(/>.*/);

var type, tmp;

Expand All @@ -263,6 +272,16 @@ export class SearchQueryWidget extends Component {
return type;
}

preProcessSequence() {
var sequence = this.value();
var updatedSequence = fastqToFasta(sequence);

if (sequence !== updatedSequence) {
this.value(updatedSequence);
this.notify('fastq');
}
}

/**
* Guesses and returns the type of the given sequence (nucleotide,
* protein).
Expand Down Expand Up @@ -290,9 +309,9 @@ export class SearchQueryWidget extends Component {
}

notify(type) {
clearTimeout(this.notification_timeout);
this.indicateNormal();
$('.notifications .active').hide().removeClass('active');
clearTimeout(this.notification_timeout);
// $('.notifications .active').hide().removeClass('active');

if (type) {
$('#' + type + '-sequence-notification').show('drop', { direction: 'up' }).addClass('active');
Expand Down
19 changes: 18 additions & 1 deletion public/js/tests/mock_data/sequences.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,21 @@ GAGATGGAAATGGCCGATTACCCGCTCGCCTATGATATTTCCCCGTATCTTCCGCCGTTC
CTGTCGCGAGCGAGGGCACGGGGAATGTTAGACGGTCGCTTCGCCGGCAGACGCTACCGA
AGGGAGTCGCGGGGCATTCACGAGGAGTGTTGCATCAACGGATGTACGATAAACGAATTG
ACCAGCTACTGCGGCCCC
`;
`;

/**
 * FASTQ fixture with two reads (SRR001666.1 and SRR001666.2). Each read
 * spans four lines: '@' header, bases, '+' separator line, and a quality
 * string of the same length as the bases. The text ends with a newline.
 */
export const FASTQ_SEQUENCE =
`@SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=72
GGGTGATGGCCGCTGCCGATGGCGTCAAATCCCACCAAGTTACCCTTAACAACTTAAGGGTTTTCAAATAGA
+SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=72
IIIIIIIIIIIIIIIIIIIIIIIIIIIIII9IG9ICIIIIIIIIIIIIIIIIIIIIDIIIIIII>IIIIII/
@SRR001666.2 071112_SLXA-EAS1_s_7:5:1:801:338 length=72
GTTCAGGGATACGACGTTTGTATTTTAAGAATCTGAAGCAGAAGTCGATGATAATACGCGTCGTTTTATCAT
+SRR001666.2 071112_SLXA-EAS1_s_7:5:1:801:338 length=72
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII6IBIIIIIIIIIIIIIIIIIIIIIIIGII>IIIII-I)8I
`;

/**
 * FASTA counterpart of the two reads in FASTQ_SEQUENCE: the same headers
 * with '>' in place of '@', followed by the same bases, and no separator or
 * quality lines. Note there is no trailing newline.
 */
export const FASTA_OF_FASTQ_SEQUENCE =
`>SRR001666.1 071112_SLXA-EAS1_s_7:5:1:817:345 length=72
GGGTGATGGCCGCTGCCGATGGCGTCAAATCCCACCAAGTTACCCTTAACAACTTAAGGGTTTTCAAATAGA
>SRR001666.2 071112_SLXA-EAS1_s_7:5:1:801:338 length=72
GTTCAGGGATACGACGTTTGTATTTTAAGAATCTGAAGCAGAAGTCGATGATAATACGCGTCGTTTTATCAT`;
15 changes: 12 additions & 3 deletions public/js/tests/search_query.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import { render, screen, fireEvent } from '@testing-library/react';
import { SearchQueryWidget } from '../query';
import { Form } from '../form';
import { AMINO_ACID_SEQUENCE, NUCLEOTIDE_SEQUENCE } from './mock_data/sequences';
import { AMINO_ACID_SEQUENCE, NUCLEOTIDE_SEQUENCE, FASTQ_SEQUENCE, FASTA_OF_FASTQ_SEQUENCE } from './mock_data/sequences';
import '@testing-library/jest-dom/extend-expect';
import '@testing-library/react/dont-cleanup-after-each';

Expand All @@ -16,7 +16,7 @@ describe('SEARCH COMPONENT', () => {
} />).container;
inputEl = screen.getByRole('textbox', { name: '' });
});

test('should render the search component textarea', () => {
expect(inputEl).toHaveClass('form-control');
});
Expand Down Expand Up @@ -47,12 +47,21 @@ describe('SEARCH COMPONENT', () => {
expect(activeNotification.id).toBe('nucleotide-sequence-notification');
expect(alertWrapper).toHaveTextContent('Detected: nucleotide sequence(s).');
});

// Nucleotide and amino-acid input together must raise the "mixed" error banner.
test('should correctly detect the mixed sequences and show error notification', () => {
  const mixedInput = `${NUCLEOTIDE_SEQUENCE}${AMINO_ACID_SEQUENCE}`;
  fireEvent.change(inputEl, { target: { value: mixedInput } });

  const shownNotification = container.querySelector('.notification.active');
  expect(shownNotification.id).toBe('mixed-sequence-notification');

  // The first child of the notification row carries the message text.
  const messageBox = shownNotification.children[0];
  expect(messageBox).toHaveTextContent('Error: mixed nucleotide and amino-acid sequences detected.');
});

// FASTQ input must be rewritten to FASTA in the textarea and announced
// through the FASTQ notification.
test('should correctly detect FASTQ and convert it to FASTA', () => {
  const changeEvent = { target: { value: FASTQ_SEQUENCE } };
  fireEvent.change(inputEl, changeEvent);

  const shownNotification = container.querySelector('.notification.active');
  // The first child of the notification row carries the message text.
  const messageBox = shownNotification.children[0];

  expect(shownNotification.id).toBe('fastq-sequence-notification');
  expect(messageBox).toHaveTextContent('Detected FASTQ and automatically converted to FASTA.');
  expect(inputEl).toHaveValue(FASTA_OF_FASTQ_SEQUENCE);
});
});
13 changes: 12 additions & 1 deletion public/sequenceserver-report.min.js

Large diffs are not rendered by default.

15 changes: 13 additions & 2 deletions public/sequenceserver-search.min.js

Large diffs are not rendered by default.

0 comments on commit 3e59445

Please sign in to comment.