Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,7 @@ fs.createReadStream('./resources/speech.wav')

### Text to Speech

Use the [Text to Speech][text_to_speech] service to synthesize text into a .wav file.
Use the [Text to Speech][text_to_speech] service to synthesize text into an audio file.

```js
var TextToSpeechV1 = require('watson-developer-cloud/text-to-speech/v1');
Expand Down Expand Up @@ -632,8 +632,16 @@ textToSpeech
fs.writeFileSync('audio.wav', audio);
console.log('audio.wav written with a corrected wav header');
});


// or, using WebSockets
var synthStream = textToSpeech.synthesizeUsingWebSocket(params);
synthStream.pipe(fs.createWriteStream('./audio.ogg'));
// see more information in examples/text_to_speech_websocket.js
```



### Tone Analyzer

Use the [Tone Analyzer][tone_analyzer] service to analyze the
Expand Down
2 changes: 1 addition & 1 deletion examples/.eslintrc.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module.exports = {
"parserOptions": { "ecmaVersion": 5 },
"parserOptions": { "ecmaVersion": 6 },
"rules": {
"no-console": "off",
"node/no-missing-require": "off",
Expand Down
51 changes: 51 additions & 0 deletions examples/text_to_speech_websocket.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
'use strict';

const fs = require('fs');
const TextToSpeechV1 = require('watson-developer-cloud/text-to-speech/v1');

// Credentials may be omitted: the SDK then falls back to the
// TEXT_TO_SPEECH_USERNAME / TEXT_TO_SPEECH_PASSWORD environment properties,
// and after that to IBM Cloud's VCAP_SERVICES environment property.
const textToSpeech = new TextToSpeechV1({
  // username: 'INSERT YOUR USERNAME FOR THE SERVICE HERE',
  // password: 'INSERT YOUR PASSWORD FOR THE SERVICE HERE'
});

// what to synthesize, and the audio format to get back
const params = {
  text: 'Hello, world.',
  accept: 'audio/ogg;codecs=opus',
};

// synthesizeUsingWebSocket hands back a Readable stream; it can be piped into
// any writable stream (an audio file here) and/or listened to for events
const synthesizeStream = textToSpeech.synthesizeUsingWebSocket(params);
synthesizeStream.pipe(fs.createWriteStream('./speech.ogg'));

// When the stream is only listened to and not piped anywhere, it has to be
// switched to flowing mode explicitly:
//
// synthesizeStream.resume();

/**
 * Handler for the 'message' event, emitted whenever the service returns data.
 *
 * @param {Object} message - the whole response frame; mostly useful for debugging
 * @param {Buffer|string} data - the payload of the frame. Usually binary audio,
 *   but it can be a string with marks or timing information when the text
 *   contains SSML marks or the request includes the 'timings' parameter
 */
function logMessageData(message, data) {
  console.log(data);
}

/**
 * Handler for the 'error' event, emitted if the connection fails.
 *
 * @param {Error} err - the Error object describing the failure
 */
function logError(err) {
  console.log(err);
}

synthesizeStream.on('message', logMessageData);
synthesizeStream.on('error', logError);

// the 'close' event is emitted once, when the connection is terminated by the service
// the 'code' parameter is the status code. 1000 is the code for a normal termination
// the 'reason' parameter provides a string description of how the connection closed
synthesizeStream.on('close', (code, reason) => {
console.log(code);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice 🎉

});
2 changes: 1 addition & 1 deletion lib/recognize-stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ interface RecognizeStream extends Duplex {
}

/**
* pipe()-able Node.js Readable/Writeable stream - accepts binary audio and emits text in it's `data` events.
* pipe()-able Node.js Readable/Writeable stream - accepts binary audio and emits text in its `data` events.
* Also emits `results` events with interim results and other data.
*
* Cannot be instantiated directly, instead created by calling #recognizeUsingWebSocket()
Expand Down
207 changes: 207 additions & 0 deletions lib/synthesize-stream.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
/**
* Copyright 2014 IBM Corp. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*/

import extend = require('extend');
import pick = require('object.pick');
import { Readable } from 'stream';
import websocket = require ('websocket');
import qs = require('./querystring');

// WebSocket client constructor taken from the `websocket` package; it is
// instantiated with `new` in SynthesizeStream#initialize().
const w3cWebSocket = websocket.w3cwebsocket;

// Option names that are picked out of the constructor options and sent as the
// JSON payload once the socket has opened (see SynthesizeStream#initialize()).
const PAYLOAD_PARAMS_ALLOWED = [
'text',
'accept',
'timings'
];

// Option names that are picked out of the constructor options and serialized
// into the query string appended to the WebSocket URL
// (see SynthesizeStream#initialize()).
const QUERY_PARAMS_ALLOWED = [
'watson-token',
'voice',
'customization_id',
'x-watson-learning-opt-out',
'x-watson-metadata'
];

// Shares its name with the SynthesizeStream class declared below, so the two
// declarations merge and the class type also exposes the (untyped)
// `_readableState` member on top of Readable's public surface.
interface SynthesizeStream extends Readable {
_readableState;
}

/**
* pipe()-able Node.js Readable stream - accepts text in the constructor and emits binary audio data in its 'message' events
*
* Cannot be instantiated directly, instead created by calling #synthesizeUsingWebSocket()
*
* Uses WebSockets under the hood.
* @param {Object} options
* @constructor
*/
class SynthesizeStream extends Readable {

  /** Value assigned to `err.name` for errors raised by the WebSocket connection. */
  static WEBSOCKET_CONNECTION_ERROR: string = 'WebSocket connection error';

  // options object handed to the constructor; `headers` may be extended with
  // an IAM bearer token by setAuthorizationHeaderToken()
  private options;
  // underlying W3C-style WebSocket, created lazily by initialize()
  private socket;
  // true once initialize() has created the socket and wired its handlers
  private initialized: boolean;
  // true when no token manager is in use, or once its token has been stored
  private authenticated: boolean;


  /**
   * pipe()-able Node.js Readable stream - accepts text and emits binary audio data in its 'message' events
   *
   * Uses WebSockets under the hood.
   *
   *
   * Note that the WebSocket connection is not established until the stream is first read from
   * (i.e. on the first `_read()` call). This allows for IAM token request management by the SDK.
   *
   * @param {Object} options
   * @param {String} options.text - The text that is to be synthesized. Provide plain text or text that is annotated with SSML. SSML input can include the SSML <mark> element. Pass a maximum of 5 KB of text.
   * @param {String} options.accept - The requested audio format (MIME type) of the audio.
   * @param {String[]} [options.timings] - An array that specifies whether the service is to return word timing information for all strings of the input text
   * @param {String} [options.voice='en-US_MichaelVoice'] - The voice that is to be used for the synthesis.
   * @param {String} [options.customization_id] - The customization ID (GUID) of a custom voice model that is to be used for the synthesis.
   * @param {String} [options.url='wss://stream.watsonplatform.net/text-to-speech/api'] base URL for service
   * @param {String} [options.watson-token] - Auth token
   * @param {Object} [options.headers] - Only works in Node.js, not in browsers. Allows for custom headers to be set, including an Authorization header (preventing the need for auth tokens)
   * @param {Boolean} [options.x-watson-learning-opt-out=false] - set to true to opt-out of allowing Watson to use this request to improve its services
   * @param {String} [options.x-watson-metadata] - Associates a customer ID with data that is passed over the connection.
   * @param {IamTokenManagerV1} [options.token_manager] - Token manager for authenticating with IAM
   * @param {Boolean} [options.rejectUnauthorized] - If false, disable SSL verification for the WebSocket connection
   *
   * @constructor
   */
  constructor(options) {
    super(options);
    this.options = options;
    this.initialized = false;
    // a token only has to be fetched when a token manager was supplied
    this.authenticated = options.token_manager ? false : true;
  }

  /**
   * Opens the WebSocket connection and wires its events to this stream.
   *
   * The URL is built from `options.url` (an `http`/`https` scheme is rewritten
   * to `ws`/`wss`), the `/v1/synthesize` path and the query string produced from
   * the options listed in QUERY_PARAMS_ALLOWED. Once the socket opens, the
   * options listed in PAYLOAD_PARAMS_ALLOWED are sent as a single JSON message.
   *
   * Called from `_read()` the first time the stream is read.
   */
  initialize() {
    const options = this.options;

    const queryParams = pick(options, QUERY_PARAMS_ALLOWED);
    const queryString = qs.stringify(queryParams);

    const url =
      (options.url || 'wss://stream.watsonplatform.net/text-to-speech/api')
        .replace(/^http/, 'ws') +
      '/v1/synthesize' +
      queryString;

    const socket = (this.socket = new w3cWebSocket(
      url,
      null,
      null,
      options.headers,
      null,
      { tlsOptions: { rejectUnauthorized: options.rejectUnauthorized }}
    ));

    // arrow functions keep `this` bound to the stream inside the handlers

    socket.onopen = () => {
      const payload = pick(options, PAYLOAD_PARAMS_ALLOWED);
      socket.send(JSON.stringify(payload));
      /**
       * emitted once the WebSocket connection has been established
       * @event SynthesizeStream#open
       */
      this.emit('open');
    };

    socket.onmessage = message => {
      const chunk = message.data;
      // some messages are strings - emit those unencoded, but push them to
      // the stream as binary
      const data = typeof chunk === 'string' ? chunk : Buffer.from(chunk);
      /**
       * Emit any messages received over the wire, mainly used for debugging.
       *
       * @event SynthesizeStream#message
       * @param {Object} message - frame object received from service
       * @param {Object} data - a data attribute of the frame that's either a string or a Buffer/TypedArray
       */
      this.emit('message', message, data);
      this.push(Buffer.from(chunk));
    };

    socket.onerror = event => {
      const err = new Error('WebSocket connection error');
      err.name = SynthesizeStream.WEBSOCKET_CONNECTION_ERROR;
      // keep the raw socket event around for debugging
      err['event'] = event;
      this.emit('error', err);
      // end the readable side so consumers are not left waiting
      this.push(null);
    };

    socket.onclose = event => {
      this.push(null);
      /**
       * @event SynthesizeStream#close
       * @param {Number} reasonCode
       * @param {String} description
       */
      this.emit('close', event.code, event.reason);
    };

    this.initialized = true;
  }

  /**
   * Readable implementation hook.
   *
   * Data is pushed by the WebSocket handlers rather than pulled here, so this
   * method only makes sure that the IAM token (if a token manager is used) has
   * been obtained and that the socket has been opened.
   */
  _read() {
    // even though we aren't controlling the read from websocket,
    // we can take advantage of the fact that _read is async and hack
    // this function to retrieve a token if the service is using IAM auth
    this.setAuthorizationHeaderToken(err => {
      if (err) {
        this.emit('error', err);
        this.push(null);
        return;
      }

      if (!this.initialized) {
        this.initialize();
      }
    });
  }

  /**
   * This function retrieves an IAM access token (when a token manager is in
   * use and no token has been stored yet) and stores it in the request
   * headers before calling the callback function.
   *
   * The callback is invoked exactly once: with the error if the token request
   * failed, otherwise with `null`.
   *
   * @private
   * @param {Function} callback
   */
  setAuthorizationHeaderToken(callback) {
    if (this.authenticated) {
      callback(null);
      return;
    }

    this.options.token_manager.getToken((err, token) => {
      if (err) {
        // stop here: without a token there is nothing to store, and the
        // callback must not be invoked a second time
        callback(err);
        return;
      }
      const authHeader = { authorization: 'Bearer ' + token };
      // caller-supplied headers take precedence over the generated one
      this.options.headers = extend(authHeader, this.options.headers);
      this.authenticated = true;
      callback(null);
    });
  }
}

export = SynthesizeStream;
36 changes: 29 additions & 7 deletions test/integration/text_to_speech.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,39 @@ describe('text_to_speech_integration', function() {
text_to_speech.voices(null, done);
});

it('synthesize()', function(done) {
describe('synthesize', function() {
const params = {
text: 'test',
accept: 'audio/wav',
};
// wav.Reader parses the wav header and will throw if it isn't valid
const reader = new wav.Reader();
text_to_speech
.synthesize(params)
.pipe(reader)
.on('format', done.bind(null, null));

it('synthesize using http', function(done) {
// wav.Reader parses the wav header and will throw if it isn't valid
const reader = new wav.Reader();
text_to_speech
.synthesize(params)
.pipe(reader)
.on('format', done.bind(null, null));
});

it('synthesize using websocket', function(done) {
  const stream = text_to_speech.synthesizeUsingWebSocket(params);
  // nothing is piped from the stream, so put it in flowing mode explicitly
  stream.resume();

  // a normal termination is reported with status code 1000
  stream.on('close', function(code, reason) {
    expect(code).toBe(1000);
    done();
  });

  // any connection error fails the test
  stream.on('error', function(err) {
    throw err;
  });

  // every frame received from the service must carry a payload
  stream.on('message', function(message, data) {
    expect(data).not.toBeNull();
  });
});
});

it('pronunciation()', function(done) {
Expand Down
Loading