org.mock-server
diff --git a/src/main/java/com/ibm/watson/developer_cloud/concept_expansion/v1/ConceptExpansion.java b/src/main/java/com/ibm/watson/developer_cloud/concept_expansion/v1/ConceptExpansion.java
index 3951ed17add..757c5bb7788 100755
--- a/src/main/java/com/ibm/watson/developer_cloud/concept_expansion/v1/ConceptExpansion.java
+++ b/src/main/java/com/ibm/watson/developer_cloud/concept_expansion/v1/ConceptExpansion.java
@@ -67,6 +67,17 @@ public ConceptExpansion() {
setDataset(Dataset.MT_SAMPLES);
}
+ /**
+ * Creates a {@link Job}.
+ *
+ * @param seeds List of terms to be used as seeds
+ *
+ * @return the {@link Job}
+ */
+ public Job createJob(final String[] seeds) {
+ return createJob(null, seeds);
+ }
+
/**
* Creates a {@link Job}.
*
@@ -76,7 +87,6 @@ public ConceptExpansion() {
* @return the {@link Job}
*/
public Job createJob(final String label, final String[] seeds) {
- Validate.notEmpty(label, "label cannot be null or empty");
Validate.notEmpty(seeds, "seeds cannot be null or empty");
Validate.notNull(dataset, "dataset cannot be null");
diff --git a/src/main/java/com/ibm/watson/developer_cloud/http/HttpHeaders.java b/src/main/java/com/ibm/watson/developer_cloud/http/HttpHeaders.java
index a3d819a0004..63db918e4e2 100644
--- a/src/main/java/com/ibm/watson/developer_cloud/http/HttpHeaders.java
+++ b/src/main/java/com/ibm/watson/developer_cloud/http/HttpHeaders.java
@@ -179,4 +179,6 @@ public interface HttpHeaders {
*/
public static final String WWW_AUTHENTICATE = "WWW-Authenticate";
+ /** The Authorization token header. */
+ public static final String X_WATSON_AUTHORIZATION_TOKEN = "X-Watson-Authorization-Token";
}
diff --git a/src/main/java/com/ibm/watson/developer_cloud/service/ConflictException.java b/src/main/java/com/ibm/watson/developer_cloud/service/ConflictException.java
new file mode 100644
index 00000000000..6c489308fd6
--- /dev/null
+++ b/src/main/java/com/ibm/watson/developer_cloud/service/ConflictException.java
@@ -0,0 +1,39 @@
+/**
+ * Copyright 2015 IBM Corp. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.ibm.watson.developer_cloud.service;
+
+import com.ibm.watson.developer_cloud.http.HttpStatus;
+import com.squareup.okhttp.Response;
+
+/**
+ * 409 Conflict (HTTP/1.1 - RFC 2616)
+ */
+public class ConflictException extends ServiceResponseException {
+
+ /**
+ * The Constant serialVersionUID.
+ */
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Instantiates a new Conflict Exception.
+ *
+ * @param message the error message
+ * @param response the HTTP response
+ */
+ public ConflictException(String message, Response response) {
+ super(HttpStatus.CONFLICT, message, response);
+ }
+
+}
diff --git a/src/main/java/com/ibm/watson/developer_cloud/service/WatsonService.java b/src/main/java/com/ibm/watson/developer_cloud/service/WatsonService.java
index 2c45680b560..fcfaeaea5a2 100644
--- a/src/main/java/com/ibm/watson/developer_cloud/service/WatsonService.java
+++ b/src/main/java/com/ibm/watson/developer_cloud/service/WatsonService.java
@@ -26,12 +26,14 @@
import com.google.gson.JsonSyntaxException;
import com.ibm.watson.developer_cloud.http.HttpHeaders;
import com.ibm.watson.developer_cloud.http.HttpStatus;
+import com.ibm.watson.developer_cloud.http.RequestBuilder;
import com.ibm.watson.developer_cloud.service.model.GenericModel;
import com.ibm.watson.developer_cloud.util.BluemixUtils;
import com.ibm.watson.developer_cloud.util.RequestUtil;
import com.ibm.watson.developer_cloud.util.ResponseUtil;
import com.squareup.okhttp.Credentials;
import com.squareup.okhttp.Headers;
+import com.squareup.okhttp.HttpUrl;
import com.squareup.okhttp.OkHttpClient;
import com.squareup.okhttp.Request;
import com.squareup.okhttp.Request.Builder;
@@ -69,9 +71,9 @@ public WatsonService(String name) {
/**
- * Configure HTTP client.
+ * Configures the HTTP client.
*
- * @return the okhttp client
+ * @return the HTTP client
*/
protected OkHttpClient configureHttpClient() {
final OkHttpClient client = new OkHttpClient();
@@ -147,6 +149,8 @@ protected Response execute(Request request) {
case HttpStatus.NOT_ACCEPTABLE: // HTTP 406
throw new ForbiddenException(error != null ? error
: "Forbidden: Service refuse the request", response);
+ case HttpStatus.CONFLICT: // HTTP 409
+ throw new ConflictException(error != null ? error : "", response);
case HttpStatus.REQUEST_TOO_LONG: // HTTP 413
throw new RequestTooLargeException(error != null ? error
: "Request too large: The request entity is larger than the server is able to process",
@@ -216,6 +220,20 @@ public String getEndPoint() {
return endPoint;
}
+ /**
+ * Gets an authorization token that can be used to authorize API calls.
+ *
+ *
+ * @return the token
+ */
+ public String getToken() {
+ HttpUrl url =
+ HttpUrl.parse(getEndPoint()).newBuilder().setPathSegment(0, "authorization").build();
+ Request request = RequestBuilder.get(url + "/v1/token").withQuery("url", getEndPoint()).build();
+ Response response = execute(request);
+ return ResponseUtil.getJsonObject(response).get("token").getAsString();
+ }
+
/**
* Gets the error message from a JSON response
*
@@ -267,7 +285,7 @@ public String getName() {
* @return the user agent
*/
private final String getUserAgent() {
- return "watson-developer-cloud-java-sdk-2.5.0";
+ return "watson-developer-cloud-java-sdk-2.6.0";
}
/**
diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/RecognizeOptions.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/RecognizeOptions.java
index 75c78ff963f..b656911b15c 100644
--- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/RecognizeOptions.java
+++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/RecognizeOptions.java
@@ -13,24 +13,40 @@
*/
package com.ibm.watson.developer_cloud.speech_to_text.v1;
+import org.apache.commons.lang3.Validate;
+
+import com.google.gson.annotations.SerializedName;
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechSession;
+import com.squareup.okhttp.MediaType;
/**
- * Recognize Options when using the
- * {@link SpeechToText#recognize(java.io.File, String, RecognizeOptions)} method.
+ * Parameters to be used during a recognize call in the {@link SpeechToText} service.
*/
public class RecognizeOptions {
+
+ @SerializedName("content-type")
+ private String contentType;
private Boolean continuous;
private Integer inactivityTimeout;
+
+ @SerializedName("interim_results")
+ private Boolean interimResults;
+ private String[] keywords;
+
+ @SerializedName("keywords_threshold")
+ private Double keywordsThreshold;
private Integer maxAlternatives;
private String model;
private String sessionId;
-
private Boolean timestamps;
- private Boolean wordConfidence;
+ @SerializedName("word_alternatives_threshold")
+ private Double wordAlternativesThreshold;
+
+ @SerializedName("word_confidence")
+ private Boolean wordConfidence;
/**
* If true, multiple final results that represent multiple consecutive phrases separated by pauses
@@ -44,6 +60,15 @@ public RecognizeOptions continuous(Boolean continuous) {
return this;
}
+ /**
+ * Gets the content type.
+ *
+ * @return the contentType
+ */
+ public String getContentType() {
+ return contentType;
+ }
+
/**
* Gets the continuous.
*
@@ -62,6 +87,33 @@ public Integer getInactivityTimeout() {
return inactivityTimeout;
}
+ /**
+ * Gets the interim results.
+ *
+ * @return the interimResults
+ */
+ public Boolean getInterimResults() {
+ return interimResults;
+ }
+
+ /**
+ * Gets the keywords.
+ *
+ * @return the keywords
+ */
+ public String[] getKeywords() {
+ return keywords;
+ }
+
+ /**
+ * Gets the keywords threshold.
+ *
+ * @return the keywordsThreshold
+ */
+ public Double getKeywordsThreshold() {
+ return keywordsThreshold;
+ }
+
/**
* Gets the max alternatives.
*
@@ -98,7 +150,14 @@ public Boolean getTimestamps() {
return timestamps;
}
-
+ /**
+ * Gets the word alternatives threshold.
+ *
+ * @return the wordAlternativesThreshold
+ */
+ public Double getWordAlternativesThreshold() {
+ return wordAlternativesThreshold;
+ }
/**
* Gets the word confidence.
@@ -121,7 +180,50 @@ public RecognizeOptions inactivityTimeout(Integer inactivityTimeout) {
}
/**
- * Maximum number of alternative transcripts returned
+ * If true, the service sends interim results for the transcription. Otherwise, the recognition
+ * ends after first "end of speech" is detected. The default is false.
+ *
+ * @param interimResults the interim results
+ * @return the recognize options
+ */
+ public RecognizeOptions interimResults(Boolean interimResults) {
+ this.interimResults = interimResults;
+ return this;
+ }
+
+ /**
+ * Specifies an array of keyword strings to be matched in the input audio. By default, the service
+ * does no keyword spotting.
+ *
+ *
+ * @param keywords the keywords
+ * @return the recognize options
+ */
+ public RecognizeOptions keywords(String[] keywords) {
+ this.keywords = keywords;
+ return this;
+ }
+
+
+
+ /**
+ * Specifies a minimum level of confidence that the service must have to report a matching keyword
+ * in the input audio. Specify a probability value between 0 and 1 inclusive. A match must have at
+ * least the specified confidence to be returned. Omit the parameter or specify a value of null
+ * (the default) to spot no keywords. If you specify a valid threshold, you must also specify at
+ * least one keyword.
+ *
+ *
+ * @param keywordsThreshold the keywords threshold
+ * @return the recognize options
+ */
+ public RecognizeOptions keywordsThreshold(Double keywordsThreshold) {
+ this.keywordsThreshold = keywordsThreshold;
+ return this;
+ }
+
+ /**
+ * Maximum number of alternative transcripts returned.
*
* @param maxAlternatives the max alternatives
* @return the recognize options
@@ -132,7 +234,7 @@ public RecognizeOptions maxAlternatives(Integer maxAlternatives) {
}
/**
- * Sets the model name used for the recognition
+ * Sets the model name used for the recognition.
*
* @param model the model
* @return the recognize options
@@ -142,6 +244,17 @@ public RecognizeOptions model(String model) {
return this;
}
+ /**
+ * Sets the session id.
+ *
+ * @param session the {@link SpeechSession}
+ * @return the recognize options
+ */
+ public RecognizeOptions session(SpeechSession session) {
+ this.sessionId = session.getSessionId();
+ return this;
+ }
+
/**
* Sets session id.
*
@@ -154,29 +267,54 @@ public RecognizeOptions sessionId(String sessionId) {
}
/**
- * Sets the session id.
+ * If true, time alignment for each word is returned.
*
- * @param session the {@link SpeechSession}
+ * @param timestamps the timestamps
* @return the recognize options
*/
- public RecognizeOptions session(SpeechSession session) {
- this.sessionId = session.getSessionId();
+ public RecognizeOptions timestamps(Boolean timestamps) {
+ this.timestamps = timestamps;
return this;
}
/**
- * If true, time alignment for each word is returned
+ * Specifies a minimum level of confidence that the service must have to report a hypothesis for a
+ * word from the input audio. Specify a probability value between 0 and 1 inclusive. A hypothesis
+ * must have at least the specified confidence to be returned as a word alternative. Omit the
+ * parameter or specify a value of null (the default) to return no word alternatives.
*
- * @param timestamps the timestamps
+ *
+ *
+ * @param wordAlternativesThreshold the word alternatives threshold
* @return the recognize options
*/
- public RecognizeOptions timestamps(Boolean timestamps) {
- this.timestamps = timestamps;
+ public RecognizeOptions wordAlternativesThreshold(Double wordAlternativesThreshold) {
+ this.wordAlternativesThreshold = wordAlternativesThreshold;
return this;
}
/**
- * If true, confidence measure per word is returned if available
+ * The format of the audio data specified as one of the following values:
+ * <ul>
+ * <li><code>audio/flac</code> for Free Lossless Audio Codec (FLAC)</li>
+ * <li><code>audio/l16</code> for Linear 16-bit Pulse-Code Modulation (PCM)</li>
+ * <li><code>audio/wav</code> for Waveform Audio File Format (WAV)</li>
+ * <li><code>audio/ogg;codecs=opus</code> for Ogg format files that use the opus codec</li>
+ * </ul>
+ *
+ * @param contentType the content type
+ * @return the recognize options
+ */
+ public RecognizeOptions contentType(String contentType) {
+ Validate.isTrue(MediaType.parse(contentType) != null,
+ "contentType is not a valid mime audio format. Valid formats start with 'audio/'");
+ this.contentType = contentType;
+ return this;
+ }
+
+
+ /**
+ * If true, confidence measure per word is returned if available.
*
* @param wordConfidence the word confidence
* @return the recognize options
diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToText.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToText.java
index daabfda4a51..4e6c314517c 100644
--- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToText.java
+++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToText.java
@@ -14,6 +14,7 @@
package com.ibm.watson.developer_cloud.speech_to_text.v1;
import java.io.File;
+import java.io.InputStream;
import java.util.List;
import com.google.gson.JsonObject;
@@ -26,6 +27,8 @@
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechResults;
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechSession;
import com.ibm.watson.developer_cloud.speech_to_text.v1.util.MediaTypeUtils;
+import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.RecognizeDelegate;
+import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.WebSocketSpeechToTextClient;
import com.ibm.watson.developer_cloud.util.GsonSingleton;
import com.ibm.watson.developer_cloud.util.ResponseUtil;
import com.ibm.watson.developer_cloud.util.Validate;
@@ -60,6 +63,9 @@ public class SpeechToText extends WatsonService {
private final static String URL = "https://stream.watsonplatform.net/speech-to-text/api";
private static final String WORD_CONFIDENCE = "word_confidence";
private static final String SESSION = "session";
+ private static final String KEYWORDS_THRESHOLD = "keywords_threshold";
+ private static final String WORD_ALTERNATIVES_THRESHOLD = "word_alternatives_threshold";
+ private static final String KEYWORDS = "keywords";
/**
* Instantiates a new speech to text.
@@ -96,6 +102,15 @@ private void buildRecognizeRequest(RequestBuilder requestBuilder, RecognizeOptio
if (options.getModel() != null)
requestBuilder.withQuery(MODEL, options.getModel());
+
+ if (options.getKeywordsThreshold() != null)
+ requestBuilder.withQuery(KEYWORDS_THRESHOLD, options.getKeywordsThreshold());
+
+ if (options.getKeywords() != null && options.getKeywords().length > 0)
+ requestBuilder.withQuery(KEYWORDS, GsonSingleton.getGson().toJson(options.getKeywords()));
+
+ if (options.getWordAlternativesThreshold() != null)
+ requestBuilder.withQuery(WORD_ALTERNATIVES_THRESHOLD, options.getWordAlternativesThreshold());
}
/**
@@ -209,24 +224,76 @@ public SessionStatus getRecognizeStatus(final SpeechSession session) {
/**
* Recognizes an audio file and returns {@link SpeechResults}. It will try to recognize the audio
- * format based on the file extension.
+ * format based on the file extension.
+ * Here is an example of how to recognize an audio file:
+ *
+ *
+ * SpeechToText service = new SpeechToText();
+ * service.setUsernameAndPassword("USERNAME", "PASSWORD");
+ * service.setEndPoint("SERVICE_URL");
+ *
+ * SpeechResults results = service.recognize(new File("sample1.wav"));
+ * System.out.println(results);
+ *
*
* @param audio the audio file
* @return the {@link SpeechResults}
+ * @throws IllegalArgumentException if the file extension doesn't match a valid audio type
*/
public SpeechResults recognize(File audio) {
+ return recognize(audio, (RecognizeOptions) null);
+ }
+
+ /**
+ * Recognizes an audio file and returns {@link SpeechResults}.
+ *
+ * Here is an example of how to recognize an audio file:
+ *
+ *
+ * SpeechToText service = new SpeechToText();
+ * service.setUsernameAndPassword("USERNAME", "PASSWORD");
+ * service.setEndPoint("SERVICE_URL");
+ *
+ * RecognizeOptions options = new RecognizeOptions().maxAlternatives(3).continuous(true);
+ *
+ * SpeechResults results = service.recognize(new File("sample1.wav"), options);
+ * System.out.println(results);
+ *
+ *
+ * @param audio the audio
+ * @param options the options
+ * @return the speech results
+ */
+ public SpeechResults recognize(File audio, RecognizeOptions options) {
+ Validate.isTrue(audio != null && audio.exists(), "audio file is null or does not exist");
+
+ final double fileSize = audio.length() / Math.pow(1024, 2);
+ Validate.isTrue(fileSize < 100.0, "The audio file is greater than 100MB.");
+
String contentType = MediaTypeUtils.getMediaTypeFromFile(audio);
- Validate.notNull(contentType, "Audio format cannot be recognized");
- return recognize(audio, contentType, null);
+ if (options != null && options.getContentType() != null)
+ contentType = options.getContentType();
+ Validate.notNull(contentType, "The audio format cannot be recognized");
+
+ String path = PATH_RECOGNIZE;
+ if (options != null && (options.getSessionId() != null && !options.getSessionId().isEmpty()))
+ path = String.format(PATH_SESSION_RECOGNIZE, options.getSessionId());
+
+ final RequestBuilder requestBuilder = RequestBuilder.post(path);
+ buildRecognizeRequest(requestBuilder, options);
+ requestBuilder.withBody(RequestBody.create(MediaType.parse(contentType), audio));
+ return executeRequest(requestBuilder.build(), SpeechResults.class);
}
/**
* Recognizes an audio file and returns {@link SpeechResults}.
*
* @param audio the audio file
- * @param contentType the media type of the audio. If you use the audio/l16 MIME type, specify the
- * rate and channels.
+ * @param contentType the media type of the audio.
* @return the {@link SpeechResults}
+ * @deprecated Deprecated in 2.6.0
+ * Use {@link SpeechToText#recognize(File, RecognizeOptions)}
+ *
*/
public SpeechResults recognize(File audio, String contentType) {
return recognize(audio, contentType, null);
@@ -238,28 +305,56 @@ public SpeechResults recognize(File audio, String contentType) {
* @param audio the audio file
* @param contentType the media type of the audio. If you use the audio/l16 MIME type, specify the
* rate and channels.
+ *
* @param options the {@link RecognizeOptions}
* @return the {@link SpeechResults}
+ * @deprecated Deprecated in 2.6.0
+ * Use {@link SpeechToText#recognize(File, RecognizeOptions)}
*/
public SpeechResults recognize(File audio, String contentType, RecognizeOptions options) {
- Validate.isTrue(audio != null && audio.exists(), "audio file is null or does not exist");
- Validate.isTrue(audio != null && audio.exists(), "audio file is null or does not exist");
-
- Validate.isTrue((audio.length() / (1024 * 1024)) < 100.0,
- "The audio file is greater than 100MB.");
-
- Validate.isTrue(MediaType.parse(contentType) != null,
- "contentType is not a valid mime audio format. Valid formats start with 'audio/'");
-
- String path = PATH_RECOGNIZE;
- if (options != null && (options.getSessionId() != null && !options.getSessionId().isEmpty()))
- path = String.format(PATH_SESSION_RECOGNIZE, options.getSessionId());
+ // The explicit contentType must win even when options are supplied (as before this change).
+ final RecognizeOptions opt = (options != null) ? options : new RecognizeOptions();
+ opt.contentType(contentType);
- final RequestBuilder requestBuilder = RequestBuilder.post(path);
-
- buildRecognizeRequest(requestBuilder, options);
+ return recognize(audio, opt);
+ }
- requestBuilder.withBody(RequestBody.create(MediaType.parse(contentType), audio));
- return executeRequest(requestBuilder.build(), SpeechResults.class);
+ /**
+ * Recognizes an audio {@link InputStream} using WebSockets. The {@link RecognizeDelegate}
+ * instance will be called every time the service sends {@link SpeechResults}.
+ *
+ *
+ * Here is an example of how to recognize an audio file using WebSockets and get interim results:
+ *
+ *
+ * SpeechToText service = new SpeechToText();
+ * service.setUsernameAndPassword("USERNAME", "PASSWORD");
+ * service.setEndPoint("SERVICE_URL");
+ *
+ * RecognizeOptions options = new RecognizeOptions().continuous(true).interimResults(true);
+ *
+ * service.recognizeUsingWebSockets(new FileInputStream("sample1.wav"), options, new BaseRecognizeDelegate() {
+ * @Override
+ * public void onMessage(SpeechResults speechResults) {
+ * System.out.println(speechResults);
+ * }
+ * });
+ *
+ *
+ * @param audio the audio input stream
+ * @param options the recognize options
+ * @param delegate the delegate
+ */
+ public void recognizeUsingWebSockets(InputStream audio, RecognizeOptions options,
+ RecognizeDelegate delegate) {
+ Validate.notNull(audio, "audio cannot be null");
+ Validate.notNull(options, "options cannot be null");
+ Validate.notNull(options.getContentType(), "options.contentType cannot be null");
+ Validate.notNull(delegate, "delegate cannot be null");
+
+ String url = getEndPoint().replaceFirst("(https|http)", "wss");
+ WebSocketSpeechToTextClient webSocket =
+ new WebSocketSpeechToTextClient(url + PATH_RECOGNIZE, getToken());
+ webSocket.recognize(audio, options, delegate);
}
}
diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SessionStatus.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SessionStatus.java
index f2fd3b0e49e..f49d25e6975 100644
--- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SessionStatus.java
+++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SessionStatus.java
@@ -18,7 +18,7 @@
import com.ibm.watson.developer_cloud.speech_to_text.v1.SpeechToText;
/**
- * SessionStatus Status used by {@link SpeechToText}.
+ * SessionStatus used by {@link SpeechToText}.
*/
public class SessionStatus extends GenericModel {
diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechAlternative.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechAlternative.java
index 1191dc762ec..13d7e60f10c 100644
--- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechAlternative.java
+++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechAlternative.java
@@ -1,11 +1,11 @@
/**
* Copyright 2015 IBM Corp. All Rights Reserved.
- *
+ *
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
@@ -20,7 +20,8 @@
import com.ibm.watson.developer_cloud.service.model.GenericModel;
/**
- * The Class SpeechAlternative.
+ * SpeechAlternative contains the transcript of the utterance along with confidence, timestamp,
+ * etc...
*/
public class SpeechAlternative extends GenericModel {
@@ -39,7 +40,7 @@ public class SpeechAlternative extends GenericModel {
/**
* Gets the transcript.
- *
+ *
* @return The transcript
*/
public String getTranscript() {
@@ -48,7 +49,7 @@ public String getTranscript() {
/**
* Sets the transcript.
- *
+ *
* @param transcript The transcript
*/
public void setTranscript(final String transcript) {
@@ -57,7 +58,7 @@ public void setTranscript(final String transcript) {
/**
* Gets the confidence.
- *
+ *
* @return The confidence
*/
public Double getConfidence() {
@@ -66,7 +67,7 @@ public Double getConfidence() {
/**
* Sets the confidence.
- *
+ *
* @param confidence The confidence
*/
public void setConfidence(final Double confidence) {
@@ -75,7 +76,7 @@ public void setConfidence(final Double confidence) {
/**
* Gets the timestamps.
- *
+ *
* @return The timestamps
*/
public List getTimestamps() {
@@ -84,7 +85,7 @@ public List getTimestamps() {
/**
* Sets the timestamps.
- *
+ *
* @param timestamps The timestamps
*/
public void setTimestamps(final List timestamps) {
@@ -93,7 +94,7 @@ public void setTimestamps(final List timestamps) {
/**
* With timestamps.
- *
+ *
* @param timestamps the timestamps
* @return the speech
*/
@@ -105,7 +106,7 @@ public SpeechAlternative withTimestamps(final List timestamps)
/**
* Gets the word confidences.
- *
+ *
* @return The wordConfidences
*/
public List getWordConfidences() {
@@ -114,7 +115,7 @@ public List getWordConfidences() {
/**
* Sets the word confidences.
- *
+ *
* @param wordConfidences The wordConfidences
*/
public void setWordConfidences(final List wordConfidences) {
@@ -123,7 +124,7 @@ public void setWordConfidences(final List wordConfidences)
/**
* With word confidences.
- *
+ *
* @param wordConfidences the wordConfidences
* @return the speech
*/
diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechModel.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechModel.java
index dce6a3db848..1aa887cd9a8 100644
--- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechModel.java
+++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechModel.java
@@ -17,13 +17,16 @@
import com.ibm.watson.developer_cloud.service.model.GenericModel;
/**
- * The Class SpeechModel.
+ * Speech model
*/
public class SpeechModel extends GenericModel {
/** US English broadband model (16KHz). */
public static final SpeechModel EN_BROADBAND16K = new SpeechModel("en-US_BroadbandModel");
+ /** US English narrowband model (8KHz). */
+ public static final SpeechModel EN_NARROWBAND8K = new SpeechModel("en-US_NarrowbandModel");
+
/** Spanish broadband model (16KHz). */
public static final SpeechModel ES_BROADBAND16K = new SpeechModel("es-ES_BroadbandModel");
diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechModelSet.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechModelSet.java
index b15e5b4294c..1adc822d4d0 100644
--- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechModelSet.java
+++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechModelSet.java
@@ -20,7 +20,7 @@
import com.ibm.watson.developer_cloud.service.model.GenericModel;
/**
- * The Class SpeechModelSet.
+ * Speech model set
*/
public class SpeechModelSet extends GenericModel {
diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechResults.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechResults.java
index 700446253b7..0514f902c24 100644
--- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechResults.java
+++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechResults.java
@@ -67,4 +67,13 @@ public void setResults(final List results) {
this.results = results;
}
+ /**
+ * Returns <code>true</code> if the results are final
+ *
+ * @return true, if the results are final
+ */
+ public boolean isFinal() {
+ return (results != null && results.get(resultIndex) != null && results.get(resultIndex)
+ .isFinal());
+ }
}
diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechTimestamp.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechTimestamp.java
index 081cc96a174..32d1ddb66f8 100644
--- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechTimestamp.java
+++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechTimestamp.java
@@ -1,11 +1,11 @@
/**
* Copyright 2015 IBM Corp. All Rights Reserved.
- *
+ *
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
@@ -19,7 +19,7 @@
import com.ibm.watson.developer_cloud.speech_to_text.v1.util.SpeechTimestampTypeAdapter;
/**
- * The Class SpeechTimestamp.
+ * Transcription timestamp
*/
@JsonAdapter(SpeechTimestampTypeAdapter.class)
public class SpeechTimestamp extends GenericModel {
@@ -35,7 +35,7 @@ public class SpeechTimestamp extends GenericModel {
/**
* Gets the word.
- *
+ *
* @return The word
*/
public String getWord() {
@@ -44,7 +44,7 @@ public String getWord() {
/**
* Sets the word.
- *
+ *
* @param word The word
*/
public void setWord(final String word) {
@@ -53,7 +53,7 @@ public void setWord(final String word) {
/**
* Gets the start time.
- *
+ *
* @return The start time
*/
public Double getStartTime() {
@@ -62,7 +62,7 @@ public Double getStartTime() {
/**
* Sets the start time.
- *
+ *
* @param startTime The start time
*/
public void setStartTime(final Double startTime) {
@@ -71,7 +71,7 @@ public void setStartTime(final Double startTime) {
/**
* Gets the end time.
- *
+ *
* @return The end time
*/
public Double getEndTime() {
@@ -80,7 +80,7 @@ public Double getEndTime() {
/**
* Sets the end time.
- *
+ *
* @param endTime The end time
*/
public void setEndTime(final Double endTime) {
diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechWordConfidence.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechWordConfidence.java
index 66e6123bcc1..5c469c69e5c 100644
--- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechWordConfidence.java
+++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechWordConfidence.java
@@ -1,11 +1,11 @@
/**
* Copyright 2015 IBM Corp. All Rights Reserved.
- *
+ *
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
@@ -19,7 +19,7 @@
import com.ibm.watson.developer_cloud.speech_to_text.v1.util.SpeechWordConfidenceTypeAdapter;
/**
- * The Class SpeechWordConfidence.
+ * Transcription word confidence
*/
@JsonAdapter(SpeechWordConfidenceTypeAdapter.class)
public class SpeechWordConfidence extends GenericModel {
@@ -32,7 +32,7 @@ public class SpeechWordConfidence extends GenericModel {
/**
* Gets the word.
- *
+ *
* @return The word
*/
public String getWord() {
@@ -41,7 +41,7 @@ public String getWord() {
/**
* Sets the word.
- *
+ *
* @param word The word
*/
public void setWord(final String word) {
@@ -50,7 +50,7 @@ public void setWord(final String word) {
/**
* Gets the confidence.
- *
+ *
* @return The confidence
*/
public Double getConfidence() {
@@ -59,7 +59,7 @@ public Double getConfidence() {
/**
* Sets the confidence.
- *
+ *
* @param confidence The confidence
*/
public void setConfidence(final Double confidence) {
diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/BaseRecognizeDelegate.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/BaseRecognizeDelegate.java
new file mode 100644
index 00000000000..e7ccb2fea40
--- /dev/null
+++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/BaseRecognizeDelegate.java
@@ -0,0 +1,56 @@
+/**
+ * Copyright 2015 IBM Corp. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.ibm.watson.developer_cloud.speech_to_text.v1.websocket;
+
+import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechResults;
+
+/**
+ * Empty implementation of {@link RecognizeDelegate}; override only the callbacks you need.
+ */
+public class BaseRecognizeDelegate implements RecognizeDelegate {
+
+ /**
+ * Does nothing. Override to handle a received {@link SpeechResults}.
+ *
+ * @param speechResults the speech results
+ * @see RecognizeDelegate#onMessage(SpeechResults)
+ */
+ @Override
+ public void onMessage(SpeechResults speechResults) {}
+
+ /**
+ * Does nothing. Override to react when the WebSocket connection is made.
+ *
+ * @see RecognizeDelegate#onConnected()
+ */
+ @Override
+ public void onConnected() {}
+
+ /**
+ * Does nothing. Override to handle an error in the WebSocket connection.
+ *
+ * @param e the exception
+ */
+ @Override
+ public void onError(Exception e) {}
+
+ /**
+ * Does nothing. Override to react when the WebSocket connection is closed.
+ *
+ * @see RecognizeDelegate#onDisconnected()
+ */
+ @Override
+ public void onDisconnected() {}
+
+}
diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/RecognizeDelegate.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/RecognizeDelegate.java
new file mode 100644
index 00000000000..081d800e5fe
--- /dev/null
+++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/RecognizeDelegate.java
@@ -0,0 +1,49 @@
+/**
+ * Copyright 2015 IBM Corp. All Rights Reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package com.ibm.watson.developer_cloud.speech_to_text.v1.websocket;
+
+import com.ibm.watson.developer_cloud.speech_to_text.v1.SpeechToText;
+import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechResults;
+
+
+/**
+ * The recognize delegate used in the
+ * {@link SpeechToText#recognizeUsingWebSockets(java.io.InputStream, com.ibm.watson.developer_cloud.speech_to_text.v1.RecognizeOptions, RecognizeDelegate)}
+ */
+public interface RecognizeDelegate {
+
+ /**
+ * Called when a {@link SpeechResults} was received.
+ *
+ * @param speechResults the speech results
+ */
+ public void onMessage(SpeechResults speechResults);
+
+ /**
+ * Called when a WebSocket connection was made
+ */
+ public void onConnected();
+
+ /**
+ * Called when there is an error in the Web Socket connection
+ *
+ * @param e the exception
+ */
+ public void onError(Exception e);
+
+ /**
+ * Called when a WebSocket connection was closed
+ */
+ public void onDisconnected();
+}
diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/WebSocketSpeechToTextClient.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/WebSocketSpeechToTextClient.java
new file mode 100644
index 00000000000..730f5b789be
--- /dev/null
+++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/WebSocketSpeechToTextClient.java
@@ -0,0 +1,268 @@
+package com.ibm.watson.developer_cloud.speech_to_text.v1.websocket;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+
+import com.google.gson.Gson;
+import com.google.gson.JsonObject;
+import com.google.gson.JsonParseException;
+import com.google.gson.JsonParser;
+import com.ibm.watson.developer_cloud.http.HttpHeaders;
+import com.ibm.watson.developer_cloud.speech_to_text.v1.RecognizeOptions;
+import com.ibm.watson.developer_cloud.speech_to_text.v1.SpeechToText;
+import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechResults;
+import com.ibm.watson.developer_cloud.util.GsonSingleton;
+import com.neovisionaries.ws.client.WebSocket;
+import com.neovisionaries.ws.client.WebSocketAdapter;
+import com.neovisionaries.ws.client.WebSocketException;
+import com.neovisionaries.ws.client.WebSocketExtension;
+import com.neovisionaries.ws.client.WebSocketFactory;
+import com.neovisionaries.ws.client.WebSocketFrame;
+
+
+/**
+ * WebSocket client used by the {@link SpeechToText} to recognize audio
+ */
+public class WebSocketSpeechToTextClient {
+ private static final String MODEL = "model";
+ private static final String START = "start";
+ private static final String STOP = "stop";
+ private static final String ACTION = "action";
+ private static final String RESULTS = "results";
+ private static final String ERROR = "error";
+
+ private static final int TEN_SECONDS = 10000; // milliseconds
+
+ /**
+ * Listener that calls the {@link RecognizeDelegate} when a message from the WebSocket connection
+ * arrives
+ */
+ public class WebSocketListener extends WebSocketAdapter {
+ private RecognizeDelegate delegate;
+
+ /**
+ * Instantiates a new WebSocket listener.
+ *
+ * @param delegate the delegate to notify events
+ */
+ public WebSocketListener(RecognizeDelegate delegate) {
+ super();
+ this.delegate = delegate;
+ }
+
+ /*
+ * Handles a text frame from the service. An "error" payload is reported through
+ * delegate.onError; a "results" payload is parsed into SpeechResults, passed to
+ * delegate.onMessage, and the socket is disconnected once the result is final.
+ *
+ * @see com.neovisionaries.ws.client.WebSocketAdapter#onTextMessage(WebSocket, String)
+ */
+ @Override
+ public void onTextMessage(WebSocket websocket, String message) {
+ try {
+ JsonObject json = new JsonParser().parse(message).getAsJsonObject();
+ if (json.has(ERROR)) {
+ delegate.onError(new RuntimeException(json.get(ERROR).getAsString()));
+ } else if (json.has(RESULTS)) {
+ SpeechResults transcript = GsonSingleton.getGson().fromJson(message, SpeechResults.class);
+ delegate.onMessage(transcript);
+ // if final is true
+ if (transcript.isFinal())
+ websocket.disconnect();
+ }
+ } catch (JsonParseException e) {
+ // Surface malformed payloads to the delegate instead of silently dropping them
+ delegate.onError(new RuntimeException("Error parsing the incoming message: " + message, e));
+ }
+ }
+
+ /*
+ * Forwards the connection event to the delegate; the headers argument is not used.
+ *
+ * @see
+ * com.neovisionaries.ws.client.WebSocketAdapter#onConnected(com.neovisionaries.ws.client.WebSocket
+ * , java.util.Map)
+ */
+ @Override
+ public void onConnected(WebSocket websocket, Map<String, List<String>> headers)
+ throws Exception {
+ delegate.onConnected();
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see
+ * com.neovisionaries.ws.client.WebSocketAdapter#onDisconnected(com.neovisionaries.ws.client
+ * .WebSocket, com.neovisionaries.ws.client.WebSocketFrame,
+ * com.neovisionaries.ws.client.WebSocketFrame, boolean)
+ */
+ @Override
+ public void onDisconnected(WebSocket websocket, WebSocketFrame serverCloseFrame,
+ WebSocketFrame clientCloseFrame, boolean closedByServer) throws Exception {
+ delegate.onDisconnected();
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see
+ * com.neovisionaries.ws.client.WebSocketAdapter#onError(com.neovisionaries.ws.client.WebSocket,
+ * com.neovisionaries.ws.client.WebSocketException)
+ */
+ @Override
+ public void onError(WebSocket websocket, WebSocketException cause) throws Exception {
+ delegate.onError(cause);
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see
+ * com.neovisionaries.ws.client.WebSocketAdapter#handleCallbackError(com.neovisionaries.ws.client
+ * .WebSocket, java.lang.Throwable)
+ */
+ @Override
+ public void handleCallbackError(WebSocket websocket, Throwable cause) throws Exception {
+ cause.printStackTrace();
+ }
+
+ }
+
+ private static final int FOUR_KB = 4096;
+ private String token;
+ private String webSocketUrl;
+
+ /**
+ * Instantiates a new web socket speech to text client.
+ *
+ * @param webSocketUrl the web socket url.
+ *
+ *
+ * wss://stream.watsonplatform.net/speech-to-text/api/v1/api/recognize
+ *
+ * @param token the authorization token
+ */
+ public WebSocketSpeechToTextClient(String webSocketUrl, String token) {
+ this.token = token;
+ this.webSocketUrl = webSocketUrl;
+ }
+
+ /**
+ * Creates a WebSocket connection to the Speech To Text service and sends the audio bytes from the
+ * input stream for recognition. Connection, I/O and interruption failures go to the delegate.
+ *
+ * @param stream the audio stream to send
+ * @param options the recognize options
+ * @param delegate the delegate notified of connection events, results and errors
+ */
+ public void recognize(InputStream stream, RecognizeOptions options, RecognizeDelegate delegate) {
+ WebSocketListener listener = new WebSocketListener(delegate);
+ try {
+ // 1. Connect to the WebSocket
+ WebSocket ws = connect(options);
+
+ // 2. Add a listener to messages coming from the WebSocket
+ ws.addListener(listener);
+
+ // 3. Send start message
+ ws.sendText(buildStartMessage(options));
+
+ // 4. Send the input stream as binary data
+ sendInputStream(ws, stream);
+
+ // 5. Send stop message
+ ws.sendText(buildStopMessage());
+ } catch (WebSocketException e) {
+ delegate.onError(e);
+ } catch (IOException e) {
+ delegate.onError(e);
+ } catch (InterruptedException e) {
+ // Restore the interrupt status so code further up the stack can still observe it
+ Thread.currentThread().interrupt();
+ delegate.onError(e);
+ }
+ }
+
+ /**
+ * Builds the stop message.
+ *
+ *
+ * { "action": "stop" }
+ *
+ *
+ * @return the string
+ */
+ private String buildStopMessage() {
+ JsonObject stopMessage = new JsonObject();
+ stopMessage.addProperty(ACTION, STOP);
+ return stopMessage.toString();
+ }
+
+ /**
+ * Sends the stream to the WebSocket as binary frames of at most 4 KB and then closes the stream,
+ * even when reading or sending fails.
+ *
+ * @param ws the WebSocket
+ * @param stream the stream
+ * @throws IOException Signals that an I/O exception has occurred.
+ * @throws InterruptedException if the thread is interrupted while pausing between frames
+ */
+ private void sendInputStream(WebSocket ws, InputStream stream) throws IOException,
+ InterruptedException {
+ try {
+ byte[] buffer = new byte[FOUR_KB];
+ int read;
+ while ((read = stream.read(buffer)) > 0) {
+ // Send a copy: the buffer is reused by the next read and the frame may still be queued
+ ws.sendBinary(Arrays.copyOf(buffer, read));
+ Thread.sleep(10);
+ }
+ } finally {
+ // Always release the stream, including when reading or sending fails
+ stream.close();
+ }
+ }
+
+ /**
+ * Builds the start message using the {@link RecognizeOptions}.
+ *
+ *
+ * {
+ * "action": "start",
+ * "content-type": "audio/wav"
+ * }
+ *
+ *
+ * @param options the recognize options
+ * @return the string
+ */
+ private String buildStartMessage(RecognizeOptions options) {
+ JsonObject startMessage = new JsonParser().parse(new Gson().toJson(options)).getAsJsonObject();
+ startMessage.remove(MODEL);
+ startMessage.addProperty(ACTION, START);
+ return startMessage.toString();
+ }
+
+ /**
+ * Creates a connection to the Speech to Text service
+ *
+ * @param options
+ *
+ * @return the WebSocket
+ * @throws IOException Signals that an I/O exception has occurred.
+ * @throws WebSocketException the WebSocket exception
+ */
+ private WebSocket connect(RecognizeOptions options) throws IOException, WebSocketException {
+ String speechModel = options.getModel() != null ? "?model=" + options.getModel() : "";
+
+ WebSocketFactory factory = new WebSocketFactory().setConnectionTimeout(TEN_SECONDS);
+ WebSocket ws = factory.createSocket(webSocketUrl + speechModel);
+ ws.addHeader(HttpHeaders.X_WATSON_AUTHORIZATION_TOKEN, token);
+ ws.addExtension(WebSocketExtension.PERMESSAGE_DEFLATE).connect();
+ return ws;
+ }
+}
diff --git a/src/main/java/com/ibm/watson/developer_cloud/text_to_speech/v1/util/WaveUtils.java b/src/main/java/com/ibm/watson/developer_cloud/text_to_speech/v1/util/WaveUtils.java
new file mode 100644
index 00000000000..626bcc65069
--- /dev/null
+++ b/src/main/java/com/ibm/watson/developer_cloud/text_to_speech/v1/util/WaveUtils.java
@@ -0,0 +1,78 @@
+package com.ibm.watson.developer_cloud.text_to_speech.v1.util;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import com.ibm.watson.developer_cloud.text_to_speech.v1.TextToSpeech;
+
+
+/**
+ * Utility class to write the data size header in wave(.wav) files synthesized with the
+ * {@link TextToSpeech} service
+ */
+public class WaveUtils {
+ /** The WAVE meta-data header size. (value is 8) */
+ private static final int WAVE_HEADER_SIZE = 8;
+
+ /** The WAVE meta-data size position. (value is 4) */
+ private static final int WAVE_SIZE_POS = 4;
+
+ /** The WAVE meta-data position in bytes. (value is 74) */
+ private static final int WAVE_METADATA_POS = 74;
+
+ /**
+ * Re-writes the data size in the header(bytes 4-8) of the WAVE(.wav) input stream.
+ * It needs to be read in order to calculate the size.
+ *
+ * @param is the input stream
+ * @return A new input stream that includes the data size in the header
+ * @throws IOException Signals that an I/O exception has occurred.
+ */
+ public static InputStream reWriteWaveHeader(InputStream is) throws IOException {
+ byte[] audioBytes = toByteArray(is);
+ int filesize = audioBytes.length - WAVE_HEADER_SIZE;
+
+ writeInt(filesize, audioBytes, WAVE_SIZE_POS);
+ writeInt(filesize - WAVE_HEADER_SIZE, audioBytes, WAVE_METADATA_POS);
+
+ return new ByteArrayInputStream(audioBytes);
+ }
+
+ /**
+ * Writes a number into an array as 4 little-endian bytes
+ *
+ * @param value the number to write
+ * @param array the byte array
+ * @param offset the offset
+ */
+ private static void writeInt(int value, byte[] array, int offset) {
+ for (int i = 0; i < 4; i++) {
+ array[offset + i] = (byte) (value >>> (8 * i));
+ }
+ }
+
+ /**
+ * Reads an {@link InputStream} to the end and returns its content as a byte array; it is not closed.
+ *
+ * @param is the input stream
+ * @return the byte array
+ * @throws IOException If the first byte cannot be read for any reason other than end of file, or
+ * if the input stream has been closed, or if some other I/O error occurs.
+ */
+ public static byte[] toByteArray(InputStream is) throws IOException {
+ ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+
+ int nRead;
+ byte[] data = new byte[16384]; // 16 KB read buffer
+
+ while ((nRead = is.read(data, 0, data.length)) != -1) {
+ buffer.write(data, 0, nRead);
+ }
+
+ buffer.flush();
+ return buffer.toByteArray();
+ }
+
+}
diff --git a/src/test/java/com/ibm/watson/developer_cloud/WatsonServiceTest.java b/src/test/java/com/ibm/watson/developer_cloud/WatsonServiceTest.java
index eb3a8771f0c..99d04f05122 100755
--- a/src/test/java/com/ibm/watson/developer_cloud/WatsonServiceTest.java
+++ b/src/test/java/com/ibm/watson/developer_cloud/WatsonServiceTest.java
@@ -13,12 +13,17 @@
*/
package com.ibm.watson.developer_cloud;
+import static org.junit.Assert.fail;
+
import java.io.BufferedReader;
+import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
+import java.io.OutputStream;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;
@@ -35,6 +40,9 @@ public abstract class WatsonServiceTest {
private static final Logger log = Logger.getLogger(WatsonServiceTest.class.getName());
+ /**
+ * Instantiates a new watson service test.
+ */
public WatsonServiceTest() {
if (prop == null)
loadProperties();
@@ -128,8 +136,8 @@ public String getExistingProperty(String property) {
}
/**
- * Gets the existing property if exists, otherwise it returns the defaultValue
- *
+ * Gets the existing property if exists, otherwise it returns the defaultValue.
+ *
* @param property the property
* @param defaultValue the default value
* @return the existing property
@@ -182,6 +190,34 @@ private void setupLogging() {
root.setLevel(ch.qos.logback.classic.Level.OFF);
}
+ /**
+ * Write input stream to file.
+ *
+ * @param inputStream the input stream
+ * @param audio the audio
+ */
+ public static void writeInputStreamToFile(InputStream inputStream, File audio) {
+ OutputStream outStream = null;
+ try {
+ outStream = new FileOutputStream(audio);
+
+ byte[] buffer = new byte[8 * 1024];
+ int bytesRead;
+ while ((bytesRead = inputStream.read(buffer)) != -1) {
+ outStream.write(buffer, 0, bytesRead);
+ }
+ } catch (Exception e) {
+ fail();
+ } finally {
+ try {
+ inputStream.close();
+ outStream.close();
+ } catch (Exception e) {
+ fail();
+ }
+ }
+ }
+
/**
* Loads fixture.
*
@@ -197,6 +233,11 @@ public static T loadFixture(String filename, Class returnType)
return GsonSingleton.getGson().fromJson(jsonString, returnType);
}
+ /**
+ * Sets the up.
+ *
+ * @throws Exception the exception
+ */
public void setUp() throws Exception {}
}
diff --git a/src/test/java/com/ibm/watson/developer_cloud/concept_insights/v2/ConceptInsightsIT.java b/src/test/java/com/ibm/watson/developer_cloud/concept_insights/v2/ConceptInsightsIT.java
index a4b1c47721a..da3cb60b8b1 100644
--- a/src/test/java/com/ibm/watson/developer_cloud/concept_insights/v2/ConceptInsightsIT.java
+++ b/src/test/java/com/ibm/watson/developer_cloud/concept_insights/v2/ConceptInsightsIT.java
@@ -17,6 +17,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import java.util.UUID;
import org.junit.Assert;
import org.junit.Before;
@@ -316,8 +317,9 @@ public void testGetGraphs() {
*/
@Test
public void testCreateAndDeleteCorpus() {
+ final String name = UUID.randomUUID().toString();
final Account account = service.getAccountsInfo().getAccounts().get(0);
- Corpus corpus = new Corpus(account.getId(), "integration-test-corpus");
+ Corpus corpus = new Corpus(account.getId(), name);
try {
service.createCorpus(corpus);
corpus = service.getCorpus(corpus);
diff --git a/src/test/java/com/ibm/watson/developer_cloud/service/GenericServiceTest.java b/src/test/java/com/ibm/watson/developer_cloud/service/GenericServiceTest.java
index 2cb89d48c52..0edf7dda7f4 100644
--- a/src/test/java/com/ibm/watson/developer_cloud/service/GenericServiceTest.java
+++ b/src/test/java/com/ibm/watson/developer_cloud/service/GenericServiceTest.java
@@ -170,7 +170,7 @@ public void testUserAgentIsSet() {
mockAPICall();
service.getProfile(sampleText);
mockServer.verify(new HttpRequest().withMethod("POST").withHeader(
- new Header(HttpHeaders.USER_AGENT, "watson-developer-cloud-java-sdk-2.5.0")));
+ new Header(HttpHeaders.USER_AGENT, "watson-developer-cloud-java-sdk-2.6.0")));
}
@Test
diff --git a/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextIT.java b/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextIT.java
index e74d55d5d7b..151d1191240 100644
--- a/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextIT.java
+++ b/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextIT.java
@@ -17,7 +17,11 @@
import static org.junit.Assert.assertTrue;
import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
import java.util.List;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.TimeUnit;
import org.junit.Before;
import org.junit.Test;
@@ -28,12 +32,25 @@
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechModel;
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechResults;
import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechSession;
+import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.BaseRecognizeDelegate;
+/**
+ * The Class SpeechToTextIT.
+ */
public class SpeechToTextIT extends WatsonServiceTest {
- private static final String EN_BROADBAND16K = "en-US_BroadbandModel";
+ private static final String EN_BROADBAND16K = "en-US_BroadbandModel";
+ private SpeechResults asyncResults;
+
+ private CountDownLatch lock = new CountDownLatch(1);
+
private SpeechToText service;
+ /*
+ * (non-Javadoc)
+ *
+ * @see com.ibm.watson.developer_cloud.WatsonServiceTest#setUp()
+ */
@Override
@Before
public void setUp() throws Exception {
@@ -44,9 +61,12 @@ public void setUp() throws Exception {
service.setEndPoint(getValidProperty("speech_to_text.url"));
}
+ /**
+ * Test create session.
+ */
@Test
public void testCreateSession() {
- final SpeechSession session = service.createSession();
+ SpeechSession session = service.createSession();
try {
assertNotNull(session);
assertNotNull(session.getSessionId());
@@ -55,9 +75,12 @@ public void testCreateSession() {
}
}
+ /**
+ * Test create session speech model.
+ */
@Test
public void testCreateSessionSpeechModel() {
- final SpeechSession session = service.createSession(SpeechModel.EN_BROADBAND16K);
+ SpeechSession session = service.createSession(SpeechModel.EN_BROADBAND16K);
try {
assertNotNull(session);
assertNotNull(session.getSessionId());
@@ -66,9 +89,12 @@ public void testCreateSessionSpeechModel() {
}
}
+ /**
+ * Test create session string.
+ */
@Test
public void testCreateSessionString() {
- final SpeechSession session = service.createSession(EN_BROADBAND16K);
+ SpeechSession session = service.createSession(EN_BROADBAND16K);
try {
assertNotNull(session);
assertNotNull(session.getSessionId());
@@ -77,25 +103,34 @@ public void testCreateSessionString() {
}
}
+ /**
+ * Test get model.
+ */
@Test
public void testGetModel() {
- final SpeechModel model = service.getModel(EN_BROADBAND16K);
+ SpeechModel model = service.getModel(EN_BROADBAND16K);
assertNotNull(model);
assertNotNull(model.getName());
assertNotNull(model.getRate());
}
+ /**
+ * Test get models.
+ */
@Test
public void testGetModels() {
- final List models = service.getModels();
+ List models = service.getModels();
assertNotNull(models);
assertTrue(!models.isEmpty());
}
+ /**
+ * Test get recognize status.
+ */
@Test
public void testGetRecognizeStatus() {
- final SpeechSession session = service.createSession(SpeechModel.EN_BROADBAND16K);
- final SessionStatus status = service.getRecognizeStatus(session);
+ SpeechSession session = service.createSession(SpeechModel.EN_BROADBAND16K);
+ SessionStatus status = service.getRecognizeStatus(session);
try {
assertNotNull(status);
assertNotNull(status.getModel());
@@ -105,23 +140,78 @@ public void testGetRecognizeStatus() {
}
}
+ /**
+ * Test recognize audio file
+ */
@Test
public void testRecognizeFileString() {
- final File audio = new File("src/test/resources/speech_to_text/sample1.wav");
- final SpeechResults results = service.recognize(audio);
+ File audio = new File("src/test/resources/speech_to_text/sample1.wav");
+ SpeechResults results = service.recognize(audio);
assertNotNull(results.getResults().get(0).getAlternatives().get(0).getTranscript());
}
+ /**
+ * Test recognize file string recognize options.
+ */
@Test
public void testRecognizeFileStringRecognizeOptions() {
- final File audio = new File("src/test/resources/speech_to_text/sample1.wav");
- final String contentType = HttpMediaType.AUDIO_WAV;
- final RecognizeOptions options = new RecognizeOptions();
- options.continuous(true).timestamps(true).wordConfidence(true).model(EN_BROADBAND16K);
- final SpeechResults results = service.recognize(audio, contentType, options);
+ File audio = new File("src/test/resources/speech_to_text/sample1.wav");
+ String contentType = HttpMediaType.AUDIO_WAV;
+ RecognizeOptions options = new RecognizeOptions();
+ options.continuous(true).timestamps(true).wordConfidence(true).model(EN_BROADBAND16K)
+ .contentType(contentType);
+ SpeechResults results = service.recognize(audio, options);
assertNotNull(results.getResults().get(0).getAlternatives().get(0).getTranscript());
assertNotNull(results.getResults().get(0).getAlternatives().get(0).getTimestamps());
assertNotNull(results.getResults().get(0).getAlternatives().get(0).getWordConfidences());
}
+ /**
+ * Test recognize webSocket
+ *
+ * @throws FileNotFoundException the file not found exception
+ * @throws InterruptedException
+ */
+ @Test
+ public void testRecognizeWebSocket() throws FileNotFoundException, InterruptedException {
+ RecognizeOptions options = new RecognizeOptions();
+ options.continuous(true).interimResults(true);
+ options.inactivityTimeout(40).timestamps(true).maxAlternatives(2);
+ options.model(EN_BROADBAND16K).contentType(HttpMediaType.AUDIO_WAV);
+
+ service.recognizeUsingWebSockets(new FileInputStream(
+ "src/test/resources/speech_to_text/sample1.wav"), options, new BaseRecognizeDelegate() {
+
+ @Override
+ public void onConnected() {
+ System.out.println("onConnected()");
+ }
+
+ @Override
+ public void onDisconnected() {
+ System.out.println("onDisconnected()");
+ lock.countDown();
+ }
+
+ @Override
+ public void onError(Exception e) {
+ e.printStackTrace();
+ lock.countDown();
+ }
+
+ @Override
+ public void onMessage(SpeechResults speechResults) {
+ if (speechResults != null && speechResults.isFinal()) {
+ asyncResults = speechResults;
+ System.out.println(speechResults);
+ lock.countDown();
+ }
+ }
+
+ });
+
+ lock.await(20000, TimeUnit.MILLISECONDS);
+ assertNotNull(asyncResults);
+ }
+
}
diff --git a/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextTest.java b/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextTest.java
index 048930c141d..54a3b47ec2c 100644
--- a/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextTest.java
+++ b/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextTest.java
@@ -209,7 +209,7 @@ public void testRecognize() throws URISyntaxException {
response().withHeaders(
new Header(HttpHeaders.Names.CONTENT_TYPE, HttpMediaType.APPLICATION_JSON)).withBody(
GsonSingleton.getGson().toJson(speechResults)));
- final SpeechResults result = service.recognize(audio, HttpMediaType.AUDIO_WAV);
+ final SpeechResults result = service.recognize(audio);
Assert.assertNotNull(result);
Assert.assertEquals(result, speechResults);
}
@@ -251,7 +251,7 @@ public void testRecognizeMissingAudioFile() throws URISyntaxException {
boolean didItHappen = false;
try {
- service.recognize(null, HttpMediaType.AUDIO_WAV);
+ service.recognize(null);
} catch (final IllegalArgumentException e) {
didItHappen = true;
}
diff --git a/src/test/java/com/ibm/watson/developer_cloud/text_to_speech/v1/TextToSpeechIT.java b/src/test/java/com/ibm/watson/developer_cloud/text_to_speech/v1/TextToSpeechIT.java
index 2e3a6be187b..7b2861ecbb2 100644
--- a/src/test/java/com/ibm/watson/developer_cloud/text_to_speech/v1/TextToSpeechIT.java
+++ b/src/test/java/com/ibm/watson/developer_cloud/text_to_speech/v1/TextToSpeechIT.java
@@ -27,16 +27,14 @@
* the License.
*/
-import static org.junit.Assert.fail;
-
import java.io.File;
-import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
-import java.io.OutputStream;
import java.util.List;
-import org.apache.commons.io.IOUtils;
+import javax.sound.sampled.AudioSystem;
+import javax.sound.sampled.UnsupportedAudioFileException;
+
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
@@ -44,6 +42,7 @@
import com.ibm.watson.developer_cloud.WatsonServiceTest;
import com.ibm.watson.developer_cloud.http.HttpMediaType;
import com.ibm.watson.developer_cloud.text_to_speech.v1.model.Voice;
+import com.ibm.watson.developer_cloud.text_to_speech.v1.util.WaveUtils;
/**
* The Class TextToSpeechIntegrationTest.
@@ -67,52 +66,44 @@ public void setUp() throws Exception {
service.setEndPoint(prop.getProperty("text_to_speech.url"));
}
- /**
- * Synthesize.
- *
- * @param text the text
- * @param audio the audio
- */
- private void synthesize(String text, File audio) {
- final InputStream is = service.synthesize(text, Voice.EN_LISA, HttpMediaType.AUDIO_WAV);
- Assert.assertNotNull(is);
- OutputStream outStream = null;
- try {
- outStream = new FileOutputStream(audio);
- final byte[] buffer = new byte[8 * 1024];
- int bytesRead;
- while ((bytesRead = is.read(buffer)) != -1) {
- outStream.write(buffer, 0, bytesRead);
- }
- } catch (final Exception e) {
- fail();
- } finally {
- IOUtils.closeQuietly(is);
- IOUtils.closeQuietly(outStream);
- }
- }
/**
* Test get voices.
*/
@Test
public void testGetVoices() {
- final List voices = service.getVoices();
+ List voices = service.getVoices();
Assert.assertNotNull(voices);
Assert.assertTrue(!voices.isEmpty());
}
/**
- * Test synthesize.
+ * Synthesize text and write it to a temporary file
*
* @throws IOException Signals that an I/O exception has occurred.
*/
@Test
public void testSynthesize() throws IOException {
- final String text = "This is an integration test";
- final File audio = File.createTempFile("tts-audio", "wav");
+ String text = "This is an integration test";
+ InputStream result = service.synthesize(text, Voice.EN_LISA, HttpMediaType.AUDIO_WAV);
+ writeInputStreamToFile(result, File.createTempFile("tts-audio", "wav"));
+ }
- synthesize(text, audio);
+ /**
+ * Tests rewriting a WAVE header that lacks the data size because the audio was streamed.
+ *
+ * @throws IOException Signals that an I/O exception has occurred.
+ * @throws UnsupportedAudioFileException the unsupported audio file exception
+ */
+ @Test
+ public void testSynthesizeAndFixHeader() throws IOException, UnsupportedAudioFileException {
+ String text = "one two three four five";
+ InputStream result = service.synthesize(text, Voice.EN_LISA, HttpMediaType.AUDIO_WAV);
+ Assert.assertNotNull(result);
+ result = WaveUtils.reWriteWaveHeader(result);
+ File tempFile = File.createTempFile("output", ".wav");
+ writeInputStreamToFile(result, tempFile);
+ Assert.assertNotNull(AudioSystem.getAudioFileFormat(tempFile));
}
}
diff --git a/src/test/java/com/ibm/watson/developer_cloud/text_to_speech/v1/TextToSpeechTest.java b/src/test/java/com/ibm/watson/developer_cloud/text_to_speech/v1/TextToSpeechTest.java
index cd036046609..00fd7b933b0 100644
--- a/src/test/java/com/ibm/watson/developer_cloud/text_to_speech/v1/TextToSpeechTest.java
+++ b/src/test/java/com/ibm/watson/developer_cloud/text_to_speech/v1/TextToSpeechTest.java
@@ -18,6 +18,7 @@
import io.netty.handler.codec.http.HttpHeaders;
import java.io.File;
+import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
@@ -28,6 +29,9 @@
import java.util.List;
import java.util.Map;
+import javax.sound.sampled.AudioSystem;
+import javax.sound.sampled.UnsupportedAudioFileException;
+
import org.junit.Assert;
import org.junit.Before;
import org.junit.FixMethodOrder;
@@ -40,6 +44,7 @@
import com.ibm.watson.developer_cloud.WatsonServiceUnitTest;
import com.ibm.watson.developer_cloud.http.HttpMediaType;
import com.ibm.watson.developer_cloud.text_to_speech.v1.model.Voice;
+import com.ibm.watson.developer_cloud.text_to_speech.v1.util.WaveUtils;
import com.ibm.watson.developer_cloud.util.GsonSingleton;
/**
@@ -213,4 +218,23 @@ public void testWithVoiceAsWav() {
}
}
+
+
+ /**
+ * Tests rewriting a WAVE header that lacks the data size because the audio was streamed.
+ *
+ * @throws IOException Signals that an I/O exception has occurred.
+ * @throws UnsupportedAudioFileException the unsupported audio file exception
+ */
+ @Test
+ public void testSynthesizeAndFixHeader() throws IOException, UnsupportedAudioFileException {
+ File audio = new File("src/test/resources/text_to_speech/numbers.wav");
+ InputStream stream = new FileInputStream(audio);
+ Assert.assertNotNull(stream);
+ stream = WaveUtils.reWriteWaveHeader(stream);
+ File tempFile = File.createTempFile("output", ".wav");
+ writeInputStreamToFile(stream, tempFile);
+ Assert.assertNotNull(AudioSystem.getAudioFileFormat(tempFile));
+ }
+
}
diff --git a/src/test/resources/text_to_speech/numbers.wav b/src/test/resources/text_to_speech/numbers.wav
new file mode 100644
index 00000000000..d03169a650c
Binary files /dev/null and b/src/test/resources/text_to_speech/numbers.wav differ