diff --git a/README.md b/README.md index 23b780231e2..a486bc920d3 100644 --- a/README.md +++ b/README.md @@ -53,13 +53,13 @@ APIs and SDKs that use cognitive computing to solve complex problems. com.ibm.watson.developer_cloud java-sdk - 2.5.0 + 2.6.0 ``` ##### Gradle ```gradle -'com.ibm.watson.developer_cloud:java-sdk:2.5.0' +'com.ibm.watson.developer_cloud:java-sdk:2.6.0' ``` Now, you are ready to see some [examples](https://github.com/watson-developer-cloud/java-sdk/tree/master/examples/java/com/ibm/watson/developer_cloud). @@ -481,7 +481,7 @@ Gradle: ```sh $ cd java-sdk - $ gradle jar # build jar file (build/libs/watson-developer-cloud-2.5.0.jar) + $ gradle jar # build jar file (build/libs/watson-developer-cloud-2.6.0.jar) $ gradle test # run tests ``` @@ -551,4 +551,4 @@ See [CONTRIBUTING.md](CONTRIBUTING.md). [apache_maven]: http://maven.apache.org/ [releases]: https://github.com/watson-developer-cloud/java-sdk/releases -[jar]: https://github.com/watson-developer-cloud/java-sdk/releases/download/java-sdk-2.5.0/java-sdk-2.5.0-jar-with-dependencies.jar +[jar]: https://github.com/watson-developer-cloud/java-sdk/releases/download/java-sdk-2.6.0/java-sdk-2.6.0-jar-with-dependencies.jar diff --git a/build.gradle b/build.gradle index 2c69f84db5d..bfa3083ae50 100644 --- a/build.gradle +++ b/build.gradle @@ -6,7 +6,7 @@ sourceCompatibility = 1.6 targetCompatibility = 1.6 group = 'com.ibm.watson.developercloud' archivesBaseName = 'watson-developer-cloud' -version = '2.5.0' +version = '2.6.0' description = 'Client library to use the IBM Watson Services and AlchemyAPI' diff --git a/config.properties.enc b/config.properties.enc index 3985663ea4b..fde13015d00 100644 Binary files a/config.properties.enc and b/config.properties.enc differ diff --git a/examples/java/com/ibm/watson/developer_cloud/concept_expansion/v1/ConceptExpansionExample.java b/examples/java/com/ibm/watson/developer_cloud/concept_expansion/v1/ConceptExpansionExample.java index c0b51f917cc..58dc68205c7 
100644 --- a/examples/java/com/ibm/watson/developer_cloud/concept_expansion/v1/ConceptExpansionExample.java +++ b/examples/java/com/ibm/watson/developer_cloud/concept_expansion/v1/ConceptExpansionExample.java @@ -23,9 +23,8 @@ public static void main(String[] args) { service.setUsernameAndPassword("", ""); String[] seeds = new String[] {"nyc", "dc", "london", "big cities"}; - String label = "demo"; - Job job = service.createJob(label, seeds); + Job job = service.createJob(seeds); while (service.getJobStatus(job) == Job.Status.AWAITING_WORK || service.getJobStatus(job) == Job.Status.IN_FLIGHT) { diff --git a/examples/java/com/ibm/watson/developer_cloud/document_conversion/v1/DocumentConversionCustomConfigExample.java b/examples/java/com/ibm/watson/developer_cloud/document_conversion/v1/DocumentConversionCustomConfigExample.java index aa3e7bb4052..1f9ec66f382 100644 --- a/examples/java/com/ibm/watson/developer_cloud/document_conversion/v1/DocumentConversionCustomConfigExample.java +++ b/examples/java/com/ibm/watson/developer_cloud/document_conversion/v1/DocumentConversionCustomConfigExample.java @@ -1,11 +1,11 @@ /** * Copyright 2015 IBM Corp. All Rights Reserved. - * + * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. 
See the License for the specific language governing permissions and limitations under @@ -13,72 +13,75 @@ */ package com.ibm.watson.developer_cloud.document_conversion.v1; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; + import com.google.gson.JsonObject; import com.google.gson.JsonParser; import com.ibm.watson.developer_cloud.document_conversion.v1.model.Answers; import com.ibm.watson.developer_cloud.http.HttpMediaType; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; - public class DocumentConversionCustomConfigExample { - public static void main(String[] args) { - final String versionDate = "2015-12-14"; - DocumentConversion service = new DocumentConversion(versionDate); - service.setUsernameAndPassword("", ""); - - final File html = new File("src/test/resources/document_conversion/html-with-extra-content-input.htm"); + public static void main(String[] args) { + final String versionDate = "2015-12-14"; + DocumentConversion service = new DocumentConversion(versionDate); + service.setUsernameAndPassword("", ""); - // Run a conversion with no configuration specified. The Document Conversion service will use - // its default configuration when no configuration is specified. For this example, the - // Document Conversion service will section a HTML document by h1, h2, h3, h4, h5, and h6 tags. - // Those sections will be returned as Answers - System.out.println("Convert html document to Answer Units using default configuration"); - final Answers htmlToAnswersWithDefaultConfig = - service.convertDocumentToAnswer(html, HttpMediaType.TEXT_HTML); - System.out.println(htmlToAnswersWithDefaultConfig); + final File html = + new File("src/test/resources/document_conversion/html-with-extra-content-input.htm"); - System.out.println("=================================================="); + // Run a conversion with no configuration specified. 
The Document Conversion service will use + // its default configuration when no configuration is specified. For this example, the + // Document Conversion service will section a HTML document by h1, h2, h3, h4, h5, and h6 tags. + // Those sections will be returned as Answers + System.out.println("Convert html document to Answer Units using default configuration"); + final Answers htmlToAnswersWithDefaultConfig = + service.convertDocumentToAnswer(html, HttpMediaType.TEXT_HTML); + System.out.println(htmlToAnswersWithDefaultConfig); - // Run a conversion with a custom configuration. The next example shows how to convert this same - // document with a custom configuration. Instead of sectioning by the default settings (h1, h2, - // h3, h4, h5, and h6), the following example shows how to section a HTML document by only the - // h1 tag. This will result in Answers that are sectioned by h1 tags. - String configAsString = "{\n" + - " \"answer_units\": {\n" + - " \"selector_tags\": [\"h1\"]\n" + - " }\n" + - "}"; - JsonParser jsonParser = new JsonParser(); - JsonObject customConfig = jsonParser.parse(configAsString).getAsJsonObject(); + System.out.println("=================================================="); - System.out.println("Convert html document to Answer Units using custom configuration"); - final Answers htmlToAnswersWithCustomConfig = - service.convertDocumentToAnswer(html, HttpMediaType.TEXT_HTML, customConfig); - System.out.println(htmlToAnswersWithCustomConfig); + // Run a conversion with a custom configuration. The next example shows how to convert this same + // document with a custom configuration. Instead of sectioning by the default settings (h1, h2, + // h3, h4, h5, and h6), the following example shows how to section a HTML document by only the + // h1 tag. This will result in Answers that are sectioned by h1 tags. 
+ String configAsString = + "{\n" + " \"answer_units\": {\n" + " \"selector_tags\": [\"h1\"]\n" + " }\n" + + "}"; + JsonParser jsonParser = new JsonParser(); + JsonObject customConfig = jsonParser.parse(configAsString).getAsJsonObject(); - System.out.println("=================================================="); + System.out.println("Convert html document to Answer Units using custom configuration"); + final Answers htmlToAnswersWithCustomConfig = + service.convertDocumentToAnswer(html, HttpMediaType.TEXT_HTML, customConfig); + System.out.println(htmlToAnswersWithCustomConfig); - // Run a conversion with a custom configuration that is loaded from a file. This example is similar - // to the previous one above. The custom configuration from the file will section a HTML document - // by only the h2 tag. This will result in Answers that are sectioned by h2 tags. - System.out.println("Convert html document to Answer Units using custom configuration loaded from a file"); - String customConfigFilePath = "src/test/resources/document_conversion/answer_unit_config_selector_h2.json"; - JsonObject customConfigFromFile = null; - try { - customConfigFromFile = service.loadCustomConfig(new FileInputStream(customConfigFilePath)); - } catch(FileNotFoundException e ) { - e.printStackTrace(); - } + System.out.println("=================================================="); - if(customConfigFilePath == null) { - System.err.println("ERROR - Unable to load custom config from file " + customConfigFilePath); - return; - } + // Run a conversion with a custom configuration that is loaded from a file. This example is + // similar + // to the previous one above. The custom configuration from the file will section a HTML + // document + // by only the h2 tag. This will result in Answers that are sectioned by h2 tags. 
+ System.out + .println("Convert html document to Answer Units using custom configuration loaded from a file"); + String customConfigFilePath = + "src/test/resources/document_conversion/answer_unit_config_selector_h2.json"; + JsonObject customConfigFromFile = null; + try { + customConfigFromFile = service.loadCustomConfig(new FileInputStream(customConfigFilePath)); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } - final Answers htmlToAnswersWithCustomConfigFromFile = - service.convertDocumentToAnswer(html, HttpMediaType.TEXT_HTML, customConfigFromFile); - System.out.println(htmlToAnswersWithCustomConfigFromFile); + if (customConfigFromFile == null) { + System.err.println("ERROR - Unable to load custom config from file " + customConfigFilePath); + return; } + + final Answers htmlToAnswersWithCustomConfigFromFile = + service.convertDocumentToAnswer(html, HttpMediaType.TEXT_HTML, customConfigFromFile); + System.out.println(htmlToAnswersWithCustomConfigFromFile); + } } diff --git a/examples/java/com/ibm/watson/developer_cloud/speech_to_text/v1/RecognizeUsingWebSockets.java b/examples/java/com/ibm/watson/developer_cloud/speech_to_text/v1/RecognizeUsingWebSockets.java new file mode 100644 index 00000000000..b33cb0fad1b --- /dev/null +++ b/examples/java/com/ibm/watson/developer_cloud/speech_to_text/v1/RecognizeUsingWebSockets.java @@ -0,0 +1,38 @@ +package com.ibm.watson.developer_cloud.speech_to_text.v1; + +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import com.ibm.watson.developer_cloud.http.HttpMediaType; +import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechResults; +import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.BaseRecognizeDelegate; + +/** + * Recognize using WebSockets a sample wav file and print the transcript into the console output. 
+ */ +public class RecognizeUsingWebSockets { + private static CountDownLatch lock = new CountDownLatch(1); + + public static void main(String[] args) throws FileNotFoundException, InterruptedException { + SpeechToText service = new SpeechToText(); + service.setUsernameAndPassword("", ""); + + FileInputStream audio = new FileInputStream("src/test/resources/speech_to_text/sample1.wav"); + + RecognizeOptions options = new RecognizeOptions(); + options.continuous(true).interimResults(true).contentType(HttpMediaType.AUDIO_WAV); + + service.recognizeUsingWebSockets(audio, options, new BaseRecognizeDelegate() { + @Override + public void onMessage(SpeechResults speechResults) { + System.out.println(speechResults); + if (speechResults.isFinal()) + lock.countDown(); + } + }); + + lock.await(20000, TimeUnit.MILLISECONDS); + } +} diff --git a/examples/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextExample.java b/examples/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextExample.java index b6d523ab94f..687524dd241 100644 --- a/examples/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextExample.java +++ b/examples/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextExample.java @@ -1,11 +1,11 @@ /** * Copyright 2015 IBM Corp. All Rights Reserved. - * + * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. 
See the License for the specific language governing permissions and limitations under @@ -15,7 +15,6 @@ import java.io.File; -import com.ibm.watson.developer_cloud.http.HttpMediaType; import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechResults; @@ -30,7 +29,7 @@ public static void main(String[] args) { service.setUsernameAndPassword("", ""); File audio = new File("src/test/resources/speech_to_text/sample1.wav"); - SpeechResults transcript = service.recognize(audio, HttpMediaType.AUDIO_WAV); + SpeechResults transcript = service.recognize(audio); System.out.println(transcript); } diff --git a/pom.xml b/pom.xml index cb47b7bb97a..7c04428672c 100644 --- a/pom.xml +++ b/pom.xml @@ -2,7 +2,7 @@ 4.0.0 com.ibm.watson.developer_cloud - 2.5.1-SNAPSHOT + 2.6.1-SNAPSHOT jar java-sdk Watson Developer Cloud Java SDK @@ -20,7 +20,7 @@ com.squareup.okhttp okhttp - 2.7.0 + 2.7.2 com.google.code.gson @@ -35,9 +35,14 @@ junit junit - 4.11 + 4.12 test + + com.neovisionaries + nv-websocket-client + 1.19 + org.mock-server diff --git a/src/main/java/com/ibm/watson/developer_cloud/concept_expansion/v1/ConceptExpansion.java b/src/main/java/com/ibm/watson/developer_cloud/concept_expansion/v1/ConceptExpansion.java index 3951ed17add..757c5bb7788 100755 --- a/src/main/java/com/ibm/watson/developer_cloud/concept_expansion/v1/ConceptExpansion.java +++ b/src/main/java/com/ibm/watson/developer_cloud/concept_expansion/v1/ConceptExpansion.java @@ -67,6 +67,17 @@ public ConceptExpansion() { setDataset(Dataset.MT_SAMPLES); } + /** + * Creates a {@link Job}. + * + * @param seeds List of terms to be used as seeds + * + * @return the {@link Job} + */ + public Job createJob(final String[] seeds) { + return createJob(null, seeds); + } + /** * Creates a {@link Job}. 
* @@ -76,7 +87,6 @@ public ConceptExpansion() { * @return the {@link Job} */ public Job createJob(final String label, final String[] seeds) { - Validate.notEmpty(label, "label cannot be null or empty"); Validate.notEmpty(seeds, "seeds cannot be null or empty"); Validate.notNull(dataset, "dataset cannot be null"); diff --git a/src/main/java/com/ibm/watson/developer_cloud/http/HttpHeaders.java b/src/main/java/com/ibm/watson/developer_cloud/http/HttpHeaders.java index a3d819a0004..63db918e4e2 100644 --- a/src/main/java/com/ibm/watson/developer_cloud/http/HttpHeaders.java +++ b/src/main/java/com/ibm/watson/developer_cloud/http/HttpHeaders.java @@ -179,4 +179,6 @@ public interface HttpHeaders { */ public static final String WWW_AUTHENTICATE = "WWW-Authenticate"; + /** The Authorization token header. */ + public static final String X_WATSON_AUTHORIZATION_TOKEN = "X-Watson-Authorization-Token"; } diff --git a/src/main/java/com/ibm/watson/developer_cloud/service/ConflictException.java b/src/main/java/com/ibm/watson/developer_cloud/service/ConflictException.java new file mode 100644 index 00000000000..6c489308fd6 --- /dev/null +++ b/src/main/java/com/ibm/watson/developer_cloud/service/ConflictException.java @@ -0,0 +1,39 @@ +/** + * Copyright 2015 IBM Corp. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.ibm.watson.developer_cloud.service; + +import com.ibm.watson.developer_cloud.http.HttpStatus; +import com.squareup.okhttp.Response; + +/** + * 409 Conflict (HTTP/1.1 - RFC 2616) + */ +public class ConflictException extends ServiceResponseException { + + /** + * The Constant serialVersionUID. + */ + private static final long serialVersionUID = 1L; + + /** + * Instantiates a new Conflict Exception. + * + * @param message the error message + * @param response the HTTP response + */ + public ConflictException(String message, Response response) { + super(HttpStatus.CONFLICT, message, response); + } + +} diff --git a/src/main/java/com/ibm/watson/developer_cloud/service/WatsonService.java b/src/main/java/com/ibm/watson/developer_cloud/service/WatsonService.java index 2c45680b560..fcfaeaea5a2 100644 --- a/src/main/java/com/ibm/watson/developer_cloud/service/WatsonService.java +++ b/src/main/java/com/ibm/watson/developer_cloud/service/WatsonService.java @@ -26,12 +26,14 @@ import com.google.gson.JsonSyntaxException; import com.ibm.watson.developer_cloud.http.HttpHeaders; import com.ibm.watson.developer_cloud.http.HttpStatus; +import com.ibm.watson.developer_cloud.http.RequestBuilder; import com.ibm.watson.developer_cloud.service.model.GenericModel; import com.ibm.watson.developer_cloud.util.BluemixUtils; import com.ibm.watson.developer_cloud.util.RequestUtil; import com.ibm.watson.developer_cloud.util.ResponseUtil; import com.squareup.okhttp.Credentials; import com.squareup.okhttp.Headers; +import com.squareup.okhttp.HttpUrl; import com.squareup.okhttp.OkHttpClient; import com.squareup.okhttp.Request; import com.squareup.okhttp.Request.Builder; @@ -69,9 +71,9 @@ public WatsonService(String name) { /** - * Configure HTTP client. + * Configures the HTTP client. 
* - * @return the okhttp client + * @return the HTTP client */ protected OkHttpClient configureHttpClient() { final OkHttpClient client = new OkHttpClient(); @@ -147,6 +149,8 @@ protected Response execute(Request request) { case HttpStatus.NOT_ACCEPTABLE: // HTTP 406 throw new ForbiddenException(error != null ? error : "Forbidden: Service refuse the request", response); + case HttpStatus.CONFLICT: // HTTP 409 + throw new ConflictException(error != null ? error : "", response); case HttpStatus.REQUEST_TOO_LONG: // HTTP 413 throw new RequestTooLargeException(error != null ? error : "Request too large: The request entity is larger than the server is able to process", @@ -216,6 +220,20 @@ public String getEndPoint() { return endPoint; } + /** + * Gets an authorization token that can be used to authorize API calls. + * + * + * @return the token + */ + public String getToken() { + HttpUrl url = + HttpUrl.parse(getEndPoint()).newBuilder().setPathSegment(0, "authorization").build(); + Request request = RequestBuilder.get(url + "/v1/token").withQuery("url", getEndPoint()).build(); + Response response = execute(request); + return ResponseUtil.getJsonObject(response).get("token").getAsString(); + } + /** * Gets the error message from a JSON response * @@ -267,7 +285,7 @@ public String getName() { * @return the user agent */ private final String getUserAgent() { - return "watson-developer-cloud-java-sdk-2.5.0"; + return "watson-developer-cloud-java-sdk-2.6.0"; } /** diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/RecognizeOptions.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/RecognizeOptions.java index 75c78ff963f..b656911b15c 100644 --- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/RecognizeOptions.java +++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/RecognizeOptions.java @@ -13,24 +13,40 @@ */ package com.ibm.watson.developer_cloud.speech_to_text.v1; +import 
org.apache.commons.lang3.Validate; + +import com.google.gson.annotations.SerializedName; import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechSession; +import com.squareup.okhttp.MediaType; /** - * Recognize Options when using the - * {@link SpeechToText#recognize(java.io.File, String, RecognizeOptions)} method. + * Parameters to be used during a recognize call in the {@link SpeechToText} service. */ public class RecognizeOptions { + + @SerializedName("content-type") + private String contentType; private Boolean continuous; private Integer inactivityTimeout; + + @SerializedName("interim_results") + private Boolean interimResults; + private String[] keywords; + + @SerializedName("keywords_threshold") + private Double keywordsThreshold; private Integer maxAlternatives; private String model; private String sessionId; - private Boolean timestamps; - private Boolean wordConfidence; + @SerializedName("word_alternatives_threshold") + private Double wordAlternativesThreshold; + + @SerializedName("word_confidence") + private Boolean wordConfidence; /** * If true, multiple final results that represent multiple consecutive phrases separated by pauses @@ -44,6 +60,15 @@ public RecognizeOptions continuous(Boolean continuous) { return this; } + /** + * Gets the content type. + * + * @return the contentType + */ + public String getContentType() { + return contentType; + } + /** * Gets the continuous. * @@ -62,6 +87,33 @@ public Integer getInactivityTimeout() { return inactivityTimeout; } + /** + * Gets the interim results. + * + * @return the interimResults + */ + public Boolean getInterimResults() { + return interimResults; + } + + /** + * Gets the keywords. + * + * @return the keywords + */ + public String[] getKeywords() { + return keywords; + } + + /** + * Gets the keywords threshold. + * + * @return the keywordsThreshold + */ + public Double getKeywordsThreshold() { + return keywordsThreshold; + } + /** * Gets the max alternatives. 
* @@ -98,7 +150,14 @@ public Boolean getTimestamps() { return timestamps; } - + /** + * Gets the word alternatives threshold. + * + * @return the wordAlternativesThreshold + */ + public Double getWordAlternativesThreshold() { + return wordAlternativesThreshold; + } /** * Gets the word confidence. @@ -121,7 +180,50 @@ public RecognizeOptions inactivityTimeout(Integer inactivityTimeout) { } /** - * Maximum number of alternative transcripts returned + * If true, the service sends interim results for the transcription. Otherwise, the recognition + * ends after first "end of speech" is detected. The default is false. + * + * @param interimResults the interim results + * @return the recognize options + */ + public RecognizeOptions interimResults(Boolean interimResults) { + this.interimResults = interimResults; + return this; + } + + /** + * Specifies an array of keyword strings to be matched in the input audio. By default, the service + * does no keyword spotting. + * + * + * @param keywords the keywords + * @return the recognize options + */ + public RecognizeOptions keywords(String[] keywords) { + this.keywords = keywords; + return this; + } + + + + /** + * Specifies a minimum level of confidence that the service must have to report a matching keyword + * in the input audio. Specify a probability value between 0 and 1 inclusive. A match must have at + * least the specified confidence to be returned. Omit the parameter or specify a value of null + * (the default) to spot no keywords. If you specify a valid threshold, you must also specify at + * least one keyword. + * + * + * @param keywordsThreshold the keywords threshold + * @return the recognize options + */ + public RecognizeOptions keywordsThreshold(Double keywordsThreshold) { + this.keywordsThreshold = keywordsThreshold; + return this; + } + + /** + * Maximum number of alternative transcripts returned. 
* * @param maxAlternatives the max alternatives * @return the recognize options @@ -132,7 +234,7 @@ public RecognizeOptions maxAlternatives(Integer maxAlternatives) { } /** - * Sets the model name used for the recognition + * Sets the model name used for the recognition. * * @param model the model * @return the recognize options @@ -142,6 +244,17 @@ public RecognizeOptions model(String model) { return this; } + /** + * Sets the session id. + * + * @param session the {@link SpeechSession} + * @return the recognize options + */ + public RecognizeOptions session(SpeechSession session) { + this.sessionId = session.getSessionId(); + return this; + } + /** * Sets session id. * @@ -154,29 +267,54 @@ public RecognizeOptions sessionId(String sessionId) { } /** - * Sets the session id. + * If true, time alignment for each word is returned. * - * @param session the {@link SpeechSession} + * @param timestamps the timestamps * @return the recognize options */ - public RecognizeOptions session(SpeechSession session) { - this.sessionId = session.getSessionId(); + public RecognizeOptions timestamps(Boolean timestamps) { + this.timestamps = timestamps; return this; } /** - * If true, time alignment for each word is returned + * Specifies a minimum level of confidence that the service must have to report a hypothesis for a + * word from the input audio. Specify a probability value between 0 and 1 inclusive. A hypothesis + * must have at least the specified confidence to be returned as a word alternative. Omit the + * parameter or specify a value of null (the default) to return no word alternatives. 
* - * @param timestamps the timestamps + * + * + * @param wordAlternativesThreshold the word alternatives threshold * @return the recognize options */ - public RecognizeOptions timestamps(Boolean timestamps) { - this.timestamps = timestamps; + public RecognizeOptions wordAlternativesThreshold(Double wordAlternativesThreshold) { + this.wordAlternativesThreshold = wordAlternativesThreshold; return this; } /** - * If true, confidence measure per word is returned if available + * The format of the audio data specified as one of the following values:
+ *
    + *
  • audio/flac for Free Lossless Audio Codec (FLAC)
  • + *
  • audio/l16 for Linear 16-bit Pulse-Code Modulation (PCM)
  • + *
  • audio/wav for Waveform Audio File Format (WAV)
  • + *
  • audio/ogg;codecs=opus for Ogg format files that use the opus codec
  • + *
+ * + * @param contentType the content type + * @return the recognize options + */ + public RecognizeOptions contentType(String contentType) { + Validate.isTrue(MediaType.parse(contentType) != null, + "contentType is not a valid mime audio format. Valid formats start with 'audio/'"); + this.contentType = contentType; + return this; + } + + + /** + * If true, confidence measure per word is returned if available. * * @param wordConfidence the word confidence * @return the recognize options diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToText.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToText.java index daabfda4a51..4e6c314517c 100644 --- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToText.java +++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToText.java @@ -14,6 +14,7 @@ package com.ibm.watson.developer_cloud.speech_to_text.v1; import java.io.File; +import java.io.InputStream; import java.util.List; import com.google.gson.JsonObject; @@ -26,6 +27,8 @@ import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechResults; import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechSession; import com.ibm.watson.developer_cloud.speech_to_text.v1.util.MediaTypeUtils; +import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.RecognizeDelegate; +import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.WebSocketSpeechToTextClient; import com.ibm.watson.developer_cloud.util.GsonSingleton; import com.ibm.watson.developer_cloud.util.ResponseUtil; import com.ibm.watson.developer_cloud.util.Validate; @@ -60,6 +63,9 @@ public class SpeechToText extends WatsonService { private final static String URL = "https://stream.watsonplatform.net/speech-to-text/api"; private static final String WORD_CONFIDENCE = "word_confidence"; private static final String SESSION = "session"; + private static final String KEYWORDS_THRESHOLD = 
"keywords_threshold"; + private static final String WORD_ALTERNATIVES_THRESHOLD = "word_alternatives_threshold"; + private static final String KEYWORDS = "keywords"; /** * Instantiates a new speech to text. @@ -96,6 +102,15 @@ private void buildRecognizeRequest(RequestBuilder requestBuilder, RecognizeOptio if (options.getModel() != null) requestBuilder.withQuery(MODEL, options.getModel()); + + if (options.getKeywordsThreshold() != null) + requestBuilder.withQuery(KEYWORDS_THRESHOLD, options.getKeywordsThreshold()); + + if (options.getKeywords() != null && options.getKeywords().length > 0) + requestBuilder.withQuery(KEYWORDS, GsonSingleton.getGson().toJson(options.getKeywords())); + + if (options.getWordAlternativesThreshold() != null) + requestBuilder.withQuery(WORD_ALTERNATIVES_THRESHOLD, options.getWordAlternativesThreshold()); } /** @@ -209,24 +224,76 @@ public SessionStatus getRecognizeStatus(final SpeechSession session) { /** * Recognizes an audio file and returns {@link SpeechResults}. It will try to recognize the audio - * format based on the file extension. + * format based on the file extension.
+ * Here is an example of how to recognize an audio file: + * + *
+   * SpeechToText service = new SpeechToText();
+   * service.setUsernameAndPassword("USERNAME", "PASSWORD");
+   * service.setEndPoint("SERVICE_URL");
+   * 
+   * SpeechResults results = service.recognize(new File("sample1.wav"));
+   * System.out.println(results);
+   * 
* * @param audio the audio file * @return the {@link SpeechResults} + * @throws IllegalArgumentException if the file extension doesn't match a valid audio type */ public SpeechResults recognize(File audio) { + return recognize(audio, (RecognizeOptions) null); + } + + /** + * Recognizes an audio file and returns {@link SpeechResults}.
+ *
+ * Here is an example of how to recognize an audio file: + * + *
+   * SpeechToText service = new SpeechToText();
+   * service.setUsernameAndPassword("USERNAME", "PASSWORD");
+   * service.setEndPoint("SERVICE_URL");
+   * 
+   * RecognizeOptions options = new RecognizeOptions().maxAlternatives(3).continuous(true);
+   * 
+   * SpeechResults results = service.recognize(new File("sample1.wav"), options);
+   * System.out.println(results);
+   * 
+ * + * @param audio the audio + * @param options the options + * @return the speech results + */ + public SpeechResults recognize(File audio, RecognizeOptions options) { + Validate.isTrue(audio != null && audio.exists(), "audio file is null or does not exist"); + + final double fileSize = audio.length() / Math.pow(1024, 2); + Validate.isTrue(fileSize < 100.0, "The audio file is greater than 100MB."); + String contentType = MediaTypeUtils.getMediaTypeFromFile(audio); - Validate.notNull(contentType, "Audio format cannot be recognized"); - return recognize(audio, contentType, null); + if (options != null && options.getContentType() != null) + contentType = options.getContentType(); + Validate.notNull(contentType, "The audio format cannot be recognized"); + + String path = PATH_RECOGNIZE; + if (options != null && (options.getSessionId() != null && !options.getSessionId().isEmpty())) + path = String.format(PATH_SESSION_RECOGNIZE, options.getSessionId()); + + final RequestBuilder requestBuilder = RequestBuilder.post(path); + buildRecognizeRequest(requestBuilder, options); + requestBuilder.withBody(RequestBody.create(MediaType.parse(contentType), audio)); + return executeRequest(requestBuilder.build(), SpeechResults.class); } /** * Recognizes an audio file and returns {@link SpeechResults}. * * @param audio the audio file - * @param contentType the media type of the audio. If you use the audio/l16 MIME type, specify the - * rate and channels. + * @param contentType the media type of the audio. * @return the {@link SpeechResults} + * @deprecated Deprecated in 2.6.0
+ * Use {@link SpeechToText#recognize(File, RecognizeOptions)} + * */ public SpeechResults recognize(File audio, String contentType) { return recognize(audio, contentType, null); @@ -238,28 +305,56 @@ public SpeechResults recognize(File audio, String contentType) { * @param audio the audio file * @param contentType the media type of the audio. If you use the audio/l16 MIME type, specify the * rate and channels. + * * @param options the {@link RecognizeOptions} * @return the {@link SpeechResults} + * @deprecated Deprecated in 2.6.0
+ * Use {@link SpeechToText#recognize(File, RecognizeOptions)} */ public SpeechResults recognize(File audio, String contentType, RecognizeOptions options) { - Validate.isTrue(audio != null && audio.exists(), "audio file is null or does not exist"); - Validate.isTrue(audio != null && audio.exists(), "audio file is null or does not exist"); - - Validate.isTrue((audio.length() / (1024 * 1024)) < 100.0, - "The audio file is greater than 100MB."); - - Validate.isTrue(MediaType.parse(contentType) != null, - "contentType is not a valid mime audio format. Valid formats start with 'audio/'"); - - String path = PATH_RECOGNIZE; - if (options != null && (options.getSessionId() != null && !options.getSessionId().isEmpty())) - path = String.format(PATH_SESSION_RECOGNIZE, options.getSessionId()); + RecognizeOptions opt = options; + if (opt == null) + opt = new RecognizeOptions().contentType(contentType); - final RequestBuilder requestBuilder = RequestBuilder.post(path); - - buildRecognizeRequest(requestBuilder, options); + return recognize(audio, opt); + } - requestBuilder.withBody(RequestBody.create(MediaType.parse(contentType), audio)); - return executeRequest(requestBuilder.build(), SpeechResults.class); + /** + * Recognizes an audio {@link InputStream} using WebSockets. The {@link RecognizeDelegate} + * instance will be called every time the service sends {@link SpeechResults}.
+ *
+ * + * Here is an example of how to recognize an audio file using WebSockets and get interim results: + * + *
+   * SpeechToText service = new SpeechToText();
+   * service.setUsernameAndPassword("USERNAME", "PASSWORD");
+   * service.setEndPoint("SERVICE_URL");
+   * 
+   * RecognizeOptions options = new RecognizeOptions().continuous(true).interimResults(true);
+   * 
+   * service.recognizeUsingWebSockets(new FileInputStream("sample1.wav"), options, new BaseRecognizeDelegate() {
+   *   @Override
+   *   public void onMessage(SpeechResults speechResults) {
+   *     System.out.println(speechResults);
+   *   }
+   * });
+   * 
+ * + * @param audio the audio input stream + * @param options the recognize options + * @param delegate the delegate + */ + public void recognizeUsingWebSockets(InputStream audio, RecognizeOptions options, + RecognizeDelegate delegate) { + Validate.notNull(audio, "audio cannot be null"); + Validate.notNull(options, "options cannot be null"); + Validate.notNull(options.getContentType(), "options.contentType cannot be null"); + Validate.notNull(delegate, "delegate cannot be null"); + + String url = getEndPoint().replaceFirst("(https|http)", "wss"); + WebSocketSpeechToTextClient webSocket = + new WebSocketSpeechToTextClient(url + PATH_RECOGNIZE, getToken()); + webSocket.recognize(audio, options, delegate); } } diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SessionStatus.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SessionStatus.java index f2fd3b0e49e..f49d25e6975 100644 --- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SessionStatus.java +++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SessionStatus.java @@ -18,7 +18,7 @@ import com.ibm.watson.developer_cloud.speech_to_text.v1.SpeechToText; /** - * SessionStatus Status used by {@link SpeechToText}. + * SessionStatus used by {@link SpeechToText}. */ public class SessionStatus extends GenericModel { diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechAlternative.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechAlternative.java index 1191dc762ec..13d7e60f10c 100644 --- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechAlternative.java +++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechAlternative.java @@ -1,11 +1,11 @@ /** * Copyright 2015 IBM Corp. All Rights Reserved. 
- * + * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under @@ -20,7 +20,8 @@ import com.ibm.watson.developer_cloud.service.model.GenericModel; /** - * The Class SpeechAlternative. + * SpeechAlternative contains the transcript of the utterance along with confidence, timestamp, + * etc... */ public class SpeechAlternative extends GenericModel { @@ -39,7 +40,7 @@ public class SpeechAlternative extends GenericModel { /** * Gets the transcript. - * + * * @return The transcript */ public String getTranscript() { @@ -48,7 +49,7 @@ public String getTranscript() { /** * Sets the transcript. - * + * * @param transcript The transcript */ public void setTranscript(final String transcript) { @@ -57,7 +58,7 @@ public void setTranscript(final String transcript) { /** * Gets the confidence. - * + * * @return The confidence */ public Double getConfidence() { @@ -66,7 +67,7 @@ public Double getConfidence() { /** * Sets the confidence. - * + * * @param confidence The confidence */ public void setConfidence(final Double confidence) { @@ -75,7 +76,7 @@ public void setConfidence(final Double confidence) { /** * Gets the timestamps. - * + * * @return The timestamps */ public List getTimestamps() { @@ -84,7 +85,7 @@ public List getTimestamps() { /** * Sets the timestamps. - * + * * @param timestamps The timestamps */ public void setTimestamps(final List timestamps) { @@ -93,7 +94,7 @@ public void setTimestamps(final List timestamps) { /** * With timestamps. 
- * + * * @param timestamps the timestamps * @return the speech */ @@ -105,7 +106,7 @@ public SpeechAlternative withTimestamps(final List timestamps) /** * Gets the word confidences. - * + * * @return The wordConfidences */ public List getWordConfidences() { @@ -114,7 +115,7 @@ public List getWordConfidences() { /** * Sets the word confidences. - * + * * @param wordConfidences The wordConfidences */ public void setWordConfidences(final List wordConfidences) { @@ -123,7 +124,7 @@ public void setWordConfidences(final List wordConfidences) /** * With word confidences. - * + * * @param wordConfidences the wordConfidences * @return the speech */ diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechModel.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechModel.java index dce6a3db848..1aa887cd9a8 100644 --- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechModel.java +++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechModel.java @@ -17,13 +17,16 @@ import com.ibm.watson.developer_cloud.service.model.GenericModel; /** - * The Class SpeechModel. + * Speech model */ public class SpeechModel extends GenericModel { /** US English broadband model (16KHz). */ public static final SpeechModel EN_BROADBAND16K = new SpeechModel("en-US_BroadbandModel"); + /** US English narrowband model (8KHz). */ + public static final SpeechModel EN_NARROWBAND8K = new SpeechModel("en-US_NarrowbandModel"); + /** Spanish broadband model (16KHz). 
*/ public static final SpeechModel ES_BROADBAND16K = new SpeechModel("es-ES_BroadbandModel"); diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechModelSet.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechModelSet.java index b15e5b4294c..1adc822d4d0 100644 --- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechModelSet.java +++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechModelSet.java @@ -20,7 +20,7 @@ import com.ibm.watson.developer_cloud.service.model.GenericModel; /** - * The Class SpeechModelSet. + * Speech model set */ public class SpeechModelSet extends GenericModel { diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechResults.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechResults.java index 700446253b7..0514f902c24 100644 --- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechResults.java +++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechResults.java @@ -67,4 +67,13 @@ public void setResults(final List results) { this.results = results; } + /** + * Returns true if the results are final + * + * @return true, if the results are final + */ + public boolean isFinal() { + return (results != null && results.get(resultIndex) != null && results.get(resultIndex) + .isFinal()); + } } diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechTimestamp.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechTimestamp.java index 081cc96a174..32d1ddb66f8 100644 --- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechTimestamp.java +++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechTimestamp.java @@ -1,11 +1,11 @@ /** * Copyright 2015 IBM Corp. All Rights Reserved. 
- * + * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under @@ -19,7 +19,7 @@ import com.ibm.watson.developer_cloud.speech_to_text.v1.util.SpeechTimestampTypeAdapter; /** - * The Class SpeechTimestamp. + * Transcription timestamp */ @JsonAdapter(SpeechTimestampTypeAdapter.class) public class SpeechTimestamp extends GenericModel { @@ -35,7 +35,7 @@ public class SpeechTimestamp extends GenericModel { /** * Gets the word. - * + * * @return The word */ public String getWord() { @@ -44,7 +44,7 @@ public String getWord() { /** * Sets the word. - * + * * @param word The word */ public void setWord(final String word) { @@ -53,7 +53,7 @@ public void setWord(final String word) { /** * Gets the start time. - * + * * @return The start time */ public Double getStartTime() { @@ -62,7 +62,7 @@ public Double getStartTime() { /** * Sets the start time. - * + * * @param startTime The start time */ public void setStartTime(final Double startTime) { @@ -71,7 +71,7 @@ public void setStartTime(final Double startTime) { /** * Gets the end time. - * + * * @return The end time */ public Double getEndTime() { @@ -80,7 +80,7 @@ public Double getEndTime() { /** * Sets the end time. 
- * + * * @param endTime The end time */ public void setEndTime(final Double endTime) { diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechWordConfidence.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechWordConfidence.java index 66e6123bcc1..5c469c69e5c 100644 --- a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechWordConfidence.java +++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/model/SpeechWordConfidence.java @@ -1,11 +1,11 @@ /** * Copyright 2015 IBM Corp. All Rights Reserved. - * + * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under @@ -19,7 +19,7 @@ import com.ibm.watson.developer_cloud.speech_to_text.v1.util.SpeechWordConfidenceTypeAdapter; /** - * The Class SpeechWordConfidence. + * Transcription word confidence */ @JsonAdapter(SpeechWordConfidenceTypeAdapter.class) public class SpeechWordConfidence extends GenericModel { @@ -32,7 +32,7 @@ public class SpeechWordConfidence extends GenericModel { /** * Gets the word. - * + * * @return The word */ public String getWord() { @@ -41,7 +41,7 @@ public String getWord() { /** * Sets the word. - * + * * @param word The word */ public void setWord(final String word) { @@ -50,7 +50,7 @@ public void setWord(final String word) { /** * Gets the confidence. - * + * * @return The confidence */ public Double getConfidence() { @@ -59,7 +59,7 @@ public Double getConfidence() { /** * Sets the confidence. 
- * + * * @param confidence The confidence */ public void setConfidence(final Double confidence) { diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/BaseRecognizeDelegate.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/BaseRecognizeDelegate.java new file mode 100644 index 00000000000..e7ccb2fea40 --- /dev/null +++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/BaseRecognizeDelegate.java @@ -0,0 +1,56 @@ +/** + * Copyright 2015 IBM Corp. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. + */ +package com.ibm.watson.developer_cloud.speech_to_text.v1.websocket; + +import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechResults; + +/** + * An empty implementation of {@link RecognizeDelegate} interface. + */ +public class BaseRecognizeDelegate implements RecognizeDelegate { + + /* + * (non-Javadoc) + * + * @see + * com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.RecognizeDelegate#onMessage(com. 
+ * ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechResults) + */ + public void onMessage(SpeechResults speechResults) {}; + + /* + * (non-Javadoc) + * + * @see com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.RecognizeDelegate#onConnected() + */ + public void onConnected() {}; + + /* + * (non-Javadoc) + * + * @see + * com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.RecognizeDelegate#onError(java.lang + * .Exception) + */ + public void onError(Exception e) {}; + + /* + * (non-Javadoc) + * + * @see + * com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.RecognizeDelegate#onDisconnected() + */ + public void onDisconnected() {}; + +} diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/RecognizeDelegate.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/RecognizeDelegate.java new file mode 100644 index 00000000000..081d800e5fe --- /dev/null +++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/RecognizeDelegate.java @@ -0,0 +1,49 @@ +/** + * Copyright 2015 IBM Corp. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except + * in compliance with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under the License + * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express + * or implied. See the License for the specific language governing permissions and limitations under + * the License. 
+ */ +package com.ibm.watson.developer_cloud.speech_to_text.v1.websocket; + +import com.ibm.watson.developer_cloud.speech_to_text.v1.SpeechToText; +import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechResults; + + +/** + * The recognize delegate used in the + * {@link SpeechToText#recognizeUsingWebSockets(java.io.InputStream, com.ibm.watson.developer_cloud.speech_to_text.v1.RecognizeOptions, RecognizeDelegate)} + */ +public interface RecognizeDelegate { + + /** + * Called when a {@link SpeechResults} was received. + * + * @param speechResults the speech results + */ + public void onMessage(SpeechResults speechResults); + + /** + * Called when a WebSocket connection was made + */ + public void onConnected(); + + /** + * Called when there is an error in the Web Socket connection + * + * @param e the exception + */ + public void onError(Exception e); + + /** + * Called when a WebSocket connection was closed + */ + public void onDisconnected(); +} diff --git a/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/WebSocketSpeechToTextClient.java b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/WebSocketSpeechToTextClient.java new file mode 100644 index 00000000000..730f5b789be --- /dev/null +++ b/src/main/java/com/ibm/watson/developer_cloud/speech_to_text/v1/websocket/WebSocketSpeechToTextClient.java @@ -0,0 +1,268 @@ +package com.ibm.watson.developer_cloud.speech_to_text.v1.websocket; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import com.google.gson.Gson; +import com.google.gson.JsonObject; +import com.google.gson.JsonParseException; +import com.google.gson.JsonParser; +import com.ibm.watson.developer_cloud.http.HttpHeaders; +import com.ibm.watson.developer_cloud.speech_to_text.v1.RecognizeOptions; +import com.ibm.watson.developer_cloud.speech_to_text.v1.SpeechToText; +import 
com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechResults; +import com.ibm.watson.developer_cloud.util.GsonSingleton; +import com.neovisionaries.ws.client.WebSocket; +import com.neovisionaries.ws.client.WebSocketAdapter; +import com.neovisionaries.ws.client.WebSocketException; +import com.neovisionaries.ws.client.WebSocketExtension; +import com.neovisionaries.ws.client.WebSocketFactory; +import com.neovisionaries.ws.client.WebSocketFrame; + + +/** + * WebSocket client used by the {@link SpeechToText} to recognize audio + */ +public class WebSocketSpeechToTextClient { + private static final String MODEL = "model"; + private static final String START = "start"; + private static final String STOP = "stop"; + private static final String ACTION = "action"; + private static final String RESULTS = "results"; + private static final String ERROR = "error"; + + private static final int TEN_SECONDS = 10000; // milliseconds + + /** + * Listener that call the {@link RecognizeDelegate} when a message from the WebSocket connection + * arrives + */ + public class WebSocketListener extends WebSocketAdapter { + private RecognizeDelegate delegate; + + /** + * Instantiates a new WebSocket listener. + * + * @param delegate the delegate to notify events + */ + public WebSocketListener(RecognizeDelegate delegate) { + super(); + this.delegate = delegate; + } + + /* + * (non-Javadoc) + * + * @see + * com.neovisionaries.ws.client.WebSocketAdapter#onTextMessage(com.neovisionaries.ws.client. 
+ * WebSocket, java.lang.String) + */ + public void onTextMessage(WebSocket websocket, String message) { + try { + JsonObject json = new JsonParser().parse(message).getAsJsonObject(); + + if (json.has(ERROR)) { + delegate.onError(new RuntimeException(json.get(ERROR).getAsString())); + } else if (json.has(RESULTS)) { + SpeechResults transcript = GsonSingleton.getGson().fromJson(message, SpeechResults.class); + delegate.onMessage(transcript); + + // if final is true + if (transcript.isFinal()) + websocket.disconnect(); + } + } catch (JsonParseException e) { + new RuntimeException("Error parsing the incoming message: " + message); + } + } + + /* + * (non-Javadoc) + * + * @see + * com.neovisionaries.ws.client.WebSocketAdapter#onConnected(com.neovisionaries.ws.client.WebSocket + * , java.util.Map) + */ + @Override + public void onConnected(WebSocket websocket, Map> headers) + throws Exception { + delegate.onConnected(); + } + + /* + * (non-Javadoc) + * + * @see + * com.neovisionaries.ws.client.WebSocketAdapter#onDisconnected(com.neovisionaries.ws.client + * .WebSocket, com.neovisionaries.ws.client.WebSocketFrame, + * com.neovisionaries.ws.client.WebSocketFrame, boolean) + */ + @Override + public void onDisconnected(WebSocket websocket, WebSocketFrame serverCloseFrame, + WebSocketFrame clientCloseFrame, boolean closedByServer) throws Exception { + delegate.onDisconnected(); + } + + /* + * (non-Javadoc) + * + * @see + * com.neovisionaries.ws.client.WebSocketAdapter#onError(com.neovisionaries.ws.client.WebSocket, + * com.neovisionaries.ws.client.WebSocketException) + */ + @Override + public void onError(WebSocket websocket, WebSocketException cause) throws Exception { + delegate.onError(cause); + } + + /* + * (non-Javadoc) + * + * @see + * com.neovisionaries.ws.client.WebSocketAdapter#handleCallbackError(com.neovisionaries.ws.client + * .WebSocket, java.lang.Throwable) + */ + @Override + public void handleCallbackError(WebSocket websocket, Throwable cause) throws Exception 
{ + cause.printStackTrace(); + } + + } + + private static final int FOUR_KB = 4096; + private String token; + private String webSocketUrl; + + /** + * Instantiates a new web socket speech to text client. + * + * @param webSocketUrl the web socket url. + * + *
+   * wss://stream.watsonplatform.net/speech-to-text/api/v1/recognize
+   * 
+ * @param token the authorization token + */ + public WebSocketSpeechToTextClient(String webSocketUrl, String token) { + this.token = token; + this.webSocketUrl = webSocketUrl; + } + + /** + * Creates a WebSocket connection to the Speech To Text service and sends the audio bytes from the + * input stream for recognition + * + * @param stream the stream + * @param options the options + * @param delegate the delegate + */ + public void recognize(InputStream stream, RecognizeOptions options, RecognizeDelegate delegate) { + WebSocketListener listener = new WebSocketListener(delegate); + + try { + // 1. Connect to the WebSocket + WebSocket ws = connect(options); + + // 2. Add a listener to messages coming from the WebSocket + ws.addListener(listener); + + // 3. Send start message + ws.sendText(buildStartMessage(options)); + + // 4. Send the input stream as binary data + sendInputStream(ws, stream); + + // 5. Send stop message + ws.sendText(buildStopMessage()); + + } catch (WebSocketException e) { + delegate.onError(e); + } catch (IOException e) { + delegate.onError(e); + } catch (InterruptedException e) { + delegate.onError(e); + } + } + + /** + * Builds the stop message.
+ *
+ * + * { "action": "stop" } + * + * + * @return the string + */ + private String buildStopMessage() { + JsonObject stopMessage = new JsonObject(); + stopMessage.addProperty(ACTION, STOP); + return stopMessage.toString(); + } + + /** + * Sends 4k byte arrays to the WebSocket as binary data + * + * @param ws the WebSocket + * @param stream the stream + * @throws IOException Signals that an I/O exception has occurred. + * @throws InterruptedException if any thread has interrupted the current thread. The interrupted + * status of the current thread is cleared when this exception is thrown. + */ + private void sendInputStream(WebSocket ws, InputStream stream) throws IOException, + InterruptedException { + byte[] buffer = new byte[FOUR_KB]; + int read; + while ((read = stream.read(buffer)) > 0) { + if (read == FOUR_KB) + ws.sendBinary(buffer); + else + ws.sendBinary(Arrays.copyOfRange(buffer, 0, read)); + + Thread.sleep(10); + } + + stream.close(); + } + + /** + * Builds the start message using the {@link RecognizeOptions}.
+ *
+ * + * { + * "action": "start", + * "content-type": "audio/wav" + * } + * + * + * @param options the recognize options + * @return the string + */ + private String buildStartMessage(RecognizeOptions options) { + JsonObject startMessage = new JsonParser().parse(new Gson().toJson(options)).getAsJsonObject(); + startMessage.remove(MODEL); + startMessage.addProperty(ACTION, START); + return startMessage.toString(); + } + + /** + * Creates a connects to the Speech to Text service + * + * @param options + * + * @return the WebSocket + * @throws IOException Signals that an I/O exception has occurred. + * @throws WebSocketException the WebSocket exception + */ + private WebSocket connect(RecognizeOptions options) throws IOException, WebSocketException { + String speechModel = options.getModel() != null ? "?model=" + options.getModel() : ""; + + WebSocketFactory factory = new WebSocketFactory().setConnectionTimeout(TEN_SECONDS); + WebSocket ws = factory.createSocket(webSocketUrl + speechModel); + ws.addHeader(HttpHeaders.X_WATSON_AUTHORIZATION_TOKEN, token); + ws.addExtension(WebSocketExtension.PERMESSAGE_DEFLATE).connect(); + return ws; + } +} diff --git a/src/main/java/com/ibm/watson/developer_cloud/text_to_speech/v1/util/WaveUtils.java b/src/main/java/com/ibm/watson/developer_cloud/text_to_speech/v1/util/WaveUtils.java new file mode 100644 index 00000000000..626bcc65069 --- /dev/null +++ b/src/main/java/com/ibm/watson/developer_cloud/text_to_speech/v1/util/WaveUtils.java @@ -0,0 +1,78 @@ +package com.ibm.watson.developer_cloud.text_to_speech.v1.util; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; + +import com.ibm.watson.developer_cloud.text_to_speech.v1.TextToSpeech; + + +/** + * Utility class to write the data size header in wave(.wav) files synthesized with the + * {@link TextToSpeech} service + */ +public class WaveUtils { + /** The WAVE meta-data header size. 
(value is 8) */ + private static final int WAVE_HEADER_SIZE = 8; + + /** The WAVE meta-data size position. (value is 4) */ + private static final int WAVE_SIZE_POS = 4; + + /** The WAVE meta-data position in bytes. (value is 74) */ + private static final int WAVE_METADATA_POS = 74; + + /** + * Re-writes the data size in the header(bytes 4-8) of the WAVE(.wav) input stream.
+ * It needs to be read in order to calculate the size. + * + * @param is the input stream + * @return A new input stream that includes the data header in the header + * @throws IOException Signals that an I/O exception has occurred. + */ + public static InputStream reWriteWaveHeader(InputStream is) throws IOException { + byte[] audioBytes = toByteArray(is); + int filesize = audioBytes.length - WAVE_HEADER_SIZE; + + writeInt(filesize, audioBytes, WAVE_SIZE_POS); + writeInt(filesize - WAVE_HEADER_SIZE, audioBytes, WAVE_METADATA_POS); + + return new ByteArrayInputStream(audioBytes); + } + + /** + * Writes an number into an array using 4 bytes + * + * @param value the number to write + * @param array the byte array + * @param offset the offset + */ + private static void writeInt(int value, byte[] array, int offset) { + for (int i = 0; i < 4; i++) { + array[offset + i] = (byte) (value >>> (8 * i)); + } + } + + /** + * Converts an {@link InputStream} to byte array + * + * @param is the input stream + * @return the byte array + * @throws IOException If the first byte cannot be read for any reason other than end of file, or + * if the input stream has been closed, or if some other I/O error occurs. 
+ */ + public static byte[] toByteArray(InputStream is) throws IOException { + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + + int nRead; + byte[] data = new byte[16384]; // 4 kb + + while ((nRead = is.read(data, 0, data.length)) != -1) { + buffer.write(data, 0, nRead); + } + + buffer.flush(); + return buffer.toByteArray(); + } + +} diff --git a/src/test/java/com/ibm/watson/developer_cloud/WatsonServiceTest.java b/src/test/java/com/ibm/watson/developer_cloud/WatsonServiceTest.java index eb3a8771f0c..99d04f05122 100755 --- a/src/test/java/com/ibm/watson/developer_cloud/WatsonServiceTest.java +++ b/src/test/java/com/ibm/watson/developer_cloud/WatsonServiceTest.java @@ -13,12 +13,17 @@ */ package com.ibm.watson.developer_cloud; +import static org.junit.Assert.fail; + import java.io.BufferedReader; +import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.io.OutputStream; import java.util.Properties; import java.util.logging.Level; import java.util.logging.Logger; @@ -35,6 +40,9 @@ public abstract class WatsonServiceTest { private static final Logger log = Logger.getLogger(WatsonServiceTest.class.getName()); + /** + * Instantiates a new watson service test. + */ public WatsonServiceTest() { if (prop == null) loadProperties(); @@ -128,8 +136,8 @@ public String getExistingProperty(String property) { } /** - * Gets the existing property if exists, otherwise it returns the defaultValue - * + * Gets the existing property if exists, otherwise it returns the defaultValue. + * * @param property the property * @param defaultValue the default value * @return the existing property @@ -182,6 +190,34 @@ private void setupLogging() { root.setLevel(ch.qos.logback.classic.Level.OFF); } + /** + * Write input stream to file. 
+ * + * @param inputStream the input stream + * @param audio the audio + */ + public static void writeInputStreamToFile(InputStream inputStream, File audio) { + OutputStream outStream = null; + try { + outStream = new FileOutputStream(audio); + + byte[] buffer = new byte[8 * 1024]; + int bytesRead; + while ((bytesRead = inputStream.read(buffer)) != -1) { + outStream.write(buffer, 0, bytesRead); + } + } catch (Exception e) { + fail(); + } finally { + try { + inputStream.close(); + outStream.close(); + } catch (Exception e) { + fail(); + } + } + } + /** * Loads fixture. * @@ -197,6 +233,11 @@ public static T loadFixture(String filename, Class returnType) return GsonSingleton.getGson().fromJson(jsonString, returnType); } + /** + * Sets the up. + * + * @throws Exception the exception + */ public void setUp() throws Exception {} } diff --git a/src/test/java/com/ibm/watson/developer_cloud/concept_insights/v2/ConceptInsightsIT.java b/src/test/java/com/ibm/watson/developer_cloud/concept_insights/v2/ConceptInsightsIT.java index a4b1c47721a..da3cb60b8b1 100644 --- a/src/test/java/com/ibm/watson/developer_cloud/concept_insights/v2/ConceptInsightsIT.java +++ b/src/test/java/com/ibm/watson/developer_cloud/concept_insights/v2/ConceptInsightsIT.java @@ -17,6 +17,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.UUID; import org.junit.Assert; import org.junit.Before; @@ -316,8 +317,9 @@ public void testGetGraphs() { */ @Test public void testCreateAndDeleteCorpus() { + final String name = UUID.randomUUID().toString(); final Account account = service.getAccountsInfo().getAccounts().get(0); - Corpus corpus = new Corpus(account.getId(), "integration-test-corpus"); + Corpus corpus = new Corpus(account.getId(), name); try { service.createCorpus(corpus); corpus = service.getCorpus(corpus); diff --git a/src/test/java/com/ibm/watson/developer_cloud/service/GenericServiceTest.java 
b/src/test/java/com/ibm/watson/developer_cloud/service/GenericServiceTest.java index 2cb89d48c52..0edf7dda7f4 100644 --- a/src/test/java/com/ibm/watson/developer_cloud/service/GenericServiceTest.java +++ b/src/test/java/com/ibm/watson/developer_cloud/service/GenericServiceTest.java @@ -170,7 +170,7 @@ public void testUserAgentIsSet() { mockAPICall(); service.getProfile(sampleText); mockServer.verify(new HttpRequest().withMethod("POST").withHeader( - new Header(HttpHeaders.USER_AGENT, "watson-developer-cloud-java-sdk-2.5.0"))); + new Header(HttpHeaders.USER_AGENT, "watson-developer-cloud-java-sdk-2.6.0"))); } @Test diff --git a/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextIT.java b/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextIT.java index e74d55d5d7b..151d1191240 100644 --- a/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextIT.java +++ b/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextIT.java @@ -17,7 +17,11 @@ import static org.junit.Assert.assertTrue; import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import org.junit.Before; import org.junit.Test; @@ -28,12 +32,25 @@ import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechModel; import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechResults; import com.ibm.watson.developer_cloud.speech_to_text.v1.model.SpeechSession; +import com.ibm.watson.developer_cloud.speech_to_text.v1.websocket.BaseRecognizeDelegate; +/** + * The Class SpeechToTextIT. 
+ */ public class SpeechToTextIT extends WatsonServiceTest { - private static final String EN_BROADBAND16K = "en-US_BroadbandModel"; + private static String EN_BROADBAND16K = "en-US_BroadbandModel"; + private SpeechResults asyncResults; + + private CountDownLatch lock = new CountDownLatch(1); + private SpeechToText service; + /* + * (non-Javadoc) + * + * @see com.ibm.watson.developer_cloud.WatsonServiceTest#setUp() + */ @Override @Before public void setUp() throws Exception { @@ -44,9 +61,12 @@ public void setUp() throws Exception { service.setEndPoint(getValidProperty("speech_to_text.url")); } + /** + * Test create session. + */ @Test public void testCreateSession() { - final SpeechSession session = service.createSession(); + SpeechSession session = service.createSession(); try { assertNotNull(session); assertNotNull(session.getSessionId()); @@ -55,9 +75,12 @@ public void testCreateSession() { } } + /** + * Test create session speech model. + */ @Test public void testCreateSessionSpeechModel() { - final SpeechSession session = service.createSession(SpeechModel.EN_BROADBAND16K); + SpeechSession session = service.createSession(SpeechModel.EN_BROADBAND16K); try { assertNotNull(session); assertNotNull(session.getSessionId()); @@ -66,9 +89,12 @@ public void testCreateSessionSpeechModel() { } } + /** + * Test create session string. + */ @Test public void testCreateSessionString() { - final SpeechSession session = service.createSession(EN_BROADBAND16K); + SpeechSession session = service.createSession(EN_BROADBAND16K); try { assertNotNull(session); assertNotNull(session.getSessionId()); @@ -77,25 +103,34 @@ public void testCreateSessionString() { } } + /** + * Test get model. + */ @Test public void testGetModel() { - final SpeechModel model = service.getModel(EN_BROADBAND16K); + SpeechModel model = service.getModel(EN_BROADBAND16K); assertNotNull(model); assertNotNull(model.getName()); assertNotNull(model.getRate()); } + /** + * Test get models. 
+ */ @Test public void testGetModels() { - final List models = service.getModels(); + List models = service.getModels(); assertNotNull(models); assertTrue(!models.isEmpty()); } + /** + * Test get recognize status. + */ @Test public void testGetRecognizeStatus() { - final SpeechSession session = service.createSession(SpeechModel.EN_BROADBAND16K); - final SessionStatus status = service.getRecognizeStatus(session); + SpeechSession session = service.createSession(SpeechModel.EN_BROADBAND16K); + SessionStatus status = service.getRecognizeStatus(session); try { assertNotNull(status); assertNotNull(status.getModel()); @@ -105,23 +140,78 @@ public void testGetRecognizeStatus() { } } + /** + * Test recognize audio file + */ @Test public void testRecognizeFileString() { - final File audio = new File("src/test/resources/speech_to_text/sample1.wav"); - final SpeechResults results = service.recognize(audio); + File audio = new File("src/test/resources/speech_to_text/sample1.wav"); + SpeechResults results = service.recognize(audio); assertNotNull(results.getResults().get(0).getAlternatives().get(0).getTranscript()); } + /** + * Test recognize file string recognize options. 
+ */ @Test public void testRecognizeFileStringRecognizeOptions() { - final File audio = new File("src/test/resources/speech_to_text/sample1.wav"); - final String contentType = HttpMediaType.AUDIO_WAV; - final RecognizeOptions options = new RecognizeOptions(); - options.continuous(true).timestamps(true).wordConfidence(true).model(EN_BROADBAND16K); - final SpeechResults results = service.recognize(audio, contentType, options); + File audio = new File("src/test/resources/speech_to_text/sample1.wav"); + String contentType = HttpMediaType.AUDIO_WAV; + RecognizeOptions options = new RecognizeOptions(); + options.continuous(true).timestamps(true).wordConfidence(true).model(EN_BROADBAND16K) + .contentType(contentType); + SpeechResults results = service.recognize(audio, options); assertNotNull(results.getResults().get(0).getAlternatives().get(0).getTranscript()); assertNotNull(results.getResults().get(0).getAlternatives().get(0).getTimestamps()); assertNotNull(results.getResults().get(0).getAlternatives().get(0).getWordConfidences()); } + /** + * Test recognize webSocket + * + * @throws FileNotFoundException the file not found exception + * @throws InterruptedException + */ + @Test + public void testRecognizeWebSocket() throws FileNotFoundException, InterruptedException { + RecognizeOptions options = new RecognizeOptions(); + options.continuous(true).interimResults(true); + options.inactivityTimeout(40).timestamps(true).maxAlternatives(2); + options.model(EN_BROADBAND16K).contentType(HttpMediaType.AUDIO_WAV); + + service.recognizeUsingWebSockets(new FileInputStream( + "src/test/resources/speech_to_text/sample1.wav"), options, new BaseRecognizeDelegate() { + + @Override + public void onConnected() { + System.out.println("onConnected()"); + } + + @Override + public void onDisconnected() { + System.out.println("onDisconnected()"); + lock.countDown(); + } + + @Override + public void onError(Exception e) { + e.printStackTrace(); + lock.countDown(); + } + + @Override + public 
void onMessage(SpeechResults speechResults) { + if (speechResults != null && speechResults.isFinal()) { + asyncResults = speechResults; + System.out.println(speechResults); + lock.countDown(); + } + } + + }); + + lock.await(20000, TimeUnit.MILLISECONDS); + assertNotNull(asyncResults); + } + } diff --git a/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextTest.java b/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextTest.java index 048930c141d..54a3b47ec2c 100644 --- a/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextTest.java +++ b/src/test/java/com/ibm/watson/developer_cloud/speech_to_text/v1/SpeechToTextTest.java @@ -209,7 +209,7 @@ public void testRecognize() throws URISyntaxException { response().withHeaders( new Header(HttpHeaders.Names.CONTENT_TYPE, HttpMediaType.APPLICATION_JSON)).withBody( GsonSingleton.getGson().toJson(speechResults))); - final SpeechResults result = service.recognize(audio, HttpMediaType.AUDIO_WAV); + final SpeechResults result = service.recognize(audio); Assert.assertNotNull(result); Assert.assertEquals(result, speechResults); } @@ -251,7 +251,7 @@ public void testRecognizeMissingAudioFile() throws URISyntaxException { boolean didItHappen = false; try { - service.recognize(null, HttpMediaType.AUDIO_WAV); + service.recognize(null); } catch (final IllegalArgumentException e) { didItHappen = true; } diff --git a/src/test/java/com/ibm/watson/developer_cloud/text_to_speech/v1/TextToSpeechIT.java b/src/test/java/com/ibm/watson/developer_cloud/text_to_speech/v1/TextToSpeechIT.java index 2e3a6be187b..7b2861ecbb2 100644 --- a/src/test/java/com/ibm/watson/developer_cloud/text_to_speech/v1/TextToSpeechIT.java +++ b/src/test/java/com/ibm/watson/developer_cloud/text_to_speech/v1/TextToSpeechIT.java @@ -27,16 +27,14 @@ * the License. 
*/ -import static org.junit.Assert.fail; - import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.OutputStream; import java.util.List; -import org.apache.commons.io.IOUtils; +import javax.sound.sampled.AudioSystem; +import javax.sound.sampled.UnsupportedAudioFileException; + import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -44,6 +42,7 @@ import com.ibm.watson.developer_cloud.WatsonServiceTest; import com.ibm.watson.developer_cloud.http.HttpMediaType; import com.ibm.watson.developer_cloud.text_to_speech.v1.model.Voice; +import com.ibm.watson.developer_cloud.text_to_speech.v1.util.WaveUtils; /** * The Class TextToSpeechIntegrationTest. @@ -67,52 +66,44 @@ public void setUp() throws Exception { service.setEndPoint(prop.getProperty("text_to_speech.url")); } - /** - * Synthesize. - * - * @param text the text - * @param audio the audio - */ - private void synthesize(String text, File audio) { - final InputStream is = service.synthesize(text, Voice.EN_LISA, HttpMediaType.AUDIO_WAV); - Assert.assertNotNull(is); - OutputStream outStream = null; - try { - outStream = new FileOutputStream(audio); - final byte[] buffer = new byte[8 * 1024]; - int bytesRead; - while ((bytesRead = is.read(buffer)) != -1) { - outStream.write(buffer, 0, bytesRead); - } - } catch (final Exception e) { - fail(); - } finally { - IOUtils.closeQuietly(is); - IOUtils.closeQuietly(outStream); - } - } /** * Test get voices. */ @Test public void testGetVoices() { - final List voices = service.getVoices(); + List voices = service.getVoices(); Assert.assertNotNull(voices); Assert.assertTrue(!voices.isEmpty()); } /** - * Test synthesize. + * Synthesize text and write it to a temporary file * * @throws IOException Signals that an I/O exception has occurred. 
*/ @Test public void testSynthesize() throws IOException { - final String text = "This is an integration test"; - final File audio = File.createTempFile("tts-audio", "wav"); + String text = "This is an integration test"; + InputStream result = service.synthesize(text, Voice.EN_LISA, HttpMediaType.AUDIO_WAV); + writeInputStreamToFile(result, File.createTempFile("tts-audio", "wav")); + } - synthesize(text, audio); + /** + * Test the fix wave header not having the size due to be streamed. + * + * @throws IOException Signals that an I/O exception has occurred. + * @throws UnsupportedAudioFileException the unsupported audio file exception + */ + @Test + public void testSynthesizeAndFixHeader() throws IOException, UnsupportedAudioFileException { + String text = "one two three four five"; + InputStream result = service.synthesize(text, Voice.EN_LISA, HttpMediaType.AUDIO_WAV); + Assert.assertNotNull(result); + result = WaveUtils.reWriteWaveHeader(result); + File tempFile = File.createTempFile("output", ".wav"); + writeInputStreamToFile(result, tempFile); + Assert.assertNotNull(AudioSystem.getAudioFileFormat(tempFile)); } } diff --git a/src/test/java/com/ibm/watson/developer_cloud/text_to_speech/v1/TextToSpeechTest.java b/src/test/java/com/ibm/watson/developer_cloud/text_to_speech/v1/TextToSpeechTest.java index cd036046609..00fd7b933b0 100644 --- a/src/test/java/com/ibm/watson/developer_cloud/text_to_speech/v1/TextToSpeechTest.java +++ b/src/test/java/com/ibm/watson/developer_cloud/text_to_speech/v1/TextToSpeechTest.java @@ -18,6 +18,7 @@ import io.netty.handler.codec.http.HttpHeaders; import java.io.File; +import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; @@ -28,6 +29,9 @@ import java.util.List; import java.util.Map; +import javax.sound.sampled.AudioSystem; +import javax.sound.sampled.UnsupportedAudioFileException; + import org.junit.Assert; import org.junit.Before; import 
org.junit.FixMethodOrder; @@ -40,6 +44,7 @@ import com.ibm.watson.developer_cloud.WatsonServiceUnitTest; import com.ibm.watson.developer_cloud.http.HttpMediaType; import com.ibm.watson.developer_cloud.text_to_speech.v1.model.Voice; +import com.ibm.watson.developer_cloud.text_to_speech.v1.util.WaveUtils; import com.ibm.watson.developer_cloud.util.GsonSingleton; /** @@ -213,4 +218,23 @@ public void testWithVoiceAsWav() { } } + + + /** + * Test the fix wave header not having the size due to be streamed. + * + * @throws IOException Signals that an I/O exception has occurred. + * @throws UnsupportedAudioFileException the unsupported audio file exception + */ + @Test + public void testSynthesizeAndFixHeader() throws IOException, UnsupportedAudioFileException { + File audio = new File("src/test/resources/text_to_speech/numbers.wav"); + InputStream stream = new FileInputStream(audio); + Assert.assertNotNull(stream); + stream = WaveUtils.reWriteWaveHeader(stream); + File tempFile = File.createTempFile("output", ".wav"); + writeInputStreamToFile(stream, tempFile); + Assert.assertNotNull(AudioSystem.getAudioFileFormat(tempFile)); + } + } diff --git a/src/test/resources/text_to_speech/numbers.wav b/src/test/resources/text_to_speech/numbers.wav new file mode 100644 index 00000000000..d03169a650c Binary files /dev/null and b/src/test/resources/text_to_speech/numbers.wav differ