fix(Speech to Text): Refactor the websocket method to better match the other SDKs

maxnussbaum · maxnussbaum · commit 917ef208ce7e · 2018-08-10T14:09:05.000-04:00
diff --git a/examples/speech_to_text_v1.rb b/examples/speech_to_text_v1.rb
@@ -72,7 +72,8 @@ def on_data(data:)
 File.open(Dir.getwd + "/resources/speech.wav") do |audio_file|
   speech_to_text.recognize_using_websocket(
     audio: audio_file,
-    recognize_callback: mycallback
+    recognize_callback: mycallback,
+    content_type: "audio/wav"
   ).start
 end
 
@@ -83,7 +84,8 @@ def on_data(data:)
   chunk_data: true, # Tell the websocket object that audio will be given in chunks
   recognize_callback: mycallback,
   interim_results: true,
-  inactivity_timeout: 3
+  inactivity_timeout: 3,
+  content_type: "audio/wav"
 )
 audio_file = File.open(Dir.getwd + "/resources/speech.wav")
 Thread.new do
diff --git a/lib/ibm_watson/speech_to_text_v1.rb b/lib/ibm_watson/speech_to_text_v1.rb
@@ -426,17 +426,17 @@ def recognize(audio:, content_type:, model: nil, customization_id: nil, acoustic
     end
 
     ##
-    # @!method recognize_using_websocket(audio: nil,chunk_data: false,content_type: "audio/l16; rate=44100",model: "en-US_BroadbandModel",recognize_callback: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,version: nil,inactivity_timeout: 30,interim_results: false,keywords: nil,keywords_threshold: nil,max_alternatives: 1,word_alternatives_threshold: nil,word_confidence: false,timestamps: false,profanity_filter: nil,smart_formatting: false,speaker_labels: nil)
+    # @!method recognize_using_websocket(content_type:,recognize_callback:,audio: nil,chunk_data: false,model: nil,customization_id: nil,acoustic_customization_id: nil,customization_weight: nil,base_model_version: nil,inactivity_timeout: nil,interim_results: nil,keywords: nil,keywords_threshold: nil,max_alternatives: nil,word_alternatives_threshold: nil,word_confidence: nil,timestamps: nil,profanity_filter: nil,smart_formatting: nil,speaker_labels: nil)
     # Sends audio for speech recognition using web sockets.
+    # @param content_type [String] The type of the input: audio/basic, audio/flac, audio/l16, audio/mp3, audio/mpeg, audio/mulaw, audio/ogg, audio/ogg;codecs=opus, audio/ogg;codecs=vorbis, audio/wav, audio/webm, audio/webm;codecs=opus, audio/webm;codecs=vorbis, or multipart/form-data.
+    # @param recognize_callback [RecognizeCallback] The instance handling events returned from the service.
     # @param audio [IO] Audio to transcribe in the format specified by the `Content-Type` header.
     # @param chunk_data [Boolean] If true, then the WebSocketClient will expect to receive data in chunks rather than as a single audio file
-    # @param content_type [String] The type of the input: audio/basic, audio/flac, audio/l16, audio/mp3, audio/mpeg, audio/mulaw, audio/ogg, audio/ogg;codecs=opus, audio/ogg;codecs=vorbis, audio/wav, audio/webm, audio/webm;codecs=opus, audio/webm;codecs=vorbis, or multipart/form-data.
     # @param model [String] The identifier of the model to be used for the recognition request.
-    # @param recognize_callback [RecognizeCallback] The instance handling events returned from the service.
     # @param customization_id [String] The GUID of a custom language model that is to be used with the request. The base model of the specified custom language model must match the model specified with the `model` parameter. You must make the request with service credentials created for the instance of the service that owns the custom model. By default, no custom language model is used.
     # @param acoustic_customization_id [String] The GUID of a custom acoustic model that is to be used with the request. The base model of the specified custom acoustic model must match the model specified with the `model` parameter. You must make the request with service credentials created for the instance of the service that owns the custom model. By default, no custom acoustic model is used.
     # @param customization_weight [Float] If you specify a `customization_id` with the request, you can use the `customization_weight` parameter to tell the service how much weight to give to words from the custom language model compared to those from the base model for speech recognition.   Specify a value between 0.0 and 1.0. Unless a different customization weight was specified for the custom model when it was trained, the default value is 0.3. A customization weight that you specify overrides a weight that was specified when the custom model was trained.   The default value yields the best performance in general. Assign a higher value if your audio makes frequent use of OOV words from the custom model. Use caution when setting the weight: a higher value can improve the accuracy of phrases from the custom model's domain, but it can negatively affect performance on non-domain phrases.
-    # @param version [String] The version of the specified base `model` that is to be used for speech recognition. Multiple versions of a base model can exist when a model is updated for internal improvements. The parameter is intended primarily for use with custom models that have been upgraded for a new base model. The default value depends on whether the parameter is used with or without a custom model. For more information, see [Base model version](https://console.bluemix.net/docs/services/speech-to-text/input.html#version).
+    # @param base_model_version [String] The version of the specified base `model` that is to be used for speech recognition. Multiple versions of a base model can exist when a model is updated for internal improvements. The parameter is intended primarily for use with custom models that have been upgraded for a new base model. The default value depends on whether the parameter is used with or without a custom model. For more information, see [Base model version](https://console.bluemix.net/docs/services/speech-to-text/input.html#version).
     # @param inactivity_timeout [Integer] The time in seconds after which, if only silence (no speech) is detected in submitted audio, the connection is closed with a 400 error. Useful for stopping audio submission from a live microphone when a user simply walks away. Use `-1` for infinity.
     # @param interim_results [Boolean] Send back non-final previews of each "sentence" as it is being processed. These results are ignored in text mode.
     # @param keywords [Array<String>] Array of keyword strings to spot in the audio. Each keyword string can include one or more tokens. Keywords are spotted only in the final hypothesis, not in interim results. If you specify any keywords, you must also specify a keywords threshold. Omit the parameter or specify an empty array if you do not need to spot keywords.
@@ -450,25 +450,25 @@ def recognize(audio:, content_type:, model: nil, customization_id: nil, acoustic
     # @param speaker_labels [Boolean] Indicates whether labels that identify which words were spoken by which participants in a multi-person exchange are to be included in the response. The default is `false`; no speaker labels are returned. Setting `speaker_labels` to `true` forces the `timestamps` parameter to be `true`, regardless of whether you specify `false` for the parameter.   To determine whether a language model supports speaker labels, use the `GET /v1/models` method and check that the attribute `speaker_labels` is set to `true`. You can also refer to [Speaker labels](https://console.bluemix.net/docs/services/speech-to-text/output.html#speaker_labels).
     # @return [WebSocketClient] Returns a new WebSocketClient object
     def recognize_using_websocket(
+      content_type:,
+      recognize_callback:,
       audio: nil,
       chunk_data: false,
-      content_type: "audio/l16; rate=44100",
-      model: "en-US_BroadbandModel",
-      recognize_callback: nil,
+      model: nil,
       customization_id: nil,
       acoustic_customization_id: nil,
       customization_weight: nil,
-      version: nil,
-      inactivity_timeout: 30,
-      interim_results: false,
+      base_model_version: nil,
+      inactivity_timeout: nil,
+      interim_results: nil,
       keywords: nil,
       keywords_threshold: nil,
-      max_alternatives: 1,
+      max_alternatives: nil,
       word_alternatives_threshold: nil,
-      word_confidence: false,
-      timestamps: false,
+      word_confidence: nil,
+      timestamps: nil,
       profanity_filter: nil,
-      smart_formatting: false,
+      smart_formatting: nil,
       speaker_labels: nil
     )
       raise ArgumentError("Audio must be provided") if audio.nil? && !chunk_data
@@ -489,7 +489,7 @@ def recognize_using_websocket(
         "customization_id" => customization_id,
         "acoustic_customization_id" => acoustic_customization_id,
         "customization_weight" => customization_weight,
-        "version" => version
+        "base_model_version" => base_model_version
       }
       params.delete_if { |_, v| v.nil? }
       url += "/v1/recognize?" + HTTP::URI.form_encode(params)
@@ -514,25 +514,25 @@ def recognize_using_websocket(
     # :nocov:
     # @deprecated This method will be removed in the next major release. Use {#recognize_using_websocket} instead.
     def recognize_with_websocket(
+      content_type:,
+      recognize_callback:,
       audio: nil,
       chunk_data: false,
-      content_type: "audio/l16; rate=44100",
-      model: "en-US_BroadbandModel",
-      recognize_callback: nil,
+      model: nil,
       customization_id: nil,
       acoustic_customization_id: nil,
       customization_weight: nil,
-      version: nil,
-      inactivity_timeout: 30,
-      interim_results: false,
+      base_model_version: nil,
+      inactivity_timeout: nil,
+      interim_results: nil,
       keywords: nil,
       keywords_threshold: nil,
-      max_alternatives: 1,
+      max_alternatives: nil,
       word_alternatives_threshold: nil,
-      word_confidence: false,
-      timestamps: false,
+      word_confidence: nil,
+      timestamps: nil,
       profanity_filter: nil,
-      smart_formatting: false,
+      smart_formatting: nil,
       speaker_labels: nil
     )
       Kernel.warn("[DEPRECATION] `recognize_with_websocket` is deprecated and will be removed in the next major release. Please use `recognize_using_websocket` instead.")
@@ -545,7 +545,7 @@ def recognize_with_websocket(
         customization_id: customization_id,
         acoustic_customization_id: acoustic_customization_id,
         customization_weight: customization_weight,
-        version: version,
+        base_model_version: base_model_version,
         inactivity_timeout: inactivity_timeout,
         interim_results: interim_results,
         keywords: keywords,
diff --git a/test/integration/test_speech_to_text_v1.rb b/test/integration/test_speech_to_text_v1.rb
@@ -138,7 +138,7 @@ def test_recognize_websocket_as_chunks
         timestamps: true,
         max_alternatives: 2,
         word_alternatives_threshold: 0.5,
-        model: "en-US_BroadbandModel"
+        content_type: "audio/wav"
       )
       Thread.new do
         until audio_file.eof?
@@ -162,7 +162,7 @@ def test_recognize_websocket
         timestamps: true,
         max_alternatives: 2,
         word_alternatives_threshold: 0.5,
-        model: "en-US_BroadbandModel"
+        content_type: "audio/wav"
       )
       thr = Thread.new { speech.start }
       thr.join
@@ -180,7 +180,7 @@ def test_inactivity_timeout_using_websocket
         timestamps: true,
         max_alternatives: 2,
         word_alternatives_threshold: 0.5,
-        model: "en-US_BroadbandModel"
+        content_type: "audio/wav"
       )
       thr = Thread.new { speech.start }
       thr.join
@@ -198,7 +198,7 @@ def test_broken_audio_using_websocket
         timestamps: true,
         max_alternatives: 2,
         word_alternatives_threshold: 0.5,
-        model: "en-US_BroadbandModel"
+        content_type: "audio/wav"
       )
       thr = Thread.new { speech.start }
       thr.join
@@ -225,7 +225,7 @@ def test_invalid_auth_using_websocket
         timestamps: true,
         max_alternatives: 2,
         word_alternatives_threshold: 0.5,
-        model: "en-US_BroadbandModel"
+        content_type: "audio/wav"
       )
       thr = Thread.new { speech.start }
       thr.join