Merge pull request #455 from watson-developer-cloud/tts-websockets

websockets
watson-developer-cloud · Apr 5, 2021 · 20726cb · 20726cb
2 parents 427b4c8 + 69dec74
commit 20726cb
Show file tree

Hide file tree

Showing 25 changed files with 1,399 additions and 77 deletions.
diff --git a/IBM.Watson.sln b/IBM.Watson.sln
diff --git a/README.md b/README.md
@@ -8,23 +8,6 @@
 
 The .NET Standard SDK uses [Watson][wdc] services, a collection of REST APIs that use cognitive computing to solve complex problems.
 
-## Table of Contents
-* [Before you begin](#before-you-begin)
-* [Installing the Watson .NET Standard SDK](#installing-the-watson-net-standard-sdk)
-* [.NET Standard 2.0](#net-standard-20)
-* [Authentication](#authentication)
-* [Custom Request Headers](#custom-request-headers)
-* [Response Headers, Status Code and Raw Json](#response-headers-status-code-and-raw-json)
-* [Self signed certificates](#self-signed-certificates)
-* [Discovery V2](#discovery-v2)
-* [Transaction IDs](#transactio-ids)
-* [Use behind a proxy](#use-behind-a-proxy)
-* [Documentation](#documentation)
-* [Questions](#questions)
-* [Open Source @ IBM](#open-source--ibm)
-* [License](#license)
-* [Contributing](#contributing)
-
 ## Announcements
 ### Updating endpoint URLs from watsonplatform.net
 Watson API endpoint URLs at watsonplatform.net are changing and will not work after 26 May 2021. Update your calls to use the newer endpoint URLs. For more information, see https://cloud.ibm.com/docs/watson?topic=watson-endpoint-change.

diff --git a/src/IBM.Watson.Common/IBM.Watson.Common.csproj b/src/IBM.Watson.Common/IBM.Watson.Common.csproj
@@ -10,7 +10,7 @@
         <PackageTags>watson;cognitive;speech;vision;machine-learning;ml;ai;artificial-intelligence;.NET;.NET-Standard</PackageTags>
         <PackageIconUrl>https://watson-developer-cloud.github.io/dotnet-standard-sdk/img/Watson_Avatar_Pos_RGB.png</PackageIconUrl>
         <PackageProjectUrl>https://github.com/watson-developer-cloud/dotnet-standard-sdk</PackageProjectUrl>
-        <Version>5.1.0</Version>
+        <Version>5.2.0-rc1</Version>
     </PropertyGroup>
 
     <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">

diff --git a/src/IBM.Watson.SpeechToText.v1/Examples/ServiceExample.cs b/src/IBM.Watson.SpeechToText.v1/Examples/ServiceExample.cs
@@ -18,12 +18,14 @@
 using IBM.Cloud.SDK.Core.Authentication.Iam;
 using IBM.Cloud.SDK.Core.Http;
 using IBM.Watson.SpeechToText.v1.Model;
+using IBM.Watson.SpeechToText.v1.Websockets;
 using System;
 using System.Collections.Generic;
 using System.IO;
 using System.Net.Http;
 using System.Threading.Tasks;
-
+using static IBM.Watson.SpeechToText.v1.SpeechToTextService;
+
 namespace IBM.Watson.SpeechToText.v1.Examples
 {
     public class ServiceExample
@@ -166,6 +168,57 @@ public void Recognize()
         }
         #endregion
 
+        #region Websockets
+        /// <summary>
+        /// Example: sends a WAV file to Speech to Text through
+        /// <c>RecognizeUsingWebSocket</c> and writes each callback event to the console.
+        /// </summary>
+        public void RecognizeusingWebSocket()
+        {
+            IamAuthenticator authenticator = new IamAuthenticator(
+                apikey: "{apikey}");
+
+            SpeechToTextService service = new SpeechToTextService(authenticator);
+            service.SetServiceUrl("{serviceUrl}");
+
+            RecognizeCallback callback = new RecognizeCallback();
+
+            string audioPath = @"SpeechToTextTestData/test-audio.wav";
+
+            try
+            {
+                byte[] audioBytes = File.ReadAllBytes(audioPath);
+                // NOTE(review): the stream is deliberately not wrapped in `using`; it is
+                // unclear from here whether Send() has finished reading it by the time
+                // RecognizeUsingWebSocket returns - confirm before disposing it.
+                MemoryStream stream = new MemoryStream(audioBytes);
+
+                callback.OnOpen = () =>
+                {
+                    Console.WriteLine("On Open");
+                };
+                callback.OnClose = () =>
+                {
+                    Console.WriteLine("On Close");
+                };
+                callback.OnMessage = (speechResults) =>
+                {
+                    Console.WriteLine("On Message");
+
+                    // Results / Alternatives can be null or empty (for example on a
+                    // message that carries no transcript); indexing [0] blindly would
+                    // throw inside the callback, so check each level first.
+                    var results = speechResults?.Results;
+                    var alternatives = (results != null && results.Count > 0)
+                        ? results[0]?.Alternatives
+                        : null;
+                    if (alternatives != null && alternatives.Count > 0)
+                    {
+                        Console.WriteLine(alternatives[0]?.Transcript);
+                    }
+                };
+                callback.OnError = (err) =>
+                {
+                    Console.WriteLine("On error");
+                    Console.WriteLine(err);
+                };
+                service.RecognizeUsingWebSocket(
+                    callback: callback,
+                    audio: stream,
+                    contentType: RecognizeEnums.ContentTypeValue.AUDIO_WAV,
+                    interimResults: true,
+                    model: RecognizeEnums.ModelValue.EN_US_BROADBANDMODEL
+                    );
+            }
+            catch (Exception e)
+            {
+                // Covers both file-read failures and anything thrown while starting the request.
+                Console.WriteLine(e);
+            }
+        }
+        #endregion
+
         #region Asynchronous
         public void RegisterCallback()
         {

diff --git a/src/IBM.Watson.SpeechToText.v1/IBM.Watson.SpeechToText.v1.csproj b/src/IBM.Watson.SpeechToText.v1/IBM.Watson.SpeechToText.v1.csproj
@@ -3,15 +3,15 @@
     <PropertyGroup>
         <Description>IBM.Watson.SpeechToText.v1 wraps the Watson Speech To Text service (http://www.ibm.com/watson/developercloud/speech-to-text.html)</Description>
         <AssemblyTitle>IBM.Watson.SpeechToText.v1</AssemblyTitle>
-        <VersionPrefix>5.1.0</VersionPrefix>
+        <VersionPrefix>5.2.0-rc1</VersionPrefix>
         <Authors>IBM Watson</Authors>
         <TargetFramework>netstandard2.0</TargetFramework>
         <AssemblyName>IBM.Watson.SpeechToText.v1</AssemblyName>
         <PackageId>IBM.Watson.SpeechToText.v1</PackageId>
         <PackageTags>watson;cognitive;speech;vision;machine-learning;ml;ai;artificial-intelligence;.NET;.NET-Standard</PackageTags>
         <PackageIconUrl>https://watson-developer-cloud.github.io/dotnet-standard-sdk/img/Watson_Avatar_Pos_RGB.png</PackageIconUrl>
         <PackageProjectUrl>https://github.com/watson-developer-cloud/dotnet-standard-sdk</PackageProjectUrl>
-        <Version>5.1.0</Version>
+        <Version>5.2.0-rc1</Version>
     </PropertyGroup>
 
     <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
@@ -35,7 +35,7 @@
 
     <ItemGroup>
         <PackageReference Include="IBM.Cloud.SDK.Core" Version="1.2.0" />
-        <PackageReference Include="IBM.Watson.Common" Version="5.1.0" />
+        <PackageReference Include="IBM.Watson.Common" Version="5.2.0-rc1" />
         <PackageReference Include="Newtonsoft.Json" Version="12.0.3" />
         <PackageReference Include="JsonSubTypes" Version="1.8.0" />
     </ItemGroup>

diff --git a/src/IBM.Watson.SpeechToText.v1/ISpeechToTextServiceExtension.cs b/src/IBM.Watson.SpeechToText.v1/ISpeechToTextServiceExtension.cs
@@ -0,0 +1,10 @@
+using System.Collections.Generic;
+using IBM.Watson.SpeechToText.v1.Websockets;
+
+namespace IBM.Watson.SpeechToText.v1
+{
+    /// <summary>
+    /// WebSocket-related members of the Speech to Text service interface.
+    /// </summary>
+    public partial interface ISpeechToTextService
+    {
+        /// <summary>
+        /// Recognizes the supplied audio using a WebSocket-based client.
+        /// </summary>
+        /// <param name="callback">Callback object that receives the recognition events.</param>
+        /// <param name="audio">In-memory stream holding the audio to recognize.</param>
+        /// <returns>The <see cref="WebSocketClient"/> used for the request.</returns>
+        /// <remarks>Every other parameter is an optional recognition setting that defaults to null.</remarks>
+        WebSocketClient RecognizeUsingWebSocket(
+            RecognizeCallback callback,
+            System.IO.MemoryStream audio,
+            string contentType = null,
+            string model = null,
+            string languageCustomizationId = null,
+            string acousticCustomizationId = null,
+            string baseModelVersion = null,
+            double? customizationWeight = null,
+            long? inactivityTimeout = null,
+            bool? interimResults = null,
+            List<string> keywords = null,
+            float? keywordsThreshold = null,
+            long? maxAlternatives = null,
+            float? wordAlternativesThreshold = null,
+            bool? wordConfidence = null,
+            bool? timestamps = null,
+            bool? profanityFilter = null,
+            bool? smartFormatting = null,
+            bool? speakerLabels = null,
+            string grammarName = null,
+            bool? redaction = null,
+            bool? processingMetrics = null,
+            float? processingMetricsInterval = null,
+            bool? audioMetrics = null,
+            double? endOfPhraseSilenceTime = null,
+            bool? splitTranscriptAtPhraseEnd = null,
+            float? speechDetectorSensitivity = null,
+            float? backgroundAudioSuppression = null);
+    }
+}
diff --git a/src/IBM.Watson.SpeechToText.v1/SpeechToTextServiceExtension.cs b/src/IBM.Watson.SpeechToText.v1/SpeechToTextServiceExtension.cs
@@ -0,0 +1,166 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using IBM.Cloud.SDK.Core.Http;
+using IBM.Cloud.SDK.Core.Service;
+using IBM.Watson.SpeechToText.v1.Websockets;
+
+namespace IBM.Watson.SpeechToText.v1
+{
+    /// <summary>
+    /// WebSocket extension of the Speech to Text service.
+    /// </summary>
+    public partial class SpeechToTextService : IBMService, ISpeechToTextService
+    {
+        /// <summary>
+        /// Builds a <see cref="WebSocketClient"/> for the <c>/v1/recognize</c> endpoint,
+        /// applies the supplied options and headers, and sends the audio through it.
+        /// </summary>
+        /// <param name="callback">Callback object handed to the WebSocket client. Required.</param>
+        /// <param name="audio">Audio to send. Required.</param>
+        /// <remarks>
+        /// <c>model</c>, <c>languageCustomizationId</c>, <c>acousticCustomizationId</c> and
+        /// <c>baseModelVersion</c> are passed with <c>AddArgument</c>; all remaining options are
+        /// passed with <c>AddWebSocketParameter</c>. An option left null (or empty, for strings
+        /// and the keywords list) is not sent at all.
+        /// Headers are applied in this order: SDK headers, <c>customRequestHeaders</c>, then
+        /// the first value of each default request header of the underlying client.
+        /// </remarks>
+        /// <returns>The WebSocket client, after <c>Send(audio)</c> has been called on it.</returns>
+        /// <exception cref="ArgumentNullException">
+        /// <paramref name="callback"/> or <paramref name="audio"/> is null.
+        /// </exception>
+        /// <exception cref="AggregateException">
+        /// An aggregate exception raised while preparing or sending the request, flattened.
+        /// </exception>
+        public WebSocketClient RecognizeUsingWebSocket(RecognizeCallback callback, System.IO.MemoryStream audio, string contentType = null, string model = null, string languageCustomizationId = null, string acousticCustomizationId = null, string baseModelVersion = null, double? customizationWeight = null, long? inactivityTimeout = null, bool? interimResults = null, List<string> keywords = null, float? keywordsThreshold = null, long? maxAlternatives = null, float? wordAlternativesThreshold = null, bool? wordConfidence = null, bool? timestamps = null, bool? profanityFilter = null, bool? smartFormatting = null, bool? speakerLabels = null, string grammarName = null, bool? redaction = null, bool? processingMetrics = null, float? processingMetricsInterval = null, bool? audioMetrics = null, double? endOfPhraseSilenceTime = null, bool? splitTranscriptAtPhraseEnd = null, float? speechDetectorSensitivity = null, float? backgroundAudioSuppression = null)
+        {
+            // The single-string ArgumentNullException constructor treats its argument as the
+            // parameter name, so pass the name and the message separately.
+            if (callback == null)
+            {
+                throw new ArgumentNullException(nameof(callback), "callback cannot be null");
+            }
+            if (audio == null)
+            {
+                throw new ArgumentNullException(nameof(audio), "`audio` is required for `Recognize`");
+            }
+
+            try
+            {
+                IClient client = this.Client;
+                SetAuthentication();
+
+                // Same endpoint as the REST call, but with the secure WebSocket scheme.
+                string url = ($"{this.Endpoint}/v1/recognize").Replace("https://", "wss://");
+                WebSocketClient webSocketClient = new WebSocketClient(url, callback);
+
+                if (!string.IsNullOrEmpty(model))
+                {
+                    webSocketClient.AddArgument("model", model);
+                }
+                if (!string.IsNullOrEmpty(languageCustomizationId))
+                {
+                    webSocketClient.AddArgument("language_customization_id", languageCustomizationId);
+                }
+                if (!string.IsNullOrEmpty(acousticCustomizationId))
+                {
+                    webSocketClient.AddArgument("acoustic_customization_id", acousticCustomizationId);
+                }
+                if (!string.IsNullOrEmpty(baseModelVersion))
+                {
+                    webSocketClient.AddArgument("base_model_version", baseModelVersion);
+                }
+
+                // Websocket Open Message
+                if (!string.IsNullOrEmpty(contentType))
+                {
+                    webSocketClient.AddWebSocketParameter("content-type", contentType);
+                }
+                if (customizationWeight != null)
+                {
+                    webSocketClient.AddWebSocketParameter("customization_weight", customizationWeight);
+                }
+                if (inactivityTimeout != null)
+                {
+                    webSocketClient.AddWebSocketParameter("inactivity_timeout", inactivityTimeout);
+                }
+                if (interimResults != null)
+                {
+                    webSocketClient.AddWebSocketParameter("interim_results", interimResults);
+                }
+                if (keywords != null && keywords.Count > 0)
+                {
+                    // Each keyword is wrapped in double quotes and the list is comma-joined.
+                    webSocketClient.AddWebSocketParameter("keywords", string.Join(",", keywords.Select(x => "\"" + x + "\"")));
+                }
+                if (keywordsThreshold != null)
+                {
+                    webSocketClient.AddWebSocketParameter("keywords_threshold", keywordsThreshold);
+                }
+                if (maxAlternatives != null)
+                {
+                    webSocketClient.AddWebSocketParameter("max_alternatives", maxAlternatives);
+                }
+                if (wordAlternativesThreshold != null)
+                {
+                    webSocketClient.AddWebSocketParameter("word_alternatives_threshold", wordAlternativesThreshold);
+                }
+                if (wordConfidence != null)
+                {
+                    webSocketClient.AddWebSocketParameter("word_confidence", wordConfidence);
+                }
+                if (timestamps != null)
+                {
+                    webSocketClient.AddWebSocketParameter("timestamps", timestamps);
+                }
+                if (profanityFilter != null)
+                {
+                    webSocketClient.AddWebSocketParameter("profanity_filter", profanityFilter);
+                }
+                if (smartFormatting != null)
+                {
+                    webSocketClient.AddWebSocketParameter("smart_formatting", smartFormatting);
+                }
+                if (speakerLabels != null)
+                {
+                    webSocketClient.AddWebSocketParameter("speaker_labels", speakerLabels);
+                }
+                if (!string.IsNullOrEmpty(grammarName))
+                {
+                    webSocketClient.AddWebSocketParameter("grammar_name", grammarName);
+                }
+                if (redaction != null)
+                {
+                    webSocketClient.AddWebSocketParameter("redaction", redaction);
+                }
+                if (processingMetrics != null)
+                {
+                    webSocketClient.AddWebSocketParameter("processing_metrics", processingMetrics);
+                }
+                if (processingMetricsInterval != null)
+                {
+                    webSocketClient.AddWebSocketParameter("processing_metrics_interval", processingMetricsInterval);
+                }
+                if (audioMetrics != null)
+                {
+                    webSocketClient.AddWebSocketParameter("audio_metrics", audioMetrics);
+                }
+                if (endOfPhraseSilenceTime != null)
+                {
+                    webSocketClient.AddWebSocketParameter("end_of_phrase_silence_time", endOfPhraseSilenceTime);
+                }
+                if (splitTranscriptAtPhraseEnd != null)
+                {
+                    webSocketClient.AddWebSocketParameter("split_transcript_at_phrase_end", splitTranscriptAtPhraseEnd);
+                }
+                if (speechDetectorSensitivity != null)
+                {
+                    webSocketClient.AddWebSocketParameter("speech_detector_sensitivity", speechDetectorSensitivity);
+                }
+                if (backgroundAudioSuppression != null)
+                {
+                    webSocketClient.AddWebSocketParameter("background_audio_suppression", backgroundAudioSuppression);
+                }
+
+                var sdkHeaders = Common.GetSdkHeaders("speech_to_text", "v1", "RecognizeUsingWebSocket");
+                foreach (var header in sdkHeaders)
+                {
+                    webSocketClient.WithHeader(header.Key, header.Value);
+                }
+
+                foreach (var header in customRequestHeaders)
+                {
+                    webSocketClient.WithHeader(header.Key, header.Value);
+                }
+
+                foreach (var header in client.BaseClient.DefaultRequestHeaders)
+                {
+                    // Only the first value of a multi-valued header is forwarded.
+                    // FirstOrDefault disposes its enumerator and yields null for an empty
+                    // value list instead of reading Current after a failed MoveNext().
+                    var value = header.Value.FirstOrDefault();
+                    webSocketClient = (WebSocketClient)webSocketClient.WithHeader(header.Key, value);
+                }
+                webSocketClient.Send(audio);
+
+                return webSocketClient;
+            }
+            catch (AggregateException ae)
+            {
+                // Surface nested aggregate exceptions as a single flat AggregateException.
+                throw ae.Flatten();
+            }
+        }
+    }
+}