Skip to content

Commit

Permalink
Merge pull request #455 from watson-developer-cloud/tts-websockets
Browse files Browse the repository at this point in the history
websockets
  • Loading branch information
kevinkowa committed Apr 5, 2021
2 parents 427b4c8 + 69dec74 commit 20726cb
Show file tree
Hide file tree
Showing 25 changed files with 1,399 additions and 77 deletions.
97 changes: 52 additions & 45 deletions IBM.Watson.sln

Large diffs are not rendered by default.

17 changes: 0 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,6 @@

The .NET Standard SDK uses [Watson][wdc] services, a collection of REST APIs that use cognitive computing to solve complex problems.

## Table of Contents
* [Before you begin](#before-you-begin)
* [Installing the Watson .NET Standard SDK](#installing-the-watson-net-standard-sdk)
* [.NET Standard 2.0](#net-standard-20)
* [Authentication](#authentication)
* [Custom Request Headers](#custom-request-headers)
* [Response Headers, Status Code and Raw Json](#response-headers-status-code-and-raw-json)
* [Self signed certificates](#self-signed-certificates)
* [Discovery V2](#discovery-v2)
* [Transaction IDs](#transaction-ids)
* [Use behind a proxy](#use-behind-a-proxy)
* [Documentation](#documentation)
* [Questions](#questions)
* [Open Source @ IBM](#open-source--ibm)
* [License](#license)
* [Contributing](#contributing)

## Announcements
### Updating endpoint URLs from watsonplatform.net
Watson API endpoint URLs at watsonplatform.net are changing and will not work after 26 May 2021. Update your calls to use the newer endpoint URLs. For more information, see https://cloud.ibm.com/docs/watson?topic=watson-endpoint-change.
Expand Down
2 changes: 1 addition & 1 deletion src/IBM.Watson.Common/IBM.Watson.Common.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
<PackageTags>watson;cognitive;speech;vision;machine-learning;ml;ai;artificial-intelligence;.NET;.NET-Standard</PackageTags>
<PackageIconUrl>https://watson-developer-cloud.github.io/dotnet-standard-sdk/img/Watson_Avatar_Pos_RGB.png</PackageIconUrl>
<PackageProjectUrl>https://github.com/watson-developer-cloud/dotnet-standard-sdk</PackageProjectUrl>
<Version>5.1.0</Version>
<Version>5.2.0-rc1</Version>
</PropertyGroup>

<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
Expand Down
55 changes: 54 additions & 1 deletion src/IBM.Watson.SpeechToText.v1/Examples/ServiceExample.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,14 @@
using IBM.Cloud.SDK.Core.Authentication.Iam;
using IBM.Cloud.SDK.Core.Http;
using IBM.Watson.SpeechToText.v1.Model;
using IBM.Watson.SpeechToText.v1.Websockets;
using System;
using System.Collections.Generic;
using System.IO;
using System.Net.Http;
using System.Threading.Tasks;

using static IBM.Watson.SpeechToText.v1.SpeechToTextService;

namespace IBM.Watson.SpeechToText.v1.Examples
{
public class ServiceExample
Expand Down Expand Up @@ -166,6 +168,57 @@ public void Recognize()
}
#endregion

#region Websockets
/// <summary>
/// Example: reads a WAV file into memory and submits it through
/// <c>SpeechToTextService.RecognizeUsingWebSocket</c>, printing open/close events,
/// transcripts and errors to the console.
/// </summary>
public void RecognizeusingWebSocket()
{
    IamAuthenticator authenticator = new IamAuthenticator(
        apikey: "{apikey}");

    SpeechToTextService service = new SpeechToTextService(authenticator);
    service.SetServiceUrl("{serviceUrl}");

    RecognizeCallback callback = new RecognizeCallback();

    string audioPath = @"SpeechToTextTestData/test-audio.wav";

    try
    {
        byte[] audioBytes = File.ReadAllBytes(audioPath);
        // NOTE(review): the stream is deliberately not wrapped in `using`; whether the
        // client has finished reading it by the time RecognizeUsingWebSocket returns is
        // not visible from this example - confirm before disposing it here.
        MemoryStream stream = new MemoryStream(audioBytes);

        callback.OnOpen = () =>
        {
            Console.WriteLine("On Open");
        };
        callback.OnClose = () =>
        {
            Console.WriteLine("On Close");
        };
        callback.OnMessage = (speechResults) =>
        {
            Console.WriteLine("On Message");

            // A message may carry no results (or a result without alternatives).
            // Indexing [0] directly would throw inside the callback, so look up the
            // first element defensively and print an empty line when it is absent.
            var results = speechResults?.Results;
            var firstResult = results == null
                ? null
                : System.Linq.Enumerable.FirstOrDefault(results);
            var alternatives = firstResult?.Alternatives;
            var bestAlternative = alternatives == null
                ? null
                : System.Linq.Enumerable.FirstOrDefault(alternatives);

            Console.WriteLine(bestAlternative?.Transcript);
        };
        callback.OnError = (err) =>
        {
            Console.WriteLine("On error");
            Console.WriteLine(err);
        };

        service.RecognizeUsingWebSocket(
            callback: callback,
            audio: stream,
            contentType: RecognizeEnums.ContentTypeValue.AUDIO_WAV,
            interimResults: true,
            model: RecognizeEnums.ModelValue.EN_US_BROADBANDMODEL
            );
    }
    catch (Exception e)
    {
        // Example code: report any failure (missing file, connection error, ...) and continue.
        Console.WriteLine(e);
    }
}
#endregion

#region Asynchronous
public void RegisterCallback()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
<PropertyGroup>
<Description>IBM.Watson.SpeechToText.v1 wraps the Watson Speech To Text service (http://www.ibm.com/watson/developercloud/speech-to-text.html)</Description>
<AssemblyTitle>IBM.Watson.SpeechToText.v1</AssemblyTitle>
<VersionPrefix>5.1.0</VersionPrefix>
<VersionPrefix>5.2.0-rc1</VersionPrefix>
<Authors>IBM Watson</Authors>
<TargetFramework>netstandard2.0</TargetFramework>
<AssemblyName>IBM.Watson.SpeechToText.v1</AssemblyName>
<PackageId>IBM.Watson.SpeechToText.v1</PackageId>
<PackageTags>watson;cognitive;speech;vision;machine-learning;ml;ai;artificial-intelligence;.NET;.NET-Standard</PackageTags>
<PackageIconUrl>https://watson-developer-cloud.github.io/dotnet-standard-sdk/img/Watson_Avatar_Pos_RGB.png</PackageIconUrl>
<PackageProjectUrl>https://github.com/watson-developer-cloud/dotnet-standard-sdk</PackageProjectUrl>
<Version>5.1.0</Version>
<Version>5.2.0-rc1</Version>
</PropertyGroup>

<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
Expand All @@ -35,7 +35,7 @@

<ItemGroup>
<PackageReference Include="IBM.Cloud.SDK.Core" Version="1.2.0" />
<PackageReference Include="IBM.Watson.Common" Version="5.1.0" />
<PackageReference Include="IBM.Watson.Common" Version="5.2.0-rc1" />
<PackageReference Include="Newtonsoft.Json" Version="12.0.3" />
<PackageReference Include="JsonSubTypes" Version="1.8.0" />
</ItemGroup>
Expand Down
10 changes: 10 additions & 0 deletions src/IBM.Watson.SpeechToText.v1/ISpeechToTextServiceExtension.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
using System.Collections.Generic;
using IBM.Watson.SpeechToText.v1.Websockets;

namespace IBM.Watson.SpeechToText.v1
{
public partial interface ISpeechToTextService
{
    /// <summary>
    /// Sends <paramref name="audio"/> for recognition through a <see cref="WebSocketClient"/>
    /// and returns that client. Events are reported through <paramref name="callback"/>.
    /// </summary>
    /// <param name="callback">Callback object handed to the WebSocket client. Required.</param>
    /// <param name="audio">In-memory audio to send. Required.</param>
    /// <remarks>
    /// All remaining parameters are optional. In the implementation in this assembly, an
    /// optional value is only forwarded when it is non-null (strings: non-empty,
    /// <c>keywords</c>: at least one entry).
    /// </remarks>
    /// <returns>The WebSocket client the audio was sent on.</returns>
    WebSocketClient RecognizeUsingWebSocket(
        RecognizeCallback callback,
        System.IO.MemoryStream audio,
        string contentType = null,
        string model = null,
        string languageCustomizationId = null,
        string acousticCustomizationId = null,
        string baseModelVersion = null,
        double? customizationWeight = null,
        long? inactivityTimeout = null,
        bool? interimResults = null,
        List<string> keywords = null,
        float? keywordsThreshold = null,
        long? maxAlternatives = null,
        float? wordAlternativesThreshold = null,
        bool? wordConfidence = null,
        bool? timestamps = null,
        bool? profanityFilter = null,
        bool? smartFormatting = null,
        bool? speakerLabels = null,
        string grammarName = null,
        bool? redaction = null,
        bool? processingMetrics = null,
        float? processingMetricsInterval = null,
        bool? audioMetrics = null,
        double? endOfPhraseSilenceTime = null,
        bool? splitTranscriptAtPhraseEnd = null,
        float? speechDetectorSensitivity = null,
        float? backgroundAudioSuppression = null);
}
}
166 changes: 166 additions & 0 deletions src/IBM.Watson.SpeechToText.v1/SpeechToTextServiceExtension.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
using System;
using System.Collections.Generic;
using System.Linq;
using IBM.Cloud.SDK.Core.Http;
using IBM.Cloud.SDK.Core.Service;
using IBM.Watson.SpeechToText.v1.Websockets;

namespace IBM.Watson.SpeechToText.v1
{
public partial class SpeechToTextService : IBMService, ISpeechToTextService
{
    /// <summary>
    /// Sends audio for recognition over a WebSocket connection.
    /// </summary>
    /// <remarks>
    /// The target URL is <c>{Endpoint}/v1/recognize</c> with <c>https://</c> replaced by
    /// <c>wss://</c>. <paramref name="model"/>, <paramref name="languageCustomizationId"/>,
    /// <paramref name="acousticCustomizationId"/> and <paramref name="baseModelVersion"/> are
    /// registered with <c>AddArgument</c>; every other optional value is registered with
    /// <c>AddWebSocketParameter</c>. Optional values are skipped when null (strings: null or
    /// empty; <paramref name="keywords"/>: null or empty list). SDK headers, custom request
    /// headers and the HTTP client's default request headers are copied onto the WebSocket
    /// client before the audio is sent.
    /// </remarks>
    /// <param name="callback">Callback object handed to the WebSocket client. Required.</param>
    /// <param name="audio">In-memory audio passed to <c>WebSocketClient.Send</c>. Required.</param>
    /// <param name="contentType">Sent as <c>content-type</c>.</param>
    /// <param name="model">Sent as <c>model</c>.</param>
    /// <param name="languageCustomizationId">Sent as <c>language_customization_id</c>.</param>
    /// <param name="acousticCustomizationId">Sent as <c>acoustic_customization_id</c>.</param>
    /// <param name="baseModelVersion">Sent as <c>base_model_version</c>.</param>
    /// <param name="customizationWeight">Sent as <c>customization_weight</c>.</param>
    /// <param name="inactivityTimeout">Sent as <c>inactivity_timeout</c>.</param>
    /// <param name="interimResults">Sent as <c>interim_results</c>.</param>
    /// <param name="keywords">Sent as <c>keywords</c>: each entry double-quoted, comma-joined.</param>
    /// <param name="keywordsThreshold">Sent as <c>keywords_threshold</c>.</param>
    /// <param name="maxAlternatives">Sent as <c>max_alternatives</c>.</param>
    /// <param name="wordAlternativesThreshold">Sent as <c>word_alternatives_threshold</c>.</param>
    /// <param name="wordConfidence">Sent as <c>word_confidence</c>.</param>
    /// <param name="timestamps">Sent as <c>timestamps</c>.</param>
    /// <param name="profanityFilter">Sent as <c>profanity_filter</c>.</param>
    /// <param name="smartFormatting">Sent as <c>smart_formatting</c>.</param>
    /// <param name="speakerLabels">Sent as <c>speaker_labels</c>.</param>
    /// <param name="grammarName">Sent as <c>grammar_name</c>.</param>
    /// <param name="redaction">Sent as <c>redaction</c>.</param>
    /// <param name="processingMetrics">Sent as <c>processing_metrics</c>.</param>
    /// <param name="processingMetricsInterval">Sent as <c>processing_metrics_interval</c>.</param>
    /// <param name="audioMetrics">Sent as <c>audio_metrics</c>.</param>
    /// <param name="endOfPhraseSilenceTime">Sent as <c>end_of_phrase_silence_time</c>.</param>
    /// <param name="splitTranscriptAtPhraseEnd">Sent as <c>split_transcript_at_phrase_end</c>.</param>
    /// <param name="speechDetectorSensitivity">Sent as <c>speech_detector_sensitivity</c>.</param>
    /// <param name="backgroundAudioSuppression">Sent as <c>background_audio_suppression</c>.</param>
    /// <returns>The <see cref="WebSocketClient"/> the audio was sent on.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="callback"/> or <paramref name="audio"/> is null.
    /// </exception>
    /// <exception cref="AggregateException">
    /// An aggregate failure raised while preparing or sending; rethrown flattened.
    /// </exception>
    public WebSocketClient RecognizeUsingWebSocket(
        RecognizeCallback callback,
        System.IO.MemoryStream audio,
        string contentType = null,
        string model = null,
        string languageCustomizationId = null,
        string acousticCustomizationId = null,
        string baseModelVersion = null,
        double? customizationWeight = null,
        long? inactivityTimeout = null,
        bool? interimResults = null,
        List<string> keywords = null,
        float? keywordsThreshold = null,
        long? maxAlternatives = null,
        float? wordAlternativesThreshold = null,
        bool? wordConfidence = null,
        bool? timestamps = null,
        bool? profanityFilter = null,
        bool? smartFormatting = null,
        bool? speakerLabels = null,
        string grammarName = null,
        bool? redaction = null,
        bool? processingMetrics = null,
        float? processingMetricsInterval = null,
        bool? audioMetrics = null,
        double? endOfPhraseSilenceTime = null,
        bool? splitTranscriptAtPhraseEnd = null,
        float? speechDetectorSensitivity = null,
        float? backgroundAudioSuppression = null)
    {
        // The single-string ArgumentNullException constructor treats its argument as the
        // parameter NAME, so pass the name and the message separately.
        if (callback == null)
        {
            throw new ArgumentNullException(nameof(callback), "callback cannot be null");
        }
        if (audio == null)
        {
            throw new ArgumentNullException(nameof(audio), "`audio` is required for `Recognize`");
        }

        try
        {
            IClient client = this.Client;
            SetAuthentication();

            string url = ($"{this.Endpoint}/v1/recognize").Replace("https://", "wss://");
            WebSocketClient webSocketClient = new WebSocketClient(url, callback);

            // Values registered as arguments.
            if (!string.IsNullOrEmpty(model))
            {
                webSocketClient.AddArgument("model", model);
            }
            if (!string.IsNullOrEmpty(languageCustomizationId))
            {
                webSocketClient.AddArgument("language_customization_id", languageCustomizationId);
            }
            if (!string.IsNullOrEmpty(acousticCustomizationId))
            {
                webSocketClient.AddArgument("acoustic_customization_id", acousticCustomizationId);
            }
            if (!string.IsNullOrEmpty(baseModelVersion))
            {
                webSocketClient.AddArgument("base_model_version", baseModelVersion);
            }

            // Websocket Open Message
            if (!string.IsNullOrEmpty(contentType))
            {
                webSocketClient.AddWebSocketParameter("content-type", contentType);
            }
            if (customizationWeight != null)
            {
                webSocketClient.AddWebSocketParameter("customization_weight", customizationWeight);
            }
            if (inactivityTimeout != null)
            {
                webSocketClient.AddWebSocketParameter("inactivity_timeout", inactivityTimeout);
            }
            if (interimResults != null)
            {
                webSocketClient.AddWebSocketParameter("interim_results", interimResults);
            }
            if (keywords != null && keywords.Count > 0)
            {
                // Each keyword is wrapped in double quotes and the list is comma-joined.
                webSocketClient.AddWebSocketParameter("keywords", string.Join(",", keywords.Select(x => "\"" + x + "\"")));
            }
            if (keywordsThreshold != null)
            {
                webSocketClient.AddWebSocketParameter("keywords_threshold", keywordsThreshold);
            }
            if (maxAlternatives != null)
            {
                webSocketClient.AddWebSocketParameter("max_alternatives", maxAlternatives);
            }
            if (wordAlternativesThreshold != null)
            {
                webSocketClient.AddWebSocketParameter("word_alternatives_threshold", wordAlternativesThreshold);
            }
            if (wordConfidence != null)
            {
                webSocketClient.AddWebSocketParameter("word_confidence", wordConfidence);
            }
            if (timestamps != null)
            {
                webSocketClient.AddWebSocketParameter("timestamps", timestamps);
            }
            if (profanityFilter != null)
            {
                webSocketClient.AddWebSocketParameter("profanity_filter", profanityFilter);
            }
            if (smartFormatting != null)
            {
                webSocketClient.AddWebSocketParameter("smart_formatting", smartFormatting);
            }
            if (speakerLabels != null)
            {
                webSocketClient.AddWebSocketParameter("speaker_labels", speakerLabels);
            }
            if (!string.IsNullOrEmpty(grammarName))
            {
                webSocketClient.AddWebSocketParameter("grammar_name", grammarName);
            }
            if (redaction != null)
            {
                webSocketClient.AddWebSocketParameter("redaction", redaction);
            }
            if (processingMetrics != null)
            {
                webSocketClient.AddWebSocketParameter("processing_metrics", processingMetrics);
            }
            if (processingMetricsInterval != null)
            {
                webSocketClient.AddWebSocketParameter("processing_metrics_interval", processingMetricsInterval);
            }
            if (audioMetrics != null)
            {
                webSocketClient.AddWebSocketParameter("audio_metrics", audioMetrics);
            }
            if (endOfPhraseSilenceTime != null)
            {
                webSocketClient.AddWebSocketParameter("end_of_phrase_silence_time", endOfPhraseSilenceTime);
            }
            if (splitTranscriptAtPhraseEnd != null)
            {
                webSocketClient.AddWebSocketParameter("split_transcript_at_phrase_end", splitTranscriptAtPhraseEnd);
            }
            if (speechDetectorSensitivity != null)
            {
                webSocketClient.AddWebSocketParameter("speech_detector_sensitivity", speechDetectorSensitivity);
            }
            if (backgroundAudioSuppression != null)
            {
                webSocketClient.AddWebSocketParameter("background_audio_suppression", backgroundAudioSuppression);
            }

            // Headers are applied in this order: SDK headers, custom request headers,
            // then the HTTP client's default request headers.
            var sdkHeaders = Common.GetSdkHeaders("speech_to_text", "v1", "RecognizeUsingWebSocket");
            foreach (var header in sdkHeaders)
            {
                webSocketClient.WithHeader(header.Key, header.Value);
            }

            foreach (var header in customRequestHeaders)
            {
                webSocketClient.WithHeader(header.Key, header.Value);
            }

            foreach (var header in client.BaseClient.DefaultRequestHeaders)
            {
                // Only the first value of each default header is forwarded.
                // FirstOrDefault also disposes the underlying enumerator.
                var value = header.Value.FirstOrDefault();
                webSocketClient = (WebSocketClient)webSocketClient.WithHeader(header.Key, value);
            }

            webSocketClient.Send(audio);

            return webSocketClient;
        }
        catch (AggregateException ae)
        {
            throw ae.Flatten();
        }
    }
}
}

0 comments on commit 20726cb

Please sign in to comment.