-
Notifications
You must be signed in to change notification settings - Fork 431
/
Copy pathTextToSpeechEndpoint.cs
106 lines (96 loc) · 5.81 KB
/
TextToSpeechEndpoint.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
using System;
using System.Collections.Generic;
using System.IO;
using System.Net.Http;
using System.Text;
using System.Threading.Tasks;
using OpenAI_API.Chat;
using OpenAI_API.Models;
using static System.Net.WebRequestMethods;
namespace OpenAI_API.Audio
{
/// <summary>
/// The Endpoint for the Text to Speech API. This allows you to generate audio from text. See <seealso href="https://platform.openai.com/docs/guides/text-to-speech"/>
/// </summary>
public class TextToSpeechEndpoint : EndpointBase, ITextToSpeechEndpoint
{
    /// <inheritdoc/>
    protected override string Endpoint => "audio/speech";

    /// <summary>
    /// Default parameters (for example a default voice or model) used by the overloads that take individual arguments:
    /// any of <c>voice</c>, <c>speed</c>, <c>responseFormat</c> or <c>model</c> that is passed as <see langword="null"/> is filled in from this object.
    /// A <see cref="TextToSpeechRequest"/> that you build and pass in yourself is submitted as given; this endpoint does not merge these defaults into it.
    /// </summary>
    public TextToSpeechRequest DefaultTTSRequestArgs { get; set; } = new TextToSpeechRequest();

    /// <summary>
    /// Constructor of the api endpoint. Rather than instantiating this yourself, access it through an instance of <see cref="OpenAIAPI"/> as <see cref="OpenAIAPI.TextToSpeech"/>.
    /// </summary>
    /// <param name="api">Pass in the instance of the api</param>
    internal TextToSpeechEndpoint(OpenAIAPI api) : base(api) { }

    /// <summary>
    /// Calls the API to create speech from text, and returns the raw stream of the audio file.
    /// </summary>
    /// <param name="request">The text to speech request to submit to the API. It is posted as provided.</param>
    /// <returns>A stream of the audio file in the requested format. The caller is responsible for disposing it.</returns>
    public async Task<Stream> GetSpeechAsStreamAsync(TextToSpeechRequest request)
    {
        return await HttpRequest(verb: HttpMethod.Post, postData: request);
    }

    /// <summary>
    /// Calls the API to create speech from text, and returns the raw stream of the audio file.
    /// </summary>
    /// <param name="input">The text to generate audio for. The maximum length is 4096 characters.</param>
    /// <param name="voice">The voice to use when generating the audio. Supported voices can be found in <see cref="TextToSpeechRequest.Voices"/>. When null, the value from <see cref="DefaultTTSRequestArgs"/> is used.</param>
    /// <param name="speed">The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default. When null, the value from <see cref="DefaultTTSRequestArgs"/> is used.</param>
    /// <param name="responseFormat">The default response format is "mp3", but other formats are available in <see cref="TextToSpeechRequest.ResponseFormats"/>. See <seealso href="https://platform.openai.com/docs/guides/text-to-speech/supported-output-formats"/>. When null, the value from <see cref="DefaultTTSRequestArgs"/> is used.</param>
    /// <param name="model">TTS is an AI model that converts text to natural sounding spoken text. OpenAI offers two different model variates, <see cref="Model.TTS_Speed"/> is optimized for real time text to speech use cases and <see cref="Model.TTS_HD"/> is optimized for quality. When null, the value from <see cref="DefaultTTSRequestArgs"/> is used.</param>
    /// <returns>A stream of the audio file in the requested format. The caller is responsible for disposing it.</returns>
    public async Task<Stream> GetSpeechAsStreamAsync(string input, string voice = null, double? speed = null, string responseFormat = null, Model model = null)
    {
        // Delegate to the request-based overload so there is a single place that talks to the API.
        return await GetSpeechAsStreamAsync(BuildRequest(input, voice, speed, responseFormat, model));
    }

    /// <summary>
    /// Calls the API to create speech from text, and saves the audio file to disk.
    /// </summary>
    /// <param name="request">The text to speech request to submit to the API</param>
    /// <param name="localPath">The local path to save the audio file to. An existing file at this path is overwritten.</param>
    /// <returns>A <see cref="FileInfo"/> representing the saved speech file.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="localPath"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="localPath"/> is an empty string.</exception>
    public async Task<FileInfo> SaveSpeechToFileAsync(TextToSpeechRequest request, string localPath)
    {
        // Reject an unusable path up front. The FileStream constructor would throw these same
        // exception types anyway, but only after the API call had already been made and the
        // audio generated for nothing.
        if (localPath == null)
        {
            throw new ArgumentNullException(nameof(localPath));
        }
        if (localPath.Length == 0)
        {
            throw new ArgumentException("The local path must not be empty.", nameof(localPath));
        }

        using (var stream = await GetSpeechAsStreamAsync(request))
        using (var outputFileStream = new FileStream(localPath, FileMode.Create))
        {
            await stream.CopyToAsync(outputFileStream);
        }
        return new FileInfo(localPath);
    }

    /// <summary>
    /// Calls the API to create speech from text, and saves the audio file to disk.
    /// </summary>
    /// <param name="input">The text to generate audio for. The maximum length is 4096 characters.</param>
    /// <param name="localPath">The local path to save the audio file to. An existing file at this path is overwritten.</param>
    /// <param name="voice">The voice to use when generating the audio. Supported voices can be found in <see cref="TextToSpeechRequest.Voices"/>. When null, the value from <see cref="DefaultTTSRequestArgs"/> is used.</param>
    /// <param name="speed">The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default. When null, the value from <see cref="DefaultTTSRequestArgs"/> is used.</param>
    /// <param name="responseFormat">The default response format is "mp3", but other formats are available in <see cref="TextToSpeechRequest.ResponseFormats"/>. See <seealso href="https://platform.openai.com/docs/guides/text-to-speech/supported-output-formats"/>. When null, the value from <see cref="DefaultTTSRequestArgs"/> is used.</param>
    /// <param name="model">TTS is an AI model that converts text to natural sounding spoken text. OpenAI offers two different model variates, <see cref="Model.TTS_Speed"/> is optimized for real time text to speech use cases and <see cref="Model.TTS_HD"/> is optimized for quality. When null, the value from <see cref="DefaultTTSRequestArgs"/> is used.</param>
    /// <returns>A <see cref="FileInfo"/> representing the saved speech file.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="localPath"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="localPath"/> is an empty string.</exception>
    public async Task<FileInfo> SaveSpeechToFileAsync(string input, string localPath, string voice = null, double? speed = null, string responseFormat = null, Model model = null)
    {
        return await SaveSpeechToFileAsync(BuildRequest(input, voice, speed, responseFormat, model), localPath);
    }

    /// <summary>
    /// Builds a <see cref="TextToSpeechRequest"/> from individual arguments, substituting the corresponding
    /// value from <see cref="DefaultTTSRequestArgs"/> for every optional argument that is null.
    /// </summary>
    private TextToSpeechRequest BuildRequest(string input, string voice, double? speed, string responseFormat, Model model)
    {
        return new TextToSpeechRequest()
        {
            Input = input,
            Voice = voice ?? DefaultTTSRequestArgs.Voice,
            Speed = speed ?? DefaultTTSRequestArgs.Speed,
            Model = model ?? DefaultTTSRequestArgs.Model,
            ResponseFormat = responseFormat ?? DefaultTTSRequestArgs.ResponseFormat
        };
    }
}
}