"""Gradio UI helper for the OpenVoice voice-cloning demo accelerated by OpenVINO."""
from typing import Callable
import gradio as gr
# Markdown shown at the top of the demo page: a heading line followed by a
# one-paragraph summary. The value starts and ends with a newline.
description = (
    "\n"
    "# OpenVoice accelerated by OpenVINO:\n"
    "a versatile instant voice cloning approach that requires only a short audio clip from the reference speaker to replicate their voice and generate speech in multiple languages. OpenVoice enables granular control over voice styles, including emotion, accent, rhythm, pauses, and intonation, in addition to replicating the tone color of the reference speaker. OpenVoice also achieves zero-shot cross-lingual voice cloning for languages not included in the massive-speaker training set.\n"
)
# HTML notice rendered under the description: pointers to the upstream Q&A and
# the multi-lingual notebook, plus a note on supported languages.
# Joined line by line; the empty first/last items give the leading and
# trailing newline.
content = "\n".join(
    (
        "",
        "<div>",
        "<strong>If the generated voice does not sound like the reference voice, please refer to <a href='https://github.com/myshell-ai/OpenVoice/blob/main/docs/QA.md'>this QnA</a>.</strong> <strong>For multi-lingual & cross-lingual examples, please refer to <a href='https://github.com/myshell-ai/OpenVoice/blob/main/demo_part2.ipynb'>this jupyter notebook</a>.</strong>",
        "This online demo mainly supports <strong>English</strong>. The <em>default</em> style also supports <strong>Chinese</strong>. But OpenVoice can adapt to any other language as long as a base speaker is provided.",
        "</div>",
        "",
    )
)

# The notice wrapped in a bordered, padded container.
wrapped_markdown_content = "<div style='border: 1px solid #000; padding: 10px;'>" + content + "</div>"
# Example rows for the demo. Each row is
# [text prompt, style, reference audio path, agree-flag], and every row has
# the agree-flag set to True.
examples = [
    [prompt, style, reference_audio, True]
    for prompt, style, reference_audio in (
        (
            "今天天气真好,我们一起出去吃饭吧。",
            "default",
            "OpenVoice/resources/demo_speaker1.mp3",
        ),
        (
            "This audio is generated by open voice with a half-performance model.",
            "whispering",
            "OpenVoice/resources/demo_speaker2.mp3",
        ),
        (
            "He hoped there would be stew for dinner, turnips and carrots and bruised potatoes and fat mutton pieces to be ladled out in thick, peppered, flour-fattened sauce.",
            "sad",
            "OpenVoice/resources/demo_speaker0.mp3",
        ),
    )
]
def make_demo(fn: Callable):
    """Build the Gradio Blocks UI for the OpenVoice voice-cloning demo.

    Args:
        fn: Callback wired to the "Send" button. It receives four positional
            values, in order: the text prompt, the selected style, the
            reference audio as a file path, and the state of the "Agree"
            checkbox. It must return three values, in order: an info message,
            the synthesised audio, and the audio with the cloned voice.

    Returns:
        The constructed ``gr.Blocks`` instance. It is not launched here.
    """
    with gr.Blocks(analytics_enabled=False) as demo:
        with gr.Row():
            gr.Markdown(description)
        with gr.Row():
            gr.HTML(wrapped_markdown_content)
        with gr.Row():
            with gr.Column():
                input_text_gr = gr.Textbox(
                    label="Text Prompt",
                    info="One or two sentences at a time is better. Up to 200 text characters.",
                    value="He hoped there would be stew for dinner, turnips and carrots and bruised potatoes and fat mutton pieces to be ladled out in thick, peppered, flour-fattened sauce.",
                )
                # Single-select dropdown: `max_choices` only applies together
                # with `multiselect=True`, so it is intentionally not passed.
                style_gr = gr.Dropdown(
                    label="Style",
                    info="Select a style of output audio for the synthesised speech. (Chinese only support 'default' now)",
                    choices=[
                        "default",
                        "whispering",
                        "cheerful",
                        "terrified",
                        "angry",
                        "sad",
                        "friendly",
                    ],
                    value="default",
                )
                ref_gr = gr.Audio(
                    label="Reference Audio",
                    type="filepath",
                    value="OpenVoice/resources/demo_speaker2.mp3",
                )
                tos_gr = gr.Checkbox(
                    label="Agree",
                    value=False,
                    info="I agree to the terms of the cc-by-nc-4.0 license-: https://github.com/myshell-ai/OpenVoice/blob/main/LICENSE",
                )
                tts_button = gr.Button("Send", elem_id="send-btn", visible=True)
            with gr.Column():
                out_text_gr = gr.Text(label="Info")
                audio_orig_gr = gr.Audio(label="Synthesised Audio", autoplay=False)
                audio_gr = gr.Audio(label="Audio with cloned voice", autoplay=True)

        # One definition of the callback's inputs/outputs, shared by the
        # examples widget and the button, so both always match what `fn`
        # takes and returns.
        fn_inputs = [input_text_gr, style_gr, ref_gr, tos_gr]
        fn_outputs = [out_text_gr, audio_orig_gr, audio_gr]

        gr.Examples(
            label="Examples",
            examples=examples,
            fn=fn,
            inputs=fn_inputs,
            outputs=fn_outputs,
            cache_examples=False,
        )
        tts_button.click(
            fn=fn,
            inputs=fn_inputs,
            outputs=fn_outputs,
        )
    return demo