In [None]:
# 安装必要模块

!pip install openai
!pip install edge-tts
!pip install pygame

In [None]:
# 导入各种模块

import asyncio
import html
import os
import re
import shutil
from datetime import date
from urllib.parse import quote

import edge_tts
import openai
import pygame

In [None]:
# Provide your own OpenAI API key.
# The key is taken from the OPENAI_API_KEY environment variable when it is set, so the
# secret never has to be saved inside the notebook; otherwise edit the placeholder below.
openai.api_key = os.environ.get('OPENAI_API_KEY', '<your-openai-api-key>')

print('本 Cell 执行完毕')

In [None]:
# 定义需要使用的 函数

def read_file(file_path):
    """Return the non-blank lines of a UTF-8 text file.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of the file's lines in order, each stripped of leading and
        trailing whitespace; lines that are empty after stripping are left out.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        stripped = (raw.strip() for raw in handle)
        return [entry for entry in stripped if entry]

def list_to_file(list, file_path):
    """Write every item of ``list`` to ``file_path``, one item per line.

    Args:
        list: Iterable of strings to write (the name shadows the builtin but is
            kept so existing callers keep working).
        file_path: Destination path; an existing file is overwritten.

    Returns:
        None.
    """
    # Explicit UTF-8: read_file() decodes these files as UTF-8, and a platform
    # default such as cp1252/cp936 cannot reliably encode Japanese or IPA text.
    with open(file_path, 'w', encoding='utf-8') as f:
        f.writelines(item + '\n' for item in list)

def translate_text(text, target_language):
    """Translate ``text`` into ``target_language`` with a casual tone.

    Sends one completion request to OpenAI. For the target "Japanese" the
    prompt additionally asks for honorific speech.

    Args:
        text: The sentence to translate.
        target_language: Name of the target language, as written in the prompt.

    Returns:
        The text of the first completion choice, stripped of surrounding whitespace.
    """
    # NOTE(review): 'text-davinci-003' may no longer be served by OpenAI — confirm
    # the model is still available before relying on this call.
    respect = "in honorific speech," if target_language == "Japanese" else ""
    prompt = f"Translate the following text into {target_language}, {respect} with a little bit casual, informal tone:\n{text}"
    request = dict(
        engine='text-davinci-003',
        prompt=prompt,
        max_tokens=1000,
        temperature=0.7,
        n=1,
        stop=None,
    )
    completion = openai.Completion.create(**request)
    return completion.choices[0].text.strip()

def get_ipa(text):
    """Ask OpenAI for the IPA transcription of an English text.

    Args:
        text: English text to transcribe.

    Returns:
        The first completion choice with surrounding whitespace removed and
        then any leading/trailing '/' characters removed.
    """
    request = dict(
        engine='text-davinci-003',
        prompt=f"Translate the following english text into ipa phonetics:\n{text}",
        max_tokens=1000,
        temperature=1,
        n=1,
        stop=None,
    )
    raw = openai.Completion.create(**request).choices[0].text
    without_whitespace = raw.strip()
    return without_whitespace.strip('/')

def ipa_to_english(text):
    """Ask OpenAI to turn an IPA transcription back into English.

    Args:
        text: IPA transcription to convert.

    Returns:
        The first completion choice, stripped of surrounding whitespace.
    """
    request = dict(
        engine='text-davinci-003',
        prompt=f"Translate the following ipa phonetics back into English:\n{text}",
        max_tokens=1000,
        temperature=1,
        n=1,
        stop=None,
    )
    first_choice = openai.Completion.create(**request).choices[0]
    return first_choice.text.strip()

def get_romaji(text):
    """Ask OpenAI for the Romaji reading of a Japanese text.

    Args:
        text: Japanese text to romanise.

    Returns:
        The first completion choice, stripped of surrounding whitespace.
    """
    request = dict(
        engine='text-davinci-003',
        prompt=f"Translate the following Japanese into Romaji:\n{text}",
        max_tokens=1000,
        temperature=1,
        n=1,
        stop=None,
    )
    first_choice = openai.Completion.create(**request).choices[0]
    return first_choice.text.strip()

def play_audio(audio_file):
    """Load ``audio_file`` into pygame's music mixer and start playing it.

    The mixer is initialised on every call before the file is loaded.

    Args:
        audio_file: Path of the audio file to play.

    Returns:
        None.
    """
    mixer = pygame.mixer
    mixer.init()
    music = mixer.music
    music.load(audio_file)
    music.play()

# Confirmation shown once this cell has run (the message says the required functions are loaded).
print("必要函数加载完毕...")

In [None]:
# File holding all source sentences (Chinese), one sentence per line; blank lines are skipped.
INPUT_FILE = "list.txt"

sentences = read_file(INPUT_FILE)
en_translations = []
jp_translations = []

# Translate every sentence into English and then Japanese, echoing the source
# sentence and both translations as they arrive.
for s in sentences:
  print(s)

  ent = translate_text(s, 'English')
  print(ent)

  jpt = translate_text(s, "Japanese")
  print(jpt)

  en_translations += [ent]
  jp_translations += [jpt]

# Save both lists so they can be reviewed and corrected in a text editor.
list_to_file(en_translations, 'en_translations.txt')
list_to_file(jp_translations, 'jp_translations.txt')

print("Translations written in files... done!")

In [None]:
# Open en_translations.txt and jp_translations.txt in a text editor and review them;
# any translation that does not fit can be redone here.

# If nothing needs correcting, skip this cell.

# Single-sentence translation: copy the new output and paste it over the unsuitable line in the file.

RECTIFICATION_TEXT = "让他们最后吃主食，多吃肉和蔬菜吧。"

ent = translate_text(RECTIFICATION_TEXT, 'English')
print(ent)

jpt = translate_text(RECTIFICATION_TEXT, "Japanese")
print(jpt)

In [None]:
# Re-read the (possibly hand-edited) English and Japanese translation files.

en_translations = read_file("en_translations.txt")
jp_translations = read_file("jp_translations.txt")

# Report a line-count mismatch between the two files; processing continues either way.
en_count, jp_count = len(en_translations), len(jp_translations)
if not en_count == jp_count:
  print(f"The numbers of two lists' items dont' match...\n'en...txt' has {en_count} items, and\n'jp...txt' has {jp_count} items...")

print(f"总计含有 {en_count} 个句子。")

In [None]:
# Fetch the IPA transcription of each English sentence and the Romaji reading of
# each Japanese sentence, keeping them in lists and saving them to files.

ipa = []
romaji = []

for e, j in zip(en_translations, jp_translations):
  # English -> IPA
  enipa = get_ipa(e)
  print(enipa)
  ipa += [enipa]

  # Japanese -> Romaji
  jpromaji = get_romaji(j)
  print(jpromaji)
  romaji += [jpromaji]

list_to_file(ipa, 'ipa.txt')
list_to_file(romaji, 'romaji.txt')

print("英文音标与日文注音已经完成!")

In [None]:
# 生成 mp3 语音文件
# 需要指定语音性别，0 为女性，1 为男性

GENDER = 0

if GENDER:
  GENDER = "Male"
  VOICE_CN = "zh-CN-YunxiNeural"
  VOICE_EN = "en-US-GuyNeural"
  VOICE_JP = "ja-JP-KeitaNeural"
else:
  GENDER = "Female"
  VOICE_CN = "zh-CN-XiaoxiaoNeural"
  VOICE_EN = "en-US-MichelleNeural"
  VOICE_JP = "ja-JP-NanamiNeural"

RATE = "-40%" # 较慢语速设定
VOLUME = "+50%" # 这一参数无需修改
OUTPUT_DIR = f'{date.today()}({GENDER})'
All_TEXT_FILE = f'{date.today()}({GENDER}).index.html'

ENVN, ENVS, JPVN, JPVS = [[] for _ in range(4)]

for e, j in zip(en_translations, jp_translations):

  # 英文
  
  TEXT = e
  
  INDEX = en_translations.index(e) + 1
  if INDEX < 10:
    INDEX_STR = '0' + str(INDEX)
  else:
    INDEX_STR = str(INDEX)
  
  OUTPUT_FILE = f'{INDEX_STR}.normal.en_{e}.mp3'
  ENVN.append(OUTPUT_FILE)
  communicate = edge_tts.Communicate(TEXT, VOICE_EN, volume=VOLUME)
  await communicate.save(OUTPUT_FILE)
  play_audio(OUTPUT_FILE)
  
  OUTPUT_FILE = f'{INDEX_STR}.slower.en_{e}.mp3'
  ENVS.append(OUTPUT_FILE)
  communicate = edge_tts.Communicate(TEXT, VOICE_EN, rate=RATE, volume=VOLUME)
  await communicate.save(OUTPUT_FILE)
  play_audio(OUTPUT_FILE)

  # 日文
  
  TEXT = j
  
  OUTPUT_FILE = f'{INDEX_STR}.normal.jp_{j}.mp3'
  JPVN.append(OUTPUT_FILE)
  communicate = edge_tts.Communicate(TEXT, VOICE_JP, volume=VOLUME)
  await communicate.save(OUTPUT_FILE)
  play_audio(OUTPUT_FILE)
  
  OUTPUT_FILE = f'{INDEX_STR}.slower.jp{j}.mp3'
  JPVS.append(OUTPUT_FILE)
  communicate = edge_tts.Communicate(TEXT, VOICE_JP, rate=RATE, volume=VOLUME)
  await communicate.save(OUTPUT_FILE)
  play_audio(OUTPUT_FILE)

list_to_file(ENVN, 'ENVN.txt')
list_to_file(ENVS, 'ENVS.txt')
list_to_file(JPVN, 'JPVN.txt')
list_to_file(JPVS, 'JPVS.txt')

print(f"{len(ENVN)} audio files created!")

In [None]:
# Build the HTML index page.

All_TEXT_FILE = f'{date.today()}({GENDER}).index.html'

# Static page header. Deliberately a plain (non-f) string: inside an f-string the
# CSS braces are parsed as replacement fields and the cell fails with NameError.
_PAGE_HEAD = """<html>
  <head>
  <meta charset="UTF-8">
    <style>
      body {font-size: 120%; font-family: "Helvetica"; width: 90%; margin: 50px; background: #f1f3f4;}
      h1 {font-size: 250%; padding-bottom: 20px; margin-bottom: 1em; border-bottom: 1px solid #A74B4B;}
      h2 {color: #fff; width: 3em; background: #001E6F; text-align: center; padding: 0.3em; border: 1px solid #001E6F; border-radius: 10px; margin: 1em 0 1em 0;}
      p.cn {color: #000; font-weight: bold;}
      p.en, p.jp {color: #A74B4B;}
      p.ipa, p.romaji {color: #001E6F;}
      hr {margin: 2em auto;}
      span.tag {padding: 5px 15px; margin-right: 1em; border: 1px solid #001E6F; border-radius: 25px;}
      span.cn {font-weight: bold;}
      span.en {font-size: 110%}
      span.speed {margin-right: 1em;}
      .audio {display: flex; vertical-align: middle; align-items: center;}
    </style>
  </head>
  <body>
"""

# The page declares charset UTF-8, so the file is written as UTF-8 explicitly;
# the with-block closes it even if a write fails.
with open(All_TEXT_FILE, 'w', encoding='utf-8') as f:
  f.write(_PAGE_HEAD)
  f.write(f'    <h1>{date.today()}</h1>\n')

  rows = zip(sentences, en_translations, ipa, ENVN, ENVS, jp_translations, romaji, JPVN, JPVS)

  # Number entries by position so that repeated sentences get distinct numbers.
  for INDEX, (cn, en, enipa, envn, envs, jp, jpromaji, jpvn, jpvs) in enumerate(rows, start=1):
    INDEX_STR = f'{INDEX:02d}'

    # Escape text so characters such as < and & are shown literally, and
    # percent-encode file names so ?, # or spaces do not break the audio URLs.
    cn, en, enipa, jp, jpromaji = (html.escape(value) for value in (cn, en, enipa, jp, jpromaji))
    envn, envs, jpvn, jpvs = (quote(name) for name in (envn, envs, jpvn, jpvs))

    f.write(f'''
    <h2>{INDEX_STR}</h2>
    <div id="{INDEX_STR}">
      <p class="cn"><span class="tag">中文</span><span class="cn">{cn}</span></p>
      <p class="en"><span class="tag">英文</span><span class="en">{en}</span></p>
      <p class="ipa"><span class="tag">音标</span><span class="ipa">{enipa}</span></p>
      <div class="audio"><span class="speed">正常速度</span>
        <audio controls="">
          <source src="{envn}" type="audio/mpeg">
        </audio>
      </div>
      <div class="audio"><span class="speed">较慢速度</span>
        <audio controls="">
          <source src="{envs}" type="audio/mpeg">
        </audio>
      </div>
      <p class="jp"><span class="tag">日文</span><span class="jp">{jp}</span></p>
      <p class="romaji"><span class="tag">注音</span><span class="romaji">{jpromaji}</span></p>
      <div class="audio"><span class="speed">正常速度</span>
        <audio controls="">
          <source src="{jpvn}" type="audio/mpeg">
        </audio>
      </div>
      <div class="audio"><span class="speed">较慢速度</span>
        <audio controls="">
          <source src="{jpvs}" type="audio/mpeg">
        </audio>
      </div>
    </div>

    <hr />

  ''')

  f.write('''  </body>
</html>
''')

print(f'{All_TEXT_FILE} 创建完成！')

In [None]:
# 创建文件夹，将所有文件挪入文件夹，并清理过程中生成的临时文件

def create_folder(folder_name):
    """Create ``folder_name`` as a fresh, empty directory.

    If the path already exists, it is removed with everything inside it before
    being created again. A message is printed for each step.

    Args:
        folder_name: Path of the directory to (re)create.
    """
    already_there = os.path.exists(folder_name)
    if already_there:
        shutil.rmtree(folder_name)
        print(f"Folder '{folder_name}' already exists. Deleted the existing folder.")

    os.makedirs(folder_name)
    print(f"Folder '{folder_name}' created successfully.")

def move_files_to_folder(folder_name):
    """Move every ``.mp3`` and ``.html`` entry of the current working directory into ``folder_name``.

    MP3 files are moved first, then HTML files; a summary line is printed for
    each kind that had at least one file, or a single notice when neither kind
    was found.

    Args:
        folder_name: Destination directory.
    """
    entries = os.listdir()
    mp3_files = [name for name in entries if name.endswith(".mp3")]
    html_files = [name for name in entries if name.endswith(".html")]

    for label, group in (("MP3", mp3_files), ("HTML", html_files)):
        if not group:
            continue
        for name in group:
            shutil.move(name, folder_name)
        print(f"Moved {len(group)} {label} files to '{folder_name}'.")

    if not (mp3_files or html_files):
        print("No MP3 or HTML files found.")

def remove_temp_files():
    """Delete the intermediate ``.txt`` files this notebook writes to the working directory.

    Files that are already missing (for example because the cell is run a
    second time or an earlier cell was skipped) are ignored, so the remaining
    files are still cleaned up instead of the cell stopping at the first gap.
    """
    temp = [
        "en_translations.txt",
        "jp_translations.txt",
        "ipa.txt",
        "ENVN.txt",
        "ENVS.txt",
        "romaji.txt",
        "JPVN.txt",
        "JPVS.txt"
    ]
    for t in temp:
        try:
            os.remove(t)
        except FileNotFoundError:
            # Nothing to clean up for this name; carry on with the others.
            continue

# Create the output folder (an existing folder of the same name is deleted first)
create_folder(OUTPUT_DIR)

# Move the MP3 and HTML files from the working directory into the folder
move_files_to_folder(OUTPUT_DIR)

# Remove the temporary txt files produced by the earlier cells
remove_temp_files()

print(f"所有文件都已经转移到 {OUTPUT_DIR}，搞定！")

In [None]:
# List every voice edge-tts reports and save the listing to "voices.txt"
# (useful when choosing the VOICE_CN / VOICE_EN / VOICE_JP names).
!edge-tts --list-voices > voices.txt

In [None]:
# 测试代码

def translate_text(text, target_language):
    """Test-cell variant: translate ``text`` into ``target_language`` with a casual tone.

    NOTE: running this cell rebinds the name ``translate_text`` that is defined
    earlier in the notebook. Unlike that earlier definition, this one adds no
    honorific clause for Japanese and limits the completion to 100 tokens.

    Args:
        text: The sentence to translate.
        target_language: Name of the target language, as written in the prompt.

    Returns:
        The text of the first completion choice, stripped of surrounding whitespace.
    """
    request = dict(
        engine='text-davinci-003',
        prompt=f"Translate the following text into {target_language}, with a little bit casual, informal tone:\n{text}",
        max_tokens=100,
        temperature=0.7,
        n=1,
        stop=None,
    )
    first_choice = openai.Completion.create(**request).choices[0]
    return first_choice.text.strip()

def get_romaji(text):
    """Test-cell variant: ask OpenAI for the Romaji reading of a Japanese text.

    NOTE: running this cell rebinds the name ``get_romaji`` that is defined
    earlier in the notebook; this version uses max_tokens=100 and
    temperature=0.7 instead of 1000 and 1.

    Args:
        text: Japanese text to romanise.

    Returns:
        The first completion choice, stripped of surrounding whitespace.
    """
    request = dict(
        engine='text-davinci-003',
        prompt=f"Translate the following Japanese into Romaji:\n{text}",
        max_tokens=100,
        temperature=0.7,
        n=1,
        stop=None,
    )
    first_choice = openai.Completion.create(**request).choices[0]
    return first_choice.text.strip()

# Quick check: translate one sentence into Japanese and romanise the result.
# Dedicated test_* names are used so that this cell does not overwrite the
# `romaji` list that the HTML-generation cell iterates over.
test_text = translate_text("别管多少钱，我就要这个", "Japanese")
test_romaji = get_romaji(test_text)
print(test_text, test_romaji)

In [None]:
# 测试代码，生成语音文件

TEXT = "別にいくらかかろうと、これが欲しいんだよ。"

VOICE = "ja-JP-KeitaNeural"
  # 正常速度
OUTPUT_FILE = f'test.mp3'
communicate = edge_tts.Communicate(TEXT, VOICE)
await communicate.save(OUTPUT_FILE)

In [None]:
# Test code: ask the model to turn an IPA transcription back into English and print the answer.
print(ipa_to_english("Tðeɪ ɛndɪd ʌp ˈiːtɪŋ ðə ˈmeɪn kɔːrs, ˈmeɪk ʃɜː ðə iːt lɒts əv ˈmiːt ənd ˈviːdʒiːz!"))

In [None]:

# 测试代码，生成语音文件

TEXT = "ðeɪ ɛndɪd ʌp ˈiːtɪŋ ðə ˈmeɪn kɔːrs, ˈmeɪk ʃɜː ðə iːt lɒts əv ˈmiːt ənd ˈviːdʒiːz!"

VOICE = "en-US-MichelleNeural"
  # 正常速度
OUTPUT_FILE = f'test.mp3'
communicate = edge_tts.Communicate(TEXT, VOICE)
await communicate.save(OUTPUT_FILE)

In [None]:
# Play the file named by OUTPUT_FILE, i.e. whatever the most recently executed cell assigned to it.
play_audio(OUTPUT_FILE)