In [2]:
from openai import Client
import os

from dotenv import load_dotenv, find_dotenv

# Load the .env file located by find_dotenv() so that the os.getenv() lookups
# below can see its values; the return value is deliberately discarded.
_ = load_dotenv(find_dotenv())

# Shared OpenAI client used by the completion helpers. The endpoint is taken
# from OPENAI_API_HOST and the credential from OPENAI_API_KEY.
client = Client(
    base_url=os.getenv("OPENAI_API_HOST"),
    api_key=os.getenv("OPENAI_API_KEY"),
)

In [3]:
def gpt_completion(prompt, model="gpt-4"):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text used as the content of the only (user) message.
        model: Model name forwarded to the chat-completions call.

    Returns:
        The ``content`` of the message in the first choice of the response.
    """
    request = dict(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
    )
    reply = client.chat.completions.create(**request)
    first_choice = reply.choices[0]
    return first_choice.message.content


def gpt_completion_steam(prompt, model="gpt-4"):
    """Start a streaming chat completion for ``prompt`` and return the stream.

    Args:
        prompt: Text used as the content of the only (user) message.
        model: Model name forwarded to the chat-completions call.

    Returns:
        Whatever ``client.chat.completions.create`` returns when called with
        ``stream=True``; the caller is expected to iterate over it.
    """
    request = dict(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,
        stream=True,
    )
    return client.chat.completions.create(**request)


def stream_warp(prompt, model="gpt-4"):
    """Stream a completion, echo it to stdout as it arrives, and return the text.

    Output is framed by a "Stream:" line before the first piece and a
    "Stream End" line after the last one.

    Args:
        prompt: Prompt text passed to ``gpt_completion_steam``.
        model: Model name passed to ``gpt_completion_steam``.

    Returns:
        The concatenation of every non-empty ``delta.content`` piece.
    """
    pieces = []
    stream = gpt_completion_steam(prompt, model)
    print("Stream:")
    for chunk in stream:
        # A chunk may carry an empty ``choices`` list; indexing [0] on it would
        # raise IndexError and abort the stream, so such chunks are skipped.
        if not chunk.choices:
            continue
        # ``content`` can be None (treated as an empty piece).
        piece = chunk.choices[0].delta.content or ""
        print(piece, end="")
        pieces.append(piece)
    print("Stream End")
    return "".join(pieces)


In [4]:
from crowdin_api import CrowdinClient


class FirstCrowdinClient(CrowdinClient):
    """Crowdin client configured entirely through class attributes."""

    # API token, read from the CROWDIN_API_KEY environment variable when the
    # class body is executed.
    TOKEN = os.getenv("CROWDIN_API_KEY")

    # Optional: default page size.
    PAGE_SIZE = 100

    # Optional: HTTP request timeout.
    TIMEOUT = 30

    # Optional: number of retries.
    MAX_RETRIES = 5

    # Optional: delay between failed requests.
    RETRY_DELAY = 0.5


# Instantiate the configured client; the `crowdin` object is reused by the
# rest of the notebook.
crowdin = FirstCrowdinClient()

# Print every project returned for this token as "name : id".
projects = crowdin.projects.list_projects()['data']
for project in projects:
    print(f"{project['data']['name']} : {project['data']['id']}")


Casbin : 312853
Docusaurus v2 : 428890
Casnode : 479711
Casnode-Website : 481953
Casdoor-Site : 491513
Casdoor-Website : 493595
Casbin-Website : 521262


In [5]:
# Inspect the per-language translation progress of file 1321 in project 493595
# (listed as "Casdoor-Website" in the project listing); the cell's value is
# shown as its output.
crowdin.translation_status.get_file_progress(projectId=493595, fileId=1321)

{'data': [{'data': {'languageId': 'ar',
    'eTag': '4e479f2565aa1cfb95b34ddc93b919a5',
    'language': {'id': 'ar',
     'name': 'Arabic',
     'editorCode': 'ar',
     'twoLettersCode': 'ar',
     'threeLettersCode': 'ara',
     'locale': 'ar-SA',
     'androidCode': 'ar-rSA',
     'osxCode': 'ar.lproj',
     'osxLocale': 'ar',
     'pluralCategoryNames': ['zero', 'one', 'two', 'few', 'many', 'other'],
     'pluralRules': '(n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n%100>=3 && n%100<=10 ? 3 : n%100>=11 && n%100<=99 ? 4 : 5)',
     'pluralExamples': ['0',
      '1',
      '2',
      '3-10, 103-110, 203-210...',
      '11-99, 111-199, 211-299...',
      '100-102, 200-202, 300-302...; 0.2, 1.07, 2.94, 3.81, 11.68, 100.55...'],
     'textDirection': 'rtl',
     'dialectOf': None},
    'words': {'total': 155,
     'translated': 0,
     'preTranslateAppliedTo': 0,
     'approved': 0},
    'phrases': {'total': 22,
     'translated': 0,
     'preTranslateAppliedTo': 0,
     'approved': 0},
    'trans

In [29]:
from pathlib import Path
import json


class CrowdinFile:
    """A source file of a Crowdin project, identified by project id and file id."""

    id: int
    project_id: int
    name: str
    path: str

    def __init__(self, project_id, _id, path):
        """Store the identifiers and derive ``name`` from ``path``.

        Args:
            project_id: Id of the project the file belongs to.
            _id: Id of the file itself.
            path: Path of the file; its final component becomes ``name``.
        """
        file_name = Path(path).name
        self.project_id, self.id = project_id, _id
        self.path, self.name = path, file_name

    def __str__(self):
        """Return the file's path."""
        return self.path


class CrowdinString:
    """A source string of a Crowdin file, together with the ids that locate it."""

    id: int
    file_id: int
    project_id: int
    text: str

    def __init__(self, project_id, file_id, _id, text):
        """Store the identifiers and the source text.

        Args:
            project_id: Id of the project the string belongs to.
            file_id: Id of the file the string belongs to.
            _id: Id of the string itself.
            text: Source text of the string.
        """
        self.project_id, self.file_id = project_id, file_id
        self.id, self.text = _id, text

    def __str__(self):
        """Return the source text."""
        return self.text


class Translater:
    """Translate the source strings of one Crowdin project with GPT and upload them.

    The class relies on the module-level ``crowdin`` client for every Crowdin
    call and on ``gpt_completion`` / ``stream_warp`` for the model calls.
    """

    project_id: int
    project_name: str
    # Files collected by the most recent fetch_files() call. Created per
    # instance in __init__ so instances never share (and accumulate into) one
    # list.
    files: list

    prompt_zh = """
请将用三个反引号括起来的文本翻译为{lang}, 翻译时需要遵循下面的规则：
1. 文本中可能存在代码，你需要保留代码的原样。
2. 文本中可能包含 HTML 标签或者 HTML 转义字符，请不要翻译这些内容。
3. 如果你不确定如何翻译，请直接返回原文。
4. 请直接返回翻译后的文本或者原文，不要返回其他内容。

请翻译：
```{text}```
"""
    prompt = """
Please translate the text delimited by triple backticks into {lang}, adhering to the following rules:
1. The text may contain code, which you need to keep as is.
2. The text may include HTML tags or HTML escape characters, please do not translate these.
3. If you are unsure about how to translate, please return the original text.
4. Please return only the translated text or the original text, do not return anything else.

```{text}```
"""

    batch_prompt = """
Please translate the following texts enclosed in triple backticks into {lang}, following the rules below:
1. The text may contain code, which should be preserved as is.
2. The text may contain HTML tags or HTML escape characters, which should not be translated.
3. Please return the translated text or the original text in JSON format, containing a key-value pair, with the key as "translation" and the value as the translated or original text.
4. If there are multiple texts to be translated, please return them in an array format.
5. If you are unsure how to translate, please return the original text.

Please translate:
"""
    # translate_batch() appends one numbered line per text to batch_prompt, in
    # the form:  N. ```text```

    def __init__(self, project_id, project_name):
        """Remember the target project and start with an empty file list.

        Args:
            project_id: Id of the Crowdin project to work on.
            project_name: Name of that project (stored only).
        """
        self.project_id = project_id
        self.project_name = project_name
        self.files = []

    @staticmethod
    def _strip_code_fence(text):
        """Drop a leading and trailing triple backtick when both are present."""
        if text.startswith("```") and text.endswith("```"):
            return text[3:-3]
        return text

    def fetch_files(self):
        """Load the project's translatable source files into ``self.files``.

        Only files whose name ends in ``.md``, ``.json`` or ``.mdx`` are kept.
        The list is rebuilt on every call, so calling this repeatedly does not
        accumulate duplicates.

        Returns:
            The list of ``CrowdinFile`` objects now stored in ``self.files``.
        """
        # NOTE(review): a single request with limit=500 is made; presumably
        # projects with more files need pagination -- confirm against the API.
        data = crowdin.source_files.list_files(self.project_id, limit=500)['data']
        allow_types = (".md", ".json", ".mdx")
        files = []
        for f in data:
            cf = CrowdinFile(self.project_id, f['data']['id'], f['data']['path'])
            if cf.name.endswith(allow_types):
                files.append(cf)
        self.files = files
        return self.files

    def print_files(self):
        """Print every file currently stored in ``self.files``, one per line."""
        for file in self.files:
            print(file)

    def fetch_strings(self, file_id):
        """Return the visible source strings of one file.

        Strings flagged ``isHidden`` are skipped, and one trailing newline is
        removed from each text.

        Args:
            file_id: Id of the file whose strings are requested.

        Returns:
            A list of ``CrowdinString`` objects.
        """
        # NOTE(review): same single-request limit=500 as fetch_files().
        data = crowdin.source_strings.list_strings(self.project_id, file_id, limit=500)['data']
        strings = []
        for s in data:
            if s['data']['isHidden']:
                continue

            string_id = s['data']['id']
            string_text: str = s['data']['text']

            # A trailing newline led to JSON errors in the model output, so
            # one is stripped here.
            if string_text.endswith("\n"):
                string_text = string_text[:-1]

            strings.append(CrowdinString(self.project_id, file_id, string_id, string_text))
        return strings

    def translate(self, text, lang):
        """Translate a single text into ``lang`` using the ``prompt`` template.

        Args:
            text: Text to translate.
            lang: Target language name inserted into the prompt.

        Returns:
            The model's reply, with a surrounding triple-backtick fence removed
            when the reply is wrapped in one.
        """
        res = gpt_completion(self.prompt.format(text=text, lang=lang))
        return self._strip_code_fence(res)

    def translate_batch(self, texts, lang):
        """Translate several texts in one request and return the parsed result.

        Args:
            texts: Sequence of texts to translate.
            lang: Target language name inserted into the prompt.

        Returns:
            A list with exactly one ``{"translation": ...}`` dict per entry of
            ``texts``, in the same order, with surrounding triple-backtick
            fences removed from the values.

        Raises:
            Exception: The reply is not valid JSON.
            ValueError: The reply is not an array with one entry per text.
                Without this check the caller would pair translations with the
                wrong source strings.
        """
        append_text = "\n".join([f"{num}. ```{text}```" for num, text in enumerate(texts)])
        pmt = self.batch_prompt.format(lang=lang) + append_text

        res = stream_warp(pmt)
        try:
            json_res = json.loads(res)
        except Exception as e:
            raise Exception(f"解析失败: {e}") from e

        # The prompt only asks for an array when there are multiple texts, so a
        # lone object is accepted as a one-element answer.
        if isinstance(json_res, dict) and "translation" in json_res:
            json_res = [json_res]

        if not isinstance(json_res, list):
            raise ValueError(f"翻译结果格式错误: 期望数组, 实际为 {type(json_res).__name__}")
        if len(json_res) != len(texts):
            raise ValueError(f"翻译数量不匹配: 期望 {len(texts)} 条, 实际 {len(json_res)} 条")

        # Remove the ``` fence the model sometimes echoes back.
        for item in json_res:
            item["translation"] = self._strip_code_fence(item["translation"])

        return json_res

    def check_translation_if_existed(self, string_id, lang_id):
        """Return True when the string already has a translation for ``lang_id``.

        Args:
            string_id: Id of the source string.
            lang_id: Crowdin language id to check.
        """
        res = crowdin.string_translations.list_string_translations(
            projectId=self.project_id,
            stringId=string_id,
            languageId=lang_id,
        )
        return bool(res['data'])

    def submit_translation(self, string_id, lang_id, translation):
        """Upload one translation; failures are printed and not re-raised.

        Args:
            string_id: Id of the source string being translated.
            lang_id: Crowdin language id of the translation.
            translation: Translated text to upload.
        """
        try:
            crowdin.string_translations.add_translation(
                projectId=self.project_id,
                stringId=string_id,
                languageId=lang_id,
                text=translation,
            )
        except Exception as e:
            # Best effort: one rejected string must not stop the whole run.
            print(e)

    def run_batch(self, lang, lang_id, begin="", offset=0, file_list=None, batch_size=30):
        """Translate the untranslated strings of the selected files and upload them.

        Files are selected by ``begin``/``offset`` and/or ``file_list``; when
        neither selects anything, no file is processed. Progress and a final
        list of failed files are printed.

        Args:
            lang: Target language name used in the prompt (e.g. "Chinese").
            lang_id: Crowdin language id used for lookups and uploads
                (e.g. "zh-CN").
            begin: Path of a file; when found, processing covers the fetched
                files from that file's position plus ``offset`` onwards.
            offset: Added to the position of ``begin`` (1 skips ``begin``
                itself).
            file_list: Optional collection of file paths to process in
                addition to the ``begin`` selection.
            batch_size: Number of strings sent to the model per request; must
                be positive.
        """
        self.fetch_files()
        files = []
        if begin != "":
            for index, file in enumerate(self.files):
                if file.path == begin:
                    files = self.files[index + offset:]
                    break

        if file_list is not None:
            files.extend(file for file in self.files if file.path in file_list)

        failed_file = []
        for file in files:
            print(f"正在翻译(id: {file.id})：", file.path)
            print("=========")
            ss = self.fetch_strings(file.id)

            # Skip strings that already have a translation in this language.
            strings = [s for s in ss if not self.check_translation_if_existed(s.id, lang_id)]

            print(f"共有{len(strings)}条待翻译")
            if len(strings) == 0:
                continue

            for i in range(0, len(strings), batch_size):
                batch = strings[i:i + batch_size]
                texts = [string.text for string in batch]
                try:
                    translation = self.translate_batch(texts, lang)
                except Exception as e:
                    print(e)
                    # Report each file once, however many of its batches fail.
                    if file not in failed_file:
                        failed_file.append(file)
                    continue

                # translate_batch guarantees one translation per text, so the
                # pairs line up with the source strings of this batch.
                for string, t in zip(batch, translation):
                    print(f"原文(id: {string.id}):")
                    print(string.text)
                    print("翻译:")
                    print(t["translation"])
                    print("=========")
                    self.submit_translation(string.id, lang_id, t["translation"])

        print("失败文件：")
        for file in failed_file:
            print(file)


In [30]:
# Known projects (name : id):
#   Casbin          : 312853
#   Docusaurus v2   : 428890
#   Casnode         : 479711
#   Casnode-Website : 481953
#   Casdoor-Site    : 491513
#   Casdoor-Website : 493595
#   Casbin-Website  : 521262
PROJECT_ID = 493595
PROJECT_NAME = "Casdoor-Website"
# Alternative target:
# PROJECT_ID = 521262
# PROJECT_NAME = "Casbin-Website"
translater = Translater(project_id=PROJECT_ID, project_name=PROJECT_NAME)

# To continue with the files that follow a given one instead of an explicit list:
# translater.run_batch("Chinese", "zh-CN", begin="/docs/Overview.mdx", offset=1)

# Only the files whose Crowdin path is listed here are translated.
file_list = (
    "/docs/integration/java/dolphinscheduler.md",
)

translater.run_batch(lang="Chinese", lang_id="zh-CN", file_list=file_list)

正在翻译(id: 1317)： /docs/integration/java/dolphinscheduler.md
共有20条待翻译
Stream:
[
  {"translation": "Casdoor应用设置"},
  {"translation": "演示"},
  {"translation": "Casdoor是<a href=\"https://github.com/apache/dolphinscheduler\">Apache DolphinScheduler</a>支持的登录方式之一。"},
  {"translation": "首先，应部署Casdoor。"},
  {"translation": "您可以参考Casdoor官方文档的<a href=\"https://casdoor.org/docs/basic/server-installation\">服务器安装</a>。"},
  {"translation": "成功部署后，请确保："},
  {"translation": "Casdoor服务器在<a href=\"http://localhost:8000\" x-nc=\"1\">http://localhost:8000</a>成功运行。"},
  {"translation": "打开您喜欢的浏览器并访问<a href=\"http://localhost:7001\" x-nc=\"1\">http://localhost:7001</a>。"},
  {"translation": "您将看到Casdoor的登录页面。"},
  {"translation": "通过输入\"admin\"和\"123\"来测试登录功能。"},
  {"translation": "部署完成后，您可以按照以下步骤在自己的应用中快速实现基于Casdoor的登录页面。"},
  {"translation": "步骤2：配置Casdoor应用"},
  {"translation": "创建一个新的Casdoor应用或使用现有的一个。"},
  {"translation": "添加您的重定向URL（您可以在下一节中找到更多关于如何获取重定向URL的详细信息）。"},
  {"translation": "添加所需的提供商并填写其他必要的设