In [1]:
import glob
import sys
import os
from d2lbook import config, markdown, utils, common
import logging
import re
import glob
import http.client
import hashlib
import urllib
import random
import json

class MarkdownText(object):
    """Shield markdown constructs from a translator by swapping them for numbers.

    ``encode`` replaces marks, inline code, inline math and links with numeric
    placeholder tokens and records every substitution in ``self.mapping``;
    ``decode`` puts the original snippets back into the (translated) text.
    Use one instance per piece of text: the mapping accumulates across calls.
    """

    # Value of the first placeholder; every recorded snippet takes the next integer.
    _TOKEN_BASE = 732293614

    def __init__(self):
        # (original snippet, placeholder token) pairs, in creation order.
        self.mapping = []

    def _encode_pattern(self, pattern, text):
        """Replace each distinct match of ``pattern`` in ``text`` with a new token.

        Args:
            pattern: Regular expression with a single capturing group.
            text: Text to protect.

        Returns:
            ``text`` with every match replaced by its placeholder token.
        """
        # Longest match first (ties alphabetical): token numbering no longer
        # depends on set iteration order, and a match that contains another
        # match is replaced as a whole.
        matched = sorted(set(re.findall(pattern, text)),
                         key=lambda m: (-len(m), m))
        for m in matched:
            # Another solution is to use special tokens and put them in the
            # terminology. Unfortunately that doesn't work for Amazon Translate,
            # so use a number instead and hope it will not be translated.
            token = str(self._TOKEN_BASE + len(self.mapping))
            text = text.replace(m, token)
            self.mapping.append((m, token))
        return text

    def encode(self, text:str) -> str:
        """Return ``text`` with marks, code, inline math and links tokenized."""
        patterns = [rf'(:{markdown.token}:`{markdown.token}`)', # mark
                    rf'(`{markdown.token}`)',  # code
                    rf'(\${markdown.token}\$)', # inline math
                    rf'(\[{markdown.basic_token}\]\({markdown.basic_token}\))', # link
                    ]
        for p in patterns:
            text = self._encode_pattern(p, text)
        return text

    def decode(self, text:str) -> str:
        """Restore the snippets recorded by ``encode`` inside ``text``."""
        # Undo newest-first: a snippet recorded later (e.g. a link) can itself
        # contain an earlier token (e.g. inline code in the link label), so the
        # outer snippet must be restored before the token nested inside it.
        for key, value in reversed(self.mapping):
            text = text.replace(value, key)
        # Re-join link syntax that came back as "] (" (presumably a space
        # inserted by the translation service).
        text = text.replace('] (', '](')
        return text

class Translator(object):
    """Base class for markdown translators.

    Subclasses implement ``translate`` for a single piece of text; this class
    supplies the markdown-aware traversal and the file-to-file entry point.
    """

    def translate(self, text: str):
        """Translate one piece of text and return the translation.

        Raises:
            NotImplementedError: Always, in the base class.
        """
        # ``NotImplemented`` is a comparison sentinel, not an exception;
        # calling it raised a confusing TypeError.
        raise NotImplementedError(
            f'{type(self).__name__} must implement translate()')

    def _translate_markdown(self, text):
        """Translate the text/list/title parts of a markdown string.

        The string is split with ``markdown.split_markdown``. Markdown cells
        with a non-empty ``class`` are processed recursively; other markdown
        cells are split with ``markdown.split_text`` and only non-empty parts
        of type ``text``, ``list`` or ``title`` are sent to ``translate``.
        Cells of any other type are passed through unchanged.
        """
        cells = markdown.split_markdown(text)
        for cell in cells:
            if cell['type'] == 'markdown':
                if 'class' in cell and cell['class']:
                    # it may have nested code blocks
                    cell['source'] = self._translate_markdown(cell['source'])
                else:
                    text_cells = markdown.split_text(cell['source'])
                    for t_cell in text_cells:
                        if t_cell['source'] and (
                            t_cell['type'] in ['text', 'list', 'title']):
                            original = t_cell['source']
                            # Fresh token mapping for every piece of text.
                            markdown_text = MarkdownText()
                            t_cell['source'] = markdown_text.decode(self.translate(
                                markdown_text.encode(original)))
                            # Add a newline back whenever the source ended with one.
                            if original.endswith('\n'):
                                t_cell['source'] += '\n'
                    cell['source'] = markdown.join_text(text_cells)
        return markdown.join_markdown_cells(cells)

    def translate_markdown(self, src_fn: str, tgt_fn: str):
        """Translate the markdown file ``src_fn`` and write the result to ``tgt_fn``.

        The source is read completely before the target is opened, so a failed
        translation does not leave an empty target behind and ``src_fn`` may be
        the same path as ``tgt_fn``. Both files are handled as UTF-8.
        """
        with open(src_fn, 'r', encoding='utf-8') as r:
            source = r.read()
        translated = self._translate_markdown(source)
        with open(tgt_fn, 'w', encoding='utf-8') as w:
            w.write(translated)

class Baidu(Translator):
    """Translate text with the Baidu Translate HTTP API.

    ``src_lang``, ``target_lang`` and ``terminology`` are accepted for
    signature compatibility but are not used: the source language is fixed to
    ``'auto'`` and the target language to ``'zh'``.

    Credentials are taken from the ``appid`` / ``secret_key`` arguments or,
    when those are omitted, from the ``BAIDU_TRANSLATE_APPID`` and
    ``BAIDU_TRANSLATE_SECRET_KEY`` environment variables. They must never be
    committed to the notebook.
    """
    def __init__(self, src_lang, target_lang, terminology=None,
                 appid=None, secret_key=None):
        """Store the credentials and the fixed request settings.

        Raises:
            ValueError: If no app id or secret key is available.
        """
        self.appid = appid or os.environ.get('BAIDU_TRANSLATE_APPID')
        self.secretKey = secret_key or os.environ.get('BAIDU_TRANSLATE_SECRET_KEY')
        if not self.appid or not self.secretKey:
            raise ValueError(
                'Baidu credentials missing: pass appid/secret_key or set '
                'BAIDU_TRANSLATE_APPID and BAIDU_TRANSLATE_SECRET_KEY')

        self.httpClient = None  # kept for compatibility; connections are per request
        self.myurl = '/api/trans/vip/translate'

        self.fromLang = 'auto'   # source language
        self.toLang = 'zh'   # target language

    def _build_request_path(self, q, salt):
        """Return the signed request path (with query string) for query ``q``."""
        # Imported here so the submodule is loaded regardless of what the
        # file-level ``import urllib`` happened to pull in.
        from urllib.parse import quote
        # The signature is the MD5 of appid + raw (unquoted) query + salt + secret.
        sign = hashlib.md5(
            (self.appid + q + str(salt) + self.secretKey).encode()).hexdigest()
        return (self.myurl + '?appid=' + self.appid + '&q=' + quote(q)
                + '&from=' + self.fromLang + '&to=' + self.toLang
                + '&salt=' + str(salt) + '&sign=' + sign)

    @staticmethod
    def _extract_translation(payload):
        """Return the translated text from a decoded API response.

        Raises:
            RuntimeError: If the response carries no ``trans_result``.
        """
        results = payload.get('trans_result')
        if not results:
            raise RuntimeError(
                'Baidu Translate request failed: '
                f"error_code={payload.get('error_code')}, "
                f"error_msg={payload.get('error_msg')}")
        # Usually a single segment; join several instead of dropping the text.
        return '\n'.join(item['dst'] for item in results)

    def translate(self, text: str):
        """Translate ``text`` and return the translation.

        Newlines are replaced with spaces before the text is sent. Blank input
        returns ``''`` without calling the service.

        Raises:
            RuntimeError: If the service answers without a translation.
        """
        q = text.replace('\n', ' ')
        if not q.strip():
            return ''
        salt = random.randint(32768, 65536)
        path = self._build_request_path(q, salt)
        # NOTE(review): plain HTTP -- confirm whether an HTTPS endpoint should
        # be used for these signed requests.
        connection = http.client.HTTPConnection('api.fanyi.baidu.com')
        try:
            connection.request('GET', path)
            # response is an HTTPResponse object
            response = connection.getresponse()
            payload = json.loads(response.read().decode("utf-8"))
        finally:
            # Only clean up here: returning from ``finally`` would swallow
            # network and parsing errors.
            connection.close()
        return self._extract_translation(payload)
from tencentcloud.common import credential
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException 
from tencentcloud.tmt.v20180321 import tmt_client, models 
class Tencent(Translator):
    """Translate English text to Chinese with Tencent Cloud machine translation.

    ``src_lang``, ``target_lang`` and ``terminology`` are accepted for
    signature compatibility but are not used: requests are always sent with
    ``Source='en'`` and ``Target='zh'``.

    Credentials are taken from the ``secret_id`` / ``secret_key`` arguments
    or, when those are omitted, from the ``TENCENTCLOUD_SECRET_ID`` and
    ``TENCENTCLOUD_SECRET_KEY`` environment variables. They must never be
    committed to the notebook.
    """
    def __init__(self, src_lang, target_lang, terminology=None,
                 secret_id=None, secret_key=None):
        """Create the TMT client and a reusable request object.

        Raises:
            ValueError: If no secret id or secret key is available.
        """
        secret_id = secret_id or os.environ.get('TENCENTCLOUD_SECRET_ID')
        secret_key = secret_key or os.environ.get('TENCENTCLOUD_SECRET_KEY')
        if not secret_id or not secret_key:
            raise ValueError(
                'Tencent credentials missing: pass secret_id/secret_key or set '
                'TENCENTCLOUD_SECRET_ID and TENCENTCLOUD_SECRET_KEY')

        cred = credential.Credential(secret_id, secret_key)
        httpProfile = HttpProfile()
        httpProfile.endpoint = "tmt.tencentcloudapi.com"

        clientProfile = ClientProfile()
        clientProfile.httpProfile = httpProfile
        self.client = tmt_client.TmtClient(cred, "na-siliconvalley", clientProfile)

        # One request object, refilled on every translate() call.
        self.req = models.TextTranslateRequest()

    def translate(self, text: str):
        """Translate ``text`` and return the ``TargetText`` of the response.

        Newlines are replaced with spaces before the text is sent. Blank input
        returns ``''`` without calling the service.
        """
        q = text.replace('\n', ' ')
        if not q.strip():
            return ''
        params = json.dumps({"SourceText":q,"Source":"en","Target":"zh","ProjectId":0})
        self.req.from_json_string(params)
        resp = self.client.TextTranslate(self.req)
        return json.loads(resp.to_json_string())["TargetText"]

In [2]:
# One client per translation service. The three positional arguments are
# literal placeholder strings.
translator = Baidu('src_lang', 'tgt_lang', 'terminology')
translator_tencent = Tencent('src_lang', 'tgt_lang', 'terminology')

# Entries of the chapter directory whose names carry none of the
# 'baidu' / 'tencent' / 'translated' markers.
data_dir = 'chapter_multilayer-perceptrons'
file_list = [
    file
    for file in os.listdir(data_dir)
    if not any(tag in file for tag in ('baidu', 'tencent', 'translated'))
]

In [3]:
# Chapter directories whose files are translated by the loop below.
data_dir_list = ['chapter_preliminaries', 'chapter_linear-networks']

In [4]:
# Translate every source markdown file of each chapter with both services,
# writing `<name>_baidu.md` and `<name>_tencent.md` next to the source.
for data_dir in data_dir_list:
    print('====')
    print(data_dir)
    # Only `.md` sources: for any other entry the derived target name would
    # equal the source name and the source would be overwritten. Files that
    # are already translation outputs are skipped as well.
    file_list = [
        file for file in os.listdir(data_dir)
        if file.endswith('.md')
        and not any(tag in file for tag in ('baidu', 'tencent', 'translated'))
    ]
    for file in file_list:
        print(file)
        # Insert the suffix before the extension only (str.replace would also
        # touch a '.md' occurring earlier in the name).
        stem, ext = os.path.splitext(file)
        file_baidu = stem + '_baidu' + ext
        file_tencent = stem + '_tencent' + ext
        src_path = os.path.join(data_dir, file)
        translator.translate_markdown(src_path, os.path.join(data_dir, file_baidu))
        translator_tencent.translate_markdown(src_path, os.path.join(data_dir, file_tencent))

====
chapter_preliminaries
linear-algebra.md
calculus.md
probability.md
lookup-api.md
pandas.md
autograd.md
ndarray.md
index.md
====
chapter_linear-networks
linear-regression-scratch.md
softmax-regression.md
linear-regression-concise.md
linear-regression.md
softmax-regression-concise.md
softmax-regression-scratch.md
image-classification-dataset.md
index.md
