In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import datasurfer as ds
import time
import warnings
from pathlib import Path
from datasurfer.lib_llm.llm_agents import LLMAgent
from datasurfer.datautils import xml_valid_df

In [3]:
# Chapter number and revision tag; both are embedded in the file names below.
chapter = 6
version = 'R1V1'

# Working directory that holds the source workbook and receives all outputs.
root = Path(r'D:\02_Translation\01_Künstliche_Intelligenz_für_Ingenieure\06_KII_Chapter_06')

# Source workbook with the original text.
frcs = root.joinpath(f'KII_Chapter_{chapter:02}_Original.xlsx')

# Output files: they share one stem and differ only in their extension.
fdst1 = root.joinpath(f'KII_Chapter_{chapter:02}_Translation_{version}.xlsx')
fdst2 = fdst1.with_suffix('.docx')
fbak = fdst1.with_suffix('.csv')

In [4]:
# Two LLM agents with distinct roles. The positional arguments are presumably
# the agent's name and its system prompt -- confirm against LLMAgent.
# Linda produces the first-pass translation.
Linda = LLMAgent(
    'Linda',
    'You are a Chinese linguist, you translate German to Chinese.',
)
# Robin reviews Linda's translation against the German original.
Robin = LLMAgent(
    'Robin',
    'You are a Chinese linguist, you also know German.',
)

In [5]:
# Load the source workbook at `frcs` through datasurfer and keep its `.df`
# attribute; the cell output shows the columns Page, Original, Translation
# and Review.
df_original = ds.AutoObject(frcs).df

# Bare expression: displays the frame as the cell output.
df_original

Unnamed: 0,Page,Original,Translation,Review
0,175,Funktionale Programmierung in LISP,,4
1,175,Bei der Programmiersprache LISP werden alle Ve...,,4
2,175,6.1 Einführung in die funktionale Programmierung,,4
3,175,6.1.1 Grundidee von LISP,,4
4,175,LISP (List Processing Language) ist eine Progr...,,4
...,...,...,...,...
480,207,und dem im DANN-Teil angegebenen Seiteneffekt,,4
481,207,WASSERDRUCK⇒NIEDRIG,,4
482,207,Hier wird die REGEL_1 offensichtlich als Progr...,,4
483,207,Literaturhinweise,,4


In [6]:
# Prompt template for the translator; `{original}` is filled with the source text.
pattern_Linda = (
    'Translating "{original}" to Chinese, '
    'return only the translation, do not include any other words.'
)

# Prompt template for the reviewer, written in Chinese. In English it reads:
# "Based on the German original "{original}", improve the following Chinese
# translation so that it reads fluently, fix its errors and remove unnecessary
# sentences: "{translation}". Return only the revised sentence, without
# quotation marks."
pattern_Robin = (
    '根据德语原文\n'
    '"{original}"，\n'
    '将以下中文翻译改进到语义通顺, 修改其中的错误并去除不必要的句子：\n'
    '"{translation}"\n'
    '只返回修改过的不加引号的句子.'
)
print(pattern_Robin)

根据德语原文
"{original}"，
将以下中文翻译改进到语义通顺, 修改其中的错误并去除不必要的句子：
"{translation}"
只返回修改过的不加引号的句子.


In [7]:
def transview_text(original, retry=10, memory_length=100):
    """
    Translate a text with Linda, then have Robin review the translation.

    Linda is prompted with ``pattern_Linda`` (called with ``use_cache=False``)
    up to ``retry`` times. An attempt is rejected when the call raises or when
    the reply contains the substring 'Instruction'. The first accepted reply
    is handed to Robin together with the original via ``pattern_Robin``.

    Args:
        original (str): The source text to be translated and reviewed.
        retry (int): Maximum number of translation attempts made with Linda.
        memory_length (int): Passed to Linda as ``memory_length``; Robin is
            called with twice this value.

    Returns:
        list: ``[original, reviewed]`` where ``reviewed`` is Robin's reply.

    Raises:
        Exception: If none of the ``retry`` attempts produced an accepted
            translation. The last error raised by an attempt, if any, is
            attached as the cause.
    """
    last_error = None
    for _ in range(retry):
        try:
            translation = Linda.told(pattern_Linda.format(original=original), use_cache=False, memory_length=memory_length)

            # A reply containing 'Instruction' is treated as unusable and retried.
            if 'Instruction' not in translation:
                break
        except Exception as e:
            # Best effort: report the failure and try again.
            last_error = e
            warnings.warn(f'Error: {e}')
    else:
        # Reached only when the loop ran out of attempts without a `break`.
        raise Exception(f'Failed to translate "{original}" after {retry} retries.') from last_error

    reviewed = Robin.told(pattern_Robin.format(original=original, translation=translation), use_cache=True, memory_length=memory_length*2)
    return [original, reviewed]

In [8]:

def start_translation(df, nrows):
    """
    Translate the rows of ``df`` according to each row's review code.

    Every row must unpack into exactly ``(page, original, trans, review)``.
    The review code selects the handling:

        0 -- already translated: the stored original/translation pair is
             appended to both agents' histories and copied to the output.
        1 -- skipped; nothing is emitted for the row.
        2 -- buffered: the text is held back and joined (with a space) to the
             following code-2 rows; the merged text is translated as one unit
             when the next row with a different code arrives, or at the end
             of the frame.
        3 -- split on '@'; each fragment is translated separately.
        4 -- translated as is.

    Args:
        df: DataFrame iterated with ``iterrows``; its index values are used
            in the progress messages.
        nrows (int): Total number of rows, used only for the progress and
            remaining-time messages.

    Returns:
        list: One ``[page, original, translation]`` list per emitted text.

    Raises:
        ValueError: If a row carries a review code other than 0-4.
    """
    out = []
    buffer = []

    def flush(page):
        """Translate the buffered fragments as one text and empty the buffer."""
        if buffer:
            merged = ' '.join(buffer)
            buffer.clear()
            out.append([page, *transview_text(merged)])

    for idx, (page, original, trans, review) in df.iterrows():
        start = time.time()
        if review != 0:
            print(f'Processing {idx+1}/{nrows} ({(idx)/nrows*100:0.2f}%)...\n')
            Linda.print_message(f'{original}', 80, role='User')

        # Any row that does not extend the buffer ends the current merge group.
        # The merged text is emitted under the page of the current row.
        if review != 2:
            flush(page)

        if review == 4:
            out.append([page, *transview_text(original)])

        elif review == 2:
            buffer.append(original)

        elif review == 1:
            continue

        elif review == 3:
            for txt in original.split('@'):
                out.append([page, *transview_text(txt)])

        elif review == 0:
            # Replay the finished pair so both agents keep it as context.
            Linda.append_history(pattern_Linda.format(original=original), role='User')
            Linda.append_history(trans)
            Robin.append_history(pattern_Robin.format(original=original, translation=trans), role='User')
            Robin.append_history(trans)
            out.append([page, original, trans])
        else:
            raise ValueError(f'Invalid review value: {review}')

        if review != 0:
            duration = time.time() - start
            # Estimate: assumes every remaining row takes as long as this one.
            tremain = int((nrows-idx-1)*duration)
            print(f'Completed in {duration:0.2f}s, remain {tremain//3600:0.0f}h{tremain%3600//60:0.0f}m{tremain%60}s\n')

    # Trailing code-2 rows have no following row to trigger their flush;
    # without this they would be dropped. A non-empty buffer implies the last
    # row was a code-2 row, so `page` is that row's page.
    if buffer:
        flush(page)

    return out

In [9]:
# Resume from the CSV backup when one exists; otherwise start from scratch.
dfbak = pd.read_csv(fbak) if fbak.is_file() else pd.DataFrame()

# Number of source rows handed to the translator between two saves.
CHUNK_ROWS = 6

# Label of the next untranslated row in df_original.
# NOTE(review): when resuming from a backup this assumes one backup row per
# source row, which only holds if no rows were skipped, merged or split.
idx_start = len(dfbak)

while idx_start < len(df_original):

    # `.loc` slices by label and includes both ends.
    stop = min(idx_start + CHUNK_ROWS - 1, len(df_original) - 1)
    chunk = df_original.loc[idx_start:stop]

    # Finished rows (Review == 0) are passed along so start_translation can
    # replay them into the agents' histories. An empty backup is left out of
    # the concat so it cannot influence the resulting column dtypes.
    df_working = chunk if dfbak.empty else pd.concat([dfbak, chunk])
    df_working = df_working.fillna('')

    out = start_translation(df_working, len(df_original))

    # Built directly from `out` so that an empty result yields an empty frame
    # instead of failing while unpacking.
    dfout = pd.DataFrame(out, columns=['Page', 'Original', 'Translation']).assign(Review=0)
    dfout['Translation'] = dfout['Translation'].str.replace(
        'Let me know if you have more text to translate!', '', regex=False)

    print('Saving...')
    # `out` already contains the earlier rows, so the backup is replaced, not extended.
    dfbak = dfout
    dfbak.to_csv(fbak, index=False)

    # Advance by source position rather than by the number of output rows,
    # which differs whenever rows are skipped, merged or split.
    idx_start = stop + 1


  df_working.fillna('', inplace=True)


Processing 1/485 (0.00%)...

[96mUser[0m:

Funktionale Programmierung in LISP

[92m--------------------------------------------------------------------------------[0m

[96mLinda[0m:

函数式编程在 Lisp  

[92m--------------------------------------------------------------------------------[0m

[96mRobin[0m:

函数式编程在Lisp中  

[92m--------------------------------------------------------------------------------[0m

Completed in 2.11s, remain 0h17m0s

Processing 2/485 (0.21%)...

[96mUser[0m:

Bei der Programmiersprache LISP werden alle Verarbeitungsschritte auf die
Manipulation von Listen zurückgeführt. Dieses Kapitel stellt die
Grundoperationen dieser Sprache vor und zeigt, wie mit diesen Operationen
Suchalgorithmen implementiert werdenkönnen.

[92m--------------------------------------------------------------------------------[0m

[96mLinda[0m:

在 Lisp 编程语言中，所有处理步骤都归结为对列表的操纵。本章介绍该语言的基本操作，并展示如何使用这些操作实现搜索算法。  

[92m-----------------------------------------------------------------

In [76]:
# Write the accumulated translation table to the .xlsx target, without the index column.
dfbak.to_excel(fdst1, index=False)


In [77]:
# Wrap only the Original and Translation columns in a datasurfer DOCXObject
# and save them to the .docx target (presumably as a table in the Word
# document -- confirm against DOCXObject.save_df).
obj = ds.DOCXObject(dfbak[['Original', 'Translation']], name='Translation')
# Last expression of the cell: its return value is shown as the cell output.
obj.save_df(fdst2)



<DOCXObject@Translation>