In [1]:
!pip install openai

Collecting openai
  Downloading openai-1.17.0-py3-none-any.whl.metadata (21 kB)
Collecting pydantic<3,>=1.9.0 (from openai)
  Using cached pydantic-2.7.0-py3-none-any.whl.metadata (103 kB)
Collecting annotated-types>=0.4.0 (from pydantic<3,>=1.9.0->openai)
  Using cached annotated_types-0.6.0-py3-none-any.whl.metadata (12 kB)
Collecting pydantic-core==2.18.1 (from pydantic<3,>=1.9.0->openai)
  Downloading pydantic_core-2.18.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.5 kB)
Downloading openai-1.17.0-py3-none-any.whl (268 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.3/268.3 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hUsing cached pydantic-2.7.0-py3-none-any.whl (407 kB)
Downloading pydantic_core-2.18.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m48.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hU

In [25]:
import os
import pandas as pd
import subprocess
import json
import time
import anthropic
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed

from openai import OpenAI

FLORES_DIR = "../data/flores-200/"


def get_lang_directions(languages, base_languages, order_matters=True, skip_base_pairs=False):
    """Helper function to generate the language directions."""

    directions = []
    
    for base in base_languages:
        if skip_base_pairs:
            other_languages = [lang for lang in languages if lang not in base_languages]
        else:
            other_languages = [lang for lang in languages if lang != base]

        for lang in other_languages:

            if order_matters:
                directions.append((base, lang))
                directions.append((lang, base))
            else:
                pair = tuple(sorted([base, lang]))
                if pair not in directions:
                    directions.append(pair)
    
    return directions

def prepare_batch_file(source_sentences, source_language, target_language, save_file, model="gpt-4o-mini"):
    batch_data = []
    for i, sentence in enumerate(source_sentences):
        prompt = TRANSLATION_PROMPT.format(source_sentence=sentence, source_language=source_language, target_language=target_language)
        
        custom_id = f"request-{i}"
        batch_data.append({
            "custom_id": custom_id,
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": model,
                "messages": [{"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": prompt}],
                "max_tokens": 256
            }
        })

    with open(save_file, "w") as f:
        for entry in batch_data:
            f.write(json.dumps(entry) + "\n")

def upload_batch_file(file_path):
    response = client.files.create(
        file=open(file_path, "rb"),
        purpose="batch"
    )
    return response.id

def create_batch(file_id):
    response = client.batches.create(
        input_file_id=file_id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        metadata={
            "description": "flores-200 translation job"
        }
    )
    return response.id

def check_batch_status(batch_id):
    response = client.batches.retrieve(batch_id)
    return response.status

def retrieve_batch_results(batch_id, save_file):
    batch_details = client.batches.retrieve(batch_id)
    output_file_id = batch_details.output_file_id
    
    file_content = client.files.content(output_file_id).text

    with open(save_file, "w") as f:
        f.write(file_content)
    
    results = []
    with open(save_file, "r") as f:
        for line in f:
            results.append(json.loads(line))
    return results


def load_flores(src, tgt):
    """Loads the flores devtest dataset from the flores-200 directory"""
    with open(os.path.join(FLORES_DIR, f"{src}{tgt}", f"test.{src}-{tgt}.json")) as f:
        return json.load(f)

def parse_results(results):
    return [
        r.get('response', {}).get('body', {}).get('choices', [{}])[0].get('message', {}).get('content', '').replace('\n', ' ')
        for r in results
    ]


def get_flores_tgt_file(src, tgt):
    return os.path.join(FLORES_DIR, f"{src}{tgt}", f"test.{src}-{tgt}.{tgt}")

API_KEY = "sk-blah"

client = OpenAI(api_key=API_KEY)

In [51]:
MODELS = ["gpt-4o", "gpt-3.5-turbo", "gpt-4-turbo"]
OUTPUT_DIR = "../data/benchmarks/"
METRICS = ["bleu", "chrf"]
CHRF_plus = True
SACREBLEU_TOKENIZER = "flores200"

USE_CACHED = True

LANGUAGE_PAIRS_NLLB_MAP = {"ban": "ban_Latn", "min": "min_Latn", "en": "eng_Latn", "id": "ind_Latn"}
TGT_LANGUAGES = ["ban", "min"]
BASE_LANGUAGES = ["en", "id"]
METRIC_MAPPING = {"BLEU": "bleu", "chrF2++": "chrf"}
DIRECTIONS = get_lang_directions(TGT_LANGUAGES, BASE_LANGUAGES)

LANG_TABLE = {
    "en": "English",
    "id": "Indonesian",
    "ban": "Balinese",
    "min": "Minangkabau"
}

SYSTEM_PROMPT = "You are a helpful translator that only responds with the translation alone. Translate as best as you can."
TRANSLATION_PROMPT = """Translate this from {source_language} to {target_language}:                                                                                               
{source_language}: {source_sentence}
{target_language}:"""

In [18]:
dfs = {
    m: pd.DataFrame(columns=['-'.join(d) for d in DIRECTIONS])
    for m in METRICS
}

for m in dfs.keys():
    for model_name in MODELS:
        dfs[m].loc[model_name] = ""


In [63]:

for model_name in MODELS:
    MODEL_RESULTS_DIR = os.path.join(OUTPUT_DIR, model_name)

    for src, tgt in DIRECTIONS:
        DIRECTION_RESULTS_DIR = os.path.join(MODEL_RESULTS_DIR, f"{src}{tgt}")
        os.makedirs(DIRECTION_RESULTS_DIR, exist_ok=True)

        sentence_output_path = os.path.join(DIRECTION_RESULTS_DIR, f"test-{src}-{tgt}")
        input_path = sentence_output_path + '.input.jsonl'
        output_path = sentence_output_path + '.output.jsonl'
        bleu_output_path = sentence_output_path + ".metrics.json"

        if not (os.path.isfile(bleu_output_path) and USE_CACHED):
            # run if there isn't a file
        
            translations = load_flores(src, tgt)
            src_sentences = [t["translation"][src] for t in translations]

            prepare_batch_file(src_sentences, LANG_TABLE[src], LANG_TABLE[tgt], input_path)
            file_id = upload_batch_file(input_path)
            batch_id = create_batch(file_id)

            while True:
                status = check_batch_status(batch_id)
                if status in ["completed", "failed", "expired"]:
                    break
                print(f"Batch status: {status}. Checking again in 10 seconds...")
                time.sleep(10)
    
            if status == "completed":
                raw_results = retrieve_batch_results(batch_id, output_path)
                translated_sentences = parse_results(raw_results)
                print(f"Batch {batch_id} completed. Results retrieved.")
            else:
                print(f"Batch {batch_id} failed with status: {status}.")
                continue
    
            # Dump to test-{src}-{tgt}
            
            with open(sentence_output_path, "w") as f:
                f.write('\n'.join(translated_sentences))
    
            # calculate bleu score by running the command
            command = f"sacrebleu -tok {SACREBLEU_TOKENIZER} -w 2 {get_flores_tgt_file(src, tgt)} -m {' '.join(METRICS)}"
            if CHRF_plus:
                command += ' --chrf-word-order 2'
            command += f" < {sentence_output_path} > {bleu_output_path}"

            process = subprocess.run(command, shell=True, check=True, text=True)

        with open(bleu_output_path, 'r') as f:
            metrics = json.load(f)

        for m in metrics:
            dfs[METRIC_MAPPING[m["name"]]].loc[model_name][src+'-'+tgt] = float(m["score"])
    


You are setting values through chained assignment. Currently this works in certain cases, but when using Copy-on-Write (which will become the default behaviour in pandas 3.0) this will never work to update the original DataFrame or Series, because the intermediate object on which we are setting values will behave as a copy.
A typical example is when you are setting values in a column of a DataFrame, like:

df["col"][row_indexer] = value

Use `df.loc[row_indexer, "col"] = values` instead, to perform the assignment in a single step and ensure this keeps updating the original `df`.

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

  dfs[METRIC_MAPPING[m["name"]]].loc[model_name][src+'-'+tgt] = float(m["score"])


Batch status: validating. Checking again in 10 seconds...
Batch status: in_progress. Checking again in 10 seconds...
Batch status: in_progress. Checking again in 10 seconds...
Batch status: in_progress. Checking again in 10 seconds...
Batch status: in_progress. Checking again in 10 seconds...
Batch status: in_progress. Checking again in 10 seconds...
Batch status: in_progress. Checking again in 10 seconds...
Batch status: in_progress. Checking again in 10 seconds...
Batch status: in_progress. Checking again in 10 seconds...
Batch status: in_progress. Checking again in 10 seconds...
Batch status: in_progress. Checking again in 10 seconds...
Batch status: in_progress. Checking again in 10 seconds...
Batch status: in_progress. Checking again in 10 seconds...
Batch status: in_progress. Checking again in 10 seconds...
Batch status: finalizing. Checking again in 10 seconds...
Batch status: finalizing. Checking again in 10 seconds...
Batch status: finalizing. Checking again in 10 seconds...
B

In [34]:
sentence_output_path

'../data/benchmarks/claude-3-5-sonnet-20240620/enban/test-en-ban'

In [60]:
anthropic_api_key = "sk-blah"  # Replace with your actual API key
anthropic_client = anthropic.Client(api_key=anthropic_api_key)

CLAUDE_PROMPT = """You are a professional translator tasked with translating multiple phrases from one language to another. Your goal is to provide accurate and natural-sounding translations for each phrase. Translate as best as you can.

The source language is:
<source_language>{source_language}</source_language>

The target language is:
<target_language>{target_language}</target_language>

Here are the phrases to be translated:
<phrases>
{sentences}
</phrases>

Please follow these instructions:

1. Translate each phrase from the source language to the target language.
2. Provide only the translated phrases in your response, with each translation on a new line.
3. Do not include any additional explanations, notes, or the original phrases in your output.
4. Maintain the original order of the phrases in your translations.

Begin your translation now, providing only the translated phrases, each on a new line:"""

BATCH_SIZE = 16
MAX_RETRIES = 3
USE_CACHED=False
ANTHROPIC_MODELS = ["claude-3-5-sonnet-20240620", "claude-3-opus-20240229", "claude-3-haiku-20240307"]

for m in dfs.keys():
    for model_name in ANTHROPIC_MODELS:
        dfs[m].loc[model_name] = ""


def chunk_list(lst, chunk_size):
    for i in range(0, len(lst), chunk_size):
        yield lst[i:i + chunk_size]

def translate_batch(batch, src, tgt, model_name):
    try:
        batch_prompt = CLAUDE_PROMPT.format(
            source_language=LANG_TABLE[src],
            target_language=LANG_TABLE[tgt],
            sentences='\n'.join(batch)
        )

        response = anthropic_client.messages.create(
            model=model_name,
            max_tokens=4096,
            messages=[
                {"role": "user", "content": batch_prompt}
            ]
        )

        translated_batch = [sent.strip() for sent in response.content[0].text.strip().split('\n') if sent != '']
        if len(translated_batch) != len(batch):
            print("mismatch lengths!", translated_batch, batch)
            raise Exception()
        return translated_batch

    except Exception as e:
        print(f"Error translating batch: {e}")
        retries += 1
        # list[::2], list[1::2]
        return translate_batch(batch[::2], src, tgt, model_name) + translate_batch(batch[1::2], src, tgt, model_name)


for model_name in ANTHROPIC_MODELS:
    MODEL_RESULTS_DIR = os.path.join(OUTPUT_DIR, model_name)

    for src, tgt in DIRECTIONS:
        DIRECTION_RESULTS_DIR = os.path.join(MODEL_RESULTS_DIR, f"{src}{tgt}")
        os.makedirs(DIRECTION_RESULTS_DIR, exist_ok=True)

        sentence_output_path = os.path.join(DIRECTION_RESULTS_DIR, f"test-{src}-{tgt}")
        input_path = sentence_output_path + '.input.jsonl'
        output_path = sentence_output_path + '.output.jsonl'
        bleu_output_path = sentence_output_path + ".metrics.json"

        if not (os.path.isfile(bleu_output_path) and USE_CACHED):
            translations = load_flores(src, tgt)
            src_sentences = [t["translation"][src] for t in translations]

            # Send requests to Anthropic's API
            translated_sentences = []
            batch_indices = list(chunk_list(range(len(src_sentences)), BATCH_SIZE))
            with ThreadPoolExecutor(max_workers=10) as executor:
                futures = {
                    executor.submit(translate_batch, [src_sentences[i] for i in batch], src, tgt, model_name): idx
                    for idx, batch in enumerate(batch_indices)
                }

                for future in tqdm(as_completed(futures), total=len(futures)):
                    idx = futures[future]
                    result = future.result()
                    if result:
                        for i, sentence in zip(batch_indices[idx], result):
                            translated_sentences[i] = sentence
                    else:
                        print("Warning: Received an empty result from a batch translation.")

            with open(sentence_output_path, "w") as f:
                f.write('\n'.join(translated_sentences))

            # Calculate BLEU score by running the command
            command = f"sacrebleu -tok {SACREBLEU_TOKENIZER} -w 2 {get_flores_tgt_file(src, tgt)} -m {' '.join(METRICS)}"
            if CHRF_plus:
                command += ' --chrf-word-order 2'
            command += f" < {sentence_output_path} > {bleu_output_path}"

            process = subprocess.run(command, shell=True, check=True, text=True)

        with open(bleu_output_path, 'r') as f:
            metrics = json.load(f)

        for m in metrics:
            dfs[METRIC_MAPPING[m["name"]]].loc[model_name][src+'-'+tgt] = float(m["score"])


  0%|          | 0/64 [00:03<?, ?it/s]

Error translating batch: Error code: 429 - {'type': 'error', 'error': {'type': 'rate_limit_error', 'message': 'Number of request tokens has exceeded your daily rate limit (https://docs.anthropic.com/en/api/rate-limits); see the response headers for current usage. Please reduce the prompt length or the maximum tokens requested, or try again later. You may also contact sales at https://www.anthropic.com/contact-sales to discuss your options for a rate limit increase.'}}
Error translating batch: Error code: 429 - {'type': 'error', 'error': {'type': 'rate_limit_error', 'message': 'Number of request tokens has exceeded your daily rate limit (https://docs.anthropic.com/en/api/rate-limits); see the response headers for current usage. Please reduce the prompt length or the maximum tokens requested, or try again later. You may also contact sales at https://www.anthropic.com/contact-sales to discuss your options for a rate limit increase.'}}
Error translating batch: Error code: 429 - {'type': 'e




Error translating batch: Error code: 429 - {'type': 'error', 'error': {'type': 'rate_limit_error', 'message': 'Number of request tokens has exceeded your daily rate limit (https://docs.anthropic.com/en/api/rate-limits); see the response headers for current usage. Please reduce the prompt length or the maximum tokens requested, or try again later. You may also contact sales at https://www.anthropic.com/contact-sales to discuss your options for a rate limit increase.'}}
Error translating batch: Error code: 429 - {'type': 'error', 'error': {'type': 'rate_limit_error', 'message': 'Number of request tokens has exceeded your daily rate limit (https://docs.anthropic.com/en/api/rate-limits); see the response headers for current usage. Please reduce the prompt length or the maximum tokens requested, or try again later. You may also contact sales at https://www.anthropic.com/contact-sales to discuss your options for a rate limit increase.'}}
Error translating batch: Error code: 429 - {'type': 'e

UnboundLocalError: local variable 'retries' referenced before assignment

In [61]:
metrics

[{'name': 'BLEU',
  'score': 5.3,
  'signature': 'nrefs:1|case:mixed|eff:no|tok:flores200|smooth:exp|version:2.4.2',
  'verbose_score': '18.7/6.7/3.8/2.2 (BP = 0.933 ratio = 0.935 hyp_len = 30681 ref_len = 32798)',
  'nrefs': '1',
  'case': 'mixed',
  'eff': 'no',
  'tok': 'flores200',
  'smooth': 'exp',
  'version': '2.4.2'},
 {'name': 'chrF2++',
  'score': 23.56,
  'signature': 'nrefs:1|case:mixed|eff:yes|nc:6|nw:2|space:no|version:2.4.2',
  'nrefs': '1',
  'case': 'mixed',
  'eff': 'yes',
  'nc': '6',
  'nw': '2',
  'space': 'no',
  'version': '2.4.2'}]

In [None]:
import os
from together import Together

client = Together(api_key=os.environ.get('TOGETHER_API_KEY'))

response = client.chat.completions.create(
    model="meta-llama/Llama-2-7b-chat-hf",
    messages=[],
    max_tokens=512,
    temperature=0.7,
    top_p=0.7,
    top_k=50,
    repetition_penalty=1,
    stop=["[/INST]","</s>"],
    stream=True
)
print(response.choices[0].message.content)

In [None]:
dfs

In [71]:
for m, df in dfs.items():
    dfs[m].to_csv(os.path.join(OUTPUT_DIR, f"openai.{m}.csv"))

In [68]:
columns = ["ban-en","en-ban","ban-id","id-ban", "min-en","en-min","min-id","id-min"]

In [70]:
dfs['bleu'][columns]

Unnamed: 0,ban-en,en-ban,ban-id,id-ban,min-en,en-min,min-id,id-min
gpt-4o,27.11,11.45,27.89,13.08,28.63,11.0,31.27,11.0
gpt-3.5-turbo,27.17,11.63,28.17,13.14,28.75,11.07,31.06,11.05
gpt-4-turbo,27.2,11.59,28.41,13.24,28.51,10.99,31.0,10.93


In [69]:
dfs['chrf'][columns]

Unnamed: 0,ban-en,en-ban,ban-id,id-ban,min-en,en-min,min-id,id-min
gpt-4o,49.03,36.12,50.45,38.34,50.22,37.35,54.54,37.62
gpt-3.5-turbo,48.9,36.3,50.83,38.51,50.33,37.44,54.37,37.55
gpt-4-turbo,48.78,36.37,50.76,38.5,50.18,37.41,54.38,37.58


In [24]:
os.makedirs(OUTPUT_DIR, exist_ok=True)

for pair in PAIRS:
    # translating from l1 -> l2
    l1 = pair[0]
    l2 = pair[1]
    
    if l1 == "en":
        dirname = l2+"en"
    else:
        dirname = l1+l2
    input_filename = os.path.join(DATA_DIR, dirname, f'test.{l1}-{l2}.{l1}')
    output_filename = os.path.join(OUTPUT_DIR, f'test-{l1}-{l2}')

    translated_list = []

    with open(input_filename) as f:
        for line in f:
            response = client.chat.completions.create(
              model="gpt-3.5-turbo",
              messages=[
                {"role": "system", "content": "You are a helpful translator that only provides the translation. Remain silent if you don't know the answer."},
                {"role": "user", "content": TRANSLATE_PROMPT.format(l1=LANG_TABLE[l1], l2=LANG_TABLE[l2], text=line)}
              ]
            )
            translated_list.append(response.choices[0].message.content)
            print(response.choices[0].message.content)

    with open(output_filename, 'w') as f:
        f.writelines([t + "\n" for t in translated_list])

    

I am aware of the pseudoarchaeology but it is not supported by archaeological evidence, and it does not orientate the Sphinx.
There are many events that are related to mysteries, conspiracy theories, and fictional stories, especially "Area 51," which is legendary.
The Euclidean algorithm is often used to solve linear Diophantine equations by finding integer solutions; it can also be applied to polynomial equations to determine the common factors.
The location of the ambush was behind a tree, but the victims were gathered at the riverbank to drink. The victims were attacked because of their belongings.
Floral motif, the concept based on the understanding of the label is beautifully elaborated.
The construction work is currently being carried out at the Amun-Re Police Office located near Seppulo Aruwa, in the city of Thebes, in the Egyptian capital, Riolo.
To achieve success, we must persevere with determination and diligence.
Sorry, I don't have the translation for the provided text fro

In [11]:
response.choices[0].message.content

'Analisis faktor adalah salah satu teknik untuk menemukan faktor penting dalam suatu efek.'