In [2]:
import os
from typing import Optional

import requests
from dotenv import load_dotenv
from openai import OpenAI

load_dotenv()

True

In [3]:
# API key for OpenRouter, read from the process environment (None when unset).
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")

# The stock OpenAI client is pointed at OpenRouter's base URL and shared by
# every OpenRouter call in this notebook.
OPENROUTER_CLIENT = OpenAI(
    api_key=OPENROUTER_API_KEY,
    base_url="https://openrouter.ai/api/v1",
)

In [23]:
def call_openrouter_api(
    prompt: str,
    model: str = "openai/gpt-oss-20b:free",
) -> Optional[str]:
    """Send a single-turn chat prompt through OPENROUTER_CLIENT and return the reply.

    Args:
        prompt: Text sent as the only ``user`` message of the conversation.
        model: OpenRouter model identifier. Defaults to the model this notebook
            has been using; pass e.g. ``"deepseek/deepseek-chat-v3.1:free"``
            to try another one without editing the function.

    Returns:
        The ``content`` of the first choice's message, or ``None`` if anything
        goes wrong (the error is printed rather than raised).
    """
    try:
        completion = OPENROUTER_CLIENT.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
        )
        return completion.choices[0].message.content
    except Exception as e:
        # Deliberately best-effort: report the failure and let the caller
        # handle a None result instead of aborting the notebook run.
        print(f"API call failed: {e}")
        return None
    
def call_ollama_api(
    prompt: str,
    model: str = "gpt-oss:120b-cloud",
    url: str = "http://192.168.50.217:11434/api/generate",
    timeout: float = 300.0,
) -> Optional[str]:
    """Send a prompt to an Ollama ``/api/generate`` endpoint and return the reply.

    Args:
        prompt: Prompt text forwarded in the ``prompt`` field of the request.
        model: Name of the Ollama model to run.
        url: Full URL of the generate endpoint. The default is the LAN host
            this notebook was written against; override it for another server.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on an unreachable or
            stalled host, which freezes the notebook cell.

    Returns:
        The ``response`` field of the JSON body, or ``None`` if the request
        fails, returns an error status, or the body lacks that field (the
        error is printed rather than raised).
    """
    payload = {
        "model": model,
        "prompt": prompt,
        "think": "medium",  # thinking level passed through to Ollama
        "stream": False,  # False: receive the whole response in one JSON body
    }

    try:
        response = requests.post(url, json=payload, timeout=timeout)
        response.raise_for_status()
        return response.json()["response"]
    except Exception as e:
        # Deliberately best-effort: report the failure and let the caller
        # handle a None result instead of aborting the notebook run.
        print(f"API call failed: {e}")
        return None

In [5]:
# Prompt template for grapheme-to-phoneme alignment. Fill it with
# PROMPT.format(word=..., phonemes=...). Literal braces in the JSON example are
# doubled ({{ }}) so that str.format leaves them in place.
PROMPT = """
You are a linguistic alignment assistant.  
Your task is to align each **phoneme** in the IPA transcription of a word or multi-word phrase with its corresponding **grapheme(s)** (letters) in the English spelling.  
If multiple letters map to one phoneme (e.g., "ch" → "tʃ"), group them together.  
If one letter contributes to multiple phonemes, repeat it as needed.  
If the phrase has multiple words, insert an empty phoneme entry between words to mark the space.  
Also, make sure **no letter from the original input is missing** from the output — every character (except spaces and punctuation) must appear in at least one "letter" field.

Input:
Word: "{word}"  
Phonemes: "{phonemes}"

Output format:
A JSON list of objects, each with fields:
- "phoneme": string
- "letter": string (or letters that correspond to it)

Example output:
[
  {{"phoneme": "f", "letter": "f"}},
  {{"phoneme": "ɛ", "letter": "e"}},
  {{"phoneme": "b", "letter": "b"}},
  {{"phoneme": "j", "letter": "r"}},
  {{"phoneme": "u", "letter": "u"}},
  {{"phoneme": "w", "letter": "a"}},
  {{"phoneme": "ɛ", "letter": "a"}},
  {{"phoneme": "ɹ", "letter": "r"}},
  {{"phoneme": "i", "letter": "y"}},
  {{"phoneme": "", "letter": ""}},
  {{"phoneme": "s", "letter": "s"}},
  {{"phoneme": "eɪ", "letter": "a"}},
  {{"phoneme": "l", "letter": "le"}}
]

Rules:
- Use phoneme order to guide alignment, left to right.
- Preserve duplicates if a letter contributes to multiple phonemes.
- Do not add extra commentary — return only valid JSON. 
- Every "letter" value MUST be a contiguous substring of Word (case preserved).
"""

In [None]:
# Alternative backend (OpenRouter), kept for reference:
# result = call_openrouter_api(
#     PROMPT.format(word="my voice my choice", phonemes="maɪvɔɪsmaɪtʃɔɪs")
# )

# Multi-word phrase sent to the Ollama backend; the raw reply is printed as-is.
result = call_ollama_api(
    PROMPT.format(word="open the door", phonemes="oʊpɪnðʌdɔɹ")
)
print(result)

[
  {"phoneme": "oʊ", "letter": "o"},
  {"phoneme": "p", "letter": "p"},
  {"phoneme": "ɪ", "letter": "e"},
  {"phoneme": "n", "letter": "n"},
  {"phoneme": "", "letter": ""},
  {"phoneme": "ð", "letter": "th"},
  {"phoneme": "ʌ", "letter": "e"},
  {"phoneme": "", "letter": ""},
  {"phoneme": "d", "letter": "d"},
  {"phoneme": "ɔ", "letter": "oo"},
  {"phoneme": "ɹ", "letter": "r"}
]


In [28]:
# Hyphenated single word sent to the Ollama backend; the raw reply is printed as-is.
result = call_ollama_api(
    PROMPT.format(word="co-operate", phonemes="koʊɑpɝeɪt")
)
print(result)

[
  {"phoneme":"k","letter":"c"},
  {"phoneme":"oʊ","letter":"o"},
  {"phoneme":"ɑ","letter":"o"},
  {"phoneme":"p","letter":"p"},
  {"phoneme":"ɝ","letter":"er"},
  {"phoneme":"eɪ","letter":"a"},
  {"phoneme":"t","letter":"te"}
]
