In [1]:
from openai import OpenAI
import requests
import os
from dotenv import load_dotenv

load_dotenv()

True

In [2]:
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")

OPENROUTER_CLIENT = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY
)

In [3]:
def call_openrouter_api(prompt: str) -> str:
    try:
        completion = OPENROUTER_CLIENT.chat.completions.create(
            model="deepseek/deepseek-chat-v3.1:free",
            # model="openai/gpt-oss-20b:free",
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ]
        )

        return completion.choices[0].message.content

    except Exception as e:
        print(f"API called failed: {e}")
        return None
    
def call_ollama_api(prompt: str) -> str:
    try:
        # API endpoint của Ollama (mặc định chạy local)
        url = "http://192.168.50.217:11434/api/chat"
        
        payload = {
            # "model": "qwen3-vl:235b-cloud",
            # "model": "qwen3-coder:480b-cloud",
            "model": "deepseek-v3.1:671b-cloud",
            # "model": "gpt-oss:120b-cloud",
            "messages": [{
                "role": "user",
                "content": prompt
            }],
            # "think": "medium",
            "think": False,
            "stream": False  # Set False để nhận toàn bộ response một lần
        }
        response = requests.post(url, json=payload)
        response.raise_for_status()
        print(response.json())
        return response.json()["message"]["content"]
        
    except Exception as e:
        print(f"API call failed: {e}")
        return None

In [4]:
PROMPT = """
You are a linguistic alignment assistant.  
Your task is to align each **phoneme** in the IPA transcription of a word or multi-word phrase with its corresponding **grapheme(s)** (letters) in the English spelling.

Mapping rules:
- If multiple letters map to one phoneme (e.g., "ch" → "tʃ"), group them together.  
- There are DIPHTHONGS - ["tʃ", "dʒ", "oʊ", "oʊ̃", "əʊ", "aʊ", "əɹ", "ɜɹ", "ɔɹ"], which are single phonemes made of two sounds, so they should be treated as one unit.
- ["w", "j"] (the y-sound) are **semivowels**, so the next phoneme usually has same letter as them.
- If one letter contributes to multiple phonemes, repeat it as needed.  
- If the phrase has multiple words, insert an empty phoneme entry between words to mark the space.  
- Also, make sure **no letter from the original input is missing** from the output — every character (including punctuation such as apostrophes ’, hyphens -, diacritics, etc.except spaces and punctuation) must appear in at least one "letter" field with the next letter.
- DO NOT invent or drop any phonemes or letters.

Input:
Word: "{word}"  
Phonemes: "{phonemes}"

Output format:
A JSON list of objects, each with fields:
- "phoneme": string
- "letter": string (or letters that correspond to it)

Example output:
[
  {{"phoneme": "f", "letter": "f"}},
  {{"phoneme": "ɛ", "letter": "e"}},
  {{"phoneme": "b", "letter": "b"}},
  {{"phoneme": "j", "letter": "r"}},
  {{"phoneme": "u", "letter": "u"}},
  {{"phoneme": "w", "letter": "a"}},
  {{"phoneme": "ɛ", "letter": "a"}},
  {{"phoneme": "ɹ", "letter": "r"}},
  {{"phoneme": "i", "letter": "y"}},
  {{"phoneme": "", "letter": "'s"}},
  {{"phoneme": "", "letter": ""}},
  {{"phoneme": "s", "letter": "s"}},
  {{"phoneme": "eɪ", "letter": "a"}},
  {{"phoneme": "l", "letter": "le"}},
]

Rules:
- Use phoneme order to guide alignment, left to right.
- Preserve duplicates if a letter contributes to multiple phonemes.
- Do not add extra commentary — return only valid JSON. 
- Every "letter" value MUST be a contiguous substring of Word (case preserved) and MUST appear in Word at the specified start..end indices
"""

PROMPT_v2 = """
You are a linguistic alignment assistant.  
Your task is to align each **phoneme** in the IPA transcription of a word or multi-word phrase with its corresponding **grapheme(s)** (letters) in the English spelling.  
If multiple letters map to one phoneme (e.g., "ch" → "tʃ"), group them together.  
If one letter contributes to multiple phonemes, repeat it as needed.  
If the phrase has multiple words, insert an empty phoneme entry between words to mark the space.  
Also, make sure **no letter from the original input is missing** from the output — every character (except spaces and punctuation) must appear in at least one "letter" field.

⚠️ Delimiter rule for adjacent identical letters:
- When two adjacent, identical letters in the Word each contribute to different phonemes (e.g., the two "o" in "cooperate" mapping to "oʊ" and "ɑ"), insert a delimiter entry **between their rows** to mark the boundary:
  {{"phoneme":"###","letter":"###"}}
- This delimiter **does not consume any characters** from the Word, and is **exempt** from the “every letter must appear” and “contiguous substring” constraints.
- Use this delimiter only to disambiguate boundaries between adjacent identical letters that map to different phonemes.

Input:
Word: "{word}"  
Phonemes: "{phonemes}"

Output format:
A JSON list of objects, each with fields:
- "phoneme": string
- "letter": string (or letters that correspond to it)

Example output:
[
  {{"phoneme": "f", "letter": "f"}},
  {{"phoneme": "ɛ", "letter": "e"}},
  {{"phoneme": "b", "letter": "b"}},
  {{"phoneme": "j", "letter": "r"}},
  {{"phoneme": "u", "letter": "u"}},
  {{"phoneme": "w", "letter": "a"}},
  {{"phoneme": "ɛ", "letter": "a"}},
  {{"phoneme": "ɹ", "letter": "r"}},
  {{"phoneme": "i", "letter": "y"}},
  {{"phoneme": "", "letter": ""}},
  {{"phoneme": "s", "letter": "s"}},
  {{"phoneme": "eɪ", "letter": "a"}},
  {{"phoneme": "l", "letter": "le"}},
]

Additional example (delimiter with adjacent identical letters):
Input:
Word: "cooperate"
Phonemes: "koʊɑpɝeɪt"

Expected alignment snippet:
[
  {{"phoneme": "k",  "letter": "c"}},
  {{"phoneme": "oʊ","letter": "o"}},
  {{"phoneme": "###","letter": "###"}},
  {{"phoneme": "ɑ",  "letter": "o"}},
  {{"phoneme": "p",  "letter": "p"}},
  {{"phoneme": "ɝ",  "letter": "er"}},
  {{"phoneme": "eɪ","letter": "a"}},
  {{"phoneme": "t",  "letter": "t"}},
  {{"phoneme": "",   "letter": ""}}  // optional if followed by more words
]

Rules:
- Use phoneme order to guide alignment, left to right.
- Preserve duplicates if a letter contributes to multiple phonemes.
- Do not add extra commentary — return only valid JSON. 
- Every "letter" value MUST be a contiguous substring of Word (case preserved) and MUST appear in Word at the specified start..end indices
"""

PROMPT_v3 = """
You are a phoneme-to-grapheme alignment expert.

TASK: Align each phoneme to its corresponding letter(s) in the word.

INPUT:
Word: "{word}"
Phonemes: "{phonemes}"

INSTRUCTIONS:
Follow these steps carefully and show your work:

STEP 1: Break down the word into individual characters
List each character in order: [character1, character2, ...]
Example for "cat": [c, a, t]

STEP 2: Break down the phonemes into individual sounds
List each phoneme in order: [phoneme1, phoneme2, ...]
Example for "kæt": [k, æ, t]

STEP 3: Match phonemes to letters LEFT-TO-RIGHT
Start with the first unused letter and first unmatched phoneme.
For each phoneme, determine which letter(s) produce that sound.

Rules for matching:
- One letter → one phoneme (most common): "c" → /k/
- Multiple letters → one phoneme: "ch" → /tʃ/, "tion" → /ʃən/
- Silent letters: include with adjacent phoneme or mark as empty
- Apostrophes/punctuation: assign to empty phoneme entry
- Spaces between words: insert {{"phoneme":"", "letter":""}}

Show your matching process:
- Phoneme /.../ matches letter(s) "..." (position X in word)
- Phoneme /.../ matches letter(s) "..." (position Y in word)
- etc.

STEP 4: Verify completeness
Check that:
✓ Every character from the original word appears in output
✓ Every phoneme is assigned
✓ No letter is used twice (unless genuinely repeated sound)
✓ Word boundaries marked with empty entries

STEP 5: Generate final JSON
Format:
[
  {{"phoneme":"...", "letter":"..."}},
  {{"phoneme":"...", "letter":"..."}},
  ...
]

========================================
EXAMPLE WALKTHROUGH:

Word: "shop"
Phonemes: "ʃɑp"

STEP 1: Characters
[s, h, o, p]

STEP 2: Phonemes
[ʃ, ɑ, p]

STEP 3: Matching
- Phoneme /ʃ/ → letters "sh" (positions 1-2)
- Phoneme /ɑ/ → letter "o" (position 3)
- Phoneme /p/ → letter "p" (position 4)

STEP 4: Verification
✓ All characters used: s,h,o,p
✓ All phonemes assigned: ʃ,ɑ,p
✓ No duplicates

STEP 5: JSON output
[
  {{"phoneme":"ʃ", "letter":"sh"}},
  {{"phoneme":"ɑ", "letter":"o"}},
  {{"phoneme":"p", "letter":"p"}}
]

========================================
EXAMPLE 2 (with apostrophe and space):

Word: "it's ok"
Phonemes: "ɪtsoʊkeɪ"

STEP 1: Characters
[i, t, ', s, (space), o, k]

STEP 2: Phonemes
[ɪ, t, s, oʊ, k, eɪ]

STEP 3: Matching
- Phoneme /ɪ/ → letter "i" (position 1)
- Phoneme /t/ → letter "t" (position 2)
- Empty phoneme → apostrophe "'" (position 3)
- Phoneme /s/ → letter "s" (position 4)
- Empty phoneme → space "" (word boundary)
- Phoneme /oʊ/ → letter "o" (position 6)
- Phoneme /k/ → letter "k" (position 7)
- Empty phoneme → missing "ay" sound, but "k" is last letter... 
  Wait, recount: "ok" has only 2 letters. Phonemes "oʊkeɪ" = 3 sounds.
  Error in example - let me fix: actual phonemes should be "oʊkeɪ" for "okay" not "ok"

Let me redo: Word "it's ok" → phonemes "ɪtsokeɪ" is wrong.
Correct: "it's ok" → /ɪts oʊkeɪ/ if spelled "okay", or /ɪts oʊk/ if just "ok"

Assuming "okay":
- Phoneme /ɪ/ → letter "i"
- Phoneme /t/ → letter "t"
- Empty → apostrophe "'"
- Phoneme /s/ → letter "s"
- Empty → space
- Phoneme /oʊ/ → letter "o"
- Phoneme /k/ → letter "k"
- Phoneme /eɪ/ → letters "ay"

STEP 4: Verification
✓ All characters: i,t,',s,o,k,a,y
✓ All phonemes: ɪ,t,s,oʊ,k,eɪ

STEP 5: JSON
[
  {{"phoneme":"ɪ", "letter":"i"}},
  {{"phoneme":"t", "letter":"t"}},
  {{"phoneme":"", "letter":"'"}},
  {{"phoneme":"s", "letter":"s"}},
  {{"phoneme":"", "letter":""}},
  {{"phoneme":"oʊ", "letter":"o"}},
  {{"phoneme":"k", "letter":"k"}},
  {{"phoneme":"eɪ", "letter":"ay"}}
]

========================================
NOW YOUR TURN:

Word: "{word}"
Phonemes: "{phonemes}"

Think step by step:

STEP 1: List characters in "{word}":


STEP 2: List phonemes in "{phonemes}":


STEP 3: Match each phoneme to letter(s):


STEP 4: Verify completeness:


STEP 5: Final JSON (ONLY output the JSON array below, no extra text):

"""

In [14]:
result = call_ollama_api(PROMPT.format(
    word="Hello i'm a spider",
    phonemes="hʌloʊaɪməspaɪɾɝ"
))
print(result)

{'model': 'deepseek-v3.1:671b-cloud', 'remote_model': 'deepseek-v3.1:671b', 'remote_host': 'https://ollama.com:443', 'created_at': '2025-10-29T03:32:52.926145427Z', 'message': {'role': 'assistant', 'content': '[\n  {"phoneme": "h", "letter": "H"},\n  {"phoneme": "ʌ", "letter": "e"},\n  {"phoneme": "l", "letter": "l"},\n  {"phoneme": "oʊ", "letter": "lo"},\n  {"phoneme": "", "letter": " "},\n  {"phoneme": "aɪ", "letter": "i"},\n  {"phoneme": "m", "letter": "\'m"},\n  {"phoneme": "", "letter": " "},\n  {"phoneme": "ə", "letter": "a"},\n  {"phoneme": "", "letter": " "},\n  {"phoneme": "s", "letter": "s"},\n  {"phoneme": "p", "letter": "p"},\n  {"phoneme": "aɪ", "letter": "i"},\n  {"phoneme": "ɾ", "letter": "d"},\n  {"phoneme": "ɝ", "letter": "er"}\n]'}, 'done': True, 'done_reason': 'stop', 'total_duration': 2833066008, 'prompt_eval_count': 625, 'eval_count': 212}
[
  {"phoneme": "h", "letter": "H"},
  {"phoneme": "ʌ", "letter": "e"},
  {"phoneme": "l", "letter": "l"},
  {"phoneme": "oʊ", 

In [20]:
result = call_ollama_api(PROMPT.format(
    word="february's quick movement",
    phonemes="fɛbjuɝi kwɪk muvmɛnt"
))
print(result)

{'model': 'deepseek-v3.1:671b-cloud', 'remote_model': 'deepseek-v3.1:671b', 'remote_host': 'https://ollama.com:443', 'created_at': '2025-10-29T03:36:01.987996826Z', 'message': {'role': 'assistant', 'content': '[\n  {"phoneme": "f", "letter": "f"},\n  {"phoneme": "ɛ", "letter": "e"},\n  {"phoneme": "b", "letter": "b"},\n  {"phoneme": "j", "letter": "r"},\n  {"phoneme": "u", "letter": "u"},\n  {"phoneme": "ɝ", "letter": "a"},\n  {"phoneme": "i", "letter": "y"},\n  {"phoneme": "", "letter": "\'s"},\n  {"phoneme": "", "letter": " "},\n  {"phoneme": "k", "letter": "q"},\n  {"phoneme": "w", "letter": "u"},\n  {"phoneme": "ɪ", "letter": "i"},\n  {"phoneme": "k", "letter": "ck"},\n  {"phoneme": "", "letter": " "},\n  {"phoneme": "m", "letter": "m"},\n  {"phoneme": "u", "letter": "o"},\n  {"phoneme": "v", "letter": "v"},\n  {"phoneme": "m", "letter": "m"},\n  {"phoneme": "ɛ", "letter": "e"},\n  {"phoneme": "n", "letter": "n"},\n  {"phoneme": "t", "letter": "t"}\n]'}, 'done': True, 'done_reason'

In [21]:
result = call_ollama_api(PROMPT.format(
    word="it's been a while since we've all seen each other",
    phonemes="ɪts bɪn ʌ waɪl sɪns wiv ɔl sin itʃ ʌðɝ"
))
print(result)

{'model': 'deepseek-v3.1:671b-cloud', 'remote_model': 'deepseek-v3.1:671b', 'remote_host': 'https://ollama.com:443', 'created_at': '2025-10-29T03:36:11.888106142Z', 'message': {'role': 'assistant', 'content': '[\n  {"phoneme": "ɪ", "letter": "i"},\n  {"phoneme": "t", "letter": "t"},\n  {"phoneme": "s", "letter": "\'s"},\n  {"phoneme": "", "letter": " "},\n  {"phoneme": "b", "letter": "b"},\n  {"phoneme": "ɪ", "letter": "ee"},\n  {"phoneme": "n", "letter": "n"},\n  {"phoneme": "", "letter": " "},\n  {"phoneme": "ʌ", "letter": "a"},\n  {"phoneme": "", "letter": " "},\n  {"phoneme": "w", "letter": "wh"},\n  {"phoneme": "aɪ", "letter": "i"},\n  {"phoneme": "l", "letter": "l"},\n  {"phoneme": "e", "letter": "e"},\n  {"phoneme": "", "letter": " "},\n  {"phoneme": "s", "letter": "s"},\n  {"phoneme": "ɪ", "letter": "i"},\n  {"phoneme": "n", "letter": "n"},\n  {"phoneme": "s", "letter": "c"},\n  {"phoneme": "e", "letter": "e"},\n  {"phoneme": "", "letter": " "},\n  {"phoneme": "w", "letter": "w

In [5]:
result = call_ollama_api(PROMPT.format(
    word="'january'",
    phonemes="dʒænjuwɛɹi"
))
print(result)

{'model': 'deepseek-v3.1:671b-cloud', 'remote_model': 'deepseek-v3.1:671b', 'remote_host': 'https://ollama.com:443', 'created_at': '2025-10-29T08:38:50.358837263Z', 'message': {'role': 'assistant', 'content': '[\n  {"phoneme": "dʒ", "letter": "j"},\n  {"phoneme": "æ", "letter": "a"},\n  {"phoneme": "n", "letter": "n"},\n  {"phoneme": "j", "letter": "u"},\n  {"phoneme": "u", "letter": "a"},\n  {"phoneme": "w", "letter": "r"},\n  {"phoneme": "ɛ", "letter": "y"},\n  {"phoneme": "ɹ", "letter": ""},\n  {"phoneme": "i", "letter": ""}\n]'}, 'done': True, 'done_reason': 'stop', 'total_duration': 1609578932, 'prompt_eval_count': 619, 'eval_count': 130}
[
  {"phoneme": "dʒ", "letter": "j"},
  {"phoneme": "æ", "letter": "a"},
  {"phoneme": "n", "letter": "n"},
  {"phoneme": "j", "letter": "u"},
  {"phoneme": "u", "letter": "a"},
  {"phoneme": "w", "letter": "r"},
  {"phoneme": "ɛ", "letter": "y"},
  {"phoneme": "ɹ", "letter": ""},
  {"phoneme": "i", "letter": ""}
]


In [None]:
result = call_ollama_api(PROMPT.format(
    word="He plays guitar every Sunday night",
    phonemes="hipleɪzɡɪtɑɹɛvɹisʌndeɪnaɪt"
))
print(result)

In [19]:
result = call_ollama_api(PROMPT_v2.format(
    word="cooperate",
    phonemes="koʊɑpɝeɪt"
))
print(result)

[
  {"phoneme":"k","letter":"c"},
  {"phoneme":"oʊ","letter":"o"},
  {"phoneme":"###","letter":"###"},
  {"phoneme":"ɑ","letter":"o"},
  {"phoneme":"p","letter":"p"},
  {"phoneme":"ɝ","letter":"er"},
  {"phoneme":"eɪ","letter":"a"},
  {"phoneme":"t","letter":"t"},
  {"phoneme":"","letter":"e"}
]


In [20]:
result = call_ollama_api(PROMPT_v2.format(
    word="zoology",
    phonemes="zoʊɑlɪdʒi"
))
print(result)

[
  {"phoneme":"z","letter":"z"},
  {"phoneme":"oʊ","letter":"oo"},
  {"phoneme":"l","letter":"l"},
  {"phoneme":"ɑ","letter":"o"},
  {"phoneme":"dʒ","letter":"g"},
  {"phoneme":"i","letter":"y"}
]


In [21]:
result = call_ollama_api(PROMPT_v2.format(
    word="zooplankton",
    phonemes="zoʊoʊplæŋktɪn"
))
print(result)

[
  {"phoneme":"z","letter":"z"},
  {"phoneme":"oʊ","letter":"o"},
  {"phoneme":"###","letter":"###"},
  {"phoneme":"oʊ","letter":"o"},
  {"phoneme":"p","letter":"p"},
  {"phoneme":"l","letter":"l"},
  {"phoneme":"æ","letter":"a"},
  {"phoneme":"ŋ","letter":"n"},
  {"phoneme":"k","letter":"k"},
  {"phoneme":"t","letter":"t"},
  {"phoneme":"ɪ","letter":"o"},
  {"phoneme":"n","letter":"n"}
]


In [22]:
result = call_ollama_api(PROMPT_v2.format(
    word="zoonotic",
    phonemes="zuoʊnɑɾɪk"
))
print(result)

[
  {"phoneme":"z","letter":"z"},
  {"phoneme":"u","letter":"oo"},
  {"phoneme":"oʊ","letter":"o"},
  {"phoneme":"n","letter":"n"},
  {"phoneme":"ɑ","letter":"o"},
  {"phoneme":"ɾ","letter":"t"},
  {"phoneme":"ɪ","letter":"i"},
  {"phoneme":"k","letter":"c"}
]
