In [229]:
# Download the Apertium repository setup script and run it through `sudo bash`,
# then install the `apertium-all-dev` and `lexd` packages with apt.
# NOTE(review): the script is executed as root straight from the network - confirm the source is trusted.
!curl -sS https://apertium.projectjj.com/apt/install-release.sh | sudo bash
!apt -qq install apertium-all-dev lexd

Cleaning up old install, if any...
removed '/etc/apt/trusted.gpg.d/apertium.gpg'
removed '/etc/apt/preferences.d/apertium.pref'
removed '/etc/apt/sources.list.d/apertium.list'
Determining Debian/Ubuntu codename...
Found evidence of jammy...
Settling for jammy - enabling the Apertium release repo...
Installing Apertium GnuPG key to /etc/apt/trusted.gpg.d/apertium.gpg
Installing package override to /etc/apt/preferences.d/apertium.pref
Creating /etc/apt/sources.list.d/apertium.list
Running apt-get update...
All done - enjoy the packages! If you just want all core tools, do: sudo apt-get install apertium-all-dev
apertium-all-dev is already the newest version (3.8.1-7~sid1).
lexd is already the newest version (1.3.5-1~jammy1).
0 upgraded, 0 newly installed, 0 to remove and 5 not upgraded.


In [88]:
!pip install flask-ngrok

Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Installing collected packages: flask-ngrok
Successfully installed flask-ngrok-0.0.25


In [106]:
import getpass
import os
import re
import subprocess
from string import punctuation

import pandas as pd
from flask import Flask, render_template, request
from pyngrok import ngrok
# The ngrok auth token is a credential and must not be stored in the notebook.
# It is read from the NGROK_AUTHTOKEN environment variable, with an interactive
# prompt as a fallback. Any token that was ever committed in this file should
# be treated as leaked and revoked in the ngrok dashboard.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass.getpass("ngrok auth token: ")
if not ngrok_auth_token:
    raise RuntimeError("An ngrok auth token is required to open the public tunnel.")
ngrok.set_auth_token(ngrok_auth_token)

In [124]:
class MorphAnalyzer():
  """Morphological lookup for Bezhta words backed by HFST transducers.

  Every query is answered by running the ``hfst-lookup`` command-line tool
  against transducer files referenced by relative path, so they are resolved
  against the current working directory:
  ``cy2lat.transliterator.disam.hfst`` (transliteration),
  ``bezhta.analyzer.hfst`` (analyses) and ``bezhta.segm.hfst`` (segmentation).
  """

  # Analyser tag -> human-readable part-of-speech label. When several tags
  # occur in one analysis, the tag listed last in this mapping wins.
  POS_LABELS = {'<n>' : 'noun', '<v>' : 'verb', '<adj>' : 'adjective', '<adv>' : 'adverb', '<num>' : 'numeral', '<pron>' : 'pronoun', '<dem>' : 'demonstrative', '<refl>' : 'reflexive pronoun', '<cop>' : 'copula', '<aux>' : 'auxiliary', '<cvb>' : 'coverb', '<inter>' : 'interjection'}

  # Pronoun lemma -> person/number label attached to '<pron>' analyses.
  # NOTE(review): 'иле' and 'миже' carry the same labels as 'до' and 'ми';
  # confirm whether they were meant to be plural ('1pl' / '2pl').
  PRONOUN_DETAILS = {'до' : '1sg', 'ми' : '2sg', 'иле' : '1sg', 'миже' : '2sg'}

  def _run_hfst(self, word, transducer):
    """Feed ``word`` to ``hfst-lookup`` and return the tool's standard output.

    The word is written to the tool's standard input and no shell is
    involved, so user-supplied text is never interpreted as shell syntax
    (no command injection, no glob expansion).

    Raises:
      FileNotFoundError: if the ``hfst-lookup`` executable is not installed.
      RuntimeError: if the tool exits with an error and prints nothing.
    """
    completed = subprocess.run(
        ["hfst-lookup", transducer],
        input=f"{word}\n",
        capture_output=True,
        text=True,
        encoding="utf-8",
    )
    if completed.returncode != 0 and not completed.stdout.strip():
      raise RuntimeError(
          f"hfst-lookup failed for {transducer!r}: {completed.stderr.strip()}"
      )
    return completed.stdout

  @staticmethod
  def _result_fields(raw_output):
    """Return the second tab-separated field of every result line.

    Lines without a tab (such as the blank lines that terminate the tool's
    output) are skipped, so empty output yields an empty list.
    """
    fields = []
    for line in raw_output.split('\n'):
      columns = line.split('\t')
      if len(columns) > 1:
        fields.append(columns[1])
    return fields

  def _classify(self, form):
    """Map one analysis string to a POS label, or ``(label, person)`` for pronouns.

    Returns ``"unknown"`` when the analysis contains none of the known tags.
    """
    form_pos = "unknown"
    for tag, label in self.POS_LABELS.items():
      if tag not in form:
        continue
      form_pos = label
      if tag == '<pron>':
        # Everything before the first tag is the lemma.
        lemma = form.split('<')[0]
        for pronoun, person in self.PRONOUN_DETAILS.items():
          if pronoun in lemma:
            form_pos = label, person
    return form_pos

  def lookup(self, word):
    """Analyse a single word.

    Args:
      word: the word to look up; it is lowercased before being analysed.

    Returns:
      A tuple ``(variants, segm, translit)``:
        * ``variants`` - list of ``(analysis, pos)`` pairs, where ``pos`` is a
          label string or a ``(label, person)`` tuple for known pronouns;
        * ``segm`` - set of segmentations (morphemes joined with ``-``) whose
          surface form equals the lowercased word;
        * ``translit`` - the first transliteration result, or ``""`` when the
          transliterator produced no output.
    """
    word_lower = word.lower()

    translit_results = self._result_fields(
        self._run_hfst(word_lower, "cy2lat.transliterator.disam.hfst"))
    translit = translit_results[0] if translit_results else ""

    # The label is computed independently for every analysis, so an analysis
    # without a known tag is "unknown" instead of inheriting the label of the
    # analysis that preceded it.
    variants = [
        (form, self._classify(form))
        for form in self._result_fields(self._run_hfst(word_lower, "bezhta.analyzer.hfst"))
    ]

    segm = set()
    for segment in self._result_fields(self._run_hfst(word_lower, "bezhta.segm.hfst")):
      # Keep only segmentations that spell exactly the queried word.
      if segment.replace('>', '') == word_lower:
        segm.add(segment.replace('>', '-'))
    return variants, segm, translit

  def analyse(self, query):
    """Analyse every word of ``query``.

    ASCII punctuation is removed and the remainder is split on whitespace.

    Returns:
      A ``pandas.DataFrame`` with one row per word and the columns ``word``,
      ``analyses`` (list of dicts with ``analysis``, ``pos``, ``details``),
      ``segmentation`` (sorted list) and ``transliteration``. Words for which
      the tools returned nothing at all are skipped.
    """
    cleaned = query.translate(str.maketrans('', '', punctuation))
    rows = []

    for w in cleaned.split():
      analyses, segmentations, translit = self.lookup(w)
      if not (analyses or segmentations or translit):
        continue

      analysis_objects = []
      for analysis, tag in analyses:
        # Pronoun analyses carry a (label, person) tuple.
        if isinstance(tag, tuple):
          pos, details = tag
        else:
          pos, details = tag, None
        analysis_objects.append({
            "analysis": analysis,
            "pos": pos,
            "details": details
        })

      rows.append({
          "word": w,
          "analyses": analysis_objects,
          "segmentation": sorted(segmentations),
          "transliteration": translit
      })

    return pd.DataFrame(rows)

In [180]:
class Transliterator():
  """Cyrillic <-> Latin transliteration backed by HFST transducers.

  Each word is passed to the ``hfst-lookup`` command-line tool together with
  a transducer file referenced by relative path
  (``cy2lat.transliterator.disam.hfst`` or ``lat2cy.transliterator.hfst``).
  """

  # Character substituted for the look-alikes "1", "I" and "l" in Cyrillic input.
  PALOCHKA = 'ӏ'

  # Returned instead of a transliteration when any word cannot be converted.
  ERROR_MESSAGE = 'Incorrect input. Try changing the direction of transliteration.'

  def _run_hfst(self, word, transducer):
    """Feed ``word`` to ``hfst-lookup`` and return the tool's standard output.

    The word is written to the tool's standard input and no shell is
    involved, so user-supplied text is never interpreted as shell syntax.

    Raises:
      FileNotFoundError: if the ``hfst-lookup`` executable is not installed.
      RuntimeError: if the tool exits with an error and prints nothing.
    """
    completed = subprocess.run(
        ["hfst-lookup", transducer],
        input=f"{word}\n",
        capture_output=True,
        text=True,
        encoding="utf-8",
    )
    if completed.returncode != 0 and not completed.stdout.strip():
      raise RuntimeError(
          f"hfst-lookup failed for {transducer!r}: {completed.stderr.strip()}"
      )
    return completed.stdout

  @staticmethod
  def _words(text):
    """Remove ASCII punctuation from ``text`` and split it on whitespace."""
    return text.translate(str.maketrans('', '', punctuation)).split()

  def _transliterate(self, words, transducer):
    """Transliterate ``words`` one by one and join the results with spaces.

    The first result of the tool is used for every word. If a word yields no
    result, or a result containing ``?`` (the marker checked for unconvertible
    input), ``ERROR_MESSAGE`` is returned instead of a partial transliteration.
    """
    converted = []
    for w in words:
      output = self._run_hfst(w, transducer)
      # Result lines are tab-separated; the second field is the output string.
      candidates = [line.split('\t')[1] for line in output.split('\n') if '\t' in line]
      if not candidates or '?' in candidates[0]:
        return self.ERROR_MESSAGE
      converted.append(candidates[0])
    return ' '.join(converted)

  def cy2lat(self, query):
    """Transliterate Cyrillic ``query`` to Latin script.

    The characters ``1``, ``I`` and ``l`` are treated as stand-ins for the
    palochka and replaced by it before lookup.

    Returns:
      The space-joined transliteration, or ``ERROR_MESSAGE``.
    """
    # Look-alikes are unified *before* lowercasing: lower() would turn the
    # Latin capital "I" into "i", which is no longer recognisable as a
    # palochka stand-in. "1" survives both lowercasing and punctuation removal.
    unified = re.sub(r'[1Il]', '1', query).lower()
    words = [w.replace('1', self.PALOCHKA) for w in self._words(unified)]
    return self._transliterate(words, "cy2lat.transliterator.disam.hfst")

  def lat2cy(self, query):
    """Transliterate Latin ``query`` to Cyrillic script.

    Returns:
      The space-joined transliteration, or ``ERROR_MESSAGE``.
    """
    return self._transliterate(self._words(query.lower()), "lat2cy.transliterator.hfst")


In [181]:
# Shared instances used by the Flask route handlers:
# `t` serves /translit and `m` serves /morph.
t = Transliterator()
m = MorphAnalyzer()

In [226]:
# Flask application object; the route handlers are registered on it via @app.route.
app = Flask(__name__)

# Open an ngrok tunnel to local port 5000 and print the tunnel object it returns.
# NOTE(review): re-running this cell presumably opens an additional tunnel - confirm and disconnect stale ones.
public_url = ngrok.connect(5000)
print("Public URL:", public_url)


Public URL: NgrokTunnel: "https://arkosic-sheba-uncorrupt.ngrok-free.dev" -> "http://localhost:5000"


In [227]:
@app.route('/')
def index():
    """Render the landing page template."""
    landing_page = render_template('index.html')
    return landing_page

@app.route('/morph', methods = ["GET"])
def morph():
    """Render the morphological-analysis page for the ``query`` URL parameter.

    The template receives the query string and, when a query was given, the
    analysis rows as a list of dicts; without a query ``results`` is ``None``.
    """
    query = request.args.get("query") or ""

    if query:
        results = m.analyse(query).to_dict(orient="records")
    else:
        results = None

    return render_template("morph.html", query=query, results=results)

@app.route('/translit', methods=["GET"])
def translit():
    """Render the transliteration page for the ``text`` and ``direction`` URL parameters.

    ``direction`` defaults to ``"cyr2lat"``. Without text the template gets
    ``result=None``; an unrecognised direction yields an error string.
    """
    text = request.args.get("text") or ""
    direction = request.args.get("direction") or "cyr2lat"

    # Direction value -> bound conversion method of the shared transliterator.
    converters = {
        "cyr2lat": t.cy2lat,
        "lat2cyr": t.lat2cy,
    }

    if not text:
        result = None
    else:
        convert = converters.get(direction)
        result = convert(text) if convert is not None else "Invalid direction selected."

    return render_template("translit.html", text=text, direction=direction, result=result)

@app.route('/help')
def help_page():
    """Render the help page template."""
    help_html = render_template("help.html")
    return help_html

In [228]:
# Start the Flask development server with its default settings; the call keeps
# the cell running and logs each request until the server is interrupted.
app.run()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [23/Dec/2025 23:38:09] "GET /help HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Dec/2025 23:38:16] "GET /morph HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Dec/2025 23:38:18] "GET /help HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Dec/2025 23:38:23] "GET /morph HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Dec/2025 23:38:27] "GET /morph?query=соралила HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Dec/2025 23:38:30] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Dec/2025 23:38:32] "GET /translit HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Dec/2025 23:38:39] "GET /translit?text=сораликьа&direction=cyr2lat HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Dec/2025 23:38:48] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Dec/2025 23:38:55] "GET /morph HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Dec/2025 23:38:57] "GET / HTTP/1.1" 200 -
