In [None]:
from IPython.display import display, HTML
from cassis import Cas
from py_lift.dkpro import T_ANOMALY, T_POS, T_TOKEN
from py_lift.preprocessing import Spacy_Preprocessor
from py_lift.util import get_lift_typesystem, detect_language
import polars as pl
from cas_visualizer.visualizer import SpanVisualizer
from py_lift.annotators.frequency import SE_TokenZipfFrequency
from py_lift.annotators.misc import SE_SpellErrorAnnotator
from py_lift.annotators.lists import SE_FiniteVerbAnnotator

def get_preprocessed_cas(text: str, language: str) -> Cas:
    """Preprocess ``text`` with a ``Spacy_Preprocessor`` for ``language``.

    A new ``Spacy_Preprocessor`` is constructed on every call.

    Args:
        text: Raw text handed to the preprocessor's ``run`` method.
        language: Language code forwarded to ``Spacy_Preprocessor``.

    Returns:
        The result of ``Spacy_Preprocessor.run(text)`` (annotated as ``Cas``).
    """
    return Spacy_Preprocessor(language=language).run(text)

def vis_pos(cas: Cas):
    """Display the ``T_POS`` annotations of ``cas`` as highlighted spans.

    Builds a ``SpanVisualizer`` on the module-level type system ``ts``,
    registers ``T_POS`` with its ``PosValue`` feature, and shows the
    generated HTML through IPython's ``display``.

    Args:
        cas: The CAS to visualize.
    """
    visualizer = SpanVisualizer(ts)
    visualizer.selected_span_type = SpanVisualizer.HIGHLIGHT
    visualizer.add_type(name=T_POS, feature='PosValue', color="#C9DAF6")
    display(HTML(visualizer.visualize(cas)))

def vis_finite_verbs(cas: Cas, span_type = SpanVisualizer.UNDERLINE):
    """Display spell-error and finite-verb annotations of ``cas``.

    Two types are registered on a ``SpanVisualizer`` built from the
    module-level type system ``ts``: ``T_ANOMALY`` (labelled "SpellError")
    and ``org.lift.type.Structure`` (labelled "FiniteVerb"). The generated
    HTML is shown through IPython's ``display``.

    Args:
        cas: The CAS to visualize.
        span_type: Value assigned to the visualizer's ``selected_span_type``;
            defaults to ``SpanVisualizer.UNDERLINE``.
    """
    visualizer = SpanVisualizer(ts)
    visualizer.selected_span_type = span_type

    # (type name, legend label, colour) -- registered in this order.
    registered_types = (
        (T_ANOMALY, "SpellError", "#F4C7C3"),
        ("org.lift.type.Structure", "FiniteVerb", "#C3F4C9"),
    )
    for type_name, legend, colour in registered_types:
        visualizer.add_type(type_name, label=legend, color=colour)

    display(HTML(visualizer.visualize(cas)))

def vis_frequency(cas: Cas):
    """Display ``org.lift.type.Frequency`` annotations of ``cas`` by band.

    Each ``frequencyBand`` value ``f1`` .. ``f7`` is registered with a numeric
    label and its own colour (from saturated red for ``f1`` to near-white for
    ``f7``); the value ``oov`` is registered without an explicit label in a
    blue tone. Spans are rendered in ``HIGHLIGHT`` mode and the generated HTML
    is shown through IPython's ``display``.

    Args:
        cas: The CAS to visualize.
    """
    visualizer = SpanVisualizer(ts)
    visualizer.selected_span_type = SpanVisualizer.HIGHLIGHT

    frequency_type = "org.lift.type.Frequency"
    # (feature value, legend label, colour) -- registered in this order.
    labelled_bands = (
        ("f1", "1", "#F60707"),
        ("f2", "2", "#F87171"),
        ("f3", "3", "#F99090"),
        ("f4", "4", "#F9B0B0"),
        ("f5", "5", "#F8C2C2"),
        ("f6", "6", "#F8E2E2"),
        ("f7", "7", "#F7F3F3"),
    )
    for band_value, legend, colour in labelled_bands:
        visualizer.add_feature(
            frequency_type,
            feature="frequencyBand",
            value=band_value,
            label=legend,
            color=colour,
        )
    # "oov" is registered without a label argument, so the visualizer's
    # default labelling applies to it.
    visualizer.add_feature(
        frequency_type,
        feature="frequencyBand",
        value="oov",
        color="#868FF0",
    )

    display(HTML(visualizer.visualize(cas)))


# Type system returned by get_lift_typesystem(). The vis_* helpers defined
# above read this module-level name when they construct a SpanVisualizer, so
# it must be assigned before any of them is called.
ts = get_lift_typesystem()

## Turkish POS visualization example (fixed example sentence; no input widget is created here)
tr_example = "Okulda Türkçe öğrendim ama çok kötü konuşuyorum."
# Preprocess with language 'tr', then render the T_POS spans via vis_pos.
tr_cas = get_preprocessed_cas(text=tr_example, language='tr')
vis_pos(tr_cas)


## German finite verb example with spelling errors
# NOTE(review): the text appears to contain deliberate misspellings
# (e.g. "fürte", "ausgefuhrten") for the spell-error annotator -- confirm.
de_example = "Das Aufkommen des Buchdruckes fürte zu einer Umstrukturierung der Werkstätten. Im Laufe der Zeit entstehen Großbetriebe, wie der von Anton Koberger in Nürnberg. Dieser beschäftigte bis zu 100 Arbeiter an 24 Pressen. Nun wurden Facharbeiter verschiedener Berufe notwendig. Eine neue Art des intellektuellen Austausches wurde möglich. Der Drucker führte alle ausgefuhrten Arbeiten zusammen."
de_cas = get_preprocessed_cas(text=de_example, language='de')
# Both annotators process the same CAS before it is visualized;
# vis_finite_verbs registers T_ANOMALY as "SpellError" and
# org.lift.type.Structure as "FiniteVerb".
# NOTE(review): the recorded output of this cell is a FileNotFoundError for
# ../shared_resources/resources/finite_verb_postags/finite_verb_postags_de_stts.txt
# -- presumably raised by the finite-verb annotator resolving a relative path
# against the current working directory; confirm where the notebook must run.
SE_FiniteVerbAnnotator('de').process(de_cas)
SE_SpellErrorAnnotator('de').process(de_cas)
vis_finite_verbs(de_cas)

## French token frequency example
# The text is a triple-quoted literal, so it starts and ends with a newline.
fr_example = """
La première émission de lumière par un semi-conducteur date de 1907 et est découverte par Henry Round, ingénieur chez Marconi. 
En 1927, le russe Oleg Lossev dépose le premier brevet de ce qui sera appelé plus tard une diode électroluminescente, mais les applications peinent à émerger, 
le carbure de silicium alors utilisé comme semi-conducteur ayant de piètres propriétés électroluminescentes.
"""
fr_cas = get_preprocessed_cas(text=fr_example, language='fr')
# Run the Zipf frequency annotator, then highlight spans by their
# frequencyBand value via vis_frequency.
SE_TokenZipfFrequency('fr').process(fr_cas)
vis_frequency(fr_cas)


## Slovenian finite verb example
# The text is a triple-quoted literal, so it starts and ends with a newline.
sl_example = """
Vsi ljudje se rodijo svobodni ter imajo enako dostojanstvo in pravice. 
Dana sta jim razum in vest, in bi morali drug z drugim ravnati v duhu bratstva.
"""
sl_cas = get_preprocessed_cas(text=sl_example, language='sl')

# Only the finite-verb annotator is run here (no spell-error annotator), and
# the spans are rendered with HIGHLIGHT instead of the default UNDERLINE.
SE_FiniteVerbAnnotator('sl').process(sl_cas)
vis_finite_verbs(sl_cas, span_type=SpanVisualizer.HIGHLIGHT)




FileNotFoundError: File not found: ../shared_resources/resources/finite_verb_postags/finite_verb_postags_de_stts.txt