# Imports

In [1]:
import re
from pathlib import Path
import pandas as pd
from unicodedata import normalize
from pyarabic.araby import DIACRITICS, SHADDA, LETTERS, is_arabicword
from time import time
from datetime import datetime

# Define Variables

In [2]:
# Regex used to strip the trailing harakat of an Arabic word.
# It matches any single diacritic that is followed only by non-letter
# characters up to the end of the string, so `.sub("", word)` removes every
# diacritic located after the last letter (not just one).
NOT_LETTERS_PATTERN = f"[^{LETTERS}]"  # character class: anything that is not in LETTERS
DIACRITICS_PATTERN = "".join(DIACRITICS)  # diacritics concatenated for use inside [...]
LAST_HARAKAT_PATTERN = re.compile(
    rf"[{DIACRITICS_PATTERN}](?={NOT_LETTERS_PATTERN}*$)", re.UNICODE
)

In [3]:
# Set of Harakat
DIACRITICS_SET = set(DIACRITICS)  # Arabic diacritics/short vowels, as a set for membership tests in reorder_shadda

In [4]:
# Cell values that get_lin skips instead of emitting them as a field value.
# NOTE(review): "غير معدود" reads as "uncountable" — presumably a placeholder
# used where a noun has no plural; verify against the lexicon source.
by_pass_words = ["غير معدود"]


# Lexicon column name -> GF record field name.
# get_lin only emits columns that appear as keys here.
MORPHOLOGY_MAP = {
    "verb_form": "cls",
    "gender": "g",
    "root": "root",
    "plural": "pl",
    "masc_pl": "masc_pl",
    "fem_pl": "fem_pl",
    "imperfect": "imperfect",
}

In [5]:
# Timestamp formatted as YYYYMMDD.HHMM, taken when this cell is executed.
# NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed.
TIME_STAMP = datetime.now().strftime("%Y%m%d.%H%M")

# Define paths

In [6]:
# Input lexicon CSVs (one per part of speech), loaded further down with pd.read_csv.
# The directory is relative to the working directory the notebook runs from.
data_dir = Path("../data/interim/lexicon")
ar_adjectives_path = data_dir / "20231028.172908_adjectives_lexicon.csv"
ar_verbs_path = data_dir / "20231028.172908_verbs_lexicon.csv"
ar_nouns_path = data_dir / "20231028.172908_nouns_lexicon.csv"

In [7]:
# Directory that receives the generated MorphoDictAraAbs.gf / MorphoDictAra.gf files.
output_dir = Path("../data/processed/gf")

In [8]:
# Allow up to 400 characters per cell when displaying frames, so the long
# generated GF strings are not truncated in the previews below.
pd.set_option('max_colwidth', 400)

# Define Functions

## Utility Functions

In [9]:
def reorder_shadda(ar_string: str) -> str:
    """Move each shadda in front of the diacritic that directly precedes it.

    Makes a single left-to-right pass over the characters: whenever a
    character from ``DIACRITICS_SET`` is immediately followed by ``SHADDA``
    the two are swapped. ``normalize_ar`` calls this after
    ``unicodedata.normalize`` to correct the resulting mark order.

    Args:
        ar_string: Text to fix.

    Returns:
        A new string with the swapped characters; all other characters keep
        their position.
    """
    chars = list(ar_string)
    position = 0

    while position < len(chars) - 1:
        current, following = chars[position], chars[position + 1]
        if following == SHADDA and current in DIACRITICS_SET:
            # diacritic + shadda  ->  shadda + diacritic
            chars[position], chars[position + 1] = following, current
        position += 1

    return "".join(chars)

In [10]:
def normalize_ar(ar_vocalized: str, verbose: bool = False) -> str:
    """Return the NFC form of an Arabic string with the shadda order fixed.

    Args:
        ar_vocalized: Vocalized Arabic text. Any non-string value (for example
            a missing cell holding NaN) is returned unchanged.
        verbose: When true, print the Unicode name of every character of the
            result, which helps to inspect the order of the combining marks.

    Returns:
        The normalized string, or the input itself when it is not a string.
    """
    if not isinstance(ar_vocalized, str):
        return ar_vocalized
    ar_norm = normalize("NFC", ar_vocalized)
    ar_norm = reorder_shadda(ar_norm)
    if verbose:
        # Only `normalize` is imported at file level, so bring the lookup
        # function in here; the alias avoids clashing with a notebook
        # variable called `name`.
        from unicodedata import name as char_name

        # The fallback keeps the debug print from raising ValueError on code
        # points that have no Unicode name.
        print([char_name(char, f"U+{ord(char):04X}") for char in ar_norm])
    return ar_norm

## Main Functions

In [11]:
def get_lin(row):
    """Collect the GF record fields that can be read directly from a row.

    Every column listed in ``MORPHOLOGY_MAP`` whose value is a string and is
    not one of ``by_pass_words`` becomes a ``"<field> = <value>"`` assignment.
    Values recognised by ``is_arabicword`` are wrapped in double quotes; any
    other string is emitted as is.

    Args:
        row: Mapping-like lexicon row (anything ``dict()`` accepts).

    Returns:
        Dict mapping the GF field name to its assignment string.
    """
    lins = {}
    for column, value in dict(row).items():
        field = MORPHOLOGY_MAP.get(column)
        if not field:
            continue  # column has no GF counterpart
        if not isinstance(value, str) or value in by_pass_words:
            continue  # missing cell or placeholder value
        rendered = f'"{value}"' if is_arabicword(value) else value
        lins[field] = f'{field} = {rendered}'
    return lins

In [12]:
def build_gf_abstract_entries(row):
    """Build the GF abstract and concrete entries for one lexicon row.

    The category letter is the first character of the last ``_``-separated
    segment of ``row["en"]`` (e.g. ``"designate_4_V2"`` gives ``"V"``).

    Args:
        row: Mapping-like lexicon row (a DataFrame row when used through
            ``DataFrame.apply(..., axis="columns")``). Reads ``en``,
            ``vocal_forms``, ``wiki_idx`` and ``senses``; adjective rows also
            need ``gender`` and ``other_gender_form``. The remaining record
            fields come from ``get_lin(row)``.

    Returns:
        A ``(abstract, concrete)`` tuple of strings: the ``fun`` declaration
        followed by its ``--`` provenance comment, and the matching ``lin``
        definition calling ``wmkV`` / ``wmkN`` / ``wmkA``.

    Raises:
        ValueError: If the category letter is not ``V``, ``N`` or ``A``.
    """
    # Record fields per category, in the order they are emitted.
    fields_by_cat = {
        "V": ["cls", "imperfect", "perfect", "root"],
        "N": ["g", "pl", "root", "sg"],
        "A": ["fem_pl", "fem_sg", "masc_pl", "masc_sg", "root"],
    }

    # [:1] instead of [0]: an empty suffix ends up in the ValueError below
    # rather than in an IndexError.
    cat = row["en"].split("_")[-1][:1]
    if cat not in fields_by_cat:
        # Previously this case fell through every branch and failed later
        # with an UnboundLocalError on `list_lins`.
        raise ValueError(
            f"unsupported category {cat!r} derived from en={row['en']!r}; "
            "expected one of 'V', 'N', 'A'"
        )

    lemma = row["vocal_forms"]
    idx = row["wiki_idx"]
    senses = row["senses"]
    source = "wikitionary"

    gf_fun_str = "fun '{}_{}' : {} ; ".format(lemma, cat, cat)
    comment_str = "-- source: {}, idx: {}, senses: {}".format(source, idx, senses)

    # Fields filled from the lemma (or its other-gender form) rather than
    # from get_lin. A value of None means "leave the field out".
    if cat == "V":
        direct = {"perfect": lemma}
    elif cat == "N":
        direct = {"sg": lemma}
    else:
        gender = row["gender"]
        other_form = row["other_gender_form"]
        if not isinstance(other_form, str):
            # A missing cell (NaN) used to be written out as the text "nan";
            # omit the field instead, as get_lin does for missing values.
            other_form = None
        direct = {
            "fem_sg": lemma if gender == "fem" else other_form,
            "masc_sg": lemma if gender == "masc" else other_form,
        }

    dict_lins = get_lin(row)
    list_lins = []
    for field in fields_by_cat[cat]:
        if field in direct:
            value = direct[field]
            if value is not None:
                list_lins.append(f'{field} = "{value}"')
        elif lin_fun := dict_lins.get(field):
            list_lins.append(lin_fun)

    str_lins = " ; ".join(list_lins)
    lin_entry = f"'{lemma}_{cat}'"
    lin = f"lin {lin_entry} = wmk{cat} {{ " + str_lins + " } ;"

    return f"{gf_fun_str}{comment_str}", lin

# Load CSV Files

In [13]:
# Load the three lexicons with identical options: the first CSV column is the
# index and the "senses" column is parsed from its textual form.
# NOTE(review): pd.eval evaluates text read from the file; if these CSVs can
# come from an untrusted source, consider a literal-only parser instead.
df_adjs, df_nouns, df_verbs = (
    pd.read_csv(lexicon_path, index_col=0, converters={"senses": pd.eval})
    for lexicon_path in (ar_adjectives_path, ar_nouns_path, ar_verbs_path)
)


# Build Abstract GF

In [14]:
# Inflected forms may be missing: normalise each column, then strip the
# trailing diacritics from string cells only.
for col in ("other_gender_form", "masc_pl", "fem_pl"):
    df_adjs[col] = df_adjs[col].map(normalize_ar)
    df_adjs[col] = df_adjs[col].map(
        lambda s: LAST_HARAKAT_PATTERN.sub("", s) if isinstance(s, str) else s
    )

# The lemma column is stripped without a type guard, so a non-string cell
# raises here.
df_adjs["vocal_forms"] = df_adjs["vocal_forms"].map(normalize_ar)
df_adjs["vocal_forms"] = df_adjs["vocal_forms"].map(
    lambda s: LAST_HARAKAT_PATTERN.sub("", s)
)

# One (abstract, concrete) pair per row, split into two columns.
adj_entries = df_adjs.apply(build_gf_abstract_entries, axis="columns")
df_adjs["abs"], df_adjs["cnc"] = zip(*adj_entries)

In [15]:
# Normalise the lemma column, then strip its trailing diacritics (no type
# guard: a non-string cell raises here).
df_nouns["vocal_forms"] = df_nouns["vocal_forms"].map(normalize_ar)
df_nouns["vocal_forms"] = df_nouns["vocal_forms"].map(
    lambda s: LAST_HARAKAT_PATTERN.sub("", s)
)

# One (abstract, concrete) pair per row, split into two columns.
noun_entries = df_nouns.apply(build_gf_abstract_entries, axis="columns")
df_nouns["abs"], df_nouns["cnc"] = zip(*noun_entries)

In [16]:

# Normalise both verb columns, then strip the trailing diacritics from
# string cells only (non-string cells pass through unchanged).
for col in ("vocal_forms", "imperfect"):
    df_verbs[col] = df_verbs[col].map(normalize_ar)
    df_verbs[col] = df_verbs[col].map(
        lambda s: LAST_HARAKAT_PATTERN.sub("", s) if isinstance(s, str) else s
    )

# One (abstract, concrete) pair per row, split into two columns.
verb_entries = df_verbs.apply(build_gf_abstract_entries, axis="columns")
df_verbs["abs"], df_verbs["cnc"] = zip(*verb_entries)

In [17]:
# Preview only: show the first rows instead of dumping the whole frame.
df_adjs.head(10)

Unnamed: 0,li,wiki_idx,ar,en,vocal_forms,nesba,ar_letters,senses,gender,other_gender_form,masc_pl,fem_pl,root,abs,cnc
1,292,5922,مُطلَق,absolute_3_A,مُطْلَق,0,ميم-ضمة | طاء-سكون | لام-فتحة | قاف,"[absolute, utter, very, unlimited, unrestricted / downright / sovereign / liberated, free / implicit]",masc,مُطْلَقَة,مُطْلَقُون,مُطْلَقَات,طلق,"fun 'مُطْلَق_A' : A ; -- source: wikitionary, idx: 5922, senses: ['absolute, utter, very, unlimited, unrestricted / downright / sovereign / liberated, free / implicit']","lin 'مُطْلَق_A' = wmkA { fem_pl = ""مُطْلَقَات"" ; fem_sg = ""مُطْلَقَة"" ; masc_pl = ""مُطْلَقُون"" ; masc_sg = ""مُطْلَق"" ; root = ""طلق"" } ;"
7,3970,64,عربي,arabic_A,عَرَبِي,1,عين-فتحة | راء-فتحة | باء-كسرة | ياء-شدة,[Arab / Arabic / Arabian],masc,عَرَبِيَّة,عَرَب,عَرَبِيَّات,عرب,"fun 'عَرَبِي_A' : A ; -- source: wikitionary, idx: 64, senses: ['Arab / Arabic / Arabian']","lin 'عَرَبِي_A' = wmkA { fem_pl = ""عَرَبِيَّات"" ; fem_sg = ""عَرَبِيَّة"" ; masc_pl = ""عَرَب"" ; masc_sg = ""عَرَبِي"" ; root = ""عرب"" } ;"
48,16805,25071,شُيُوعِيّ,communist_A,شُيُوعِي,1,شين-ضمة | ياء-ضمة | واو | عين-كسرة | ياء-شدة,[communist / Communist / communal],masc,شُيُوعِيَّة,شُيُوعِيُّون,شُيُوعِيَّات,شيع,"fun 'شُيُوعِي_A' : A ; -- source: wikitionary, idx: 25071, senses: ['communist / Communist / communal']","lin 'شُيُوعِي_A' = wmkA { fem_pl = ""شُيُوعِيَّات"" ; fem_sg = ""شُيُوعِيَّة"" ; masc_pl = ""شُيُوعِيُّون"" ; masc_sg = ""شُيُوعِي"" ; root = ""شيع"" } ;"
50,17986,8330,دستوري,constitutional_2_A,دُسْتُورِي,1,دال-ضمة | سين-سكون | تاء-ضمة | واو | راء-كسرة | ياء-شدة,[constitutional],masc,دُسْتُورِيَّة,دُسْتُورِيُّون,دُسْتُورِيَّات,دستور,"fun 'دُسْتُورِي_A' : A ; -- source: wikitionary, idx: 8330, senses: ['constitutional']","lin 'دُسْتُورِي_A' = wmkA { fem_pl = ""دُسْتُورِيَّات"" ; fem_sg = ""دُسْتُورِيَّة"" ; masc_pl = ""دُسْتُورِيُّون"" ; masc_sg = ""دُسْتُورِي"" ; root = ""دستور"" } ;"
62,20596,2656,جَارٍ,current_A,جَار,0,جيم-فتحة | ألف | راء-كسرتان,[active participle of جَرَى (jarā)],masc,جَارِيَة,جَارُون,جَارِيَات,جري,"fun 'جَار_A' : A ; -- source: wikitionary, idx: 2656, senses: ['active participle of جَرَى (jarā)']","lin 'جَار_A' = wmkA { fem_pl = ""جَارِيَات"" ; fem_sg = ""جَارِيَة"" ; masc_pl = ""جَارُون"" ; masc_sg = ""جَار"" ; root = ""جري"" } ;"
100,33131,10031,سابق,former_3_A,سَابِق,0,سين-فتحة | ألف | باء-كسرة | قاف,"[preceding, previous / former / active participle of سَبَقَ (sabaqa).]",masc,سَابِقَة,سُبَّاق,سَابِقَات,سبق,"fun 'سَابِق_A' : A ; -- source: wikitionary, idx: 10031, senses: ['preceding, previous / former / active participle of سَبَقَ (sabaqa).']","lin 'سَابِق_A' = wmkA { fem_pl = ""سَابِقَات"" ; fem_sg = ""سَابِقَة"" ; masc_pl = ""سُبَّاق"" ; masc_sg = ""سَابِق"" ; root = ""سبق"" } ;"
102,33517,4339,حُرّ,free_1_A,حُر,0,حاء-ضمة | راء-شدة,"[free / unimpeded / set free, freedman / born free and noble / virtuous, genuine, true, pure, good / unmixed]",masc,حُرَّة,أَحْرَار,حُرَّات,حرر,"fun 'حُر_A' : A ; -- source: wikitionary, idx: 4339, senses: ['free / unimpeded / set free, freedman / born free and noble / virtuous, genuine, true, pure, good / unmixed']","lin 'حُر_A' = wmkA { fem_pl = ""حُرَّات"" ; fem_sg = ""حُرَّة"" ; masc_pl = ""أَحْرَار"" ; masc_sg = ""حُر"" ; root = ""حرر"" } ;"
121,39071,12466,عال,high_1_A,عَال,0,عين-فتحة | ألف | لام-كسرتان,"[active participle of عَلَا (ʕalā) or active participle of عَلِيَ (ʕaliya). / high / exalted, lofty, excellent]",masc,عَالِيَة,عَالُون,عَالِيَات,علو,"fun 'عَال_A' : A ; -- source: wikitionary, idx: 12466, senses: ['active participle of عَلَا (ʕalā) or active participle of عَلِيَ (ʕaliya). / high / exalted, lofty, excellent']","lin 'عَال_A' = wmkA { fem_pl = ""عَالِيَات"" ; fem_sg = ""عَالِيَة"" ; masc_pl = ""عَالُون"" ; masc_sg = ""عَال"" ; root = ""علو"" } ;"
141,42239,7474,شَخْصي,individual_4_A,شَخْصِي,1,شين-فتحة | خاء-سكون | صاد-كسرة | ياء-شدة,[own / personal / personal],masc,شَخْصِيَّة,شَخْصِيُّون,شَخْصِيَّات,شخص,"fun 'شَخْصِي_A' : A ; -- source: wikitionary, idx: 7474, senses: ['own / personal / personal']","lin 'شَخْصِي_A' = wmkA { fem_pl = ""شَخْصِيَّات"" ; fem_sg = ""شَخْصِيَّة"" ; masc_pl = ""شَخْصِيُّون"" ; masc_sg = ""شَخْصِي"" ; root = ""شخص"" } ;"
167,46486,4262,كبِير,large_1_A,كَبِير,0,كاف-فتحة | باء-كسرة | ياء | راء,"[big, large / great, great importance / old (for a person)]",masc,كَبِيرَة,كُبَرَاء,كَبِيرَات,كبر,"fun 'كَبِير_A' : A ; -- source: wikitionary, idx: 4262, senses: ['big, large / great, great importance / old (for a person)']","lin 'كَبِير_A' = wmkA { fem_pl = ""كَبِيرَات"" ; fem_sg = ""كَبِيرَة"" ; masc_pl = ""كُبَرَاء"" ; masc_sg = ""كَبِير"" ; root = ""كبر"" } ;"


In [18]:
# Preview only: show the first rows instead of dumping the whole frame.
df_verbs.head(10)

Unnamed: 0,li,wiki_idx,ar,en,vocal_forms,nesba,ar_letters,senses,verb_form,root,imperfect,abs,cnc
79,22868,117316,صمم,designate_4_V2,صَمَّم,0,صاد-فتحة | ميم-شدة-فتحة | ميم-فتحة,"[to deafen [+accusative], to deafen / to resolve, to become bent on [+ عَلَى (object)], to resolve, to become bent on / to design, to configure, to devise, to contrive, to fix [+accusative], to design, to configure, to devise, to contrive, to fix]",FormII,صمم,يُصَمِّم,"fun 'صَمَّم_V' : V ; -- source: wikitionary, idx: 117316, senses: ['to deafen [+accusative], to deafen / to resolve, to become bent on [+ عَلَى (object)], to resolve, to become bent on / to design, to configure, to devise, to contrive, to fix [+accusative], to design, to configure, to devise, to contrive, to fix']","lin 'صَمَّم_V' = wmkV { cls = FormII ; imperfect = ""يُصَمِّم"" ; perfect = ""صَمَّم"" ; root = ""صمم"" } ;"
110,38154,698,ملك,have_1_V2,مَلَك,0,ميم-فتحة | لام-فتحة | كاف-فتحة,"[to take in possession, to take over, to acquire, to seize / to possess, to lay hold, to own, to have, to be the owner / to dominate, to control / to be the master / to be capable, to be able, to be in a position to / to rule, to reign, to exercise authority, to hold sway, to lord over]",FormI,ملك,يَمْلِك,"fun 'مَلَك_V' : V ; -- source: wikitionary, idx: 698, senses: ['to take in possession, to take over, to acquire, to seize / to possess, to lay hold, to own, to have, to be the owner / to dominate, to control / to be the master / to be capable, to be able, to be in a position to / to rule, to reign, to exercise authority, to hold sway, to lord over']","lin 'مَلَك_V' = wmkV { cls = FormI ; imperfect = ""يَمْلِك"" ; perfect = ""مَلَك"" ; root = ""ملك"" } ;"
232,75568,8035,عرض,show_2_V2,عَرَض,0,| | | | | | عين-فتحة | راء-فتحة | ضاد-فتحة,"[to appear / to happen, to occur / to show, to display, to present / to expose]",FormI,عرض,يَعْرِض,"fun 'عَرَض_V' : V ; -- source: wikitionary, idx: 8035, senses: ['to appear / to happen, to occur / to show, to display, to present / to expose']","lin 'عَرَض_V' = wmkV { cls = FormI ; imperfect = ""يَعْرِض"" ; perfect = ""عَرَض"" ; root = ""عرض"" } ;"
249,78616,6248,تكلم,speak_3_V2,تَكَلَّم,0,تاء-فتحة | كاف-فتحة | لام-شدة-فتحة | ميم-فتحة,"[to talk, to have a discussion or conversation]",FormV,كلم,يَتَكَلَّم,"fun 'تَكَلَّم_V' : V ; -- source: wikitionary, idx: 6248, senses: ['to talk, to have a discussion or conversation']","lin 'تَكَلَّم_V' = wmkV { cls = FormV ; imperfect = ""يَتَكَلَّم"" ; perfect = ""تَكَلَّم"" ; root = ""كلم"" } ;"
266,82083,20863,نجح,succeed_V2,نَجَح,0,نون-فتحة | جيم-فتحة | حاء-فتحة,"[to succeed, to be successful]",FormI,نجح,يَنْجَح,"fun 'نَجَح_V' : V ; -- source: wikitionary, idx: 20863, senses: ['to succeed, to be successful']","lin 'نَجَح_V' = wmkV { cls = FormI ; imperfect = ""يَنْجَح"" ; perfect = ""نَجَح"" ; root = ""نجح"" } ;"


In [19]:
# Preview only: show the first rows instead of dumping the whole frame.
df_nouns.head(10)

Unnamed: 0,li,wiki_idx,ar,en,vocal_forms,nesba,ar_letters,senses,gender,plural,root,abs,cnc
3,1558,8230,أفْغانِيّ,afghani_1_N,أَفْغَانِي,0,همزة على الألف-فتحة | فاء-سكون | غين-فتحة | ألف | نون-كسرة | ياء-شدة,[Afghan],masc,أَفْغَان,أفغان,"fun 'أَفْغَانِي_N' : N ; -- source: wikitionary, idx: 8230, senses: ['Afghan']","lin 'أَفْغَانِي_N' = wmkN { g = masc ; pl = ""أَفْغَان"" ; root = ""أفغان"" ; sg = ""أَفْغَانِي"" } ;"
6,1643,1770,سِنّ,age_1_N,سِن,0,سين-كسرة | نون-شدة,"[tooth, tusk, fang / point or tip / a spearhead or arrowhead / age (years of life) / cog, sprocket, prong]",fem,أَسْنَان,سنن,"fun 'سِن_N' : N ; -- source: wikitionary, idx: 1770, senses: ['tooth, tusk, fang / point or tip / a spearhead or arrowhead / age (years of life) / cog, sprocket, prong']","lin 'سِن_N' = wmkN { g = fem ; pl = ""أَسْنَان"" ; root = ""سنن"" ; sg = ""سِن"" } ;"
10,4109,503,مِنْطقة,area_6_N,مِنْطَقَة,0,ميم-كسرة | نون-سكون | طاء-فتحة | قاف-فتحة | تاء مربوطة,"[belt, girdle / zone / vicinity, range, district, area, territory, sphere / military sector / area, an administrative subdivision of Kuwait / province]",fem,مِنْطَقَات,نطق,"fun 'مِنْطَقَة_N' : N ; -- source: wikitionary, idx: 503, senses: ['belt, girdle / zone / vicinity, range, district, area, territory, sphere / military sector / area, an administrative subdivision of Kuwait / province']","lin 'مِنْطَقَة_N' = wmkN { g = fem ; pl = ""مِنْطَقَات"" ; root = ""نطق"" ; sg = ""مِنْطَقَة"" } ;"
11,4109,504,مِنْطقة,area_6_N,مَنْطِقَة,0,ميم-فتحة | نون-سكون | طاء-كسرة | قاف-فتحة | تاء مربوطة,"[zone / vicinity, range, district, area, territory, sphere / military sector / area, an administrative subdivision of Kuwait / province]",fem,مَنْطِقَات,نطق,"fun 'مَنْطِقَة_N' : N ; -- source: wikitionary, idx: 504, senses: ['zone / vicinity, range, district, area, territory, sphere / military sector / area, an administrative subdivision of Kuwait / province']","lin 'مَنْطِقَة_N' = wmkN { g = fem ; pl = ""مَنْطِقَات"" ; root = ""نطق"" ; sg = ""مَنْطِقَة"" } ;"
12,4109,505,مِنْطقة,area_6_N,مَنْطَقَة,0,ميم-فتحة | نون-سكون | طاء-فتحة | قاف-فتحة | تاء مربوطة,[verbal noun of مَنْطَقَ (manṭaqa) (form Iq)],fem,مَناطِق,نطق,"fun 'مَنْطَقَة_N' : N ; -- source: wikitionary, idx: 505, senses: ['verbal noun of مَنْطَقَ (manṭaqa) (form Iq)']","lin 'مَنْطَقَة_N' = wmkN { g = fem ; pl = ""مَناطِق"" ; root = ""نطق"" ; sg = ""مَنْطَقَة"" } ;"
14,4871,115331,آشوري,assyrian_2_N,آشُورِي,0,ألف ممدودة | شين-ضمة | واو | راء-كسرة | ياء-شدة,"[Assyrian, Ashurite]",masc,آشُورِيُّون,أشور,"fun 'آشُورِي_N' : N ; -- source: wikitionary, idx: 115331, senses: ['Assyrian, Ashurite']","lin 'آشُورِي_N' = wmkN { g = masc ; pl = ""آشُورِيُّون"" ; root = ""أشور"" ; sg = ""آشُورِي"" } ;"
15,6160,6264,منع,ban_2_N,مَنْع,0,ميم-فتحة | نون-سكون | عين,[verbal noun of مَنَعَ (manaʕa) (form I) / prevention],masc,مُنُوعات,منع,"fun 'مَنْع_N' : N ; -- source: wikitionary, idx: 6264, senses: ['verbal noun of مَنَعَ (manaʕa) (form I) / prevention']","lin 'مَنْع_N' = wmkN { g = masc ; pl = ""مُنُوعات"" ; root = ""منع"" ; sg = ""مَنْع"" } ;"
18,7316,123391,بيلاروسي,belarusian_N,بِيلَارُوسِي,0,باء-كسرة | ياء | لام-فتحة | ألف | راء-ضمة | واو | سين-كسرة | ياء-شدة,[Belarusian (person)],masc,بيلاروسيون,,"fun 'بِيلَارُوسِي_N' : N ; -- source: wikitionary, idx: 123391, senses: ['Belarusian (person)']","lin 'بِيلَارُوسِي_N' = wmkN { g = masc ; pl = ""بيلاروسيون"" ; sg = ""بِيلَارُوسِي"" } ;"
27,9270,274,كِتاب,book_1_N,كِتَاب,0,كاف-كسرة | تاء-فتحة | ألف | باء,"[verbal noun of كَتَبَ (kataba) (form I) / verbal noun of كَاتَبَ (kātaba) (form III) / letter, note, paper, piece of writing, message / book / the Scripture, the Qur'an or the Bible / record, document, deed, contract / a marriage contract.]",masc,كُتُب,كتب,"fun 'كِتَاب_N' : N ; -- source: wikitionary, idx: 274, senses: [""verbal noun of كَتَبَ (kataba) (form I) / verbal noun of كَاتَبَ (kātaba) (form III) / letter, note, paper, piece of writing, message / book / the Scripture, the Qur'an or the Bible / record, document, deed, contract / a marriage contract.""]","lin 'كِتَاب_N' = wmkN { g = masc ; pl = ""كُتُب"" ; root = ""كتب"" ; sg = ""كِتَاب"" } ;"
35,9394,4496,حدّ,border_1_N,حَد,0,حاء-فتحة | دال-شدة,"[verbal noun of حَدَّ (ḥadda) (form I) / limit / boundary, border / frontier / term / end, goal, aim / district / reach, sphere of action / difference / definition / rule / punishment / edge, point / passion / intoxicating strength of liquors / strength, bravery / energy / manner, way / hindrance / side]",masc,حُدُود,حدد,"fun 'حَد_N' : N ; -- source: wikitionary, idx: 4496, senses: ['verbal noun of حَدَّ (ḥadda) (form I) / limit / boundary, border / frontier / term / end, goal, aim / district / reach, sphere of action / difference / definition / rule / punishment / edge, point / passion / intoxicating strength of liquors / strength, bravery / energy / manner, way / hindrance / side']","lin 'حَد_N' = wmkN { g = masc ; pl = ""حُدُود"" ; root = ""حدد"" ; sg = ""حَد"" } ;"


# Export GF Files

In [20]:
# One entry per line for each part of speech: `abs` holds the abstract `fun`
# declarations, `cnc` the concrete `lin` definitions.
nouns_abs, adjs_abs, verbs_abs = (
    "\n".join(frame["abs"].to_list()) for frame in (df_nouns, df_adjs, df_verbs)
)
nouns_lin, adjs_lin, verbs_lin = (
    "\n".join(frame["cnc"].to_list()) for frame in (df_nouns, df_adjs, df_verbs)
)

# Abstract module: header, then nouns, adjectives and verbs, then the closing brace.
ar_absolute_grammar = (
    "abstract MorphoDictAraAbs = Cat ** {\n"
    + "\n".join([nouns_abs, adjs_abs, verbs_abs])
    + "\n"
    + "}"
)

# Concrete module, assembled in the same order as the abstract one.
ar_concrete_grammar = (
    "concrete MorphoDictAra of MorphoDictAraAbs = CatAra ** open ParadigmsAra, MoreAra in {\n"
    + "\n".join([nouns_lin, adjs_lin, verbs_lin])
    + "\n"
    + "}"
)

In [21]:
# Create the target directory when it is missing, so the cell also works on a
# fresh checkout where ../data/processed/gf does not exist yet.
output_dir.mkdir(parents=True, exist_ok=True)
with open(output_dir / "MorphoDictAraAbs.gf", encoding="utf-8", mode="wt") as file:
    file.write(ar_absolute_grammar)

In [22]:
# Create the target directory when it is missing, so the cell also works on a
# fresh checkout where ../data/processed/gf does not exist yet.
output_dir.mkdir(parents=True, exist_ok=True)
with open(output_dir / "MorphoDictAra.gf", encoding="utf-8", mode="wt") as file:
    file.write(ar_concrete_grammar)