In [1]:
# Optional: install pandas into the Python environment used by the current
# Jupyter kernel (uncomment the two lines below to run).
# import sys
# !{sys.executable} -m pip install pandas

In [2]:
import pandas as pd
import utils.db_utils as db
import utils.file_utils as file
import utils.bible_utils as bible
import utils.morph_utils as mu

# Table identifiers exposed by the db helper module; original_words_table is
# passed to db.fetchRecords in a later cell.
original_words_table = db.original_words_table
target_words_table = db.target_words_table
alignment_table = db.alignment_table
# Relative paths to the SQLite alignments database and to the original-language
# JSON resources (presumably Greek = ugnt, Hebrew = uhb — confirm against ./data).
dbPath = './data/alignmentsData.sqlite'
origLangPathGreek = './data/OrigLangJson/ugnt/v0.14'
origLangPathHebrew = './data/OrigLangJson/uhb/v2.1.15'

# Open the alignments database; the returned handle is what later cells pass
# to db.fetchRecords.
connection = db.initAlignmentDB(dbPath)

# Search option flags.
# NOTE(review): none of these four flags is read by the cells visible in this
# notebook — confirm they are still needed.
searchOriginal = True
searchTarget = False
searchLemma = True
caseInsensitive = True

Connection to SQLite DB successful


In [3]:
# --- Greek morph codes: load, count, and list alphabetically ---

# Fetch every record of the original-words table whose morph code starts with 'Gr'.
items_orig_greek = db.fetchRecords(connection, original_words_table, "morph LIKE 'Gr%'")
print(f"{len(items_orig_greek)} greek words in original_words_table")
original_df = pd.DataFrame(items_orig_greek)
morphs_greek = original_df['morph']

# Number of occurrences of each distinct morph code.
frequency_greek = morphs_greek.value_counts()
print("Frequency of greek morph usage:", frequency_greek, sep="\n")

# Distinct morph codes in alphabetical order.
morphs_list_greek = sorted(frequency_greek.keys())
print("Alphabetized greek morph list:", morphs_list_greek, sep="\n")

137962 greek words in original_words_table
Frequency of greek morph usage:
Gr,CC,,,,,,,,    13124
Gr,D,,,,,,,,,     6209
Gr,P,,,,,A,,,     3883
Gr,CS,,,,,,,,     3710
Gr,N,,,,,NMS,     3442
                 ...  
Gr,NP,,,,NMSS        1
Gr,V,SEA1,,P,        1
Gr,V,PAM,GNP,        1
Gr,AA,,,,GNPS        1
Gr,EP,,,,NMP,        1
Name: morph, Length: 1090, dtype: int64
Alphabetized greek morph list:
['Gr,AA,,,,AFP,', 'Gr,AA,,,,AFPC', 'Gr,AA,,,,AFS,', 'Gr,AA,,,,AFSC', 'Gr,AA,,,,AFSS', 'Gr,AA,,,,AMP,', 'Gr,AA,,,,AMPC', 'Gr,AA,,,,AMS,', 'Gr,AA,,,,AMSC', 'Gr,AA,,,,ANP,', 'Gr,AA,,,,ANPC', 'Gr,AA,,,,ANPS', 'Gr,AA,,,,ANS,', 'Gr,AA,,,,ANSC', 'Gr,AA,,,,ANSS', 'Gr,AA,,,,DFP,', 'Gr,AA,,,,DFPC', 'Gr,AA,,,,DFS,', 'Gr,AA,,,,DFSC', 'Gr,AA,,,,DFSS', 'Gr,AA,,,,DMP,', 'Gr,AA,,,,DMPC', 'Gr,AA,,,,DMS,', 'Gr,AA,,,,DMSC', 'Gr,AA,,,,DMSS', 'Gr,AA,,,,DNP,', 'Gr,AA,,,,DNPS', 'Gr,AA,,,,DNS,', 'Gr,AA,,,,DNSC', 'Gr,AA,,,,GFP,', 'Gr,AA,,,,GFPS', 'Gr,AA,,,,GFS,', 'Gr,AA,,,,GFSC', 'Gr,AA,,,,GMP,', 'Gr,AA,,,,GMPC', 'Gr,A

In [4]:
# Distinct Greek morph codes, in the order value_counts() returned them
# (the recorded output shows the most frequent code first).
unique_morph_list = list(frequency_greek.index)

# Peek at the first entry as a sanity check.
morph = unique_morph_list[0]
morph

'Gr,CC,,,,,,,,'

In [5]:
# roles = mu.getGreekRoles()
# roles

In [6]:
# Keep only the morph codes whose syntactical role is 'V', then convert each
# one with mu.morphToDict so the fields can be tabulated.
verbs = list(mu.filterSyntacticalRole(unique_morph_list, 'V'))
verb_list_ = [mu.morphToDict(verb_morph) for verb_morph in verbs]
print(f"Number of verb types: {len(verb_list_)}")

verbs_frame = pd.DataFrame(verb_list_)

# For every morph field, report how many distinct verb morph codes carry each
# value of that field (columns are named '<field>_key').
for field in mu.morphFields:
    field_key = f"{field}_key"
    field_frequency = verbs_frame[field_key].value_counts()
    print(f"\nFrequency of {field}:", field_frequency, sep="\n")

Number of verb types: 376

Frequency of role:
V    376
Name: role_key, dtype: int64

Frequency of type:
,    376
Name: type_key, dtype: int64

Frequency of mood:
P    191
I     97
S     39
M     27
N     11
O     11
Name: mood_key, dtype: int64

Frequency of tense:
P    131
A    115
E     79
F     25
I     18
L      8
Name: tense_key, dtype: int64

Frequency of voice:
A    138
P    125
M    113
Name: voice_key, dtype: int64

Frequency of person:
,    201
3     68
2     60
1     47
Name: person_key, dtype: int64

Frequency of case:
,    186
N     51
A     50
G     45
D     36
V      8
Name: case_key, dtype: int64

Frequency of gender:
,    186
M     74
N     60
F     56
Name: gender_key, dtype: int64

Frequency of number:
S    189
P    176
,     11
Name: number_key, dtype: int64


In [7]:
# Inspect which values each morph field takes for a single role code ('N').
role = 'N'
# NOTE(review): this cell contains no print calls, so the report in the
# recorded output appears to be printed by findFieldsForRole itself; the shape
# of the returned field_data is not visible here — confirm in morph_utils.
field_data = mu.findFieldsForRole(unique_morph_list, role)


For role: 'N'

For 'N' - instances of 'role':
['N']

For 'N' - instances of 'type':
[',', 'S', 'P']

For 'N' - instances of 'mood':
[',']

For 'N' - instances of 'tense':
[',']

For 'N' - instances of 'voice':
[',']

For 'N' - instances of 'person':
[',']

For 'N' - instances of 'case':
['N', 'A', 'G', 'D', 'V']

For 'N' - instances of 'gender':
['M', 'N', 'F']

For 'N' - instances of 'number':
['S', 'P']


In [8]:
# Role codes reported by morph_utils for Greek.
roles = mu.getGreekRoles()
print(f"roles: {roles}")

# Run the field-frequency report for every role and keep each role's result.
# Previously `field_freq_data` was overwritten on every iteration, so only the
# last role's result was still reachable after the loop.  The results are now
# also collected in `field_freq_by_role` (role code -> returned data);
# `field_freq_data` is still bound as before for backward compatibility.
field_freq_by_role = {}
for role_ in roles:  # trailing underscore keeps the earlier `role` variable intact
    field_freq_data = mu.findFieldsFrequencyForRole(unique_morph_list, role_)
    field_freq_by_role[role_] = field_freq_data


roles: ['N', 'A', 'E', 'R', 'V', 'I', 'P', 'D', 'C', 'T']

For role: 'N'

For 'noun (N)' - frequency of 'role':
N    157
Name: role_key, dtype: int64

For 'noun (N)' - frequency of 'type':
,    69
S    56
P    32
Name: type_key, dtype: int64

For 'noun (N)' - frequency of 'mood':
,    157
Name: mood_key, dtype: int64

For 'noun (N)' - frequency of 'tense':
,    157
Name: tense_key, dtype: int64

For 'noun (N)' - frequency of 'voice':
,    157
Name: voice_key, dtype: int64

For 'noun (N)' - frequency of 'person':
,    157
Name: person_key, dtype: int64

For 'noun (N)' - frequency of 'case':
N    43
A    38
G    32
D    28
V    16
Name: case_key, dtype: int64

For 'noun (N)' - frequency of 'gender':
M    58
N    52
F    47
Name: gender_key, dtype: int64

For 'noun (N)' - frequency of 'number':
S    83
P    74
Name: number_key, dtype: int64

For role: 'A'

For 'adjective (A)' - frequency of 'role':
A    85
Name: role_key, dtype: int64

For 'adjective (A)' - frequency of 'type':
A    59
R 

In [9]:
# Probe mu.getIndexForChar with a character it does not recognise: the recorded
# output shows it logs "unexpected character" and returns -1 for ':'.
char = ':'
index = mu.getIndexForChar(char)
print(f"index for '{char}': {index}")

getIndexForChar - unexpected character ':'
index for ':': -1


In [10]:
# Look up the human-readable role name for the Greek role code 'V'
# (the recorded output is 'verb').
mu.findRoleNameForCharGreek('V')

'verb'