In [1]:
import pandas as pd
import utils.db_utils as db
import utils.file_utils as file
import utils.bible_utils as bible

# Local aliases for the table attributes exposed by the db helper module.
original_words_table = db.original_words_table
target_words_table = db.target_words_table
alignment_table = db.alignment_table

# Paths to the alignment database and the original-language JSON resources.
# The two origLang paths are defined here but not used in this cell.
dbPath = './data/alignmentsData.sqlite'
origLangPathGreek = './data/OrigLangJson/ugnt/v0.14'
origLangPathHebrew = './data/OrigLangJson/uhb/v2.1.15'

# Named flags passed positionally to db.findAlignmentsForWord by later cells.
searchOriginal = True
# Deliberately False: later cells pass it in the same positional slot as
# searchOriginal when they want to search the target language instead.
searchTarget = False
searchLemma = True
caseInsensitive = True

connection = db.initAlignmentDB(dbPath)

# Load each table and print its size. The counts depend on the current
# contents of the database, so they are printed rather than recorded in
# comments (previously hard-coded counts here had gone stale).
# The '' third argument is presumably an empty filter - confirm against
# db.fetchRecords.
items_target = db.fetchRecords(connection, target_words_table, '')
print(f"{len(items_target)} items in target_words_table")

items_align = db.fetchRecords(connection, alignment_table, '')
print(f"{len(items_align)} items in alignment_table")

items_orig = db.fetchRecords(connection, original_words_table, '')
print(f"{len(items_orig)} items in original_words_table")

Connection to SQLite DB successful
69522 items in target_words_table
47476 items in alignment_table
357007 items in original_words_table


In [2]:
# Exact-match lookup of the inflected form Θεοῦ among the original-language words.
godAlignments = db.findAlignmentsForWord(
    connection,
    'Θεοῦ',
    searchOriginal,
)
# Tally how often each distinct alignment string occurs, most frequent first.
frequency = godAlignments.loc[:, 'alignmentTxt'].value_counts()
frequency

262 items in search


Θεοῦ = God            138
Θεοῦ = of God          40
τοῦ Θεοῦ = God         39
τοῦ Θεοῦ = of God      34
τοῦ Θεοῦ = God s        2
Θεοῦ = by God           1
Θεοῦ = God s            1
Θεοῦ = between God      1
τοῦ Θεοῦ = for God      1
Θεοῦ = of a god         1
Name: alignmentTxt, dtype: int64

In [3]:
# Lemma-based lookup: match every inflected form of θεός in the original language.
godAlignments = db.findAlignmentsForWord(
    connection,
    'θεός',
    searchOriginal,
    searchLemma,
)
# Tally how often each distinct alignment string occurs, most frequent first.
frequency = godAlignments.loc[:, 'alignmentTxt'].value_counts()
frequency

509 items in search


Θεοῦ = God             138
ὁ Θεὸς = God            80
Θεοῦ = of God           40
τοῦ Θεοῦ = God          39
τὸν Θεόν = God          37
τοῦ Θεοῦ = of God       34
τὸν Θεὸν = God          22
Θεῷ = God               21
Θεὸς = God              18
τῷ Θεῷ = God            14
ὁ Θεός = God            12
Θεὸν = God               9
τῷ Θεῷ = to God          6
Θεὸς = the God           3
Θεῷ = the God            2
Θεὸν = the God           2
θεοὶ = gods              2
τοῦ Θεοῦ = God s         2
Θεόν = God               2
ὁ δὲ Θεός = But God      1
θεὸν = goddess           1
θεοῦ = god               1
Θεοῦ = God s             1
θεοὺς = gods             1
ὁ Θεὸς = god is          1
ὁ Θεός = God is          1
Θεοῦ = between God       1
θεοῖς = gods             1
Θεός = God               1
Θεοῦ = of a god          1
ὁ Θεὸς = but God         1
Θεῷ = with God           1
Θεοῦ = by God            1
θεοῦ = God               1
τοῦ Θεοῦ = for God       1
Θεῷ = an God             1
ὅτι ὁ Θεὸς = God         1
Θ

In [4]:
# Exact-match lookup of 'God' on the target-language side: searchTarget (False)
# goes in the positional slot where the original-language cells pass searchOriginal.
godAlignments = db.findAlignmentsForWord(
    connection,
    'God',
    searchTarget,
)
# Tally how often each distinct alignment string occurs, most frequent first.
frequency = godAlignments.loc[:, 'alignmentTxt'].value_counts()
frequency

524 items in search


Θεοῦ = God                                 138
ὁ Θεὸς = God                                80
Θεοῦ = of God                               40
τοῦ Θεοῦ = God                              39
τὸν Θεόν = God                              37
τοῦ Θεοῦ = of God                           34
τὸν Θεὸν = God                              22
Θεῷ = God                                   21
Θεὸς = God                                  18
τῷ Θεῷ = God                                14
ὁ Θεός = God                                12
Θεὸν = God                                   9
τῷ Θεῷ = to God                              6
Θεὸς = the God                               3
τοῦ Θεοῦ = God s                             2
Θεῷ = the God                                2
Θεὸν = the God                               2
Θεόν = God                                   2
λατρεῦον = as they worship God               1
τοῦ Θεοῦ = for God                           1
ὁ δὲ Θεός = But God                          1
μετῴκισεν = G

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of godAlignments.
# NOTE: godAlignments is reassigned by every search cell, so this reflects
# whichever search ran most recently in the kernel.
godAlignments.describe()

Unnamed: 0,id,alignment_num,origSpan,alignmentOrigWords,targetSpan,alignmentTargetWords,frequency
count,508.0,508.0,508.0,508.0,508.0,508.0,508.0
mean,146533.259843,10.326772,0.511811,1.498031,0.344488,1.226378,0.127286
std,14291.086315,6.10037,0.534655,0.50831,1.20544,0.463599,0.098157
min,118054.0,0.0,0.0,1.0,0.0,1.0,0.001969
25%,136841.5,6.0,0.0,1.0,0.0,1.0,0.043307
50%,146983.0,9.0,0.0,1.0,0.0,1.0,0.07874
75%,159553.0,15.0,1.0,2.0,0.0,1.0,0.271654
max,165402.0,27.0,2.0,3.0,18.0,5.0,0.271654


In [12]:
# Target-language lookup of 'God' that ignores letter case.
godAlignments = db.findAlignmentsForWord(
    connection,
    'God',
    searchTarget,
    False,  # same positional slot where the lemma search passes searchLemma
    caseInsensitive,
)
# Tally how often each distinct alignment string occurs, most frequent first.
frequency = godAlignments.loc[:, 'alignmentTxt'].value_counts()
frequency

528 items in search


Θεοῦ = God                                 138
ὁ Θεὸς = God                                80
Θεοῦ = of God                               40
τοῦ Θεοῦ = God                              39
τὸν Θεόν = God                              37
τοῦ Θεοῦ = of God                           34
τὸν Θεὸν = God                              22
Θεῷ = God                                   21
Θεὸς = God                                  18
τῷ Θεῷ = God                                14
ὁ Θεός = God                                12
Θεὸν = God                                   9
τῷ Θεῷ = to God                              6
Θεὸς = the God                               3
τοῦ Θεοῦ = God s                             2
Θεῷ = the God                                2
Θεόν = God                                   2
Θεὸν = the God                               2
Θεοῦ = God s                                 1
ὁ δὲ Θεός = But God                          1
τοῦ Θεοῦ = for God                           1
ὁ Θεός = God 

In [13]:
# Sanity check: evaluates to True if any cell of godAlignments is missing.
godAlignments.isna().to_numpy().any()

False

In [14]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of godAlignments.
# NOTE: godAlignments is reassigned by every search cell, so this reflects
# whichever search ran most recently in the kernel.
godAlignments.describe()

Unnamed: 0,id,alignment_num,origSpan,alignmentOrigWords,targetSpan,alignmentTargetWords,frequency
count,512.0,512.0,512.0,512.0,512.0,512.0,512.0
mean,146550.025391,10.332031,0.509766,1.496094,0.349609,1.232422,0.12532
std,14250.928866,6.128684,0.534433,0.508234,1.203729,0.470961,0.097625
min,118054.0,0.0,0.0,1.0,0.0,1.0,0.001953
25%,136851.0,6.0,0.0,1.0,0.0,1.0,0.042969
50%,146983.0,9.0,0.0,1.0,0.0,1.0,0.078125
75%,159515.5,15.0,1.0,2.0,0.0,1.0,0.269531
max,165402.0,27.0,2.0,3.0,18.0,5.0,0.269531


In [9]:
# Display the alignment frame itself (pandas shows only the leading and
# trailing rows of a long frame).
# NOTE(review): this cell's execution count (In [9]) is lower than the cells
# placed above it (In [12]-[14]); restart the kernel and run all cells in
# order to confirm which search this frame reflects.
godAlignments

Unnamed: 0,id,book_id,chapter,verse,alignment_num,orig_lang_words,target_lang_words,origSpan,origWords,origWordsTxt,alignmentOrigWords,targetSpan,targetWords,targetWordsTxt,alignmentTargetWords,alignmentTxt,frequency
0,118054,luk,1,6,5,",750353,750354,",",727232,",1,"[{'id': 750353, 'book_id': 'luk', 'chapter': '...",τοῦ Θεοῦ,2,0,"[{'id': 727232, 'book_id': 'luk', 'chapter': '...",God,1,τοῦ Θεοῦ = God,0.076172
1,118093,luk,1,8,12,",750396,750397,",",727274,",1,"[{'id': 750396, 'book_id': 'luk', 'chapter': '...",τοῦ Θεοῦ,2,0,"[{'id': 727274, 'book_id': 'luk', 'chapter': '...",God,1,τοῦ Θεοῦ = God,0.076172
2,118206,luk,1,16,8,",750517,750518,",",727452,",1,"[{'id': 750517, 'book_id': 'luk', 'chapter': '...",τὸν Θεὸν,2,0,"[{'id': 727452, 'book_id': 'luk', 'chapter': '...",God,1,τὸν Θεὸν = God,0.042969
3,118262,luk,1,19,11,",750579,750580,",",727539,",1,"[{'id': 750579, 'book_id': 'luk', 'chapter': '...",τοῦ Θεοῦ,2,0,"[{'id': 727539, 'book_id': 'luk', 'chapter': '...",God,1,τοῦ Θεοῦ = God,0.076172
4,118382,luk,1,26,10,",750709,750710,",",727714,",1,"[{'id': 750709, 'book_id': 'luk', 'chapter': '...",τοῦ Θεοῦ,2,0,"[{'id': 727714, 'book_id': 'luk', 'chapter': '...",God,1,τοῦ Θεοῦ = God,0.076172
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
507,165087,2jn,1,3,7,",801721,",",796060,",0,"[{'id': 801721, 'book_id': '2jn', 'chapter': '...",Θεοῦ,1,0,"[{'id': 796060, 'book_id': '2jn', 'chapter': '...",God,1,Θεοῦ = God,0.269531
508,165188,2jn,1,9,9,",801838,",",796230,",0,"[{'id': 801838, 'book_id': '2jn', 'chapter': '...",Θεὸν,1,0,"[{'id': 796230, 'book_id': '2jn', 'chapter': '...",God,1,Θεὸν = God,0.017578
509,165319,3jn,1,6,12,",801998,",",796439,",0,"[{'id': 801998, 'book_id': '3jn', 'chapter': '...",Θεοῦ,1,0,"[{'id': 796439, 'book_id': '3jn', 'chapter': '...",God,1,Θεοῦ = God,0.269531
510,165396,3jn,1,11,11,",802076,802077,",",796557,",1,"[{'id': 802076, 'book_id': '3jn', 'chapter': '...",τοῦ Θεοῦ,2,0,"[{'id': 796557, 'book_id': '3jn', 'chapter': '...",God,1,τοῦ Θεοῦ = God,0.076172


In [10]:
# Lemma search for θεός in the original language. Uses the named flags from
# the setup cell instead of bare True/True so the positional arguments are
# self-describing and match the earlier lemma-search call.
godAlignments = db.findAlignmentsForWord(connection, 'θεός', searchOriginal, searchLemma)


509 items in search


In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of godAlignments.
# NOTE: godAlignments is reassigned by every search cell, so this reflects
# whichever search ran most recently in the kernel.
godAlignments.describe()


Unnamed: 0,id,alignment_num,origSpan,alignmentOrigWords,targetSpan,alignmentTargetWords,frequency
count,504.0,504.0,504.0,504.0,504.0,504.0,504.0
mean,146347.80754,10.422619,0.517857,1.503968,0.319444,1.206349,0.129307
std,14221.867386,6.103316,0.534755,0.508364,1.177632,0.409965,0.098672
min,118054.0,0.0,0.0,1.0,0.0,1.0,0.001984
25%,136828.0,6.0,0.0,1.0,0.0,1.0,0.043651
50%,146855.5,9.5,0.5,1.5,0.0,1.0,0.079365
75%,159140.5,15.0,1.0,2.0,0.0,1.0,0.27381
max,165402.0,27.0,2.0,3.0,18.0,3.0,0.27381
