### Testing the Inference functions here in bibliqal

### 1. Loading the directories and the libraries

In [1]:
import sys
sys.path.append('..')

import numpy as np
import sqlite3 as sq
from src.model import USEQA
    
# string addresses and questions
query_string = "Is Genesis a scientific document?"
vectorised_master_kb_dir = '../data/vectorised_master_kb.npz'
master_kb_text_dir = '../data/master_kb_text.db'

# load master kb
with np.load(vectorised_master_kb_dir) as data:
    master_matrix = data['arr_0']

### 2. Inference is made with asd

In [7]:
# init model
# model = USEQA()
encoded_question = model.predict(query_string, type='query').numpy()#[0,:]

encoded_question.shape

(1, 512)

Calculate cosine similarity with master matrix.

In [8]:
from sklearn.metrics.pairwise import cosine_similarity
similarity_score=cosine_similarity(encoded_question,master_matrix)
similarity_score


array([[0.41841924, 0.3805582 , 0.44002157, ..., 0.06836633, 0.07423057,
        0.1920658 ]], dtype=float32)

Sort the tags between the encoded question and the master matrix.

In [9]:
sortargs=np.flip(similarity_score.argsort(axis=1))
sortargs = list(sortargs[0])
sortargs[:5]

[10, 9, 2, 0, 1]

Finding the positions of the best relevant answers.

In [13]:
def find_pos_of_nth_smallest(sortargs, n):
    """
    Find the position of the nth smallest number
    
    args:
        sortargs: (list) contains the rankings
        n: (int) number positions of smallest number
        
    return:
        integer postion of the value
    """
    return sortargs.index(sorted(sortargs)[n])

def find_idx_of_best_k_answers(sortargs, k):
    """
    Find indices of the most relevant 
    
    args:
        sortargs: (list) contains the rankings
        k: (int) number positions of top rankings
        
    return:
        list of top k ranking positions in lists
        
    e.g.
    >>> find_idx_of_best_k_answers(sortargs, 5)
    >>> [3, 4, 2, 43, 5]
    
    where 3 is the index of 0, 
          4 is the index of 1,
          2 is the index of 2,
          43 is the index of 3,
          5 is the index of 4
    """
    return [find_pos_of_nth_smallest(sortargs, i) for i in range(k)]

idx_of_best_k_answers = find_idx_of_best_k_answers(sortargs, 5)
idx_of_best_k_answers

[3, 4, 2, 43, 5]

### 3. Query the ranks from the SQLite

In [12]:
def query_SQL_by_idx(con, idx):
    """
    Query an observation from SQL base based on idx
    
    args:
    -----
        con: SQL client
        idx: (int) index position of the row in the database
        
    return:
    ------
        query_results: list of tuple
    """
    return con.execute('SELECT * FROM master_kb_text WHERE idx = ?', (idx,)).fetchall() 

con = sq.connect(master_kb_text_dir)

query_SQL_by_idx(con, 4)

[(4.0,
  'Macarthur John',
  ' - Bible Introductions - Genesis',
  'Genesis 1–11 (primeval history) reveals the origins of the universe, i.e., the beginnings of time and space and many of the firsts in human experience, such as marriage, family, the Fall, sin, redemption, judgment, and nations. Genesis 12–50 (patriarchal history) explained to Israel how they came into existence as a family whose ancestry could be traced to Eber (hence the “Hebrews”; Gen. 10:24, 25) and even more remotely to Shem, the son of Noah (hence the “Semites”; Gen. 10:21). God’s people came to understand not only their ancestry and family history, but also the origins of their institutions, customs, languages, and different cultures, especially basic human experiences such as sin and death.',
  'https://www.blueletterbible.org/Comm/macarthur_john/bible-introductions/genesis-intro.cfm',
  '')]

### Querying multiple rows from SQL
There are a few ways:
1. query one index at a time
2. query one at a time with a list comprehension
3. use pandas io to query

In [15]:
list_of_context_tuples = [query_SQL_by_idx(con, idx) for idx in idx_of_best_k_answers]
list_of_context_tuples

[[(3.0,
   'Macarthur John',
   ' - Bible Introductions - Genesis',
   'In this book of beginnings, God revealed Himself and a worldview to Israel which contrasted, at times sharply, with the worldview of Israel’s neighbors. The author made no attempt to defend the existence of God or to present a systematic discussion of His person and works. Rather, Israel’s God distinguished Himself clearly from the alleged gods of her neighbors. Theological foundations are revealed which include God the Father, God the Son, God the Holy Spirit, man, sin, redemption, covenant, promise, Satan and angels, kingdom, revelation, Israel, judgment, and blessing.',
   'https://www.blueletterbible.org/Comm/macarthur_john/bible-introductions/genesis-intro.cfm',
   '')],
 [(4.0,
   'Macarthur John',
   ' - Bible Introductions - Genesis',
   'Genesis 1–11 (primeval history) reveals the origins of the universe, i.e., the beginnings of time and space and many of the firsts in human experience, such as marriage, f

In [25]:
con = sq.connect(master_kb_text_dir)
query = 'SELECT * FROM master_kb_text WHERE idx IN (?,?)' 
con.execute(query, [4,3,]).fetchall() 

[(3.0,
  'Macarthur John',
  ' - Bible Introductions - Genesis',
  'In this book of beginnings, God revealed Himself and a worldview to Israel which contrasted, at times sharply, with the worldview of Israel’s neighbors. The author made no attempt to defend the existence of God or to present a systematic discussion of His person and works. Rather, Israel’s God distinguished Himself clearly from the alleged gods of her neighbors. Theological foundations are revealed which include God the Father, God the Son, God the Holy Spirit, man, sin, redemption, covenant, promise, Satan and angels, kingdom, revelation, Israel, judgment, and blessing.',
  'https://www.blueletterbible.org/Comm/macarthur_john/bible-introductions/genesis-intro.cfm',
  ''),
 (4.0,
  'Macarthur John',
  ' - Bible Introductions - Genesis',
  'Genesis 1–11 (primeval history) reveals the origins of the universe, i.e., the beginnings of time and space and many of the firsts in human experience, such as marriage, family, the 

In [37]:
import pandas.io.sql as pds
data_df = pds.read_sql('SELECT * FROM master_kb_text WHERE idx IN (' + str(idx_of_best_k_answers)[1:-1] + ')', con)

In [38]:
data_df.set_index('idx').loc[idx_of_best_k_answers]

Unnamed: 0_level_0,authors,document_nm,paragraph,url,verse_references
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
3,Macarthur John,- Bible Introductions - Genesis,"In this book of beginnings, God revealed Himse...",https://www.blueletterbible.org/Comm/macarthur...,
4,Macarthur John,- Bible Introductions - Genesis,Genesis 1–11 (primeval history) reveals the or...,https://www.blueletterbible.org/Comm/macarthur...,
2,Macarthur John,- Bible Introductions - Genesis,The initial setting for Genesis is eternity pa...,https://www.blueletterbible.org/Comm/macarthur...,
43,Macarthur John,- Bible Introductions - Numbers,"The second interpretive question asks, “Is the...",https://www.blueletterbible.org/Comm/macarthur...,
5,Macarthur John,- Bible Introductions - Genesis,Because they were preparing to enter Canaan an...,https://www.blueletterbible.org/Comm/macarthur...,


### Testing the GR `make_query()` function.

In [1]:
import sys
sys.path.append('..')

import numpy as np
import sqlite3 as sq
from src.model import USEQA
    
# string addresses and questions
query_string = "Is Genesis a scientific document?"
vectorised_master_kb_dir = '../data/vectorised_master_kb.npz'
master_kb_text_dir = '../data/master_kb_text.db'

# load master kb
with np.load(vectorised_master_kb_dir) as data:
    master_matrix = data['arr_0']
    
# sql connection
con = sq.connect(master_kb_text_dir)

# load master kb
with np.load(vectorised_master_kb_dir) as data:
    master_matrix = data['arr_0']

Init and make query

In [2]:
# init model
model = USEQA()
# encoded_question = model.predict(query_string, type='query').numpy()#[0,:]

# encoded_question.shape

model initiated!


In [11]:
data_df, top_scores = model.make_query(query_string, master_matrix, con)
data_df

Unnamed: 0_level_0,authors,document_nm,paragraph,url,verse_references
idx,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
24,Expositors Commentary,,"There is indeed a prevalent suspicion, that in...",https://biblehub.com/commentaries/expositors/g...,Genesis-3
15,Expositors Commentary,,IF anyone is in search of accurate information...,https://biblehub.com/commentaries/expositors/g...,Genesis-3
5,Expositors Commentary,,Here then instead of anything to discompose us...,https://biblehub.com/commentaries/expositors/g...,Genesis-2
30,Expositors Commentary,,"PROFOUND as the teaching of this narrative is,...",https://biblehub.com/commentaries/expositors/g...,Genesis-4
60,Expositors Commentary,,"If, therefore, you continue at war with God it...",https://biblehub.com/commentaries/expositors/g...,Genesis-5


### Troubleshooting

In [126]:
from sklearn.metrics.pairwise import cosine_similarity
import pandas.io.sql as pds
from src.utils import *

k=5
query_string = "Is Genesis a scientific document?" # looksgood
query_string = "Is genesis scientific?"
query_string = "Should women be allowed to preach in ministry?"
query_string = "Is honesty a good thing?"
query_string = "Is the old testament promise over?"
query_string = "Are Christian men happy?" #22401
query_string = 'How are religious men like Paul?'

# score the stuff
%time similarity_score=cosine_similarity(master_matrix, model.predict(query_string, type='query'))
top_k_scores = similarity_score[similarity_score.argsort(axis=0)<5]
print(f"Top k scores: {top_k_scores}")
sortargs=np.flip(similarity_score.argsort(axis=0))
sortargs = [sortarg[0] for sortarg in sortargs]

# indices of best k answers
idx_of_best_k_answers = find_idx_of_best_k_answers(sortargs, k)

# call from sql
#list_of_context_tuples = [query_SQL_by_idx(con, idx) for idx in idx_of_best_k_answers]
data_df = pds.read_sql('SELECT * FROM master_kb_text WHERE idx IN (' + str(idx_of_best_k_answers)[1:-1] + ')', con)
data_df = data_df.set_index('idx').loc[idx_of_best_k_answers]

CPU times: user 128 ms, sys: 3.96 ms, total: 132 ms
Wall time: 89.4 ms


In [128]:
cosine_similarity(
    model.predict('Spiritual men are those who can say, with Paul, "We have received, not the spirit of the world, but the Spirit which is of God, that we might know the things that are freely given to us of God."', type='response'),
    model.predict('How are religious men like Paul?', type='query')
)

array([[0.44664282]], dtype=float32)

In [15]:
"A young athlete named Mitra and his sweetheart Neng Mardinah are to be wed. However, a young man named Mardjohan has fallen in love with Mardinah, and to win her heart he spreads rumours about Mitra being the son of a convicted criminal. In the backlash over the rumours, Mitra abandons the city and his beloved, hoping to find peace in the countryside. There, Mitra finds work at a factory which is, coincidentally, owned by Mardjohan. Refusing the romantic advances of a worker there, he leaves the factory. One day, he comes across Mardjohan, gravely injured following an accident. Mitra saves the man, then takes him for treatment. Mardjohan's mother, seeing Mitra, believes that he is her son who went missing when he was aged three. She finds several witnesses who testify to the resemblance and is ultimately able to prove her suspicions. Meanwhile, Mitra's name is cleared, and he is reunited with Mardinah."

"A young athlete named Mitra and his sweetheart Neng Mardinah are to be wed. However, a young man named Mardjohan has fallen in love with Mardinah, and to win her heart he spreads rumours about Mitra being the son of a convicted criminal. In the backlash over the rumours, Mitra abandons the city and his beloved, hoping to find peace in the countryside. There, Mitra finds work at a factory which is, coincidentally, owned by Mardjohan. Refusing the romantic advances of a worker there, he leaves the factory. One day, he comes across Mardjohan, gravely injured following an accident. Mitra saves the man, then takes him for treatment. Mardjohan's mother, seeing Mitra, believes that he is her son who went missing when he was aged three. She finds several witnesses who testify to the resemblance and is ultimately able to prove her suspicions. Meanwhile, Mitra's name is cleared, and he is reunited with Mardinah."

Does sentence level beats paragraph level?

In [16]:
from sklearn.metrics.pairwise import cosine_similarity
paragraph = "A young athlete named Mitra and his sweetheart Neng Mardinah are to be wed. However, a young man named Mardjohan has fallen in love with Mardinah, and to win her heart he spreads rumours about Mitra being the son of a convicted criminal. In the backlash over the rumours, Mitra abandons the city and his beloved, hoping to find peace in the countryside. There, Mitra finds work at a factory which is, coincidentally, owned by Mardjohan. Refusing the romantic advances of a worker there, he leaves the factory. One day, he comes across Mardjohan, gravely injured following an accident. Mitra saves the man, then takes him for treatment. Mardjohan's mother, seeing Mitra, believes that he is her son who went missing when he was aged three. She finds several witnesses who testify to the resemblance and is ultimately able to prove her suspicions. Meanwhile, Mitra's name is cleared, and he is reunited with Mardinah."
for p in range( 1, len(paragraph.split('.')) ):
    sim = cosine_similarity(
        model.predict( '.'.join(paragraph.split('.')[:p]) , type = 'response' ),
        model.predict( 'What did Mitra do with injured Mardjohn?' , type = 'query' )
    )
    print(f"{p} - {sim}")

1 - [[0.31539595]]
2 - [[0.36572763]]
3 - [[0.36249405]]
4 - [[0.3475117]]
5 - [[0.32431668]]
6 - [[0.34061146]]
7 - [[0.33991885]]
8 - [[0.35982224]]
9 - [[0.3580126]]
10 - [[0.36043617]]


In [22]:
from sklearn.metrics.pairwise import cosine_similarity
# paragraph = 'There is indeed a prevalent suspicion, that in presence of the discoveries made by evolutionists the argument from design is no longer tenable. Evolution shows us that the correspondence of the structure of animals, with their modes of life, has been generated by the nature of the case; and it is concluded that a blind mechanical necessity and not an intelligent design rules all. But the discovery of the process by which the presently existing living forms have been evolved, and the perception that this process is governed by laws which have always been operating, do not make intelligence and design at all less necessary, but rather more so. As Professor Huxley himself says: "The teleological and mechanical views of nature are not necessarily exclusive. The teleologist can always defy the evolutionist to disprove that the primordial molecular arrangement was not intended to evolve the phenomena of the universe." Evolution, in short, by disclosing to us the marvellous power and accuracy of natural law, compels us more emphatically than ever to refer all law to a supreme, originating intelligence.'
for p in range( len(paragraph.split('.')) ):
    
    sentence_ = paragraph.split('.')[p].strip()
    para_context = '.'.join(paragraph.split('.')[:p])
    
    sim = cosine_similarity(
        model.predict( sentence_, context = para_context , type = 'response' ),
        model.predict( 'What did Mitra do with injured Mardjohan?' , type = 'query' )
    )
    print(f"{p} - {sim}")

0 - [[0.31084716]]
1 - [[0.3507892]]
2 - [[0.40754443]]
3 - [[0.3989994]]
4 - [[0.1387328]]
5 - [[0.30246824]]
6 - [[0.42812115]]
7 - [[0.3442791]]
8 - [[0.12057083]]
9 - [[0.16380061]]
10 - [[0.08407693]]


In [21]:
paragraph.split('.')[6]

' Mitra saves the man, then takes him for treatment'

In [8]:
para_context

'There is indeed a prevalent suspicion, that in presence of the discoveries made by evolutionists the argument from design is no longer tenable. Evolution shows us that the correspondence of the structure of animals, with their modes of life, has been generated by the nature of the case; and it is concluded that a blind mechanical necessity and not an intelligent design rules all. But the discovery of the process by which the presently existing living forms have been evolved, and the perception that this process is governed by laws which have always been operating, do not make intelligence and design at all less necessary, but rather more so. As Professor Huxley himself says: "The teleological and mechanical views of nature are not necessarily exclusive. The teleologist can always defy the evolutionist to disprove that the primordial molecular arrangement was not intended to evolve the phenomena of the universe." Evolution, in short, by disclosing to us the marvellous power and accurac