In [1]:
# create virtual server here in notebook with access to all functions and variables
from IR_tools import *

100%|██████████| 49/49 [00:00<00:00, 352.33it/s]


In [3]:
# search settings

# pre-NBhū
priority_texts = ["VS", "MīmS", "MMK", "ViVy", "NS", "YSBh", "SK", "ViṃśV", "NBh", "MīmBh", "ĀP", "PSV", "NPS", "TriṃśBh", "YD", "PDhS", "NV", "PPad", "ŚV", "PVSV", "PV", "PVin", "HB", "NB", "VN", "SAS", "SP", "BhāV", "BrS", "VibhrV", "VidhV", "PSṬ", "HBṬ", "NBṬ", "PVA", "VSṬ", "TUS"]

# others
non_priority_texts = ["VyV", "NM", "NyKal", "NBhū", "SŚP", "ŚVK", "HBṬĀ", "NyKand", "AvNir", "PVV", "TCM", "MukV"]

N_tf_idf_shallow = int( len(doc_ids) * 0.15)
N_sw_w_shallow = 200

N_tf_idf_deep = int( len(doc_ids) * 1.00)
N_sw_w_deep = 1000

In [4]:
# functions for assessing speed and summarizing results 

from datetime import datetime, date
from time import sleep

def calc_dur(start, end):
    delta = datetime.combine(date.today(), end) - datetime.combine(date.today(), start)
    duration_secs = delta.seconds + delta.microseconds / 1000000
    return duration_secs

def summarize_result(results, label, duration, num_comparisons, display_depth):
    print('{} ({:.3f} s, {} comparisons, {:.6f} s/comparison)'.format(
            label,
            duration, 
            num_comparisons,
            duration/num_comparisons
            )
    )
    for k,v in list(results.items())[:display_depth]:
        print(k, ": ", v)
    print()
    sleep(0.01)


In [5]:
# streamlined version of get_closest_docs() with no HTML rendering, for assessing speed of algorithm parts

def get_closest_docs_2(query_id, search_depth='shallow', prioritze=True, display_depth=10, mode='speed_summary'):
    
    if search_depth=='shallow':
        N_tf_idf = N_tf_idf_shallow
        N_sw_w = N_sw_w_shallow
    elif search_depth=='deep':
        N_tf_idf = N_tf_idf_deep
        N_sw_w = N_sw_w_deep
    
    # rank candidates by topic similarity

    start1 = datetime.now().time()
    all_topic_candidates = rank_all_candidates_by_topic_similarity(
        query_id
        )    
    end1 = datetime.now().time()
    topic_time = calc_dur(start1, end1)
    if mode=='speed_summary': summarize_result(
        results=all_topic_candidates,
        label='topics',
        duration=topic_time,
        num_comparisons=len(doc_ids),
        display_depth=display_depth
    )

    if prioritze:
        # prioritize candidates by text name, discard secondary candidates
            priority_candidate_ids, _ = divide_doc_id_list_by_work_priority(
                list(all_topic_candidates.keys()),
                priority_texts
                )
            priority_topic_candidates = { doc_id: all_topic_candidates[doc_id]
                for doc_id in priority_candidate_ids
                }
    else:
        # don't prioritize
        priority_topic_candidates = all_topic_candidates
        priority_topic_candidate_ids = list(all_topic_candidates.keys())
    
    # limit further computation to only top N_tf_idf of sorted candidates (minus query itself)
    pruned_priority_topic_candidates = { k:v
        for (k,v) in list(priority_topic_candidates.items())[:N_tf_idf-1]
        }

    # further rank candidates by tiny tf-idf
    start2 = datetime.now().time()
    tf_idf_candidates = rank_candidates_by_tiny_TF_IDF_similarity(
        query_id,
        list(pruned_priority_topic_candidates.keys())
        )
    end2 = datetime.now().time()
    tf_idf_time = calc_dur(start2, end2)
    if mode=='speed_summary': summarize_result(
        results=tf_idf_candidates,
        label='tf-idf',
        duration=tf_idf_time,
        num_comparisons=N_tf_idf,
        display_depth=display_depth
    )

    # limit further computation to only top N_sw_w of sorted candidates
    pruned_tf_idf_candidates = { k:v
        for (k,v) in list(tf_idf_candidates.items())[:N_sw_w-1]
        }

    # further rank candidates by sw_w
    start3 = datetime.now().time()
    sw_w_candidates = rank_candidates_by_sw_w_alignment_score(
        query_id,
        list(pruned_tf_idf_candidates.keys())
        )
    end3 = datetime.now().time()
    sw_w_time = calc_dur(start3, end3)

    # do not convert sw_w scores of 0.0 to empty string
    # do not add blank entries for other docs for which sw_w comparison not performed

    if mode=='speed_summary':
        summarize_result(
            results=sw_w_candidates, 
            label='sw_w', 
            duration=sw_w_time, 
            num_comparisons=N_sw_w, 
            display_depth=display_depth
        )
        return

    elif mode=='evaluate_pairs':
        return all_topic_candidates, priority_topic_candidates, tf_idf_candidates, sw_w_candidates


In [6]:
# test single 'speed_summary' run, shallow (no values returned)

get_closest_docs_2('NBhū_142,19', search_depth='shallow', display_depth=10, mode='speed_summary')

topics (0.770 s, 28381 comparisons, 0.000027 s/comparison)
PVA_629,vi :  0.9698818351650111
PVSV_010,13_010,15 :  0.9523222396152328
NBṬ_217,iii_217,v :  0.9382695193578188
PVSV_010,19_010,21 :  0.9165924117385076
PVin_II,045,iii :  0.913539875583041
PVA_612,i_612,iii :  0.9116307949243329
PVV_290,iv_290,v :  0.895139987240839
PVV_493,viii_493,x :  0.8912334689992081
PVin_I,091,i_I,092,i :  0.8910857329884588
PVin_II,123,i_II,123,ii :  0.8907492314066945

tf-idf (1.208 s, 4257 comparisons, 0.000284 s/comparison)
PVin_I,091,i_I,092,i :  0.4476406323861196
PVSV_010,13_010,15 :  0.4366356177407236
PVin_I,092,ii_I,092,iii :  0.3154629129055202
PVSV_010,19_010,21 :  0.30638191389324454
PSṬ_II,115,ii_II,115,iii :  0.16877487866883265
PVSV_011,05_011,12 :  0.15478327326428523
PVSV_008,16_009,01 :  0.12926378368826538
PVA_607,iv_607,vii :  0.12628304328524773
PVin_II,111,ii_II,112,i :  0.12170302311234055
PV_4.205ab_4.207cd :  0.10956666140397969

sw_w (0.904 s, 200 comparisons, 0.004519 s/com

In [7]:
# run for 'speed_summary', deep (no values returned)

# get_closest_docs_2('NBhū_142,19', search_depth='deep', display_depth=10, mode='speed_summary')

In [8]:
# identify my NBhū docs

NBhu_doc_ids = [ di for di in doc_ids if parse_complex_doc_id(di)[0] == 'NBhū' ]
print(len(NBhu_doc_ids), NBhu_doc_ids[238], NBhu_doc_ids[377], 377-238+1)

doc_id_full_list = NBhu_doc_ids[238:377+1]
print(len(doc_id_full_list))

1765 NBhū_104,6^1 NBhū_154,15 140
140


In [9]:
# or do this for ALL NBhū docs (!)

doc_id_full_list = NBhu_doc_ids

In [10]:
# set up doc pairs to evaluate as expected from benchmark

doc_id_suspected_pair_list = [
    ('NBhū_104,6^1', 'PVin_I,034,i'),
    ('NBhū_104,6^2', 'PV_3.148ab_3.150cd'),
    ('NBhū_104,6^2', 'NS_4.2.8_4.2.14'),
    ('NBhū_106,3', 'NS_4.2.23_4.2.28'),
    ('NBhū_106,3', 'NV_487,02_487,04'),
    ('NBhū_106,11_107,1', 'ViṃśV_93,i_95,i'),
    ('NBhū_106,11_107,1', 'PVin_I,035,i_I,036,ii'),
    ('NBhū_106,11_107,1', 'NS_4.2.15_4.2.22'),
    ('NBhū_107,6_108,1', 'PVin_I,039,i_I,039,ii'),
    ('NBhū_108,4_108,6', 'PVin_I,040,i'),
    ('NBhū_108,10', 'PVin_I,041,i'),
    ('NBhū_108,10', 'PV_3.431cd_3.434cd'),
    ('NBhū_109,1', 'PV_3.431cd_3.434cd'),
    ('NBhū_109,7', 'PV_3.329ab_3.332ab'),
    ('NBhū_109,7', 'PVin_I,035,i_I,036,ii'),
    ('NBhū_115,1_115,4', 'PV_3.281ab_3.284ab'),
    ('NBhū_115,18', 'PVA_289,xiv_290,ii'),
    ('NBhū_115,18', 'PVA_290,iii'),
    ('NBhū_115,18', 'PVA_290,iv_290,vi'),
    ('NBhū_116,7', 'NS_4.1.35_4.1.42'),
    ('NBhū_116,7', 'NV_454,17_454,18'),
    ('NBhū_117,3^1', 'PVA_288,vii'),
    ('NBhū_117,3^2', 'PVA_288,vii'),
    ('NBhū_117,3^2', 'PV_3.208cd_3.211ab'),
    ('NBhū_121,2^2', 'PVin_I,046,i_I,046,iii'),
    ('NBhū_124,8^2', 'PV_2.066ab_2.069ab'),
    ('NBhū_125,15', 'ŚV_5,4.250ab_5,4.253ab'),
    ('NBhū_126,6^1', 'NS_4.2.8_4.2.14'),
    ('NBhū_126,6^1', 'NBh_1047,i_1047,ii'),
    ('NBhū_126,6^1', 'NS_2.1.33_2.1.39'),
    ('NBhū_126,6^3', 'NS_2.1.33_2.1.39'),
    ('NBhū_126,6^3', 'NS_4.2.8_4.2.14'),
    ('NBhū_131,11_131,17', 'ViṃśV_93,i_95,i'),
    ('NBhū_132,2', 'NS_4.2.15_4.2.22'),
    ('NBhū_132,11^1', 'TUS_ii,102,i_ii,102,ii'),
    ('NBhū_132,11^1', 'PV_3.386cd_3.389cd'),
    ('NBhū_138,9', 'PVA_353,x'),
    ('NBhū_138,9', 'PVA_353,xi_353,xii'),
    ('NBhū_139,1_139,3', 'PVA_353,xiii_353,xv'),
    ('NBhū_139,1_139,3', 'PVSV_022,06_022,20'),
    ('NBhū_139,1_139,3', 'PVin_I,086,ii^1'),
    ('NBhū_139,26_140,1', 'PV_3.326ab_3.328cd'),
    ('NBhū_140,21', 'PVin_I,046,i_I,046,iii'),
    ('NBhū_142,2', 'PV_3.329ab_3.332ab'),
    ('NBhū_142,12', 'PVA_387,xvii_387,xxii'),
    ('NBhū_142,12', 'PVA_359,iv_359,vi'),
    ('NBhū_142,19', 'PVA_361,xiii_361,xvi'),
    ('NBhū_142,19', 'PVSV_010,13_010,15'),
    ('NBhū_142,19', 'PVSV_010,19_010,21'),
    ('NBhū_142,19', 'PVin_I,091,i_I,092,i'),
    ('NBhū_142,19', 'PVin_I,092,ii_I,092,iii'),
    ('NBhū_144,20^1', 'PVA_361,xiii_361,xvi'),
    ('NBhū_145,15', 'PVA_360,ix'),
    ('NBhū_145,15', 'PVA_360,x'),
    ('NBhū_145,22', 'PVA_360,x'),
    ('NBhū_145,22', 'PVA_360,xi_361,i'),
    ('NBhū_146,7', 'PVA_360,xi_361,i'),
    ('NBhū_146,7', 'PVA_361,ii_361,iii'),
    ('NBhū_146,14_146,18', 'PVA_361,ii_361,iii'),
    ('NBhū_146,14_146,18', 'PVA_361,iv_361,vi'),
    ('NBhū_146,21', 'PVA_361,iv_361,vi'),
    ('NBhū_146,21', 'PVA_361,vii'),
    ('NBhū_147,3_147,6', 'PVA_361,x_361,xii'),
    ('NBhū_149,4_149,16', 'PVA_366,v_366,ix'),
    ('NBhū_149,19', 'PVA_366,iv'),
    ('NBhū_150,1', 'HB_3,1^1'),
    ('NBhū_150,1', 'PV_2.001ab_2.005cd'),
    ('NBhū_150,6^2', 'PVin_II,001,i_II,001,ii'),
    ('NBhū_150,6^2', 'NB_3.1_3.8'),
    ('NBhū_153,4_153,7', 'PVA_356,iv_356,vii'),
    ('NBhū_153,14', 'PVA_358,ix^4')
]
num_expected_pairs = len(doc_id_suspected_pair_list)

# turn into dict of lists
doc_id_suspected_pair_dict = {}
for doc_id_1, doc_id_2 in doc_id_suspected_pair_list:
    if doc_id_1 not in doc_id_suspected_pair_dict:
        doc_id_suspected_pair_dict[doc_id_1] = [doc_id_2]
    else:
        doc_id_suspected_pair_dict[doc_id_1].append(doc_id_2)

In [11]:
# function for describing doc_id's position in ranked results as well as giving absolute score

def format_score_summary(ranking_dict, doc_id):

    if doc_id in ranking_dict:

        ks = list(ranking_dict.keys())
        rank = ks.index(doc_id) + 1
        score = ranking_dict[doc_id]

        return "{} ({:.2f})".format(rank, score)

    else:
        return ""

In [None]:
# compare performance against benchmark
# i.e., loop over get_closest_docs_2() in mode='evaluate_pairs'
# outputs to tsv spreadsheet

from tqdm.notebook import tqdm

pbar = tqdm(total=len(doc_id_full_list))

output_buffer_1 = ""
output_buffer_2 = '\t'.join([
    "doc_id", "num_expected_pairs", "num_pairs_confirmed",
    "num_novel_pairs[50]", "num_novel_pairs[40]", "num_novel_pairs[30]"
    ]) + '\n'
i = 0
rank_threshold = 5 # this determines whether system "CONFIRMS" an expected pair
sw_w_min_threshold = 50 # this determines a "NOVEL PAIR"
sw_w_threshold_for_display = 50
for doc_id_1 in doc_id_full_list:

    # announce new doc_id_1
    
    print()
    print(doc_id_1)
    something_to_say = False # in case totally uneventful, will output "(none)"

    # perform full search of doc_id_1 with get_closest_docs_2() in 'evaluate_pairs' mode

    all_topic_candidates, priority_topic_candidates, tf_idf_candidates, sw_w_candidates = get_closest_docs_2(
        doc_id_1, search_depth='shallow', mode='evaluate_pairs'
    )

    # get doc_id_2 pair suspects

    if doc_id_1 in doc_id_suspected_pair_dict:
        doc_id_2_list = doc_id_suspected_pair_dict[doc_id_1]
        num_pairs_confirmed = 0
    else:
        doc_id_2_list = []
        num_pairs_confirmed = ""

    # evaluate each supposed pair
    
    num_expected_pairs = len(doc_id_2_list)
    for doc_id_2 in doc_id_2_list:
        
        something_to_say = True

        # format four ranked scores as "rank (score)"
        topic_all_scores = format_score_summary(all_topic_candidates, doc_id_2)
        topic_priority_scores = format_score_summary(priority_topic_candidates, doc_id_2)
        tf_idf_scores = format_score_summary(tf_idf_candidates, doc_id_2)
        sw_w_scores = format_score_summary(sw_w_candidates, doc_id_2)

        # format results for output to spreadsheet
        output_buffer_1 += '\t'.join([
            str(i),
            doc_id_1,
            doc_id_2,
            topic_all_scores,
            topic_priority_scores,
            tf_idf_scores,
            sw_w_scores,
        ]) + '\n'
        
        # have msg ready to confirm in notebook significant scores for suspected pairs
        if sw_w_scores == "":
            rank = 0
            sw_w_abs_score = 0
        else:
            rank_str, sw_w_abs_score_str = sw_w_scores.split(' ', 1)
            rank = int(rank_str)
            sw_w_abs_score = float(sw_w_abs_score_str[1:-1])
        
        confirmation_msg = (0 < rank <= rank_threshold) * '(CONFIRMED)'
        num_pairs_confirmed += bool(confirmation_msg) * 1

        # output to notebook
        i += 1
        print("pair #{}/{} ({}, {}): {} {} {} {} {}".format(
            i, num_expected_pairs,
            doc_id_1, doc_id_2,
            topic_all_scores,
            topic_priority_scores,
            tf_idf_scores,
            sw_w_scores,
            confirmation_msg
            )
        )

    # also report novel findings
        
    num_novel_pairs = {30: 0, 40: 0, 50: 0}
    for k,v in sw_w_candidates.items():

        if v > sw_w_min_threshold and k not in doc_id_2_list:
            
            something_to_say = True

            for threshold in num_novel_pairs.keys():
                if v > threshold:
                    num_novel_pairs[threshold] += 1

            # format four ranked scores as "rank (score)"
            topic_all_scores = format_score_summary(all_topic_candidates, k)
            topic_priority_scores = format_score_summary(priority_topic_candidates, k)
            tf_idf_scores = format_score_summary(tf_idf_candidates, k)
            sw_w_scores = format_score_summary(sw_w_candidates, k)

            # format results for output to spreadsheet
            output_buffer_1 += '\t'.join([
                "no id",
                doc_id_1,
                k,
                topic_all_scores,
                topic_priority_scores,
                tf_idf_scores,
                sw_w_scores,
            ]) + '\n'

            # output to notebook
            print("NOVEL PAIR ({}, {}): {} {} {} {}".format(
                doc_id_1, k,
                topic_all_scores,
                topic_priority_scores,
                tf_idf_scores,
                sw_w_scores,
                )
            )
        
        # stop once scores too low
        elif (v < sw_w_min_threshold):
            break
    
    if num_expected_pairs > 0:
        print("expected pairs confirmed (@{}): {}/{}".format(rank_threshold, num_pairs_confirmed, num_expected_pairs))
    if num_novel_pairs[sw_w_threshold_for_display] > 0:
        print("novel pairs (@{}): {}".format(sw_w_min_threshold, num_novel_pairs[sw_w_threshold_for_display]))
        
    # give explicit negative output in notebook if there are neither suspected pairs nor novel findings
    if not something_to_say:
        print("(none)")

    # prepare final summary of doc_1 output to second file
    output_buffer_2 += "{}\t{}\t{}\t{}\t{}\t{}".format(
        doc_id_1, num_expected_pairs, num_pairs_confirmed,
        num_novel_pairs[50], num_novel_pairs[40], num_novel_pairs[30]
        ) + '\n'
        
    # update progress bar as last thing
    pbar.update()        

            
# finish up

with open('pairs.tsv','w') as f_out_1:
    f_out_1.write(output_buffer_1)

with open('doc_1_summaries.tsv','w') as f_out_2:
    f_out_2.write(output_buffer_2)
    
pbar.close()

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1765.0), HTML(value='')))


NBhū_1,7_2,2
(none)

NBhū_2,15^1
(none)

NBhū_2,15^2_5,8
(none)

NBhū_5,11
(none)

NBhū_6,10_6,13
NOVEL PAIR (NBhū_6,10_6,13, PSṬ_I,21,i_I,21,ii): 21 (0.70) 21 (0.70) 8 (0.15) 1 (96.00)
novel pairs (@50): 1

NBhū_7,2
NOVEL PAIR (NBhū_7,2, HBṬ_8,ii^2): 6651 (0.08) 3628 (0.08) 29 (0.09) 1 (50.20)
novel pairs (@50): 1

NBhū_7,15^1
(none)

NBhū_7,15^2
(none)

NBhū_8,17
(none)

NBhū_9,1^1
(none)

NBhū_9,1^2
(none)

NBhū_10,2^1
(none)

NBhū_10,2^2
(none)

NBhū_10,21
(none)

NBhū_11,11_11,18
(none)

NBhū_11,22
(none)

NBhū_12,7^1
(none)

NBhū_12,7^2
(none)

NBhū_13,18
(none)

NBhū_14,8
(none)

NBhū_14,17^1
NOVEL PAIR (NBhū_14,17^1, NV_086,03^4): 56 (0.87) 31 (0.87) 29 (0.14) 1 (51.60)
novel pairs (@50): 1

NBhū_14,17^2
(none)

NBhū_15,15^1
(none)

NBhū_15,15^2
(none)

NBhū_15,28^1
NOVEL PAIR (NBhū_15,28^1, VSṬ_21,iii_21,iv): 130 (0.75) 54 (0.75) 2 (0.30) 1 (69.00)
NOVEL PAIR (NBhū_15,28^1, VS_2,2.20_2,2.28): 40 (0.84) 12 (0.84) 4 (0.21) 2 (69.00)
NOVEL PAIR (NBhū_15,28^1, NV_087,07^4): 121 (

(none)

NBhū_64,6
(none)

NBhū_64,16
NOVEL PAIR (NBhū_64,16, NBh_0044,ii_0045,i): 13 (0.76) 5 (0.76) 1 (0.57) 1 (415.40)
NOVEL PAIR (NBhū_64,16, NS_1.1.30_1.1.41): 3 (0.84) 2 (0.84) 15 (0.12) 2 (83.00)
NOVEL PAIR (NBhū_64,16, NBh_0258,i_0258,ii): 16 (0.74) 6 (0.74) 3 (0.17) 3 (79.00)
NOVEL PAIR (NBhū_64,16, NBh_0306,i_0309,i): 344 (0.50) 168 (0.50) 4 (0.17) 4 (55.00)
NOVEL PAIR (NBhū_64,16, NBh_0310,i): 251 (0.55) 118 (0.55) 9 (0.14) 5 (55.00)
NOVEL PAIR (NBhū_64,16, PVA_560,v_560,ix): 2914 (0.16) 912 (0.16) 13 (0.13) 6 (51.40)
novel pairs (@50): 6

NBhū_65,7^1
(none)

NBhū_65,7^2
NOVEL PAIR (NBhū_65,7^2, NV_098,02_098,06): 112 (0.86) 41 (0.86) 3 (0.39) 1 (62.00)
NOVEL PAIR (NBhū_65,7^2, NBh_0260,i_0261,i): 21 (0.93) 4 (0.93) 1 (0.59) 2 (51.00)
NOVEL PAIR (NBhū_65,7^2, NS_1.1.25_1.1.29): 30 (0.92) 11 (0.92) 2 (0.40) 3 (51.00)
novel pairs (@50): 3

NBhū_66,4^1
(none)

NBhū_66,4^2
NOVEL PAIR (NBhū_66,4^2, NV_098,12_098,14): 33 (0.90) 14 (0.90) 6 (0.29) 1 (83.00)
NOVEL PAIR (NBhū_66,4^2, 

pair #9/1 (NBhū_107,6_108,1, PVin_I,039,i_I,039,ii): 42 (0.94) 13 (0.94) 1 (0.53) 1 (263.00) (CONFIRMED)
expected pairs confirmed (@5): 1/1

NBhū_108,4_108,6
pair #10/1 (NBhū_108,4_108,6, PVin_I,040,i): 2 (0.84) 1 (0.84) 1 (0.61) 1 (223.20) (CONFIRMED)
NOVEL PAIR (NBhū_108,4_108,6, TUS_ii,86,i): 423 (0.51) 308 (0.51) 8 (0.14) 2 (50.20)
expected pairs confirmed (@5): 1/1
novel pairs (@50): 1

NBhū_108,10
pair #11/2 (NBhū_108,10, PVin_I,041,i): 871 (0.50) 428 (0.50) 3 (0.23) 1 (141.80) (CONFIRMED)
pair #12/2 (NBhū_108,10, PV_3.431cd_3.434cd): 34 (0.90) 14 (0.90) 1 (0.25) 2 (66.40) (CONFIRMED)
NOVEL PAIR (NBhū_108,10, PVA_432,vii_432,ix): 743 (0.56) 360 (0.56) 6 (0.18) 3 (59.20)
expected pairs confirmed (@5): 2/2
novel pairs (@50): 1

NBhū_108,20
(none)

NBhū_109,1
pair #13/1 (NBhū_109,1, PV_3.431cd_3.434cd): 23 (0.95) 10 (0.95) 1 (0.38) 1 (104.00) (CONFIRMED)
NOVEL PAIR (NBhū_109,1, PVA_432,vii_432,ix): 441 (0.73) 216 (0.73) 2 (0.35) 2 (97.80)
expected pairs confirmed (@5): 1/1
novel pai

pair #47/5 (NBhū_142,19, PVA_361,xiii_361,xvi): 4975 (0.03) 2633 (0.03) 463 (0.03)  
pair #48/5 (NBhū_142,19, PVSV_010,13_010,15): 2 (0.95) 2 (0.95) 2 (0.44) 3 (77.20) (CONFIRMED)
pair #49/5 (NBhū_142,19, PVSV_010,19_010,21): 4 (0.92) 4 (0.92) 4 (0.31) 2 (90.00) (CONFIRMED)
pair #50/5 (NBhū_142,19, PVin_I,091,i_I,092,i): 9 (0.89) 7 (0.89) 1 (0.45) 1 (93.20) (CONFIRMED)
pair #51/5 (NBhū_142,19, PVin_I,092,ii_I,092,iii): 27 (0.89) 17 (0.89) 3 (0.32) 4 (45.40) (CONFIRMED)
expected pairs confirmed (@5): 4/5

NBhū_143,3^1
(none)

NBhū_143,3^2
(none)

NBhū_143,22^1
(none)

NBhū_143,22^2
(none)

NBhū_144,9
(none)

NBhū_144,14
(none)

NBhū_144,20^1
pair #52/1 (NBhū_144,20^1, PVA_361,xiii_361,xvi): 79 (0.61) 37 (0.61) 1 (0.28) 2 (28.20) (CONFIRMED)
expected pairs confirmed (@5): 1/1

NBhū_144,20^2
(none)

NBhū_145,4_145,8
(none)

NBhū_145,15
pair #53/2 (NBhū_145,15, PVA_360,ix): 191 (0.49) 86 (0.49) 1 (0.40) 1 (154.00) (CONFIRMED)
pair #54/2 (NBhū_145,15, PVA_360,x): 4164 (0.17) 2052 (0.17) 2 (

(none)

NBhū_172,8^2
(none)

NBhū_173,10
(none)

NBhū_173,15
(none)

NBhū_173,24^1
(none)

NBhū_173,24^2
(none)

NBhū_173,24^3
(none)

NBhū_173,24^4
(none)

NBhū_173,24^5
(none)

NBhū_173,24^6
(none)

NBhū_173,24^7
(none)

NBhū_173,24^8
(none)

NBhū_175,15^1
(none)

NBhū_175,15^2
(none)

NBhū_176,8
(none)

NBhū_176,12
NOVEL PAIR (NBhū_176,12, PVin_I,007,i_I,007,iii): 84 (0.82) 55 (0.82) 2 (0.36) 1 (79.00)
NOVEL PAIR (NBhū_176,12, NBṬ_41,i_41,ii): 1 (0.92) 1 (0.92) 13 (0.25) 2 (75.40)
NOVEL PAIR (NBhū_176,12, NB_1.1_1.6): 301 (0.66) 167 (0.66) 11 (0.26) 3 (50.20)
novel pairs (@50): 3

NBhū_177,1
NOVEL PAIR (NBhū_177,1, PVin_I,029,iii_I,030,ii): 113 (0.76) 64 (0.76) 1 (0.35) 1 (307.60)
novel pairs (@50): 1

NBhū_177,12^1
NOVEL PAIR (NBhū_177,12^1, PVA_332,iii_332,vi): 48 (0.77) 32 (0.77) 3 (0.25) 1 (136.00)
NOVEL PAIR (NBhū_177,12^1, PVin_I,029,iii_I,030,ii): 39 (0.78) 26 (0.78) 15 (0.12) 2 (103.40)
NOVEL PAIR (NBhū_177,12^1, PV_3.288ab_3.291ab): 17 (0.86) 12 (0.86) 2 (0.32) 3 (97.00)
NO

(none)

NBhū_196,16
(none)

NBhū_196,24_197,3
(none)

NBhū_197,7
(none)

NBhū_197,12
(none)

NBhū_197,19
(none)

NBhū_197,27
(none)

NBhū_198,5
(none)

NBhū_198,15_198,17
NOVEL PAIR (NBhū_198,15_198,17, PVA_004,vii_004,ix): 23 (0.73) 10 (0.73) 1 (0.52) 1 (176.80)
novel pairs (@50): 1

NBhū_199,5
(none)

NBhū_199,13^1
NOVEL PAIR (NBhū_199,13^1, PVA_025,iii): 17 (0.91) 16 (0.91) 1 (0.52) 1 (337.40)
novel pairs (@50): 1

NBhū_199,13^2
NOVEL PAIR (NBhū_199,13^2, PVA_025,iii): 1 (0.97) 1 (0.97) 1 (0.69) 1 (571.60)
NOVEL PAIR (NBhū_199,13^2, PVA_025,iv_025,vi): 276 (0.85) 272 (0.85) 6 (0.16) 2 (79.20)
novel pairs (@50): 2

NBhū_200,6_200,8
NOVEL PAIR (NBhū_200,6_200,8, PVA_025,iv_025,vi): 20 (0.83) 17 (0.83) 1 (0.72) 1 (354.00)
novel pairs (@50): 1

NBhū_200,12
NOVEL PAIR (NBhū_200,12, PVA_025,vii_025,ix): 3 (0.96) 3 (0.96) 1 (0.68) 1 (380.40)
novel pairs (@50): 1

NBhū_200,17
NOVEL PAIR (NBhū_200,17, PVA_025,x): 4 (0.94) 3 (0.94) 1 (0.44) 1 (302.40)
NOVEL PAIR (NBhū_200,17, PVA_026,i_026,ii

NOVEL PAIR (NBhū_240,6^1, PVSV_024,24^2): 108 (0.79) 60 (0.79) 1 (0.24) 1 (65.00)
NOVEL PAIR (NBhū_240,6^1, PVSV_024,18): 3 (0.95) 1 (0.95) 4 (0.14) 2 (64.00)
NOVEL PAIR (NBhū_240,6^1, PVin_I,056,iii): 92 (0.81) 52 (0.81) 7 (0.12) 3 (64.00)
novel pairs (@50): 3

NBhū_240,6^2
(none)

NBhū_240,22_240,24
NOVEL PAIR (NBhū_240,22_240,24, PVSV_026,02_026,12): 9 (0.82) 5 (0.82) 1 (0.55) 1 (107.00)
NOVEL PAIR (NBhū_240,22_240,24, PVA_567,xi_568,i): 195 (0.62) 85 (0.62) 2 (0.28) 2 (99.80)
novel pairs (@50): 2

NBhū_240,28
NOVEL PAIR (NBhū_240,28, PVSV_026,14): 4 (0.87) 4 (0.87) 1 (0.46) 1 (179.20)
novel pairs (@50): 1

NBhū_241,5_241,14
NOVEL PAIR (NBhū_241,5_241,14, PVSV_027,03_027,07): 56 (0.68) 27 (0.68) 1 (0.47) 1 (161.60)
NOVEL PAIR (NBhū_241,5_241,14, PVSV_027,09): 52 (0.69) 26 (0.69) 2 (0.31) 2 (141.00)
NOVEL PAIR (NBhū_241,5_241,14, PVSV_028,03_028,08): 29 (0.72) 15 (0.72) 3 (0.25) 3 (97.00)
novel pairs (@50): 3

NBhū_241,17
(none)

NBhū_241,27^1
NOVEL PAIR (NBhū_241,27^1, PVSV_027,09):

NOVEL PAIR (NBhū_266,6, PVSV_038,17^2_039,19): 27 (0.77) 11 (0.77) 1 (0.25) 1 (180.20)
novel pairs (@50): 1

NBhū_266,13_266,17
NOVEL PAIR (NBhū_266,13_266,17, PVSV_067,01^4): 190 (0.50) 128 (0.50) 1 (0.18) 1 (97.20)
novel pairs (@50): 1

NBhū_266,22
NOVEL PAIR (NBhū_266,22, PVSV_067,01^1): 101 (0.66) 36 (0.66) 1 (0.20) 1 (94.80)
novel pairs (@50): 1

NBhū_267,2^1
NOVEL PAIR (NBhū_267,2^1, PVSV_069,21_070,14): 2 (0.95) 2 (0.95) 1 (0.45) 1 (306.80)
novel pairs (@50): 1

NBhū_267,2^2
NOVEL PAIR (NBhū_267,2^2, PVSV_073,22^2_074,20): 28 (0.76) 13 (0.76) 2 (0.23) 1 (98.20)
NOVEL PAIR (NBhū_267,2^2, PVSV_072,19^2): 150 (0.63) 67 (0.63) 1 (0.27) 2 (77.20)
novel pairs (@50): 2

NBhū_267,17^1
NOVEL PAIR (NBhū_267,17^1, PVSV_070,16): 3 (0.91) 3 (0.91) 1 (0.40) 1 (193.40)
novel pairs (@50): 1

NBhū_267,17^2
(none)

NBhū_268,9^1
(none)

NBhū_268,9^2
(none)

NBhū_268,24
NOVEL PAIR (NBhū_268,24, PVSV_078,15_078,22): 82 (0.77) 32 (0.77) 2 (0.20) 1 (96.00)
novel pairs (@50): 1

NBhū_269,1
NOVEL PAIR (

(none)

NBhū_293,3^1
(none)

NBhū_293,3^2_293,25
(none)

NBhū_293,29
(none)

NBhū_294,12_294,16^1
NOVEL PAIR (NBhū_294,12_294,16^1, PVin_II,085,i_II,085,iii): 6 (0.84) 2 (0.84) 2 (0.34) 1 (146.20)
NOVEL PAIR (NBhū_294,12_294,16^1, PVSV_008,07_008,12): 25 (0.78) 10 (0.78) 1 (0.37) 2 (123.80)
NOVEL PAIR (NBhū_294,12_294,16^1, PVSV_007,14_007,20): 112 (0.69) 49 (0.69) 3 (0.33) 3 (91.00)
NOVEL PAIR (NBhū_294,12_294,16^1, PVin_II,084,i): 2 (0.87) 1 (0.87) 4 (0.33) 4 (89.20)
novel pairs (@50): 4

NBhū_294,16^2
(none)

NBhū_295,10_295,22
NOVEL PAIR (NBhū_295,10_295,22, PVA_027,x_027,xiii): 307 (0.55) 182 (0.55) 1 (0.21) 1 (92.00)
novel pairs (@50): 1

NBhū_295,27
NOVEL PAIR (NBhū_295,27, PVA_028,i_028,iii): 278 (0.68) 267 (0.68) 1 (0.24) 1 (83.00)
novel pairs (@50): 1

NBhū_296,8
NOVEL PAIR (NBhū_296,8, PVA_028,iv_028,vii): 1 (0.99) 1 (0.99) 1 (0.85) 1 (286.80)
novel pairs (@50): 1

NBhū_296,14
NOVEL PAIR (NBhū_296,14, PVA_028,ix): 3 (0.99) 3 (0.99) 1 (0.81) 1 (327.40)
NOVEL PAIR (NBhū_296,14

NOVEL PAIR (NBhū_329,22^2, NV_138,10^2): 6 (0.94) 4 (0.94) 8 (0.32) 1 (72.60)
NOVEL PAIR (NBhū_329,22^2, NBh_0335,i_0336,i): 29 (0.92) 22 (0.92) 1 (0.44) 2 (68.80)
NOVEL PAIR (NBhū_329,22^2, NV_140,10): 211 (0.62) 118 (0.62) 2 (0.43) 3 (68.20)
novel pairs (@50): 3

NBhū_330,8^1
NOVEL PAIR (NBhū_330,8^1, NV_139,05^2): 11 (0.99) 11 (0.99) 7 (0.16) 1 (51.00)
novel pairs (@50): 1

NBhū_330,8^2
(none)

NBhū_330,21^1
NOVEL PAIR (NBhū_330,21^1, NBh_0342,i): 13 (0.90) 8 (0.90) 2 (0.21) 1 (56.60)
novel pairs (@50): 1

NBhū_330,21^2
(none)

NBhū_331,4^1
NOVEL PAIR (NBhū_331,4^1, NBh_0339,i): 4 (0.96) 2 (0.96) 4 (0.32) 1 (53.00)
novel pairs (@50): 1

NBhū_331,4^2
(none)

NBhū_331,17
(none)

NBhū_331,25_331,28
(none)

NBhū_331,31
(none)

NBhū_332,7
(none)

NBhū_332,13
NOVEL PAIR (NBhū_332,13, NBh_0359,ii_0360,i): 22 (0.94) 14 (0.94) 4 (0.19) 1 (111.60)
NOVEL PAIR (NBhū_332,13, NV_496,05_496,07): 12 (0.95) 6 (0.95) 1 (0.44) 2 (111.00)
NOVEL PAIR (NBhū_332,13, NBh_1099,i_1099,iii): 13 (0.95) 7 (0.95

NOVEL PAIR (NBhū_351,25, NBh_1133,iv_1133,vii): 6112 (0.04) 2865 (0.04) 1 (0.37) 1 (68.00)
NOVEL PAIR (NBhū_351,25, NV_513,06_513,08): 1961 (0.23) 837 (0.23) 5 (0.21) 2 (57.60)
novel pairs (@50): 2

NBhū_352,10
NOVEL PAIR (NBhū_352,10, NBh_1136,i_1136,ii): 85 (0.81) 46 (0.81) 1 (0.55) 1 (100.40)
NOVEL PAIR (NBhū_352,10, NBh_1136,iii_1137,i): 1895 (0.17) 954 (0.17) 4 (0.28) 2 (50.80)
novel pairs (@50): 2

NBhū_352,17
NOVEL PAIR (NBhū_352,17, NBh_1136,iii_1137,i): 2 (0.96) 2 (0.96) 1 (0.50) 1 (118.40)
novel pairs (@50): 1

NBhū_352,22
NOVEL PAIR (NBhū_352,22, NBh_1137,ii_1138,i): 371 (0.55) 190 (0.55) 1 (0.56) 1 (53.00)
NOVEL PAIR (NBhū_352,22, NS_5.1.23_5.1.28): 470 (0.52) 231 (0.52) 2 (0.28) 2 (53.00)
novel pairs (@50): 2

NBhū_352,27
NOVEL PAIR (NBhū_352,27, NBh_1139,i_1139,iii): 5705 (0.08) 2777 (0.08) 1 (0.20) 1 (52.00)
novel pairs (@50): 1

NBhū_353,19
NOVEL PAIR (NBhū_353,19, NBh_1140,i_1140,ii): 271 (0.64) 193 (0.64) 1 (0.38) 1 (85.00)
NOVEL PAIR (NBhū_353,19, NS_5.1.29_5.1.33): 

(none)

NBhū_368,26
(none)

NBhū_369,9
NOVEL PAIR (NBhū_369,9, VN_49,15_49,18): 65 (0.98) 44 (0.98) 1 (0.65) 1 (70.40)
NOVEL PAIR (NBhū_369,9, NBh_1186,i_1186,ii): 35 (0.99) 28 (0.99) 2 (0.43) 2 (70.40)
novel pairs (@50): 2

NBhū_369,13
NOVEL PAIR (NBhū_369,13, NBh_1187,i_1187,iii): 236 (0.66) 93 (0.66) 2 (0.55) 1 (91.00)
NOVEL PAIR (NBhū_369,13, VN_50,01_50,05): 2 (0.89) 2 (0.89) 3 (0.31) 2 (57.20)
NOVEL PAIR (NBhū_369,13, NS_5.2.14_5.2.24): 1017 (0.35) 515 (0.35) 85 (0.09) 3 (55.20)
novel pairs (@50): 3

NBhū_369,22
(none)

NBhū_369,26
NOVEL PAIR (NBhū_369,26, NBh_1187,iv_1187,v): 424 (0.52) 251 (0.52) 4 (0.15) 1 (52.00)
novel pairs (@50): 1

NBhū_370,11
NOVEL PAIR (NBhū_370,11, VN_50,14): 87 (0.92) 54 (0.92) 2 (0.27) 1 (106.60)
NOVEL PAIR (NBhū_370,11, VN_50,21_51,06): 166 (0.85) 94 (0.85) 1 (0.33) 2 (82.20)
novel pairs (@50): 2

NBhū_370,21
NOVEL PAIR (NBhū_370,21, NBh_1189,i_1189,ii): 27 (0.99) 20 (0.99) 1 (0.84) 1 (107.60)
NOVEL PAIR (NBhū_370,21, VN_52,06_52,11): 14 (0.99) 11 (0

(none)

NBhū_412,21
(none)

NBhū_412,27_413,3
(none)

NBhū_413,6_413,10
NOVEL PAIR (NBhū_413,6_413,10, ŚV_6.104ab_6.107ab): 256 (0.65) 124 (0.65) 1 (0.19) 1 (52.00)
novel pairs (@50): 1

NBhū_413,14
(none)

NBhū_413,22
(none)

NBhū_413,28
(none)

NBhū_414,5
NOVEL PAIR (NBhū_414,5, ŚV_6.104ab_6.107ab): 279 (0.54) 133 (0.54) 1 (0.21) 1 (52.00)
novel pairs (@50): 1

NBhū_414,13
(none)

NBhū_414,17
(none)

NBhū_414,23
(none)

NBhū_415,9
(none)

NBhū_415,16_415,24
(none)

NBhū_416,2
(none)

NBhū_416,7
(none)

NBhū_416,13
(none)

NBhū_416,19
(none)

NBhū_417,7
(none)

NBhū_417,13
(none)

NBhū_417,21
(none)

NBhū_417,25
(none)

NBhū_418,7
(none)

NBhū_418,12
(none)

NBhū_418,22
(none)

NBhū_418,28
(none)

NBhū_419,9
(none)

NBhū_419,14
NOVEL PAIR (NBhū_419,14, NBh_0168,i_0172,i): 47 (0.81) 14 (0.81) 1 (0.37) 1 (64.80)
novel pairs (@50): 1

NBhū_419,23
(none)

NBhū_420,5
(none)

NBhū_420,10
(none)

NBhū_420,18
(none)

NBhū_420,24
(none)

NBhū_421,13
(none)

NBhū_421,25^1
(none)

NBhū_421,25^2


(none)

NBhū_463,8
(none)

NBhū_463,14_463,26
(none)

NBhū_464,4^1
(none)

NBhū_464,4^2
(none)

NBhū_464,20
(none)

NBhū_465,4
(none)

NBhū_465,12
(none)

NBhū_465,18^1
(none)

NBhū_465,18^2
(none)

NBhū_466,6^1
(none)

NBhū_466,6^2
(none)

NBhū_466,6^3
(none)

NBhū_467,2_467,7
NOVEL PAIR (NBhū_467,2_467,7, PVA_032,ix_032,xi): 73 (0.68) 53 (0.68) 1 (0.48) 1 (136.20)
NOVEL PAIR (NBhū_467,2_467,7, PV_2.007ab_2.008cd): 162 (0.59) 99 (0.59) 2 (0.42) 2 (98.40)
NOVEL PAIR (NBhū_467,2_467,7, PV_2.009ab_2.012ab): 297 (0.48) 167 (0.48) 3 (0.33) 3 (92.00)
NOVEL PAIR (NBhū_467,2_467,7, PVA_034,vi): 7416 (0.05) 3747 (0.05) 5 (0.15) 4 (52.00)
novel pairs (@50): 4

NBhū_467,17
(none)

NBhū_467,22
(none)

NBhū_468,5^1
(none)

NBhū_468,5^2
(none)

NBhū_468,19
(none)

NBhū_468,24
(none)

NBhū_469,2_469,6
NOVEL PAIR (NBhū_469,2_469,6, PVA_033,ix^1): 7719 (0.06) 3547 (0.06) 1 (0.22) 1 (88.00)
novel pairs (@50): 1

NBhū_469,16_469,19
(none)

NBhū_469,23
NOVEL PAIR (NBhū_469,23, PV_2.032ab_2.033cd): 2 (0.9