In [1]:
import pandas as pd
import numpy as np
from utils.data_helper import get_markable_dataframe, get_embedding_variables
from model_builders.coreference_classifier import CoreferenceClassifierModelBuilder
from functools import reduce
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
from utils.clusterers import BestFirstClusterer, get_anaphora_scores_by_antecedent, ClosestFirstClusterer
from utils.scorers import MUCScorer, B3Scorer, AverageScorer
from utils.data_structures import UFDS

In [2]:
# Locations of the embedding helper files consumed by get_embedding_variables.
embedding_indexes_file_path = 'helper_files/embedding/embedding_indexes.txt'
indexed_embedding_file_path = 'helper_files/embedding/indexed_embedding.txt'

# word_vector and idx_by_word are passed to get_markable_dataframe later on;
# word_by_idx is used by get_markable_text to turn token ids back into words.
(
    word_vector,
    embedding_matrix,
    idx_by_word,
    word_by_idx,
) = get_embedding_variables(
    embedding_indexes_file_path,
    indexed_embedding_file_path,
)

In [3]:
markables = get_markable_dataframe(
    "data/testing/markables_with_predicted_singleton.csv",
    word_vector,
    idx_by_word,
)

# Ids of the markables whose `is_singleton` vector has a positive second
# component; this set is handed to get_anaphora_scores_by_antecedent in
# evaluate() for the "with singleton classifier" runs.
singletons = set(
    markables.loc[markables['is_singleton'].map(lambda x: x[1] > 0), 'id']
)
markables.head()

Unnamed: 0,id,text,is_pronoun,entity_type,is_proper_name,is_first_person,previous_words,next_words,is_singleton
0,1916,"[1263, 1264, 1968, 1395]",0,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 1]",1,0,[],"[999, 379, 1161, 213, 27, 1263, 1969, 1188, 14...","[0.0, 1.0]"
1,1917,[213],1,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0]",0,0,"[1263, 1264, 1968, 1395, 999, 379, 1161]","[27, 1263, 1969, 1188, 1470, 25, 1161, 63, 424...","[1.0, 0.0]"
2,1918,"[1263, 1969, 1188]",0,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 1]",1,0,"[1263, 1264, 1968, 1395, 999, 379, 1161, 213, 27]","[1470, 25, 1161, 63, 424, 1223, 25, 1415, 1161...","[0.0, 1.0]"
3,1919,"[1470, 25, 1161]",0,"[0, 1, 0, 1, 0, 0, 0, 0, 0, 0]",0,0,"[1968, 1395, 999, 379, 1161, 213, 27, 1263, 19...","[63, 424, 1223, 25, 1415, 1161, 876, 344, 213,...","[0.0, 1.0]"
4,1920,[424],0,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,"[1161, 213, 27, 1263, 1969, 1188, 1470, 25, 11...","[1223, 25, 1415, 1161, 876, 344, 213, 406, 122...","[0.0, 1.0]"


In [4]:
pairs = pd.read_csv("data/testing/mention_pairs.csv")

# Two-column one-hot encoding of the gold `is_coreference` flag, shaped like
# the classifier output so it can go through the same clustering helpers.
label = np.vstack(
    to_categorical(pairs.is_coreference, num_classes=2)
)

# Reference chains that every scorer call in this notebook compares against.
label_chains = ClosestFirstClusterer().get_chains(
    get_anaphora_scores_by_antecedent(pairs.m1_id, pairs.m2_id, label)
)

pairs.head()

Unnamed: 0,m1_id,m2_id,is_exact_match,is_words_match,is_substring,is_abbreviation,is_appositive,is_nearest_candidate,sentence_distance,word_distance,markable_distance,is_coreference
0,1916,1917,0,0,0,0,0,1,0,3,1,1
1,1916,1918,0,0,0,0,0,0,0,5,2,0
2,1916,1919,0,0,0,0,0,0,0,8,3,0
3,1916,1920,0,0,0,0,0,0,0,12,4,0
4,1916,1921,0,0,0,0,0,0,0,13,5,0


In [5]:
# Fixed sequence lengths (in tokens) used by get_data when it pads the
# markable text and its surrounding context with pad_sequences.
max_text_length = 10
# Only the last `max_prev_words_length` preceding words are kept.
max_prev_words_length = 10
# Only the first `max_next_words_length` following words are kept.
max_next_words_length = 10

def get_data(markable_ids):
    """Collect the per-markable model inputs for a sequence of markable ids.

    Rows are looked up in the global ``markables`` frame through its ``id``
    column and returned in the same order as ``markable_ids`` (ids may
    repeat). If an id occurs more than once in ``markables``, its first row
    is used.

    Args:
        markable_ids: iterable of markable ids (e.g. ``pairs.m1_id``).

    Returns:
        A 5-tuple ``(data_text, data_previous_words, data_next_words,
        data_syntactic, is_singleton)`` with one row per requested id:
        the padded token ids of the markable, the padded last
        ``max_prev_words_length`` preceding tokens (pre-padded), the padded
        first ``max_next_words_length`` following tokens (post-padded), the
        flattened syntactic feature vector, and the stacked ``is_singleton``
        vectors.

    Raises:
        KeyError: if an id is not present in ``markables['id']``.
    """
    # Build the id -> row-label map once instead of rescanning the whole
    # `id` column for every requested id; setdefault keeps the first match.
    first_index_by_id = {}
    for row_label, markable_id in zip(markables.index, markables['id']):
        first_index_by_id.setdefault(markable_id, row_label)

    indices = [first_index_by_id[markable_id] for markable_id in markable_ids]
    data = markables.loc[indices]

    data_text = pad_sequences(data.text, maxlen=max_text_length, padding='post')
    data_previous_words = pad_sequences(
        data.previous_words.map(lambda seq: seq[-max_prev_words_length:]),
        maxlen=max_prev_words_length,
        padding='pre',
    )
    data_next_words = pad_sequences(
        data.next_words.map(lambda seq: seq[:max_next_words_length]),
        maxlen=max_next_words_length,
        padding='post',
    )

    # Each row mixes scalar flags with a list-valued `entity_type`; flatten
    # every row into one flat feature list, preserving column order.
    syntactic_rows = data[['is_pronoun', 'entity_type', 'is_proper_name', 'is_first_person']].values
    flattened_rows = []
    for row in syntactic_rows:
        flat = []
        for value in row:
            if type(value) is list:
                flat.extend(value)
            else:
                flat.append(value)
        flattened_rows.append(flat)
    data_syntactic = np.array(flattened_rows)

    is_singleton = np.vstack(data.is_singleton)

    return data_text, data_previous_words, data_next_words, data_syntactic, is_singleton

def get_pair_data(markable_ids_1, markable_ids_2):
    """Fetch get_data() features for both sides of a mention pair list.

    Returns a flat 10-tuple in which each feature of the first markable is
    immediately followed by the same feature of the second one:
    ``(text_1, text_2, prev_1, prev_2, next_1, next_2, syntactic_1,
    syntactic_2, is_singleton_1, is_singleton_2)``.
    """
    first_side = get_data(markable_ids_1)
    second_side = get_data(markable_ids_2)

    # Interleave the two 5-tuples feature by feature.
    interleaved = []
    for feature_1, feature_2 in zip(first_side, second_side):
        interleaved.append(feature_1)
        interleaved.append(feature_2)
    return tuple(interleaved)

def get_relation_data(mention_pairs):
    """Return only the pairwise relation feature columns of ``mention_pairs``.

    The id columns and the ``is_coreference`` label are left out; column
    order follows the list below.
    """
    relation_columns = [
        'is_exact_match',
        'is_words_match',
        'is_substring',
        'is_abbreviation',
        'is_appositive',
        'is_nearest_candidate',
        'sentence_distance',
        'word_distance',
        'markable_distance',
    ]
    return mention_pairs[relation_columns]

# Compute Baseline Score

In [6]:
baseline_result_file_path = 'baseline/test_result.txt'

baseline_ufds = UFDS()

# Register every markable id that appears in a mention pair before joining.
for m1, m2 in zip(pairs.m1_id, pairs.m2_id):
    baseline_ufds.init_id(m1, m2)

# Each non-empty line holds two markable ids separated by ', ' that the
# baseline linked together. The `with` block closes the file handle, and
# blank lines (e.g. a trailing newline) are skipped instead of crashing int().
with open(baseline_result_file_path, 'r') as baseline_result_file:
    for line in baseline_result_file:
        if not line.strip():
            continue
        line = line.split(', ')
        baseline_ufds.join(int(line[0]), int(line[1]))

baseline_chains = baseline_ufds.get_chain_list()

# Score the baseline chains against the gold chains built from the labels.
print('MUC: ', MUCScorer().get_scores(baseline_chains, label_chains))
print('B3: ', B3Scorer().get_scores(baseline_chains, label_chains))
print('Average: ', AverageScorer([MUCScorer(), B3Scorer()]).get_scores(baseline_chains, label_chains))

MUC:  (0.5544554455445545, 0.7272727272727273, 0.6292134831460674)
B3:  (0.3124361294443262, 0.6732829670329671, 0.4268110965737344)
Average:  (0.5280122898599009, 0.5280122898599009, 0.5280122898599009)


# Test Models

In [7]:
# Features for both markables of every test pair, in the interleaved order
# returned by get_pair_data.
(
    text_1, text_2,
    prev_1, prev_2,
    next_1, next_2,
    syntactic_1, syntactic_2,
    is_singleton_1, is_singleton_2,
) = get_pair_data(pairs.m1_id, pairs.m2_id)

# Pair-level relation features (string matches and distances).
relation = get_relation_data(pairs)

In [8]:
# Cache of loaded models keyed by model name; re-running this cell empties it.
models = {}

def get_model(features, data_generation, epoch):
    """Return the saved classifier for a configuration, loading it at most once.

    The model name is the feature names, ``data_generation`` and ``epoch``
    joined with underscores; the model is read from
    ``models/coreference_classifiers/<name>.model`` on first use and served
    from the ``models`` cache afterwards.
    """
    name = '_'.join(list(features) + [data_generation, str(epoch)])

    if name in models:
        return models[name]

    models[name] = load_model(f'models/coreference_classifiers/{name}.model')
    return models[name]

In [26]:
# Threshold grid scanned by get_sorted_scores: 0, then 1x..9x of every base
# value (0.1, 0.2, ..., 0.9, 0.01, ..., 0.00009).
base_thresholds = [0.1, 0.01, 0.001, 0.0001, 0.00001]

thresholds = [0]
for base in base_thresholds:
    thresholds.extend(base * multiplier for multiplier in range(1, 10))

# Scorer instances shared by get_sorted_scores for every threshold it tries.
muc_scorer = MUCScorer()
b3_scorer = B3Scorer()
# Wraps the two scorers above. get_sorted_scores currently averages the MUC
# and B3 F1 values by hand; its call to this scorer is commented out.
average_scorer = AverageScorer([muc_scorer, b3_scorer])

def get_sorted_scores(clusterer, pred):
    """Score the clusterer at every threshold and rank the results, best first.

    For each value in ``thresholds`` the predicted chains are built with
    ``clusterer.get_chains(pred, threshold)`` and compared to ``label_chains``
    with the MUC and B3 scorers.

    Returns:
        A list of ``(average_f1, muc, b3, threshold)`` tuples sorted in
        descending tuple order, where ``muc`` and ``b3`` are the
        ``(precision, recall, f1)`` tuples from the scorers and
        ``average_f1`` is the mean of their two F1 values.
    """
    def score_at(threshold):
        predicted_chains = clusterer.get_chains(pred, threshold)
        muc = muc_scorer.get_scores(predicted_chains, label_chains)
        b3 = b3_scorer.get_scores(predicted_chains, label_chains)
        # The average is computed here rather than through average_scorer.
        return ((muc[2] + b3[2]) / 2, muc, b3, threshold)

    ranked = [score_at(threshold) for threshold in thresholds]
    ranked.sort(reverse=True)
    return ranked

def evaluate(features, data_generation, epoch):
    """Run one saved classifier on the test pairs and print its best scores.

    The model is selected with ``get_model(features, data_generation, epoch)``
    and fed the test inputs of every feature group named in ``features``
    ('words', 'context', 'syntactic'), always in that order. Its predictions
    are clustered four ways (closest-first and best-first, each with and
    without the predicted ``singletons`` set); for each, the top entry of
    ``get_sorted_scores`` is printed. Nothing is returned.
    """
    model = get_model(features, data_generation, epoch)

    # Inputs contributed by each feature group, in the order the model expects.
    feature_groups = (
        ('words', [text_1, text_2]),
        ('context', [prev_1, prev_2, next_1, next_2]),
        ('syntactic', [syntactic_1, syntactic_2, relation]),
    )
    test_features = []
    for group_name, group_inputs in feature_groups:
        if group_name in features:
            test_features += group_inputs

    print('getting anaphora scores by antecedent dict')
    raw_pred = model.predict(test_features, verbose=1)
    scores_plain = get_anaphora_scores_by_antecedent(pairs.m1_id, pairs.m2_id, raw_pred)
    scores_with_singletons = get_anaphora_scores_by_antecedent(pairs.m1_id, pairs.m2_id, raw_pred, singletons)

    # (progress message, result label, clusterer class, anaphora scores)
    runs = (
        ('get sorted_scores_without_sc_closest',
         'Without singleton classifier, closest-first:',
         ClosestFirstClusterer, scores_plain),
        ('get sorted_scores_with_sc_closest',
         'With singleton classifier, closest-first:',
         ClosestFirstClusterer, scores_with_singletons),
        ('get sorted_scores_without_sc_best',
         'Without singleton classifier, best-first:',
         BestFirstClusterer, scores_plain),
        ('get sorted_scores_with_sc_best',
         'With singleton classifier, best-first:',
         BestFirstClusterer, scores_with_singletons),
    )

    for run_number, (progress_message, result_label, clusterer_class, anaphora_scores) in enumerate(runs):
        # Blank line between runs, but not before the first or after the last.
        if run_number > 0:
            print()

        print(progress_message)
        ranked_scores = get_sorted_scores(clusterer_class(), anaphora_scores)
        print(result_label, ranked_scores[0])

## Budi

### Words + Context + Syntactic

In [10]:
evaluate(['words', 'context', 'syntactic'], 'budi', 5)

W0401 05:45:48.619380 139999969802048 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/initializers.py:111: calling RandomUniform.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0401 05:45:48.623163 139999969802048 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/initializers.py:135: calling RandomNormal.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0401 05:45:48.624130 139999969802048 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:96: calling Zeros.__init__ (from tensorflow.python.ops.init_ops) 

getting anaphora scores by antecedent dict
get sorted_scores_without_sc_closest
Without singleton classifier, closest-first: (0.40268219804618965, (0.4111111111111111, 0.4805194805194805, 0.4431137724550898), (0.3176070428171268, 0.4214972527472528, 0.36225062363728955), 0.07)

get sorted_scores_with_sc_closest
With singleton classifier, closest-first: (0.5561313845271152, (0.5894736842105263, 0.7272727272727273, 0.6511627906976745), (0.3505823974005792, 0.6733745421245423, 0.46109997835655586), 0.002)

get sorted_scores_without_sc_best
Without singleton classifier, best-first: (0.43106769113480564, (0.4375, 0.5454545454545454, 0.48554913294797686), (0.31022056455940744, 0.47907509157509165, 0.3765862493216344), 0.06)

get sorted_scores_with_sc_best
With singleton classifier, best-first: (0.5561313845271152, (0.5894736842105263, 0.7272727272727273, 0.6511627906976745), (0.3505823974005792, 0.6733745421245423, 0.46109997835655586), 0.002)


In [11]:
evaluate(['words', 'context', 'syntactic'], 'budi', 10)

getting anaphora scores by antecedent dict
get sorted_scores_without_sc_closest
Without singleton classifier, closest-first: (0.47892857821862, (0.64, 0.4155844155844156, 0.5039370078740157), (0.6042617960426179, 0.3634844322344322, 0.4539201485632242), 0.07)

get sorted_scores_with_sc_closest
With singleton classifier, closest-first: (0.5456840460771337, (0.5591397849462365, 0.6753246753246753, 0.6117647058823529), (0.3860544217687075, 0.6329899267399269, 0.47960338627191446), 0.001)

get sorted_scores_without_sc_best
Without singleton classifier, best-first: (0.5009281618024803, (0.6538461538461539, 0.44155844155844154, 0.5271317829457364), (0.6009259259259259, 0.3923305860805861, 0.4747245406592243), 0.06)

get sorted_scores_with_sc_best
With singleton classifier, best-first: (0.5516078956345187, (0.5698924731182796, 0.6883116883116883, 0.623529411764706), (0.3835263835263835, 0.6402014652014654, 0.4796863795043314), 0.001)


In [12]:
evaluate(['words', 'context', 'syntactic'], 'budi', 20)

getting anaphora scores by antecedent dict
get sorted_scores_without_sc_closest
Without singleton classifier, closest-first: (0.4127603382276283, (0.8214285714285714, 0.2987012987012987, 0.4380952380952381), (0.8118518518518518, 0.2544184981684982, 0.3874254383600185), 0.30000000000000004)

get sorted_scores_with_sc_closest
With singleton classifier, closest-first: (0.5473389599833366, (0.5670103092783505, 0.7142857142857143, 0.6321839080459771), (0.3552794796157628, 0.6623855311355312, 0.4624940119206961), 1e-05)

get sorted_scores_without_sc_best
Without singleton classifier, best-first: (0.4259544779106286, (0.373134328358209, 0.6493506493506493, 0.4739336492890996), (0.2725667712194659, 0.6163232600732601, 0.37797530653215766), 0.0009000000000000001)

get sorted_scores_with_sc_best
With singleton classifier, best-first: (0.5462740753170315, (0.5670103092783505, 0.7142857142857143, 0.6321839080459771), (0.3527721049844943, 0.6623855311355312, 0.46036424258808595), 1e-05)


## Gilang

### Words + Context + Syntactic

In [25]:
import importlib
import utils.scorers

# Pick up on-disk edits to utils/scorers.py without restarting the kernel.
importlib.reload(utils.scorers)

# reload() does not rebind names that were copied out with
# `from utils.scorers import ...`, so re-import them; otherwise later cells
# keep using the pre-reload classes. Scorer instances created earlier
# (muc_scorer, b3_scorer, average_scorer) must be re-created afterwards.
from utils.scorers import MUCScorer, B3Scorer, AverageScorer

# Keep the module as the last expression so the cell still shows which file
# was loaded.
utils.scorers

<module 'utils.scorers' from '/home/m13515133/ta/utils/scorers.py'>

In [27]:
evaluate(['words', 'context', 'syntactic'], 'gilang', 5)

getting anaphora scores by antecedent dict
get sorted_scores_without_sc_closest
Without singleton classifier, closest-first: (0.4999867495480671, (0.5384615384615384, 0.5454545454545454, 0.5419354838709678), (0.4154696705667579, 0.5103250915750918, 0.4580380152251664), 0.4)

get sorted_scores_with_sc_closest
With singleton classifier, closest-first: (0.5326126495434207, (0.56, 0.7272727272727273, 0.632768361581921), (0.31850376634859395, 0.6733745421245423, 0.4324569375049205), 0.07)

get sorted_scores_without_sc_best
Without singleton classifier, best-first: (0.5255750302135886, (0.5333333333333333, 0.7272727272727273, 0.6153846153846153), (0.32275970695970707, 0.6705357142857145, 0.4357654450425619), 0.30000000000000004)

get sorted_scores_with_sc_best
With singleton classifier, best-first: (0.5630509018557133, (0.6144578313253012, 0.6623376623376623, 0.6375), (0.40797247307148304, 0.6089514652014653, 0.4886018037114265), 0.30000000000000004)


In [28]:
evaluate(['words', 'context', 'syntactic'], 'gilang', 10)

getting anaphora scores by antecedent dict
get sorted_scores_without_sc_closest
Without singleton classifier, closest-first: (0.5647287334026654, (0.5714285714285714, 0.6753246753246753, 0.619047619047619), (0.43289034411915767, 0.6217490842490843, 0.5104098477577117), 0.30000000000000004)

get sorted_scores_with_sc_closest
With singleton classifier, closest-first: (0.5556106498269193, (0.5822784810126582, 0.5974025974025974, 0.5897435897435898), (0.49021035598705504, 0.5570054945054945, 0.5214777099102488), 0.30000000000000004)

get sorted_scores_without_sc_best
Without singleton classifier, best-first: (0.6036155932625624, (0.6043956043956044, 0.7142857142857143, 0.6547619047619048), (0.47355946522613196, 0.6629349816849819, 0.5524692817632202), 0.30000000000000004)

get sorted_scores_with_sc_best
With singleton classifier, best-first: (0.5875367585402943, (0.620253164556962, 0.6363636363636364, 0.6282051282051282), (0.5118932038834951, 0.5869734432234434, 0.5468683888754604), 0.3000

In [29]:
evaluate(['words', 'context', 'syntactic'], 'gilang', 20)

getting anaphora scores by antecedent dict
get sorted_scores_without_sc_closest
Without singleton classifier, closest-first: (0.5184275982323026, (0.5353535353535354, 0.6883116883116883, 0.6022727272727273), (0.3249547583563977, 0.6558379120879121, 0.4345824691918777), 0.5)

get sorted_scores_with_sc_closest
With singleton classifier, closest-first: (0.5216915111575184, (0.5377358490566038, 0.7402597402597403, 0.6229508196721312), (0.3027465021317481, 0.6877976190476193, 0.42043220264290565), 0.0004)

get sorted_scores_without_sc_best
Without singleton classifier, best-first: (0.5282585449030317, (0.5555555555555556, 0.7142857142857143, 0.6250000000000001), (0.3143578410385132, 0.6878891941391942, 0.43151708980606335), 0.5)

get sorted_scores_with_sc_best
With singleton classifier, best-first: (0.541768714251329, (0.5670103092783505, 0.7142857142857143, 0.6321839080459771), (0.3419334288899506, 0.6637591575091577, 0.45135352045668087), 0.30000000000000004)


## Soon

### Words + Context + Syntactic

In [30]:
evaluate(['words', 'context', 'syntactic'], 'soon', 5)

getting anaphora scores by antecedent dict
get sorted_scores_without_sc_closest
Without singleton classifier, closest-first: (0.22233896688169175, (0.16981132075471697, 0.23376623376623376, 0.19672131147540983), (0.22547892720306514, 0.27541208791208793, 0.24795662228797363), 0.4)

get sorted_scores_with_sc_closest
With singleton classifier, closest-first: (0.5367740041321248, (0.5454545454545454, 0.7012987012987013, 0.6136363636363636), (0.35511666204943515, 0.6524496336996338, 0.459911644627886), 0.03)

get sorted_scores_without_sc_best
Without singleton classifier, best-first: (0.27800606260551897, (0.2222222222222222, 0.36363636363636365, 0.27586206896551724), (0.22254257605820107, 0.37799908424908424, 0.28015005624552075), 0.30000000000000004)

get sorted_scores_with_sc_best
With singleton classifier, best-first: (0.5322101478719172, (0.5454545454545454, 0.7012987012987013, 0.6136363636363636), (0.3453133730676103, 0.6490155677655678, 0.4507839321074709), 0.03)


In [31]:
evaluate(['words', 'context', 'syntactic'], 'soon', 10)

getting anaphora scores by antecedent dict
get sorted_scores_without_sc_closest
Without singleton classifier, closest-first: (0.24958934372675887, (0.19327731092436976, 0.2987012987012987, 0.23469387755102045), (0.21631762652705064, 0.3402472527472528, 0.2644848099024973), 0.8)

get sorted_scores_with_sc_closest
With singleton classifier, closest-first: (0.5409834191712287, (0.5591397849462365, 0.6753246753246753, 0.6117647058823529), (0.37772476061949745, 0.6226419413919415, 0.47020213246010456), 0.2)

get sorted_scores_without_sc_best
Without singleton classifier, best-first: (0.2960392335260301, (0.25165562913907286, 0.4935064935064935, 0.33333333333333337), (0.17898462246288335, 0.466735347985348, 0.25874513371872687), 0.7000000000000001)

get sorted_scores_with_sc_best
With singleton classifier, best-first: (0.5523230038808579, (0.5806451612903226, 0.7012987012987013, 0.6352941176470589), (0.370000645000645, 0.641643772893773, 0.4693518901146568), 0.2)


In [32]:
evaluate(['words', 'context', 'syntactic'], 'soon', 20)

getting anaphora scores by antecedent dict
get sorted_scores_without_sc_closest
Without singleton classifier, closest-first: (0.27714422095212093, (0.22321428571428573, 0.3246753246753247, 0.2645502645502646), (0.25402621722846447, 0.33713369963369966, 0.2897381773539773), 0.9)

get sorted_scores_with_sc_closest
With singleton classifier, closest-first: (0.5358512357691024, (0.5632183908045977, 0.6363636363636364, 0.5975609756097561), (0.4033739960345465, 0.5750228937728938, 0.4741414959284488), 0.04)

get sorted_scores_without_sc_best
Without singleton classifier, best-first: (0.3845036743049909, (0.34710743801652894, 0.5454545454545454, 0.42424242424242425), (0.26655971479500895, 0.48791208791208796, 0.34476492436755746), 0.8)

get sorted_scores_with_sc_best
With singleton classifier, best-first: (0.5581816067269094, (0.5975609756097561, 0.6363636363636364, 0.6163522012578616), (0.4516874711049469, 0.5599130036630038, 0.5000110121959573), 0.1)


In [35]:
def get_markable_text(idx):
    """Return the words of the markable whose ``id`` equals ``idx``.

    The token ids stored in the ``text`` column of the first matching
    ``markables`` row are mapped back to words through ``word_by_idx``.
    """
    token_ids = markables.loc[markables['id'] == idx, 'text'].values[0]

    words = []
    for token_id in token_ids:
        words.append(word_by_idx[token_id])
    return words

In [36]:
# Go through get_model rather than indexing `models` directly: it builds the
# same 'words_context_syntactic_gilang_10' key, but also loads the model when
# the matching evaluate(...) cell has not been run in this kernel.
best_model = get_model(['words', 'context', 'syntactic'], 'gilang', 10)
raw_pred_best = best_model.predict(
    [text_1, text_2, prev_1, prev_2, next_1, next_2, syntactic_1, syntactic_2, relation],
    verbose=1,
)
pred_best = get_anaphora_scores_by_antecedent(pairs.m1_id, pairs.m2_id, raw_pred_best)
# 0.30000000000000004 is the grid value 0.1 * 3, the threshold reported for
# this model's best-first run without the singleton classifier.
pred_chains_best = BestFirstClusterer().get_chains(pred_best, 0.30000000000000004)



In [37]:
[[get_markable_text(b) for b in a] for a in pred_chains_best if len(a) > 1]

[[['deputi', 'gubernur'],
  ['hartadi', 'a', 'sarwono'],
  ['ia'],
  ['hartadi'],
  ['hartadi'],
  ['hartadi'],
  ['ia']],
 [['bi'], ['bi']],
 [['nya'], ['nya'], ['nya']],
 [['ansari'], ['ansari']],
 [['menteri', 'keuangan', 'sri', 'mulyani'],
  ['sri', 'mulyani'],
  ['mulyani'],
  ['mulyani']],
 [['pt', 'astra', 'agro', 'lestari', 'tbk'],
  ['aali'],
  ['nya'],
  ['direktur', 'aali'],
  ['santosa'],
  ['nya'],
  ['nya']],
 [['deputi', 'senior'],
  ['miranda', 's', 'goeltom'],
  ['miranda'],
  ['miranda'],
  ['nya'],
  ['nya']],
 [['deputi', 'gubernur', 'senior', 'bi'],
  ['miranda', 's', 'goeltom'],
  ['miranda'],
  ['nya']],
 [['bank', 'mandiri'],
  ['nya'],
  ['nya'],
  ['direktur', 'teknologi', 'dan', 'operasional', 'bank', 'mandiri'],
  ['sasmita'],
  ['dia'],
  ['bank', 'mandiri'],
  ['ia'],
  ['nya'],
  ['bank', 'mandiri']],
 [['nya'], ['nya'], ['nya']],
 [['menteri', 'keuangan', 'sri', 'mulyani', 'indrawati'],
  ['nya'],
  ['menko', 'perekonomian', 'boediono'],
  ['nya'],
  ['b

In [31]:
[[get_markable_text(b) for b in a] for a in baseline_chains if len(a) > 1]

[[['deputi', 'gubernur'], ['bank', 'indonesia']],
 [['hartadi', 'a', 'sarwono'],
  ['ia'],
  ['hartadi'],
  ['jakarta'],
  ['hartadi'],
  ['hartadi'],
  ['kami'],
  ['sekretaris', 'perusahaan', 'astra', 'otoparts'],
  ['kartina', 'rahayu'],
  ['dia'],
  ['dia'],
  ['nya'],
  ['nya'],
  ['menkeu', 'sri', 'mulyani', 'indrawati'],
  ['dia']],
 [['ia'],
  ['dirjend',
   'industri',
   'logam',
   'mesin',
   'tekstil',
   'departemen',
   'perindustrian',
   'ansari',
   'bukhari'],
  ['ansari'],
  ['ansari']],
 [['indonesia', 'investor', 'forum'], ['indonesia', 'investor', 'forum']],
 [['menteri', 'keuangan', 'sri', 'mulyani'],
  ['sri', 'mulyani'],
  ['mulyani'],
  ['nya']],
 [['direktur', 'aali'],
  ['santosa'],
  ['nya'],
  ['nya'],
  ['deputi', 'senior'],
  ['bank', 'indonesia'],
  ['miranda', 's', 'goeltom'],
  ['miranda'],
  ['miranda'],
  ['kami'],
  ['nya'],
  ['deputi', 'gubernur', 'senior', 'bi'],
  ['miranda', 's', 'goeltom'],
  ['miranda']],
 [['pertumbuhan', 'ekonomi', 'indon

In [32]:
[[get_markable_text(b) for b in a] for a in label_chains if len(a) > 1]

[[['hartadi', 'a', 'sarwono'],
  ['ia'],
  ['hartadi'],
  ['hartadi'],
  ['hartadi'],
  ['ia']],
 [['dirjend',
   'industri',
   'logam',
   'mesin',
   'tekstil',
   'departemen',
   'perindustrian',
   'ansari',
   'bukhari'],
  ['ansari'],
  ['ansari']],
 [['pdb'], ['pdb'], ['pdb']],
 [['menteri', 'keuangan', 'sri', 'mulyani'],
  ['sri', 'mulyani'],
  ['mulyani'],
  ['mulyani']],
 [['direktur', 'aali'], ['santosa'], ['nya']],
 [['miranda', 's', 'goeltom'], ['miranda'], ['miranda'], ['nya'], ['nya']],
 [['deputi', 'gubernur', 'senior', 'bi'],
  ['miranda', 's', 'goeltom'],
  ['miranda'],
  ['nya']],
 [['direktur', 'teknologi', 'dan', 'operasional', 'bank', 'mandiri'],
  ['sasmita'],
  ['dia'],
  ['ia'],
  ['nya']],
 [['bank', 'mandiri'],
  ['nya'],
  ['kami'],
  ['bank', 'mandiri'],
  ['bank', 'mandiri'],
  ['nya']],
 [['menko', 'perekonomian', 'boediono'], ['nya'], ['boediono'], ['nya']],
 [['menteri', 'keuangan', 'sri', 'mulyani', 'indrawati'], ['nya'], ['nya']],
 [['analis', 'pefi