In [1]:
import pickle

In [2]:
conceptualizer_neighbors_dict = {}

for num in [1, 5, 10, 20, 50, 100]:
    with open(f"./conceptualizer_{num}_neighbors_dict.pickle", 'rb') as f:
        conceptualizer_neighbors_dict[num] = pickle.load(f)
    
with open('./clics_neighbors_dict.pickle', 'rb') as f:
    clics_neighbors_dict = pickle.load(f)

In [3]:
for key, num in conceptualizer_neighbors_dict.items():
    print(f"{key}: {len(num)}")
print(f"CLICS: {len(clics_neighbors_dict)}")

1: 5870
5: 4028
10: 3562
20: 3133
50: 2591
100: 2221
CLICS: 2919


In [4]:
# parse the CLICS concepts

clics_concepts = [concept.lower() for concept in clics_neighbors_dict.keys()]

# the concepts needs to be transformed

exceptions = {'sell': '$sell', 'third': '$third', 'hundred': '$hundred', 
              'water': 'water', 'village': '$village', 'night': 'night', 'remember': '$remember', 
              'trumpet': '$trumpet', 'silver': '$silver', 'gold': '$gold'}

In [5]:
for num in [1, 5, 10, 20, 50, 100]:
    print(f"Minimum languages for one edge to be able to included: {num}")
    conceptualizer_concepts = [concept.replace('$', '') for concept in conceptualizer_neighbors_dict[num].keys()]
    common_conceptes = set(clics_concepts).intersection(set(conceptualizer_concepts))
    print(f"Number of common concepts: {len(common_conceptes)}")
    
    # for computing micro and macro recalls
    
    matched_number = 0
    total_number = 0
    recall_accumulate = 0
    
    for i, concept in enumerate(list(common_conceptes)):
        clics_n = clics_neighbors_dict[concept.upper()]

        # we only keep the neighbors which occur in our data
        temp = []
        for n in clics_n:
            if n.lower() in exceptions:
                temp.append(exceptions[n.lower()])
            elif '$'+n.lower()+'$' in conceptualizer_neighbors_dict[num].keys():
                temp.append('$'+n.lower()+'$')
        clics_n = temp

        conceptualizer_n = conceptualizer_neighbors_dict[num][exceptions[concept] if concept in exceptions \
                                                              else '$' + concept + '$']

        associations_in_common = list(set(conceptualizer_n).intersection(set(clics_n)))
        recall_accumulate += len(associations_in_common) / len(clics_n)
        matched_number += len(associations_in_common)
        total_number += len(clics_n)
        
    print(f"Micro average Recall: {matched_number/total_number}")
    print(f"Macro average Recall: {recall_accumulate/len(common_conceptes)}") 
    
    print()

Minimum languages for one edge to be able to included: 1
Number of common concepts: 1220
Micro average Recall: 0.7125603864734299
Macro average Recall: 0.7955338031421626

Minimum languages for one edge to be able to included: 5
Number of common concepts: 1056
Micro average Recall: 0.6346426881211548
Macro average Recall: 0.7658996686425739

Minimum languages for one edge to be able to included: 10
Number of common concepts: 1001
Micro average Recall: 0.5769518583187827
Macro average Recall: 0.7265016972696795

Minimum languages for one edge to be able to included: 20
Number of common concepts: 935
Micro average Recall: 0.5376196990424077
Macro average Recall: 0.6969999796876831

Minimum languages for one edge to be able to included: 50
Number of common concepts: 833
Micro average Recall: 0.4756135445790618
Macro average Recall: 0.6612323846906227

Minimum languages for one edge to be able to included: 100
Number of common concepts: 761
Micro average Recall: 0.4186284544524053
Macro av