In [55]:
import requests


def make_request(query, timeout=30):
    """Fetch a resource from the ConceptNet web API and return its JSON body.

    Args:
        query: Either a full URL that already starts with the API base URL,
            or a path (e.g. ``"/query?node=/c/en/bank"``) that is appended
            to the base URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The decoded JSON payload of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    base_url = "http://api.conceptnet.io"
    if not query.startswith(base_url):
        query = f"{base_url}{query}"
    response = requests.get(query, timeout=timeout)
    # Surface HTTP failures here rather than as a confusing JSON/KeyError later.
    response.raise_for_status()
    return response.json()

def read_edges(obj):
    """Yield the edges of an API response whose two endpoints are English.

    Args:
        obj: Decoded response mapping; its ``'edges'`` entry is iterated.

    Yields:
        dict: ``{'edge': (start label, relation label, end label),
        'example': surfaceText}``. ``'example'`` is ``None`` when the edge
        carries no ``'surfaceText'`` entry.
    """
    for edge in obj['edges']:
        start, end = edge['start'], edge['end']
        # A node without a 'language' entry cannot be English, so skip the
        # edge instead of failing with KeyError.
        if start.get('language') != 'en' or end.get('language') != 'en':
            continue
        yield {
            'edge': (start['label'], edge['rel']['label'], end['label']),
            'example': edge.get('surfaceText'),
        }

def get_relations_between_words(word1, word2):
    """Query the API for edges between two English terms.

    Spaces in either term are replaced by underscores before the terms are
    placed in the ``node`` / ``other`` parameters of the ``/query`` request.

    Returns:
        list: The dicts produced by ``read_edges`` for the response.
    """
    first, second = (term.replace(' ', '_') for term in (word1, word2))
    response = make_request(
        query=f"/query?node=/c/en/{first}&other=/c/en/{second}&language=en"
    )
    return list(read_edges(response))

def get_edges_related_to_word(word):
    """Query the API for the edges attached to a single English term.

    Spaces in the term are replaced by underscores before it is placed in
    the ``node`` parameter of the ``/query`` request.

    Returns:
        list: The dicts produced by ``read_edges`` for the response.
    """
    term = word.replace(' ', '_')
    response = make_request(query=f"/query?node=/c/en/{term}&language=en")
    return list(read_edges(response))

In [15]:
import re

import joblib
import pandas as pd
from tqdm import tqdm

In [56]:
from collections import defaultdict

# Load the tab-separated edge table and drop the two columns that the lookup
# tables built below never read.
dump = pd.read_csv("data/kgtk_conceptnet.tsv", sep="\t")
dump = dump.drop(columns=['relation;dimension', 'source'])

# Node ids look like "/c/en/<term>", optionally followed by "/<suffix>"
# (e.g. "/c/en/0/n"); group 1 captures only the <term> part.
pattern = r"/c/en/([^/]*)(/.*)?"
node_pattern = re.compile(pattern)

# term id -> label (a term seen in several rows keeps the label of the last row)
label_dict = dict()
# (node1 term id, node2 term id) -> set of (relation label, sentence) pairs
rel_dict = defaultdict(set)

# Walk the needed columns in lockstep. DataFrame.iterrows() builds a Series
# for every row, which is very slow on a table with millions of rows.
rows = zip(
    dump['node1'],
    dump['node2'],
    dump['node1;label'],
    dump['node2;label'],
    dump['relation;label'],
    dump['sentence'],
)

for node1, node2, node1_label, node2_label, rel, sent in tqdm(rows, total=len(dump), leave=False):
    node1_match = node_pattern.match(node1)
    node2_match = node_pattern.match(node2)
    if node1_match is None or node2_match is None:
        # Fail with the offending ids instead of an AttributeError on None.
        raise ValueError(f"Unexpected node id format: {node1!r} / {node2!r}")

    node1_id = node1_match.group(1)
    node2_id = node2_match.group(1)

    label_dict[node1_id] = node1_label
    label_dict[node2_id] = node2_label

    rel_dict[(node1_id, node2_id)].add((rel, sent))

# Persist both tables so that later cells can reload them without re-parsing.
joblib.dump(rel_dict, "data/conceptNet_relations.joblib")
joblib.dump(label_dict, "data/conceptNet_labels.joblib")

  dump = pd.read_csv("data/kgtk_conceptnet.tsv", sep = "\t")
                               

In [16]:
# Load the complete tab-separated edge table, keeping every column.
dump = pd.read_csv("data/kgtk_conceptnet.tsv", sep = "\t")

  dump = pd.read_csv("data/kgtk_conceptnet.tsv", sep = "\t")


In [17]:
# Preview the first three rows to check the column layout.
dump.head(3)

Unnamed: 0,node1,relation,node2,node1;label,node2;label,relation;label,relation;dimension,source,sentence
0,/c/en/0/n,/r/Antonym,/c/en/1,0,1,antonym,,CN,
1,/c/en/12_hour_clock/n,/r/Antonym,/c/en/24_hour_clock,12 hour clock,24 hour clock,antonym,,CN,
2,/c/en/24_hour_clock/n,/r/Antonym,/c/en/12_hour_clock,24 hour clock,12 hour clock,antonym,,CN,


In [18]:
# (number of rows, number of columns) of the loaded table.
dump.shape

(3423004, 9)

In [19]:
import consts

In [31]:
# Distinct 'relation;label' values of the rows whose 'relation' is listed in
# consts.good_relations, selected with a single .loc lookup.
good_relations_labels = (
    dump.loc[dump['relation'].isin(consts.good_relations), 'relation;label']
    .unique()
    .tolist()
)

In [32]:
# Display the collected relation labels.
good_relations_labels

['at location',
 'capable of',
 'causes',
 'causes desire',
 'desires',
 'has first subevent',
 'has last subevent',
 'has property',
 'is a',
 'located near',
 'made of',
 'part of',
 'symbol of',
 'used for']

In [20]:
# Shape of the subset of rows whose 'relation' is listed in consts.good_relations.
dump[dump['relation'].isin(consts.good_relations)].shape

(373389, 9)

In [22]:
# Reload the two lookup tables from disk.
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code;
# only load files that were produced by a trusted source.
rel_dict = joblib.load("data/conceptNet_relations.joblib")
label_dict = joblib.load("data/conceptNet_labels.joblib") 

In [23]:
# Display the relation table.
# NOTE(review): this renders the entire dict; consider showing only a few
# entries to keep the notebook output small.
rel_dict

defaultdict(set,
            {('0', '1'): {('antonym', nan), ('related to', nan)},
             ('12_hour_clock', '24_hour_clock'): {('antonym', nan)},
             ('24_hour_clock', '12_hour_clock'): {('antonym', nan)},
             ('5', '3'): {('antonym', nan)},
             ('a.c', 'd.c'): {('antonym', nan)},
             ('a.m', 'afternoon'): {('antonym', nan)},
             ('a.m', 'p.m'): {('antonym', nan), ('distinct from', nan)},
             ('a.m', 'pm'): {('antonym', nan), ('distinct from', nan)},
             ('ab_extra', 'ab_intra'): {('antonym', nan)},
             ('ab_intra', 'ab_extra'): {('antonym', nan)},
             ('abactinal', 'actinal'): {('antonym', nan)},
             ('abandon', 'acquire'): {('antonym', nan)},
             ('abandon', 'arrogate'): {('antonym', nan)},
             ('abandon', 'embrace'): {('antonym', nan)},
             ('abandon', 'engage'): {('antonym', nan)},
             ('abandon', 'gain'): {('antonym', nan)},
             ('abandon', '

In [3]:
# Example: edges returned by the live API for two single-word terms.
get_relations_between_words('stepfather', 'father')

[{'edge': ('stepfather', 'DerivedFrom', 'father'), 'example': None},
 {'edge': ('stepfather', 'IsA', 'father'), 'example': None},
 {'edge': ('stepfather', 'DerivedFrom', 'father'), 'example': None}]

In [4]:
# Example: argument order does not fix edge direction; the edge shown below
# runs from 'chase' to 'bank'.
get_relations_between_words('bank', 'chase')

[{'edge': ('chase', 'IsA', 'bank'), 'example': None}]

In [5]:
# Example: a multi-word term; the function replaces its spaces with
# underscores before querying.
get_relations_between_words('bank', 'bank of china')

[{'edge': ('bank of china', 'IsA', 'bank'), 'example': None}]

In [6]:
# Example: a pair with edges in both directions; 'example' is None when the
# edge carries no surface text.
get_relations_between_words('bank', 'teller')

[{'edge': ('teller', 'RelatedTo', 'bank'), 'example': None},
 {'edge': ('bank', 'RelatedTo', 'teller'),
  'example': '[[bank]] is related to [[teller]]'}]