In [None]:
import json
from collections import defaultdict

import requests

def get_related_verbs_and_locations(noun, top_n=10, timeout=30, session=None):
    """Look up the strongest CapableOf and AtLocation edges that start at ``noun``.

    Two queries are sent to the ConceptNet ``/query`` endpoint, one per
    relation, each with ``start=/c/en/<noun>`` and ``limit=1000``.

    Args:
        noun: Term inserted verbatim into the ``/c/en/<noun>`` start URI.
        top_n: Number of highest-weight entries kept per relation
            (``None`` keeps every returned edge).
        timeout: Seconds passed to ``get`` as the request timeout, so a stalled
            connection cannot block a long batch run indefinitely.
        session: Optional object exposing ``get(url, params=..., timeout=...)``
            (for example a ``requests.Session`` for connection reuse). When
            omitted, the module-level ``requests`` API is used.

    Returns:
        A ``defaultdict`` keyed by ``noun``. ``results[noun]["CapableOf"]`` is a
        list of ``(label, weight)`` tuples and ``results[noun]["AtLocation"]``
        is a list of ``(label, weight, surface_text)`` tuples, both sorted by
        weight in descending order. A relation whose lookup failed is reported
        with ``print`` and left untouched, so if both lookups fail the mapping
        contains no entry for ``noun`` at all.
    """
    base_url = "http://api.conceptnet.io/query"
    http = requests if session is None else session
    results = defaultdict(lambda: {"CapableOf": [], "AtLocation": []})

    def fetch_edges(relation):
        """Return the list of edges for ``relation``, or None when the lookup failed."""
        params = {
            "rel": f"/r/{relation}",
            "start": f"/c/en/{noun}",
            "limit": 1000,  # Adjust limit as needed
        }
        try:
            response = http.get(base_url, params=params, timeout=timeout)
            if response.status_code != 200:
                return None
            return response.json().get("edges", [])
        except (requests.RequestException, ValueError):
            # Network errors and non-JSON bodies are handled like a non-200
            # status: report the failure and carry on with the next lookup.
            return None

    for relation in ("CapableOf", "AtLocation"):
        edges = fetch_edges(relation)
        if edges is None:
            print(f"Failed to fetch '{relation}' data for {noun}")
            continue

        entries = []
        for edge in edges:
            # `or {}` also covers an "end" key that is present but null.
            label = (edge.get("end") or {}).get("label", "")
            weight = edge.get("weight", 0)
            if relation == "AtLocation":
                # `or` applies the placeholder when the key is missing AND when
                # it is present with a null/empty value; dict.get's default
                # alone only covers the missing-key case.
                surface_text = edge.get("surfaceText") or "No surface text available"
                entries.append((label, weight, surface_text))
            else:
                entries.append((label, weight))

        # Stable sort: edges with equal weight keep the order the API returned.
        entries.sort(key=lambda entry: entry[1], reverse=True)
        results[noun][relation] = entries[:top_n]

    return results

In [None]:
# Load the subject part (the text before the first ':') of every non-blank line.
# The encoding is explicit so decoding does not depend on the platform default.
with open('./OMCS-SUBJ-100K-t1000_FILTERED.txt', 'r', encoding='utf-8') as f:
    subjs = [
        # Drop the newline first so a line without ':' does not keep it.
        line.rstrip('\n').split(':', 1)[0]
        for line in f  # iterate lazily instead of materialising readlines()
        if line.strip()  # blank lines carry no subject
    ]

In [None]:
# Keep only the final whitespace-separated word of each subject. Subjects that
# contain no words are skipped: indexing [-1] on an empty split raises IndexError.
subjs_last_noun = [words[-1] for words in map(str.split, subjs) if words]
# Preview only; displaying the whole list would flood the cell output.
subjs_last_noun[:10]

In [None]:
# Write one pretty-printed JSON record per subject, in subject order, to
# conceptnet_query.txt. Records are separated by a newline, so the file is a
# sequence of JSON documents rather than a single JSON document.
serialized_by_noun = {}  # noun -> JSON text; a repeated noun costs no extra HTTP requests
with open('conceptnet_query.txt', 'w') as f:
    for subj in subjs_last_noun:
        if subj not in serialized_by_noun:
            result = get_related_verbs_and_locations(subj)
            # json.dumps accepts the returned defaultdict; tuples are written as JSON arrays.
            # NOTE: a failed lookup is memoised too, so it is not retried for later duplicates.
            serialized_by_noun[subj] = json.dumps(result, indent=4)
        f.write(serialized_by_noun[subj])
        f.write('\n')
        print(subj)  # progress indicator