In [None]:
import json
import glob

In [5]:
def show_ontology(ont_path):
    """Print the title, concepts and relations of an ontology JSON file.

    Args:
        ont_path: Path to a JSON file containing a ``title`` value, a
            ``concepts`` list whose items have ``label`` and ``qid`` keys,
            and a ``relations`` list whose items have ``label`` and ``pid``
            keys.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If one of the keys listed above is missing.
    """
    # Decode explicitly as UTF-8 (as the other loaders in this notebook do) so
    # that labels are read the same way regardless of the platform's default
    # encoding.
    with open(ont_path, 'r', encoding='utf-8') as in_file:
        data = json.load(in_file)

    # The file is fully parsed at this point, so it no longer needs to stay
    # open while the summary is printed.
    print(f"Ontology: {data['title']}")
    cls_str = "\n\t".join(f"{c['label']} ({c['qid']})" for c in data['concepts'])
    rel_str = "\n\t".join(f"{r['label']} ({r['pid']})" for r in data['relations'])
    print(f"Concepts:\n\t{cls_str}")
    print(f"Relations:\n\t{rel_str}")

def load_jsonl(file_path):
    """Parse a JSON Lines file into a list of decoded records.

    Args:
        file_path: Path to a UTF-8 text file with one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    content = []
    with open(file_path, 'r', encoding='utf-8') as file:
        for line in file:
            line = line.strip()
            # Blank or whitespace-only lines (e.g. extra newlines at the end
            # of the file) carry no record; passing them to json.loads would
            # raise, so skip them.
            if not line:
                continue
            content.append(json.loads(line))
    return content

def load_valid_ids(dir_path):
    """Read every ``*.txt`` file directly under ``dir_path`` line by line.

    Args:
        dir_path: Directory to search; ``'/*.txt'`` is appended to it to form
            the glob pattern.

    Returns:
        A list with one entry per matching file, visited in sorted path
        order. Each entry is the list of that file's lines with surrounding
        whitespace stripped (blank lines are kept as empty strings).
    """
    ids_per_file = []
    for txt_path in sorted(glob.glob(dir_path + '/*.txt')):
        with open(txt_path, 'r', encoding='utf-8') as handle:
            ids_per_file.append([raw_line.strip() for raw_line in handle])
    return ids_per_file

def load_all_jsonl(dir_path):
    """Load and concatenate the records of every ``*.jsonl`` file in a directory.

    Args:
        dir_path: Directory to search; ``'/*.jsonl'`` is appended to it to
            form the glob pattern.

    Returns:
        A single list holding the records returned by ``load_jsonl`` for each
        matching file, with files processed in sorted path order. The list is
        empty when no file matches.
    """
    # glob.glob makes no ordering guarantee, so sort the paths: the order of
    # the combined records is then the same on every run and every platform.
    jsonl_files = sorted(glob.glob(dir_path + '/*.jsonl'))
    content = []
    for file_path in jsonl_files:
        content.extend(load_jsonl(file_path))
    return content

def load_test_data(test_path_prefix):
    """Load all JSONL records under ``test_path_prefix`` indexed by their ``id``.

    Args:
        test_path_prefix: Directory handed to ``load_all_jsonl``.

    Returns:
        A dict mapping each record's ``"id"`` value to the record itself. If
        several records share an id, the one loaded last is kept.
    """
    records_by_id = {}
    for record in load_all_jsonl(test_path_prefix):
        records_by_id[record["id"]] = record
    return records_by_id

def load_prompts(prompt_path_prefix):
    """Load all JSONL records under ``prompt_path_prefix`` indexed by their ``id``.

    Args:
        prompt_path_prefix: Directory handed to ``load_all_jsonl``.

    Returns:
        A dict mapping each record's ``"id"`` value to the record itself. If
        several records share an id, the one loaded last is kept.
    """
    prompts_by_id = {}
    for prompt_record in load_all_jsonl(prompt_path_prefix):
        prompts_by_id[prompt_record["id"]] = prompt_record
    return prompts_by_id

def load_llm_ouputs(llm_output_prefix):
    """Load all JSONL records under ``llm_output_prefix`` indexed by their ``id``.

    Args:
        llm_output_prefix: Directory handed to ``load_all_jsonl``.

    Returns:
        A dict mapping each record's ``"id"`` value to the record itself. If
        several records share an id, the one loaded last is kept.
    """
    outputs_by_id = {}
    for output_record in load_all_jsonl(llm_output_prefix):
        outputs_by_id[output_record["id"]] = output_record
    return outputs_by_id

def print_llm_output(llm_output):
    """Print one LLM output record: its id, the raw response and its triples.

    Args:
        llm_output: Mapping with ``id``, ``response`` and ``triples`` keys;
            ``triples`` is iterated and each item is printed on its own
            tab-indented line.
    """
    print("Test ID: {}\n".format(llm_output['id']))
    print("LLM Response:\n\n{}\n".format(llm_output['response']))
    print("Triples:")
    for triple in llm_output['triples']:
        print("\t{}".format(triple))



## Ontologies

In [7]:
ont_path_prefix = "./data/dbpedia_webnlg/ontologies/"

# Ontology file names, in the order the notebook indexes them.
ontology_file_names = (
    "1_writtenwork_ontology.json",
    "2_airport_ontology.json",
    "3_artist_ontology.json",
    "4_film_ontology.json",
    "5_monument_ontology.json",
    "6_comicscharacter_ontology.json",
    "7_scientist_ontology.json",
    "8_astronaut_ontology.json",
    "9_building_ontology.json",
    "10_city_ontology.json",
    "11_meanoftransportation_ontology.json",
    # NOTE(review): the next name looks like a typo of
    # "12_company_ontology.json"; kept unchanged — confirm against the file
    # name on disk before correcting it.
    "12_compan_ontologyy.json",
    "13_celestialbody_ontology.json",
    "14_musicalwork_ontology.json",
    "15_athlete_ontology.json",
    "16_university_ontology.json",
    "17_sportsteam_ontology.json",
    "18_politician_ontology.json",
    "19_food_ontology.json",
)

# Paths formed by prepending the ontology directory to each file name.
ontologies = [ont_path_prefix + file_name for file_name in ontology_file_names]


## Ontology content

In [8]:
# Index 1 selects "2_airport_ontology.json" from the ontology list defined earlier.
show_ontology(ontologies[1])

Ontology: Airport Ontology
Concepts:
	Airport (Airport)
	Aircraft (Aircraft)
	RunwaySurfaceType (RunwaySurfaceType)
	Airport (Airport)
	County (County)
	City (City)
	Demonym (Demonym)
	Place (Place)
	Battle (Battle)
	Country (Country)
	Division (Division)
	Language (Language)
	Party (Party)
	Class (Class)
Relations:
	aircraftFighter (aircraftFighter)
	aircraftHelicopter (aircraftHelicopter)
	runwayName (runwayName)
	areaCode (areaCode)
	3rdRunwaySurfaceType (3rdRunwaySurfaceType)
	hubAirport (hubAirport)
	elevationAboveTheSeaLevelInMetres (elevationAboveTheSeaLevelInMetres)
	ceremonialCounty (ceremonialCounty)
	capital (capital)
	runwaySurfaceType (runwaySurfaceType)
	headquarter (headquarter)
	demonym (demonym)
	postalCode (postalCode)
	location (location)
	owner (owner)
	regionServed (regionServed)
	transportAircraft (transportAircraft)
	order (order)
	leaderTitle (leaderTitle)
	battle (battle)
	cityServed (cityServed)
	leader (leader)
	city (city)
	isPartOf (isPartOf)
	icaoLocationI

# Test Sentences

In [None]:
# Load every record from the *.jsonl files in the test directory, keyed by its "id".
test_path_prefix = "./data/dbpedia_webnlg/test/"
test_data = load_test_data(test_path_prefix)

# One list of stripped lines per *.txt file in the ground-truth directory.
# NOTE(review): presumably each line is a test id (a value is used as a key
# into test_data in the next cell) — confirm against the files.
valid_ids_prefix = "./data/dbpedia_webnlg/ground_truth/"
valid_ids = load_valid_ids(valid_ids_prefix)

In [None]:
# [2][10]  [3][1]  [4][6] [5][3]
# NOTE(review): the pairs above look like alternative [file index][line index]
# selections into valid_ids — confirm.

# Select one id (file index 5, line index 3); later cells reuse test_id.
test_id = valid_ids[5][3]
# Bare last expression so the notebook displays the selected test record.
test_data[test_id]


# LLM Prompts

In [None]:
# Load every record from the *.jsonl files in the prompts directory, keyed by its "id".
prompt_path_prefix = "./data/dbpedia_webnlg/prompts/"
prompts = load_prompts(prompt_path_prefix)

In [None]:
# Look up the prompt record stored under the id selected in test_id and print it.
test_prompt = prompts[test_id]
print(f"Test ID: {test_prompt['id']}")
print(f"Prompt: {test_prompt['prompt']}")

# LLM Output

In [None]:
# Load the stored responses of each baseline from its llm_responses directory,
# keyed by record "id".
vicuna_output_prefix =  "./data/dbpedia_webnlg/baselines/Vicuna-13B/llm_responses/"
vicuna_outputs = load_llm_ouputs(vicuna_output_prefix)

alpaca_lora_output_prefix =  "./data/dbpedia_webnlg/baselines/Alpaca-LoRA-13B/llm_responses/"
alpaca_lora_outputs = load_llm_ouputs(alpaca_lora_output_prefix)


In [None]:
# Show the Vicuna-13B record stored under the id selected in test_id.
print("Vicuna-13B")
print_llm_output(vicuna_outputs[test_id])

In [None]:
# Show the Alpaca-LoRA-13B record stored under the id selected in test_id.
print("Alpaca-LoRA-13B")
print_llm_output(alpaca_lora_outputs[test_id])