## Parse data

In [None]:
import warnings
from typing import List
import math

import pandas as pd
from glob import glob
import re
from bs4 import BeautifulSoup

warnings.filterwarnings("ignore", category=DeprecationWarning)

# Collect every exported spreadsheet; sorting keeps the processing order stable.
files = sorted(glob("CPs exp 2a data/*.xlsx"))

# Parallel lists: situations[i] and actions[i] come from the same spreadsheet row.
situations = []
actions = []


def _parse_items(container):
    """Return ``(class_name, text)`` tuples for each ``div.item`` in *container*.

    ``class_name`` is the second CSS class of the div when it has more than
    one class, otherwise its only class. ``text`` is every text node under the
    div, stripped, joined with single spaces and lower-cased, so that text from
    nested elements (buttons, special divs) stays separated by a space.
    """
    parsed = []
    for item in container.find_all("div", class_="item"):
        classes = item.get("class")
        class_name = classes[1] if len(classes) > 1 else classes[0]
        # `string=True` is the current spelling of the deprecated `text=True`.
        texts = item.find_all(string=True)
        text_content = " ".join(text.strip() for text in texts).strip().lower()
        parsed.append((class_name, text_content))
    return parsed


print(f"number of files: {len(files)}\n")
for file in files:
    print(f"processing `{file}` ...")
    dataframe = pd.read_excel(file)
    # Keep only the last row recorded for each `name`.
    dataframe.drop_duplicates(subset="name", keep="last", inplace=True)
    assert len(dataframe.situation) == len(dataframe.actions)
    print(
        f"There are in total of {len(dataframe.situation)} situation-action pairs "
        f"in the file {file}.\n"
    )

    for situation, action in zip(dataframe.situation, dataframe.actions):
        # An empty spreadsheet cell is read as float NaN; record it as None so
        # the pair can be filtered out later.
        if isinstance(situation, float) and math.isnan(situation):
            situation_parsed = None
        else:
            situation_parsed = _parse_items(BeautifulSoup(situation, "html.parser"))
        situations.append(situation_parsed)

        # Maps each action box id to the list of items found inside that box.
        action_parsed = {}
        # An empty actions cell (NaN) cannot be fed to BeautifulSoup; treat it
        # as a row with no action boxes instead of crashing.
        if not (isinstance(action, float) and math.isnan(action)):
            soup = BeautifulSoup(action, "html.parser")
            for box in soup.find_all("div", class_="box box_action"):
                action_parsed[box.get("id")] = _parse_items(box)

        actions.append(action_parsed)
        assert len(situations) == len(actions)

situation_action_pairs = list(zip(situations, actions))

print(
    f"In total of {len(situation_action_pairs)} situation-action pairs have "
    f"been processed.\n"
)

# Discard the pairs whose situation was recorded as None (empty spreadsheet cell).
situation_action_pairs = list(
    filter(lambda pair: pair[0] is not None, situation_action_pairs)
)
print(
    f"After removing None, now there is {len(situation_action_pairs)} "
    f"situation-action pairs.\n"
)

# Flatten each pair's {box_id: [items]} dict into a list holding only the
# non-empty per-box item lists, and drop pairs left with no actions at all.
# (Previously such pairs were kept with their raw dict, so nothing was removed
# and later cells would have iterated over box ids instead of item lists.)
pairs_with_actions = []
for situation, actions_dict in situation_action_pairs:
    actions_list = [box_items for box_items in actions_dict.values() if box_items]
    if actions_list:
        pairs_with_actions.append((situation, actions_list))
situation_action_pairs = pairs_with_actions

print(
    f"After removing empty actions, now there is {len(situation_action_pairs)} "
    f"situation-action pairs.\n"
)

## Get the stats of situations and actions

In [35]:
from collections import Counter

# Count how often each (category, label) item occurs across all situations.
# most_common() orders the entries by descending count.
unique_ = dict(
    Counter(
        item for pair in situation_action_pairs for item in pair[0]
    ).most_common()
)
unique_

{('objects', 'large rock'): 140,
 ('objects', 'small rock'): 102,
 ('locations', 'top of rock pile'): 79,
 ('locations', 'left side of rock pile'): 59,
 ('agents', 'victim'): 33,
 ('agents', 'human'): 31,
 ('locations', 'bottom of rock pile'): 29,
 ('agents', 'robot'): 29,
 ('locations', 'on top of actor'): 28,
 ('locations', 'on top of object'): 13,
 ('counters', 'all'): 8,
 ('counters', 'one'): 7,
 ('locations', 'left side of field'): 3,
 ('locations', 'above rock pile'): 3,
 ('actions', 'move to object'): 3,
 ('locations', 'right side of rock pile'): 2,
 ('locations', 'right side of field'): 2,
 ('objects', 'brown rock'): 1,
 ('actions', 'break object in location'): 1,
 ('actions', 'pick up object in location'): 1,
 ('actions', 'move back and forth in location'): 1}

In [36]:
# Count how often each (category, label) item occurs across every action box
# of every pair. most_common() orders the entries by descending count.
unique_ = dict(
    Counter(
        item
        for _, action_boxes in situation_action_pairs
        for box_items in action_boxes
        for item in box_items
    ).most_common()
)
unique_

{('agents', 'robot'): 226,
 ('agents', 'human'): 208,
 ('objects', 'large rock'): 181,
 ('actions', 'pick up object in location'): 181,
 ('objects', 'small rock'): 178,
 ('locations', 'top of rock pile'): 90,
 ('locations', 'left side of rock pile'): 88,
 ('actions', 'drop object in location'): 86,
 ('actions', 'break object in location'): 68,
 ('locations', 'on top of actor'): 47,
 ('agents', 'victim'): 46,
 ('locations', 'left side of field'): 45,
 ('locations', 'bottom of rock pile'): 35,
 ('counters', 'all'): 35,
 ('locations', 'right side of field'): 32,
 ('actions', 'move to object'): 31,
 ('actions', 'stand still in location'): 30,
 ('actions', 'move back and forth in location'): 20,
 ('counters', 'one'): 16,
 ('actions', 'move to location'): 16,
 ('actions', 'move to actor'): 13,
 ('locations', 'right side of rock pile'): 12,
 ('locations', 'on top of object'): 12,
 ('locations', 'above rock pile'): 10,
 ('counters', 'zero'): 2,
 ('locations', 'on top of location'): 1}

## Print raw

In [105]:
# Dump every pair as parsed: one "case" per pair, followed by an 88-character
# rule and a blank line.
for idx, (situation, actions) in enumerate(situation_action_pairs):
    print(
        f"case {idx}",
        f"situation: {situation}",
        f"actions: {actions}",
        "-" * 88,
        "",
        sep="\n",
    )

case 0
situation: [('locations', 'top of rock pile'), ('objects', 'large rock'), ('locations', 'bottom of rock pile'), ('objects', 'large rock')]
actions: [[('agents', 'robot'), ('actions', 'move to object'), ('objects', 'large rock')], [('agents', 'human'), ('actions', 'move to object'), ('objects', 'large rock')], [('agents', 'robot'), ('actions', 'break object in location'), ('objects', 'large rock'), ('locations', 'right side of rock pile')], [('agents', 'robot'), ('actions', 'break object in location'), ('objects', 'large rock'), ('locations', 'bottom of rock pile')], [('agents', 'human'), ('actions', 'pick up object in location'), ('objects', 'small rock'), ('locations', 'top of rock pile')], [('agents', 'robot'), ('actions', 'pick up object in location'), ('objects', 'small rock'), ('locations', 'top of rock pile')]]
----------------------------------------------------------------------------------------

case 1
situation: [('counters', 'all'), ('objects', 'small rock'), ('locat

## Print cleaned

In [102]:
def remove_words(s: str) -> str:
    """Rewrite a comma-joined label string into compact camel-case tokens.

    The replacements are plain substring substitutions applied in the order
    listed, and the order matters: generic fragments such as ``"in location"``
    are removed before the action phrases that contain them are shortened, and
    ``"on top of actor"`` is handled before the bare ``"top of"``.

    Args:
        s: Lower-case labels joined with ``", "``, e.g.
            ``"robot, move to object, large rock"``.

    Returns:
        The rewritten string, e.g. ``"Robot, moveTo, LargeRock"``.
    """
    replacements = (
        ("move to object", "moveTo"),
        ("move to location", "moveTo"),
        ("move to actor", "moveTo"),
        ("in location", ""),
        ("to location", ""),
        ("on top of location", "topOf"),
        ("on top of object", "topOf"),
        ("break object ", "break"),
        ("robot", "Robot"),
        ("human", "Human"),
        ("victim", "Victim"),
        ("rock pile", "RockPile"),
        ("field", "Field"),
        ("large rock", "LargeRock"),
        ("small rock", "SmallRock"),
        ("brown rock", "BrownRock"),
        ("on top of actor", "topOf"),
        ("top of", "topOf"),
        ("above", "above,"),
        ("bottom of", "bottomOf,"),
        ("right side of", "rightSideOf,"),
        ("left side of", "leftSideOf,"),
        ("pick up object ", "pickUp"),
        # Counter words carry no information in the cleaned output.
        ("all, ", ""),
        ("one, ", ""),
        ("zero, ", ""),
        # Removing "in location" leaves a trailing space behind the action
        # phrase; consume it here (as the other action patterns do) so the
        # result is "MoveBackAndForth, ..." rather than "MoveBackAndForth , ...".
        ("move back and forth ", "hasState, MoveBackAndForth"),
        ("move back and forth", "hasState, MoveBackAndForth"),
        ("drop object ", "drop"),
        ("stand still ", "hasState, standStill"),
    )
    for old, new in replacements:
        s = s.replace(old, new)
    return s

# Print each pair again, this time as compact token strings: the label of
# every item is joined with ", " and then rewritten by remove_words().
for idx, (situation, actions) in enumerate(situation_action_pairs):
    situation_str = remove_words(", ".join(item[1].strip() for item in situation))
    actions_str = remove_words(
        ", ".join(item[1].strip() for box_items in actions for item in box_items)
    )
    print(
        f"case {idx}",
        f"situation: {situation_str}",
        f"actions: {actions_str}",
        "-" * 88,
        "",
        sep="\n",
    )

case 0
situation: topOf RockPile, LargeRock, bottomOf, RockPile, LargeRock
actions: Robot, moveTo, LargeRock, Human, moveTo, LargeRock, Robot, break, LargeRock, rightSideOf, RockPile, Robot, break, LargeRock, bottomOf, RockPile, Human, pickUp, SmallRock, topOf RockPile, Robot, pickUp, SmallRock, topOf RockPile
----------------------------------------------------------------------------------------

case 1
situation: SmallRock, topOf RockPile
actions: Human, hasState, MoveBackAndForth , topOf RockPile, Human, pickUp, SmallRock, topOf RockPile, Robot, moveTo, SmallRock
----------------------------------------------------------------------------------------

case 2
situation: topOf RockPile, LargeRock, topOf RockPile, SmallRock
actions: Robot, pickUp, SmallRock, topOf RockPile, Human, pickUp, SmallRock, topOf RockPile, Robot, pickUp, LargeRock, leftSideOf, RockPile, Robot, pickUp, LargeRock, rightSideOf, RockPile
----------------------------------------------------------------------------

In [101]:
import rdflib

# Load the ontology file (RDF/XML) into a fresh graph.
g = rdflib.Graph()
g.parse(source="coLearningOntology.owl", format="application/rdf+xml")

# Dump every triple held by the graph.
for triple in g:
    print(triple)

# The same data can be queried with SPARQL; this query returns at most ten triples.
query = """
SELECT ?subject ?predicate ?object
WHERE {
  ?subject ?predicate ?object
}
LIMIT 10
"""

for result in g.query(query):
    print(result)


(rdflib.term.URIRef('http://example.org/coLearningOntology#ActorAction'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), rdflib.term.URIRef('http://www.w3.org/2002/07/owl#Class'))
(rdflib.term.BNode('N6c7ad248ef694a9499d6aaecd42be6f9'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest'), rdflib.term.BNode('Nd46862656df0449da8eb41743480e951'))
(rdflib.term.URIRef('http://example.org/coLearningOntology#Actor'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), rdflib.term.URIRef('http://www.w3.org/2002/07/owl#Class'))
(rdflib.term.URIRef('http://example.org/coLearningOntology#Victim'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), rdflib.term.URIRef('http://www.w3.org/2002/07/owl#Class'))
(rdflib.term.URIRef('http://example.org/coLearningOntology#Field'), rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), rdflib.term.URIRef('http://www.w3.org/2002/07/owl#Class'))
(rdflib.term.URIR

In [None]:
from rdflib import Graph, Namespace, RDF, URIRef
from rdflib.namespace import RDFS, OWL

# Start from an empty graph and register the example namespace under the
# "ex" prefix so serialized output stays readable.
g = Graph()
EX = Namespace("http://example.org/myOntology#")
g.bind("ex", EX)

# Classes (types).
robot, human, large_rock = (
    URIRef(EX[name]) for name in ("Robot", "Human", "LargeRock")
)

# Individuals.
robot1, human1, rock1 = (URIRef(EX[name]) for name in ("robot1", "human1", "rock1"))

# Declare the type of each individual.
for individual, klass in ((robot1, robot), (human1, human), (rock1, large_rock)):
    g.add((individual, RDF.type, klass))

# Relationships between the individuals.
g.add((human1, EX.pickUp, rock1))
g.add((robot1, EX.moveTo, rock1))

# Alternatives kept for reference (disabled):
# # Serialize the graph in RDF/XML format
# print(g.serialize(format="xml").decode("u8"))

# Or save the graph to a file
# with open("example_graph.rdf", "wb") as f:
#     g.serialize(f, format="xml")


In [None]:
# Write the graph to disk in RDF/XML format.
g.serialize(destination="example_graph.rdf", format="xml")