In [136]:
import modin.pandas as pd
# import numpy as np
# import seaborn as sns
# from datetime import datetime, date
from pprint import *
from itertools import zip_longest

In [2]:
# Start a local Ray runtime (presumably the execution engine behind
# modin.pandas -- TODO confirm against the Modin configuration in use).
# log_to_driver=False keeps worker logs out of the notebook output;
# ignore_reinit_error=True lets this cell be re-run without raising when Ray
# is already initialised.
import ray
ray.init(log_to_driver=False, ignore_reinit_error=True)

2024-09-02 00:17:38,589	INFO worker.py:1771 -- Started a local Ray instance.


0,1
Python version:,3.12.3
Ray version:,2.31.0


In [4]:
def load_and_clean(path: str, separator: str = ','):
    """
    Read a link-matrix CSV and normalise it for the extraction helpers.

    The first two columns of the file are taken (by position) as the element
    name and its column label; every remaining column is coerced to ``int``.

    Parameters:
    path (str): Location of the CSV file.
    separator (str): Field delimiter passed to ``read_csv``.

    Returns:
    tuple: ``(data, data_dict)`` where ``data`` is the cleaned frame whose
        first two columns are renamed to ``'name'`` and ``'index'``, and
        ``data_dict`` maps each column label to its element name.
    """
    data = pd.read_csv(path, sep=separator, encoding='utf-8')

    # Address the two leading columns by position instead of by the
    # auto-generated 'Unnamed: 0' / 'Unnamed: 1' headers. This gives the same
    # result for header-less leading columns and also works when the file
    # names them, without needing a catch-all ``except``.
    name_col, label_col = data.columns[0], data.columns[1]
    data_dict = dict(zip(data[label_col], data[name_col]))

    data = data.fillna(0)

    # Any cell that contains an upper-case letter is replaced by 0 before the
    # value columns are converted to integers.
    value_cols = data.columns[2:]
    data[value_cols] = data[value_cols].replace('[A-Z]', 0, regex=True).astype(int)

    data = data.rename(columns={name_col: 'name', label_col: 'index'})
    return data, data_dict

In [5]:
# Load the three link matrices. Each call returns the cleaned frame plus a
# dict built from the file's first two columns (see load_and_clean).
data, data_dict = load_and_clean('./content/data.csv', separator=',')
stones, stones_dict = load_and_clean('./content/stones.csv', separator=',')
obstacle, obstacle_dict = load_and_clean('./content/obstacle.csv', separator=',')

Data types of partitions are different! Please refer to the troubleshooting section of the Modin documentation to fix this issue.


In [38]:
# Display the cleaned frame loaded from data.csv.
data

Unnamed: 0,name,index,A,B,C,D,E,F,G,H,I,J,K
0,Kettle,A,0,0,0,0,0,0,0,0,0,0,0
1,Empty Kettle,B,22,0,0,0,0,0,0,0,0,0,12
2,Half Kettle,C,22,0,0,0,0,0,0,0,0,0,12
3,Full Kettle,D,22,0,0,0,0,0,0,0,0,0,0
4,Cup1,E,0,0,0,0,0,0,0,0,0,0,0
5,Empty Cup1,F,0,0,0,0,22,0,0,0,0,0,0
6,Full Cup1,G,0,0,0,0,22,0,0,0,0,0,12
7,Cup2,H,0,0,0,0,0,0,0,0,0,0,0
8,Empty Cup2,I,0,0,0,0,0,0,0,22,0,0,0
9,Full Cup2,J,0,0,0,0,0,0,0,22,0,0,12


In [185]:
# Objects are the 'name' of every row whose columns from position 'index'
# onwards contain only zeroes.
def get_objects(df, index):
    """
    Collect the stripped names of the rows that qualify as objects.

    Parameters:
    df (DataFrame): Frame with 'name' and 'index' columns followed by the
        link-code columns.
    index (int): Position of the first link-code column.

    Returns:
    list: Stripped 'name' values of the selected rows, in frame order.
    """
    # Row-wise test: every link-code cell of the row equals 0.
    all_zero_rows = (df.iloc[:, index:] == 0).all(axis=1)

    # NOTE(review): this inspects the 'index' label column and reduces it to a
    # single boolean, so it either selects every row or none. It may have been
    # meant as a row-wise test on the link-code columns -- confirm intent.
    any_label_is_code = df['index'].isin([23, 25, 22]).any()

    selected_names = df.loc[all_zero_rows | any_label_is_code, 'name']
    return [entry.strip() for entry in selected_names]

In [39]:
# Actions are the 'name' of every row that holds 1, 11, 111, 14 or 141 in at
# least one column from position 'index' onwards.
def get_actions(df, index):
    """
    Returns a list of actions from a DataFrame based on specific values in columns starting from 'index'.

    A row is an action when any of its link-code cells is 1, 11, 111, 14 or
    141. (The earlier extra test for code 21 was always combined with that
    same condition, so it never changed the selection and has been dropped.)

    Parameters:
    df (DataFrame): The input DataFrame; must contain a 'name' column.
    index (int): The starting index of the columns to filter.

    Returns:
    list: Unique, whitespace-stripped action names in order of first
        appearance in ``df``.
    """
    action_codes = [1, 11, 111, 14, 141]

    # Step 1: Flag rows holding at least one action code.
    has_action_code = df.iloc[:, index:].isin(action_codes).any(axis=1)

    # Step 2: Extract the 'name' column of the flagged rows.
    names = df.loc[has_action_code, 'name']

    # Step 3: Strip and de-duplicate. dict.fromkeys keeps first-appearance
    # order, so the result is stable across kernel restarts (a set is not).
    return list(dict.fromkeys(name.strip() for name in names))

In [8]:
# States are the rows whose 'name' contains an object's name; the state label
# is what remains of the row name once the object name is removed.
def get_states(df, index, names):
    """
    Group the rows of ``df`` by the object whose name they contain.

    Parameters:
    df (DataFrame): Frame with 'name' and 'index' columns.
    index (int): Not used by this function; kept so existing calls still work.
    names (list): Object names to look for inside the 'name' column.

    Returns:
    tuple: ``(mapping, states)`` where ``mapping[obj]`` is a dict
        ``{row name: 'index' label}`` for every row containing ``obj``, and
        ``states[obj]`` is the list of non-empty remainders left after
        removing ``obj`` from those row names. Rows are visited in
        descending order of their 'index' label.
    """
    states = {}
    mapping = {}
    ordered = df.sort_values(by='index', ascending=False)

    for item in names:
        # Match the object name literally: names may contain characters such
        # as '(', ')' or '?' that would otherwise be read as regex syntax.
        # na=False keeps the mask boolean when a name is missing.
        contains_item = ordered['name'].str.contains(item, regex=False, na=False)
        related = ordered[contains_item]
        related_names = list(related['name'])
        columns = list(related['index'])

        # Remove the object name from each row name; keep non-empty remainders.
        remainders = (str(name).replace(item, '').strip() for name in related_names)
        states[item] = [remainder for remainder in remainders if remainder]

        mapping[item] = dict(zip(related_names, columns))

    return mapping, states

In [9]:
# Base link codes (as strings) mapped to their relation names.
# classify_relation also accepts a three-character code made of one of these
# keys plus a trailing '1' (reported as "Conditional") or '2' ("Event").
link_mapping = {
    '11': "Consumption",
    '12': "Result",
    '13': "Effect",
    '14': "Invocation",
    '16': "Agent",
    '17': "Instrument",
    '21': "Aggregation",
    '22': "Generalization",
    '23': "Exhibition",
    '24': "Instantiation",
    '25': "Unidirectional",
    '26': "Bidirectional"
}

def classify_relation(code, base_relations):
    """
    Translate a link code into a human-readable relation name.

    Parameters:
    code (str): The link code; any value is accepted and converted with str().
    base_relations (dict): Maps base codes (strings) to relation names.

    Returns:
    str: The base relation name for an exact match; the base name followed by
        " (Conditional)" or " (Event)" for a three-character code ending in
        '1' or '2' whose first two characters are a base code; otherwise
        "Unknown".
    """
    key = str(code)

    # An exact match always wins, even for three-character codes.
    if key in base_relations:
        return base_relations[key]

    if len(key) == 3:
        qualifier = {'1': "Conditional", '2': "Event"}.get(key[-1])
        stem = key[:-1]
        if qualifier is not None and stem in base_relations:
            return f"{base_relations[stem]} ({qualifier})"

    return "Unknown"

In [10]:
def print_transitions(transitions, objs_states):
    """
    Print one line per (action, object, start state -> end state) transition.

    Parameters:
    - transitions (dict): Maps each action to a dict with the keys
      'starting_states', 'xor_starting_states' and 'ending_states'.
    - objs_states (dict): Maps each object to a container of its states.

    Returns:
    None; the transitions are written to standard output.
    """
    for action, info in transitions.items():
        begin_states = info['starting_states']
        xor_begin_states = info['xor_starting_states']
        finish_states = info['ending_states']
        # Start and end states are paired by position.
        pairs = list(zip(begin_states, finish_states))

        for obj, known_states in objs_states.items():
            for begin, finish in pairs:
                # Only report pairs where both states belong to this object.
                if begin not in known_states or finish not in known_states:
                    continue
                if begin in xor_begin_states:
                    begin_label = f"(XOR) {begin}"
                else:
                    begin_label = begin
                print(f"{action}, {obj}, {begin_label}->{finish}")

# Utility function returning the start and end objects of a triplet
def get_triplet_obj(triplet):
    """
    Extract the objects from a triplet of the form
    {'action': action,
     'start': (object, state, relation),
     'end': (object, state, relation)}.

    Parameters:
    triplet (dict): A dictionary containing the triplet information.

    Returns:
    tuple: ``(start_object, end_object)``, i.e. the first element of the
        'start' entry and the first element of the 'end' entry.
    """
    start_obj, end_obj = (triplet[side][0] for side in ('start', 'end'))
    return start_obj, end_obj

In [175]:
def transitions_as_triplets(df, actions, objs_states):
    """
    Generate triplets of transitions for each action in a dataframe.

    For every action, the action's row is scanned for link codes in the
    columns belonging to each known state, and the action's column is scanned
    for result codes in the rows of those states.

    Args:
        df (pandas.DataFrame): The dataframe containing the transitions; must
            have 'name' and 'index' columns plus one column per label.
        actions (list): List of actions to generate triplets for. Each action
            must match a value in ``df['name']`` exactly.
        objs_states (dict): Dictionary ``{object: {state: column}}``.
    Returns:
        list: List of dictionaries representing the triplets of transitions.
            Each dictionary contains the following keys:
            - 'action': The action for the transition.
            - 'preconditions': List of (object, state, 'Precondition') tuples.
            - 'start': Tuple representing the starting state, in the format (object, state, relation).
            - 'end': Tuple representing the ending state, in the format (object, state, 'Result').
            - 'agent': List of (object, state, relation) tuples.
            - 'instrument': List of (object, state, relation) tuples.
    Raises:
        IndexError: If an action has no matching row in ``df``.
    """

    start_list = [1, 11, 111, 14, 141, 21, 211, 22, 221, 222, 23, 231, 24, 241, 25, 251]
    end_list = [12, 121, 13, 131, 26, 261]
    # NOTE(review): preconditions and instruments use the same codes, so every
    # instrument is also reported as a precondition -- confirm this is intended.
    precondition_list = [17, 171]
    agent_list = [16, 161]
    instrument_list = [17, 171]

    transitions = []

    for action in actions:
        action_rows = df[df['name'] == action]
        # The filtered frame keeps the original row labels, so the first match
        # has to be taken by position (.iloc), not by the label 0.
        action_column = action_rows['index'].iloc[0]

        starting_states = []
        preconditions = []
        ending_states = []
        agents = []
        instruments = []

        for obj, states in objs_states.items():
            for state, column in states.items():
                # Link code(s) of this action towards the state's column.
                link_codes = action_rows[column]
                relation = classify_relation(str(link_codes.iloc[0]), link_mapping)

                # Code stored in the state's row under the action's column.
                cell_value = df.loc[df['name'] == state, action_column].values[0]

                if link_codes.isin(start_list).any():
                    starting_states.append((obj, state, relation))

                if cell_value in end_list:
                    ending_states.append((obj, state, 'Result'))

                if link_codes.isin(precondition_list).any():
                    preconditions.append((obj, state, 'Precondition'))

                if link_codes.isin(agent_list).any():
                    agents.append((obj, state, relation))

                if link_codes.isin(instrument_list).any():
                    instruments.append((obj, state, relation))

        # Pair starting and ending states by position. zip stops at the
        # shorter list, so unmatched states produce no triplet.
        state_pairs = list(zip(starting_states, ending_states))

        # One triplet per (start, end) pair. Each triplet gets its own copy of
        # the shared lists so mutating one triplet cannot affect the others.
        for start, end in state_pairs:
            transitions.append({
                'action': action,
                'preconditions': list(preconditions),
                'start': start,
                'end': end,
                'agent': list(agents),
                'instrument': list(instruments)
            })

    return transitions

# # Usage
# # trans = get_transitions(data, acts, mapping)

# # print('\nFormatted Transitions:')
# # print_transitions(trans, mapping)

In [186]:
# obstacle.csv
# Extract objects, actions and the object -> state mapping from the obstacle
# frame. The argument 2 is the position of the first link-code column, i.e.
# the first column after 'name' and 'index'.

# obstacle, obstacle_dict = load_and_clean('./content/obstacle.csv')

obstacle_objs = get_objects(obstacle, 2)
# print(f'Obstacle objects: {obstacle_objs}')

obstacle_actions = get_actions(obstacle, 2)
# print(f'\nObstacle actions: {obstacle_actions}')

obstacle_mapping, obstacle_states = get_states(obstacle, 2, obstacle_objs)
# print('\nObstacle objects & states: ')
# for item, map in obstacle_mapping.items():
#     for name, column in map.items():
#         print(f"\t{name}: {column}")
        


In [187]:
# Display the extracted obstacle objects and actions as a tuple of two lists.
obstacle_objs, obstacle_actions

(['Forward Moving & Obstacle Avoiding System',
  'Obstacle',
  'Operator',
  'Detecting ()'],
 ['Detect Obstacle Triggering',
  'JetBot Stopping ()',
  'Detection Continuing',
  'Forward Movement Starting ()',
  'Message Information Anaylizing'])

In [188]:
# Build the transition triplets for the obstacle model from the objects,
# actions and state mapping extracted in the cells above.
triplets_obstacle = transitions_as_triplets(obstacle, obstacle_actions, obstacle_mapping)

# Print every field of each triplet on its own line, followed by blank lines
# to separate consecutive triplets.
print('\nTriplets:')
for triplet in triplets_obstacle:
    for key, value in triplet.items():
        print(f'{key}: {value}')
    print('\n')
    


Triplets:
action: Message Information Anaylizing
preconditions: [('Obstacle', 'Detected Obstacle {ob}', 'Precondition')]
start: ('Detecting ()', 'Detecting ()', 'Invocation')
end: ('Obstacle', 'Detected Obstacle?', 'Result')
agent: []
instrument: [('Obstacle', 'Detected Obstacle {ob}', 'Instrument')]




In [176]:
# Kettle and cups
# Run the full extraction on the `data` frame: objects, actions, the
# object -> state mapping, and finally the transition triplets.

# Usage
objs = get_objects(data, 2)
# print(f'\nObjects: {objs}')

acts = get_actions(data, 2)
# print(f'\nActions: {acts}')

# Only the header is printed here; the loops that print the mapping itself
# are commented out below.
print('\nObjects & states: ')
mapping, objects_states = get_states(data, 2, objs)
# Pretty printing mangles the dictionaries, so a loop is better
# pprint(objects_states,
#        indent=2,
#        sort_dicts=True)

# for item, map in mapping.items():
#     print(f"\n{item}: {map}")
#     for name, column in map.items():
#         print(f"\t{name}: {column}")

triplet_trans = transitions_as_triplets(data, acts, mapping)
# Print every field of each triplet, then blank lines as a separator.
# print('\nTriplets:')
for triplet in triplet_trans:
    for key, value in triplet.items():
        print(f"{key}: {value}")
    print('\n')

action: Filling
preconditions: []
start: ('Kettle', 'Full Kettle', 'Consumption')
end: ('Kettle', 'Half Kettle', 'Result')
agent: []
instrument: []
action: Filling
preconditions: []
start: ('Kettle', 'Half Kettle', 'Consumption')
end: ('Kettle', 'Empty Kettle', 'Result')
agent: []
instrument: []
action: Filling
preconditions: []
start: ('Cup1', 'Empty Cup1', 'Consumption (Conditional)')
end: ('Cup1', 'Full Cup1', 'Result')
agent: []
instrument: []
action: Filling
preconditions: []
start: ('Cup2', 'Empty Cup2', 'Consumption (Conditional)')
end: ('Cup2', 'Full Cup2', 'Result')
agent: []
instrument: []


In [178]:
# Stones
# Same pipeline as for the other datasets, applied to the `stones` frame.

stones_objs = get_objects(stones, 2)
# print(f'Stones objects: {stones_objs}')

stones_actions = get_actions(stones, 2)
# print(f'\nStones actions: {stones_actions}')

stones_mapping, stones_states = get_states(stones, 2, stones_objs)
# print('\nStones objects & states: ')
# pprint(stones_mapping,
#        indent=2,
#        sort_dicts=True)

print('\nStones transitions: ')
# stones_transitions = get_transitions(stones, stones_actions, stones_mapping)

triplets_stones = transitions_as_triplets(stones, stones_actions, stones_mapping)

# Print every field of each triplet, tab-indented, then blank lines as a
# separator between triplets.
for triplet in triplets_stones:
    for key, value in triplet.items():
        print(f'\t{key}: {value}')
    print('\n')


Stones transitions: 
	action: Putting
	preconditions: [('Hand', 'Empty Hand', 'Precondition')]
	start: ('Stone Pile', '1 Stone Stone Pile', 'Consumption (Conditional)')
	end: ('Stone Pile', 'Empty Stone Pile', 'Result')
	agent: []
	instrument: [('Hand', 'Empty Hand', 'Instrument (Conditional)')]


	action: Putting
	preconditions: [('Hand', 'Empty Hand', 'Precondition')]
	start: ('Stone Pile', '2 Stones Stone Pile', 'Consumption (Conditional)')
	end: ('Stone Pile', '1 Stone Stone Pile', 'Result')
	agent: []
	instrument: [('Hand', 'Empty Hand', 'Instrument (Conditional)')]


	action: Putting
	preconditions: [('Hand', 'Empty Hand', 'Precondition')]
	start: ('Bag1', 'Empty Bag1', 'Consumption (Conditional)')
	end: ('Bag1', 'Full Bag1', 'Result')
	agent: []
	instrument: [('Hand', 'Empty Hand', 'Instrument (Conditional)')]


	action: Putting
	preconditions: [('Hand', 'Empty Hand', 'Precondition')]
	start: ('Bag2', 'Empty Bag2', 'Consumption (Conditional)')
	end: ('Bag2', 'Full Bag2', 'Resul

# Code dump

In [22]:
# Getting transitions from the dataframe. We are searching for (1) non-zero values in columns for each 'action' - those are starting states, and (2) non-zero values in rows for each column corresponding to an action - those are ending states. E.g. For the 'data' dataframe, we are looking for (1) non-zero values in columns of 'Filling' row, and (2) non-zero values in rows of 'K' column.
# Objects-states are a dictionary {object: {state: column}}. Actions are a list of actions.
''' e.g. { 'Cup1': {'Cup1': 'E', 'Empty Cup1': 'F', 'Full Cup1': 'G'},
  'Cup2': {'Cup2': 'H', 'Empty Cup2': 'I', 'Full Cup2': 'J'},
  'Kettle': { 'Empty Kettle': 'B',
              'Full Kettle': 'D',
              'Half Kettle': 'C',
              'Kettle': 'A'}} '''
# Actions: ['Filling']

# def get_transitions(df, actions, objs_states):
    
#     transitions = {}

#     for action in actions:
#         filtered_df = df[df['name'] == action]
#         action_column = filtered_df['index'][0]

#         starting_states = []
#         xor_starting_states = []
#         ending_states = []
#         # columns = [column for column in filtered_df]
#         values = filtered_df

#         # Iterating over the {object: {state: column}} dictionary
#         for obj, states in objs_states.items():
#             for state, column in states.items():
#                 # print(f'Object: {obj}, State: {state}, Column: {column}')
#                 # Use the classify_relation function to get the relation type
#                 relation = classify_relation(str(values[column][0]), link_mapping)
#                 # Get the starting states for the current action, and append the relation type, too
#                 if values[column].isin([1, 11, 111]).any():
#                     starting_states.append(state)
#                     if values[column].isin([111]).any():
#                         xor_starting_states.append(state)

#                 # Get the cell value at row 'state' and column 'action_column' from the full dataframe
#                 cell_value = df.loc[df['name'] == state, action_column].values[0]
#                 if cell_value == 12:
#                     ending_states.append(state)

#         transitions[action] = {
#             'starting_states': starting_states,
#             'xor_starting_states': xor_starting_states,
#             'ending_states': ending_states
#         }

#     return transitions

# Another option for printing the triplets: a compact three-line summary per
# transition. Uses `triplet_trans`, which is built in the kettle-and-cups
# cell, so that cell has to run first.
for t in triplet_trans:
    print(f"Action: {t['action']}")
    # 'start' and 'end' are (object, state, relation) tuples.
    print(f"  From: {t['start'][0]} - {t['start'][1]} ({t['start'][2]})")
    print(f"  To: {t['end'][0]} - {t['end'][1]} ({t['end'][2]})")
    print()