This notebook constructs state and action objects from the log tables, and exports them as transition tables and lists in [pickle](https://docs.python.org/3/library/pickle.html) format.

In [None]:
import pickle

import pathlib as pl
import pandas as pd
import networkx as nx

from justhink_world import create_all_worlds
from justhink_world.domain.state import NetworkState, EnvironmentState
from justhink_world.agent import Human, Robot
from justhink_world.domain.action import *

### Define paths.

In [None]:
# Input locations: bundled resources and the per-participant log tables.
resources_dir = pl.Path('resources')
networks_dir = resources_dir / 'networks'
tables_dir = pl.Path('../processed_data/log_tables')

# Output locations: the two pickle files written by this notebook.
transition_tables_pickle_file = pl.Path('../processed_data/justhink_spring21_transition_tables.pickle')
transitions_pickle_file = pl.Path('../processed_data/justhink_spring21_transition_lists.pickle')

### Create activity instances.
Activities are represented in 'justhink_world' as worlds.

In [None]:
# Instantiate every activity ("world") provided by justhink_world,
# indexed by activity name.
worlds = create_all_worlds()

# Pick one activity to inspect.
# name = 'pretest-1'
name = 'collaboration-1'

# Show the background network of that activity: its edges together with
# their data (the connection costs, per the original note in this cell).
background_graph = worlds[name].env.state.network.graph
print(background_graph.edges(data=True))

In [None]:
# Map activity names as they appear in the logs to the latest names used
# by the justhink_world representation package.
# Every current world name maps to itself ...
world_renamer = {world_name: world_name for world_name in worlds}
# ... and the names that only occur in the logs map to their new names.
world_renamer.update({
    'indiv-illustrate': 'tutorial',
    'collab-activity': 'collaboration-1',
    'collab-activity-2': 'collaboration-2',
    'debriefing': 'bye',
})

world_renamer

### Read the state transition log files.

In [None]:
# Discover the state transition files (one per participant log directory).
file_list = tables_dir.glob(
    'justhink21_log_*/justhink_app-state_transition.csv')

# Map participant number -> log file path. The participant number is the
# text after the last underscore of the log directory path.
files = {
    int(str(path.parent).split('_')[-1]): path
    for path in file_list
}

# Read each participant's log file into a DataFrame.
tables = {
    participant: pd.read_csv(path)
    for participant, path in files.items()
}

### Construct a state object from a log table row.

In [None]:
def construct_state(row, graph, state_name='state', verbose=False):
    """Build an EnvironmentState from one row of a transition log table.

    Arguments:
        row: a log table row, indexable by column name.
        graph: the background network graph passed on to NetworkState.
        state_name: column prefix to read from; 'state' for the current
            state, 'next_state' for the next state of the logged
            <state, action, next_state> 3-tuple.
        verbose: if True, print the constructed state.

    Returns:
        The constructed EnvironmentState.
    """
    def column(suffix):
        # Full column name for this state, e.g. 'state.edges'.
        return '{}.{}'.format(state_name, suffix)

    # Recover the selected edges from their textual list form.
    pieces = str(row[column('edges')]).strip('[').strip(']').split(',')
    if pieces[0] == '':
        # An empty edge list means that no edges are selected.
        edges = []
    else:
        # Tokens 1 and 3 of each piece are the two node ids
        # (presumably text like 'u: 1 v: 2' -- confirm against the logs).
        edges = [
            (int(tokens[1]), int(tokens[3]))
            for tokens in (piece.split() for piece in pieces)
        ]

    # The selected edges are represented as a subgraph.
    subgraph = nx.Graph()
    for u, v in edges:
        subgraph.add_edge(u, v)

    # The suggested edge; (-1, -1) in the log stands for "no suggestion".
    suggested_u = int(row[column('suggested.u')])
    suggested_v = int(row[column('suggested.v')])
    if (suggested_u, suggested_v) == (-1, -1):
        suggested_edge = None
    else:
        suggested_edge = (suggested_u, suggested_v)

    # Terminal / submitting flags.
    # NOTE(review): bool() of a non-empty string is always True; this
    # assumes the columns were parsed as booleans or numbers -- confirm.
    is_terminal = bool(row[column('terminal')])
    is_submitting = bool(row[column('submit_suggested')])

    # The set of active agents (whose turn it is at that state).
    # NOTE(review): 'turn_agent' is read for both 'state' and 'next_state'.
    agents_by_turn = {
        'human': frozenset({Human}),
        'robot': frozenset({Robot}),
    }
    agents = agents_by_turn.get(row['turn_agent'], frozenset())

    # Assemble the network state, then the environment state around it.
    network = NetworkState(graph, subgraph, suggested_edge)
    state = EnvironmentState(
        network, agents=agents, is_submitting=is_submitting,
        is_terminal=is_terminal)

    if verbose:
        print(state)

    return state


# Example / try out: build the state for one row of participant 1's log,
# using the background graph of the 'pretest-1' activity.
world = worlds['pretest-1']
df = tables[1].copy()
row_index = 10
example_row = df.iloc[row_index]
example_graph = world.env.state.network.graph
construct_state(example_row, example_graph, verbose=True)

### Construct an action object from a log table row.

In [None]:
def construct_action(row, graph, verbose=False):
    """Construct an action object from a log table row.

    Arguments:
        row: a log table row, indexable by column name. Reads
            'action.type' and 'action.agent_name', and for the edge
            actions (types 0-2) also 'action.edge.u' and 'action.edge.v'.
        graph: not used; kept so that existing callers keep working.
        verbose: if True, print the constructed action.

    Returns:
        The action object for the row, or None if the action type is not
        one of the types 0-8 (type 9 additionally prints "TYPE_GUESS").

    Raises:
        NotImplementedError: if the agent name is neither 'human' nor
            'robot'.
    """
    action_type = row['action.type']

    agent_name = row['action.agent_name']
    if agent_name == 'human':
        agent = Human
    elif agent_name == 'robot':
        agent = Robot
    else:
        raise NotImplementedError(
            'Unknown agent name: {!r}'.format(agent_name))

    # Actions that take an edge and an agent.
    edge_actions = {
        0: SuggestPickAction,
        1: PickAction,
        2: UnpickAction,
    }
    # Actions that take only an agent.
    agent_actions = {
        3: SubmitAction,
        4: AttemptSubmitAction,
        5: ContinueAction,
        6: AgreeAction,
        7: DisagreeAction,
    }

    action = None
    if action_type in edge_actions:
        # Cast the endpoints to plain ints, as construct_state does, so the
        # edge does not carry pandas/numpy scalars (or floats) from the table.
        edge = (int(row['action.edge.u']), int(row['action.edge.v']))
        action = edge_actions[action_type](edge, agent)
    elif action_type in agent_actions:
        action = agent_actions[action_type](agent)
    elif action_type == 8:
        # The clear action is constructed without an agent argument.
        action = ClearAction()
    elif action_type == 9:
        # Guess-type entries have no action object; only report them.
        print("TYPE_GUESS")

    if verbose:
        print(action)

    return action


# Example / try out: build the action for one row of participant 1's log,
# passing the background graph of the 'pretest-1' activity.
world = worlds['pretest-1']
df = tables[1].copy()
row_index = 10
example_row = df.iloc[row_index]
example_graph = world.env.state.network.graph
construct_action(example_row, example_graph, verbose=True)

### Construct all states and actions for a log table.

Iterate through the rows.
Also append the state (`state`, `next_state`), `action`, `cost`, `is_submission`, `is_mst`, and `is_spanning` columns to the table.

In [None]:
def construct_state_from_table(df, state_name='state', verbose=False):
    """Construct a state object for every row of a log table.

    The states are stored in a new column named `state_name`; the table is
    modified in place and also returned.

    Each state additionally gets:
      - attempt_no: a per-activity counter that starts at 1 and is
        incremented on every row that identify_submission_state flags
        (the increment happens before the value is stored on that row).
      - max_attempts: set to 4 for activities whose (renamed) name
        contains 'collab'.

    Note: The argument state_name='state' for the current state,
    state_name='next_state' for the next state in
    the 3-tuple logs of <state, action, next_state>.
    The verbose argument is currently not used.
    """
    states = []
    attempts = {}

    for _, row in df.iterrows():
        # Rename the activity to match the updated representation package.
        activity = world_renamer[row['header.frame_id']]
        background_graph = worlds[activity].env.state.network.graph

        # First row of an activity: start counting attempts from 1.
        attempts.setdefault(activity, 1)

        state = construct_state(
            row, background_graph, state_name=state_name)

        # A submission on this row starts the next attempt.
        if identify_submission_state(row):
            attempts[activity] += 1
        state.attempt_no = attempts[activity]

        # Update the max attempts.
        if 'collab' in activity:
            state.max_attempts = 4

        states.append(state)

    df[state_name] = states
    return df

In [None]:
def construct_actions_and_metadata_from_table(df, verbose=False):
    """Construct the action and per-row metadata for a log table.

    Expects `df` to already have a 'state' column holding state objects.
    For every row, this builds the action object and evaluates the row's
    'state': whether the row is a submission, and the cost, is-MST and
    is-spanning values reported by the state's network.

    The results are stored in the columns 'action', 'cost',
    'is_submission', 'is_mst' and 'is_spanning'; the table is modified in
    place and also returned.

    If verbose is True, each submission row is printed.
    """
    # Column name -> collected values, in the order the columns are added.
    collected = {
        'action': [],
        'cost': [],
        'is_submission': [],
        'is_mst': [],
        'is_spanning': [],
    }

    for _, row in df.iterrows():
        # Rename the activity to match the updated representation package.
        activity = world_renamer[row['header.frame_id']]
        background_graph = worlds[activity].env.state.network.graph

        collected['action'].append(construct_action(row, background_graph))

        state = row['state']

        submitted = identify_submission_state(row)
        collected['is_submission'].append(submitted)
        if verbose and submitted:
            print('Submitted', activity, state)

        network = state.network
        collected['cost'].append(network.get_cost())
        collected['is_mst'].append(network.is_mst())
        collected['is_spanning'].append(network.is_spanning())

    for column, values in collected.items():
        df[column] = values

    return df

### Identify a submission action and the associated submitted state.

In [None]:
def identify_submission_state(row):
    """Tell whether a log table row records a submission.

    The activity name of the row is first mapped through world_renamer.
    For activities whose name contains 'test', action type 4 counts as a
    submission; for activities whose name contains 'collab', action type 3
    does. Rows of any other activity are never submissions.
    """
    # Renamed activities.
    problem_name = world_renamer[row['header.frame_id']]
    action_type = row['action.type']

    # (name marker, action type that counts as a submission), checked in order.
    submit_types = (
        ('test', 4),
        ('collab', 3),
    )
    for marker, submit_type in submit_types:
        if marker in problem_name:
            return action_type == submit_type

    return False
    

# Example / try out: attach both state columns to participant 1's table.
# name = 'collab-activity'
name = 'pretest-1'
df = construct_state_from_table(tables[1].copy(), state_name='next_state')
df = construct_state_from_table(df, state_name='state', verbose=True)

# Show all columns when a table is displayed.
pd.set_option('display.max_columns', None)
# df

### Construct a transition table from the log table for each participant.

In [None]:
# Participant number -> log table extended with the 'state', 'next_state',
# 'action' and metadata columns.
transition_tables = {}
for participant in sorted(tables):
    # Work on a copy so that the raw log tables stay untouched.
    df = construct_state_from_table(
        tables[participant].copy(), state_name='state')
    df = construct_state_from_table(df, state_name='next_state')
    transition_tables[participant] = \
        construct_actions_and_metadata_from_table(df)

    print('Processed participant', participant)

### Clean the tables, drop/rename columns.

In [None]:
# Columns of the raw log that are renamed in the clean tables.
column_renames = {
    'Time': 'time',
    'header.frame_id': 'activity',
}

# Raw state/action element columns that are removed from the clean tables.
dropped_columns = [
    'header.seq',
    'header.stamp.secs',
    'header.stamp.nsecs',
    'action.agent_name',
    'state.edges',
    'state.suggested.u',
    'state.suggested.v',
    'state.terminal',
    'state.submit_suggested',
    'action.type',
    'action.edge.u',
    'action.edge.v',
    'turn_agent',
    'next_state.edges',
    'next_state.suggested.u',
    'next_state.suggested.v',
    'next_state.terminal',
    'next_state.submit_suggested',
    'action_no',
    'step_no',
]

clean_transition_tables = dict()
for participant in sorted(transition_tables):
    df = transition_tables[participant].copy()

    # Make the time column relative to the first row, i.e. a duration.
    df['Time'] = df['Time'] - df.iloc[0]['Time']

    df = df.rename(columns=column_renames)

    # Replace the logged activity names with their latest names.
    df['activity'] = [world_renamer[a] for a in df['activity']]

    clean_transition_tables[participant] = df.drop(columns=dropped_columns)

In [None]:
# clean_transition_tables[1]

### Export the transition tables in pickle format.

In [None]:
# Serialize the cleaned tables (participant -> DataFrame) into one pickle file.
with open(transition_tables_pickle_file, 'wb') as handle:
    pickle.dump(
        clean_transition_tables,
        handle,
        protocol=pickle.HIGHEST_PROTOCOL,
    )

print('Saved transition tables to {}'.format(transition_tables_pickle_file))

### Construct state transition lists per activity.

In [None]:
# List of transitions, indexed by participant and then the activity name.
transition_lists = dict()

for participant, log_df in transition_tables.items():
    print('Processing participant {}:'.format(participant))
    transition_lists[participant] = dict()

    for name in world_renamer:

        df = log_df[log_df['header.frame_id'] == name].reset_index()

        trans_list = []
        for i, row in df.iterrows():
            if i == 0:
                trans_list.append(row['state'])
            trans_list.append(row['action'])
            trans_list.append(row['next_state'])

        if len(trans_list) != 0:
            print('  Added {} states for {} at {}'.format(
                len(trans_list)+1//2, participant, name))
            transition_lists[participant][world_renamer[name]] = trans_list

### Export state transition lists.

In [None]:
# Serialize the transition lists (participant -> activity -> list) into
# one pickle file.
with open(transitions_pickle_file, 'wb') as handle:
    pickle.dump(
        transition_lists,
        handle,
        protocol=pickle.HIGHEST_PROTOCOL,
    )

print('Saved transition lists to {}'.format(transitions_pickle_file))