In [1]:
from justhink_problem.problem import init_problem, reset_problem
from justhink_problem.domain.state import WorldState
import pathlib as pl
import pickle
import pandas as pd
import copy

In [2]:
# Let the notebook display up to 500 rows of a DataFrame before truncating.
pd.set_option('display.max_rows', 500)

# Integer code for each action type name.
# NOTE(review): presumably these are the codes stored in the 'action.type'
# column of the logs - confirm against the logging application.
action_type_dict = dict(
    TYPE_SUGGEST_PICK=0,
    TYPE_PICK=1,
    TYPE_UNPICK=2,
    TYPE_SUBMIT=3,
    TYPE_SUGGEST_SUBMIT=4,
    TYPE_CLEAR_SUGGEST_SUBMIT=5,
    TYPE_AGREE=6,
    TYPE_DISAGREE=7,
    TYPE_CLEAR=8,
    TYPE_GUESS=9,
)

# Define paths.

In [3]:
# Where the network definition files (edge lists and layouts) are looked up.
resources_dir = pl.Path('resources')
networks_dir = resources_dir / 'networks'

# Where the per-student log tables are read from.
tables_dir = pl.Path('../processed_data/log_tables')

# Where the processed tables are pickled to at the end of the notebook.
processed_tables_pickle_file = pl.Path('../processed_data/processed_tables.pickle')

### Making activity network files.

In [4]:
# making network files for an activity
def make_network_files(name, networks_dir):
    """Build the paths of the two network files that describe an activity.

    Args:
        name: activity name used as the file name prefix.
        networks_dir: path-like object with a ``joinpath`` method (such as
            ``pathlib.Path``) pointing at the directory holding the files.

    Returns:
        A dict with the keys 'network_file' (``<name>_edgelist.txt``) and
        'layout_file' (``<name>_layout.json``), both inside ``networks_dir``.
    """
    file_patterns = (
        ('network_file', '{}_edgelist.txt'),
        ('layout_file', '{}_layout.json'),
    )
    return {
        key: networks_dir.joinpath(pattern.format(name))
        for key, pattern in file_patterns
    }

# Initializing Problems.

In [5]:
# Action types enabled in the tests and in the collaborative activities.
test_actions = ['pick', 'unpick', 'suggest-submit']
activity_actions = ['suggest-pick', 'submit']
test_submit_mode = 'once'

# Problem instances keyed by problem name.
problems = {}

# Five pretest and five posttest problems: 'pretest-1' ... 'posttest-5'.
for test in ('pretest', 'posttest'):
    for i in range(1, 6):
        name = '{}-{}'.format(test, i)
        problems[name] = init_problem(
            action_types=test_actions,
            submit_mode=test_submit_mode,
            **make_network_files(name, networks_dir))

# The two collaborative activities share the same set of action types.
for name in ('collab-activity', 'collab-activity-2'):
    problems[name] = init_problem(
        action_types=activity_actions,
        **make_network_files(name, networks_dir))

# The individual illustration problem is created with init_problem's defaults.
name = 'indiv-illustrate'
problems[name] = init_problem(**make_network_files(name, networks_dir))

In [6]:
# Show the edges (with their data, e.g. cost) of one problem's graph.
# Any other key of `problems`, such as 'pretest-1', can be used instead.
name = 'collab-activity'

problems[name].env.state.graph.edges(data=True)

EdgeDataView([(3, 1, {'cost': 3}), (3, 0, {'cost': 3}), (3, 2, {'cost': 3}), (3, 5, {'cost': 4}), (3, 7, {'cost': 5}), (1, 9, {'cost': 3}), (1, 2, {'cost': 2}), (1, 5, {'cost': 4}), (1, 4, {'cost': 3}), (0, 2, {'cost': 5}), (0, 7, {'cost': 4}), (2, 9, {'cost': 4}), (2, 8, {'cost': 3}), (5, 4, {'cost': 2}), (5, 7, {'cost': 3}), (5, 6, {'cost': 4}), (7, 6, {'cost': 2}), (9, 4, {'cost': 3}), (9, 8, {'cost': 2}), (4, 6, {'cost': 2})])

# Make dictionaries for dataframe traversal.

In [7]:
# Map each student id to the path of that student's state transition log.
# The id is the number after the last underscore of the log directory name
# (the glob pattern guarantees the name starts with 'justhink20_log_').
file_list = tables_dir.glob(
    'justhink20_log_*/justhink_app-state_transition.csv')
files = {int(f.parent.name.split('_')[-1]): f for f in file_list}

# glob yields its matches in an arbitrary, filesystem-dependent order; sort
# by student id so that every run traverses the students in the same order.
files = dict(sorted(files.items()))

# Load the state transition table of every student, keyed by student id.
tables = {student: pd.read_csv(path) for student, path in files.items()}

# Reconstruct a State.

In [8]:
# reconstructing the state of the input row
def reconstruct_state(full_row, graph, verbose=False):
    """Rebuild the WorldState described by one row of a state transition table.

    The relevant columns are read by position. Positions 3 to 7 of
    ``full_row`` hold, in order: the selected edges as text, the two
    endpoints of the suggested edge, the terminal flag and the
    submit-suggested flag.

    Args:
        full_row: pandas Series holding one table row, e.g. ``df.iloc[i]``
            or a row produced by ``df.iterrows()``.
        graph: graph handed to ``WorldState`` unchanged.
        verbose: if True, print the reconstructed state.

    Returns:
        The reconstructed ``WorldState``.
    """
    # The row is indexed by column name, so every access below is explicitly
    # positional (.iloc). A bare integer key such as row[0] on a
    # label-indexed Series is a deprecated positional fallback in pandas.
    row = full_row.iloc[3:9]

    # Reconstruct the edge set from its text form: a bracketed,
    # comma-separated list with one entry per edge.
    edge_list_text = str(row.iloc[0])
    edge_list = edge_list_text.strip('[').strip(']').split(',')
    edges = []
    # An empty first entry means that no edge is selected.
    if edge_list[0] != '':
        for edge_text in edge_list:
            # Tokens 1 and 3 of an entry are the two node ids.
            # NOTE(review): presumably entries look like 'u: 3 v: 1' -
            # confirm against the log format.
            tokens = edge_text.split()
            edges.append((int(tokens[1]), int(tokens[3])))
    edges = frozenset(edges)

    suggested = (int(row.iloc[1]), int(row.iloc[2]))
    terminal = bool(row.iloc[3])
    submit_suggested = bool(row.iloc[4])

    state = WorldState(graph, edges, suggested, submit_suggested, terminal)

    if verbose:
        print(state)
    return state


In [9]:
# Try the reconstruction on a single row of student 1's table, using the
# graph of the 'pretest-1' problem; verbose=True also prints the state.
row_index = 10
df = tables[1].copy()
problem = problems['pretest-1']
reconstruct_state(df.iloc[row_index], problem.env.state.graph, verbose=True)

WorldState(n:7,e:12|e:6,c:19,s:False,t:False)


WorldState(n:7,e:12|e:6,c:19,s:False,t:False)

# Reconstruct all States.

Appending world_state, is_submission, cost, and is_mst columns to the table.

In [10]:
# reconstruct the state for every row in the table
def reconstruct_problem_state(df, verbose=False):
    """Add the reconstructed-state columns to a state transition table.

    For every row, the problem named in the 'header.frame_id' column is
    looked up in the module-level ``problems`` dict, the row's state is
    rebuilt on that problem's graph with ``reconstruct_state``, and the row
    is classified with ``identify_submission_state``.

    Args:
        df: state transition table; it is modified in place.
        verbose: if True, print every row identified as a submission.

    Returns:
        The same ``df`` with four added columns: 'world_state',
        'is_submission', 'cost' (``state.get_cost()``) and 'is_mst'
        (``state.is_mst()``).
    """
    world_states, submission_flags, costs, mst_flags = [], [], [], []

    for _, row in df.iterrows():
        frame_id = row['header.frame_id']
        graph = problems[frame_id].env.state.graph
        state = reconstruct_state(row, graph)

        submitted = identify_submission_state(row)
        if verbose and submitted:
            print('Submitted', frame_id, state)

        world_states.append(state)
        submission_flags.append(submitted)
        costs.append(state.get_cost())
        mst_flags.append(state.is_mst())

    # Attach the per-row results as new columns, in a fixed order.
    new_columns = (
        ('world_state', world_states),
        ('is_submission', submission_flags),
        ('cost', costs),
        ('is_mst', mst_flags),
    )
    for column, values in new_columns:
        df[column] = values

    return df

### Identifying submission state.

In [11]:
# identifies which problem state has been submitted
def identify_submission_state(row):
    """Tell whether a table row records a submission.

    The action type that counts as a submission depends on the problem
    named in the row's 'header.frame_id':

    * names containing 'test': action type 4;
    * otherwise, names containing 'collab': action type 3;
    * any other problem: never a submission.

    Args:
        row: mapping-like row providing 'header.frame_id' and 'action.type'.

    Returns:
        The result of comparing the row's action type with the relevant
        code, or False for problems matching neither rule.
    """
    frame_id = row['header.frame_id']
    observed_type = row['action.type']

    # Ordered rules: the first marker found in the problem name decides.
    submit_rules = (('test', 4), ('collab', 3))
    for marker, submit_type in submit_rules:
        if marker in frame_id:
            return observed_type == submit_type
    return False


In [12]:
# Process a copy of student 1's table and print each detected submission.
# `problem_name` is not read by the call below.
problem_name = 'pretest-1'
df = reconstruct_problem_state(tables[1].copy(), verbose=True)

Submitted pretest-1 WorldState(n:7,e:12|e:7,c:23,s:True,t:False)
Submitted pretest-1 WorldState(n:7,e:12|e:7,c:23,s:True,t:False)
Submitted pretest-2 WorldState(n:7,e:12|e:7,c:38,s:True,t:False)
Submitted pretest-2 WorldState(n:7,e:12|e:7,c:38,s:True,t:False)
Submitted pretest-3 WorldState(n:7,e:12|e:6,c:22,s:True,t:False)
Submitted pretest-3 WorldState(n:7,e:12|e:6,c:22,s:True,t:False)
Submitted pretest-4 WorldState(n:7,e:12|e:6,c:45,s:True,t:False)
Submitted pretest-4 WorldState(n:7,e:12|e:6,c:45,s:True,t:False)
Submitted pretest-5 WorldState(n:7,e:12|e:6,c:15,s:True,t:False)
Submitted pretest-5 WorldState(n:7,e:12|e:6,c:15,s:True,t:False)
Submitted collab-activity WorldState(n:10,e:20|e:14,c:38,s:True,t:False)
Submitted collab-activity WorldState(n:10,e:20|e:11,c:29,s:True,t:False)
Submitted collab-activity WorldState(n:10,e:20|e:11,c:29,s:True,t:False)
Submitted collab-activity WorldState(n:10,e:20|e:12,c:32,s:True,t:False)
Submitted collab-activity-2 WorldState(n:10,e:20|e:11,c:47

# Storing processed tables into a dictionary

In [13]:
# Processed copy of every student's table, keyed like `tables`; the raw
# tables themselves are left untouched because only copies are processed.
processed_tables = dict()
for key in tables:
    df = reconstruct_problem_state(tables[key].copy())
    processed_tables[key] = df
    print('Processed', key)

Processed 6
Processed 1
Processed 9
Processed 7
Processed 2
Processed 5
Processed 4
Processed 3
Processed 10


# Pickling processed tables

In [14]:
# Write the whole dictionary of processed tables into a single pickle file,
# using the highest pickle protocol available.
with open(processed_tables_pickle_file, 'wb') as handle:
    pickle.dump(processed_tables, handle, protocol=pickle.HIGHEST_PROTOCOL)

print('Saved processed tables to {}'.format(processed_tables_pickle_file))

Saved processed tables to ../processed_data/processed_tables.pickle
