In [73]:
# README
#
# This file is used to analyze previous input data and generate input data for new designs
#
# To correctly use this notebook:
# a) Put the file 'Dataset-TaskDesign02-proc-fragments-pairs--CSV.csv' in the same folder as this notebook
#    This file is available at the following address:
#    https://www.dropbox.com/sh/2nedm4o444nnpid/AAAThms56S4Vz1jP4WsYR3bba?dl=0
# b) On notebook start use: Cell -> Run All

# Read existing input data
import csv

# Load every line of the dataset into memory.
# newline='' is what the csv module expects of the file object, so that the
# reader (not the text layer) decides how line endings are handled.
# NOTE: the file is ';'-separated but is parsed with the default ',' dialect, so
# a row normally arrives as a one-element list holding the whole line; the
# following cells split that string on ';' themselves.
with open('Dataset-TaskDesign02-proc-fragments-pairs--CSV.csv', newline='') as csvfile:
    csvreader = csv.reader(csvfile)
    rows = list(csvreader)

In [74]:
# Summarise what was loaded: overall size, the header line and a peek at the
# first three data rows.
total_rows = len(rows)
print(f'Total rows: {total_rows}\n')

# Row 0 is the header; everything after it is data.
header_row = rows[0]
print(f'Headers: {header_row}\n')

data = rows[1:]
total_data = len(data)
print(f'Total DATA rows: {total_data}\n')
for position, ordinal in enumerate(('First', 'Second', 'Third')):
    print(f'{ordinal} data row: {data[position]}\n')

Total rows: 990

Headers: ['processId_A;nodeId_A;nodeLabel_A;processId_B;nodeId_B;nodeLabel_B;img_A;img_B']

Total DATA rows: 989

First data row: ['83;2707;check documents;89;2884;documents received;http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2707.png;http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2884.png']

Second data row: ['83;2707;check documents;89;2883;rejected;http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2707.png;http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2883.png']

Third data row: ['83;2707;check documents;89;2889;send letter of rejection;http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2707.png;http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2889.png']



In [75]:
# One ';'-separated string per data row.
# The file was parsed with the ',' dialect, so a line that contains a comma
# arrives split into several fields; re-joining the fields with ',' restores the
# line (quoting aside) instead of silently dropping everything after the first
# comma. Blank lines, which the reader returns as empty rows, are skipped.
datastr = [','.join(fields) for fields in data if fields]

# List of lists of values contained in a row
datavals = [line.split(';') for line in datastr]

datavals[0]

['83',
 '2707',
 'check documents',
 '89',
 '2884',
 'documents received',
 'http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2707.png',
 'http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2884.png']

In [76]:
# Split every pair row into its two node records.
# Columns 0-2 and 6 belong to node A, columns 3-5 and 7 to node B, so each
# record reads [process id, node id, label, image URL].
singlevals = [
    [[row[col] for col in (0, 1, 2, 6)],
     [row[col] for col in (3, 4, 5, 7)]]
    for row in datavals
]
singlevals[0]

[['83',
  '2707',
  'check documents',
  'http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2707.png'],
 ['89',
  '2884',
  'documents received',
  'http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2884.png']]

In [77]:
from itertools import chain

# Flatten singlevals by one level:
# every entry of flatvals is the record of a single node.
flatvals = [record for pair in singlevals for record in pair]

[flatvals[idx] for idx in (0, 1)]

[['83',
  '2707',
  'check documents',
  'http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2707.png'],
 ['89',
  '2884',
  'documents received',
  'http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2884.png']]

In [78]:
len(flatvals) # Flatvals contains duplicate node data

1978

In [79]:
# Distinct process ids (first field of every node record)
proset = set(record[0] for record in flatvals)

len(proset) # 4 unique processes

4

In [80]:
proset

{'82', '83', '84', '89'}

In [81]:
# Distinct node ids (second field of every node record)
nodeset = set(record[1] for record in flatvals)

len(nodeset) # 76 unique nodes

76

In [82]:
#nodeset

In [83]:
# Distinct image URLs (fourth field of every node record)
imageset = set(record[3] for record in flatvals)

len(imageset) # 76 unique images (1 image per node)

76

In [84]:
#imageset

In [85]:
# Distinct node labels (third field of every node record), case-sensitive
labelset = set(record[2] for record in flatvals)

len(labelset) # 65 unique labels (considering letter case)

65

In [86]:
#labelset

In [87]:
# Node lookup table keyed by node id.
# Each value is { proc_id, node_id, label, img }; a node that appears in several
# pair rows is stored once (the last record seen wins).
nodes = {
    record[1]: dict(zip(('proc_id', 'node_id', 'label', 'img'), record))
    for record in flatvals
}
len(nodes)

76

In [88]:
nodes['2709'] # Example node (activity)

{'img': 'http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2709.png',
 'label': 'send letter of rejection',
 'node_id': '2709',
 'proc_id': '83'}

In [89]:
nodes['2709']['label']

'send letter of rejection'

In [90]:
# Map each exact (case-sensitive) label to the ids of the nodes carrying it.
# Every label of labelset gets an entry, then the nodes are visited once and
# filed under their own label (instead of rescanning all nodes for every label).
label_to_nodes = {label: [] for label in labelset}
for node_id, node in nodes.items():
    label_to_nodes[node['label']].append(node_id)

label_to_nodes

{'Acceptance': ['2683'],
 'Apply Online': ['2667'],
 'Check Documents': ['2671'],
 'Documents received': ['2670'],
 'Evaluate': ['2672'],
 'Invite to an aptitude test': ['2673'],
 'Keep in the Applicant pool': ['2674'],
 'Pay for Aptitude test': ['2679'],
 'Rank Students according to GPA and the test results': ['2676'],
 'Send Documents by Post': ['2669'],
 'Send Online Protocol': ['2668'],
 'Send letter of rejection': ['2684'],
 'Take Aptitude test': ['2675'],
 'accepted': ['2696'],
 'accepted provisionally': ['2698'],
 'add certificate of bachelor degree': ['2716'],
 'add certificate of german language': ['2722'],
 'apply online': ['2886', '2691'],
 'average grade is good or better': ['2687'],
 'average grade is less than good': ['2689'],
 'bridging courses < 30 cp': ['2896'],
 'bridging courses > 30 cp': ['2895'],
 'certificate received': ['2700', '2702'],
 'check application complete': ['2715'],
 'check application in time': ['2717'],
 "check bachelor's degree": ['2894'],
 'check c

In [91]:
# Keep only the labels carried by at least two nodes
shared_labels = dict(
    (label, node_ids)
    for label, node_ids in label_to_nodes.items()
    if len(node_ids) >= 2
)

shared_labels

{'apply online': ['2886', '2691'],
 'certificate received': ['2700', '2702'],
 'check documents': ['2707', '2897'],
 'documents received': ['2884', '2899', '2706'],
 'evaluate': ['2887', '2708'],
 'rejected': ['2883', '2893', '2697'],
 'send documents by post': ['2885', '2690'],
 'send letter of acceptance': ['2888', '2694'],
 'send letter of rejection': ['2889', '2709']}

In [92]:
nodes['2886']

{'img': 'http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2886.png',
 'label': 'apply online',
 'node_id': '2886',
 'proc_id': '89'}

In [93]:
nodes['2691']

{'img': 'http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2691.png',
 'label': 'apply online',
 'node_id': '2691',
 'proc_id': '83'}

In [94]:
# Same labels with letter case folded to lower case
lower_labelset = set(map(str.lower, labelset))

len(lower_labelset) # 59 unique labels (ignoring letter case)

59

In [95]:
lower_labelset # The list of the 59 unique labels (ignoring letter case)

{'acceptance',
 'accepted',
 'accepted provisionally',
 'add certificate of bachelor degree',
 'add certificate of german language',
 'apply online',
 'average grade is good or better',
 'average grade is less than good',
 'bridging courses < 30 cp',
 'bridging courses > 30 cp',
 'certificate received',
 'check application complete',
 'check application in time',
 "check bachelor's degree",
 'check certificate',
 'check documents',
 'check if bachelor is sufficient',
 'check if bachelors-grade within top 85%',
 'complete and in time?',
 'complete application',
 'conduct interview',
 'document',
 'documents received',
 'evaluate',
 'fill out application form',
 'forward documents',
 'german?',
 'go to interview',
 'hand application over to examining board',
 'immatriculate',
 'invite for talk',
 'invite to an aptitude test',
 'keep in the applicant pool',
 'less than 16 cp in mathematics',
 'pay for aptitude test',
 'provisional acceptance cancelled',
 'provisional acceptance confirmed'

In [96]:
# Map each lower-cased label to the ids of the nodes whose label matches it
# ignoring letter case. Every label of lower_labelset gets an entry, then the
# nodes are visited once (instead of rescanning all nodes for every label).
lower_label_to_nodes = {label: [] for label in lower_labelset}
for node_id, node in nodes.items():
    lower_label_to_nodes[node['label'].lower()].append(node_id)

len(lower_label_to_nodes)

59

In [97]:
# Keep only the case-insensitive labels carried by at least two nodes
lower_shared_labels = dict(
    (label, node_ids)
    for label, node_ids in lower_label_to_nodes.items()
    if len(node_ids) >= 2
)

lower_shared_labels

{'apply online': ['2886', '2691', '2667'],
 'certificate received': ['2700', '2702'],
 'check documents': ['2707', '2897', '2671'],
 'documents received': ['2884', '2899', '2706', '2670'],
 'evaluate': ['2887', '2708', '2672'],
 'rejected': ['2883', '2893', '2697'],
 'send documents by post': ['2885', '2690', '2669'],
 'send letter of acceptance': ['2888', '2694'],
 'send letter of rejection': ['2889', '2709', '2684']}

In [98]:
# Ids of every node whose proc_id is '82'
nodes82 = [node_id for node_id in nodes if nodes[node_id]['proc_id'] == '82']
len(nodes82)

13

In [99]:
# Ids of every node whose proc_id is '83'
nodes83 = [node_id for node_id in nodes if nodes[node_id]['proc_id'] == '83']
len(nodes83)

23

In [100]:
# Ids of every node whose proc_id is '84'
nodes84 = [node_id for node_id in nodes if nodes[node_id]['proc_id'] == '84']
len(nodes84)

23

In [101]:
# Ids of every node whose proc_id is '89'
nodes89 = [node_id for node_id in nodes if nodes[node_id]['proc_id'] == '89']
len(nodes89)

17

In [102]:
# Labels of the process 82 nodes, in nodes82 order, printed one per line
labels82 = [nodes[node_id]['label'] for node_id in nodes82]
for label in labels82:
    print(label)

Apply Online
Send Online Protocol
Evaluate
Acceptance
Pay for Aptitude test
Send letter of rejection
Rank Students according to GPA and the test results
Check Documents
Documents received
Send Documents by Post
Keep in the Applicant pool
Take Aptitude test
Invite to an aptitude test


In [103]:
# Labels of the process 83 nodes, in nodes83 order, printed one per line
labels83 = [nodes[node_id]['label'] for node_id in nodes83]
for label in labels83:
    print(label)

check documents
less than 16 cp in mathematics
send letter of rejection
sufficient cp in mathematics
take oral exam
evaluate
wait for bachelor's certificate
send bachelor's certificate
send letter of provisional acceptance
rejected
check certificate
provisional acceptance cancelled
accepted provisionally
send letter of acceptance
provisional acceptance confirmed
average grade is less than good
average grade is good or better
accepted
certificate received
certificate received
documents received
apply online
send documents by post


In [104]:
# Labels of the process 84 nodes, in nodes84 order, printed one per line
labels84 = [nodes[node_id]['label'] for node_id in nodes84]
for label in labels84:
    print(label)

check application in time
send application
complete and in time?
check application complete
complete application
receive application
fill out application form
german?
add certificate of german language
set additional requirements
hand application over to examining board
check if bachelor is sufficient
add certificate of bachelor degree
invite for talk
receive rejection
check if bachelors-grade within top 85%
send rejection
immatriculate
receive acceptance
send acceptance
rank with other applicants
document
talk to applicant


In [105]:
# Labels of the process 89 nodes, in nodes89 order, printed one per line
labels89 = [nodes[node_id]['label'] for node_id in nodes89]
for label in labels89:
    print(label)

documents received
rejected
send letter of rejection
send letter of acceptance
rejected
check bachelor's degree
bridging courses > 30 cp
conduct interview
check documents
go to interview
send interview invitation
forward documents
documents received
evaluate
bridging courses < 30 cp
apply online
send documents by post


In [106]:
# Lower-cased copies of each process's labels, for case-insensitive comparison
lower_labels82 = list(map(str.lower, labels82))
lower_labels83 = list(map(str.lower, labels83))
lower_labels84 = list(map(str.lower, labels84))
lower_labels89 = list(map(str.lower, labels89))

In [107]:
# Case-insensitive labels shared by each pair of processes (all six pairs).
# The left-hand sets are built once and reused for every pair they take part in.
_labelset82 = set(lower_labels82)
_labelset83 = set(lower_labels83)
_labelset84 = set(lower_labels84)

common8283 = _labelset82.intersection(lower_labels83)
common8284 = _labelset82.intersection(lower_labels84)
common8289 = _labelset82.intersection(lower_labels89)
common8384 = _labelset83.intersection(lower_labels84)
common8389 = _labelset83.intersection(lower_labels89)
common8489 = _labelset84.intersection(lower_labels89)

In [108]:
for i in common8283:
    print(i)

check documents
documents received
evaluate
send documents by post
apply online
send letter of rejection


In [109]:
common8284

set()

In [110]:
for i in common8289:
    print(i)

check documents
documents received
evaluate
send documents by post
apply online
send letter of rejection


In [111]:
common8384

set()

In [112]:
for i in common8389:
    print(i)

send letter of acceptance
check documents
rejected
documents received
evaluate
send documents by post
apply online
send letter of rejection


In [113]:
common8489

set()

In [114]:
# Check that 82 shares exactly the same labels with 83 as it does with 89:
# the flag is True when the two sets differ, so its negation is displayed.
_diff_82838289 = not (common8283 == common8289)
not _diff_82838289

True

In [115]:
# Image URLs of the process 82 nodes, printed in sorted order
images82 = [nodes[node_id]['img'] for node_id in nodes82]
for url in sorted(images82):
    print(url)

http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2667.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2668.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2669.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2670.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2671.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2672.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2673.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2674.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2675.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2676.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2679.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2683.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2684.png


In [116]:
# Image URLs of the process 83 nodes, printed in sorted order
images83 = [nodes[node_id]['img'] for node_id in nodes83]
for url in sorted(images83):
    print(url)

http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2687.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2688.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2689.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2690.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2691.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2692.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2693.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2694.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2695.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2696.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2697.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2698.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2699.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2700.png
http://galvanic-axle-852.appspot.c

In [117]:
# Image URLs of the process 84 nodes, printed in sorted order
images84 = [nodes[node_id]['img'] for node_id in nodes84]
for url in sorted(images84):
    print(url)

http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2710.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2711.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2712.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2713.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2714.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2715.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2716.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2717.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2718.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2719.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2720.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2721.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2722.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2723.png
http://galvanic-axle-852.appspot.c

In [118]:
# Image URLs of the process 89 nodes, printed in sorted order
images89 = [nodes[node_id]['img'] for node_id in nodes89]
for url in sorted(images89):
    print(url)

http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2883.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2884.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2885.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2886.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2887.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2888.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2889.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2890.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2891.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2892.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2893.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2894.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2895.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2896.png
http://galvanic-axle-852.appspot.c

In [119]:
# Subjective list of nodes that could be grouped together in a single image
# Each inner list holds the node ids of one candidate group; the trailing
# comment names the group.
# NOTE(review): the '+'-joined ids in group82ids do not all match these lists
# (e.g. '2667+2668+2669+2670' there vs ['2667', '2668'] here) - confirm which
# grouping the hosted group images actually follow.
nodegroup82 = [
    ['2667', '2668'], # Apply online
    ['2669', '2670', '2671', '2672'], # Send document by post
    ['2673', '2674', '2675', '2679'], # Aptitude test
    ['2676', '2683', '2684'] # Acceptance
]
# Print the per-node image URLs of every subgroup so they can be inspected
print(f"Group 82 length: {len(nodegroup82)}")
for i, g in enumerate(nodegroup82):
    print(f"Subgroup {i}")
    for n in g:
        print(nodes[n]['img'])

Group 82 length: 4
Subgroup 0
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2667.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2668.png
Subgroup 1
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2669.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2670.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2671.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2672.png
Subgroup 2
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2673.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2674.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2675.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2679.png
Subgroup 3
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2676.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2683.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2684.png


In [120]:
# Subjective grouping of the process 83 nodes: each inner list holds the node
# ids of one candidate group; the trailing comment names the group.
# NOTE(review): group83ids lists its groups in a different order (Accept / Reject
# is 4th there, last here) and its last id is '2700+2701+2702+2703' rather than
# ['2701', '2702', '2703'] - confirm which one the hosted group images follow.
nodegroup83 = [
    ['2687', '2689', '2692', '2693'], # Average grade
    ['2688', '2704', '2705', '2708'], # Cp mathematics
    ['2690', '2691', '2706', '2707'], # Apply online
    ['2695', '2698', '2699', '2700'], # Provisional acceptance
    ['2701', '2702', '2703'], # Bachelor certificate
    ['2694', '2696', '2697', '2709'], # Accept / Reject
]
# Print the per-node image URLs of every subgroup so they can be inspected
print(f"Group 83 length: {len(nodegroup83)}")
for i, g in enumerate(nodegroup83):
    print(f"Subgroup {i}")
    for n in g:
        print(nodes[n]['img'])

Group 83 length: 6
Subgroup 0
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2687.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2689.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2692.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2693.png
Subgroup 1
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2688.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2704.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2705.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2708.png
Subgroup 2
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2690.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2691.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2706.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2707.png
Subgroup 3
http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2695.png
http://galvanic-axle-852.appspot.com/sta

In [121]:
# Subjective grouping of the process 84 nodes: each inner list holds the node
# ids of one candidate group; the trailing comment names the group.
# NOTE(review): most entries of group84ids differ from these lists (e.g.
# '2710+2711+2715+2717+2719+2720' there vs ['2715', '2717'] here) - confirm
# which grouping the hosted group images actually follow.
nodegroup84 = [
    ['2710', '2711', '2712', '2713', '2714'], # Complete and in time; REVIEW!!!
    ['2715', '2717'], # Check complete
    ['2716', '2718', '2722', '2730', '2732'], # German
    ['2719', '2720'], # Send/rec application
    ['2721', '2723', '2724', '2729', '2731'], # Send rejection
    ['2725', '2726', '2727', '2728'], # Invite to talk
]
# Print the per-node image URLs of every subgroup so they can be inspected
print(f"Group 84 length: {len(nodegroup84)}")
for i, g in enumerate(nodegroup84):
    print(f"Subgroup {i}")
    for n in g:
        print(nodes[n]['img'])

Group 84 length: 6
Subgroup 0
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2710.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2711.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2712.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2713.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2714.png
Subgroup 1
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2715.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2717.png
Subgroup 2
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2716.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2718.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2722.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2730.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2732.png
Subgroup 3
http://galvanic-axle-852.appspot.com/static/taskdesign01/84/2719.png
http://galvanic-axle-852.appspot.com/sta

In [122]:
# Subjective grouping of the process 89 nodes: each inner list holds the node
# ids of one candidate group; the trailing comment names the group.
# NOTE(review): group89ids has '2887+2895+2899' where this list has
# ['2887', '2895'] - confirm which grouping the hosted group images follow.
nodegroup89 = [
    ['2883', '2888', '2889', '2893'], # Send acc/rej
    ['2884', '2885', '2886', '2894'], # Apply online
    ['2887', '2895'], # Evaluate, 30cp
    ['2890', '2891', '2892'], # Interview
    ['2896', '2897', '2898', '2899'] # Check docs, 30cp
]
# Print the per-node image URLs of every subgroup so they can be inspected
print(f"Group 89 length: {len(nodegroup89)}")
for i, g in enumerate(nodegroup89):
    print(f"Subgroup {i}")
    for n in g:
        print(nodes[n]['img'])

Group 89 length: 5
Subgroup 0
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2883.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2888.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2889.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2893.png
Subgroup 1
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2884.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2885.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2886.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2894.png
Subgroup 2
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2887.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2895.png
Subgroup 3
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2890.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2891.png
http://galvanic-axle-852.appspot.com/static/taskdesign01/89/2892.png
Subgroup 4
http://galvanic-axle-852.apps

In [123]:
# Build Design 2 (Keywords) Input data (csv)
# Row => | proc_id | node_id | node_label | node_img |

def get_prop_list(node_data):
    """
    Turn a node record into a CSV-ready row.

    node_data: mapping with at least the keys 'proc_id', 'node_id', 'label'
               and 'img'; any other key is ignored.
    Returns [proc_id, node_id, label, img] as a new list.
    """
    return [node_data[field] for field in ('proc_id', 'node_id', 'label', 'img')]


# One CSV row per node, process by process
plist82 = [get_prop_list(nodes[node_id]) for node_id in nodes82]
plist83 = [get_prop_list(nodes[node_id]) for node_id in nodes83]
plist84 = [get_prop_list(nodes[node_id]) for node_id in nodes84]
plist89 = [get_prop_list(nodes[node_id]) for node_id in nodes89]

# Design 2 table: column names, then all rows in process order 82, 83, 84, 89
d2header = ['process_id', 'node_id', 'node_label', 'node_img']
d2data = [*plist82, *plist83, *plist84, *plist89]

def writed2csv(path='Design2_Input_v2.csv'):
    """
    Write the Design 2 input table to a CSV file.

    The header row d2header is written first, followed by every row of d2data,
    using the csv 'excel' dialect.

    path: destination file. Defaults to the file name this notebook has always
          used; the file is opened in 'w' mode, so existing content is replaced.
    """
    # newline='' lets the csv writer control line endings itself
    with open(path, 'w', newline='') as d2csvfile:
        writer = csv.writer(d2csvfile, dialect='excel')
        writer.writerow(d2header)
        writer.writerows(d2data)
#writed2csv() # Uncomment to write

In [124]:
#plist82

In [125]:
# Image URLs of the node groups of process 82.
# Order matters: entry i is paired with group82ids[i] when groups82 is built.
group82links = [
    'https://i.imgur.com/itkXCmg.png',
    'https://i.imgur.com/qM7QUtc.png',
    'https://i.imgur.com/Q009q9b.png',
    'https://i.imgur.com/OMUs1nZ.png'
]
for i in group82links:
    print(i)

https://i.imgur.com/itkXCmg.png
https://i.imgur.com/qM7QUtc.png
https://i.imgur.com/Q009q9b.png
https://i.imgur.com/OMUs1nZ.png


In [126]:
# Image URLs of the node groups of process 83.
# Order matters: entry i is paired with group83ids[i] when groups83 is built.
group83links = [
    'https://i.imgur.com/mMx14CQ.png',
    'https://i.imgur.com/HLcVtRu.png',
    'https://i.imgur.com/cy9q2Cq.png',
    'https://i.imgur.com/KGuw0WU.png',
    'https://i.imgur.com/0F7w4YA.png',
    'https://i.imgur.com/0bOIDEl.png'
]
for i in group83links:
    print(i)

https://i.imgur.com/mMx14CQ.png
https://i.imgur.com/HLcVtRu.png
https://i.imgur.com/cy9q2Cq.png
https://i.imgur.com/KGuw0WU.png
https://i.imgur.com/0F7w4YA.png
https://i.imgur.com/0bOIDEl.png


In [127]:
# Image URLs of the node groups of process 84.
# Order matters: entry i is paired with group84ids[i] when groups84 is built.
group84links = [
    'https://i.imgur.com/VZwozT1.png',
    'https://i.imgur.com/svotSPs.png',
    'https://i.imgur.com/rION8bN.png',
    'https://i.imgur.com/eK2xnAs.png',
    'https://i.imgur.com/TF0QbAE.png',
    'https://i.imgur.com/DT69KMC.png'
]
for i in group84links:
    print(i)

https://i.imgur.com/VZwozT1.png
https://i.imgur.com/svotSPs.png
https://i.imgur.com/rION8bN.png
https://i.imgur.com/eK2xnAs.png
https://i.imgur.com/TF0QbAE.png
https://i.imgur.com/DT69KMC.png


In [128]:
# Image URLs of the node groups of process 89.
# Order matters: entry i is paired with group89ids[i] when groups89 is built.
group89links = [
    'https://i.imgur.com/p4rP7Xh.png',
    'https://i.imgur.com/1GrcVPz.png',
    'https://i.imgur.com/fTDxvCy.png',
    'https://i.imgur.com/gqs0rQz.png',
    'https://i.imgur.com/ZNNxaUP.png'
]
for i in group89links:
    print(i)

https://i.imgur.com/p4rP7Xh.png
https://i.imgur.com/1GrcVPz.png
https://i.imgur.com/fTDxvCy.png
https://i.imgur.com/gqs0rQz.png
https://i.imgur.com/ZNNxaUP.png


In [129]:
# Ids for the groups of nodes
# Each id is composed of the ids of the nodes present in the group, joined
# with '+' (get_group_labels splits on '+' to recover the node ids).
# Position i of each list is paired with position i of the matching
# group82links / group83links / group84links / group89links list.

group82ids = [
    '2667+2668+2669+2670',
    '2668+2669+2670+2671+2672',
    '2673+2674+2675+2679',
    '2676+2683+2684'
]

# NOTE(review): ordered differently from nodegroup83 (the '2694+...' group is
# 4th here but last there) - confirm group83links follows THIS order.
group83ids = [
    '2687+2689+2692+2693',
    '2688+2704+2705+2708',
    '2690+2691+2706+2707',
    '2694+2696+2697+2709',
    '2695+2698+2699+2700',
    '2700+2701+2702+2703'
]

group84ids = [
    '2710+2711+2712+2713+2714',
    '2710+2711+2715+2717+2719+2720',
    '2715+2716+2717+2719+2720+2722+2732',
    '2716+2718+2720+2722+2730+2732',
    '2721+2723+2724+2728+2729+2731',
    '2725+2726+2727+2728'
]

group89ids = [
    '2883+2888+2889+2893',
    '2884+2885+2886+2894',
    '2887+2895+2899',
    '2890+2891+2892',
    '2896+2897+2898+2899'
]

In [130]:
# Map each group id of process 82 to its image URL.
# The two lists are paired purely by position, so a length mismatch means an id
# or a link is missing; fail loudly instead of silently ignoring surplus links.
assert len(group82ids) == len(group82links), "group82ids and group82links must have the same length"
groups82 = dict(zip(group82ids, group82links))
for k, v in groups82.items():
    print(f"{k}: {v}")

2667+2668+2669+2670: https://i.imgur.com/itkXCmg.png
2668+2669+2670+2671+2672: https://i.imgur.com/qM7QUtc.png
2673+2674+2675+2679: https://i.imgur.com/Q009q9b.png
2676+2683+2684: https://i.imgur.com/OMUs1nZ.png


In [131]:
# Map each group id of process 83 to its image URL.
# The two lists are paired purely by position, so a length mismatch means an id
# or a link is missing; fail loudly instead of silently ignoring surplus links.
assert len(group83ids) == len(group83links), "group83ids and group83links must have the same length"
groups83 = dict(zip(group83ids, group83links))
for k, v in groups83.items():
    print(f"{k}: {v}")

2687+2689+2692+2693: https://i.imgur.com/mMx14CQ.png
2688+2704+2705+2708: https://i.imgur.com/HLcVtRu.png
2690+2691+2706+2707: https://i.imgur.com/cy9q2Cq.png
2694+2696+2697+2709: https://i.imgur.com/KGuw0WU.png
2695+2698+2699+2700: https://i.imgur.com/0F7w4YA.png
2700+2701+2702+2703: https://i.imgur.com/0bOIDEl.png


In [132]:
# Map each group id of process 84 to its image URL.
# The two lists are paired purely by position, so a length mismatch means an id
# or a link is missing; fail loudly instead of silently ignoring surplus links.
assert len(group84ids) == len(group84links), "group84ids and group84links must have the same length"
groups84 = dict(zip(group84ids, group84links))
for k, v in groups84.items():
    print(f"{k}: {v}")

2710+2711+2712+2713+2714: https://i.imgur.com/VZwozT1.png
2710+2711+2715+2717+2719+2720: https://i.imgur.com/svotSPs.png
2715+2716+2717+2719+2720+2722+2732: https://i.imgur.com/rION8bN.png
2716+2718+2720+2722+2730+2732: https://i.imgur.com/eK2xnAs.png
2721+2723+2724+2728+2729+2731: https://i.imgur.com/TF0QbAE.png
2725+2726+2727+2728: https://i.imgur.com/DT69KMC.png


In [133]:
# Map each group id of process 89 to its image URL.
# The two lists are paired purely by position, so a length mismatch means an id
# or a link is missing; fail loudly instead of silently ignoring surplus links.
assert len(group89ids) == len(group89links), "group89ids and group89links must have the same length"
groups89 = dict(zip(group89ids, group89links))
for k, v in groups89.items():
    print(f"{k}: {v}")

2883+2888+2889+2893: https://i.imgur.com/p4rP7Xh.png
2884+2885+2886+2894: https://i.imgur.com/1GrcVPz.png
2887+2895+2899: https://i.imgur.com/fTDxvCy.png
2890+2891+2892: https://i.imgur.com/gqs0rQz.png
2896+2897+2898+2899: https://i.imgur.com/ZNNxaUP.png


In [134]:
def get_group_labels(group_id):
    """
    Look up the label of every node named in a group id.

    group_id: node ids joined with '+', e.g. '2896+2897+2898+2899'.
    Returns the labels in the same order as the ids appear in group_id.
    Raises KeyError when an id is not a key of the module-level nodes dict.
    """
    labels = []
    for node_id in group_id.split('+'):
        labels.append(nodes[node_id]['label'])
    return labels

def get_group_labels_csv(group_id):
    """
    Return the labels of a group as one comma-separated string.

    The labels come from get_group_labels(group_id) and are joined with ','
    as-is: no quoting or escaping is applied to a label.
    """
    group_labels = get_group_labels(group_id)
    separator = ','
    return separator.join(group_labels)

get_group_labels_csv('2896+2897+2898+2899')

'bridging courses < 30 cp,check documents,forward documents,documents received'

In [135]:
# Query/result rows for 82 -> 83: every node row of process 82 combined with
# every group of process 83.
# The result half of a row depends only on the group, so it is built once per
# group instead of once per (node, group) pair.
_results83 = [['83', gid, img, get_group_labels_csv(gid)] for gid, img in groups83.items()]
nodegroup8283 = [query + result for query in plist82 for result in _results83]

nodegroup8283[0]

['82',
 '2667',
 'Apply Online',
 'http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2667.png',
 '83',
 '2687+2689+2692+2693',
 'https://i.imgur.com/mMx14CQ.png',
 'average grade is good or better,average grade is less than good,provisional acceptance confirmed,provisional acceptance cancelled']

In [136]:
# Query/result rows for 82 -> 84: every node row of process 82 combined with
# every group of process 84.
# The result half of a row depends only on the group, so it is built once per
# group instead of once per (node, group) pair.
_results84 = [['84', gid, img, get_group_labels_csv(gid)] for gid, img in groups84.items()]
nodegroup8284 = [query + result for query in plist82 for result in _results84]

nodegroup8284[0]

['82',
 '2667',
 'Apply Online',
 'http://galvanic-axle-852.appspot.com/static/taskdesign01/82/2667.png',
 '84',
 '2710+2711+2712+2713+2714',
 'https://i.imgur.com/VZwozT1.png',
 'complete and in time?,hand application over to examining board,check if bachelor is sufficient,set additional requirements,check if bachelors-grade within top 85%']

In [137]:
# Query/result rows for 83 -> 89: every node row of process 83 combined with
# every group of process 89.
# The result half of a row depends only on the group, so it is built once per
# group instead of once per (node, group) pair.
_results89 = [['89', gid, img, get_group_labels_csv(gid)] for gid, img in groups89.items()]
nodegroup8389 = [query + result for query in plist83 for result in _results89]

nodegroup8389[0]

['83',
 '2707',
 'check documents',
 'http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2707.png',
 '89',
 '2883+2888+2889+2893',
 'https://i.imgur.com/p4rP7Xh.png',
 'rejected,send letter of acceptance,send letter of rejection,rejected']

In [138]:
# Query/result rows for 83 -> 82
# Here the queries are the labels of 83 and the results are the groups of 82.
# The result half of a row depends only on the group, so it is built once per
# group instead of once per (node, group) pair.
_results82 = [['82', gid, img, get_group_labels_csv(gid)] for gid, img in groups82.items()]
nodegroup8382 = [query + result for query in plist83 for result in _results82]

nodegroup8382[0]

['83',
 '2707',
 'check documents',
 'http://galvanic-axle-852.appspot.com/static/taskdesign01/83/2707.png',
 '82',
 '2667+2668+2669+2670',
 'https://i.imgur.com/itkXCmg.png',
 'Apply Online,Send Online Protocol,Send Documents by Post,Documents received']

In [140]:
# Build Design 4 (Search feedback) Input data (csv)
# Row => | query_proc_id | query_node_id | query_label | query_img | proc_id | group_id | group_img | group_labels |

# This data is also valid for Design 7
d4header = [
    'query_process_id',
    'query_node_id',
    'query_node_label',
    'query_node_img',
    'result_process_id',
    'result_group_id',
    'result_group_img',
    'result_group_labels',
]
d4data = [
    # All pairs in alphabetical order (82-83, 82-84, 83-89)
    *nodegroup8283,
    *nodegroup8284,
    *nodegroup8389,
    # Pair 83-82 (reverse order)
    *nodegroup8382,
]

# IMPORTANT
# Once the file is converted to '.xlsx' and uploaded to Crowdflower
# it is necessary to split the columns:
# - "result_group_id" split with '+'
# - "result_group_labels" split with ','
def writed4csv(path='Design4_Input_V00.csv'):
    """
    Write the Design 4 input table to a CSV file.

    The header row d4header is written first, followed by every row of d4data,
    using the csv 'excel' dialect.

    path: destination file. Defaults to the file name this notebook has always
          used; the file is opened in 'w' mode, so existing content is replaced.
    """
    # newline='' lets the csv writer control line endings itself
    with open(path, 'w', newline='') as d4csvfile:
        writer = csv.writer(d4csvfile, dialect='excel')
        writer.writerow(d4header)
        writer.writerows(d4data)
#writed4csv() # Uncomment to write