In [1]:
import csv
from collections import defaultdict
import numpy as np

In [2]:
def parse_hp_file(hp_file, min_dev_acc=55):
    """Group the completed trials of a results CSV by confound access rate.

    The first row of the file is treated as the header (names are stripped of
    surrounding whitespace). From every data row the columns ``state``,
    ``trial_id``, ``dev_test_combo`` and ``confound_access_rate`` are read.

    ``dev_test_combo`` packs two accuracies into a single float: the integer
    part divided by 1000 is the dev accuracy and the fractional part times 100
    is the test accuracy (e.g. ``92226.9191`` -> dev ``92.226``, test
    ``~91.91``).

    Rows are dropped when they are blank, when ``state`` is not exactly
    ``"COMPLETED"``, or when the decoded dev accuracy is below ``min_dev_acc``.

    Args:
        hp_file: Path of the CSV file to read (opened as UTF-8).
        min_dev_acc: Trials whose dev accuracy is strictly below this value
            are treated as degenerate and skipped. Defaults to 55.

    Returns:
        A ``defaultdict(list)`` mapping the 1-tuple ``(confound_access_rate,)``
        to a list of ``(dev_acc, test_acc, trial_id)`` tuples in file order.

    Raises:
        KeyError: If a required column is missing from the header (raised when
            the first non-blank data row is processed).
        ValueError: If ``dev_test_combo`` or ``confound_access_rate`` of a
            completed row cannot be parsed as a float.

    Side effects:
        Prints the number of distinct keys that were collected.
    """
    hp_data = defaultdict(list)

    with open(hp_file, encoding='utf-8', newline='') as csvfile:
        csvreader = csv.reader(csvfile, delimiter=',', quotechar='\"')

        # Header row: map each (stripped) column name to its index.
        header = next(csvreader, None) or []
        col_ix = {name.strip(): ix for ix, name in enumerate(header)}

        for row in csvreader:
            # csv.reader yields [] for blank lines (e.g. stray empty lines in
            # the export); indexing such a row would raise IndexError.
            if not row:
                continue

            if row[col_ix['state']] != "COMPLETED":
                continue

            trial_id = row[col_ix['trial_id']]

            # Unpack the two accuracies stored in one float (see docstring).
            acc = float(row[col_ix['dev_test_combo']])
            dev_acc = int(acc) / 1000
            test_acc = (acc - int(acc)) * 100

            # Degenerate trials; two other trials have a dev acc at 59, but
            # their train acc was checked and they are both normal.
            if dev_acc < min_dev_acc:
                continue

            confound_access_rate = float(row[col_ix['confound_access_rate']])

            # The key is kept as a tuple so further hyperparameter columns can
            # be appended to the grouping key without changing the consumers.
            key = (confound_access_rate,)
            hp_data[key].append((dev_acc, test_acc, trial_id))

    print(len(hp_data))
    return hp_data

In [3]:
# Parse the trials stored in msgs_it_access_rate.csv; parse_hp_file also
# prints how many distinct grouping keys it collected.
hp_data = parse_hp_file(hp_file='msgs_it_access_rate.csv')

5


In [4]:
# Summarise every group that has at least 3 trials as
# (key, mean dev acc, mean test acc, trials), then keep the 10 groups with the
# highest mean dev accuracy.
group_stats = []
for group_key, trials in hp_data.items():
    if len(trials) < 3:
        continue
    mean_dev = np.mean([trial[0] for trial in trials])
    mean_test = np.mean([trial[1] for trial in trials])
    group_stats.append((group_key, mean_dev, mean_test, trials))
group_stats.sort(key=lambda entry: -entry[1])
sorted_list = group_stats[:10]

In [5]:
# Flatten the trial ids (third element of each trial tuple) of every ranked
# group, keeping the ranking order of the groups.
trial_ids = [
    trial[2]
    for _key, _mean_dev, _mean_test, trials in sorted_list
    for trial in trials
]
print(trial_ids)

['k6uue5dzia', 'p5fwe8tvrv', 'p79zbmvkb3', 'nd6anv9jui', 'ijjzkuad27', 'x6f26zbu32', 'pqvuv2ikip', 'z79xepe384', 'vqay9rythw', '79grmhzyy2', '8r7m4eur3t', '63ubwyca87', 'ck8bs4a4xb', 'khw3c6s4xh', 'd24gvnz3wx', 'mk2knqb4w5', 'vurjsubiu6', '93yveujpec']


In [6]:
# Display the ranked groups: (key, mean dev acc, mean test acc, trials).
sorted_list

[((0.5,),
  93.71933333333334,
  93.57333332979276,
  [(92.226, 91.91333333001239, 'k6uue5dzia'),
   (99.206, 99.25333332939772, 'p5fwe8tvrv'),
   (89.726, 89.55333332996815, 'p79zbmvkb3')]),
 ((1.0,),
  88.0796,
  88.22533333383035,
  [(91.646, 91.59999999974389, 'nd6anv9jui'),
   (91.873, 92.0266666696989, 'ijjzkuad27'),
   (91.626, 91.71333333069924, 'x6f26zbu32'),
   (83.953, 84.26666666928213, 'pqvuv2ikip'),
   (81.3, 81.51999999972759, 'z79xepe384')]),
 ((0.2,),
  83.94125,
  83.89833333494607,
  [(92.326, 92.39999999990687, 'vqay9rythw'),
   (80.186, 80.42666667024605, '79grmhzyy2'),
   (85.113, 85.20666667027399, '8r7m4eur3t'),
   (78.14, 77.55999999935739, '63ubwyca87')]),
 ((0.05,),
  81.84866666666667,
  81.74222222005483,
  [(93.546, 93.70000000053551, 'ck8bs4a4xb'),
   (59.38, 59.473333330242895, 'khw3c6s4xh'),
   (92.62, 92.05333332938608, 'd24gvnz3wx')]),
 ((0.1,),
  80.31733333333334,
  80.47999999689637,
  [(82.666, 83.09333333018003, 'mk2knqb4w5'),
   (82.846, 82.9733

In [7]:
# Parse the trials stored in msgs_et_access_rate.csv; parse_hp_file also
# prints how many distinct grouping keys it collected.
hp_data = parse_hp_file(hp_file='msgs_et_access_rate.csv')

5


In [8]:
# Summarise every group that has at least 3 trials as
# (key, mean dev acc, mean test acc, trials), then keep the 10 groups with the
# highest mean dev accuracy.
group_stats = []
for group_key, trials in hp_data.items():
    if len(trials) < 3:
        continue
    mean_dev = np.mean([trial[0] for trial in trials])
    mean_test = np.mean([trial[1] for trial in trials])
    group_stats.append((group_key, mean_dev, mean_test, trials))
group_stats.sort(key=lambda entry: -entry[1])
sorted_list = group_stats[:10]

In [9]:
# Flatten the trial ids (third element of each trial tuple) of every ranked
# group, keeping the ranking order of the groups.
trial_ids = [
    trial[2]
    for _key, _mean_dev, _mean_test, trials in sorted_list
    for trial in trials
]
print(trial_ids)

['rx2ht9me5b', 'v567qt8nx6', '5pxxfkuvm6', 'hmb8cy2vvu', 'nvbbcnunwi', '7qsaw6zshr', 'qxsi6byrtw', 'uzez8jmjfp', '2tz7gfbdv6', 'geykrz44iw', 'x5jvqbygvu', '3j8pdavu2a', 'mtrwsqpjic', '76tdzmbxdv', 'bztr4vc3pp', 'fut2ypabrk', 'tvfu3g4mcy', '6kecssscfb', 'mr87pfe9w8', 'e39g52mdxx', 'f9vknd5uw4', '9ym2pip2tk', 'sf49xvh6hf', 'x7h2xz2huw', '79mrjkzni2']


In [10]:
# Display the ranked groups: (key, mean dev acc, mean test acc, trials).
sorted_list

[((1.0,),
  87.58380000000001,
  87.44933333393419,
  [(92.213, 91.88666667032521, 'rx2ht9me5b'),
   (73.573, 73.54666666942649, 'v567qt8nx6'),
   (92.74, 92.51333332940703, '5pxxfkuvm6'),
   (90.54, 90.47333333001006, 'hmb8cy2vvu'),
   (88.853, 88.82666667050216, 'nvbbcnunwi')]),
 ((0.05,),
  85.153,
  84.8973333320464,
  [(90.386, 90.48000000038883, '7qsaw6zshr'),
   (83.733, 83.49333333026152, 'qxsi6byrtw'),
   (77.013, 76.79999999963911, 'uzez8jmjfp'),
   (87.213, 86.79333332984243, '2tz7gfbdv6'),
   (87.42, 86.92000000010012, 'geykrz44iw')]),
 ((0.5,),
  83.0542,
  82.79866666794987,
  [(88.526, 88.1066666697734, 'x5jvqbygvu'),
   (83.973, 83.58666667045327, '3j8pdavu2a'),
   (80.946, 80.90666666976176, 'mtrwsqpjic'),
   (81.226, 80.85333333001472, '76tdzmbxdv'),
   (80.6, 80.53999999974621, 'bztr4vc3pp')]),
 ((0.2,),
  81.8288,
  81.56266666774172,
  [(85.846, 85.52666666946607, 'fut2ypabrk'),
   (92.006, 91.76666667044628, 'tvfu3g4mcy'),
   (74.333, 74.57333333004499, '6kecssscf