In [1]:
import csv
from collections import defaultdict
import numpy as np

In [2]:
def parse_hp_file(hp_file, min_dev_acc=51):
    """Group the completed trials of a hyperparameter-sweep CSV by access rate.

    The first CSV row is treated as the header; header names are stripped of
    surrounding whitespace. The columns ``state``, ``trial_id``,
    ``dev_test_combo`` and ``confound_access_rate`` are read from each row.

    ``dev_test_combo`` packs two scores into one float. They are decoded as
    ``dev_acc = int(combo) / 1000`` and ``test_acc = (combo - int(combo)) * 100``.

    Args:
        hp_file: Path of the CSV file to read (opened as UTF-8).
        min_dev_acc: Trials whose decoded ``dev_acc`` is below this value are
            dropped as degenerate. Defaults to 51.

    Returns:
        A ``defaultdict(list)`` mapping the 1-tuple ``(confound_access_rate,)``
        (as a float) to a list of ``(dev_acc, test_acc, trial_id)`` tuples, in
        file order.

    Raises:
        KeyError: If a row needs a column that is missing from the header.
        ValueError: If a score or access rate cannot be parsed as a float.

    Side effects:
        Prints the number of distinct keys found.
    """
    hp_data = defaultdict(list)

    with open(hp_file, encoding='utf-8', newline='') as csvfile:
        csvreader = csv.reader(csvfile, delimiter=',', quotechar='"')

        # The first row is the header; map stripped column name -> column index.
        header = next(csvreader, None)
        column_ix = {name.strip(): ix for ix, name in enumerate(header or [])}

        for row in csvreader:
            if not row:
                # csv.reader yields [] for blank lines; there is nothing to parse.
                continue
            if row[column_ix['state']] != "COMPLETED":
                continue

            trial_id = row[column_ix['trial_id']]
            combo = float(row[column_ix['dev_test_combo']])
            # Integer part carries the dev score, fractional part the test score.
            dev_acc = int(combo) / 1000
            test_acc = (combo - int(combo)) * 100

            if dev_acc < min_dev_acc:  # degenerate trial
                continue

            confound_access_rate = float(row[column_ix['confound_access_rate']])
            # Trials are grouped by access rate only. Other hyperparameter
            # columns (e.g. coord_interval, influence_tuning_lr) could be
            # appended to this tuple to group more finely.
            key = (confound_access_rate,)
            hp_data[key].append((dev_acc, test_acc, trial_id))

    print(len(hp_data))
    return hp_data

In [3]:
# Parse the sweep results stored in synth_it_access_rate.csv. parse_hp_file
# also prints the number of distinct hyperparameter keys it found.
hp_data = parse_hp_file('synth_it_access_rate.csv')

5


In [4]:
# Summarise every hyperparameter setting that has at least 3 trials as
# (key, mean dev score, mean test score, trials), rank the settings by mean
# dev score (highest first) and keep the top 10.
sorted_list = sorted(
    [
        (
            setting,
            np.mean([trial[0] for trial in trials]),
            np.mean([trial[1] for trial in trials]),
            trials,
        )
        for setting, trials in hp_data.items()
        if len(trials) >= 3
    ],
    key=lambda summary: -summary[1],
)[:10]

In [5]:
# Flatten the trial ids of every ranked setting, preserving rank order.
# A comprehension is used instead of a top-level `for ..., _, _, ...` loop so
# that no loop variables leak into the notebook namespace. In particular,
# binding `_` at top level shadows IPython's last-output variable.
trial_ids = [
    trial[2]
    for _setting, _mean_dev, _mean_test, trials in sorted_list
    for trial in trials
]
print(trial_ids)

['umqeks9chk', 'q95dbr5xcz', 'uky4nzwrqf', 'pftzbck8yq', 'c4nequ7abt', 'y37x9x272r', 'cum6f6vti8', 'en25kd8x9b', '96cb34secg', 't669pn2gmz', 'ftc57dik4h', '2pgdpmc5qp', 'jkvzgipb2g', '2rxf6urm2y', 'uwg75syjn4', 'irimvkz4qb', 'vq2jyyia9t', 'wjas93buf5', 'cnwyfg75gf', 'xiarpfznfh', 'j5f5v4pa3v', 'yy6qtw8ixi']


In [6]:
# Display the ranked summaries: (key, mean of e[0], mean of e[1], trials).
sorted_list

[((1.0,),
  81.4582,
  80.48000000038883,
  [(81.875, 80.80000000045402, 'umqeks9chk'),
   (80.0, 79.60000000020955, 'q95dbr5xcz'),
   (82.5, 82.00000000069849, 'uky4nzwrqf'),
   (84.166, 82.60000000009313, 'pftzbck8yq'),
   (78.75, 77.40000000048894, 'c4nequ7abt')]),
 ((0.2,),
  81.35374999999999,
  80.64999999987776,
  [(84.166, 83.99999999965075, 'y37x9x272r'),
   (82.5, 81.39999999984866, 'cum6f6vti8'),
   (81.666, 79.60000000020955, 'en25kd8x9b'),
   (77.083, 77.5999999998021, '96cb34secg')]),
 ((0.5,),
  81.25,
  80.13333333316648,
  [(80.625, 78.99999999935972, 't669pn2gmz'),
   (81.25, 80.59999999968568, 'ftc57dik4h'),
   (81.875, 80.80000000045402, '2pgdpmc5qp')]),
 ((0.1,),
  79.583,
  78.68000000016764,
  [(75.833, 74.00000000052387, 'jkvzgipb2g'),
   (80.0, 80.40000000037253, '2rxf6urm2y'),
   (81.666, 82.00000000069849, 'uwg75syjn4'),
   (76.875, 73.79999999975553, 'irimvkz4qb'),
   (83.541, 83.19999999948777, 'vq2jyyia9t')]),
 ((0.05,),
  78.4164,
  78.23999999993248,
  [

In [7]:
# Parse the sweep results stored in synth_et_access_rate.csv. parse_hp_file
# also prints the number of distinct hyperparameter keys it found.
# NOTE: this rebinds hp_data, so any previously loaded results are replaced.
hp_data = parse_hp_file('synth_et_access_rate.csv')

5


In [8]:
# Summarise every hyperparameter setting that has at least 3 trials as
# (key, mean dev score, mean test score, trials), rank the settings by mean
# dev score (highest first) and keep the top 10.
sorted_list = sorted(
    [
        (
            setting,
            np.mean([trial[0] for trial in trials]),
            np.mean([trial[1] for trial in trials]),
            trials,
        )
        for setting, trials in hp_data.items()
        if len(trials) >= 3
    ],
    key=lambda summary: -summary[1],
)[:10]

In [9]:
# Flatten the trial ids of every ranked setting, preserving rank order.
# A comprehension is used instead of a top-level `for ..., _, _, ...` loop so
# that no loop variables leak into the notebook namespace. In particular,
# binding `_` at top level shadows IPython's last-output variable.
trial_ids = [
    trial[2]
    for _setting, _mean_dev, _mean_test, trials in sorted_list
    for trial in trials
]
print(trial_ids)

['x7t2eg5i3p', 'pty7b64tur', 'qivp54mtfz', '6h3agn2siv', '8wcfyspvke', '64tb6qnz4a', 'pqbzgyijm9', 'mmji2hq3gv', 'hhwjtfyu8w', 'iq2jwj6d7v', 'haaq8822x4', 'mhipu7xfw8', '42g5n7u8it', '9rjcwn3kyn', 'y8bdnxcq53', 'ezb9j98xsf', '3a6n9c4pqe', 'wdr9y3m9u5', '7mg2c77vfc', 'pzmzm2hu6x', 'vb8jp2m66t', '89cys4n5hh', 'm6rr39ags5']


In [10]:
# Display the ranked summaries: (key, mean of e[0], mean of e[1], trials).
sorted_list

[((1.0,),
  83.6246,
  82.20000000001164,
  [(84.791, 83.00000000017462, 'x7t2eg5i3p'),
   (80.833, 79.60000000020955, 'pty7b64tur'),
   (84.583, 82.79999999940628, 'qivp54mtfz'),
   (81.041, 80.80000000045402, '6h3agn2siv'),
   (86.875, 84.79999999981374, '8wcfyspvke')]),
 ((0.5,),
  81.52733333333333,
  81.06666666620488,
  [(84.583, 84.39999999973224, '64tb6qnz4a'),
   (80.416, 80.19999999960419, 'pqbzgyijm9'),
   (79.583, 78.59999999927823, 'mmji2hq3gv')]),
 ((0.1,),
  80.83279999999999,
  80.79999999987194,
  [(81.666, 81.79999999993015, 'hhwjtfyu8w'),
   (80.833, 80.19999999960419, 'iq2jwj6d7v'),
   (75.833, 76.20000000024447, 'haaq8822x4'),
   (84.791, 85.19999999989523, 'mhipu7xfw8'),
   (81.041, 80.59999999968568, '42g5n7u8it')]),
 ((0.2,),
  80.5414,
  80.43999999965308,
  [(76.25, 78.99999999935972, '9rjcwn3kyn'),
   (80.833, 80.40000000037253, 'y8bdnxcq53'),
   (80.625, 79.7999999995227, 'ezb9j98xsf'),
   (82.291, 79.7999999995227, '3a6n9c4pqe'),
   (82.708, 83.199999999487