In [1]:
import csv
import json
import os
import random

In [2]:
# Input locations, relative to the notebook's working directory.
font_dir = '../gwfonts'
csv_path = '../attributeData/estimatedAttributes.csv'

# Every CSV row keyed by its first column: {font name: {attribute name: value}}.
# Values are kept exactly as read from the CSV (strings); nothing is converted here.
tmp_font_to_attribute_values = {}

# newline='' is what the csv module asks for so that line endings inside
# quoted fields are handled by the reader itself.
with open(csv_path, 'r', newline='') as f:
    reader = csv.reader(f)
    header = next(reader)
    # The first header column labels the font name; the rest are attribute names.
    attributes = header[1:]
    for row in reader:
        if not row:
            # csv.reader yields [] for a blank line; row[0] would raise IndexError.
            continue
        tmp_font_to_attribute_values[row[0]] = dict(zip(attributes, row[1:]))

# Keep only the fonts that have a file in font_dir. The match is on the file
# name without its extension.
# Note: os.listdir() returns entries in arbitrary order, so the key order of
# valid_font_to_attribute_values is not guaranteed to be stable across machines.
valid_font_to_attribute_values = {}
font_paths = [os.path.join(font_dir, p) for p in os.listdir(font_dir)]
for font_path in font_paths:
    font_name = os.path.splitext(os.path.basename(font_path))[0]
    if font_name in tmp_font_to_attribute_values:
        valid_font_to_attribute_values[font_name] = tmp_font_to_attribute_values[font_name]

In [3]:
# Sort before shuffling. The dict's key order comes from os.listdir(), which is
# arbitrary, so seeding the RNG alone would not give the same permutation on
# every machine. With a sorted starting point the seeded shuffle is reproducible.
# NOTE: this changes the permutation relative to split files generated before
# the sort was added; regenerate them together.
shuffled_font_names = sorted(valid_font_to_attribute_values)
random.seed(123)
random.shuffle(shuffled_font_names)

In [4]:
# Attribute table re-keyed in shuffled order.
all_font_to_attribute_values = {
    name: valid_font_to_attribute_values[name] for name in shuffled_font_names
}

# Index where the held-out 20% begins; everything before it is training data.
holdout_start = int(len(shuffled_font_names) * 0.80)

train_font_names = shuffled_font_names[:holdout_start]
# NOTE(review): validation and test are the SAME slice (the last 20%), so any
# metric tuned on validation has already seen the test fonts. An earlier
# version used the 60%-80% slice for validation; confirm this is intentional.
validation_font_names = shuffled_font_names[holdout_start:]
test_font_names = shuffled_font_names[holdout_start:]

# Per-split {font name: {attribute: value}} tables.
train_font_to_attribute_values = {
    name: all_font_to_attribute_values[name] for name in train_font_names
}
validation_font_to_attribute_values = {
    name: all_font_to_attribute_values[name] for name in validation_font_names
}
test_font_to_attribute_values = {
    name: all_font_to_attribute_values[name] for name in test_font_names
}

In [5]:
# Write each split to its own JSON file. The with-blocks make sure every file
# is flushed and closed as soon as it has been written, instead of relying on
# garbage collection of an anonymous open() handle.
#json.dump(all_font_to_attribute_values, open('../attributeData/all_font_to_attribute_values.json', 'w'))
with open('../attributeData/train_font_to_attribute_values_6.json', 'w') as f:
    json.dump(train_font_to_attribute_values, f)
with open('../attributeData/validation_font_to_attribute_values_6.json', 'w') as f:
    json.dump(validation_font_to_attribute_values, f)
with open('../attributeData/test_font_to_attribute_values_6.json', 'w') as f:
    json.dump(test_font_to_attribute_values, f)

## split dataset for cross-validation

In [None]:
# 20-fold cross-validation: fold i uses the i-th contiguous slice of the
# shuffled names as its test set and writes train/validation/test JSON files.
split_num = 20
all_font_to_attribute_values = {
    font_name: valid_font_to_attribute_values[font_name]
    for font_name in shuffled_font_names
}
for i in range(split_num):
    # Bounds of this fold's test slice, computed once and reused below.
    test_start = int(len(shuffled_font_names) * i / split_num)
    test_end = int(len(shuffled_font_names) * (i + 1) / split_num)
    test_font_names = shuffled_font_names[test_start:test_end]
    # Everything outside the test slice, in shuffled order.
    other_font_names = (
        shuffled_font_names[:test_start] + shuffled_font_names[test_end:]
    )
    # NOTE(review): 160 is a fixed count, not a fraction. Validation receives
    # whatever is left after the first 160 names (nothing at all if fewer than
    # 160 remain) — confirm this matches the dataset size.
    train_font_names = other_font_names[:160]
    validation_font_names = other_font_names[160:]
    train_font_to_attribute_values = {
        font_name: all_font_to_attribute_values[font_name]
        for font_name in train_font_names
    }
    validation_font_to_attribute_values = {
        font_name: all_font_to_attribute_values[font_name]
        for font_name in validation_font_names
    }
    test_font_to_attribute_values = {
        font_name: all_font_to_attribute_values[font_name]
        for font_name in test_font_names
    }
    # One file per split; the with-block closes each file right after writing.
    for split_name, split_values in (
        ("train", train_font_to_attribute_values),
        ("validation", validation_font_to_attribute_values),
        ("test", test_font_to_attribute_values),
    ):
        with open(
            f"../attributeData/{split_name}_font_to_attribute_values_cross_validation_{split_num}_{i}.json",
            "w",
        ) as f:
            json.dump(split_values, f)

In [7]:
# 5-fold cross-validation: fold i uses the i-th contiguous slice of the
# shuffled names as its test set; the remaining names are split 3:1 into
# train and validation.
split_num = 5
all_font_to_attribute_values = {font_name: valid_font_to_attribute_values[font_name] for font_name in shuffled_font_names}
for i in range(split_num):
    # Bounds of this fold's test slice, computed once and reused below.
    test_start = int(len(shuffled_font_names) * i / split_num)
    test_end = int(len(shuffled_font_names) * (i + 1) / split_num)
    test_font_names = shuffled_font_names[test_start:test_end]
    # Everything outside the test slice, in shuffled order.
    other_font_names = shuffled_font_names[:test_start] + shuffled_font_names[test_end:]
    # First three quarters of the remainder train, the last quarter validates.
    train_end = int(len(other_font_names) * 3 / 4)
    train_font_names = other_font_names[:train_end]
    validation_font_names = other_font_names[train_end:]
    train_font_to_attribute_values = {font_name: all_font_to_attribute_values[font_name] for font_name in train_font_names}
    validation_font_to_attribute_values = {font_name: all_font_to_attribute_values[font_name] for font_name in validation_font_names}
    test_font_to_attribute_values = {font_name: all_font_to_attribute_values[font_name] for font_name in test_font_names}
    # One file per split; the with-block closes each file right after writing.
    for split_name, split_values in (
        ('train', train_font_to_attribute_values),
        ('validation', validation_font_to_attribute_values),
        ('test', test_font_to_attribute_values),
    ):
        with open(f'../attributeData/{split_name}_font_to_attribute_values_cross_validation_{split_num}_{i}.json', 'w') as f:
            json.dump(split_values, f)

In [1]:
# 40-fold cross-validation: fold i uses the i-th contiguous slice of the
# shuffled names as its test set and writes train/validation/test JSON files.
# Requires shuffled_font_names and valid_font_to_attribute_values from the
# earlier cells; running this cell on a fresh kernel raises NameError.
split_num = 40
all_font_to_attribute_values = {font_name: valid_font_to_attribute_values[font_name] for font_name in shuffled_font_names}
for i in range(split_num):
    # Bounds of this fold's test slice, computed once and reused below.
    test_start = int(len(shuffled_font_names) * i / split_num)
    test_end = int(len(shuffled_font_names) * (i + 1) / split_num)
    test_font_names = shuffled_font_names[test_start:test_end]
    # Everything outside the test slice, in shuffled order.
    other_font_names = shuffled_font_names[:test_start] + shuffled_font_names[test_end:]
    # NOTE(review): the split is a fixed count (first 150 train, rest
    # validation), not the 5:1 ratio an earlier version used. Validation is
    # empty if fewer than 150 names remain — confirm against the dataset size.
    train_font_names = other_font_names[:150]
    validation_font_names = other_font_names[150:]
    train_font_to_attribute_values = {font_name: all_font_to_attribute_values[font_name] for font_name in train_font_names}
    validation_font_to_attribute_values = {font_name: all_font_to_attribute_values[font_name] for font_name in validation_font_names}
    test_font_to_attribute_values = {font_name: all_font_to_attribute_values[font_name] for font_name in test_font_names}
    # One file per split; the with-block closes each file right after writing.
    for split_name, split_values in (
        ('train', train_font_to_attribute_values),
        ('validation', validation_font_to_attribute_values),
        ('test', test_font_to_attribute_values),
    ):
        with open(f'../attributeData/{split_name}_font_to_attribute_values_cross_validation_{split_num}_{i}.json', 'w') as f:
            json.dump(split_values, f)

NameError: name 'shuffled_font_names' is not defined

In [8]:
# Sanity check: reload fold 0 of the 5-fold split and print the size of each
# part. Files are opened in with-blocks so the handles are closed after reading.
with open('../attributeData/train_font_to_attribute_values_cross_validation_5_0.json', 'r') as f:
    train_font_to_attribute_values = json.load(f)
with open('../attributeData/validation_font_to_attribute_values_cross_validation_5_0.json', 'r') as f:
    validation_font_to_attribute_values = json.load(f)
with open('../attributeData/test_font_to_attribute_values_cross_validation_5_0.json', 'r') as f:
    test_font_to_attribute_values = json.load(f)
print(len(train_font_to_attribute_values), len(validation_font_to_attribute_values), len(test_font_to_attribute_values))

120 40 40
