In [10]:
import os
import pandas as pd
import numpy as np
import pickle
from sklearn.cluster import MeanShift
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import GridSearchCV

In [6]:
# Directory holding every input file read by this notebook.
dir_path = 'data/'

# Pickled tables. Later cells slice them with `.iloc[:, 1:]`, so they are
# presumably DataFrames whose first column is an identifier -- TODO confirm.
# NOTE(review): unpickling executes arbitrary code; only load trusted files.
fj_unlabeled_font_vectors = pd.read_pickle(os.path.join(dir_path, 'fj_ul_font_vectors.pkl'))
fj_labeled_font_vectors = pd.read_pickle(os.path.join(dir_path, 'fj_l_font_vectors.pkl'))
common_attribute_labels = pd.read_pickle(os.path.join(dir_path, 'common_attribute_labels.pkl'))

# Attribute names are read as strings, one array entry per whitespace-separated token.
attribute_names = np.loadtxt(os.path.join(dir_path, 'attrNames.txt'), dtype=str)

# Names treated as typographic; every other attribute name counts as semantic.
typographic_features = np.array(
    ['font_name', 'capitals', 'cursive', 'display', 'italic', 'monospace', 'serif']
)
is_semantic = np.isin(attribute_names, typographic_features, invert=True)
semantic_features = attribute_names[is_semantic]

In [7]:
# Model inputs (X) and targets (y): both source tables with their first column
# dropped (presumably an identifier such as the font name -- TODO confirm).
X, y = (
    table.iloc[:, 1:]
    for table in (fj_labeled_font_vectors, common_attribute_labels)
)

In [161]:
# Accumulators that the evaluation cells append to.
# One row per evaluated configuration (filled with the mean error of that run).
error_table = pd.DataFrame([])
# One row per semantic attribute; each evaluation run adds one column.
error_by_attr_table = pd.DataFrame([], index=semantic_features)

In [164]:
# Leave-one-out evaluation of a Mean Shift "cluster mean" predictor.
#
# For every row of X: fit Mean Shift on all other rows, assign every row to its
# nearest cluster centre, and predict the held-out row's attribute values as the
# mean of y over the *other* rows that landed in the same cluster. The absolute
# error per attribute is averaged over all held-out rows.
#
# Changes from the earlier version of this cell:
#   * `random_seeds` was removed: it read the loop variable `i` before the loop
#     defined it (NameError on a fresh kernel) and was never passed to MeanShift.
#   * Positional and label indexing are kept apart (`pos` vs `label`), so the
#     cell no longer relies on X having a default 0..n-1 index.
#   * The weighted variant is a flag instead of commented-out code.
#   * Re-running the cell replaces its own entry in the summary tables instead
#     of appending a duplicate.
bandwidth = 150
use_distance_weights = False  # True: weight cluster members by inverse distance to the held-out row
run_label = f"{'Weighted' if use_distance_weights else 'Unweighted'} Mean Shift {bandwidth}"

# Plain arrays for positional access inside the loop.
X_values = X.to_numpy()
y_values = y.to_numpy()

model = MeanShift(bandwidth=bandwidth)
errors = []
for pos, label in enumerate(X.index):
    # Fit on everything except the held-out row (dropped by index label).
    model.fit(X.drop(index=label))

    # Nearest-centre assignment for every row, including the held-out one.
    cluster_of_row = model.predict(X)
    neighbor_points_indices = cluster_of_row == cluster_of_row[pos]
    neighbor_points_indices[pos] = False  # exclude the true answer itself

    # NOTE(review): if no other row shares the cluster, the unweighted mean is
    # NaN (NumPy warns) and the weighted branch raises on the empty max().
    neighbor_labels = y_values[neighbor_points_indices]
    if use_distance_weights:
        distances_to_neighbors = np.sqrt(
            ((X_values[neighbor_points_indices] - X_values[pos]) ** 2).sum(axis=1)
        )
        # Inverse-distance weights, scaled so the closest neighbour has weight 1.
        weights = 1 / distances_to_neighbors
        weights /= weights.max()
        f_p = np.average(neighbor_labels, weights=weights, axis=0)
    else:
        f_p = neighbor_labels.mean(axis=0)

    f_t = y_values[pos]
    errors.append(np.abs(f_t - f_p))

errors = np.asarray(errors)
avg_errors = errors.mean(axis=0)
result = pd.DataFrame(data=avg_errors, index=attribute_names)
semantic_result = result[result.index.isin(semantic_features)]

print('Error by attribute (ordered by error, ascending):')
print(semantic_result.sort_values(by=0).round(3))

# Record this run under its label; drop any previous entry with the same label
# first so that re-running the cell is idempotent.
semantic_result = semantic_result.rename(columns={0: run_label})
error_by_attr_table = pd.concat(
    [error_by_attr_table.drop(columns=run_label, errors='ignore'), semantic_result],
    axis=1,
)
error_table = pd.concat(
    [error_table.drop(index=run_label, errors='ignore'), semantic_result.mean().round(3)],
    axis=0,
)

Error by attribute (ordered by error, ascending):
                        0
gentle              0.060
fresh               0.065
charming            0.067
delicate            0.069
wide                0.071
calm                0.078
friendly            0.079
soft                0.082
attention-grabbing  0.083
strong              0.090
sloppy              0.092
graceful            0.093
happy               0.094
pretentious         0.096
boring              0.097
thin                0.097
attractive          0.099
modern              0.108
warm                0.111
clumsy              0.111
bad                 0.115
sharp               0.116
artistic            0.117
disorderly          0.117
playful             0.117
legible             0.119
technical           0.120
dramatic            0.120
angular             0.121
formal              0.128
complex             0.129


In [165]:
# Show the summary table: one row per evaluated configuration, holding the mean
# of that run's per-attribute errors (rounded to 3 decimals when appended).
print(error_table)

                               0
Weighted Mean Shift 150    0.099
Unweighted Mean Shift 150  0.099
