In [3]:
# Import necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score, f1_score

import pandas as pd

In [4]:
def sample_data(data, label, random_state=None):
    """Down-sample ``data`` so that neither class of ``label`` exceeds a 9:1 ratio.

    Rows are split into positives (``data[label] == 1``) and negatives
    (``data[label] == 0``); rows with any other label value are dropped.
    If one class is present but makes up 10% or less of those rows, the
    other class is randomly down-sampled (without replacement) to nine times
    the minority size. Otherwise every positive and negative row is kept.
    When one class is missing entirely there is nothing to balance against,
    so the rows of the remaining class are all kept.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the column ``label``.
    label : str
        Name of the 0/1 column to balance on.
    random_state : int, numpy random generator or None, optional
        Forwarded to ``DataFrame.sample``. ``None`` (the default) gives a
        different draw on every call.

    Returns
    -------
    pandas.DataFrame
        Sampled positives followed by sampled negatives, with a fresh
        0..n-1 index. Row order inside each class is shuffled by the sampling.
    """
    # Separate the dataframe into positive and negative examples
    positive_examples = data[data[label] == 1]
    negative_examples = data[data[label] == 0]

    size_positive = len(positive_examples)
    size_negative = len(negative_examples)
    total = size_positive + size_negative

    print(size_negative, size_positive)

    # Cap the majority class at nine times the minority class. The `0 <`
    # guards keep an absent class from forcing both sample sizes to zero.
    if 0 < 10 * size_negative <= total:
        n_samples_positive = 9 * size_negative
        n_samples_negative = size_negative
    elif 0 < 10 * size_positive <= total:
        n_samples_negative = 9 * size_positive
        n_samples_positive = size_positive
    else:
        n_samples_positive = size_positive
        n_samples_negative = size_negative

    # Share of negatives in the sampled frame; undefined when nothing is sampled
    n_samples_total = n_samples_positive + n_samples_negative
    print(n_samples_negative / n_samples_total if n_samples_total else float('nan'))

    # Randomly draw the chosen number of rows from each group
    positive_sample = positive_examples.sample(
        n=n_samples_positive, replace=False, random_state=random_state
    )
    negative_sample = negative_examples.sample(
        n=n_samples_negative, replace=False, random_state=random_state
    )

    # Concatenate the sampled dataframes
    sampled_df = pd.concat([positive_sample, negative_sample], axis=0)

    # Reset the index of the resulting dataframe
    sampled_df = sampled_df.reset_index(drop=True)

    return sampled_df

In [17]:
def train_SVC(dataset, label, balanced=False, n_features=512, test_size=0.2,
              random_state=None):
    """Train a linear SVM to predict ``label`` and report its scores.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame whose first ``n_features`` columns (by position) are used as
        features and which contains the column ``label``.
    label : str
        Name of the target column. ``f1_score`` is called with its defaults,
        so the target is expected to be binary.
    balanced : bool, optional
        If True, the frame is first passed through ``sample_data`` for this
        label. That subsample is not seeded by this function.
    n_features : int, optional
        Number of leading columns used as features (default 512).
    test_size : float, optional
        Share of rows held out for testing (default 0.2).
    random_state : int or None, optional
        Seed for the train/test split and the LinearSVC solver. ``None``
        (the default) leaves both unseeded.

    Returns
    -------
    list
        ``[label, n_rows, train_accuracy, test_accuracy, f1, coef_]`` where
        ``n_rows`` counts the rows actually used (after optional balancing)
        and ``coef_`` is the fitted classifier's weight array.
    """
    # Pick the frame to learn from
    source = sample_data(dataset, label) if balanced else dataset
    X = source.iloc[:, :n_features]
    y = source[label]

    print(label)
    print(y.value_counts())

    # Split the dataset into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # class_weight='balanced' re-weights classes even when the frame was down-sampled
    clf = LinearSVC(max_iter=60000, tol=1e-3, verbose=1, class_weight='balanced',
                    random_state=random_state)

    # Train the SVM classifier on the training data
    clf.fit(X_train, y_train)

    # Accuracy on the data the classifier was fitted on
    y_pred_train = clf.predict(X_train)
    acc_train = accuracy_score(y_train, y_pred_train)
    print("Training Accuracy:", acc_train)

    # Accuracy on the held-out test data
    y_pred_test = clf.predict(X_test)
    acc_test = accuracy_score(y_test, y_pred_test)
    print(f"Test Accuracy: {acc_test}")

    # F1 score on the held-out test data
    f1 = f1_score(y_test, y_pred_test)
    print("F1 score: {:.2f}".format(f1))

    # Analytics
    analytics = [label, len(X), acc_train, acc_test, f1, clf.coef_]

    return analytics

In [6]:
# Load the dataset
# Forward slashes keep this relative path valid on both Windows and POSIX systems.
dataset = pd.read_csv('data/latent_space/latent_space_data_100k')
# The first 512 columns (by position) form the feature matrix.
X = dataset.iloc[ : , : 512]
# NOTE(review): y is bound to the whole frame here, not to a label column;
# the balanced-dataset cell rebinds it before any model is fitted.
y = dataset

In [5]:
# Attribute columns to train one classifier for, in the order they are processed.
feature_names = [
    "5oClockShadow", "ArchedEyebrows", "Attractive", "BagsUnderEyes",
    "Bald", "Bangs", "BigLips", "BigNose",
    "BlackHair", "BlondHair", "Blurry", "BrownHair",
    "BushyEyebrows", "Chubby", "DoubleChin", "Eyeglasses",
    "Goatee", "GrayHair", "HeavyMakeup", "HighCheekbones",
    "Male", "MouthSlightlyOpen", "Mustache", "NarrowEyes",
    "NoBeard", "OvalFace", "PaleSkin", "PointyNose",
    "RecedingHairline", "RosyCheeks", "Sideburn", "Smiling",
    "StraightHair", "WavyHair", "WearingEarrings", "WearingHat",
    "WearingLipstick", "WearingNecklace", "WearingNecktie", "Young",
]

In [1]:
# Get balanced dataset
# Balance on the same attribute that is used as the target, so the class
# ratio enforced by sample_data applies to the labels the model is trained on.
target_label = 'ArchedEyebrows'
balanced_dataset = sample_data(dataset, target_label)
X = balanced_dataset.iloc[ : , : 512]
y = balanced_dataset[target_label]

# Class counts of the target after balancing
y.value_counts()

NameError: name 'sample_data' is not defined

In [13]:
# Class counts of the ArchedEyebrows column in the balanced frame
balanced_dataset["ArchedEyebrows"].value_counts()

0    2844
1     316
Name: ArchedEyebrows, dtype: int64

In [36]:
# Split the dataset into training and testing sets
# A fixed seed makes the split, and the scores reported below, repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

In [37]:
# Build a default linear SVM and fit it on the training split in one step
# (fit returns the estimator itself).
clf = LinearSVC().fit(X_train, y_train)

# Score the held-out split
y_pred = clf.predict(X_test)

# Report test accuracy first, then the F1 score
acc = accuracy_score(y_test, y_pred)
print(f"Accuracy: {acc}")

f1 = f1_score(y_test, y_pred)
print("F1 score: {:.2f}".format(f1))

Accuracy: 0.5827777777777777




In [38]:
# Train the SVM classifier on the training data
# NOTE(review): the previous cell already fits clf on the same split, so this
# repeats that fit; as the last expression it also displays the estimator.
clf.fit(X_train, y_train)



In [39]:
# Predict the labels of the training data (overwrites the test-set predictions in y_pred)
y_pred = clf.predict(X_train)

In [40]:
# Evaluate the accuracy of the classifier on the training data
# (y_pred holds predictions for X_train at this point, so this is training accuracy)
acc = accuracy_score(y_train, y_pred)
print("Accuracy:", acc)

Accuracy: 0.6902380952380952


In [6]:
# Column headers for the results table; the order matches the list returned by train_SVC
cols = [
    'Feature',
    'Data Size',
    'Training Accuracy',
    'Testing Accuracy',
    'F1 Score',
    'Normal',
]

# Train one classifier per attribute on its balanced subsample and keep each result row
svc_data = []
for label in feature_names:
    analytics = train_SVC(dataset, label, balanced=True)
    svc_data += [analytics]

99737 263
0.9
5oClockShadow
0    2367
1     263
Name: 5oClockShadow, dtype: int64
[LibLinear]



Training Accuracy: 1.0
Test Accuracy: 0.8155893536121673
F1 score: 0.29
99684 316
0.9
ArchedEyebrows
0    2844
1     316
Name: ArchedEyebrows, dtype: int64
[LibLinear]



Training Accuracy: 0.9193037974683544
Test Accuracy: 0.8259493670886076
F1 score: 0.25
98638 1362
0.9
Attractive
0    12258
1     1362
Name: Attractive, dtype: int64
[LibLinear]



Training Accuracy: 0.8341593245227606
Test Accuracy: 0.8138766519823789
F1 score: 0.27
99142 858
0.9
BagsUnderEyes
0    7722
1     858
Name: BagsUnderEyes, dtype: int64
[LibLinear]



Training Accuracy: 0.856497668997669
Test Accuracy: 0.8129370629370629
F1 score: 0.23
98820 1180
0.9
Bald
0    10620
1     1180
Name: Bald, dtype: int64
[LibLinear]



Training Accuracy: 0.8998940677966102
Test Accuracy: 0.8838983050847458
F1 score: 0.53
97202 2798
0.9
Bangs
0    25182
1     2798
Name: Bangs, dtype: int64
[LibLinear]



Training Accuracy: 0.8707558970693352
Test Accuracy: 0.8656182987848463
F1 score: 0.45
97086 2914
0.9
BigLips
0    26226
1     2914
Name: BigLips, dtype: int64
[LibLinear]



Training Accuracy: 0.8401252573781743
Test Accuracy: 0.8323610157858614
F1 score: 0.17
89327 10673
0.89327
BigNose
0    89327
1    10673
Name: BigNose, dtype: int64
[LibLinear]



Training Accuracy: 0.81505
Test Accuracy: 0.8109
F1 score: 0.19
98836 1164
0.9
BlackHair
0    10476
1     1164
Name: BlackHair, dtype: int64
[LibLinear]



Training Accuracy: 0.8683419243986255
Test Accuracy: 0.843213058419244
F1 score: 0.40
99656 344
0.9
BlondHair
0    3096
1     344
Name: BlondHair, dtype: int64
[LibLinear]



Training Accuracy: 0.967296511627907
Test Accuracy: 0.8241279069767442
F1 score: 0.32
86498 13502
0.86498
Blurry
0    86498
1    13502
Name: Blurry, dtype: int64
[LibLinear]



Training Accuracy: 0.8097375
Test Accuracy: 0.80815
F1 score: 0.33
99457 543
0.9
BrownHair
0    4887
1     543
Name: BrownHair, dtype: int64
[LibLinear]



Training Accuracy: 0.8943370165745856
Test Accuracy: 0.8296500920810314
F1 score: 0.38
99273 727
0.9
BushyEyebrows
0    6543
1     727
Name: BushyEyebrows, dtype: int64
[LibLinear]



Training Accuracy: 0.8834250343878954
Test Accuracy: 0.8342503438789546
F1 score: 0.34
97977 2023
0.9
Chubby
0    18207
1     2023
Name: Chubby, dtype: int64
[LibLinear]



Training Accuracy: 0.8584404349975284
Test Accuracy: 0.842807711319822
F1 score: 0.35
99493 507
0.9
DoubleChin
0    4563
1     507
Name: DoubleChin, dtype: int64
[LibLinear]



Training Accuracy: 0.8957100591715976
Test Accuracy: 0.8165680473372781
F1 score: 0.31
86949 13051
0.86949
Eyeglasses
0    86949
1    13051
Name: Eyeglasses, dtype: int64
[LibLinear]



Training Accuracy: 0.8163875
Test Accuracy: 0.8168
F1 score: 0.35
91971 8029
0.9
Goatee
0    72261
1     8029
Name: Goatee, dtype: int64
[LibLinear]



Training Accuracy: 0.8301002615518744
Test Accuracy: 0.824386598580147
F1 score: 0.20
99472 528
0.9
GrayHair
0    4752
1     528
Name: GrayHair, dtype: int64
[LibLinear]



Training Accuracy: 0.8880208333333334
Test Accuracy: 0.8172348484848485
F1 score: 0.32
99768 232
0.9
HeavyMakeup
0    2088
1     232
Name: HeavyMakeup, dtype: int64
[LibLinear]



Training Accuracy: 0.9946120689655172
Test Accuracy: 0.7650862068965517
F1 score: 0.17
84074 15926
0.84074
HighCheekbones
0    84074
1    15926
Name: HighCheekbones, dtype: int64
[LibLinear]



Training Accuracy: 0.79975
Test Accuracy: 0.7943
F1 score: 0.45
37168 62832
0.37168
Male
1    62832
0    37168
Name: Male, dtype: int64
[LibLinear]



Training Accuracy: 0.6060625
Test Accuracy: 0.59705
F1 score: 0.67
61939 38061
0.61939
MouthSlightlyOpen
0    61939
1    38061
Name: MouthSlightlyOpen, dtype: int64
[LibLinear]



Training Accuracy: 0.6050875
Test Accuracy: 0.59515
F1 score: 0.49
84112 15888
0.84112
Mustache
0    84112
1    15888
Name: Mustache, dtype: int64
[LibLinear]



Training Accuracy: 0.75685
Test Accuracy: 0.7569
F1 score: 0.30
88485 11515
0.88485
NarrowEyes
0    88485
1    11515
Name: NarrowEyes, dtype: int64
[LibLinear]



Training Accuracy: 0.8119875
Test Accuracy: 0.80915
F1 score: 0.29
46319 53681
0.46319
NoBeard
1    53681
0    46319
Name: NoBeard, dtype: int64
[LibLinear]



Training Accuracy: 0.5890125
Test Accuracy: 0.587
F1 score: 0.61
99600 400
0.9
OvalFace
0    3600
1     400
Name: OvalFace, dtype: int64
[LibLinear]



Training Accuracy: 0.945625
Test Accuracy: 0.84375
F1 score: 0.42
99954 46
0.9
PaleSkin
0    414
1     46
Name: PaleSkin, dtype: int64
[LibLinear]Training Accuracy: 1.0
Test Accuracy: 0.6630434782608695
F1 score: 0.11
98467 1533
0.9
PointyNose
0    13797
1     1533
Name: PointyNose, dtype: int64
[LibLinear]



Training Accuracy: 0.8427919112850619
Test Accuracy: 0.8056099151989563
F1 score: 0.23
89390 10610
0.8939
RecedingHairline
0    89390
1    10610
Name: RecedingHairline, dtype: int64
[LibLinear]



Training Accuracy: 0.8310125
Test Accuracy: 0.82715
F1 score: 0.35
99910 90
0.9
RosyCheeks
0    810
1     90
Name: RosyCheeks, dtype: int64
[LibLinear]Training Accuracy: 1.0
Test Accuracy: 0.8
F1 score: 0.38
96951 3049
0.9
Sideburn
0    27441
1     3049
Name: Sideburn, dtype: int64
[LibLinear]



Training Accuracy: 0.8484339127582814
Test Accuracy: 0.8463430632994424
F1 score: 0.32
77541 22459
0.77541
Smiling
0    77541
1    22459
Name: Smiling, dtype: int64
[LibLinear]



Training Accuracy: 0.75825
Test Accuracy: 0.75075
F1 score: 0.49
99764 236
0.9
StraightHair
0    2124
1     236
Name: StraightHair, dtype: int64
[LibLinear]



Training Accuracy: 1.0
Test Accuracy: 0.8177966101694916
F1 score: 0.25
90958 9042
0.9
WavyHair
0    81378
1     9042
Name: WavyHair, dtype: int64
[LibLinear]



Training Accuracy: 0.8200895819508959
Test Accuracy: 0.8130944481309444
F1 score: 0.22
99742 258
0.9
WearingEarrings
0    2322
1     258
Name: WearingEarrings, dtype: int64
[LibLinear]



Training Accuracy: 0.9869186046511628
Test Accuracy: 0.7732558139534884
F1 score: 0.21
92045 7955
0.9
WearingHat
0    71595
1     7955
Name: WearingHat, dtype: int64
[LibLinear]



Training Accuracy: 0.8403834066624765
Test Accuracy: 0.8397862979258328
F1 score: 0.27
97146 2854
0.9
WearingLipstick
0    25686
1     2854
Name: WearingLipstick, dtype: int64
[LibLinear]



Training Accuracy: 0.8481079187105817
Test Accuracy: 0.8430273300630694
F1 score: 0.21
99939 61
0.9
WearingNecklace
0    549
1     61
Name: WearingNecklace, dtype: int64
[LibLinear]Training Accuracy: 1.0
Test Accuracy: 0.7295081967213115
F1 score: 0.23
97850 2150
0.9
WearingNecktie
0    19350
1     2150
Name: WearingNecktie, dtype: int64
[LibLinear]



Training Accuracy: 0.8604651162790697
Test Accuracy: 0.8388372093023255
F1 score: 0.37
37901 62099
0.37901
Young
1    62099
0    37901
Name: Young, dtype: int64
[LibLinear]Training Accuracy: 0.6185375
Test Accuracy: 0.6184
F1 score: 0.69




In [7]:
# Assemble the collected result rows into a table, using `cols` as the column names
df_svc = pd.DataFrame(svc_data, columns=cols)
# Bare last expression so the notebook renders the table
df_svc

Unnamed: 0,Feature,Data Size,Training Accuracy,Testing Accuracy,F1 Score,Normal
0,5oClockShadow,2630,1.0,0.815589,0.291971,"[[0.20616387182721962, -0.1852887032878476, -0..."
1,ArchedEyebrows,3160,0.919304,0.825949,0.246575,"[[0.0033585155950519962, -0.09369209687722566,..."
2,Attractive,13620,0.834159,0.813877,0.274678,"[[0.010649949568088025, -0.02435262106397855, ..."
3,BagsUnderEyes,8580,0.856498,0.812937,0.230216,"[[-0.03482109437807537, 0.01880609993099003, -..."
4,Bald,11800,0.899894,0.883898,0.534014,"[[-0.019647980213325283, 0.007708622113970705,..."
5,Bangs,27980,0.870756,0.865618,0.44868,"[[0.006340451119322179, 0.012511142019755108, ..."
6,BigLips,29140,0.840125,0.832361,0.168511,"[[-0.001042532874640518, 0.004935834719709264,..."
7,BigNose,100000,0.81505,0.8109,0.193947,"[[-0.017237408249713072, -0.01066792366029792,..."
8,BlackHair,11640,0.868342,0.843213,0.396694,"[[-0.04842956290884508, 0.007079689066247752, ..."
9,BlondHair,3440,0.967297,0.824128,0.324022,"[[0.05455862652310837, -0.5923431886420694, -0..."


In [8]:
# Save the results table as CSV.
# Forward slashes keep this relative path valid on both Windows and POSIX systems.
df_svc.to_csv('data/svm/C_90_split_90_10_recovery')

In [9]:
# pandas is already imported in the first cell; this repeat lets the cell run on its own
import pandas as pd
# Read a CSV from the working directory
# NOTE(review): presumably a previously saved results table - confirm what this file contains
df = pd.read_csv('80_pct_squared_hinge')