In [7]:
import torch
from torch.utils.data import Dataset
import numpy as np

In [59]:
class Monkey_Dataset(torch.utils.data.Dataset):
    def __init__(self, root, num=30, iso=True, split=0.6, train=True):
        super().__init__()
        mode = 'iso' if iso else 'pca'
#         self.datas = np.load(root + f'{mode}inputs_all.npy')
#         self.labels = np.load(root + f'{mode}labels_all_index.npy')
        datas = np.load(root + f'{mode}_random.npy')
        split = int(0.6 * datas.shape[0])
        self.datas = datas[:split ] if train else datas[split:]

    def __len__(self):
        """
            return: the number of sample(int type)
        """
        return self.datas.shape[0]

    def __getitem__(self, index):
        """
        return: tensor [1, embedding] and LongTensor [1]
        """
        _data = self.datas[index]
        data, label = _data[:30], _data[30]
        # crop
        data = data[:num]
        data = torch.tensor(data, dtype=torch.float32)
        label = torch.LongTensor([int(label)])
        return data, label

In [87]:
weight_vector = np.ones((30), dtype=np.float32) * 0.01
weight_vector[0] = 0.1
weight_vector[1] = 0.03
weight_vector[2] = 0.02
weight_vector[3] = 0.02

In [83]:
num = 5

In [84]:
monkey = Monkey_Dataset('', num, iso=True, split=0.8, train=True)
data, label = monkey.datas[:, :num], monkey.datas[:, 30]

In [85]:
val_monkey = Monkey_Dataset('', num, iso=True, split=0.8, train=False)
val_data, val_label = val_monkey.datas[:, :num], val_monkey.datas[:, 30]

In [86]:
vs = [[], [], [], []]
center = []
for vector, value in zip(data, label):
    vs[int(value)].append(vector)
for i in range(len(vs)):
    vs[i] = np.array(vs[i])
    center.append(np.mean(vs[i], axis=0, keepdims=False))
    print(f'{i} : {vs[i].shape}')
center = np.array(center)

0 : (107869, 5)
1 : (108014, 5)
2 : (107951, 5)
3 : (36825, 5)


In [89]:
acc = 0
for vector, value in zip(val_data, val_label):
    distance = np.sum(np.power(vector - center, 2) * weight_vector[:num], axis=1, keepdims=False)
    pred = np.argmin(distance)
    acc += 1 if pred == int(value) else 0
acc / val_label.size

0.24059224754616537

In [93]:
for num in range(1, 30, 1):
    monkey = Monkey_Dataset('', num, iso=True, split=0.8, train=True)
    data, label = monkey.datas[:, :num], monkey.datas[:, 30]
    val_monkey = Monkey_Dataset('', num, iso=True, split=0.8, train=False)
    val_data, val_label = val_monkey.datas[:, :num], val_monkey.datas[:, 30]
    vs = [[], [], [], []]
    center = []
    for vector, value in zip(data, label):
        vs[int(value)].append(vector)
    for i in range(len(vs)):
        vs[i] = np.array(vs[i])
        center.append(np.mean(vs[i], axis=0, keepdims=False))
#         print(f'{i} : {vs[i].shape}')
    center = np.array(center)
    acc = 0
    for vector, value in zip(val_data, val_label):
        distance = np.sum(np.power(vector - center, 2) * weight_vector[:num], axis=1, keepdims=False)
        pred = np.argmin(distance)
        acc += 1 if pred == int(value) else 0
    print(f"{num}: {acc / val_label.size}")

1: 0.1942896356679421
2: 0.19892696722675096
3: 0.2410414240558975
4: 0.23960655464980868
5: 0.24059224754616537
6: 0.24147812344035935
7: 0.24151555481617035
8: 0.2419855265346864
9: 0.24149891864914325
10: 0.24146148727333222
11: 0.23901597072034603
12: 0.23996007319913493
13: 0.24065879221427383
14: 0.24040093162535353
15: 0.2404633172517052
16: 0.24071285975711196
17: 0.24082931292630178
18: 0.2412077857261687
19: 0.24307519547496256
20: 0.24482615205456662
21: 0.245271169522542
22: 0.24540009981700217
23: 0.24586175345200464
24: 0.2454292131092996
25: 0.24609881883214108
26: 0.24496340043254033
27: 0.2452545333555149
28: 0.24559557477957078
29: 0.24583679920146398


In [94]:
for num in range(1, 30, 1):
    monkey = Monkey_Dataset('', num, iso=True, split=0.8, train=True)
    data, label = monkey.datas[:, :num], monkey.datas[:, 30]
    val_monkey = Monkey_Dataset('', num, iso=True, split=0.8, train=False)
    val_data, val_label = val_monkey.datas[:, :num], val_monkey.datas[:, 30]
    vs = [[], [], [], []]
    center = []
    for vector, value in zip(data, label):
        vs[int(value)].append(vector)
    for i in range(len(vs)):
        vs[i] = np.array(vs[i])
        center.append(np.mean(vs[i], axis=0, keepdims=False))
#         print(f'{i} : {vs[i].shape}')
    center = np.array(center)
    acc = 0
    for vector, value in zip(val_data, val_label):
        distance = np.sum(np.power(vector - center, 2), axis=1, keepdims=False)
        pred = np.argmin(distance)
        acc += 1 if pred == int(value) else 0
    print(f"{num}: {acc / val_label.size}")

1: 0.1942896356679421
2: 0.20966561304275494
3: 0.25964897687572785
4: 0.25563550158043585
5: 0.2596947263350524
6: 0.2599733821327566
7: 0.25760272833139247
8: 0.26177008817168523
9: 0.2592455498253202
10: 0.252524538346365
11: 0.23798452836466477
12: 0.24171102977873898
13: 0.24540425885875894
14: 0.24418565962402264
15: 0.24580352686740975
16: 0.24772916320079855
17: 0.24940525702878058
18: 0.25050740309432706
19: 0.25389702212610216
20: 0.25872566960572285
21: 0.25758609216436534
22: 0.25732823157544504
23: 0.257685909166528
24: 0.25848028614207286
25: 0.2587131924804525
26: 0.25341457328231576
27: 0.25332307436366663
28: 0.25249126601231076
29: 0.2530652137747463


In [95]:
for num in range(1, 30, 1):
    monkey = Monkey_Dataset('', num, iso=True, split=0.8, train=True)
    data, label = monkey.datas[:, :num], monkey.datas[:, 30]
    val_monkey = Monkey_Dataset('', num, iso=True, split=0.8, train=False)
    val_data, val_label = val_monkey.datas[:, :num], val_monkey.datas[:, 30]
    vs = [[], [], [], []]
    center = []
    for vector, value in zip(data, label):
        vs[int(value)].append(vector)
    for i in range(len(vs)):
        vs[i] = np.array(vs[i])
        center.append(np.mean(vs[i], axis=0, keepdims=False))
#         print(f'{i} : {vs[i].shape}')
    center = np.array(center)
    acc = 0
    for vector, value in zip(val_data, val_label):
        distance = np.sum((vector - center), axis=1, keepdims=False)
        pred = np.argmin(distance)
        acc += 1 if pred == int(value) else 0
    print(f"{num}: {acc / val_label.size}")

1: 0.2994967559474297
2: 0.2994967559474297
3: 0.2994967559474297
4: 0.2994967559474297
5: 0.2991557145233738
6: 0.2994967559474297
7: 0.2994967559474297
8: 0.2994967559474297
9: 0.2994967559474297
10: 0.2994967559474297
11: 0.2994967559474297
12: 0.2994967559474297
13: 0.2991557145233738
14: 0.2991557145233738
15: 0.2991557145233738
16: 0.2991557145233738
17: 0.2991557145233738
18: 0.2991557145233738
19: 0.2991557145233738
20: 0.2991557145233738
21: 0.2991557145233738
22: 0.10245383463649975
23: 0.10245383463649975
24: 0.10245383463649975
25: 0.29889369489269674
26: 0.10245383463649975
27: 0.10245383463649975
28: 0.10245383463649975
29: 0.10245383463649975


In [96]:
for num in range(1, 30, 1):
    monkey = Monkey_Dataset('', num, iso=True, split=0.8, train=True)
    data, label = monkey.datas[:, :num], monkey.datas[:, 30]
    val_monkey = Monkey_Dataset('', num, iso=True, split=0.8, train=False)
    val_data, val_label = val_monkey.datas[:, :num], val_monkey.datas[:, 30]
    vs = [[], [], [], []]
    center = []
    for vector, value in zip(data, label):
        vs[int(value)].append(vector)
    for i in range(len(vs)):
        vs[i] = np.array(vs[i])
        center.append(np.mean(vs[i], axis=0, keepdims=False))
#         print(f'{i} : {vs[i].shape}')
    center = np.array(center)
    acc = 0
    for vector, value in zip(val_data, val_label):
        distance = np.sum(np.abs(vector - center), axis=1, keepdims=False)
        pred = np.argmin(distance)
        acc += 1 if pred == int(value) else 0
    print(f"{num}: {acc / val_label.size}")

1: 0.1942896356679421
2: 0.19426052237564465
3: 0.24832806521377473
4: 0.25002911329229743
5: 0.24514223922808184
6: 0.25670437531192813
7: 0.26179088338046913
8: 0.26171602062884713
9: 0.2574114124105806
10: 0.2558060222924638
11: 0.23281068041923142
12: 0.23674929296290134
13: 0.24221843287306605
14: 0.2413450341041424
15: 0.24294210613874564
16: 0.2462568624188987
17: 0.2503410414240559
18: 0.25223340542339046
19: 0.25581849941773416
20: 0.2602312427216769
21: 0.2587298286474796
22: 0.2603019464315422
23: 0.25966977208451175
24: 0.26020628847113625
25: 0.2579895192147729
26: 0.2543129263017801
27: 0.2537722508733988
28: 0.2528780568956912
29: 0.25509898519381136


In [97]:
for num in range(1, 30, 1):
    monkey = Monkey_Dataset('', num, iso=True, split=0.8, train=True)
    data, label = monkey.datas[:, :num], monkey.datas[:, 30]
    val_monkey = Monkey_Dataset('', num, iso=True, split=0.8, train=False)
    val_data, val_label = val_monkey.datas[:, :num], val_monkey.datas[:, 30]
    vs = [[], [], [], []]
    center = []
    for vector, value in zip(data, label):
        vs[int(value)].append(vector)
    for i in range(len(vs)):
        vs[i] = np.array(vs[i])
        center.append(np.mean(vs[i], axis=0, keepdims=False))
#         print(f'{i} : {vs[i].shape}')
    center = np.array(center)
    acc = 0
    for vector, value in zip(val_data, val_label):
        distance = np.sum(np.abs(vector - center) * weight_vector[:num], axis=1, keepdims=False)
        pred = np.argmin(distance)
        acc += 1 if pred == int(value) else 0
    print(f"{num}: {acc / val_label.size}")

1: 0.1942896356679421
2: 0.19429379470969887
3: 0.24746298452836465
4: 0.21765513225752786
5: 0.2304109133255698
6: 0.2298286474796207
7: 0.22980369322908
8: 0.2297828980202961
9: 0.2341124604891033
10: 0.23664531691898186
11: 0.23738562635168858
12: 0.23807602728331392
13: 0.23787639327898852
14: 0.2372400598902013
15: 0.23784727998669106
16: 0.2380635501580436
17: 0.23827566128763933
18: 0.23809682249209782
19: 0.23812177674263849
20: 0.23822991182831477
21: 0.23849193145899183
22: 0.2392031275994011
23: 0.23931542172683415
24: 0.23986857428048577
25: 0.23947762435534853
26: 0.24058808850440858
27: 0.2407461320911662
28: 0.24107053734819497
29: 0.24170271169522542
