## Data loading

In [1]:
#import packages
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
import random
# Device configuration: order CUDA devices by PCI bus id and expose only device 0.
import os
os.environ.update(CUDA_DEVICE_ORDER="PCI_BUS_ID", CUDA_VISIBLE_DEVICES="0")

In [2]:
# Load the training data. Every CSV stores its index in the first column.

# Features (one row per sample).
train_ch = np.array(pd.read_csv("train_noise_logits_before.csv", index_col=0))

# Original input: read as flat rows, then reshaped to (n_samples, 2, 128).
train = np.array(pd.read_csv("train.csv", index_col=0))
train = train.reshape(train.shape[0], 2, 128)

# Labels and their noisy counterpart, each flattened to a 1-D vector.
train_label = np.array(pd.read_csv("train_label.csv", index_col=0)).reshape(-1)
train_noise_label = np.array(pd.read_csv("train_noise_label.csv", index_col=0)).reshape(-1)

In [15]:
# Fraction of samples whose noisy label agrees with train_label.
np.mean(train_noise_label == train_label)

0.6

In [3]:
# Rotate the features onto the eigenvectors of their covariance matrix
# (columns of train_ch are treated as variables, rows as observations).
# NOTE(review): the covariance is symmetric, so np.linalg.eigh would guarantee
# real eigenpairs; np.linalg.eig is kept because switching would change the
# order/sign of the rotated columns and therefore every downstream result.
a = np.cov(train_ch, rowvar=False)
a_c, a_v = np.linalg.eig(a)
train_ch_ro = train_ch @ a_v

## CSFE

In [5]:
def g(x, sample, label, r):
    """Divergence between class-conditional and pooled empirical CDFs at ``x``.

    For every class ``i`` in ``range(r)`` the sample is dichotomised at the
    threshold ``x`` and the class-conditional proportions
    ``(F_rn, 1 - F_rn)`` are compared with the pooled proportions
    ``(F_n, 1 - F_n)`` through ``F * log(F / F_pooled)`` terms, weighted by
    the class frequency ``p_r``.  A constant ``1e-6`` is added inside the
    logarithms to avoid ``log(0)``.

    Parameters
    ----------
    x : scalar
        Threshold at which the empirical CDFs are evaluated.
    sample : 1-D ndarray
        Observed values of one feature.
    label : 1-D ndarray
        Class label of every entry of ``sample`` (compared against
        ``0 .. r-1``).
    r : int
        Number of classes to iterate over.

    Returns
    -------
    scalar
        Twice the weighted sum of the per-class terms.

    Notes
    -----
    Classes with no samples are skipped: their weight ``p_r`` is zero, so
    they contribute nothing, whereas evaluating them would compute ``0 / 0``
    and turn the whole result into NaN.
    """
    n = sample.shape[0]

    # The pooled empirical CDF at x is the same for every class; compute once.
    F_n = np.sum(sample <= x) / n
    F_n_bar = 1 - F_n

    total = 0
    for i in range(r):
        in_class = (label == i)
        p_r = np.sum(in_class) / n
        if p_r == 0:
            # Empty class: zero weight, and F_rn would be 0 / 0.
            continue

        # n * p_r is the class size, so this is the class-conditional CDF at x.
        F_rn = np.sum(sample[in_class] <= x) / (n * p_r)
        F_rn_bar = 1 - F_rn

        total += (
            p_r * F_rn * np.log((F_rn + 1e-6) / (F_n + 1e-6))
            + p_r * F_rn_bar * np.log((F_rn_bar + 1e-6) / (F_n_bar + 1e-6))
        )

    return 2 * total

def LR(x, y, r):
    """Score every column of ``x`` by accumulating ``g`` over a threshold grid.

    For each feature column, 101 evenly spaced thresholds are laid out between
    the column minimum and maximum; ``g`` is evaluated at the first 100 of
    them (the maximum itself is skipped) and each value, divided by the number
    of rows of ``x``, is added to that column's score.

    Parameters
    ----------
    x : 2-D ndarray
        Data matrix, one row per sample and one column per feature.
    y : 1-D ndarray
        Class labels, passed through to ``g``.
    r : int
        Number of classes, passed through to ``g``.

    Returns
    -------
    ndarray
        One score per column of ``x``.
    """
    n_rows = x.shape[0]
    n_cols = x.shape[1]
    lr = np.zeros(n_cols)

    for col in range(n_cols):
        column = x[:, col]
        lowest, highest = np.min(column), np.max(column)
        # Drop the last grid point so that exactly 100 thresholds are used.
        for threshold in np.linspace(lowest, highest, 101)[:-1]:
            lr[col] += g(threshold, column, y, r) / n_rows

    return lr

In [6]:
def VaS(x, y, r):
    """Per-feature score comparing class-conditional and pooled empirical CDFs.

    For every column of ``x`` the pooled empirical CDF and each class's
    empirical CDF are evaluated at all ``n`` observed values of that column,
    passed through ``arcsin(sqrt(.))``, and the squared differences are
    averaged over the ``n`` evaluation points.  Each class average is weighted
    by the class size and the weighted values are summed over classes.

    Parameters
    ----------
    x : 2-D ndarray
        Data matrix, one row per sample and one column per feature.
    y : ndarray
        Class label of every row (compared against ``0 .. r-1``).
    r : int
        Number of classes to iterate over.

    Returns
    -------
    ndarray
        One score per column of ``x``.

    Notes
    -----
    * The empirical CDFs are obtained by sorting and ``np.searchsorted``
      (``side="right"`` counts values ``<=`` the query), which yields the same
      counts as a full pairwise ``<=`` comparison without allocating
      ``n x n`` matrices.  This assumes the column contains no NaN.
    * Classes with no samples are skipped (their weight is zero); averaging
      over an empty class would otherwise produce NaN for the whole feature.
    """
    n = x.shape[0]
    p = x.shape[1]
    scores = np.zeros(p)

    for j in range(p):
        col = x[:, j]

        # Pooled ECDF at every observed value, on the arcsin-sqrt scale.
        pooled_cdf = np.searchsorted(np.sort(col), col, side="right") / n
        pooled_angle = np.arcsin(np.sqrt(pooled_cdf))

        per_class = np.zeros(r)
        for i in range(r):
            members = col[np.where(y == i)[0]]
            m = members.shape[0]
            if m == 0:
                # Empty class: zero weight, and its ECDF is undefined.
                continue
            class_cdf = np.searchsorted(np.sort(members), col, side="right") / m
            sq_diff = (np.arcsin(np.sqrt(class_cdf)) - pooled_angle) ** 2
            per_class[i] = m * np.mean(sq_diff)

        scores[j] = np.sum(per_class)

    return scores

In [7]:
# Score every rotated feature against the noisy labels, using 11 classes.
VaS_result = VaS(x=train_ch_ro, y=train_noise_label, r=11)

## TGC

In [8]:
def t_type(x, v, iter_num=50, tol=1e-12):
    """Iteratively reweighted estimate of location and scatter.

    Starting from a zero mean and identity scatter, each iteration

    1. computes the squared Mahalanobis distance ``d_i`` of every row to the
       current mean under the current scatter,
    2. turns it into a weight ``(v + p) / (v + d_i)``,
    3. updates the mean as the weighted average of the rows, and
    4. updates the scatter as the weighted outer-product sum around the new
       mean, divided by ``n``.

    When ``p >= n`` the scatter is averaged with the identity matrix so that
    it stays invertible.  Iteration stops after ``iter_num`` rounds or once
    the Frobenius norm of the change in the trace-normalised scatter is at
    most ``tol``.

    Parameters
    ----------
    x : 2-D ndarray, shape (n, p)
        Data matrix, one row per sample.
    v : scalar
        Constant in the weight ``(v + p) / (v + d_i)``
        (presumably the degrees of freedom of a multivariate t model).
    iter_num : int, optional
        Maximum number of iterations (default 50).
    tol : float, optional
        Convergence threshold on the normalised scatter change
        (default 1e-12).

    Returns
    -------
    mu : ndarray, shape (p,)
        Estimated location.
    cov : ndarray, shape (p, p)
        Estimated scatter matrix.

    Notes
    -----
    Distances and the weighted scatter are computed row-wise, so no
    ``n x n`` intermediate is allocated.
    """
    n = x.shape[0]
    p = x.shape[1]
    cov = np.identity(p)
    mu = np.zeros([1, p])
    # a == 1 switches on the shrinkage toward the identity when p >= n.
    a = 1 - (p < n)
    err = 10

    for _ in range(iter_num):
        if err <= tol:
            break

        oldcov = cov
        incov = np.linalg.inv(cov)

        # Squared Mahalanobis distance of every row to the current mean.
        centered = x - mu
        maha = np.einsum("ij,ij->i", np.dot(centered, incov), centered)
        wt = (v + p) / (v + maha)

        # Weighted mean, then weighted scatter around the updated mean.
        mu = np.dot(wt, x) / np.sum(wt)
        centered = x - mu
        cov1 = np.dot((centered * wt[:, None]).T, centered) / n
        cov = cov1 / (1 + a) + a * np.identity(p) / (1 + a)

        # Compare trace-normalised scatters so a pure rescaling counts as converged.
        temp1 = (p * oldcov / np.sum(np.diag(oldcov))) - (p * cov / np.sum(np.diag(cov)))
        err = np.linalg.norm(temp1, ord="fro")

    return mu, cov

## Training Set Boundaries Reassigned

In [9]:
def get_dis(x, mu, cov, inv, det, det_weight=100):
    """Quadratic-form distance of one sample to a class, plus a weighted offset.

    Computes ``(x - mu) @ inv @ (x - mu).T + det_weight * det``.

    Parameters
    ----------
    x : 1-D ndarray
        A single sample.
    mu : ndarray
        Class location, broadcastable against ``x``.
    cov : ndarray
        Class scatter matrix.  Not used in the computation; kept so existing
        call sites remain valid.
    inv : 2-D ndarray
        Matrix used in the quadratic form (callers pass the inverse scatter).
    det : scalar
        Offset term (callers pass the log-determinant of the scatter).
    det_weight : scalar, optional
        Multiplier applied to ``det`` (default 100, the value that was
        previously hard-coded).

    Returns
    -------
    ndarray, shape (1, 1)
        The distance, kept two-dimensional as produced by the matrix product.
    """
    diff = np.reshape(x - mu, [1, -1])
    quad = np.dot(np.dot(diff, inv), diff.T)
    return quad + det_weight * det

In [10]:
def pre(x, y, v, r=11):
    """Distance of every sample to every class model.

    For each class ``i`` in ``range(r)`` a location and scatter are fitted
    with ``t_type`` on the rows of ``x`` labelled ``i``; the distance of every
    row of ``x`` to that class is then obtained from ``get_dis`` using the
    inverse scatter and its log-determinant.

    Parameters
    ----------
    x : 2-D ndarray, shape (n, p)
        Data matrix, one row per sample.
    y : 1-D ndarray
        Class label of every row, used to select the rows of each class.
    v : scalar
        Forwarded to ``t_type``.
    r : int, optional
        Number of classes (default 11, the value that was previously
        hard-coded).

    Returns
    -------
    ndarray, shape (n, r)
        ``dis_all[m, i]`` is the distance of sample ``m`` to class ``i``.
        Columns of classes that have no samples are filled with ``inf`` so
        that such a class is never the nearest one.
    """
    n = x.shape[0]
    dis_all = np.zeros([n, r])

    for i in range(r):
        members = x[(y == i), :]
        if members.shape[0] == 0:
            # No data to fit a model on; fitting would yield NaN or a singular matrix.
            dis_all[:, i] = np.inf
            continue

        mu, cov = t_type(members, v)
        inv = np.linalg.inv(cov)
        # Log-determinant without a general eigen-decomposition, which can
        # return complex eigenvalues for a numerically asymmetric matrix.
        ln_det = np.linalg.slogdet(cov)[1]

        for m in range(n):
            # get_dis returns a (1, 1) array; extract the scalar explicitly,
            # since implicit size-1 array -> scalar assignment is deprecated.
            dis_all[m, i] = np.asarray(get_dis(x[m, :], mu, cov, inv, ln_det)).item()

    return dis_all

In [11]:
def acc(x, y, v, yt):
    """Assign every sample to its nearest class and score against ``yt``.

    The class distances come from ``pre(x, y, v)``; each sample receives the
    index of the column with the smallest distance.

    Parameters
    ----------
    x : 2-D ndarray
        Data matrix, forwarded to ``pre``.
    y : 1-D ndarray
        Labels used to fit the class models, forwarded to ``pre``.
    v : scalar
        Forwarded to ``pre``.
    yt : sequence
        Labels the assignments are compared with.

    Returns
    -------
    rate : float
        Fraction of samples whose assigned class equals ``yt``.
    pre_label : ndarray of float, shape (n,)
        Assigned class index of every sample.
    """
    distances = pre(x, y, v)
    n = distances.shape[0]

    # First entry of each row's ascending argsort is the nearest class.
    pre_label = np.argsort(distances, axis=1)[:, 0].astype(float)

    hits = sum(1 for i in range(n) if pre_label[i] == yt[i])
    return hits / n, pre_label

In [12]:
# Discard the 150 rotated features with the lowest VaS score and keep the rest.
train_temp = np.take(train_ch_ro, np.argsort(VaS_result)[150:], axis=1)
# Fit the class models on the noisy labels (v = 1) and compare the reassigned
# labels with train_label.
acc_rate, pre_label = acc(x=train_temp, y=train_noise_label, v=1, yt=train_label)
acc_rate

0.7068831168831169

In [13]:
# Persist the reassigned labels.
# NOTE(review): the file name says "LR", but the feature ranking used above
# comes from VaS_result; confirm the intended name.
pd.DataFrame(data=pre_label).to_csv(path_or_buf="train_reassigned_label_LR_after150.csv")