In [1]:
import math
import torch
from torch import nn
from torch.nn import init
import numpy as np
import pandas as pd
# Single seed reused by every RNG-dependent step in this notebook (later cells read `seed`).
seed=100
np.random.seed(seed)          # NumPy's global generator
torch.manual_seed(seed)       # PyTorch's default generator
torch.cuda.manual_seed(seed)  # PyTorch generator of the current CUDA device

In [2]:
r"""
Permutation invariant and permutation equivariant linear layers, as described
in the paper "Deep Sets" by Zaheer et al. (https://arxiv.org/abs/1703.06114).
"""
#FeatureExtractor

class InvLinear(nn.Module):

    r"""Permutation invariant linear layer.

    Each input set is collapsed to one vector by a permutation invariant
    reduction and then mapped linearly:
        $y = \beta reduction(X) + bias$

    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        bias: If set to False, the layer will not learn an additive bias.
            Default: ``True``
        reduction: Permutation invariant operation that maps the input set into a single
            vector. Currently, the following are supported: mean, sum, max and min.
    """
    def __init__(self, in_features, out_features, bias=True, reduction='mean'):
        super(InvLinear, self).__init__()

        self.in_features = in_features
        self.out_features = out_features
        assert reduction in ['mean', 'sum', 'max', 'min'],  \
            '\'reduction\' should be \'mean\'/\'sum\'/\'max\'/\'min\', got {}'.format(reduction)
        self.reduction = reduction

        # Parameters are allocated uninitialised; reset_parameters() fills them.
        self.beta = nn.Parameter(torch.empty(self.in_features,
                                             self.out_features))
        if bias:
            self.bias = nn.Parameter(torch.empty(1, self.out_features))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        """(Re-)initialise beta with Xavier-uniform values and bias uniformly."""
        init.xavier_uniform_(self.beta)
        if self.bias is not None:
            # beta is stored as (in_features, out_features); the bias bound uses
            # whatever fan-in the helper reports for that layout.
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.beta)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)

    def forward(self, X, mask=None):
        r"""
        Maps the input set X = {x_1, ..., x_M} to a vector y of dimension out_features,
        through a permutation invariant linear transformation of the form:
            $y = \beta reduction(X) + bias$
        Inputs:
        X: N sets of size at most M where each element has dimension in_features
           (tensor with shape (N, M, in_features))
        mask: binary mask to indicate which elements in X are valid (bool or byte
            tensor with shape (N, M) or None); if None, all sets have the maximum
            size M. Default: ``None``.
        Outputs:
        Y: N vectors of dimension out_features (tensor with shape (N, out_features))
        """
        # The forward pass is deterministic and deliberately leaves the global
        # RNG state alone; seeding belongs where the layer is constructed.
        N, M, _ = X.shape
        device = X.device
        if mask is None:
            mask = torch.ones(N, M, dtype=torch.bool, device=device)
        else:
            # Work with a boolean mask: `~` on a uint8 tensor is a bitwise NOT
            # (1 -> 254, 0 -> 255), which would mark every element as invalid.
            mask = mask.to(device=device, dtype=torch.bool)

        if self.reduction == 'mean':
            sizes = mask.float().sum(dim=1).unsqueeze(1)
            Z = X * mask.unsqueeze(2).float()
            y = (Z.sum(dim=1) @ self.beta) / sizes

        elif self.reduction == 'sum':
            Z = X * mask.unsqueeze(2).float()
            y = Z.sum(dim=1) @ self.beta

        elif self.reduction == 'max':
            # Invalid elements must never win the max.
            Z = X.masked_fill(~mask.unsqueeze(2), float('-Inf'))
            y = Z.max(dim=1)[0] @ self.beta

        else:  # min
            # Invalid elements must never win the min.
            Z = X.masked_fill(~mask.unsqueeze(2), float('Inf'))
            y = Z.min(dim=1)[0] @ self.beta

        if self.bias is not None:
            y = y + self.bias

        return y

    def extra_repr(self):
        """Summary of the layer configuration shown in the module's repr."""
        return 'in_features={}, out_features={}, bias={}, reduction={}'.format(
            self.in_features, self.out_features,
            self.bias is not None, self.reduction)


class EquivLinear(InvLinear):
    r"""Permutation equivariant linear layer.

    Every element of the set gets its own linear term plus the shared
    permutation invariant term computed by InvLinear:
        $y_i = \alpha x_i + \beta reduction(X) + bias$

    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        bias: If set to False, the layer will not learn an additive bias.
            Default: ``True``
        reduction: Permutation invariant operation that maps the input set into a single
            vector. Currently, the following are supported: mean, sum, max and min.
    """
    def __init__(self, in_features, out_features, bias=True, reduction='mean'):
        super(EquivLinear, self).__init__(in_features, out_features,
                                          bias=bias, reduction=reduction)

        self.alpha = nn.Parameter(torch.empty(self.in_features,
                                              self.out_features))

        # InvLinear.__init__ already ran reset_parameters() once, before alpha
        # existed; run it again so alpha is initialised as well.
        self.reset_parameters()

    def reset_parameters(self):
        """(Re-)initialise the inherited parameters and, once it exists, alpha."""
        super(EquivLinear, self).reset_parameters()
        # Guarded because the parent constructor calls this before alpha is set.
        if hasattr(self, 'alpha'):
            init.xavier_uniform_(self.alpha)

    def forward(self, X, mask=None):
        r"""
        Maps the input set X = {x_1, ..., x_M} to the output set
        Y = {y_1, ..., y_M} through a permutation equivariant linear transformation
        of the form:
            $y_i = \alpha x_i + \beta reduction(X) + bias$
        Inputs:
        X: N sets of size at most M where each element has dimension in_features
           (tensor with shape (N, M, in_features))
        mask: binary mask to indicate which elements in X are valid (bool or byte
            tensor with shape (N, M) or None); if None, all sets have the maximum
            size M. Default: ``None``.
        Outputs:
        Y: N sets of same cardinality as in X where each element has dimension
           out_features (tensor with shape (N, M, out_features)); rows of invalid
           elements are zero.
        """
        N, M, _ = X.shape
        device = X.device
        if mask is None:
            mask = torch.ones(N, M, dtype=torch.bool, device=device)
        else:
            # Boolean masks avoid the deprecated uint8 indexing path and make
            # `~mask` a logical (not bitwise) negation.
            mask = mask.to(device=device, dtype=torch.bool)

        h_inv = super(EquivLinear, self).forward(X, mask=mask)
        Y = X @ self.alpha + h_inv.unsqueeze(1)

        # Zero out the rows of invalid (padded) elements.
        return Y.masked_fill(~mask.unsqueeze(2), 0)

In [31]:
# Load valtrain.csv; the alltrain.csv path is kept commented out as an alternative input.
# NOTE(review): both paths are absolute and machine-specific.
#df = pd.read_csv('D:/Program Files/R/Rfile/SC_FL_CD/1023/alltrain.csv')
df = pd.read_csv('D:/Program Files/R/Rfile/SC_FL_CD/1023/valtrain.csv')
print(df.shape)
#print(df.head())
# Force every column to a numeric dtype; with errors='coerce' any value that
# cannot be parsed becomes NaN instead of raising.
df= df.apply(pd.to_numeric,errors='coerce')

(44486, 59)


In [None]:
# Collect the distinct values of the 'hospitalid' column; later cells loop over them.
siteid = np.array(df['hospitalid'])
# NOTE(review): `id` shadows the Python builtin of the same name. Later cells read
# this variable, so a rename has to be applied to all of them at once.
id = np.unique(siteid)
print(id)
print(len(id))

In [35]:
#from kmodes.kmodes import KModes

#onedf = df[df['hospitalid']==79]
#onedt = onedf.drop(['hospitalid'],axis=1)
#print(onedf.shape)

#km = KModes(n_clusters=5,init='Huang',n_init=5,verbose=1)
#clusters =km.fit_predict(onedt)#
#print(km.cluster_centroids_)#


In [None]:
from kmodes.kmodes import KModes

# One centroid array per hospital, in the same order as `id`.
dtlist = []

for i, site in enumerate(id):
    print('this is runing:',i)
    # Rows of the current hospital, without the grouping column itself.
    onedf = df[df['hospitalid']==site]
    onedt = onedf.drop(['hospitalid'],axis=1)

    # Seed the estimator through random_state instead of resetting NumPy's
    # global RNG on every iteration: every site still starts from the same
    # seed, but nothing outside the estimator is affected.
    # NOTE(review): expected to reproduce the centroids of the previous
    # np.random.seed(seed) approach - confirm against a saved run.
    km = KModes(n_clusters=10,init='Huang',n_init=5,verbose=1,random_state=seed)
    clusters = km.fit_predict(onedt)
    kmc = km.cluster_centroids_
    dtlist.append(kmc)


In [None]:
# Inspect the collected centroid arrays (one list entry per hospital).
dtlist

In [39]:
# Stack the per-site centroid arrays into a single array and convert it to a
# float tensor; InvLinear expects (n_sets, set_size, n_features).
data_list = np.array(dtlist)
t = torch.tensor(data_list,dtype= torch.float)

# The layer is used untrained, so its output depends entirely on the random
# initialisation. Seed torch immediately before construction so that
# re-running this cell always yields the same weights.
torch.manual_seed(seed)
# Take the input width from the data rather than hard-coding it.
deep_sets = InvLinear(in_features=t.shape[-1], out_features=10)
np.random.seed(seed)

In [None]:
# Call the module itself rather than .forward() so that any registered hooks run.
y1 = deep_sets(t)
# Detach from the autograd graph before converting to a NumPy array.
y =  y1.detach().numpy()
y


In [41]:
#y.to_csv('E:/deepset/vc1.csv',index=False)

In [42]:
# Wrap the NumPy output in a DataFrame (default integer index and column labels)
# so it can be labelled and exported below.
y2 = pd.DataFrame(y)
y2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,-4.343514,-0.470205,-5.780606,1.961841,-1.940944,1.399573,-0.673334,-2.169806,-0.438555,-1.526194
1,-4.468988,-0.463305,-5.871291,1.982735,-1.889553,1.412642,-0.520311,-2.189789,-0.365389,-1.49148
2,-4.51102,-0.452256,-5.814909,2.135624,-1.987322,1.300354,-0.469966,-2.525029,-0.454924,-1.668514
3,-4.562456,-0.198412,-5.874629,2.334005,-2.022654,1.461627,-0.308126,-2.261543,-0.714481,-1.573503
4,-4.438083,-0.645153,-5.910209,2.030208,-1.800816,1.501837,-0.417466,-2.207183,-0.373309,-1.511065
5,-4.478323,-0.378768,-5.769333,2.025003,-1.893234,1.627165,-0.494353,-2.301847,-0.527648,-1.52027
6,-4.504324,-0.588872,-5.946111,1.963259,-1.849977,1.485801,-0.706335,-2.229362,-0.316175,-1.557958
7,-4.479429,-0.530021,-5.837025,2.141378,-2.090778,1.547986,-0.558126,-2.502913,-0.464672,-1.473361
8,-4.414129,-0.365094,-6.136842,2.13839,-2.010524,1.420078,-0.367815,-2.320865,-0.614544,-1.740168
9,-4.542788,-0.681623,-5.881545,2.023903,-2.26698,1.632286,-0.901849,-2.412902,-0.430513,-1.623027


In [None]:
# 
# Build one "site<hospitalid>" label per row and use the labels as the row index.
# Note: this rebinds `siteid` (previously the raw id array) and mutates y2 in place.
siteid = ['site{}'.format(site) for site in id]
print(siteid)
y2.index = siteid

In [44]:
# Check that the site labels replaced the default integer index.
y2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
site63,-4.343514,-0.470205,-5.780606,1.961841,-1.940944,1.399573,-0.673334,-2.169806,-0.438555,-1.526194
site146,-4.468988,-0.463305,-5.871291,1.982735,-1.889553,1.412642,-0.520311,-2.189789,-0.365389,-1.49148
site152,-4.51102,-0.452256,-5.814909,2.135624,-1.987322,1.300354,-0.469966,-2.525029,-0.454924,-1.668514
site154,-4.562456,-0.198412,-5.874629,2.334005,-2.022654,1.461627,-0.308126,-2.261543,-0.714481,-1.573503
site165,-4.438083,-0.645153,-5.910209,2.030208,-1.800816,1.501837,-0.417466,-2.207183,-0.373309,-1.511065
site167,-4.478323,-0.378768,-5.769333,2.025003,-1.893234,1.627165,-0.494353,-2.301847,-0.527648,-1.52027
site171,-4.504324,-0.588872,-5.946111,1.963259,-1.849977,1.485801,-0.706335,-2.229362,-0.316175,-1.557958
site176,-4.479429,-0.530021,-5.837025,2.141378,-2.090778,1.547986,-0.558126,-2.502913,-0.464672,-1.473361
site197,-4.414129,-0.365094,-6.136842,2.13839,-2.010524,1.420078,-0.367815,-2.320865,-0.614544,-1.740168
site199,-4.542788,-0.681623,-5.881545,2.023903,-2.26698,1.632286,-0.901849,-2.412902,-0.430513,-1.623027


In [45]:

# Write the per-site vectors to CSV; index=True keeps the site labels as the first column.
# NOTE(review): absolute, machine-specific output path.
y2.to_csv('D:/Program Files/R/Rfile/SC_FL_CD/1023/valVector.csv',index=True)