# Init

In [1]:
import h5py, torch, time, datetime, os
import numpy as np
import matplotlib.pyplot as plt
from torch import nn
from torch.nn.modules import Module

In [541]:
class OurModel(nn.Module):
   
    def __init__(self, AR = [1, 3, 3, 1] , AF = 'ReLU' ):               
        self.NumberOfEpochs = 100
        self.InitialLearningRate = 0.01
        self.SaveAfterEpoch = [self.NumberOfEpochs]
        self.SaveAfterEpoch = lambda :[self.NumberOfEpochs]  
        super(OurModel, self).__init__() 
        if AF in ['ReLU', 'Sigmoid']:
            self.ActivationFunction = AF
        else:
            print('Valid Activations are: [\'ReLU\', \'Sigmoid\'].')
            self.ActivationFunction = 'ReLU'
        if type(AR) == list:
            if all(isinstance(n, int) for n in AR):
                self.Architecture = AR
            else:
                print('Architecture should be a list of integers !')
                self.Architecture = [1, 3, 3, 1]
        else:
            print('Architecture should be a list !')
            self.Architecture = [1, 3, 3, 1]
        self.fclist1  = nn.ModuleList([nn.Linear(self.Architecture[i], 
        self.Architecture[i+1]) for i in range(len(self.Architecture)-2)])
        self.fc1 = nn.Linear(self.Architecture[-2], 1)       
        self.fclist2 = nn.ModuleList([nn.Linear(self.Architecture[i], 
        self.Architecture[i+1]) for i in range(len(self.Architecture)-2)])
        self.fc2 = nn.Linear(self.Architecture[-2], 1)
        
    def SetPreprocess(self, TrainingData, TrainingDataParameters):
        self.Scaling = TrainingData.std(0)
        self.Shift = TrainingData.mean(0)
        self.ParameterScaling = TrainingDataParameters.std(0)

    def forward(self, x):       
        activation = torch.relu if self.ActivationFunction == 'ReLU' else None
        activation = torch.sigmoid if self.ActivationFunction == 'Sigmoid' else activation
        input_dim = self.Architecture[0]
        x, p    = x[:, :input_dim], x[:, input_dim:-1].squeeze() 
        
        if not hasattr(self, 'Shift'):
            print('Please Set Preprocess!')
        
        with torch.no_grad():
            x = (x - self.Shift)/self.Scaling
            p = p/self.ParameterScaling
        
        x1 = x2 = x
        
        for i, l in enumerate(self.fclist1):
            x1 = activation(l(x1))
        x1 = self.fc1(x1).squeeze()
        
        for i, l in enumerate(self.fclist2):
            x2 = activation(l(x2))
        x2 = self.fc2(x2).squeeze()
        
        capf   = torch.log(
            ((1 + torch.mul(x1, p))**2 + (torch.mul(x2, p))**2))        
        return torch.sigmoid(capf).view(-1, 1)
    
    def get_L1max(self, value_per_unit):
        L1_max = []
        for m in model_plus.children():
            if isinstance(m, nn.Linear):
                L1_max.append(m.weight.size(0)*m.weight.size(1) \
                                  * value_per_unit)
            else:
                for mm in m:
                    L1_max.append(mm.weight.size(0)*mm.weight.size(1)\
                                      *value_per_unit)
        self.L1max_list = L1_max
    
    def clip_L1(self):
        counter = 0
        for m in model_plus.children():
            if isinstance(m, nn.Linear):
                counter += 1
                with torch.no_grad():
                    designated_L1 = self.L1max_list[counter-1]
                    if designated_L1 == value_per_unit*n_neurons[0]*n_neurons[1]:
                        continue                                #skipping first layer
                    L1 = m.weight.abs().sum()
                    m.weight.masked_scatter_(L1>self.L1max_list[counter-1],
                                            m.weight/L1*self.L1max_list[counter-1])
            else:
                for mm in m:
                    counter +=1
                    with torch.no_grad():
                        designated_L1 = self.L1max_list[counter-1]
                        if designated_L1 == value_per_unit*n_neurons[0]*n_neurons[1]:
                            continue                            #skipping first layer
                        L1 = mm.weight.abs().sum()
                        mm.weight.masked_scatter_(L1>designated_L1,
                                            mm.weight/L1*designated_L1)
    
    def calculate_ratio(self, points):
        with torch.no_grad():
            y = self(points)
        return y/(1-y)    
    
    def SetNumberOfEpochs(self,NE):
        self.NumberOfEpochs = NE
        #self.SaveAfterEpoch = [self.NumberOfEpochs]
    def SetInitialLearningRate(self,ILR):
        self.InitialLearningRate = ILR
    def SetSaveAfterEpoch(self,SAE):
        SAE.sort()
        self.SaveAfterEpoch = SAE
    def Report(self):
        print(
        'Architecture = ' + str(self.Architecture) + 
        '\nActivation function = ' + self.ActivationFunction +
        '\nInitial learning rate = ' + str(self.InitialLearningRate) +
        '\nNumber of epochs = ' + str(self.NumberOfEpochs) +
        '\nSaving network after epoch(s): ' + str(self.SaveAfterEpoch)
        )

In [547]:
x = OurModel([1,24,1] ,'Sigmoid')
print(x.Architecture)
print(x.Architecture[0])
print(x.ActivationFunction)
print(x.NumberOfEpochs)
print(x.NumberOfEpochs)
x.SetNumberOfEpochs(25000)
print(x.NumberOfEpochs)
print(x.SaveAfterEpoch())
x.SetSaveAfterEpoch([203,47])
x.SetInitialLearningRate(23)
print(x.InitialLearningRate)
x.Report()
x.SetPreprocess(torch.Tensor([[2],[5]]), torch.Tensor([1, 2]))
x.forward(torch.Tensor([[1,2,4],[2,4,5]]))

[1, 24, 1]
1
Sigmoid
100
100
25000
[25000]
23
Architecture = [1, 24, 1]
Activation function = Sigmoid
Initial learning rate = 23
Number of epochs = 25000
Saving network after epoch(s): [47, 203]


tensor([[0.2780],
        [0.6618]], grad_fn=<ViewBackward>)

In [296]:
class _Loss(Module):
    def __init__(self, size_average=None, reduce=None, reduction='mean'):
        super(_Loss, self).__init__()
        if size_average is not None or reduce is not None:
            self.reduction = _Reduction.legacy_get_string(size_average, reduce)
        else:
            self.reduction = reduction

class WeightedMSELoss(_Loss):
    __constants__ = ['reduction']

    def __init__(self, size_average=None, reduce=None, reduction='mean'):
        super(WeightedMSELoss, self).__init__(size_average, reduce, reduction)

    def forward(self, input, target, weight):
        return torch.mean(torch.mul(weight, (input - target)**2))

In [593]:
class OurTrainingData():
    def __init__(self, filepathlist, ): 
        if type(filepathlist) == list:
            if all(isinstance(n, str) for n in filepathlist):
                self.FilePathList = filepathlist
                #print('Reading Files ...')
                #print(*self.FilePathList, sep = '\n')
                def ReadFile(path): 
                    print('Reading File ...' + path)
                    file = h5py.File(path, 'r')
                    #print(list(file.keys()))
                    if list(file.keys()) != ['Data', 'Info', 'Parameters', 'Values', 'Weights']:
                        print('File format not valid: ' + path)
                        return None
                    else:
                        return [file['Info'][()][0], np.array(file['Parameters'][()]), np.array(file['Values'][()]), file['Data'][()], file['Weights'][()]]
                ImportedFiles = list(map(ReadFile,self.FilePathList))
                if not None in ImportedFiles:
                    self.InfoList, self.ParametersList, self.ValuesList, self.DataList, self.WeigthsList = list(map(list, zip(*ImportedFiles)))
                    def f(x) : 
                        if x.sum() == 0:
                            return 0
                        else:
                            return 1
                    self.TargetList = map(f, self.ValuesList)
                    if np.all(self.ParametersList == self.ParametersList[0]):
                        self.Parameters = self.ParametersList[0]
                    else:
                        print('Files have different Parameters !')
                    def f(x) :
                        nonzero = x[0].nonzero()
                        return self.Parameters[nonzero]
                    print(self.ValuesList[0])
            else:
                print('Input should be a list of strings !')
        else:
            print('Input should be a list !')
    def Report(self):
        from tabulate import tabulate
        print('Report:')
        print(tabulate({"File": self.FilePathList, "Info": self.InfoList, "Parameters": self.ParametersList, "Values": self.ValuesList, 
                        "Target": self.TargetList, "#Events": list(map(len, self.DataList))}, headers="keys"))
        #print(*self.FilePathList, sep = '\n')
        #print(list(map(len, self.DataList)))
        #print(list(self.ParametersList))
        #print(list(self.ValuesList))       
#def Import_pData(datafilepath):
#    print(datafilepath)
#    datafile = h5py.File(datafilepath, 'r')
#    print(list(datafile.keys()))
#    if list(datafile.keys()) != ['Data', 'Weights']:
#        print('File format not vadid: ' + datafilepath)
#        return 1
#    else:
#        data=datafile['Data']
#        weights=datafile['Weights']
#        #print(list(weights))
#        return np.array([weights,data])


In [41]:
class OurTrainingData():
    def __init__(self, SMfilepathlist, BSMfilepathlist, parameters ): 
        self.Parameters = parameters
        print('Only 1D Implemented in Training !') if len(self.Parameters)!= 1 else print('Loading Data with Parameters: ' + str(parameters) )       
####### Load SM data
        if type(SMfilepathlist) == list:
            if all(isinstance(n, str) for n in SMfilepathlist):
                self.SMFilePathList = SMfilepathlist
                self.SMNumFiles = len(self.SMFilePathList)
                #print('Reading Files ...')
                #print(*self.FilePathList, sep = '\n')
                def ReadSMFile(path): 
                    print('Reading SM File ...' + path)
                    file = h5py.File(path, 'r')
                    #print(list(file.keys()))
                    #print(file['Values'][()])
                    #print([0] * len(file['Values'][()]))
                    if list(file.keys()) == ['Data', 'Info', 'Parameters', 'Values', 'Weights']:
                        if file['Values'][()] == [0] * len(file['Values'][()]):
                            return [file['Info'][()][0], np.array(file['Parameters'][()]), torch.Tensor(file['Values'][()]), torch.Tensor(file['Data'][()]), 
                                torch.Tensor(file['Weights'][()])]    
                        else:
                            print('File: ' + path + ' is of BSM type!')
                            return None
                    else:
                        print('File format not valid: ' + path)
                        return None
                ImportedFiles = list(map(ReadSMFile, self.SMFilePathList))
                if not None in ImportedFiles:
                    self.SMInfoList, self.SMParametersList, self.SMValuesList, self.SMDataList, self.SMWeightsList = list(map(list, zip(*ImportedFiles)))
                    self.SMNDataList =  list(map(len, self.SMDataList))
                    #def f(x) : 
                    #    if x.sum() == 0:
                    #        return 0
                    #    else:
                    #        return 1
                    #self.TargetList = list(map(f, self.ValuesList))
                    #if np.all(self.ParametersList !=  self.Parameters): print('Not all files have ' + str(self.Parameters) + 'Parameters !')
            else:
                print('SMfilefathlist input should be a list of strings !')
        else:
            print('SMfilefathlist input should be a list !')

####### Join SM data and SM weigths
        self.SMData = torch.cat(self.SMDataList, 0)  
        self.SMWeights = torch.cat(self.SMWeightsList, 0)
        self.SMNData = sum(self.SMNDataList)
####### Load BSM data
        if type(BSMfilepathlist) == list:
            if all(isinstance(n, str) for n in BSMfilepathlist):
                self.BSMFilePathList = BSMfilepathlist
                self.BSMNumFiles = len(self.BSMFilePathList)
                #print('Reading Files ...')
                #print(*self.FilePathList, sep = '\n')
                def ReadBSMFile(path): 
                    print('Reading BSM File ...' + path)
                    file = h5py.File(path, 'r')
                    if list(file.keys()) == ['Data', 'Info', 'Parameters', 'Values', 'Weights']:
                        if file['Values'][()] != [0] * len(file['Values'][()]):
                            return [file['Info'][()][0], np.array(file['Parameters'][()]), torch.Tensor(file['Values'][()]), torch.Tensor(file['Data'][()]), 
                                torch.Tensor(file['Weights'][()])]    
                        else:
                            print('File: ' + path + ' is of SM type!')
                            return None
                    else:
                        print('File format not valid: ' + path)
                        return None
                ImportedFiles = list(map(ReadBSMFile, self.BSMFilePathList))
                if not None in ImportedFiles:
                    self.BSMInfoList, self.BSMParametersList, self.BSMValuesList, self.BSMDataList, self.BSMWeightsList = list(map(list, zip(*ImportedFiles)))
                    self.BSMNDataList =  list(map(len, self.BSMDataList))
                    #def f(x) : 
                    #    if x.sum() == 0:
                    #        return 0
                    #    else:
                    #        return 1
                    #self.TargetList = list(map(f, self.ValuesList))
                    #if np.all(self.ParametersList !=  self.Parameters): print('Not all files have ' + str(self.Parameters) + 'Parameters !')
            else:
                print('BSMfilefathlist input should be a list of strings !')
        else:
            print('BSMfilefathlist input should be a list !')
####### Prepare SM data
        BSMNRatioDataList = list(map(lambda n: n/sum(self.BSMNDataList), self.BSMNDataList))
        self.SMSampleSizeList = [int(self.SMNData*BSMNRatioData) for BSMNRatioData in BSMNRatioDataList] 
        self.SMValues = torch.cat([torch.ones(self.SMSampleSizeList[i])*self.BSMValuesList[i]
                             for i in range(len(BSMNRatioDataList))])
        self.SMNData = sum(self.SMSampleSizeList)
        self.SMData = self.SMData[: self.SMNData]
        ReWeighting = torch.cat([torch.ones(self.SMSampleSizeList[i])*self.BSMNDataList[i]/self.SMSampleSizeList[i] 
                                 for i in range(len(BSMNRatioDataList))])
        self.SMWeights = self.SMWeights[:self.SMNData].mul(ReWeighting)
####### Labels, join BSM and SM
####### Allow taking less data
    def ReturnData(self):
        output = []
        for i in range (0, len(self.InfoList)):
            print('here' + str(self.TargetList[i]))
            targ = np.empty(self.NDataList[i])
            targ.fill(self.TargetList[i])
            output.extend(targ)
        return np.array(output)
 #       def f(target,):
  #          target = x[0]
   #         Data = x[1]
    #        tl = np.empty(len(Data))
     #       tl.fill(target)
                
    def Report(self):
        from tabulate import tabulate
        print('Report:')
        print(tabulate({"SM File": self.SMFilePathList, "Info": self.SMInfoList, "Parameters": self.SMParametersList, "Values": self.SMValuesList, 
                        #"Target": self.TargetList,
                        "#Events": self.SMNDataList}, headers="keys"))
        print(tabulate({"BSM File": self.BSMFilePathList, "Info": self.BSMInfoList, "Parameters": self.BSMParametersList, "Values": self.BSMValuesList, 
                        #"Target": self.TargetList,
                        "#Events": self.BSMNDataList}, headers="keys"))
        #print(*self.FilePathList, sep = '\n')
        #print(list(map(len, self.DataList)))
        #print(list(self.ParametersList))
        #print(list(self.ValuesList))       
#def Import_pData(datafilepath):
#    print(datafilepath)
#    datafile = h5py.File(datafilepath, 'r')
#    print(list(datafile.keys()))
#    if list(datafile.keys()) != ['Data', 'Weights']:
#        print('File format not vadid: ' + datafilepath)
#        return 1
#    else:
#        data=datafile['Data']
#        weights=datafile['Weights']
#        #print(list(weights))
#        return np.array([weights,data])

In [42]:
t = OurTrainingData(['/data3/MadGraph/testSM1.h5','/data3/MadGraph/testSM2.h5'],['/data3/MadGraph/testBSM1.h5', '/data3/MadGraph/testBSM2.h5'], ['GW'])
t.Report()
#o = t.ReturnData()
#print(o)

Loading Data with Parameters: ['GW']
Reading SM File .../data3/MadGraph/testSM1.h5
Reading SM File .../data3/MadGraph/testSM2.h5
Reading BSM File .../data3/MadGraph/testBSM1.h5
Reading BSM File .../data3/MadGraph/testBSM2.h5
[0.9052491946181542, 0.09475080538184574]
tensor([1.8210e-07, 1.8210e-07, 1.8210e-07,  ..., 1.8210e-07, 1.8210e-07,
        1.8210e-07])
tensor([1.0498, 1.0498, 1.0498,  ..., 1.0499, 1.0499, 1.0499])
tensor([1.9116e-07, 1.9116e-07, 1.9116e-07,  ..., 1.9118e-07, 1.9118e-07,
        1.9118e-07])
tensor([ 0.1000,  0.1000,  0.1000,  ..., -0.3000, -0.3000, -0.3000])
torch.Size([20107])
Report:
SM File                     Info    Parameters      Values    #Events
--------------------------  ------  ------------  --------  ---------
/data3/MadGraph/testSM1.h5  blabla  ['GW']               0      19108
/data3/MadGraph/testSM2.h5  blabla  ['GW']               0       1000
BSM File                     Info    Parameters      Values    #Events
---------------------------  ---

In [30]:
arr1 = np.array([1, 2, 3])

arr2 = np.array([4, 5, 6])

arr = np.concatenate((arr1, arr2))

print(arr)

[1 2 3 4 5 6]


In [661]:
class OurTrainingData():
    def __init__(self, filepathlist, parameters = ['GW']): 
        self.Parameters = parameters
        print('Only 1D Implemented in Training !') if len(self.Parameters)!=1 else None
        if type(filepathlist) == list:
            if all(isinstance(n, str) for n in filepathlist):
                self.FilePathList = filepathlist
                #print('Reading Files ...')
                #print(*self.FilePathList, sep = '\n')
                def ReadFile(path): 
                    print('Reading File ...' + path)
                    file = h5py.File(path, 'r')
                    #print(list(file.keys()))
                    if list(file.keys()) != ['Data', 'Info', 'Parameters', 'Values', 'Weights']:
                        print('File format not valid: ' + path)
                        return None
                    else:
                        return [file['Info'][()][0], np.array(file['Parameters'][()]), np.array(file['Values'][()]), np.array(file['Data'][()]), 
                                np.array(file['Weights'][()])]
                ImportedFiles = list(map(ReadFile, self.FilePathList))
                if not None in ImportedFiles:
                    self.InfoList, self.ParametersList, self.ValuesList, self.DataList, self.WeigthsList = list(map(list, zip(*ImportedFiles)))
                    self.NDataList =  list(map(len, self.DataList))
                    def f(x) : 
                        if x.sum() == 0:
                            return 0
                        else:
                            return 1
                    self.TargetList = list(map(f, self.ValuesList))
                    if np.all(self.ParametersList !=  self.Parameters): print('Not all files have ' + str(self.Parameters) + 'Parameters !')
            else:
                print('Input should be a list of strings !')
        else:
            print('Input should be a list !')
    def ReturnData(self):
        output = []
        for i in range (0, len(self.InfoList)):
            print('here' + str(self.TargetList[i]))
            targ = np.empty(self.NDataList[i])
            targ.fill(self.TargetList[i])
            output.extend(targ)
        return np.array(output)
 #       def f(target,):
  #          target = x[0]
   #         Data = x[1]
    #        tl = np.empty(len(Data))
     #       tl.fill(target)
                
    def Report(self):
        from tabulate import tabulate
        print('Report:')
        print(tabulate({"File": self.FilePathList, "Info": self.InfoList, "Parameters": self.ParametersList, "Values": self.ValuesList, 
                        "Target": self.TargetList, "#Events": self.NDataList}, headers="keys"))
        #print(*self.FilePathList, sep = '\n')
        #print(list(map(len, self.DataList)))
        #print(list(self.ParametersList))
        #print(list(self.ValuesList))       
#def Import_pData(datafilepath):
#    print(datafilepath)
#    datafile = h5py.File(datafilepath, 'r')
#    print(list(datafile.keys()))
#    if list(datafile.keys()) != ['Data', 'Weights']:
#        print('File format not vadid: ' + datafilepath)
#        return 1
#    else:
#        data=datafile['Data']
#        weights=datafile['Weights']
#        #print(list(weights))
#        return np.array([weights,data])

In [644]:
a = np.array([0,0.])
b = np.array(['f','g'])
a.sum()
print(a[a.nonzero()])
print(b[a.nonzero()])
print('here') if True else None
a = np.array([[1,2,3],[3,4,5]])
len(a)
a = []
a.append([1])
print(a)

[]
[]
here
[[1]]


In [442]:
def f(x): return -x, x
a, b, = list(map(f,[3,4]))
print(a)
list(map(list, zip(*[[1,2],[3,4]])))

(-3, 3)


[[1, 3], [2, 4]]

Name      Age
------  -----
Alice      24
Bob        19


In [290]:
torch.Tensor([[1,2,3],[2,4,5]])

tensor([[1., 2., 3.],
        [2., 4., 5.]])

In [246]:
a = [3, 2]
a.sort()
print(a)

[2, 3]


In [159]:
data[0:2]

array([array([1.59062308e+06, 7.38849819e-01, 1.11762201e+00, 3.08047003e+00,
       1.31599059e+00, 3.50028196e+00, 4.13548874e+02, 2.37012655e+01,
       4.18555177e+02, 8.01572954e+03, 6.37188323e+03]),
       array([3.68781055e+06, 3.83362445e-01, 1.53083097e+00, 1.67333005e+00,
       4.64560989e-01, 2.70386282e-01, 3.15489925e+02, 5.96262828e+01,
       3.46105482e+02, 7.86621494e+03, 6.37189199e+03])], dtype=object)

In [157]:
weights[0:3]

array([1.821e-07, 1.821e-07, 1.821e-07], dtype=object)

In [160]:
class QuadraticNet(nn.Module):
    def __init__(self):
        super(QuadraticNet, self).__init__()
        self.fclist1  = nn.ModuleList([nn.Linear(n_neurons[i], 
            n_neurons[i+1]) for i in range(len(n_neurons)-1)])
        self.fc1     = nn.Linear(n_neurons[-1], 1)
        
        self.fclist2  = nn.ModuleList([nn.Linear(n_neurons[i], 
            n_neurons[i+1]) for i in range(len(n_neurons)-1)])
        self.fc2     = nn.Linear(n_neurons[-1], 1)
        
    def forward(self, x):
        x, p    = x[:, :input_dim], x[:, input_dim:-1].squeeze() 
        x1 = x2 = x
        
        for i, l in enumerate(self.fclist1):
            x1 = torch.sigmoid(l(x1))
        x1     = self.fc1(x1).squeeze()
        
        for i, l in enumerate(self.fclist2):
            x2 = torch.sigmoid(l(x2))
        x2     = self.fc2(x2).squeeze()
        
        capf   = torch.log(
            ((1 + torch.mul(x1, p))**2 + (torch.mul(x2, p))**2))
        
        return torch.sigmoid(capf).view(-1, 1)
    
    def get_L1max(self, value_per_unit):
        L1_max = []
        for m in model_plus.children():
            if isinstance(m, nn.Linear):
                L1_max.append(m.weight.size(0)*m.weight.size(1) \
                                  * value_per_unit)
            else:
                for mm in m:
                    L1_max.append(mm.weight.size(0)*mm.weight.size(1)\
                                      *value_per_unit)
        self.L1max_list = L1_max
    
    def clip_L1(self):
        counter = 0
        for m in model_plus.children():
            if isinstance(m, nn.Linear):
                counter += 1
                with torch.no_grad():
                    designated_L1 = self.L1max_list[counter-1]
                    if designated_L1 == value_per_unit*n_neurons[0]*n_neurons[1]:
                        continue                                #skipping first layer
                    L1 = m.weight.abs().sum()
                    m.weight.masked_scatter_(L1>self.L1max_list[counter-1],
                                            m.weight/L1*self.L1max_list[counter-1])
            else:
                for mm in m:
                    counter +=1
                    with torch.no_grad():
                        designated_L1 = self.L1max_list[counter-1]
                        if designated_L1 == value_per_unit*n_neurons[0]*n_neurons[1]:
                            continue                            #skipping first layer
                        L1 = mm.weight.abs().sum()
                        mm.weight.masked_scatter_(L1>designated_L1,
                                            mm.weight/L1*designated_L1)
    
    def calculate_ratio(self, points):
        with torch.no_grad():
            y = self(points)
        return y/(1-y)

In [126]:
l=np.array([[1,2],[4,5]]).transpose()
print(l)

[[1 4]
 [2 5]]


In [145]:
l[0][0]

1

In [24]:
f=2
print(f)

2


In [71]:
del test

In [79]:
test3 = h5py.File('/data3/MadGraph/test.h5', 'r')
list(test3.keys())

['Data', 'Nev13']

In [18]:
test = h5py.File('/data3/MadGraph/test.h5', 'r')
print(list(test.keys()))
data=test['Dataset1']
print(list(data))
data[2]

['Dataset1']
[1, 2, 3, 4]


3

# Multiple Functions

In [10]:
#### helper functions ####

def convert_angles_in_data(data, angle_pos):
    nonangle_pos = list(set(range(data.shape[1]))-set(angle_pos))
    nonangle_pos.sort()

    catdata = torch.cat((data[:, angle_pos].cos_(), 
                         data[:, angle_pos].sin_(),
                         data[:, nonangle_pos]), 1)
    
    return catdata

def append_constant(data, constant):
    return torch.cat((data, torch.ones(data.size(0), 1)*float(constant)), 1)

def report_ETA(beginning, start, epochs, e, loss):
    time_elapsed = time.time() - start
    time_left    = str(datetime.timedelta(
        seconds=((time.time() - beginning)/(e+1)*(epochs-(e+1)))))
    print('Training epoch %s (took %.2f sec, time left %s sec) loss %.8f'%(
        e, time_elapsed, time_left, loss))
    return time.time()

def simpleplot(tsm, tbsm, title, sep, p, deltap):
    plt.figure(figsize=(8, 6))
    ax = plt.subplot()
    plt.hist(tsm,  50, alpha=0.5, label='SM')
    plt.hist(tbsm, 50, alpha=0.5, label='BSM')
    plt.title(title)
    plt.legend(loc='upper right')
    
    plt.text(x=0.05, y=0.85, transform=ax.transAxes, 
         s='sep = %.3f\np = %.3f +/- %.3f'%(sep, p, deltap), 
         bbox=dict(facecolor='blue', alpha=0.2))
    
    plt.savefig(outputfolder + '/' + title+'.pdf')
    plt.show()

def combine_pm(tsm_plus, tbsm_plus, tsm_minus, tbsm_minus, e):
    len_sm    = min(len(tsm_plus), len(tsm_minus))
    len_bsm   = min(len(tbsm_plus), len(tbsm_minus))
    
    tsm       = (tsm_plus[:len_sm] + tsm_minus[:len_sm])
    tbsm      = (tbsm_plus[:len_bsm] + tbsm_minus[:len_bsm])
    
    mu_sm     = tsm.mean().item()
    mu_bsm    = tbsm.mean().item()
    sigma_sm  = tsm.std().item()
    sigma_bsm = tbsm.std().item()
    med_sm    = tsm.median().item()
    
    sep    = (mu_sm - mu_bsm)/sigma_bsm
    p      = 1.*len([i for i in tbsm if i > med_sm])/len(tsm)
    
    delta1 = (p * (1 - p)/min(len_sm, len_bsm))**0.5
    delta2 = (sigma_sm/sigma_bsm) * np.exp(-((mu_bsm - mu_sm)**2)/(
            2 * sigma_bsm**2))/(2*(n_meas**0.5))
    deltap = (delta1**2 + delta2**2)**0.5
    
    title = '%s, %s%s, combined, %s, N=%d, epochs=%d'%(
                     outputheader, str(n_neurons), bsm_op, bsm_test, N, e)

    simpleplot(tsm, tbsm, title, sep, p, deltap)
    
    return (p, deltap)

def conclude(title, p_history, outputfolder):
    title = 'Test p value history - ' + title
    plt.subplots(figsize = (8,4))
    plt.xscale('log')
    plt.ylim(top = max(map(lambda entry: entry[1], p_history[1:])))
    plt.title(title)
    plt.xlabel('epochs')
    plt.ylabel('p value')

    x    = list(map(lambda l: l[0], p_history))
    y    = list(map(lambda l: l[1], p_history))
    yerr = list(map(lambda l: l[2], p_history))
    plt.errorbar(x, y, yerr = yerr)
    
    plt.hlines(y=0.05, xmin=x[0], xmax=x[-1], colors='red')

    plt.savefig(outputfolder + title + '.pdf')
    plt.show()
    plt.close()
    np.savetxt(outputfolder + title + '.csv', p_history)

In [11]:
def multiply_by_constant(data, constant, pos, inplace=True):
    updatevalue = torch.mul(data[:, pos], constant).view(-1, 1)
    if inplace:
        return torch.cat((data[:, :pos], updatevalue, data[:, pos+1:]), 1)
    else:
        return torch.cat((data[:, :pos], data[:, pos+1:], updatevalue), 1)

def take_ratio(data, num_pos, den_pos):
    ratio = data[:, num_pos]/data[:, den_pos]
    data[:, num_pos] = ratio
    
    return data

# Network and Loss

In [164]:
class QuadraticNet(n_neurons):
    def __init__(self):
        super(QuadraticNet, self).__init__()
        self.fclist1  = nn.ModuleList([nn.Linear(n_neurons[i], 
            n_neurons[i+1]) for i in range(len(n_neurons)-1)])
        self.fc1     = nn.Linear(n_neurons[-1], 1)
        
        self.fclist2  = nn.ModuleList([nn.Linear(n_neurons[i], 
            n_neurons[i+1]) for i in range(len(n_neurons)-1)])
        self.fc2     = nn.Linear(n_neurons[-1], 1)
        
    def forward(self, x):
        x, p    = x[:, :input_dim], x[:, input_dim:-1].squeeze() 
        x1 = x2 = x
        
        for i, l in enumerate(self.fclist1):
            x1 = torch.sigmoid(l(x1))
        x1     = self.fc1(x1).squeeze()
        
        for i, l in enumerate(self.fclist2):
            x2 = torch.sigmoid(l(x2))
        x2     = self.fc2(x2).squeeze()
        
        capf   = torch.log(
            ((1 + torch.mul(x1, p))**2 + (torch.mul(x2, p))**2))
        
        return torch.sigmoid(capf).view(-1, 1)
    
    def get_L1max(self, value_per_unit):
        L1_max = []
        for m in model_plus.children():
            if isinstance(m, nn.Linear):
                L1_max.append(m.weight.size(0)*m.weight.size(1) \
                                  * value_per_unit)
            else:
                for mm in m:
                    L1_max.append(mm.weight.size(0)*mm.weight.size(1)\
                                      *value_per_unit)
        self.L1max_list = L1_max
    
    def clip_L1(self):
        counter = 0
        for m in model_plus.children():
            if isinstance(m, nn.Linear):
                counter += 1
                with torch.no_grad():
                    designated_L1 = self.L1max_list[counter-1]
                    if designated_L1 == value_per_unit*n_neurons[0]*n_neurons[1]:
                        continue                                #skipping first layer
                    L1 = m.weight.abs().sum()
                    m.weight.masked_scatter_(L1>self.L1max_list[counter-1],
                                            m.weight/L1*self.L1max_list[counter-1])
            else:
                for mm in m:
                    counter +=1
                    with torch.no_grad():
                        designated_L1 = self.L1max_list[counter-1]
                        if designated_L1 == value_per_unit*n_neurons[0]*n_neurons[1]:
                            continue                            #skipping first layer
                        L1 = mm.weight.abs().sum()
                        mm.weight.masked_scatter_(L1>designated_L1,
                                            mm.weight/L1*designated_L1)
    
    def calculate_ratio(self, points):
        with torch.no_grad():
            y = self(points)
        return y/(1-y)

NameError: name 'n_neurons' is not defined

In [163]:
QuadraticNet()

NameError: name 'n_neurons' is not defined

In [13]:
class QuadraticNetReLU(QuadraticNet):
    def __init__(self):
        super(QuadraticNetReLU, self).__init__()
        self.fclist1  = nn.ModuleList([nn.Linear(n_neurons[i], 
            n_neurons[i+1]) for i in range(len(n_neurons)-1)])
        self.fc1     = nn.Linear(n_neurons[-1], 1)
        
        self.fclist2  = nn.ModuleList([nn.Linear(n_neurons[i], 
            n_neurons[i+1]) for i in range(len(n_neurons)-1)])
        self.fc2     = nn.Linear(n_neurons[-1], 1)
        
    def forward(self, x):
        x, p    = x[:, :input_dim], x[:, input_dim:-1].squeeze()
        x1 = x2 = x
        
        for i, l in enumerate(self.fclist1):
            x1 = torch.nn.functional.relu(l(x1))
        x1     = self.fc1(x1).squeeze()
        
        for i, l in enumerate(self.fclist2):
            x2 = torch.nn.functional.relu(l(x2))
        x2     = self.fc2(x2).squeeze()
        
        capf   = torch.log(
            ((1 + torch.mul(x1, p))**2 + (torch.mul(x2, p))**2))
        
        return torch.sigmoid(capf).view(-1, 1)

In [14]:
class _Loss(Module):
    def __init__(self, size_average=None, reduce=None, reduction='mean'):
        super(_Loss, self).__init__()
        if size_average is not None or reduce is not None:
            self.reduction = _Reduction.legacy_get_string(size_average, reduce)
        else:
            self.reduction = reduction

class WeightedMSELoss(_Loss):
    __constants__ = ['reduction']

    def __init__(self, size_average=None, reduce=None, reduction='mean'):
        super(WeightedMSELoss, self).__init__(size_average, reduce, reduction)

    def forward(self, input, target, weight):
        return torch.mean(torch.mul(weight, (input - target)**2))

### Read Data - Train Data

### Training 

In [15]:
def train(model, X_train, y_train):
    model.get_L1max(value_per_unit)
    optimiser           = torch.optim.Adam(model.parameters(), lr)
    criterion           = WeightedMSELoss()
    
    if use_gpu:
        model                 = model.cuda()
        X_train, y_train      = X_train.cuda(), y_train.cuda()

    print(" =================== BEGINNING TRAIN ==================== ")
    beginning = start = time.time()

    for e in range(epochs):
        output          = model(X_train)
        loss            = criterion(output, y_train, X_train[:, -1:])

        if (e+1) % verbose_prd  == 0:
            start       = report_ETA(beginning, start, epochs, e+1, loss)
            if save_history:
                generictitle = '%s, %s, %s, N=%d, epochs=%d'%(outputheader, pm, str(n_neurons), N, e+1)
                torch.save({'state_dict': model.state_dict()}, outputfolder + 
                                generictitle + '.pth')
                modelparams = [w.detach().tolist() for w in model.parameters()]
                np.savetxt(outputfolder + generictitle + '.csv', modelparams, '%s')            
            
        optimiser.zero_grad()
        loss.backward()
        optimiser.step()
        model.clip_L1()
        
    print(" ===================   END OF TRAIN   =================== ")
    
    return model

## Adding ratio of PT/pt instead of PT

In [19]:
def read_data(pm):
    sm_file        = sm_filename_fn(pm)    
    train_size     = len(bsm_coef)*N
    
    smdata         = h5py.File(datafolder + sm_filename + '_' + pm + '_nlo.h5', 'r') 
    nsm            = smdata['Number'][()]
    
    if isinstance(nsm, np.ndarray):
        nsm = nsm[0]
    
    smdata         = torch.Tensor(smdata['Data'][()])
    smdata         = take_ratio(smdata, -2, -3)
    
    smdata_lst     = [append_constant(smdata[i*N:(i+1)*N, :], bsm_coef[i]
                                 ) for i in range(len(bsm_coef))]    # wilson coefficient G for SM
    smdata_lst.append(
        append_constant(smdata[train_size:, :], bsm_test))           # wilson coefficient G for SM (test)
    smdata         = torch.cat(smdata_lst, 0)
    
    #################################################################################################
    #### be careful! as this changes the position of the column of weight and wilson coefficient ####
    nlo_w_mean_sm  = smdata[:, -2].mean()
    smdata         = multiply_by_constant(smdata, 
                          1./nlo_w_mean_sm, -2, inplace=False)     # sigma_g/sigma_0 = 1 for SM, NLO
    #################################################################################################
    
    smdata         = append_constant(smdata, 0)                      # training target y = 0 for SM
    
    nbsm_dic       = {c: (h5py.File(datafolder + bsm_filename_fn(bsm_op, c, pm), 
                                    'r')['Number'][()]) for c in bsm_coef}
        
    if isinstance(nbsm_dic[bsm_coef[0]], np.ndarray):
        nbsm_dic = {c: torch.as_tensor(nbsm_dic[c][0]) for c in bsm_coef}
    else:
        nbsm_dic = {c: torch.as_tensor(nbsm_dic[c]) for c in bsm_coef}
    
    
    #nbsm_dic       = {c: (torch.as_tensor(h5py.File(datafolder + bsm_filename(bsm_op, c)\
    #                + '_' + pm + '_nlo.h5', 'r')['Number'][()])) for c in bsm_coef}
    
    bsmdata_lst    = [torch.Tensor(h5py.File(datafolder + bsm_filename_fn(bsm_op, c, pm),
                                             'r')['Data'][()])[:N, :] for c in bsm_coef]
    
    print('============ nsm: %s ============'%(str(nsm)))
    for i in range(len(bsm_coef)):
        print('========== nbsm %i: %s =========='%(i, str(nbsm_dic[bsm_coef[i]])))
    
    bsmdata_lst    = [take_ratio(bsmdata_lst[i], -2, -3) for i in range(len(bsmdata_lst))]

    bsmdata_lst    = [append_constant(bsmdata_lst[i], bsm_coef[i]
                                 ) for i in range(len(bsmdata_lst))] # wilson coefficient G for BSM
    
    nlo_w_mean_bsm = [bsmdata_lst[i][:, -2].mean() for i in range(len(bsmdata_lst))]   
    
    bsmdata_lst    = [multiply_by_constant(bsmdata_lst[i], (nbsm_dic[bsm_coef[i]]/nsm)/nlo_w_mean_bsm[i],
                            -2, inplace=False) for i in range(len(bsmdata_lst))] # sigma_g/sigma_0 = nbsm/nsm for BSM, NLO
    
    bsmdata        = torch.cat(bsmdata_lst, 0)
    bsmdata        = append_constant(bsmdata, 1)                     # training target y = 1 for BSM

    traindata      = torch.cat((smdata[:train_size, :], bsmdata), 0)
    traindata      = traindata[torch.randperm(traindata.size(0))]

    if convert_ang:
        traindata  = convert_angles_in_data(traindata, angle_pos)

    X_train        = traindata[:, :-1] 
    y_train        = traindata[:, -1].reshape(-1, 1)
    
    title = 'Scaling (input dim %d, ratio), %s, %s'%(input_dim, pm, bsm_coef)

    if os.path.isfile(scalingfolder + title + '.csv'):
        print('Reading scaling from: \n%s...'%(scalingfolder + title + '.csv'))
        
        scalingPars  = np.loadtxt(open(scalingfolder + title + '.csv', 'r'))
        X_train_mean = torch.from_numpy(scalingPars[:input_dim+1]).type(torch.FloatTensor)
        X_train_std  = torch.from_numpy(scalingPars[input_dim+1:]).type(torch.FloatTensor)
    else:
        X_train_mean           = X_train[:,  :input_dim+1].mean(0)
        X_train_std            = X_train[:,  :input_dim+1].std(0)
    
    # normalisation
    X_train[:, :input_dim+1] = (X_train[:, :input_dim+1] - X_train_mean)/X_train_std
    
    #################################################################################################
    
    bsmdata_test   = h5py.File(datafolder + bsm_filename(bsm_op, bsm_test) +\
                            '_' + pm +'_nlo.h5', 'r')
    nbsm_dic[bsm_test] = (bsmdata_test['Number'][()])
    
    if isinstance(nbsm_dic[bsm_test], np.ndarray):
        nbsm_dic[bsm_test] = torch.as_tensor(nbsm_dic[bsm_test][0])
    else:
        nbsm_dic[bsm_test] = torch.as_tensor(nbsm_dic[bsm_test])
    
    bsmdata_test   = torch.Tensor(bsmdata_test['Data'][()])
    bsmdata_test   = take_ratio(bsmdata_test, -2, -3)
    
    bsmdata_test   = append_constant(bsmdata_test, bsm_test)         # wilson coefficient G for BSM
    
    #################################################################################################
    #### be careful! as this changes the position of the column of weight and wilson coefficient ####
    nlo_w_mean_bsm_test = bsmdata_test[:, -2].mean() 
    
    bsmdata_test   = multiply_by_constant(bsmdata_test, 
        (nbsm_dic[bsm_test]/nsm)/nlo_w_mean_bsm_test, -2, inplace=False)# sigma_g/sigma_0 = nbsm/nsm for BSM
    #################################################################################################
    
    bsmdata_test   = append_constant(bsmdata_test, 1)                # testing target y = 1 for BSM

    if bsm_test in bsm_coef:
        bsmdata_test  = bsmdata_test[N:, :]

    smdata_test    = smdata[train_size:,  :]
    
    #testdata       = torch.cat((smdata[train_size:, :], bsmdata_test), 0)
    #testdata       = testdata[torch.randperm(testdata.size(0))]

    if convert_ang:
        smdata_test   = convert_angles_in_data(smdata_test,  angle_pos)
        bsmdata_test  = convert_angles_in_data(bsmdata_test, angle_pos)
    
    X_test_sm      = smdata_test[:,  :-1] 
    X_test_bsm     = bsmdata_test[:, :-1]
    
    # normalisation
    X_test_sm[:,  :input_dim+1]  = (X_test_sm[:, :input_dim+1] - X_train_mean)/X_train_std
    X_test_bsm[:, :input_dim+1] = (X_test_bsm[:, :input_dim+1] - X_train_mean)/X_train_std
    
    if not os.path.isfile(scalingfolder + title + '.csv'):
        print('Saving scaling parameters at: \n%s...'%(scalingfolder + title + '.csv'))
        
        np.savetxt(outputfolder + title + '.csv', torch.cat(
        [X_train_mean.reshape(-1, 1), X_train_std.reshape(-1, 1)], 0).numpy())
        
    return X_train, y_train, X_test_sm, X_test_bsm, nsm, nbsm_dic

### Fixed Parameters

In [20]:
datafolder    = '/home/chen/Documents/DibosonProcessData_NLO_NewBias/'
scalingfolder = '/home/chen/Documents/OutputQuadratic_NLO_NewBias_alt/'
outputfolder  = '/home/chen/Documents/OutputQuadratic_NLO_NewBias_alt/'
outputheader  = 'Newbias'

sm_filename_fn   = lambda pm: 'sm_%s_nlo.h5'%(pm)
bsm_filename_fn  = lambda g, c, pm: '%s%s_%s_nlo.h5'%(g, c, pm)

### Dynamic Parameters

In [21]:
angle_pos     = [3, 5]
convert_ang   = True

use_gpu       = True
input_dim     = 8 if not convert_ang else 10
n_neurons     = [input_dim, 32, 32, 32]
#N             = int(18e4)
N             = int(2e5)
lr            = 1e-3
epochs        = 10000
verbose_prd   = 1000
n_meas        = 4000
value_per_unit= 1.0
save_history  = True

In [22]:
outputheader  = 'NewBias-GW-vanilla'
bsm_op        = 'GW'
bsm_coef      = ['-1e-7', '-5e-8', '-2e-8', '2e-8', '5e-8', '1e-7']
bsm_test      = '2e-8'

In [24]:
pm                  = 'plus'
X_train, y_train, X_test_sm, X_test_bsm, nsm, nbsm_dic = read_data(pm)
model_plus          = QuadraticNetReLU()
model_plus          = train(model_plus, X_train, y_train)

pm                    = 'minus'
X_train, y_train, X_test_sm, X_test_bsm, nsm, nbsm_dic = read_data(pm)
model_minus           = QuadraticNetReLU()
model_minus           = train(model_minus, X_train, y_train)

Reading scaling from: 
/home/chen/Documents/OutputQuadratic_NLO_NewBias_alt/Scaling (input dim 10, ratio), plus, ['-1e-7', '-5e-8', '-2e-8', '2e-8', '5e-8', '1e-7'].csv...


KeyboardInterrupt: 