This is a minimal viable version of the code.

In [None]:
from Tools import *

# Baseline training cell: configure the trainer, load the training and
# validation samples, then train a first model `MD` that the next cell reuses.
# NOTE(review): os, torch and the Our*/CombinedDataFile names are presumably
# provided by `from Tools import *` — confirm.

# set paths to folders to read data and save files
# (both paths are relative to the current working directory, so the notebook
# must be started from the expected folder)
DataParentFolder = os.getcwd() + '/../h5/Ideal_Reweighted_Latent_Data_Coefficients/'
GeneralParentFolder = os.getcwd() + '/../'

#################### trainer ####################
NumEpochs = int(1e1)
Loss = 'Quadratic'
OT = OurTrainer(NumEpochs = NumEpochs, LossFunction = Loss, LearningRate = 1e-3)
# NOTE(review): NumEpochs is 10, but every entry of the save/print schedules
# below (1000, 2000, ..., 9000) is larger than that. Whether anything is saved
# or printed during this run depends on OurTrainer — confirm this is intended.
OT.SetSaveAfterEpochs(list(range(1000, 10000, 1000)))
OT.SetPrintAfterEpochs(list(range(1000, 10000, 1000)))  
    
#################### data ####################

# load training data
# (DataParentFolder already ends with '/', so these paths contain '//')
DataFilePath   = DataParentFolder + "/trainingSampleReweightedLarge_Latent.h5"
WeightFilePath = DataParentFolder + "/trainingSampleReweightedLarge_Latent_Benchmark.h5"
TrainingData = CombinedDataFile(DataFilePath, WeightFilePath, verbose=True, NReadData=int(1e6))

# load validation data
VDataFilePath    = DataParentFolder + "/testingSampleReweighted_Latent.h5"
VWeightFilePath  = DataParentFolder + "/testingSampleReweighted_Latent_Benchmark.h5"
ValidationData = CombinedDataFile(VDataFilePath, VWeightFilePath, verbose=True, NReadData=int(1e6))

#################### training ####################

# set random seed for reproduction
# The seed itself is drawn at random (integer in [100, 999), upper bound
# exclusive) and then installed; it is embedded in the model Name below so a
# run can be identified by its seed afterwards.
RandomSeed = torch.randint(100, 999, (1, )).item()
torch.manual_seed(RandomSeed)

# meta parameters
NOutput = 2
# presumably the layer widths from input to output — verify against OurModel
Architecture = [10, 32, 32, 32, 32, 1]
ActivationFn = 'ReLU'
WeightClipping = True
WeightClippingValue = 0.05

# model initialisation
# (must come after torch.manual_seed so the seed is in effect when the model
# is constructed)
MD = OurModel(NOutput, Architecture, ActivationFn)
MD.InitPreprocess(TrainingData)

# training
BatchSize = int(25e4)
# WeightClippingValue is passed as L1Max; the run is labelled with the loss
# name and the seed. MD is rebound to whatever OT.Train returns.
MD = OT.Train(MD, TrainingData, bs = BatchSize, ValidationDataFile = ValidationData, 
              WeightClipping=WeightClipping, L1Max=WeightClippingValue, 
              Name = 'ParametrizedDiscriminant, (loss %s, Seed %d), '%(Loss, RandomSeed),
              Folder = GeneralParentFolder + '/TrainedModels/IdealReweightedLatentModels/')

#### Smart Choice of Wilson Coefficients and Reweighting

In [None]:
#################### generate new data file ####################
# Second stage: use the model `MD` trained in the previous cell to compute
# "smart" Wilson coefficients, write the corresponding reweights to a new
# HDF5 file, and train a fresh model `MDSmart` on those reweights.
# Relies on globals defined by the previous cell (DataParentFolder,
# GeneralParentFolder, RandomSeed, MD, OT, ValidationData, NOutput,
# Architecture, ActivationFn, Loss, WeightClipping, WeightClippingValue).

# file names
# The output file is tagged with the seed of the baseline training run.
NewFileName    = DataParentFolder + "/trainingSampleReweightedLarge_Latent_smart_%s.h5"%(RandomSeed)
# NOTE(review): CoeffFileName and DataFileName point to the same file —
# confirm this is intended.
CoeffFileName  = DataParentFolder + "/trainingSampleReweightedLarge_Latent.h5"
DataFileName   = DataParentFolder + "/trainingSampleReweightedLarge_Latent.h5"

FeatureColumns = list(range(10))
DataSetList    = [] # the new file will store nothing but the reweights computed according to smart Wilson coefficients
# presumably moves the model to CPU so it can be evaluated on the CPU tensor
# loaded below — verify against OurModel
MD.cpu()

with h5py.File(DataFileName, 'r') as td:
    # Slice at the HDF5 level so only the first 1e6 rows are read from disk.
    # Reading the full dataset and slicing the tensor afterwards yields the
    # same values but loads (and keeps alive) the entire dataset in memory.
    DataTraining = torch.Tensor(td['Data'][:int(1e6)])

# Only build the reweight file if it does not exist yet; an existing file
# with the same seed tag is reused as-is.
if not os.path.isfile(NewFileName):
    SmartWCgw, Factor = ComputeSmartWCgw(DataTraining, MD, FeatureColumns)
    CreateDataSet(CoeffFileName, SmartWCgw, Factor, DataFileName, NewFileName, DataSetList)


#################### smart training ####################

# load new training data
# Same feature file as the baseline run, but weights come from the new file.
DataFilePath   = DataParentFolder + "/trainingSampleReweightedLarge_Latent.h5"
WeightFilePath = NewFileName
TrainingData = CombinedDataFile(DataFilePath, WeightFilePath, verbose=True, NReadData=int(1e6))

#################### training ####################

# set random seed for reproduction
# Keep the baseline seed around before drawing a new one for this run.
# NOTE(review): PrevRandomSeed is not used in the visible code — confirm
# whether a later cell needs it.
PrevRandomSeed = RandomSeed
RandomSeed = torch.randint(100, 999, (1, )).item()
torch.manual_seed(RandomSeed)

# model initialisation
# (must come after torch.manual_seed so the new seed is in effect)
MDSmart = OurModel(NOutput, Architecture, ActivationFn)
MDSmart.InitPreprocess(TrainingData)

# training
BatchSize = int(25e4)
# Same trainer, validation data and clipping settings as the baseline run;
# only the training weights, the seed and the run Name differ.
MDSmart = OT.Train(MDSmart, TrainingData, bs = BatchSize, ValidationDataFile = ValidationData, 
              WeightClipping=WeightClipping, L1Max=WeightClippingValue, 
              Name = 'ParametrizedDiscriminant_Smart, (loss %s, Seed %d), '%(Loss, RandomSeed),
              Folder = GeneralParentFolder + '/TrainedModels/IdealReweightedLatentModels/')