In [1]:
from functions_pt import *
import pickle
from torch.optim import Adam

In [2]:
embed_size = 128
dropout = 0.1
multiple_factor = 1
layers = 2
batch_size = 64
agg = 'mean' # mean,sum,min,max
lr = 1e-4
clip = 1
epochs = 15

In [3]:
# load data
ys = np.load('ys.npy')
weights = np.load('weights.npy')
length = np.load('length.npy')
words_embed = np.load('possible_words_embed.npy')
with open('indexes.pkl', 'rb') as f:
    indexes = pickle.load( f)

# train/val split
np.random.seed(0)
train_idx = np.random.rand(ys.shape[0])>0.2
val_idx = np.logical_not(train_idx)

ys_train, ys_val = ys[train_idx], ys[val_idx]
weights_train, weights_val = weights[train_idx], weights[val_idx]
length_train, length_val = length[train_idx], length[val_idx]
indexes_train = [idx for idx,bol in zip(indexes,train_idx) if bol]
indexes_val = [idx for idx,bol in zip(indexes,train_idx) if not bol]

# set up dataloader
dataset_train = CustomDataset(ys_train,length_train,indexes_train,words_embed)
sampler = WeightedRandomSampler(weights_train, 50000)
dataset_train = DataLoader(dataset_train, batch_size=batch_size, sampler=sampler,collate_fn=collate)

dataset_val = CustomDataset(ys_val,length_val,indexes_val,words_embed)
sampler = WeightedRandomSampler(weights_val, 10000)
dataset_val = DataLoader(dataset_val, batch_size=batch_size, sampler=sampler,collate_fn=collate)

In [4]:
model = pointNet(layers,embed_size,agg,dropout,multiple_factor).to('cuda')
opt = Adam([    {'params': [p for p in model.parameters() if p is not model.w]},\
                {'params': model.w, 'lr': lr/4}
            ],lr=lr)

In [5]:
model,_ = train(opt,model,epochs,dataset_train,dataset_val,model.parameters(),clip)

epoch:0, train_loss: +1.361, val_loss: +0.735 

epoch:1, train_loss: +1.005, val_loss: +0.571 

epoch:2, train_loss: +0.738, val_loss: +0.424 

epoch:3, train_loss: +0.534, val_loss: +0.274 

epoch:4, train_loss: +0.384, val_loss: +0.242 

epoch:5, train_loss: +0.279, val_loss: +0.148 

epoch:6, train_loss: +0.204, val_loss: +0.122 

epoch:7, train_loss: +0.148, val_loss: +0.092 

epoch:8, train_loss: +0.104, val_loss: +0.054 

epoch:9, train_loss: +0.069, val_loss: +0.041 

epoch:10, train_loss: +0.043, val_loss: +0.028 

epoch:11, train_loss: +0.025, val_loss: +0.013 

epoch:12, train_loss: +0.014, val_loss: +0.005 

epoch:13, train_loss: +0.008, val_loss: +0.004 

epoch:14, train_loss: +0.006, val_loss: +0.003 

Training completed in 240.26040315628052s


In [9]:
torch.save(model.state_dict(), 'baseline0.pt')

In [8]:
model.w

Parameter containing:
tensor(0.2472, device='cuda:0', requires_grad=True)

In [5]:
# floor at 1
# out = torch.maximum(self.min_,out.squeeze() + self.w * torch.log2(length))
model,_ = train(opt,model,epochs,dataset_train,dataset_val,model.parameters(),clip)

epoch:0, train_loss: +0.207, val_loss: +0.197 

epoch:1, train_loss: +0.187, val_loss: +0.194 

epoch:2, train_loss: +0.185, val_loss: +0.193 

epoch:3, train_loss: +0.173, val_loss: +0.136 

epoch:4, train_loss: +0.085, val_loss: +0.174 

epoch:5, train_loss: +0.065, val_loss: +0.139 

epoch:6, train_loss: +0.066, val_loss: +0.137 

epoch:7, train_loss: +0.067, val_loss: +0.115 

epoch:8, train_loss: +0.062, val_loss: +0.105 

epoch:9, train_loss: +0.061, val_loss: +0.097 

epoch:10, train_loss: +0.062, val_loss: +0.165 

epoch:11, train_loss: +0.076, val_loss: +0.135 

epoch:12, train_loss: +0.071, val_loss: +0.117 

epoch:13, train_loss: +0.061, val_loss: +0.149 

epoch:14, train_loss: +0.059, val_loss: +0.092 

Training completed in 235.402446269989s


In [6]:
w = dataset_val.dataset.words_embed
l = torch.tensor([2309.0])

In [7]:
model((torch.tensor(w).to('cuda'),torch.tensor(l).to('cuda')))

  model((torch.tensor(w).to('cuda'),torch.tensor(l).to('cuda')))


tensor([2.8766], device='cuda:0', grad_fn=<AddBackward0>)