# Импорт библиотек

In [1]:
import json
import os
import warnings
# NOTE: the original relative order of the lines below is kept on purpose so that
# the wildcard imports shadow exactly the same names as before.
from utils import *
import dataset
import torch
import torchvision.transforms as transforms
import numpy as np
import random
from model_training import *
import neptune

In [2]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell is executed, so edits to the project's .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
def set_seed(seed: int = 666) -> None:
    """Seed every random number generator used by the notebook and request
    deterministic PyTorch behaviour.

    Args:
        seed: Value used to seed Python's ``random``, NumPy and PyTorch
            (CPU and all CUDA devices).

    Side effects:
        * Sets the ``PYTHONHASHSEED`` and ``CUBLAS_WORKSPACE_CONFIG``
          environment variables.
        * Disables cuDNN autotuning and switches cuDNN / PyTorch to
          deterministic algorithms, so operations without a deterministic
          implementation may raise ``RuntimeError`` when they are called.
    """
    np.random.seed(seed)
    random.seed(seed)

    # PYTHONHASHSEED is only read at interpreter start-up, so this affects
    # child processes, not the hashing of the already running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    # Required by cuBLAS for deterministic results on CUDA >= 10.2.
    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    # torch.set_deterministic() was deprecated in favour of
    # torch.use_deterministic_algorithms() and is missing from current releases;
    # prefer the new name and fall back only on old PyTorch versions.
    if hasattr(torch, 'use_deterministic_algorithms'):
        torch.use_deterministic_algorithms(True)
    else:
        torch.set_deterministic(True)

    torch.manual_seed(seed)
    # Seeds every CUDA device (including the current one); silently ignored
    # when CUDA is not available.
    torch.cuda.manual_seed_all(seed)


set_seed()

In [4]:
# Select the Neptune project that experiments created below are logged to.
neptune.init(project_qualified_name='iliaavilov/Image-captioning')

Project(iliaavilov/Image-captioning)

In [5]:
# Neptune notebook-integration settings.
#
# SECURITY: the API token is a credential and must never be stored in the
# notebook. It is expected to be exported as NEPTUNE_API_TOKEN in the
# environment Jupyter is started from. The token that used to be hardcoded
# here has been exposed and should be revoked in the Neptune UI.
if not os.environ.get('NEPTUNE_API_TOKEN'):
    warnings.warn('NEPTUNE_API_TOKEN is not set; export it in the environment '
                  'before starting Jupyter so that Neptune can authenticate.')
os.environ['NEPTUNE_PROJECT']="iliaavilov/Image-captioning"
os.environ['NEPTUNE_NOTEBOOK_ID']="53c1b8bb-9fe1-4125-9310-ef3fdd555e3f"
os.environ['NEPTUNE_NOTEBOOK_PATH']="Desktop/Projects/Image_captioning/training.ipynb"

# Создание файлов для обучения

In [6]:
# One-off preprocessing call, intentionally left disabled.
# NOTE(review): presumably it generates the files under 'images/' that the
# following cells read (e.g. the word map) — confirm before re-enabling.
#create_input_files(dataset = 'flickr30k', 
#                   karpathy_json_path = 'data/dataset_flickr30k.json', 
#                   image_folder = 'data/',
#                   output_folder = 'images/', 
#                   captions_per_image = 5)

# Загрузка словарей

In [7]:
# Load the word -> index mapping from JSON.
# The encoding is given explicitly: without it open() falls back to the
# platform's locale encoding (not UTF-8 on Windows), which can break on
# non-ASCII tokens.
with open('images/WORDMAP_flickr30k.json', encoding='utf-8') as f:
    wordmap = json.load(f)

In [8]:
# Inverse lookup table: maps each index in `wordmap` back to its word.
res = {index: word for word, index in wordmap.items()}

`wordmap` — словарь, ставящий в соответствие каждому слову его позиционный индекс\
`res` — обратный к `wordmap` словарь (индекс → слово)

# Загрузка/инициализация модели

In [9]:
# Model parameters
checkpoint_name = None  # handed to load_models() in the next cell
vocab_size = len(wordmap)  # number of entries in the word map
encoded_image_size = 14
# The embedding, attention and decoder hidden sizes all share the same width.
word_embeddings_dim = attention_dim = decoder_hidden_size = 512

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [10]:
# Obtain the epoch range, loss function, encoder/decoder and their optimizers
# from load_models(), using the parameters defined in the previous cell.
(start_epoch, end_epoch, loss_fn,
 enc, dec,
 optimizer_encoder, optimizer_decoder) = load_models(
    checkpoint_name,
    encoded_image_size,
    word_embeddings_dim,
    attention_dim,
    decoder_hidden_size,
    vocab_size,
    device,
)

# Инициализация загрузчиков данных

In [11]:
batch_size = 100
workers = 0

# Per-channel normalisation applied to every image.
# NOTE(review): these look like the standard ImageNet statistics — confirm.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

# Reset all RNGs right before the loaders are built.
set_seed()

# Options shared by the training and validation loaders.
_loader_kwargs = dict(shuffle=True, num_workers=workers, pin_memory=True)

train_loader = torch.utils.data.DataLoader(
    dataset.CaptionDataset('images/', 'TRAIN',
                           transform=transforms.Compose([normalize])),
    batch_size=batch_size,
    **_loader_kwargs,
)

# The validation loader uses its own, larger batch size.
val_loader = torch.utils.data.DataLoader(
    dataset.CaptionDataset('images/', 'VAL',
                           transform=transforms.Compose([normalize])),
    batch_size=200,
    **_loader_kwargs,
)

In [12]:
# Start a new Neptune experiment and attach the project's source files to it.
neptune.create_experiment(
    name='Soft_attention',
    description='flickr 30k',
    upload_source_files=['utils.py', 'model_training.py', 'model.py', 'dataset.py'],
)

https://ui.neptune.ai/iliaavilov/Image-captioning/e/IM-25


Experiment(IM-25)

In [None]:
# Main loop: for every epoch in [start_epoch, end_epoch) run one call to
# train() on the training loader, then one call to validate() on the
# validation loader.
for epoch in range(start_epoch, end_epoch):
    print('epoch:', epoch)

    # Both optimizers are passed in, decoder first and encoder second.
    train(enc, dec, device, loss_fn, train_loader, optimizer_decoder, optimizer_encoder, epoch)
    
    # NOTE(review): presumably this is what prints the per-epoch BLEU4 line seen
    # in the cell output — confirm in model_training.py.
    validate(enc, dec, device, loss_fn, val_loader, wordmap, epoch)

epoch: 0
Current loss 8.799320220947266
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
Epoch 0, BLEU4 0.0852609232021067
epoch: 1
Current loss 4.193248748779297
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
Epoch 1, BLEU4 0.10348155125096124
epoch: 2
Current loss 3.721883773803711
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
Epoch 2, BLEU4 0.11230835097786523
epoch: 3
Current loss 3.7770819664001465
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
Epoch 3, BLEU4 0.12012013892607291
epoch: 4
Current loss 3.2761971950531006
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
Epoch 4, BLEU4 0.12354677226163112
epoch: 5
Current loss 3.4972100257873535
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
Epoch 5, BLEU4 0.12497637998983456
epoch: 6
Current loss 3.334003448486328
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
Epoch 6, BLEU4 0.129622514908692

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\Sibmice\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-13-b9233ffd8801>", line 4, in <module>
    train(enc, dec, device, loss_fn, train_loader, optimizer_decoder, optimizer_encoder, epoch)
  File "C:\Users\Sibmice\Desktop\Projects\Image_captioning\model_training.py", line 63, in train
    dec_out, captions, captions_lengths, sort_ind = dec(captions=caps,
  File "C:\Users\Sibmice\anaconda3\lib\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "C:\Users\Sibmice\Desktop\Projects\Image_captioning\model.py", line 86, in forward
    predictions = torch.zeros(batch_size, max(captions_lengths), self.vocab_size).to(device) # (batch_size, max(captions_length), vocab_size)
  File "C:\Users\Sibmice\anaconda3\lib\site-packages\torch\tensor.py", line 21, in wrapped
 

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\Sibmice\anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3417, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-13-b9233ffd8801>", line 4, in <module>
    train(enc, dec, device, loss_fn, train_loader, optimizer_decoder, optimizer_encoder, epoch)
  File "C:\Users\Sibmice\Desktop\Projects\Image_captioning\model_training.py", line 63, in train
    dec_out, captions, captions_lengths, sort_ind = dec(captions=caps,
  File "C:\Users\Sibmice\anaconda3\lib\site-packages\torch\nn\modules\module.py", line 727, in _call_impl
    result = self.forward(*input, **kwargs)
  File "C:\Users\Sibmice\Desktop\Projects\Image_captioning\model.py", line 86, in forward
    predictions = torch.zeros(batch_size, max(captions_lengths), self.vocab_size).to(device) # (batch_size, max(captions_length), vocab_size)
  File "C:\Users\Sibmice\anaconda3\lib\site-packages\torch\tensor.py", line 21, in wrapped
 