# Импорт библиотек

In [1]:
import json
import os
from utils import *
import dataset
import torch
import torchvision.transforms as transforms
import numpy as np
import random
from model_training import *
import neptune

In [2]:
# Enable IPython's autoreload extension in mode 2: all imported modules are
# reloaded before each cell executes, so edits to the local .py files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
def set_seed(seed: int = 666) -> None:
    """Seed every random generator used here and request deterministic PyTorch kernels.

    Seeds NumPy, Python's ``random`` module and PyTorch (CPU and all CUDA
    devices), disables cuDNN autotuning, and switches PyTorch into its
    deterministic-algorithms mode.

    Args:
        seed: Value used to seed all generators.
    """
    np.random.seed(seed)
    random.seed(seed)
    # NOTE: PYTHONHASHSEED is read when the interpreter starts, so this only
    # affects processes spawned afterwards, not the already-running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed)
    # cuBLAS needs a fixed workspace configuration to behave deterministically
    # (see the PyTorch reproducibility notes).
    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    # torch.set_deterministic was deprecated in PyTorch 1.8 and later removed;
    # prefer its replacement and fall back only on releases that predate it.
    if hasattr(torch, 'use_deterministic_algorithms'):
        torch.use_deterministic_algorithms(True)
    else:
        torch.set_deterministic(True)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


set_seed()

In [4]:
# Select the Neptune project used by the rest of the notebook.
# NOTE(review): no API token is passed here, so it presumably has to be
# available already (e.g. via the NEPTUNE_API_TOKEN environment variable)
# at the moment this cell runs — confirm.
neptune.init('iliaavilov/Image-captioning')

Project(iliaavilov/Image-captioning)

In [5]:
# Secrets and machine-specific identifiers must not be typed into the notebook:
# NEPTUNE_API_TOKEN and NEPTUNE_NOTEBOOK_ID are expected to be exported by the
# environment that launches Jupyter (shell profile, .env loader, secrets manager).
# The project name is not a secret; default it to the project this notebook
# passes to neptune.init, without overriding a value that is already set.
os.environ.setdefault('NEPTUNE_PROJECT', 'iliaavilov/Image-captioning')
os.environ['NEPTUNE_NOTEBOOK_PATH'] = "Desktop/Projects/Image_captioning/training.ipynb"

# Report (rather than crash on) anything that still has to be provided externally.
missing_neptune_vars = [
    name for name in ('NEPTUNE_API_TOKEN', 'NEPTUNE_NOTEBOOK_ID')
    if not os.environ.get(name)
]
if missing_neptune_vars:
    print('Missing Neptune environment variables:', ', '.join(missing_neptune_vars))

# Создание файлов для обучения

In [6]:
# One-off preprocessing call, intentionally left disabled. Presumably it writes
# the files under 'images/' that the following cells read — TODO confirm.
# Uncomment to regenerate them.
#create_input_files(dataset = 'coco', 
#                   karpathy_json_path = 'data/dataset_coco.json', 
#                   image_folder = 'data/',
#                   output_folder = 'images/', 
#                   captions_per_image = 5)

# Загрузка словарей

In [7]:
# Load the word map from JSON. The encoding is given explicitly so the result
# does not depend on the platform's locale default.
with open('images/WORDMAP_COCO.json', encoding='utf-8') as f:
    wordmap = json.load(f)

In [8]:
# Inverse of wordmap: swaps every (key, value) pair so values look up their keys.
res = {index: word for word, index in wordmap.items()}

`wordmap` — словарь, ставящий каждому слову в соответствие его позиционный индекс.\
`res` — обратный к `wordmap` словарь (индекс → слово).

# Загрузка/инициализация модели

In [10]:
# Model parameters
encoded_image_size = 14
vocab_size = len(wordmap)  # number of entries in the word map
word_embeddings_dim = attention_dim = decoder_hidden_size = 512

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# No checkpoint is selected here; the value is passed to load_models unchanged.
checkpoint_name = None

In [11]:
# Unpack everything load_models returns into the names used by the rest of the
# notebook (epoch bounds, loss function, the two models and their optimizers).
(start_epoch, end_epoch, loss_fn, enc, dec,
 optimizer_encoder, optimizer_decoder) = load_models(
    checkpoint_name,
    encoded_image_size,
    word_embeddings_dim,
    attention_dim,
    decoder_hidden_size,
    vocab_size,
    device,
)

# Инициализация загрузчиков данных

In [12]:
batch_size = 100
workers = 0

# Per-channel normalisation applied to every image
# (these are the standard ImageNet channel statistics).
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Re-seed immediately before the datasets and loaders are constructed.
set_seed()

# Training split: shuffled, batches of `batch_size`.
train_loader = torch.utils.data.DataLoader(
    dataset.CaptionDataset('images/', 'TRAIN', transform=transforms.Compose([normalize])),
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
    pin_memory=True,
)

# Validation split: shuffled as well, with a fixed batch size of 200.
val_loader = torch.utils.data.DataLoader(
    dataset.CaptionDataset('images/', 'VAL', transform=transforms.Compose([normalize])),
    batch_size=200,
    shuffle=True,
    num_workers=workers,
    pin_memory=True,
)

In [13]:
# Start a Neptune experiment and attach the project's source files to it.
neptune.create_experiment(
    name='Soft_attention',
    upload_source_files=['utils.py', 'model_training.py', 'model.py', 'dataset.py'],
)

https://ui.neptune.ai/iliaavilov/Image-captioning/e/IM-23


Experiment(IM-23)

In [None]:
# Main loop: for every epoch in [start_epoch, end_epoch) call train() on the
# training loader, then validate() on the validation loader.
for epoch in range(start_epoch, end_epoch):
    print('epoch:', epoch)

    # Both optimizers are handed to train() together with the current epoch index.
    train(enc, dec, device, loss_fn, train_loader, optimizer_decoder, optimizer_encoder, epoch)
    
    # validate() additionally receives wordmap (presumably for decoding/metrics — TODO confirm).
    validate(enc, dec, device, loss_fn, val_loader, wordmap, epoch)

epoch: 0
Current loss 9.081635475158691


Experiencing connection interruptions. Reestablishing communication with Neptune.


Current loss 3.4759562015533447
