# Visualize Book Representation in TensorBoard

In [1]:
import pandas as pd
import numpy as np
import gensim
import torch
import torchvision
from tensorboardX import SummaryWriter
from PIL import Image
from pathlib import Path
import shutil

## Convert ISBN to Book ID

In [2]:
# Load the processed table (tab-separated, bz2-compressed). Later cells rely
# on its `id` and `title` columns.
desc_pd = pd.read_csv(
    'data/tosho_processed_clean.csv.bz2',
    sep='\t',
    compression='bz2',
)
# Last expression of the cell: displays (rows, columns).
desc_pd.shape

(891, 3)

In [3]:
# Load the raw table (tab-separated, bz2-compressed) and discard rows that are
# exact duplicates of an earlier row.
id_df = pd.read_csv(
    './data/tosho_raw.csv.bz2',
    sep='\t',
    compression='bz2',
).drop_duplicates()
# Last expression of the cell: displays (rows, columns).
id_df.shape

(1187, 4)

In [4]:
# Inner join on `id` (the pandas default), so only books present in both the
# processed and the raw table are kept.
id_df = desc_pd.merge(id_df, on='id')
id_df.shape

(891, 6)

In [5]:
def modulus11weight10to2(c9):
    """Compute a modulus-11 check character using weights 10, 9, 8, ...

    The character at position ``i`` (0-based) is weighted by ``10 - i``; for
    a nine-character input this gives the weights 10 down to 2.

    Args:
        c9: String of digits to compute the check character for.

    Returns:
        ``'0'``-``'9'`` or ``'X'`` as a one-character string, or ``False``
        when any character of ``c9`` cannot be converted with ``int()``.
    """
    try:
        weighted_total = sum(
            (10 - position) * int(char) for position, char in enumerate(c9)
        )
    except ValueError:
        # Non-numeric input: signal failure the way callers already expect.
        return False

    check_value = 11 - weighted_total % 11

    # 11 and 10 have no single-digit form, so they map to '0' and 'X'.
    special_cases = {11: '0', 10: 'X'}
    return special_cases.get(check_value, str(check_value))
    
def encode13to10(isbn13):
    """Convert a 13-character ISBN string to its 10-character form.

    The first three characters and the trailing check character are dropped,
    and a check character recomputed by ``modulus11weight10to2`` is appended.

    NOTE(review): the three-character prefix is discarded without being
    inspected; this presumably expects 978-prefixed ISBNs - confirm.

    Args:
        isbn13: ISBN-13 as a string.

    Returns:
        The ISBN-10 string.

    Raises:
        TypeError: If the remaining characters are not all numeric, because
            ``modulus11weight10to2`` then returns ``False``, which cannot be
            concatenated to a string.
    """
    core_digits = isbn13[3:-1]
    return core_digits + modulus11weight10to2(core_digits)

def convert_isbn(isbn):
    """Return ``isbn`` in 10-character form.

    Values that are exactly 13 characters long are converted with
    ``encode13to10``; anything else is returned unchanged.

    Args:
        isbn: ISBN as a string.

    Returns:
        The converted ISBN-10 string, or ``isbn`` itself when its length is
        not 13.
    """
    return encode13to10(isbn) if len(isbn) == 13 else isbn

In [6]:
# Derive the `isbn` column from `isbn_raw`; 13-character values are converted
# to their 10-character form, everything else is kept as is.
id_df['isbn'] = id_df['isbn_raw'].apply(convert_isbn)

In [7]:
# List the cover image file names with pathlib rather than the IPython-only
# `!ls` shell escape, so the cell also runs as plain Python and on systems
# without `ls`. Sorted, and dot-files skipped, to mirror default `ls` output.
book_cover_list = sorted(
    entry.name
    for entry in Path('data/book_img_original').iterdir()
    if not entry.name.startswith('.')
)

In [8]:
# One row per cover image file, held in a single `file_name` column.
book_cover_pd = pd.DataFrame({'file_name': list(book_cover_list)})
# Last expression of the cell: displays (rows, columns).
book_cover_pd.shape

(870, 1)

In [9]:
# The cover files are named `<isbn>.jpg`; removing the extension yields the
# key used to join against `id_df`.
book_cover_pd['isbn'] = book_cover_pd['file_name'].map(
    lambda name: name.replace('.jpg', '')
)

In [10]:
# Inner join on `isbn` (the pandas default): pairs each book id with the file
# name of its cover image.
id_isbn_mapping_df = id_df[['id', 'isbn']].merge(
    book_cover_pd[['isbn', 'file_name']],
    on='isbn',
)
id_isbn_mapping_df.shape

(891, 3)

In [11]:
# Preview the first three rows of the id / isbn / file_name mapping.
id_isbn_mapping_df.head(3)

Unnamed: 0,id,isbn,file_name
0,3901637,4577028603,4577028603.jpg
1,3901633,4772101896,4772101896.jpg
2,3901632,4772101829,4772101829.jpg


## Rename image filenames and copy

In [12]:
# Directory that receives the cover images renamed to `<book id>.jpg`.
dst_directory = 'data/book_img'

In [13]:
# Create the destination directory and any missing parents; do nothing if it
# already exists. Equivalent to `mkdir -p`, but works outside IPython and on
# systems without that command.
Path(dst_directory).mkdir(parents=True, exist_ok=True)

In [14]:
def move_book_covers(filename, book_id):
    """Copy one cover image into ``dst_directory``, named after its book ID.

    Despite the function name, the source file is copied, not moved, so
    ``data/book_img_original`` is left untouched.

    Args:
        filename: File name of the cover inside ``data/book_img_original``.
        book_id: Book ID; the copy is written as ``<book_id>.jpg`` inside the
            module-level ``dst_directory``.

    Returns:
        1 if the file was copied, 0 if the copy failed with an ``OSError``
        (for example a missing source file or an unwritable destination).
    """
    source = Path('data/book_img_original') / filename
    target = Path(dst_directory) / '{}.jpg'.format(book_id)
    try:
        # str() keeps this working on Python versions whose shutil does not
        # accept path objects.
        shutil.copy(str(source), str(target))
    except OSError:
        # Only file-system failures count as "copy failed"; programming
        # errors and KeyboardInterrupt are no longer silently swallowed.
        return 0
    return 1

In [15]:
# Copy every cover to `<book id>.jpg` and record the per-row outcome
# (1 = copied, 0 = failed) in a `result` column.
id_isbn_mapping_df['result'] = [
    move_book_covers(cover_file, book_id)
    for cover_file, book_id in zip(
        id_isbn_mapping_df['file_name'], id_isbn_mapping_df['id']
    )
]

In [16]:
# Tally the copy outcomes: 1 = copied, 0 = failed.
id_isbn_mapping_df['result'].value_counts()

1    891
Name: result, dtype: int64

## Load book representation model

In [17]:
# Writer that produces the files TensorBoard reads.
writer = SummaryWriter()
# Book embeddings stored in the textual word2vec format (binary=False).
model = gensim.models.keyedvectors.KeyedVectors.load_word2vec_format('model/book2vec', binary=False)

# `load_word2vec_format` already returns a KeyedVectors object, so the
# deprecated `.wv` self-alias is not needed to reach the vectors.
weights = model.vectors
# gensim 4 renamed `index2word` to `index_to_key`; support both so the
# notebook runs on either major version.
labels = model.index_to_key if hasattr(model, 'index_to_key') else model.index2word

  'See the migration notes for details: %s' % _MIGRATION_NOTES_URL
  after removing the cwd from sys.path.
  """


## Convert Book IDs to Book Titles

In [18]:
# Build the id -> title lookup once instead of scanning `desc_pd` with a
# boolean mask for every label. `drop_duplicates` keeps the first row per id,
# matching the previous `.values[0]` choice. A label whose id is missing from
# `desc_pd` now raises KeyError (previously IndexError).
first_row_per_id = desc_pd.drop_duplicates(subset='id')
title_by_id = dict(zip(first_row_per_id['id'], first_row_per_id['title']))
labels_title = [title_by_id[int(book_id)] for book_id in labels]

## Create Book Cover Sprites

In [19]:
# Side length, in pixels, of the square thumbnail made from each cover.
img_size = 128

# Preprocessing applied to every cover: resize to img_size x img_size, then
# convert the image to a tensor.
resize_to_square = torchvision.transforms.Resize((img_size, img_size))
to_tensor = torchvision.transforms.ToTensor()
transform = torchvision.transforms.Compose([resize_to_square, to_tensor])

In [23]:
# Load the covers in the same order as `labels`, so that row i of `images`
# belongs to the book whose embedding is row i of `weights`. Globbing the
# directory returns files in an arbitrary order and would attach covers to
# the wrong embeddings. A label without a cover file raises FileNotFoundError.
cover_tensors = []
for book_id in labels:
    # Covers were saved as `<book id>.jpg`; int() normalises the label text.
    cover_path = Path(dst_directory) / '{}.jpg'.format(int(book_id))
    with Image.open(cover_path) as cover:
        # Force three channels: grayscale or CMYK JPEGs would otherwise give
        # tensors with a different channel count.
        cover_tensors.append(transform(cover.convert('RGB')))

# Stack once at the end instead of re-concatenating inside the loop; the
# result has shape (len(labels), 3, img_size, img_size).
images = torch.stack(cover_tensors)

## Write to Tensorboard format

In [24]:
# Export the embeddings, with each book's title as its label and its cover
# thumbnail as its image.
embedding_matrix = torch.FloatTensor(weights)
writer.add_embedding(
    embedding_matrix,
    metadata=labels_title,
    label_img=images,
)

1. Run Tensorboard  
```bash
tensorboard --logdir=runs
```
2. Go to `localhost:6006` in your web browser