# Setup seq2seq lecture

## Setup Google Drive Integration

In [0]:
# Install google-drive-ocamlfuse (from the alessandro-strada PPA) together with
# fuse and the apt helper packages needed to add the PPA.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
# NOTE(review): with `2>&1 > /dev/null` only stdout is discarded; stderr is
# duplicated onto the original stdout first, so error messages still show up in
# the cell output. Confirm this is intended rather than `> /dev/null 2>&1`.
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
# Authenticate the Colab user and load the application-default credentials,
# whose client id/secret are passed to google-drive-ocamlfuse below.
from google.colab import auth
auth.authenticate_user()
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
# First headless run: stdin is /dev/null and the output is filtered down to the
# lines containing "URL" (presumably the authorization link to open -- confirm).
# NOTE(review): the client id and secret are interpolated into the shell
# command line.
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# Read the verification code without echoing it, then pipe it into a second
# headless run. NOTE(review): {vcode} is interpolated unquoted into the shell.
vcode = getpass.getpass()
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}

# Create the ./drive directory and run google-drive-ocamlfuse on it
# (presumably mounting Google Drive there; `-o nonempty` is passed through as a
# mount option).
!mkdir -p drive
!google-drive-ocamlfuse drive -o nonempty

## PyTorch setup

In [0]:
# http://pytorch.org/
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

# !pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.3.0.post4-{platform}-linux_x86_64.whl torchvision
!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.0-{platform}-linux_x86_64.whl torchvision

import torch

## Fast.AI Setup

In [0]:
# !git clone https://github.com/fastai/fastai.git
# import os
# os.chdir("fastai")
# !git pull

In [0]:
# os.chdir("..")
# !pip install https://github.com/fastai/fastai/archive/master.zip

## Install libs

In [0]:
# Earlier variant that also pulled visdom and sconce from PyPI (visdom is now
# installed from git in the next cell):
# !pip install unidecode tqdm dill pysmi visdom sconce
# Install the helper libraries from PyPI (no versions are pinned).
!pip install unidecode tqdm dill pysmi

In [0]:
# Install visdom from the master branch of its GitHub repository, upgrading any
# version that is already installed.
!pip install --upgrade git+https://github.com/facebookresearch/visdom.git@master

In [0]:
# Install tnt from the master branch of the pytorch/tnt GitHub repository.
!pip install git+https://github.com/pytorch/tnt.git@master

## Install the PyDrive wrapper & import libraries

In [0]:
# Install (or upgrade) PyDrive quietly.
# This only needs to be done once per notebook.
!pip install -U -q PyDrive

from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
# This only needs to be done once per notebook.
# NOTE(review): `auth.authenticate_user()` is also called in the Google Drive
# integration cell at the top of the notebook.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# Module-level client; `download_and_save` below reads this global. It is a
# Python object and is unrelated to the `drive` directory created with mkdir
# in the first cell.
drive = GoogleDrive(gauth)

from pathlib import Path
import os

def download_and_save(file_name, file_id):
  """Fetch a Google Drive file by id and write its bytes to ``file_name``.

  Uses the module-level PyDrive client ``drive``.

  Args:
    file_name: Destination path, passed to ``open``; the file is opened in
      binary write mode, so an existing file is overwritten.
    file_id: Google Drive id of the file to fetch.

  Prints ``Saved <file_name>`` once the file has been written.
  """
  remote_file = drive.CreateFile({'id': file_id})
  # Pull the file content so that `remote_file.content` can be read.
  remote_file.FetchContent()
  with open(file_name, 'wb') as out_file:
    out_file.write(remote_file.content.read())

  print(f'Saved {file_name}')
  
# Dictionary keys used by the download descriptors in the cells below.
FILE_NAME = 'file_name'
FILE_ID = 'file_id'

# Get dataset(s)

In [0]:
from pathlib import Path
dataset_path = Path.home() / 'data/seq2seq'; dataset_path
!mkdir -p $dataset_path
!ls -la $dataset_path/

## Get dataset and extra sources

In [0]:
# Source file: eng-pol.txt
# Drive link:  https://drive.google.com/open?id=1vXMWySvI0mVcyqQZQeQUsk9vSmXYvIYw
eng_pol = {
  FILE_NAME: 'eng-pol.txt',
  FILE_ID: '1vXMWySvI0mVcyqQZQeQUsk9vSmXYvIYw',
}
# Save it inside the dataset directory under its original file name.
download_and_save(
  dataset_path / eng_pol[FILE_NAME],
  eng_pol[FILE_ID],
)

In [0]:
# Source file: masked_cross_entropy.py
# Drive link:  https://drive.google.com/open?id=1YbNcZHWn7F0bUPDcgsKdHrj8HHr8QBXS
masked_cross_entropy = {
  FILE_NAME: 'masked_cross_entropy.py',
  FILE_ID: '1YbNcZHWn7F0bUPDcgsKdHrj8HHr8QBXS',
}
# Unlike the dataset, this module is saved under /content/ rather than in the
# dataset directory.
download_and_save(
  Path('/content/') / masked_cross_entropy[FILE_NAME],
  masked_cross_entropy[FILE_ID],
)

In [0]:
# TODO: download btec dataset

In [0]:
# TODO: download saved model

In [0]:
# List the dataset directory again, now that the download cells above have run.
!ls -la $dataset_path/

# VM monitoring

In [0]:
# Show how long the VM has been running, along with its load averages.
!uptime

In [0]:
# Print the GPU status report from nvidia-smi.
# NOTE(review): this is the same hard-coded path the PyTorch setup cell checks
# for; on a runtime where that file is missing, this command will fail.
!/opt/bin/nvidia-smi

In [0]:
import os
import psutil

def print_memsize():
  """Print the resident set size (RSS) of the current Python process.

  The value is the RSS in bytes divided by 1024**3, formatted with a
  precision of 5 and followed by the label ``GB``.
  """
  rss_bytes = psutil.Process(os.getpid()).memory_info().rss
  rss_gb = rss_bytes / 1024**3
  print(f'{rss_gb:.5} GB')

In [0]:
# Report the memory currently used by the notebook's kernel process.
print_memsize()