# Tryout DIGITS

In [5]:
import os

In [None]:
# Project
import sys

# Working directory of the notebook: dirname('.') is '', and abspath('') resolves
# to the current directory.
pwd = os.path.abspath(os.path.dirname('.'))

# Make the sibling 'common' folder importable (presumably where data_utils and
# training_utils live -- confirm). The membership check keeps sys.path free of
# duplicate entries when this cell is executed more than once.
common_path = os.path.join(pwd, '..', 'common')
if common_path not in sys.path:
    sys.path.append(common_path)

In [28]:
from data_utils import type_1_files, type_2_files, type_3_files, test_ids
from data_utils import get_filename, type_to_index
from training_utils import get_trainval_id_type_lists

## Create training/validation text files to store images as datasets

In [2]:
# Folder for generated resources; the DIGITS list files are written to its
# 'digits' subfolder by the cells below.
RESOURCES_PATH = os.path.join(pwd, '..', 'resources')

# Training and validation splits, each iterated below as (image_id, image_type) pairs.
(train_id_type_list,
 val_id_type_list) = get_trainval_id_type_lists()

Train dataset contains : 
('-', [175, 546, 315], ' images of corresponding types')
Validation dataset contains : 
('-', [75, 234, 135], ' images of corresponding types')


In [21]:
# Write train.txt: one "<image file> <class index>" line per training image.
# Image 1339 of Type_1 is left out.
# NOTE(review): the reason for excluding 1339/Type_1 is not recorded in this notebook.
filename = os.path.join(RESOURCES_PATH, 'digits', 'train.txt')

# Report when a previous version of the file is being replaced.
if os.path.exists(filename):
    print("Remove file: %s" % filename)
    os.remove(filename)

with open(filename, 'w') as listing:
    for image_id, image_type in train_id_type_list:
        # Keep every entry except the single excluded (id, type) combination.
        if image_id != '1339' or image_type != "Type_1":
            listing.write(
                get_filename(image_id, image_type)
                + " %i\n" % type_to_index[image_type]
            )

Remove file: /home/u2459/Intel_MobileODT/notebooks/../resources/digits/train.txt


In [22]:
# Sanity checks on the file just written (`filename` comes from the previous cell):
# number of lines, the first ten entries, and any line that contains '1339'.
!cat {filename} | wc -l
!head -n 10 {filename}
!cat {filename} | grep '1339'

1638
/data/kaggle/train/Type_1/1194.jpg 0
/data/kaggle/train/Type_2/929.jpg 1
/data/kaggle/train/Type_3/878.jpg 2
/data/kaggle/train/Type_1/356.jpg 0
/data/kaggle/train/Type_2/867.jpg 1
/data/kaggle/train/Type_3/277.jpg 2
/data/kaggle/train/Type_1/1199.jpg 0
/data/kaggle/train/Type_2/946.jpg 1
/data/kaggle/train/Type_3/1244.jpg 2
/data/kaggle/train/Type_1/677.jpg 0


In [26]:
# Write val.txt: one "<image file> <class index>" line per validation image,
# applying the same 1339/Type_1 exclusion as for the training list.
filename = os.path.join(RESOURCES_PATH, 'digits', 'val.txt')

# Report when a previous version of the file is being replaced.
if os.path.exists(filename):
    print("Remove file: %s" % filename)
    os.remove(filename)

with open(filename, 'w') as listing:
    for image_id, image_type in val_id_type_list:
        # Keep every entry except the single excluded (id, type) combination.
        if image_id != '1339' or image_type != "Type_1":
            listing.write(
                get_filename(image_id, image_type)
                + " %i\n" % type_to_index[image_type]
            )

In [27]:
# Sanity checks on the file just written (`filename` comes from the previous cell):
# number of lines, the first ten entries, and any line that contains '1339'.
!cat {filename} | wc -l
!head -n 10 {filename}
!cat {filename} | grep '1339'

699
/data/kaggle/train/Type_1/580.jpg 0
/data/kaggle/train/Type_2/717.jpg 1
/data/kaggle/train/Type_3/512.jpg 2
/data/kaggle/train/Type_1/660.jpg 0
/data/kaggle/train/Type_2/447.jpg 1
/data/kaggle/train/Type_3/1167.jpg 2
/data/kaggle/train/Type_1/596.jpg 0
/data/kaggle/train/Type_2/1054.jpg 1
/data/kaggle/train/Type_3/646.jpg 2
/data/kaggle/train/Type_1/308.jpg 0


In [24]:
# Write labels.txt: one class name per line.
# NOTE(review): the line order is presumably meant to match the 0/1/2 indices
# written to train.txt / val.txt through type_to_index -- confirm.
filename = os.path.join(RESOURCES_PATH, 'digits', 'labels.txt')

# Report when a previous version of the file is being replaced.
if os.path.exists(filename):
    print("Remove file: %s" % filename)
    os.remove(filename)

with open(filename, 'w') as labels_file:
    labels_file.writelines(["Type_1\n", "Type_2\n", "Type_3\n"])

In [25]:
# Print the labels file written by the previous cell.
!cat {filename}

Type_1
Type_2
Type_3


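## Submit a DIGITS dataset creation job to the cluster

For reference, these are the `create_db.py` invocations logged for DIGITS job `20170318-130703-d9e2`; the cells below build equivalent commands that write the databases under `input/` and submit them with `qsub`: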
```
Create DB (train) task started.
"/opt/intel/intelpython27/bin/python2 /home/u2459/DIGITS/digits/tools/create_db.py /home/u2459/Intel_MobileODT/resources/digits/train.txt /home/u2459/DIGITS/digits/jobs/20170318-130703-d9e2/train_db 224 224 --backend=lmdb --channels=3 --resize_mode=squash --mean_file=/home/u2459/DIGITS/digits/jobs/20170318-130703-d9e2/mean.binaryproto --mean_file=/home/u2459/DIGITS/digits/jobs/20170318-130703-d9e2/mean.jpg --shuffle --encoding=png"

Create DB (val) task started.
"/opt/intel/intelpython27/bin/python2 /home/u2459/DIGITS/digits/tools/create_db.py /home/u2459/Intel_MobileODT/resources/digits/val.txt /home/u2459/DIGITS/digits/jobs/20170318-130703-d9e2/val_db 224 224 --backend=lmdb --channels=3 --resize_mode=squash --shuffle --encoding=png"
```


In [140]:
env="PYTHONPATH=$PYTHONPATH:/home/u2459/digits_dependencies/lib/python2.7/site-packages/ CAFFE_ROOT=/opt/caffe-master/"

def submit_job(cmd, env=env):
    job_id = !echo {env} {cmd} | qsub -l nodes=1:knl -d /home/u2459
    return job_id 

def checkout_job(job_id):
    print(job_id)
    !qstat
    !cat ~/STDIN.o{job_id[0].split('.')[0]}
    !cat ~/STDIN.e{job_id[0].split('.')[0]}

In [30]:
# Directory that receives the generated LMDB databases and the mean files
# (see the create_db commands built in the next cell).
INPUT_PATH=os.path.join(pwd, '..', 'input')

In [122]:
def _create_db_command(list_name, db_name, mean_files=()):
    """Build the shell command line that runs DIGITS' create_db.py.

    Parameters:
        list_name: image list file inside RESOURCES_PATH/digits
            (e.g. 'train.txt').
        db_name: name of the output database inside INPUT_PATH.
        mean_files: file names inside INPUT_PATH; each one is passed as its
            own --mean_file option, in the given order.

    Returns:
        The full command as one space-joined string. Arguments are not
        shell-quoted, so the paths involved must not contain spaces.
    """
    parts = [
        "python",
        "/home/u2459/DIGITS/digits/tools/create_db.py",
        os.path.join(RESOURCES_PATH, 'digits', list_name),
        os.path.join(INPUT_PATH, db_name),
        # Two positional size arguments (presumably image width and height).
        "224",
        "224",
        "--backend=lmdb",
        "--channels=3",
        "--resize_mode=squash",
    ]
    parts.extend(
        "--mean_file=" + os.path.join(INPUT_PATH, mean_name)
        for mean_name in mean_files
    )
    parts += ["--shuffle", "--encoding=png"]
    return ' '.join(parts)


# The training database also gets the dataset mean written in two formats.
create_train_db_cmd = _create_db_command(
    'train.txt', 'train_db.lmdb',
    mean_files=('mean.binaryproto', 'mean.jpg'),
)

# The validation database uses the same options, minus the mean files.
create_val_db_cmd = _create_db_command('val.txt', 'val_db.lmdb')

In [87]:
# Queue the training-database build; job_id keeps qsub's output for checkout_job().
job_id = submit_job(create_train_db_cmd)

In [128]:
# Inspect the queue and the log files of the training-database job.
checkout_job(job_id)

['3516.c001']
Job ID                    Name             User            Time Use S Queue
------------------------- ---------------- --------------- -------- - -----
3499.c001                  kaggle_mobileODT u2459           10:28:26 R batch          
3517.c001                  STDIN            u2459                  0 R batch          

  ########################################################################
  # Colfax Cluster - https://colfaxresearch.com/
  #      Date:           Sat Mar 18 14:41:16 PDT 2017
  #    Job ID:           3516.c001
  #      User:           u2459
  # Resources:           neednodes=1:knl,nodes=1:knl,walltime=24:00:00
  ########################################################################
  
2017-03-18 14:41:26 [DEBUG] 1638 total lines in file
2017-03-18 14:41:26 [INFO ] 1638 valid lines in file
2017-03-18 14:41:26 [DEBUG] Category 0 has 546 images.
2017-03-18 14:41:26 [DEBUG] Category 1 has 546 images.
2017-03-18 14:41:26 [DEBUG] Category 2 has 546 ima

In [142]:
# Queue the validation-database build; job_id_2 keeps qsub's output for checkout_job().
job_id_2 = submit_job(create_val_db_cmd)

In [149]:
# Inspect the queue and the log files of the validation-database job.
checkout_job(job_id_2)

['3518.c001']
Job ID                    Name             User            Time Use S Queue
------------------------- ---------------- --------------- -------- - -----
3499.c001                  kaggle_mobileODT u2459           13:51:36 R batch          

  ########################################################################
  # Colfax Cluster - https://colfaxresearch.com/
  #      Date:           Sat Mar 18 15:00:51 PDT 2017
  #    Job ID:           3518.c001
  #      User:           u2459
  # Resources:           neednodes=1:knl,nodes=1:knl,walltime=24:00:00
  ########################################################################
  
2017-03-18 15:01:02 [DEBUG] 699 total lines in file
2017-03-18 15:01:02 [INFO ] 699 valid lines in file
2017-03-18 15:01:02 [DEBUG] Category 0 has 231 images.
2017-03-18 15:01:02 [DEBUG] Category 1 has 234 images.
2017-03-18 15:01:02 [DEBUG] Category 2 has 234 images.
2017-03-18 15:01:04 [DEBUG] Processed 0/699
2017-03-18 15:01:06 [DEBUG] Processed 0/

In [150]:
# List INPUT_PATH to see which databases and mean files are present.
!ls {INPUT_PATH}

mean.binaryproto  mean.jpg  train_db.lmdb  val_db.lmdb


In [133]:
# List the home directory, which is where checkout_job() reads the STDIN.o*/STDIN.e* files from.
!ls ~/

build_opencv	     Intel_MobileODT  start_digits.sh	test.e3244   test.o3246
DIGITS		     keras_source     start_digits.sh~	test.e3246   test.py
digits_dependencies  opencv	      STDIN.e3516	test_launch  tmp
env.local	     opencv_source    STDIN.o3516	test.o3244


In [106]:
# Delete all STDIN.* files from the home directory. After this, checkout_job()
# can no longer show the output of the jobs that produced them.
!rm ~/STDIN.*