In [None]:
# Relative path to the Caffe tree; model and mean-file paths are built from it.
CAFFE_DIR = '../scripts/caffe'

import sys
# Put the `python` directory under CAFFE_DIR on the module search path.
sys.path.append(CAFFE_DIR + '/python')

# Pinterest user and board that the driver cells pass to fetch_images/get_dir.
nickname = 'nick_goodey'
board_name = 'under-the-sea'

In [None]:
from PIL import Image

def _crop_offset(overflow, crop_type):
    """
    Return where the kept window starts along the overflowing dimension.

    args:
        overflow: number of surplus pixels along that dimension.
        crop_type: 'top', 'middle' or 'bottom'.
    raises:
        ValueError: if `crop_type` is not one of the three accepted values.
    """
    if crop_type == 'top':
        return 0
    if crop_type == 'middle':
        return overflow // 2
    if crop_type == 'bottom':
        return overflow
    raise ValueError('ERROR: invalid value for crop_type')


def resize_and_crop(img_path, modified_path, size, crop_type='top'):
    """
    Resize and crop an image so that the result is exactly `size`.

    The image is scaled, keeping its aspect ratio, until it covers the target
    box; the dimension that overflows is then cropped.

    args:
        img_path: path of the image to resize.
        modified_path: path where the modified image is stored.
        size: `(width, height)` tuple.
        crop_type: can be 'top', 'middle' or 'bottom'. Depending on this
            value the image is cropped keeping the 'top/left', 'middle' or
            'bottom/right' part of the overflowing dimension.
    raises:
        Exception: if the file at img_path cannot be opened or there are
            problems saving the image.
        ValueError: if the image has to be cropped and an invalid
            `crop_type` is provided (no check is made when the aspect
            ratios already match and no crop is needed).
    """
    target_w, target_h = size[0], size[1]

    # ANTIALIAS was removed from recent Pillow releases; LANCZOS is the same
    # filter under its current name. Fall back for old installations.
    resample = getattr(Image, 'LANCZOS', None)
    if resample is None:
        resample = Image.ANTIALIAS

    img = Image.open(img_path)
    src_w, src_h = img.size
    # Current and desired aspect ratios.
    img_ratio = src_w / float(src_h)
    ratio = target_w / float(target_h)

    if ratio > img_ratio:
        # Source is relatively taller: match the width, crop the height.
        # Floor division keeps the dimensions integral on Python 2 and 3.
        scaled_h = target_w * src_h // src_w
        img = img.resize((target_w, scaled_h), resample)
        top = _crop_offset(scaled_h - target_h, crop_type)
        img = img.crop((0, top, target_w, top + target_h))
    elif ratio < img_ratio:
        # Source is relatively wider: match the height, crop the width.
        scaled_w = target_h * src_w // src_h
        img = img.resize((scaled_w, target_h), resample)
        left = _crop_offset(scaled_w - target_w, crop_type)
        img = img.crop((left, 0, left + target_w, target_h))
    else:
        # Same aspect ratio: a plain resize is enough, nothing to crop.
        img = img.resize((target_w, target_h), resample)

    img.save(modified_path)

In [None]:
import urllib
import urllib2
from bs4 import BeautifulSoup
import time
import os
import shutil

##########################################################
def __fetch_file(link):
    """
    Download `link` and return the raw response body.

    args:
        link: URL to request.
    returns:
        Whatever `response.read()` yields for the opened URL.
    raises:
        Any error raised by `urllib2.urlopen` or while reading the response.
    """
    req = urllib2.Request(link)
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()

#########################################################
def get_link(nickname, board_name):
    """Return the pinterest.com URL of `board_name` belonging to `nickname`."""
    pieces = ['https://www.pinterest.com/', nickname, '/', board_name, '/']
    return ''.join(pieces)

#########################################################
def get_dir(nickname, board_name):
    """Return the relative output directory (with trailing slash) for a board."""
    pieces = ['out/', nickname, '/', board_name, '/']
    return ''.join(pieces)

##########################################################
def ensure_dir(dir):
    """
    Create directory `dir` (and any missing parents) if it does not exist.

    args:
        dir: path of the directory to create.
    raises:
        OSError: if the directory cannot be created, including the case
            where `dir` already exists but is not a directory.
    """
    import errno

    # Try to create and tolerate "already exists" instead of checking first;
    # a separate existence check can race with another process creating it.
    try:
        os.makedirs(dir)
    except OSError as exc:
        if exc.errno != errno.EEXIST or not os.path.isdir(dir):
            raise

##########################################################
def fetch_images(nickname, board_name):
    """
    Download the pin images of a board and store 256x256 crops of them.

    The board page is fetched and parsed; every `<img>` tag whose class list
    contains "pinImg" is downloaded to a temporary file next to its final
    location, passed through `resize_and_crop` (middle crop) and saved under
    the directory returned by `get_dir`. The temporary file is removed
    afterwards.

    args:
        nickname: board owner, used to build the URL and output directory.
        board_name: board to download.
    raises:
        Any error raised while fetching the page or an image, or by
        `resize_and_crop`; processing stops at the first failure.
    """
    url = get_link(nickname, board_name)
    dir = get_dir(nickname, board_name)
    ensure_dir(dir)

    html_code = __fetch_file(url)
    soup = BeautifulSoup(html_code, 'html.parser')

    for img_tag in soup.find_all('img'):
        if "pinImg" not in img_tag.get("class", []):
            continue
        src = img_tag.get("src")
        if not src:
            # Nothing to download for a tag without a source URL.
            continue
        # Use the last URL segment as the file name, without the leading '/'.
        filename = src.rsplit("/", 1)[-1]
        if not filename:
            continue

        # Short pause before each image request.
        time.sleep(0.1)

        final_path = os.path.join(dir, filename)
        tmp_path = final_path + '.tmp'
        try:
            with open(tmp_path, 'wb') as f:
                f.write(__fetch_file(src))

            resize_and_crop(
                tmp_path,
                final_path,
                (256, 256),
                crop_type='middle'
            )
        finally:
            # Do not leave the raw download behind, whether or not the
            # resize succeeded.
            if os.path.exists(tmp_path):
                os.remove(tmp_path)


##########################################################
def remove_download(nickname, board_name):
    """
    Delete everything downloaded for `nickname`, ignoring any errors.

    args:
        nickname: user whose 'out/<nickname>/' tree is removed.
        board_name: accepted but not used; the whole user tree is removed,
            not only this board.
            NOTE(review): confirm whether removing only the board was meant.
    """
    user_root = ''.join(['out/', nickname, '/'])
    shutil.rmtree(user_root, ignore_errors=True)

In [None]:
import numpy as np
import os
import glob
import time

import caffe

In [None]:
# Network and input transformer, created on first use by __init_classifier().
__CLASSIFIER = None
__TRANSFORMER = None

def __init_classifier():
    """
    Load the bvlc_reference_caffenet model and build its input transformer.

    Switches Caffe to CPU mode, loads the network definition and weights
    from under CAFFE_DIR in TEST phase, and configures a
    `caffe.io.Transformer` for the network's 'data' blob. The results are
    stored in the module-level `__CLASSIFIER` and `__TRANSFORMER`.
    """
    global __CLASSIFIER, __TRANSFORMER

    model_dir = CAFFE_DIR + '/models/bvlc_reference_caffenet'
    model_def = model_dir + '/deploy.prototxt'
    pretrained_model = model_dir + '/bvlc_reference_caffenet.caffemodel'
    mean_file = CAFFE_DIR + '/python/caffe/imagenet/ilsvrc_2012_mean.npy'
    channel_swap = [2, 1, 0]
    raw_scale = 255.0

    caffe.set_mode_cpu()
    print("CPU mode")

    # Average the stored mean over its last two axes to get the mean pixel.
    mean = np.load(mean_file).mean(1).mean(1)

    classifier = caffe.Net(model_def, pretrained_model, caffe.TEST)

    transformer = caffe.io.Transformer({'data': classifier.blobs['data'].data.shape})
    transformer.set_transpose('data', (2,0,1))
    transformer.set_mean('data', mean) # mean pixel
    transformer.set_raw_scale('data', raw_scale)  # the reference model operates on images in [0,255] range instead of [0,1]
    transformer.set_channel_swap('data', channel_swap)  # the reference model has channels in BGR order instead of RGB

    __CLASSIFIER = classifier
    __TRANSFORMER = transformer

In [None]:
def extract_for_dir(dir):
    """
    Run the classifier on the first input found at `dir` and return its
    'fc8' and 'fc7' activations as one flat list.

    `dir` may be a `.npy` file (loaded with `np.load`), a directory (all
    `*.jpg` files in it are loaded, in sorted name order) or a single image
    file. All inputs are loaded and counted, but only the first one is fed
    through the network.

    args:
        dir: path to a `.npy` file, a directory of jpg images, or an image.
            A leading `~` is expanded.
    returns:
        The 'fc8' values of the first input followed by its 'fc7' values.
    raises:
        IndexError: if no input could be found at `dir`.
    """
    input_file = os.path.expanduser(dir)
    ext = 'jpg'

    if __CLASSIFIER is None:
        __init_classifier()

    # Load numpy array (.npy), directory glob (*.jpg), or image file.
    if input_file.endswith('npy'):
        print("Loading file: %s" % input_file)
        inputs = np.load(input_file)
    elif os.path.isdir(input_file):
        print("Loading folder: %s" % input_file)
        # glob order is arbitrary; sort so the image used is deterministic.
        image_paths = sorted(glob.glob(input_file + '/*.' + ext))
        inputs = [caffe.io.load_image(im_f) for im_f in image_paths]
    else:
        print("Loading file: %s" % input_file)
        inputs = [caffe.io.load_image(input_file)]

    print("Classifying %d inputs." % len(inputs))

    if len(inputs) == 0:
        raise IndexError('no inputs found in %s' % input_file)

    # Only one image is classified, so a batch of one is enough.
    __CLASSIFIER.blobs['data'].reshape(1,3,227,227)
    __CLASSIFIER.blobs['data'].data[...] = __TRANSFORMER.preprocess('data', inputs[0])
    __CLASSIFIER.forward()

    fc8_values = __CLASSIFIER.blobs['fc8'].data[0].tolist()
    fc7_values = __CLASSIFIER.blobs['fc7'].data[0].tolist()
    return fc8_values + fc7_values

In [None]:
# Download the board's images into its output directory.
fetch_images(nickname, board_name)
# Keep that directory for the feature-extraction cell.
# NOTE(review): `dir` shadows the Python builtin of the same name.
dir = get_dir(nickname, board_name)

In [None]:
# extract_for_dir returns the 'fc8' values followed by the 'fc7' values of
# the first input it loads; the last expression displays the combined length.
fc8_and_fc7_features = extract_for_dir(dir)
len(fc8_and_fc7_features)

Loading folder: out/nick_goodey/under-the-sea/
Classifying 25 inputs.


5096