In [None]:
%pip install keras
%pip install tensorflow
%pip install Pillow
%pip install matplotlib
%pip install azureml.core
%pip install azure.storage.blob
%pip install --upgrade azure-cognitiveservices-vision-computervision
%pip install azureml.core
%pip install pyspark
%pip install opencv-python
%pip install keras
%pip install pydot
%pip install glob2
%pip install azureml-sdk[databricks]

In [None]:
import numpy as np
from numpy import array
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import string
import os
from PIL import Image
import glob
from pickle import dump, load
import dill as pickle
from time import time
import requests
import json

# NLP
import nltk
from nltk.translate.bleu_score import corpus_bleu

# Keras / Deep Learning
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import LSTM, Embedding, TimeDistributed, Dense, RepeatVector,\
                         Activation, Flatten, Reshape, concatenate, Dropout, BatchNormalization
from keras.optimizers import Adam, RMSprop
from keras.layers.wrappers import Bidirectional
from keras.layers.merge import add
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras import Input, layers
from keras import optimizers
from keras.applications.inception_v3 import preprocess_input
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

# Azure ML/ MLflow
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from azure.cognitiveservices.vision.computervision.models import VisualFeatureTypes
from msrest.authentication import CognitiveServicesCredentials
import azureml.core
import mlflow.keras

In [None]:
# load doc into memory
def load_doc(filename):
    """Read a text file and return its whole content as one string.

    Args:
        filename: Path of the file to read.

    Returns:
        The complete text of the file.
    """
    # the context manager closes the handle even if read() raises
    with open(filename, 'r') as file:
        return file.read()

# Path of the caption token file on the mounted blob container
filename = "/dbfs/mnt/blobcontainer/Flickr8k_text/Flickr8k.token.txt"
# load descriptions
doc = load_doc(filename)
# Preview the first 300 characters as a quick sanity check
print(doc[:300])

In [None]:
def load_descriptions(doc):
    """Group raw caption lines by image identifier.

    Each usable line is split on whitespace: the first token is the image
    id and the remaining tokens form the description. Everything from the
    first '.' of the id onwards is dropped.

    Args:
        doc: Full text of the caption file, one caption per line.

    Returns:
        dict mapping image id -> list of description strings, in file order.
    """
    mapping = dict()
    for line in doc.split('\n'):
        tokens = line.split()
        # Skip very short lines and whitespace-only lines; the latter yield
        # no tokens and would otherwise raise IndexError on tokens[0].
        if len(line) < 2 or not tokens:
            continue
        # keep only the part of the id before the first '.'
        image_id = tokens[0].split('.')[0]
        # re-join the remaining tokens into a single description string
        image_desc = ' '.join(tokens[1:])
        mapping.setdefault(image_id, []).append(image_desc)
    return mapping

# parse descriptions into {image id: [captions]}
descriptions = load_descriptions(doc)
# number of distinct image ids found in the caption file
print('Loaded: %d ' % len(descriptions))

In [None]:
# Peek at the first five image ids
list(descriptions.keys())[:5]

In [None]:
# Raw (uncleaned) captions stored for one example image id
descriptions['1000268201_693b08cb0e']

In [None]:
# Raw (uncleaned) captions stored for a second example image id
descriptions['1001773457_577c3a7d70']

In [None]:
def clean_descriptions(descriptions):
    """Normalise every caption of ``descriptions`` in place.

    For each caption: split on whitespace, lower-case each token, strip
    punctuation characters, drop tokens of length <= 1 and tokens that are
    not purely alphabetic, then re-join with single spaces.

    Args:
        descriptions: dict mapping image id -> list of caption strings.
            The lists are modified in place; nothing is returned.
    """
    # translation table that deletes every punctuation character
    strip_punct = str.maketrans('', '', string.punctuation)
    for captions in descriptions.values():
        for idx, caption in enumerate(captions):
            words = (tok.lower().translate(strip_punct) for tok in caption.split())
            # length filter removes leftovers such as a dangling 's' or 'a'
            kept = [w for w in words if len(w) > 1 and w.isalpha()]
            captions[idx] = ' '.join(kept)

# clean descriptions (mutates the `descriptions` dict in place)
clean_descriptions(descriptions)

In [None]:
# the dictionary 'descriptions' has all the training and test descriptions;
# show the cleaned captions for the first example image id
descriptions['1000268201_693b08cb0e']

In [None]:
# Cleaned captions for the second example image id
descriptions['1001773457_577c3a7d70']

In [None]:
# convert the loaded descriptions into a vocabulary of words
def to_vocabulary(descriptions):
    """Collect the set of distinct whitespace-separated words over all captions.

    Args:
        descriptions: dict mapping image id -> list of caption strings.

    Returns:
        set of unique words.
    """
    vocab_words = set()
    for captions in descriptions.values():
        for caption in captions:
            vocab_words |= set(caption.split())
    return vocab_words

# summarize vocabulary: distinct words across all cleaned captions
vocabulary = to_vocabulary(descriptions)
print('Original Vocabulary Size: %d' % len(vocabulary))

In [None]:
# save descriptions to file, one per line
def save_descriptions(descriptions, filename):
    """Write all captions to ``filename`` as '<image id> <caption>' lines.

    Lines are joined with '\\n' and no trailing newline is written.

    Args:
        descriptions: dict mapping image id -> list of caption strings.
        filename: Path of the output file (overwritten if it exists).
    """
    lines = [key + ' ' + desc
             for key, desc_list in descriptions.items()
             for desc in desc_list]
    # the context manager flushes and closes the file even if write() fails
    with open(filename, 'w') as file:
        file.write('\n'.join(lines))

# Persist the cleaned captions to a file in the current working directory
save_descriptions(descriptions, 'descriptions.txt')

In [None]:
# load a pre-defined list of photo identifiers
def load_set(filename):
    """Read a file of image file names and return the set of identifiers.

    Empty lines are ignored. Each identifier is the part of the line before
    its first '.'.

    Args:
        filename: Path of the text file, one image file name per line.

    Returns:
        set of identifier strings.
    """
    doc = load_doc(filename)
    return {entry.split('.')[0] for entry in doc.split('\n') if entry}

# load training dataset (6K): identifiers listed in the train split file
filename = '/dbfs/mnt/blobcontainer/Flickr8k_text/Flickr_8k.trainImages.txt'
train = load_set(filename)
# number of distinct training identifiers
print('Dataset: %d' % len(train))

In [None]:
# List the contents of the dataset folder on the mounted container
dbutils.fs.ls('dbfs:/mnt/blobcontainer/Flickr8k_Dataset/')

In [None]:
# Below path contains all the images (the trailing slash matters: later cells
# strip this prefix from full paths with path[len(images):])
images = '/dbfs/mnt/blobcontainer/Flickr8k_Dataset/Flicker8k_Dataset/'

# Create a list of the full paths of all .jpg files in the directory
img = glob.glob(images + '*.jpg')

In [None]:
# Below file contains the names of images to be used in train data
train_images_file = '/dbfs/mnt/blobcontainer/Flickr8k_text/Flickr_8k.trainImages.txt'
# Read the train image names into a set; the context manager closes the file
with open(train_images_file, 'r') as names_file:
    train_images = set(names_file.read().strip().split('\n'))

# Full paths of all training images. `img` holds the full paths of all
# images; i[len(images):] strips the directory prefix to get the file name.
train_img = [i for i in img if i[len(images):] in train_images]

# Report counts instead of dumping thousands of paths into the cell output
print('All images: %d' % len(img))
print('Train images: %d' % len(train_img))

In [None]:
# Below file contains the names of images to be used in test data
test_images_file = '/dbfs/mnt/blobcontainer/Flickr8k_text/Flickr_8k.testImages.txt'
# Read the test image names into a set; the context manager closes the file
with open(test_images_file, 'r') as names_file:
    test_images = set(names_file.read().strip().split('\n'))

# Full paths of all test images. `img` holds the full paths of all images;
# i[len(images):] strips the directory prefix to get the file name.
test_img = [i for i in img if i[len(images):] in test_images]

In [None]:
# load clean descriptions into memory
def load_clean_descriptions(filename, train_list, test_list):
    """Load cleaned captions and split them into train and test dicts.

    Every caption is wrapped as 'startseq <caption> endseq'.

    Args:
        filename: File with one '<image id> <caption>' entry per line.
        train_list: Iterable of image ids belonging to the training split.
        test_list: Iterable of image ids belonging to the test split.

    Returns:
        (train_descriptions, test_descriptions): two dicts mapping
        image id -> list of wrapped caption strings.
    """
    doc = load_doc(filename)
    # sets give O(1) membership tests; the ids are probed once per line
    train_ids, test_ids = set(train_list), set(test_list)
    train_descriptions, test_descriptions = dict(), dict()
    for line in doc.split('\n'):
        tokens = line.split()
        # blank lines (e.g. an empty file or a trailing newline) carry no id
        if not tokens:
            continue
        image_id = tokens[0]
        desc = 'startseq ' + ' '.join(tokens[1:]) + ' endseq'
        if image_id in train_ids:
            train_descriptions.setdefault(image_id, []).append(desc)
        if image_id in test_ids:
            test_descriptions.setdefault(image_id, []).append(desc)
    return train_descriptions, test_descriptions

# descriptions: derive image ids by dropping the extension from each file name
train_list = [i.split('.')[0] for i in train_images]
test_list = [i.split('.')[0] for i in test_images]

train_descriptions, test_descriptions = load_clean_descriptions('descriptions.txt', train_list, test_list)
print('Descriptions: train=%d' % len(train_descriptions))
# label fixed: this line reports the test split, not train
print('Descriptions: test=%d' % len(test_descriptions))

In [None]:
def preprocess(image_path):
    """Load an image file and prepare it as InceptionV3 input.

    The image is resized to 299x299, converted to an array, given a leading
    batch axis and passed through the inception ``preprocess_input``.

    Args:
        image_path: Path of the image file.

    Returns:
        The preprocessed array returned by ``preprocess_input``.
    """
    pil_img = image.load_img(image_path, target_size=(299, 299))
    # add the batch dimension in front of the image array
    batch = np.expand_dims(image.img_to_array(pil_img), axis=0)
    return preprocess_input(batch)

In [None]:
# Load the inception v3 model with the 'imagenet' weights.
# Note: the name `model` is reused further down for the captioning network.
model = InceptionV3(weights='imagenet')

In [None]:
# Create a new model, by removing the last layer (output layer) from the inception v3:
# same input, but the output is taken from the second-to-last layer.
model_new = Model(model.input, model.layers[-2].output)

In [None]:
# Function to encode a given image into a vector of size (2048, )
def encode(image):
    """Return the ``model_new`` feature vector for one image file.

    Args:
        image: Path of the image file (handed to ``preprocess``).

    Returns:
        1-D array obtained by dropping the batch axis of the prediction,
        i.e. (1, N) reshaped to (N,).
    """
    features = model_new.predict(preprocess(image))
    # drop the leading batch axis
    return np.reshape(features, features.shape[1])

In [None]:
# Call the function to encode all the train images
# This will take a while on CPU - Execute this only once
start = time()
encoding_train = {}
# `img_path` (not `img`) so the global list `img` of all image paths survives
for img_path in train_img:
    # key = file name without the directory prefix
    encoding_train[img_path[len(images):]] = encode(img_path)
print("Time taken in seconds =", time()-start)

In [None]:
# Save the bottleneck train features on the driver; without this step there
# is no local file for the copy below to pick up
with open("encoded_train_images.pk", "wb") as encoded_pickle:
  pickle.dump(encoding_train, encoded_pickle)

# Move file to mount point
dbutils.fs.cp('file:/databricks/driver/encoded_train_images.pk', 'dbfs:/mnt/blobcontainer/encoded_train_images.pk')
# List the same location the file was just copied to
display(dbutils.fs.ls("dbfs:/mnt/blobcontainer/encoded_train_images.pk"))

# Open from mount point (handle and loaded object get distinct names)
with open("/dbfs/mnt/blobcontainer/encoded_train_images.pk", "rb") as train_pickle:
  train_features = pickle.load(train_pickle)

print('Photos: train=%d' % len(train_features))

In [None]:
# Call the function to encode all the test images - Execute this only once
start = time()
encoding_test = {}
# `img_path` (not `img`) so the global list `img` of all image paths survives
for img_path in test_img:
    # key = file name without the directory prefix
    encoding_test[img_path[len(images):]] = encode(img_path)
print("Time taken in seconds =", time()-start)

In [None]:
# Save the bottleneck test features
with open("encoded_test_images.pk", "wb") as encoded_pickle:
  pickle.dump(encoding_test, encoded_pickle)

# Check if on driver
display(dbutils.fs.ls("file:/databricks/driver/encoded_test_images.pk"))

# Copy from the driver's local disk to the mount point
dbutils.fs.cp('file:/databricks/driver/encoded_test_images.pk', 'dbfs:/mnt/blobcontainer/encoded_test_images.pk')

# Check if in mount point
display(dbutils.fs.ls("dbfs:/mnt/blobcontainer/"))

# Open file from mount point (handle and loaded object get distinct names)
with open("/dbfs/mnt/blobcontainer/encoded_test_images.pk", "rb") as test_pickle:
  test_features = pickle.load(test_pickle)

print('Photos: test=%d' % len(test_features))

In [None]:
# Flatten the per-image caption lists into one list of training captions
all_train_captions = [
    caption
    for captions in train_descriptions.values()
    for caption in captions
]
len(all_train_captions)

In [None]:
# Keep only the words that appear at least `word_count_threshold` times
word_count_threshold = 10
word_counts = {}
for sent in all_train_captions:
    for w in sent.split(' '):
        if w in word_counts:
            word_counts[w] += 1
        else:
            word_counts[w] = 1
# number of captions that were scanned
nsents = len(all_train_captions)

vocab = [w for w, count in word_counts.items() if count >= word_count_threshold]
print('preprocessed words %d -> %d' % (len(word_counts), len(vocab)))

In [None]:
# Index <-> word lookup tables; indices start at 1 (0 is left for padding)
ixtoword = dict(enumerate(vocab, start=1))
wordtoix = {w: pos for pos, w in ixtoword.items()}
# next free index, as the counter-based loop would leave it
ix = len(vocab) + 1

In [None]:
# Indices start at 1, so add one slot for index 0 (used by zero padding)
vocab_size = len(ixtoword) + 1 # one for appended 0's
vocab_size

In [None]:
# convert a dictionary of clean descriptions to a list of descriptions
def to_lines(descriptions):
    """Flatten {image id: [captions]} into a single list of captions."""
    return [d for desc_list in descriptions.values() for d in desc_list]

# calculate the length of the description with the most words
def max_caption_length(descriptions):
    """Return the word count of the longest caption in ``descriptions``.

    Raises:
        ValueError: if there are no captions at all (max() of an empty sequence).
    """
    return max(len(d.split()) for d in to_lines(descriptions))

# determine the maximum sequence length.
# The helper has its own name so that assigning the integer `max_length`
# (used by later cells) no longer overwrites the function, which made this
# cell fail when it was run a second time.
max_length = max_caption_length(train_descriptions)
print('Description Length: %d' % max_length)

In [None]:
# data generator, intended to be used in a call to model.fit_generator()
def data_generator(descriptions, photos, wordtoix, max_length, num_photos_per_batch, num_classes=None):
    """Endlessly yield training batches built from captions and photo features.

    For every caption, each prefix seq[:i] becomes one input sequence
    (padded to ``max_length``) and seq[i] the one-hot encoded target word.
    A batch is yielded after every ``num_photos_per_batch`` photos.

    Args:
        descriptions: dict mapping image id -> list of caption strings.
        photos: mapping '<image id>.jpg' -> feature vector.
        wordtoix: dict mapping word -> integer index; unknown words are skipped.
        max_length: Length the input sequences are padded to.
        num_photos_per_batch: Number of photos whose samples form one batch.
        num_classes: Size of the one-hot target. Defaults to the
            module-level ``vocab_size`` when omitted.

    Yields:
        ([photo_features, input_sequences], one_hot_targets) as numpy arrays.

    Raises:
        ValueError: if ``descriptions`` is empty (raised on first iteration);
            the endless loop could otherwise never yield.
        KeyError: if a photo feature is missing for an image id.
    """
    if not descriptions:
        raise ValueError('descriptions is empty; no batches can be generated')
    if num_classes is None:
        # fall back to the notebook-level vocabulary size
        num_classes = vocab_size
    X1, X2, y = list(), list(), list()
    n = 0
    # loop for ever over images
    while True:
        for key, desc_list in descriptions.items():
            n += 1
            # retrieve the photo feature
            photo = photos[key + '.jpg']
            for desc in desc_list:
                # encode the sequence, dropping out-of-vocabulary words
                seq = [wordtoix[word] for word in desc.split(' ') if word in wordtoix]
                # split one sequence into multiple X, y pairs
                for i in range(1, len(seq)):
                    # pad the input prefix
                    in_seq = pad_sequences([seq[:i]], maxlen=max_length)[0]
                    # one-hot encode the next word
                    out_seq = to_categorical([seq[i]], num_classes=num_classes)[0]
                    X1.append(photo)
                    X2.append(in_seq)
                    y.append(out_seq)
            # yield the batch data once enough photos were collected
            if n == num_photos_per_batch:
                yield [np.array(X1), np.array(X2)], np.array(y)
                X1, X2, y = list(), list(), list()
                n = 0

In [None]:
# Load Glove vectors
glove_dir = '/dbfs/mnt/blobcontainer/'
embeddings_index = {} # word -> float32 coefficient vector

# the context manager closes the file even if a line fails to parse
with open(os.path.join(glove_dir, 'glove.6B.200d.txt'), encoding="utf-8") as f:
    for line in f:
        values = line.split()
        # skip blank lines, which would otherwise fail on values[0]
        if not values:
            continue
        # first token is the word, the rest are its coefficients
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
print('Found %s word vectors.' % len(embeddings_index))

In [None]:
# Width of each embedding row; must match the loaded GloVe file (200d)
embedding_dim = 200

# One row per vocabulary index (vocab_size rows, row 0 is the padding index),
# initialised to zeros
embedding_matrix = np.zeros((vocab_size, embedding_dim))

for word, i in wordtoix.items():
    # look up the pretrained vector; None when the word is not in GloVe
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # Words not found in the embedding index will be all zeros
        embedding_matrix[i] = embedding_vector

In [None]:
# Expected to be (vocab_size, embedding_dim)
embedding_matrix.shape

In [None]:
# Smoke-test the generator with one photo per batch and inspect the shapes
generator = data_generator(train_descriptions, train_features, wordtoix, max_length, 1)
# NOTE: `outputs` is reassigned by the model-definition cell further down
inputs, outputs = next(generator)
# inputs[0]: photo features, inputs[1]: padded input sequences
print(inputs[0].shape)
print(inputs[1].shape)
# one-hot encoded target words
print(outputs.shape)

In [None]:
# Image branch: 2048-d photo feature -> dropout -> 256-unit dense layer
inputs1 = Input(shape=(2048,))
fe1 = Dropout(0.5)(inputs1)
fe2 = Dense(256, activation='relu')(fe1)
# Text branch: padded word-index sequence -> embedding -> dropout -> LSTM
inputs2 = Input(shape=(max_length,))
# mask_zero=True makes index 0 (the padding value) a masked timestep
se1 = Embedding(vocab_size, embedding_dim, mask_zero=True)(inputs2)
se2 = Dropout(0.5)(se1)
se3 = LSTM(256)(se2)
# Decoder: element-wise sum of both 256-d branches, then dense + softmax
decoder1 = add([fe2, se3])
decoder2 = Dense(256, activation='relu')(decoder1)
# one probability per vocabulary index
outputs = Dense(vocab_size, activation='softmax')(decoder2)
# NOTE: rebinds `model`, which previously referred to the InceptionV3 network
model = Model(inputs=[inputs1, inputs2], outputs=outputs)
model.summary()

In [None]:
# Locate the Embedding layer by type instead of relying on its position in
# model.layers, which depends on how Keras orders the graph.
embedding_layer = next(layer for layer in model.layers if isinstance(layer, Embedding))
# Load the pretrained GloVe matrix and freeze it so training does not update it
embedding_layer.set_weights([embedding_matrix])
embedding_layer.trainable = False
# compile after changing `trainable` so the setting takes effect
model.compile(loss='categorical_crossentropy', optimizer='adam')

In [None]:
# First training stage: 10 passes, 3 photos per generator batch
epochs = 10
number_pics_per_bath = 3
# number of generator batches that make up one pass over the training ids
steps = len(train_descriptions)//number_pics_per_bath

# enable MLflow's automatic logging for Keras
mlflow.keras.autolog()
with mlflow.start_run():
  for i in range(epochs):
    # a fresh generator per pass so every pass starts from the first image
    generator = data_generator(train_descriptions, train_features, wordtoix, max_length, number_pics_per_bath)
    model.fit(generator, epochs=1, steps_per_epoch=steps, verbose=1)
    # NOTE(review): logged on every pass under the same artifact path "models" — confirm this is intended
    mlflow.keras.log_model(model, "models")

In [None]:
# Settings for the second training stage.
# NOTE(review): the learning rate is set by plain attribute assignment — confirm
# it takes effect on the installed Keras version.
model.optimizer.lr = 0.0001
epochs = 10
number_pics_per_bath = 6
# recompute the steps per pass for the new batch size
steps = len(train_descriptions)//number_pics_per_bath

In [None]:
# Make sure the checkpoint directory exists before saving into it; nothing
# else in the notebook creates './model_weights'.
os.makedirs('./model_weights', exist_ok=True)

for i in range(epochs):
    # a fresh generator per pass so every pass starts from the first image
    generator = data_generator(train_descriptions, train_features, wordtoix, max_length, number_pics_per_bath)
    model.fit(generator, epochs=1, steps_per_epoch=steps, verbose=1)
    mlflow.keras.log_model(model, "models")
    # keep one checkpoint per pass
    model.save('./model_weights/model_' + str(i) + '.h5')
# close whichever MLflow run is currently active
mlflow.end_run()

In [None]:
# NOTE(review): `model` is the in-memory Keras model here, whereas
# mlflow.keras.load_model is documented to take a model URI, and the return
# value is discarded — confirm what this cell is meant to do.
mlflow.keras.load_model(model)

In [None]:
# Image directory (same value as set earlier) and the test features under
# the name used by the evaluation cells below
images = '/dbfs/mnt/blobcontainer/Flickr8k_Dataset/Flicker8k_Dataset/'
encoding_test = test_features

In [None]:
# with open("/file:databricks/driver/encoded_test_images.pk", "rb") as encoded_pickle:
#     encoding_test = load(encoded_pickle)

# print(encoding_test)

In [None]:
# Check that the saved model file is present on the mount point
display(dbutils.fs.ls('dbfs:/mnt/blobcontainer/8kmodel_19.h5'))

In [None]:
from keras.models import load_model
# Load the saved captioning model from the mount point.
# NOTE: this replaces the `model` trained in the cells above.
filename = '/dbfs/mnt/blobcontainer/8kmodel_19.h5' # try locally with DSVM
model = load_model(filename)

In [None]:
def greedySearch(photo):
    """Generate a caption for one photo by greedy decoding.

    Starting from 'startseq', the most probable next word is appended until
    'endseq' is produced or ``max_length`` words were generated. Uses the
    module-level ``model``, ``wordtoix``, ``ixtoword`` and ``max_length``.

    Args:
        photo: Photo feature array as accepted by ``model.predict``
            (callers in this notebook pass shape (1, 2048)).

    Returns:
        The caption string without the 'startseq' / 'endseq' markers.
    """
    in_text = 'startseq'
    for _ in range(max_length):
        # encode the words generated so far, dropping unknown words
        sequence = [wordtoix[w] for w in in_text.split() if w in wordtoix]
        sequence = pad_sequences([sequence], maxlen=max_length)
        yhat = model.predict([photo, sequence], verbose=0)
        yhat = np.argmax(yhat)
        # index 0 is the padding slot and has no word; stop instead of
        # failing with KeyError
        word = ixtoword.get(yhat)
        if word is None:
            break
        in_text += ' ' + word
        if word == 'endseq':
            break
    # drop the leading 'startseq'
    final = in_text.split()[1:]
    # drop the end marker only when it was actually produced, so a caption
    # that hit max_length keeps its last real word
    if final and final[-1] == 'endseq':
        final = final[:-1]
    return ' '.join(final)

In [None]:
# Show the keys of the test feature dict (image file names)
print(encoding_test.keys())

In [None]:
# Caption one test image (the 7th key of the feature dict) and display it
pic = list(encoding_test.keys())[6]
# Named `photo` rather than `image`, so the imported keras `image` module
# that preprocess() relies on is not overwritten
photo = encoding_test[pic].reshape((1,2048))
x=plt.imread(images+pic)
plt.imshow(x)
plt.show()
print("Greedy:",greedySearch(photo))

In [None]:
def evaluate_model(model, descriptions, encoding_test):
  """Print corpus BLEU-1..4 of greedy captions against reference captions.

  Args:
      model: Accepted for interface compatibility; the captions come from
          ``greedySearch``, which reads the module-level ``model``.
      descriptions: dict mapping image id -> list of reference captions.
      encoding_test: mapping '<image id>.jpg' -> photo feature vector.

  Raises:
      KeyError: if a photo feature is missing for an image id.
  """
  actual, predicted = list(), list()
  # greedySearch returns captions without these markers, so they are
  # removed from the references as well to compare like with like
  markers = ('startseq', 'endseq')

  for key, desc_list in descriptions.items():
    # use the photo that belongs to THIS image id (features are keyed by
    # file name, i.e. id + '.jpg')
    photo = encoding_test[key + '.jpg'].reshape((1,2048))
    yhat = greedySearch(photo)
    reference_captions = [[tok for tok in d.split() if tok not in markers]
                          for d in desc_list]
    actual.append(reference_captions)
    predicted.append(yhat.split())

  print('BLEU-1: %f' % corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0)))
  print('BLEU-2: %f' % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0)))
  print('BLEU-3: %f' % corpus_bleu(actual, predicted, weights=(0.3, 0.3, 0.3, 0)))
  print('BLEU-4: %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))

# Score the model on the test captions
evaluate_model(model, test_descriptions, encoding_test)

# Deploy model to endpoint

In [None]:
import azureml
from azureml.core import Workspace
import mlflow.azureml

# Placeholders: replace with the values of the target Azure subscription
workspace_name = "<WORKSPACE-NAME>"
workspace_location="<WORKSPACE-LOCATION>"
resource_group = "<RESOURCE-GROUP>"
subscription_id = "<SUBSCRIPTION-ID>"

# exist_ok=True is passed so that an already existing workspace is accepted
workspace = Workspace.create(name = workspace_name,
                             location = workspace_location,
                             resource_group = resource_group,
                             subscription_id = subscription_id,
                             exist_ok=True)

In [None]:
%%writefile score.py # written to Workspace
import json
import numpy as np
from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
import os
import pickle
import joblib

def init():
    # loads container instance to accept requests
    global model 
    run_id1 = '63ea9b85d72a4b658b81fb8a839bcccc'
    model_uri = 'runs:/' + run_id1 + '/model'
    model_path = model_uri
    model = joblib.load(model_path)
    
    # sample input for schema generation
    numpy_sample_input = NumpyParameterType(np.array([[1, 2, 3, 5, 5, 6], [0, 1, 2, 3]]))
    
    sample_input = StandardPythonParameterType({'input1': numpy_sample_input})
    sample_output = "A dog sits on a rug"
    outputs = StandardPythonParameterType({'Results': sample_output})

    @input_schema('Inputs', sample_input)
    @output_schema(outputs)
    
def run(Inputs):
    # accepts data from REST call
    try:
        data = Inputs['input1']
        assert isinstance(data, np.ndarray)
        result = model.predict(data)
        return result
    except Exception as e:
        error = str(e)
        return error

In [None]:
# deploy in ACI
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig
# Aliased so the Keras `Model` class used earlier in the notebook is not shadowed
from azureml.core.model import Model as AzureModel
from azureml.core.webservice import AciWebservice

#myenv = Environment.get(workspace=workspace, name="tutorial-env", version="1")
inference_config = InferenceConfig(entry_script="score.py")

# ACI deployment configuration with default settings (was never defined before)
aciconfig = AciWebservice.deploy_configuration()

# `workspace` is the Workspace object created above (`ws` was never defined).
# NOTE(review): `model` is the Keras model at this point; Model.deploy is
# documented to take registered azureml Model objects — confirm/register first.
service = AzureModel.deploy(workspace=workspace,
                            name='sklearn-mnist-svc3',
                            models=[model],
                            inference_config=inference_config,
                            deployment_config=aciconfig)

service.wait_for_deployment(show_output=True)

In [None]:
# URI of the model artifact inside a specific MLflow run.
# NOTE(review): the training cells log the model under the artifact path
# "models", while this URI ends in "/model" — confirm which one is right.
run_id1 = '63ea9b85d72a4b658b81fb8a839bcccc'
model_uri = 'runs:/' + run_id1 + '/model'

# synchronous=False is passed, so the result is awaited explicitly below
model_image, azure_model = mlflow.azureml.build_image(model_uri, workspace=workspace, model_name='model', image_name='model', description='Keras InceptionV3 image for generating image captions', synchronous=False)

model_image.wait_for_creation(show_output=True)


In [None]:
from azureml.core.webservice import AciWebservice, Webservice

# Deploy the image built above as an ACI web service with default settings
dev_webservice_name = "automatic-image-captioning-model"
dev_webservice_deployment_config = AciWebservice.deploy_configuration()
dev_webservice = Webservice.deploy_from_image(name=dev_webservice_name, image=model_image, deployment_config=dev_webservice_deployment_config, workspace=workspace)

In [None]:
# Wait for the deployment to finish, showing its output
dev_webservice.wait_for_deployment(show_output=True)

In [None]:
# Look the deployed service up by name and print its swagger URI
service = Webservice(workspace=workspace, name="automatic-image-captioning-model")
print(service.swagger_uri)

# Test Endpoint with New Image

In [None]:
from keras.preprocessing.image import load_img, img_to_array

def preprocess_test(image_path):
    """Load, display and preprocess one image for the InceptionV3 encoder.

    The image is resized to 299x299, drawn with ``plt.imshow``, converted to
    an array, given a leading batch axis and passed through
    ``preprocess_input``.

    Args:
        image_path: Path of the image file.

    Returns:
        The preprocessed array returned by ``preprocess_input``.
    """
    pil_img = load_img(image_path, target_size=(299, 299))
    # show the resized image in the cell output
    plt.imshow(pil_img)
    # add the batch dimension in front of the image array
    batch = np.expand_dims(img_to_array(pil_img), axis=0)
    return preprocess_input(batch)

# Function to encode a given image into a vector of size (2048, )
def encode_test(image_path):
    """Return the ``model_new`` feature vector for one image file.

    Args:
        image_path: Path of the image file (handed to ``preprocess_test``).

    Returns:
        1-D array obtained by dropping the batch axis of the prediction,
        i.e. (1, N) reshaped to (N,).
    """
    features = model_new.predict(preprocess_test(image_path))
    # drop the leading batch axis
    return np.reshape(features, features.shape[1])

In [None]:
# Encode a user-supplied test image.
# NOTE(review): the file name has no extension — confirm it matches the stored file.
image_name = 'bunny_test'
image_path = '/dbfs/mnt/blobcontainer/user_test_images/' + image_name
encoded_image = encode_test(image_path)

In [None]:
def format_query_input(photo, sequence):
  """Build the JSON-serialisable request payload for the scoring endpoint.

  The payload is built from the arguments themselves (previously both were
  ignored in favour of a global and a fixed string) and no longer takes a
  round trip through json.dumps / eval.

  Args:
      photo: Photo feature vector (numpy array or plain list).
      sequence: Padded word-index sequence (numpy array or plain list).

  Returns:
      dict of the form {'data': [[photo_as_list, sequence_as_list]]}.
      NOTE(review): confirm this layout against what the endpoint expects.
  """
  def _as_list(value):
    # numpy arrays are not JSON-serialisable; plain Python values pass through
    return value.tolist() if hasattr(value, 'tolist') else value

  return {'data': [[_as_list(photo), _as_list(sequence)]]}

In [None]:
def query_endpoint(scoring_uri, inputs, service_key=None):
  """POST ``inputs`` as JSON to a scoring endpoint and return the decoded reply.

  Args:
      scoring_uri: URL of the scoring endpoint.
      inputs: JSON-serialisable request payload.
      service_key: Optional key, sent as a Bearer token when given.

  Returns:
      The response body decoded from JSON.

  Raises:
      requests.HTTPError: if the endpoint answers with an error status.
      ValueError: if the response body is not valid JSON.
  """
  headers = {
    "Content-Type": "application/json",
  }
  if service_key is not None:
    headers["Authorization"] = "Bearer {service_key}".format(service_key=service_key)

  print("Sending batch prediction request with inputs: {}".format(inputs))
  response = requests.post(scoring_uri, data=json.dumps(inputs), headers=headers)
  print(response)
  # fail loudly on HTTP errors instead of parsing an error page as predictions
  response.raise_for_status()
  # decode the body text; the Response object itself is not a JSON string
  preds = json.loads(response.text)
  print("Received response: {}".format(preds))
  return preds

In [None]:
scoring_uri = "<SCORING-URI>"
# Build the request payload here so this cell runs top-to-bottom on a fresh
# kernel: encode the start token, pad it, and format the query.
start_sequence = [wordtoix[w] for w in 'startseq'.split() if w in wordtoix]
start_sequence = pad_sequences([start_sequence], maxlen=max_length)
query_input = format_query_input(encoded_image, start_sequence)
dev_prediction = query_endpoint(scoring_uri=scoring_uri, inputs=query_input)

In [None]:
def greedySearch(photo):
    """Generate a caption by greedy decoding against the deployed endpoint.

    NOTE: this definition replaces the earlier local-model ``greedySearch``
    once this cell has run. Uses the module-level ``wordtoix``,
    ``ixtoword``, ``max_length`` and ``dev_webservice``.

    Args:
        photo: Photo feature vector sent to the endpoint with every query.

    Returns:
        The caption string without the 'startseq' / 'endseq' markers.
    """
    in_text = 'startseq'
    for _ in range(max_length):
        # encode the words generated so far, dropping unknown words
        sequence = [wordtoix[w] for w in in_text.split() if w in wordtoix]
        sequence = pad_sequences([sequence], maxlen=max_length)
        query = format_query_input(photo, sequence)
        dev_prediction = query_endpoint(scoring_uri=dev_webservice.scoring_uri, inputs=query)
        # assumes the endpoint returns the per-word scores — TODO confirm
        yhat = np.argmax(dev_prediction)
        # index 0 is the padding slot and has no word; stop instead of failing
        word = ixtoword.get(yhat)
        if word is None:
            break
        in_text += ' ' + word
        if word == 'endseq':
            break
    # drop the leading 'startseq'
    final = in_text.split()[1:]
    # drop the end marker only when it was actually produced
    if final and final[-1] == 'endseq':
        final = final[:-1]
    return ' '.join(final)

In [None]:
# One manual decoding step against the endpoint: encode the start token,
# pad it to max_length, build the payload and send it
in_text = 'startseq'
sequence = [wordtoix[w] for w in in_text.split() if w in wordtoix]
sequence = pad_sequences([sequence], maxlen=max_length)
query_input = format_query_input(encoded_image, sequence)
dev_prediction = query_endpoint(scoring_uri=scoring_uri, inputs=query_input)