fierval
2019-07-02 17:23:56 -07:00
parent 14ecfb0bf3
commit c75e820107
38 changed files with 6262 additions and 0 deletions

@@ -0,0 +1,5 @@
{
"subscription_id": "93177b32-3f08-4530-a61e-d1775d2480ad",
"resource_group": "MSRBrainwave",
"workspace_name": "brainwave"
}
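
A minimal sketch of how a config like this is typically consumed, assuming it is saved as aml_config/config.json (or .azureml/config.json) and azureml-core is installed:

from azureml.core import Workspace

# Workspace.from_config() walks up from the current directory looking for a
# config.json holding subscription_id, resource_group and workspace_name.
ws = Workspace.from_config()
print(ws.name, ws.location)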

Binary image file added (not shown; 83 KiB).

@@ -0,0 +1,26 @@
<annotation>
<folder>runeightft1</folder>
<filename>1555394321.8154433.jpg</filename>
<path>E:/Image grocerydemostills/runeightft1/1555394321.8154433.jpg</path>
<source>
<database>Unknown</database>
</source>
<size>
<width>852</width>
<height>506</height>
<depth>3</depth>
</size>
<segmented>0</segmented>
<object>
<name>stockout</name>
<pose>Unspecified</pose>
<truncated>0</truncated>
<difficult>0</difficult>
<bndbox>
<xmin>660</xmin>
<ymin>201</ymin>
<xmax>712</xmax>
<ymax>294</ymax>
</bndbox>
</object>
</annotation>
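
A minimal sketch of reading one of these Pascal VOC annotations with ElementTree; the .xml file name is assumed to mirror the image stem:

import xml.etree.ElementTree as ET

root = ET.parse('1555394321.8154433.xml').getroot()
for obj in root.findall('object'):
    box = obj.find('bndbox')
    coords = [int(box.find(k).text) for k in ('xmin', 'ymin', 'xmax', 'ymax')]
    print(obj.find('name').text, coords)  # e.g. stockout [660, 201, 712, 294]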

@@ -0,0 +1,92 @@
import numpy as np
import math
from model.ssd_vgg_300 import SSDNet, SSDParams
_R_MEAN = 123.
_G_MEAN = 117.
_B_MEAN = 104.
EVAL_SIZE = (300, 300)
defaults = SSDNet.default_params
img_shape = defaults.img_shape
num_classes = defaults.num_classes
feat_layers = defaults.feat_layers
feat_shapes = defaults.feat_shapes
anchor_size_bounds = defaults.anchor_size_bounds
anchor_sizes = defaults.anchor_sizes
anchor_ratios = defaults.anchor_ratios
anchor_steps = defaults.anchor_steps
anchor_offset = defaults.anchor_offset
normalizations = defaults.normalizations
prior_scaling = defaults.prior_scaling
def ssd_anchor_one_layer(img_shape,
feat_shape,
sizes,
ratios,
step,
offset=0.5,
dtype=np.float32):
"""Computer SSD default anchor boxes for one feature layer.
Determine the relative position grid of the centers, and the relative
width and height.
Arguments:
feat_shape: Feature shape, used for computing relative position grids;
size: Absolute reference sizes;
ratios: Ratios to use on these features;
img_shape: Image shape, used for computing height, width relatively to the
former;
offset: Grid offset.
Return:
y, x, h, w: Relative x and y grids, and height and width.
"""
# Compute the position grid: simple way.
# y, x = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
# y = (y.astype(dtype) + offset) / feat_shape[0]
# x = (x.astype(dtype) + offset) / feat_shape[1]
# Weird SSD-Caffe computation using steps values...
y, x = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
y = (y.astype(dtype) + offset) * step / img_shape[0]
x = (x.astype(dtype) + offset) * step / img_shape[1]
# Expand dims to support easy broadcasting.
y = np.expand_dims(y, axis=-1)
x = np.expand_dims(x, axis=-1)
# Compute relative height and width.
# Tries to follow the original implementation of SSD for the order.
num_anchors = len(sizes) + len(ratios)
h = np.zeros((num_anchors, ), dtype=dtype)
w = np.zeros((num_anchors, ), dtype=dtype)
# Add first anchor boxes with ratio=1.
h[0] = sizes[0] / img_shape[0]
w[0] = sizes[0] / img_shape[1]
di = 1
if len(sizes) > 1:
h[1] = math.sqrt(sizes[0] * sizes[1]) / img_shape[0]
w[1] = math.sqrt(sizes[0] * sizes[1]) / img_shape[1]
di += 1
for i, r in enumerate(ratios):
h[i+di] = sizes[0] / img_shape[0] / math.sqrt(r)
w[i+di] = sizes[0] / img_shape[1] * math.sqrt(r)
return y, x, h, w
def ssd_anchors_all_layers(img_shape=img_shape,
offset=0.5,
dtype=np.float32):
"""Compute anchor boxes for all feature layers.
"""
layers_anchors = []
for i, s in enumerate(feat_shapes):
anchor_bboxes = ssd_anchor_one_layer(img_shape, s,
anchor_sizes[i],
anchor_ratios[i],
anchor_steps[i],
offset=offset, dtype=dtype)
layers_anchors.append(anchor_bboxes)
return layers_anchors
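
A minimal usage sketch, relying only on this module and the defaults imported above from model.ssd_vgg_300:

anchors = ssd_anchors_all_layers()
for layer, (y, x, h, w) in zip(feat_layers, anchors):
    # y, x: center grids of shape (H, W, 1); h, w: one entry per anchor
    print(layer, y.shape, x.shape, h.shape, w.shape)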

@@ -0,0 +1,114 @@
import os
import sys
import tarfile
from six.moves import urllib
import tensorflow as tf
LABELS_FILENAME = 'labels.txt'
import shutil
from os import path
def check_labelmatch(images, annotations):
data_dir_images = os.path.split(images[0])[0]
data_dir_annot = os.path.split(annotations[0])[0]
im_files = {os.path.splitext(os.path.split(f)[1])[0] for f in images}
annot_files = {os.path.splitext(os.path.split(f)[1])[0] for f in annotations}
extra_ims = im_files.difference(annot_files)
extra_annots = annot_files.difference(im_files)
mismatch = len(extra_ims) > 0 or len(extra_annots) > 0
if mismatch:
print(f"The following files will be removed from the training process:")
if len(extra_ims) > 0:
print(f"images without annotations: {extra_ims}")
if len(extra_annots) > 0:
print(f"annotations without images: {extra_annots}")
if not mismatch:
        print(f"{len(images)} images found and {len(annotations)} matching annotations found.")
return (images, annotations)
im_files = im_files.difference(extra_ims)
annot_files = annot_files.difference(extra_annots)
im_files = [os.path.join(data_dir_images, f+".jpg") for f in im_files]
annot_files = [os.path.join(data_dir_annot, f+".xml") for f in annot_files]
    return (im_files, annot_files)
def create_dir(path):
try:
path_annotations = path + '/Annotations'
path_images = path + '/JPEGImages'
os.makedirs(path_annotations)
os.makedirs(path_images)
except OSError:
print("Creation of folders in directory %s failed. Folder may already exist." % path)
else:
print("Successfully created images and annotations folders at %s" % path)
def move_images(data_dir, train_images, train_annotations,
                test_images, test_annotations):
    for image in train_images:
        src = os.path.join(data_dir, image)
        if path.exists(src):
            shutil.copy(src, os.path.join(data_dir, 'train', 'JPEGImages'))
    for image in test_images:
        src = os.path.join(data_dir, image)
        if path.exists(src):
            shutil.copy(src, os.path.join(data_dir, 'test', 'JPEGImages'))
    for annot in train_annotations:
        src = os.path.join(data_dir, annot)
        if path.exists(src):
            shutil.copy(src, os.path.join(data_dir, 'train', 'Annotations'))
    for annot in test_annotations:
        src = os.path.join(data_dir, annot)
        if path.exists(src):
            shutil.copy(src, os.path.join(data_dir, 'test', 'Annotations'))
    print('Images and annotations have been copied to directories: ' +
          os.path.join(data_dir, 'train') + ' and ' + os.path.join(data_dir, 'test'))
def int64_feature(value):
"""Wrapper for inserting int64 features into Example proto.
"""
if not isinstance(value, list):
value = [value]
return tf.train.Feature(int64_list=tf.train.Int64List(value=value))
def float_feature(value):
"""Wrapper for inserting float features into Example proto.
"""
if not isinstance(value, list):
value = [value]
return tf.train.Feature(float_list=tf.train.FloatList(value=value))
def bytes_feature(value):
"""Wrapper for inserting bytes features into Example proto.
"""
if not isinstance(value, list):
value = [value]
return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
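
A minimal sketch of how these wrappers compose into a serialized Example proto (TF 1.x; image_data, height and width are assumed to be in scope):

example = tf.train.Example(features=tf.train.Features(feature={
    'image/encoded': bytes_feature(image_data),
    'image/height': int64_feature(height),
    'image/width': int64_feature(width),
}))
record = example.SerializeToString()  # ready for tf.python_io.TFRecordWriter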

@@ -0,0 +1,112 @@
# Copyright 2015 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for the Pascal VOC Dataset (images + annotations).
"""
import os
import tensorflow as tf
from dataprep import dataset_utils
slim = tf.contrib.slim
VOC_LABELS = {
'none': (0, 'Background'),
'aeroplane': (1, 'Vehicle'),
'bicycle': (2, 'Vehicle'),
'bird': (3, 'Animal'),
'boat': (4, 'Vehicle'),
'bottle': (5, 'Indoor'),
'bus': (6, 'Vehicle'),
'car': (7, 'Vehicle'),
'cat': (8, 'Animal'),
'chair': (9, 'Indoor'),
'cow': (10, 'Animal'),
'diningtable': (11, 'Indoor'),
'dog': (12, 'Animal'),
'horse': (13, 'Animal'),
'motorbike': (14, 'Vehicle'),
'person': (15, 'Person'),
'pottedplant': (16, 'Indoor'),
'sheep': (17, 'Animal'),
'sofa': (18, 'Indoor'),
'train': (19, 'Vehicle'),
'tvmonitor': (20, 'Indoor'),
}
def get_split(split_name, dataset_dir, file_pattern, reader,
split_to_sizes, items_to_descriptions, num_classes):
"""Gets a dataset tuple with instructions for reading Pascal VOC dataset.
    Args:
      split_name: A train/test split name.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the split
        name can be inserted.
      reader: The TensorFlow reader type; defaults to tf.TFRecordReader.
      split_to_sizes: Dict mapping each split name to its number of samples.
      items_to_descriptions: Dict describing the items the dataset provides.
      num_classes: Number of classes in the dataset.
    Returns:
      A `Dataset` namedtuple.
    Raises:
      ValueError: if `split_name` is not a valid train/test split.
    """
if split_name not in split_to_sizes:
raise ValueError('split name %s was not recognized.' % split_name)
file_pattern = os.path.join(dataset_dir, file_pattern % split_name)
# Allowing None in the signature so that dataset_factory can use the default.
if reader is None:
reader = tf.TFRecordReader
# Features in Pascal VOC TFRecords.
keys_to_features = {
'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
'image/height': tf.FixedLenFeature([1], tf.int64),
'image/width': tf.FixedLenFeature([1], tf.int64),
'image/channels': tf.FixedLenFeature([1], tf.int64),
'image/shape': tf.FixedLenFeature([3], tf.int64),
'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
}
items_to_handlers = {
'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
'shape': slim.tfexample_decoder.Tensor('image/shape'),
'object/bbox': slim.tfexample_decoder.BoundingBox(
['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label'),
'object/difficult': slim.tfexample_decoder.Tensor('image/object/bbox/difficult'),
'object/truncated': slim.tfexample_decoder.Tensor('image/object/bbox/truncated'),
}
decoder = slim.tfexample_decoder.TFExampleDecoder(
keys_to_features, items_to_handlers)
labels_to_names = None
if dataset_utils.has_labels(dataset_dir):
labels_to_names = dataset_utils.read_label_file(dataset_dir)
return slim.dataset.Dataset(
data_sources=file_pattern,
reader=reader,
decoder=decoder,
num_samples=split_to_sizes[split_name],
items_to_descriptions=items_to_descriptions,
num_classes=num_classes,
labels_to_names=labels_to_names)
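
A minimal usage sketch with slim's dataset provider; the file pattern, split sizes and descriptions below are illustrative assumptions:

dataset = get_split('train', '/data/voc', 'voc_%s_*.tfrecord', None,
                    split_to_sizes={'train': 5011, 'test': 4952},
                    items_to_descriptions={'image': 'A color image.'},
                    num_classes=21)
provider = slim.dataset_data_provider.DatasetDataProvider(dataset, shuffle=True)
image, shape, bboxes, labels = provider.get(
    ['image', 'shape', 'object/bbox', 'object/label'])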

@@ -0,0 +1,223 @@
# Copyright 2015 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Converts Pascal VOC data to TFRecords file format with Example protos.
The raw Pascal VOC data set is expected to reside in JPEG files located in the
'JPEGImages' directory. Similarly, bounding box annotations are supposed to be
stored in the 'Annotations' directory.
This TensorFlow script converts the data into a sharded data set of TFRecord
files, each holding up to SAMPLES_PER_FILES (100) records. Each record within
a TFRecord file is a serialized Example proto. The Example proto contains the
following fields:
image/encoded: string containing JPEG encoded image in RGB colorspace
image/height: integer, image height in pixels
image/width: integer, image width in pixels
image/channels: integer, specifying the number of channels, always 3
image/format: string, specifying the format, always 'JPEG'
image/object/bbox/xmin: list of float specifying the 0+ human annotated
bounding boxes
image/object/bbox/xmax: list of float specifying the 0+ human annotated
bounding boxes
image/object/bbox/ymin: list of float specifying the 0+ human annotated
bounding boxes
image/object/bbox/ymax: list of float specifying the 0+ human annotated
bounding boxes
image/object/bbox/label: list of integer specifying the classification index.
image/object/bbox/label_text: list of string descriptions.
Note that the length of xmin is identical to the length of xmax, ymin and ymax
for each example.
"""
import os
import sys
import random
import numpy as np
import tensorflow as tf
import xml.etree.ElementTree as ET
from dataprep.dataset_utils import int64_feature, float_feature, bytes_feature
# TFRecords conversion parameters.
RANDOM_SEED = 4242
SAMPLES_PER_FILES = 100
def _set_voc_labels_map(class_list):
return dict(**{'none': 0}, **{cl: i + 1 for i, cl in enumerate(class_list)})
def _process_image(img_name, annot_name, class_list):
"""Process a image and annotation file.
Args:
img_name: string, path to an image file e.g., '/path/to/example.JPG'.
Returns:
image_buffer: string, JPEG encoding of RGB image.
height: integer, image height in pixels.
width: integer, image width in pixels.
"""
# Read the image file.
image_data = tf.gfile.FastGFile(img_name, 'rb').read()
class_dict = _set_voc_labels_map(class_list)
# Read the XML annotation file.
filename = annot_name
tree = ET.parse(filename)
root = tree.getroot()
# Image shape.
size = root.find('size')
shape = [int(size.find('height').text),
int(size.find('width').text),
int(size.find('depth').text)]
# Find annotations.
bboxes = []
labels = []
labels_text = []
difficult = []
truncated = []
for obj in root.findall('object'):
label = obj.find('name').text
labels.append(class_dict[label])
labels_text.append(label.encode('ascii'))
        # ElementTree elements with no children are falsy, so test against None.
        if obj.find('difficult') is not None:
difficult.append(int(obj.find('difficult').text))
else:
difficult.append(0)
        if obj.find('truncated') is not None:
truncated.append(int(obj.find('truncated').text))
else:
truncated.append(0)
bbox = obj.find('bndbox')
bboxes.append((to_valid_range(float(bbox.find('ymin').text) / shape[0]),
to_valid_range(float(bbox.find('xmin').text) / shape[1]),
to_valid_range(float(bbox.find('ymax').text) / shape[0]),
to_valid_range(float(bbox.find('xmax').text) / shape[1])
))
return image_data, shape, np.clip(bboxes, a_min=0., a_max=1.), labels, labels_text, difficult, truncated
def to_valid_range(v):
if v < 0.0:
return 0.0
if v > 1.0:
return 1.0
return v
def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
difficult, truncated):
"""Build an Example proto for an image example.
Args:
image_data: string, JPEG encoding of RGB image;
labels: list of integers, identifier for the ground truth;
labels_text: list of strings, human-readable labels;
      bboxes: list of bounding boxes; each box is a tuple of floats
        specifying [ymin, xmin, ymax, xmax] in relative coordinates. All boxes
        are assumed to belong to the same label as the image label.
shape: 3 integers, image shapes in pixels.
Returns:
Example proto
"""
xmin = []
ymin = []
xmax = []
ymax = []
for b in bboxes:
assert len(b) == 4
# pylint: disable=expression-not-assigned
[l.append(point) for l, point in zip([ymin, xmin, ymax, xmax], b)]
# pylint: enable=expression-not-assigned
image_format = b'JPEG'
example = tf.train.Example(features=tf.train.Features(feature={
'image/height': int64_feature(shape[0]),
'image/width': int64_feature(shape[1]),
'image/channels': int64_feature(shape[2]),
'image/shape': int64_feature(shape),
'image/object/bbox/xmin': float_feature(xmin),
'image/object/bbox/xmax': float_feature(xmax),
'image/object/bbox/ymin': float_feature(ymin),
'image/object/bbox/ymax': float_feature(ymax),
'image/object/bbox/label': int64_feature(labels),
'image/object/bbox/label_text': bytes_feature(labels_text),
'image/object/bbox/difficult': int64_feature(difficult),
'image/object/bbox/truncated': int64_feature(truncated),
'image/format': bytes_feature(image_format),
'image/encoded': bytes_feature(image_data)}))
return example
def _add_to_tfrecord(img_name, annot_name, class_list, tfrecord_writer):
"""Loads data from image and annotations files and add them to a TFRecord.
Args:
dataset_dir: Dataset directory;
name: Image name to add to the TFRecord;
tfrecord_writer: The TFRecord writer to use for writing.
"""
image_data, shape, bboxes, labels, labels_text, difficult, truncated = \
_process_image(img_name, annot_name, class_list)
example = _convert_to_example(image_data, labels, labels_text,
bboxes, shape, difficult, truncated)
tfrecord_writer.write(example.SerializeToString())
def _get_output_filename(output_dir, name, idx):
return os.path.join(output_dir, f"{name}_{idx:04d}.tfrecord")
def run(output_dir, classes_list, images_list, annotations_list, output_name):
"""Runs the conversion operation.
Args:
output_dir: Output directory.
"""
if not tf.gfile.Exists(output_dir):
tf.gfile.MakeDirs(output_dir)
    if len(images_list) != len(annotations_list):
        raise ValueError("Images and annotations lists are of different lengths!")
# Process dataset files.
fidx = 0
i = 0
im_annot = list(zip(images_list, annotations_list))
while i < len(im_annot):
# Open new TFRecord file.
tf_filename = _get_output_filename(output_dir, output_name, fidx)
with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:
j = 0
while i < len(im_annot) and j < SAMPLES_PER_FILES:
sys.stdout.write('\r>> Converting image %d/%d' % (i+1, len(im_annot)))
sys.stdout.flush()
img_name, annot_name = im_annot[i]
_add_to_tfrecord(img_name, annot_name, classes_list, tfrecord_writer)
i += 1
j += 1
fidx += 1
print('\nFinished converting the Pascal VOC dataset!')
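
A minimal end-to-end sketch pairing this with check_labelmatch from dataset_utils above; the paths and the single-class list are illustrative assumptions:

import glob
from dataprep.dataset_utils import check_labelmatch

images = sorted(glob.glob('/data/shelf/JPEGImages/*.jpg'))
annotations = sorted(glob.glob('/data/shelf/Annotations/*.xml'))
images, annotations = check_labelmatch(images, annotations)
run(output_dir='/data/shelf/tfrecords', classes_list=['stockout'],
    images_list=images, annotations_list=annotations, output_name='voc_train')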

@@ -0,0 +1,65 @@
import tensorflow as tf
import numpy as np
import os
from datautil.ssd_vgg_preprocessing import preprocess_for_train, preprocess_for_eval
from model import ssd_common
from tfutil import tf_utils
features = {
'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
'image/height': tf.FixedLenFeature([1], tf.int64),
'image/width': tf.FixedLenFeature([1], tf.int64),
'image/channels': tf.FixedLenFeature([1], tf.int64),
'image/shape': tf.FixedLenFeature([3], tf.int64),
'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
}
def get_parser_func(anchors, num_classes, is_training, var_scope):
    '''
    Build a dataset parser function for training or evaluation.
    Arguments:
    anchors - SSD default anchor boxes for all feature layers
    num_classes - number of classes, including background
    is_training - selects preprocess_for_train vs. preprocess_for_eval
    var_scope - variable scope the parsing ops are created under
    '''
preprocess_fn = preprocess_for_train if is_training else preprocess_for_eval
def parse_tfrec_data(example_proto):
with tf.variable_scope(var_scope):
parsed_features = tf.parse_single_example(example_proto, features)
image_string = parsed_features['image/encoded']
image_decoded = tf.image.decode_jpeg(image_string)
labels = tf.sparse.to_dense(parsed_features['image/object/bbox/label'])
xmin = tf.sparse.to_dense(parsed_features['image/object/bbox/xmin'])
xmax = tf.sparse.to_dense(parsed_features['image/object/bbox/xmax'])
ymin = tf.sparse.to_dense(parsed_features['image/object/bbox/ymin'])
ymax = tf.sparse.to_dense(parsed_features['image/object/bbox/ymax'])
bboxes = tf.stack([ymin, xmin, ymax, xmax], axis=1)
if is_training:
image, labels, bboxes = preprocess_fn(image_decoded, labels, bboxes)
else:
image, labels, bboxes, _ = preprocess_fn(image_decoded, labels, bboxes)
            # ground truth encoding
            # each of the returns is a list of tensors
if is_training:
classes, localisations, scores = \
ssd_common.tf_ssd_bboxes_encode(labels, bboxes, anchors, num_classes)
return tf_utils.reshape_list([image, classes, localisations, scores])
else:
return tf_utils.reshape_list([image, labels, bboxes])
return parse_tfrec_data
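
A minimal TF 1.x input-pipeline sketch built on this parser; anchors come from the generate_anchors module above, and the record file name is an assumption:

from anchors import generate_anchors

anchors = generate_anchors.ssd_anchors_all_layers()
parse_fn = get_parser_func(anchors, num_classes=21, is_training=True,
                           var_scope='parse_train')
dataset = tf.data.TFRecordDataset(['voc_train_0000.tfrecord'])
dataset = dataset.map(parse_fn).batch(2).prefetch(1)
iterator = dataset.make_initializable_iterator()
batch = iterator.get_next()  # flat list: image, then per-layer targets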

@@ -0,0 +1,397 @@
# Copyright 2015 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Pre-processing images for SSD-type networks.
"""
from enum import Enum, IntEnum
import numpy as np
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops
import tfextended as tfe
from datautil import tf_image
# Resizing strategies.
Resize = IntEnum('Resize', ('NONE', # Nothing!
'CENTRAL_CROP', # Crop (and pad if necessary).
'PAD_AND_RESIZE', # Pad, and resize to output shape.
'WARP_RESIZE')) # Warp resize.
# VGG mean parameters.
_R_MEAN = 123.
_G_MEAN = 117.
_B_MEAN = 104.
# Some training pre-processing parameters.
BBOX_CROP_OVERLAP = 0.5 # Minimum overlap to keep a bbox after cropping.
MIN_OBJECT_COVERED = 0.25
CROP_RATIO_RANGE = (0.6, 1.67) # Distortion ratio during cropping.
EVAL_SIZE = (300, 300)
def tf_image_whitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN]):
"""Subtracts the given means from each image channel.
Returns:
the centered image.
"""
if image.get_shape().ndims != 3:
raise ValueError('Input must be of size [height, width, C>0]')
mean = tf.constant(means, dtype=image.dtype)
image = image - mean
return image
def tf_image_unwhitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN], to_int=True):
"""Re-convert to original image distribution, and convert to int if
necessary.
Returns:
Centered image.
"""
mean = tf.constant(means, dtype=image.dtype)
image = image + mean
if to_int:
image = tf.cast(image, tf.int32)
return image
def np_image_unwhitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN], to_int=True):
"""Re-convert to original image distribution, and convert to int if
necessary. Numpy version.
Returns:
Centered image.
"""
img = np.copy(image)
img += np.array(means, dtype=img.dtype)
if to_int:
img = img.astype(np.uint8)
return img
def tf_summary_image(image, bboxes, name='image', unwhitened=False):
"""Add image with bounding boxes to summary.
"""
if unwhitened:
image = tf_image_unwhitened(image)
image = tf.expand_dims(image, 0)
bboxes = tf.expand_dims(bboxes, 0)
image_with_box = tf.image.draw_bounding_boxes(image, bboxes)
tf.summary.image(name, image_with_box)
def apply_with_random_selector(x, func, num_cases):
"""Computes func(x, sel), with sel sampled from [0...num_cases-1].
Args:
x: input Tensor.
func: Python function to apply.
num_cases: Python int32, number of cases to sample sel from.
Returns:
The result of func(x, sel), where func receives the value of the
selector as a python integer, but sel is sampled dynamically.
"""
sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
# Pass the real x only to one of the func calls.
return control_flow_ops.merge([
func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case)
for case in range(num_cases)])[0]
def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
"""Distort the color of a Tensor image.
Each color distortion is non-commutative and thus ordering of the color ops
matters. Ideally we would randomly permute the ordering of the color ops.
    Rather than adding that level of complication, we select a distinct ordering
of color ops for each preprocessing thread.
Args:
image: 3-D Tensor containing single image in [0, 1].
color_ordering: Python int, a type of distortion (valid values: 0-3).
fast_mode: Avoids slower ops (random_hue and random_contrast)
scope: Optional scope for name_scope.
Returns:
3-D Tensor color-distorted image on range [0, 1]
Raises:
ValueError: if color_ordering not in [0, 3]
"""
with tf.name_scope(scope, 'distort_color', [image]):
if fast_mode:
if color_ordering == 0:
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
else:
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_brightness(image, max_delta=32. / 255.)
else:
if color_ordering == 0:
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
elif color_ordering == 1:
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
elif color_ordering == 2:
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
elif color_ordering == 3:
image = tf.image.random_hue(image, max_delta=0.2)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
image = tf.image.random_brightness(image, max_delta=32. / 255.)
else:
raise ValueError('color_ordering must be in [0, 3]')
# The random_* ops do not necessarily clamp.
return tf.clip_by_value(image, 0.0, 1.0)
def distorted_bounding_box_crop(image,
labels,
bboxes,
min_object_covered=0.3,
aspect_ratio_range=(0.9, 1.1),
area_range=(0.1, 1.0),
max_attempts=200,
clip_bboxes=True,
scope=None):
"""Generates cropped_image using a one of the bboxes randomly distorted.
See `tf.image.sample_distorted_bounding_box` for more documentation.
Args:
image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
      bboxes: 2-D float Tensor of bounding boxes arranged [num_boxes, coords],
        where each coordinate is in [0, 1) and the coordinates are arranged
        as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then the whole image
        is used.
min_object_covered: An optional `float`. Defaults to `0.1`. The cropped
area of the image must contain at least this fraction of any bounding box
supplied.
aspect_ratio_range: An optional list of `floats`. The cropped area of the
image must have an aspect ratio = width / height within this range.
area_range: An optional list of `floats`. The cropped area of the image
        must contain a fraction of the supplied image within this range.
max_attempts: An optional `int`. Number of attempts at generating a cropped
region of the image of the specified constraints. After `max_attempts`
failures, return the entire image.
scope: Optional scope for name_scope.
Returns:
A tuple, a 3-D Tensor cropped_image and the distorted bbox
"""
with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes]):
# Each bounding box has shape [1, num_boxes, box coords] and
# the coordinates are ordered [ymin, xmin, ymax, xmax].
bbox_begin, bbox_size, distort_bbox = tf.image.sample_distorted_bounding_box(
tf.shape(image),
bounding_boxes=tf.expand_dims(bboxes, 0),
min_object_covered=min_object_covered,
aspect_ratio_range=aspect_ratio_range,
area_range=area_range,
max_attempts=max_attempts,
use_image_if_no_bounding_boxes=True)
distort_bbox = distort_bbox[0, 0]
# Crop the image to the specified bounding box.
cropped_image = tf.slice(image, bbox_begin, bbox_size)
# Restore the shape since the dynamic slice loses 3rd dimension.
cropped_image.set_shape([None, None, 3])
# Update bounding boxes: resize and filter out.
bboxes = tfe.bboxes_resize(distort_bbox, bboxes)
labels, bboxes = tfe.bboxes_filter_overlap(labels, bboxes,
threshold=BBOX_CROP_OVERLAP,
assign_negative=False)
return cropped_image, labels, bboxes, distort_bbox
def preprocess_for_train(image, labels, bboxes,
out_shape = (300, 300), data_format='NHWC',
scope='ssd_preprocessing_train'):
"""Preprocesses the given image for training.
Note that the actual resizing scale is sampled from
[`resize_size_min`, `resize_size_max`].
Args:
image: A `Tensor` representing an image of arbitrary size.
output_height: The height of the image after preprocessing.
output_width: The width of the image after preprocessing.
resize_side_min: The lower bound for the smallest side of the image for
aspect-preserving resizing.
resize_side_max: The upper bound for the smallest side of the image for
aspect-preserving resizing.
Returns:
A preprocessed image.
"""
fast_mode = False
with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
if image.get_shape().ndims != 3:
raise ValueError('Input must be of size [height, width, C>0]')
# Convert to float scaled [0, 1].
if image.dtype != tf.float32:
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
tf_summary_image(image, bboxes, 'image_with_bboxes')
# # Remove DontCare labels.
# labels, bboxes = ssd_common.tf_bboxes_filter_labels(out_label,
# labels,
# bboxes)
# Distort image and bounding boxes.
dst_image = image
dst_image, labels, bboxes, distort_bbox = \
distorted_bounding_box_crop(image, labels, bboxes,
min_object_covered=MIN_OBJECT_COVERED,
aspect_ratio_range=CROP_RATIO_RANGE)
# Resize image to output size.
dst_image = tf_image.resize_image(dst_image, out_shape,
method=tf.image.ResizeMethod.BILINEAR,
align_corners=False)
tf_summary_image(dst_image, bboxes, 'image_shape_distorted')
# Randomly flip the image horizontally.
dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)
# Randomly distort the colors. There are 4 ways to do it.
dst_image = apply_with_random_selector(
dst_image,
lambda x, ordering: distort_color(x, ordering, fast_mode),
num_cases=4)
tf_summary_image(dst_image, bboxes, 'image_color_distorted')
# Rescale to VGG input scale.
image = dst_image * 255.
image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
# Image data format.
if data_format == 'NCHW':
image = tf.transpose(image, perm=(2, 0, 1))
return image, labels, bboxes
def preprocess_for_eval(image, labels, bboxes,
out_shape=EVAL_SIZE, data_format='NHWC',
difficults=None, resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_eval'):
"""Preprocess an image for evaluation.
Args:
image: A `Tensor` representing an image of arbitrary size.
out_shape: Output shape after pre-processing (if resize != None)
resize: Resize strategy.
Returns:
A preprocessed image.
"""
with tf.name_scope(scope):
if image.get_shape().ndims != 3:
raise ValueError('Input must be of size [height, width, C>0]')
image = tf.cast(image, tf.float32)
image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
# Add image rectangle to bboxes.
bbox_img = tf.constant([[0., 0., 1., 1.]])
if bboxes is None:
bboxes = bbox_img
else:
bboxes = tf.concat([bbox_img, bboxes], axis=0)
if resize == Resize.NONE:
# No resizing...
pass
elif resize == Resize.CENTRAL_CROP:
# Central cropping of the image.
image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
image, bboxes, out_shape[0], out_shape[1])
elif resize == Resize.PAD_AND_RESIZE:
# Resize image first: find the correct factor...
shape = tf.shape(image)
factor = tf.minimum(tf.to_double(1.0),
tf.minimum(tf.to_double(out_shape[0] / shape[0]),
tf.to_double(out_shape[1] / shape[1])))
resize_shape = factor * tf.to_double(shape[0:2])
resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)
image = tf_image.resize_image(image, resize_shape,
method=tf.image.ResizeMethod.BILINEAR,
align_corners=False)
# Pad to expected size.
image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
image, bboxes, out_shape[0], out_shape[1])
elif resize == Resize.WARP_RESIZE:
# Warp resize of the image.
image = tf_image.resize_image(image, out_shape,
method=tf.image.ResizeMethod.BILINEAR,
align_corners=False)
# Split back bounding boxes.
bbox_img = bboxes[0]
bboxes = bboxes[1:]
# Remove difficult boxes.
if difficults is not None:
mask = tf.logical_not(tf.cast(difficults, tf.bool))
labels = tf.boolean_mask(labels, mask)
bboxes = tf.boolean_mask(bboxes, mask)
# Image data format.
if data_format == 'NCHW':
image = tf.transpose(image, perm=(2, 0, 1))
return image, labels, bboxes, bbox_img
def preprocess_image(image,
labels,
bboxes,
out_shape,
data_format,
is_training=False,
**kwargs):
"""Pre-process an given image.
Args:
image: A `Tensor` representing an image of arbitrary size.
output_height: The height of the image after preprocessing.
output_width: The width of the image after preprocessing.
is_training: `True` if we're preprocessing the image for training and
`False` otherwise.
resize_side_min: The lower bound for the smallest side of the image for
aspect-preserving resizing. If `is_training` is `False`, then this value
is used for rescaling.
resize_side_max: The upper bound for the smallest side of the image for
aspect-preserving resizing. If `is_training` is `False`, this value is
ignored. Otherwise, the resize side is sampled from
[resize_size_min, resize_size_max].
Returns:
A preprocessed image.
"""
if is_training:
return preprocess_for_train(image, labels, bboxes,
out_shape=out_shape,
data_format=data_format)
else:
return preprocess_for_eval(image, labels, bboxes,
out_shape=out_shape,
data_format=data_format,
**kwargs)
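
A minimal sketch of the eval path on a raw uint8 image tensor, mirroring how the inference code later in this commit drives it (TF 1.x):

img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
image, _, _, bbox_img = preprocess_for_eval(img_input, None, None,
                                            out_shape=EVAL_SIZE)
# image: whitened float32 Tensor of shape (300, 300, 3) in NHWC layout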

@@ -0,0 +1,306 @@
# Copyright 2015 The TensorFlow Authors and Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Custom image operations.
Most of the following methods extend TensorFlow image library, and part of
the code is shameless copy-paste of the former!
"""
import tensorflow as tf
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_image_ops
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import string_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import variables
# =========================================================================== #
# Modification of TensorFlow image routines.
# =========================================================================== #
def _assert(cond, ex_type, msg):
"""A polymorphic assert, works with tensors and boolean expressions.
If `cond` is not a tensor, behave like an ordinary assert statement, except
    that an empty list is returned. If `cond` is a tensor, return a list
containing a single TensorFlow assert op.
Args:
cond: Something evaluates to a boolean value. May be a tensor.
ex_type: The exception class to use.
msg: The error message.
Returns:
A list, containing at most one assert op.
"""
if _is_tensor(cond):
return [control_flow_ops.Assert(cond, [msg])]
else:
if not cond:
raise ex_type(msg)
else:
return []
def _is_tensor(x):
"""Returns `True` if `x` is a symbolic tensor-like object.
Args:
x: A python object to check.
Returns:
`True` if `x` is a `tf.Tensor` or `tf.Variable`, otherwise `False`.
"""
return isinstance(x, (ops.Tensor, variables.Variable))
def _ImageDimensions(image):
"""Returns the dimensions of an image tensor.
Args:
image: A 3-D Tensor of shape `[height, width, channels]`.
Returns:
A list of `[height, width, channels]` corresponding to the dimensions of the
input image. Dimensions that are statically known are python integers,
otherwise they are integer scalar tensors.
"""
if image.get_shape().is_fully_defined():
return image.get_shape().as_list()
else:
static_shape = image.get_shape().with_rank(3).as_list()
dynamic_shape = array_ops.unstack(array_ops.shape(image), 3)
return [s if s is not None else d
for s, d in zip(static_shape, dynamic_shape)]
def _Check3DImage(image, require_static=True):
"""Assert that we are working with properly shaped image.
Args:
image: 3-D Tensor of shape [height, width, channels]
require_static: If `True`, requires that all dimensions of `image` are
known and non-zero.
Raises:
ValueError: if `image.shape` is not a 3-vector.
Returns:
An empty list, if `image` has fully defined dimensions. Otherwise, a list
containing an assert op is returned.
"""
try:
image_shape = image.get_shape().with_rank(3)
except ValueError:
raise ValueError("'image' must be three-dimensional.")
if require_static and not image_shape.is_fully_defined():
raise ValueError("'image' must be fully defined.")
if any(x == 0 for x in image_shape):
raise ValueError("all dims of 'image.shape' must be > 0: %s" %
image_shape)
if not image_shape.is_fully_defined():
return [check_ops.assert_positive(array_ops.shape(image),
["all dims of 'image.shape' "
"must be > 0."])]
else:
return []
def fix_image_flip_shape(image, result):
"""Set the shape to 3 dimensional if we don't know anything else.
Args:
image: original image size
result: flipped or transformed image
Returns:
An image whose shape is at least None,None,None.
"""
image_shape = image.get_shape()
if image_shape == tensor_shape.unknown_shape():
result.set_shape([None, None, None])
else:
result.set_shape(image_shape)
return result
# =========================================================================== #
# Image + BBoxes methods: cropping, resizing, flipping, ...
# =========================================================================== #
def bboxes_crop_or_pad(bboxes,
height, width,
offset_y, offset_x,
target_height, target_width):
"""Adapt bounding boxes to crop or pad operations.
Coordinates are always supposed to be relative to the image.
Arguments:
bboxes: Tensor Nx4 with bboxes coordinates [y_min, x_min, y_max, x_max];
height, width: Original image dimension;
offset_y, offset_x: Offset to apply,
negative if cropping, positive if padding;
target_height, target_width: Target dimension after cropping / padding.
"""
with tf.name_scope('bboxes_crop_or_pad'):
# Rescale bounding boxes in pixels.
scale = tf.cast(tf.stack([height, width, height, width]), bboxes.dtype)
bboxes = bboxes * scale
# Add offset.
offset = tf.cast(tf.stack([offset_y, offset_x, offset_y, offset_x]), bboxes.dtype)
bboxes = bboxes + offset
# Rescale to target dimension.
scale = tf.cast(tf.stack([target_height, target_width,
target_height, target_width]), bboxes.dtype)
bboxes = bboxes / scale
return bboxes
def resize_image_bboxes_with_crop_or_pad(image, bboxes,
target_height, target_width):
"""Crops and/or pads an image to a target width and height.
Resizes an image to a target width and height by either centrally
cropping the image or padding it evenly with zeros.
If `width` or `height` is greater than the specified `target_width` or
`target_height` respectively, this op centrally crops along that dimension.
If `width` or `height` is smaller than the specified `target_width` or
`target_height` respectively, this op centrally pads with 0 along that
dimension.
Args:
image: 3-D tensor of shape `[height, width, channels]`
target_height: Target height.
target_width: Target width.
Raises:
ValueError: if `target_height` or `target_width` are zero or negative.
Returns:
Cropped and/or padded image of shape
`[target_height, target_width, channels]`
"""
with tf.name_scope('resize_with_crop_or_pad'):
image = ops.convert_to_tensor(image, name='image')
assert_ops = []
assert_ops += _Check3DImage(image, require_static=False)
assert_ops += _assert(target_width > 0, ValueError,
'target_width must be > 0.')
assert_ops += _assert(target_height > 0, ValueError,
'target_height must be > 0.')
image = control_flow_ops.with_dependencies(assert_ops, image)
# `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks.
# Make sure our checks come first, so that error messages are clearer.
if _is_tensor(target_height):
target_height = control_flow_ops.with_dependencies(
assert_ops, target_height)
if _is_tensor(target_width):
target_width = control_flow_ops.with_dependencies(assert_ops, target_width)
def max_(x, y):
if _is_tensor(x) or _is_tensor(y):
return math_ops.maximum(x, y)
else:
return max(x, y)
def min_(x, y):
if _is_tensor(x) or _is_tensor(y):
return math_ops.minimum(x, y)
else:
return min(x, y)
def equal_(x, y):
if _is_tensor(x) or _is_tensor(y):
return math_ops.equal(x, y)
else:
return x == y
height, width, _ = _ImageDimensions(image)
width_diff = target_width - width
offset_crop_width = max_(-width_diff // 2, 0)
offset_pad_width = max_(width_diff // 2, 0)
height_diff = target_height - height
offset_crop_height = max_(-height_diff // 2, 0)
offset_pad_height = max_(height_diff // 2, 0)
# Maybe crop if needed.
height_crop = min_(target_height, height)
width_crop = min_(target_width, width)
cropped = tf.image.crop_to_bounding_box(image, offset_crop_height, offset_crop_width,
height_crop, width_crop)
bboxes = bboxes_crop_or_pad(bboxes,
height, width,
-offset_crop_height, -offset_crop_width,
height_crop, width_crop)
# Maybe pad if needed.
resized = tf.image.pad_to_bounding_box(cropped, offset_pad_height, offset_pad_width,
target_height, target_width)
bboxes = bboxes_crop_or_pad(bboxes,
height_crop, width_crop,
offset_pad_height, offset_pad_width,
target_height, target_width)
# In theory all the checks below are redundant.
if resized.get_shape().ndims is None:
raise ValueError('resized contains no shape.')
resized_height, resized_width, _ = _ImageDimensions(resized)
assert_ops = []
assert_ops += _assert(equal_(resized_height, target_height), ValueError,
'resized height is not correct.')
assert_ops += _assert(equal_(resized_width, target_width), ValueError,
'resized width is not correct.')
resized = control_flow_ops.with_dependencies(assert_ops, resized)
return resized, bboxes
def resize_image(image, size,
method=tf.image.ResizeMethod.BILINEAR,
align_corners=False):
"""Resize an image and bounding boxes.
"""
# Resize image.
with tf.name_scope('resize_image'):
height, width, channels = _ImageDimensions(image)
image = tf.expand_dims(image, 0)
image = tf.image.resize_images(image, size,
method, align_corners)
image = tf.reshape(image, tf.stack([size[0], size[1], channels]))
return image
def random_flip_left_right(image, bboxes, seed=None):
"""Random flip left-right of an image and its bounding boxes.
"""
def flip_bboxes(bboxes):
"""Flip bounding boxes coordinates.
"""
bboxes = tf.stack([bboxes[:, 0], 1 - bboxes[:, 3],
bboxes[:, 2], 1 - bboxes[:, 1]], axis=-1)
return bboxes
# Random flip. Tensorflow implementation.
with tf.name_scope('random_flip_left_right'):
image = ops.convert_to_tensor(image, name='image')
_Check3DImage(image, require_static=False)
uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed)
mirror_cond = math_ops.less(uniform_random, .5)
# Flip image.
result = control_flow_ops.cond(mirror_cond,
lambda: array_ops.reverse_v2(image, [1]),
lambda: image)
# Flip bboxes.
bboxes = control_flow_ops.cond(mirror_cond,
lambda: flip_bboxes(bboxes),
lambda: bboxes)
return fix_image_flip_shape(image, result), bboxes
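
A minimal TF 1.x check of the flip helper; the image and box values are illustrative:

with tf.Session() as sess:
    image = tf.zeros([4, 6, 3])
    bboxes = tf.constant([[0.1, 0.2, 0.5, 0.4]])  # [ymin, xmin, ymax, xmax]
    flipped, flipped_boxes = random_flip_left_right(image, bboxes, seed=0)
    # when the flip fires, xmin/xmax become 1 - xmax and 1 - xmin
    print(sess.run(flipped_boxes))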

@@ -0,0 +1,159 @@
import tensorflow as tf
import numpy as np
import os, sys, time
from anchors import generate_anchors
from model import ssd_common, ssd_vgg_300
from datautil.parser import get_parser_func
from datautil.ssd_vgg_preprocessing import preprocess_for_eval, preprocess_for_train
from tfutil import endpoints, tf_utils
import tfextended as tfe
from finetune.train_eval_base import TrainerBase
class EvalVggSsd(TrainerBase):
    '''
    Run evaluation of a fine-tuned model
    over a validation recordset
    '''
def __init__(self, ckpt_dir, validation_recordset_files, steps_to_save = 1000, num_steps = 1000, num_classes = 21, print_steps = 10):
        '''
        ckpt_dir - directory of checkpoint metagraph
        validation_recordset_files - list of files representing the validation recordset
        '''
super().__init__(ckpt_dir, validation_recordset_files, steps_to_save, num_steps, num_classes, print_steps, 1, is_training=False)
self.eval_classes = num_classes
def get_eval_ops(self, b_labels, b_bboxes, predictions, localizations):
'''
Create evaluation operation
'''
b_difficults = tf.zeros(tf.shape(b_labels), dtype=tf.int64)
# Performing post-processing on CPU: loop-intensive, usually more efficient.
with tf.device('/device:CPU:0'):
# Detected objects from SSD output.
detected_localizations = self.ssd_net.bboxes_decode(localizations, self.anchors)
rscores, rbboxes = \
self.ssd_net.detected_bboxes(predictions, detected_localizations,
select_threshold=0.01,
nms_threshold=0.45,
clipping_bbox=None,
top_k=400,
keep_top_k=20)
# Compute TP and FP statistics.
num_gbboxes, tp, fp, rscores = \
tfe.bboxes_matching_batch(rscores.keys(), rscores, rbboxes,
b_labels, b_bboxes, b_difficults,
matching_threshold=0.5)
# =================================================================== #
# Evaluation metrics.
# =================================================================== #
dict_metrics = {}
metrics_scope = 'ssd_metrics_scope'
# First add all losses.
for loss in tf.get_collection(tf.GraphKeys.LOSSES):
dict_metrics[loss.op.name] = tf.metrics.mean(loss, name=metrics_scope)
# Extra losses as well.
for loss in tf.get_collection('EXTRA_LOSSES'):
dict_metrics[loss.op.name] = tf.metrics.mean(loss, name=metrics_scope)
# Add metrics to summaries and Print on screen.
for name, metric in dict_metrics.items():
# summary_name = 'eval/%s' % name
summary_name = name
tf.summary.scalar(summary_name, metric[0])
# FP and TP metrics.
tp_fp_metric = tfe.streaming_tp_fp_arrays(num_gbboxes, tp, fp, rscores, name=metrics_scope)
for c in tp_fp_metric[0].keys():
dict_metrics['tp_fp_%s' % c] = (tp_fp_metric[0][c],
tp_fp_metric[1][c])
# Add to summaries precision/recall values.
aps_voc12 = {}
# TODO: We cut it short by the actual number of classes we have
for c in list(tp_fp_metric[0].keys())[:self.eval_classes - 1]:
# Precison and recall values.
prec, rec = tfe.precision_recall(*tp_fp_metric[0][c])
# Average precision VOC12.
v = tfe.average_precision_voc12(prec, rec)
summary_name = 'AP_VOC12/%s' % c
tf.summary.scalar(summary_name, v)
aps_voc12[c] = v
# Mean average precision VOC12.
summary_name = 'AP_VOC12/mAP'
mAP = tf.add_n(list(aps_voc12.values())) / len(aps_voc12)
tf.summary.scalar(summary_name, mAP)
names_to_values, names_to_updates = tf.contrib.metrics.aggregate_metric_map(dict_metrics)
# Split into values and updates ops.
return (names_to_values, names_to_updates, mAP)
def eval(self):
tf.logging.set_verbosity(tf.logging.INFO)
# shorthand
sess = self.sess
sess.run(self.iterator.initializer)
batch_data = self.iterator.get_next()
# image, classes, scores, ground_truths are neatly packed into a flat list
# this is how we will slice it to extract the data we need:
# we will convert the flat list into a list of lists, where each sub-list
# is as long as each slice dimension
slice_shape = [1] * 3
b_image, b_labels, b_bboxes = tf_utils.reshape_list(batch_data, slice_shape)
# network endpoints
predictions, localizations, _, _ = self.get_output_tensors(b_image)
# branch to create evaluation operation
_, names_to_updates, mAP = \
self.get_eval_ops(b_labels, b_bboxes, predictions, localizations)
eval_update_ops = tf_utils.reshape_list(list(names_to_updates.values()))
# summaries
summary_op = tf.summary.merge_all()
saver = tf.train.Saver()
eval_writer = tf.summary.FileWriter(self.ckpt_dir + '/eval')
# initialize globals
sess.run(tf.global_variables_initializer())
saver.restore(self.sess, self.ckpt_file)
sess.run(tf.local_variables_initializer())
tf.logging.info(f"Starting evaluation for {self.num_steps} steps")
cur_step = self.latest_ckpt_step
for step in range(self.num_steps):
print(f"Evaluation step: {step + 1}", end='\r', flush=True)
_, summary = sess.run([eval_update_ops, summary_op])
            if (step + 1) % self.print_steps == 0 or step == self.num_steps - 1:
eval_writer.add_summary(summary, cur_step + step + 1)
summary_final, mAP_val = sess.run([summary_op, mAP])
print(f"\nmAP: {mAP_val:.4f}")
if (step + 1) % self.print_steps != 0:
eval_writer.add_summary(summary_final, self.num_steps + cur_step)
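
A minimal usage sketch; the checkpoint directory and recordset file are illustrative assumptions:

evaluator = EvalVggSsd('models/ssdvgg', ['voc_test_0000.tfrecord'], num_steps=500)
evaluator.eval()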

@@ -0,0 +1,95 @@
import tensorflow as tf
import os, time
from anchors import generate_anchors
from model import np_methods
from tfutil import endpoints, tf_utils
from datautil.ssd_vgg_preprocessing import preprocess_for_eval
import tfextended as tfe
from azureml.accel.models import SsdVgg
class InferVggSsd:
    '''
    Run inference with a fine-tuned SSD-VGG model
    on single images
    '''
def __init__(self, ckpt_dir, ckpt_file=None, gpu=True):
'''
ckpt_dir - directory of checkpoint metagraph
'''
if gpu:
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
else:
config = tf.ConfigProto(log_device_placement=False, device_count={'GPU': 0})
self.sess = tf.Session(config=config)
ssd_net_graph = SsdVgg(ckpt_dir)
self.ckpt_dir = ssd_net_graph.model_path
self.img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
image_pre, _, _, self.bbox_img = preprocess_for_eval(
self.img_input, None, None, generate_anchors.img_shape, "NHWC")
self.image_4d = tf.expand_dims(image_pre, 0)
# import the graph
ssd_net_graph.import_graph_def(self.image_4d, is_training=False)
graph = tf.get_default_graph()
self.localizations = [graph.get_tensor_by_name(tensor_name) for tensor_name in endpoints.localizations_names]
self.predictions = [graph.get_tensor_by_name(tensor_name) for tensor_name in endpoints.predictions_names]
# Restore SSD model.
self.sess.run(tf.global_variables_initializer())
if ckpt_file is None:
ssd_net_graph.restore_weights(self.sess)
else:
saver = tf.train.Saver()
saver.restore(self.sess, os.path.join(self.ckpt_dir, ckpt_file))
# SSD default anchor boxes.
self.ssd_anchors = generate_anchors.ssd_anchors_all_layers()
def close(self):
self.sess.close()
tf.reset_default_graph()
def process_image(self, img, select_threshold=0.4, nms_threshold=.45, net_shape=(300, 300)):
# Run SSD network.
rpredictions, rlocalisations, rbbox_img = \
self.sess.run([self.predictions, self.localizations, self.bbox_img],
feed_dict={self.img_input: img})
# Get classes and bboxes from the net outputs.
rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
rpredictions, rlocalisations, self.ssd_anchors,
select_threshold=select_threshold, img_shape=net_shape, num_classes=21, decode=True)
rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
rclasses, rscores, rbboxes = np_methods.bboxes_nms(rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
return rclasses, rscores, rbboxes
def infer(self, img, visualize):
rclasses, rscores, rbboxes = self.process_image(img)
if visualize:
from tfutil import visualization
visualization.plt_bboxes(img, rclasses, rscores, rbboxes)
return rclasses, rscores, rbboxes
def infer_file(self, im_file, visualize=False):
import cv2
img = cv2.imread(im_file)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
return self.infer(img, visualize)
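
A minimal usage sketch; the model directory and image path are illustrative assumptions:

infer = InferVggSsd('models/ssdvgg', gpu=False)
classes, scores, bboxes = infer.infer_file('shelf.jpg', visualize=True)
infer.close()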

@@ -0,0 +1,57 @@
import tensorflow as tf
import os, time
from azureml.accel.models import SsdVgg
import azureml.accel.models.utils as utils
class SaverVggSsd:
    '''
    Export a fine-tuned SSD-VGG model
    as a TensorFlow SavedModel for deployment
    '''
def __init__(self, ckpt_dir):
'''
ckpt_dir - directory of checkpoint metagraph
'''
config = tf.ConfigProto(log_device_placement=False, device_count={'GPU': 0})
self.sess = tf.Session(config=config)
ssd_net_graph = SsdVgg(ckpt_dir, is_frozen=True)
self.ckpt_dir = ssd_net_graph.model_path
self.in_images = tf.placeholder(tf.string)
self.image_tensors = utils.preprocess_array(self.in_images, output_width=300, output_height=300,
preserve_aspect_ratio=False)
self.output_tensors = ssd_net_graph.import_graph_def(self.image_tensors, is_training=False)
self.output_names = ssd_net_graph.output_tensor_list
self.input_name_str = self.in_images.name
# Restore SSD model.
ssd_net_graph.restore_weights(self.sess)
def __enter__(self):
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.close()
def close(self):
self.sess.close()
tf.reset_default_graph()
def save_for_deployment(self, saved_path):
output_map = {'out_{}'.format(i): output for i, output in enumerate(self.output_tensors)}
tf.saved_model.simple_save(self.sess,
saved_path,
inputs={"images": self.in_images},
outputs=output_map)
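
A minimal usage sketch; the directories are illustrative assumptions:

with SaverVggSsd('models/ssdvgg') as net_saver:
    net_saver.save_for_deployment('models/ssdvgg/saved_model')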

@@ -0,0 +1,144 @@
import tensorflow as tf
import numpy as np
import os, sys, time, re
from anchors import generate_anchors
from model import ssd_common, ssd_vgg_300
from datautil.parser import get_parser_func
from datautil.ssd_vgg_preprocessing import preprocess_for_eval, preprocess_for_train
from tfutil import endpoints, tf_utils
import tfextended as tfe
from finetune.train_eval_base import TrainerBase
class TrainVggSsd(TrainerBase):
    '''
    Run fine-tuning
    over a training recordset
    '''
def __init__(self, ckpt_dir, train_recordset_files,
steps_to_save = 1000, num_steps = 1000, num_classes = 21,
print_steps = 10, batch_size = 2,
learning_rate = 1e-4, learning_rate_decay_steps=None, learning_rate_decay_value = None,
adam_beta1 = 0.9, adam_beta2 = 0.999, adam_epsilon = 1e-8):
        '''
        ckpt_dir - directory of checkpoint metagraph
        train_recordset_files - list of files representing the recordset for training
        '''
super().__init__(ckpt_dir, train_recordset_files, steps_to_save, num_steps, num_classes, print_steps, batch_size)
# optimizer parameters
self.learning_rate = learning_rate
self.learning_rate_decay_steps = learning_rate_decay_steps
self.learning_rate_decay_value = learning_rate_decay_value
if self.learning_rate <= 0 \
or (self.learning_rate_decay_value is not None and self.learning_rate_decay_value <= 0) \
or (self.learning_rate_decay_steps is not None and self.learning_rate_decay_steps <= 0) \
or (self.learning_rate_decay_steps is None and self.learning_rate_decay_value is not None) \
or (self.learning_rate_decay_steps is not None and self.learning_rate_decay_value is None):
raise ValueError("learning rate, learning rate steps, learning rate decay must be positive, \
learning decay steps and value must be both present or both absent")
self.adam_beta1 = adam_beta1
self.adam_beta2 = adam_beta2
self.adam_epsilon = adam_epsilon
def get_optimizer(self, learning_rate):
optimizer = tf.train.AdamOptimizer(
learning_rate,
beta1=self.adam_beta1,
beta2=self.adam_beta2,
epsilon=self.adam_epsilon)
return optimizer
def get_learning_rate(self, global_step):
'''
Configure learning rate based on decay specifications
'''
if self.learning_rate_decay_steps is None:
return tf.constant(self.learning_rate, name = 'fixed_learning_rate')
else:
return tf.train.exponential_decay(self.learning_rate, global_step, \
self.learning_rate_decay_steps, self.learning_rate_decay_value, \
staircase=True, name="exponential_decay_learning_rate")
def train(self):
tf.logging.set_verbosity(tf.logging.INFO)
# shorthand
sess = self.sess
batch_data = self.iterator.get_next()
# image, classes, scores, ground_truths are neatly packed into a flat list
# this is how we will slice it to extract the data we need:
# we will convert the flat list into a list of lists, where each sub-list
# is as long as each slice dimension
slice_shape = [1] + [len(self.anchors)] * 3
b_image, b_classes, b_localizations, b_scores = tf_utils.reshape_list(batch_data, slice_shape)
# network endpoints
_, localizations, logits, bw_saver = self.get_output_tensors(b_image)
variables_to_train = tf.trainable_variables()
sess.run(tf.variables_initializer(variables_to_train))  # tf.initialize_variables is deprecated
# add losses
total_loss = self.ssd_net.losses(logits, localizations, b_classes, b_localizations, b_scores)
tf.summary.scalar("total_loss", total_loss)
global_step = tf.train.get_or_create_global_step()
learning_rate = self.get_learning_rate(global_step)
# configure learning rate now that we have the global step
# add optimizer
optimizer = self.get_optimizer(learning_rate)
tf.summary.scalar("learning_rate", learning_rate)
grads_and_vars = optimizer.compute_gradients(total_loss, var_list=variables_to_train)
grad_updates = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
# initialize all the variables we should initialize
# weights will be restored right after
sess.run(tf.global_variables_initializer())
# after the first restore, we want global step in our checkpoint
saver = tf.train.Saver(variables_to_train + [global_step])
if self.latest_ckpt_step == 0:
bw_saver.restore(sess, self.ckpt_file)
else:
saver.restore(sess, self.ckpt_file)
self.ckpt_file = os.path.join(self.ckpt_dir, self.ckpt_prefix)
# summaries
train_summary_op = tf.summary.merge_all()
train_writer = tf.summary.FileWriter(self.ckpt_dir + '/train', tf.get_default_graph())
tf.logging.info(f"Starting training for {self.num_steps} steps")
sess.run(self.iterator.initializer)
# training loop
start = time.time()
for _ in range(self.num_steps):
loss, _, cur_step, summary = sess.run([total_loss, grad_updates, global_step, train_summary_op])
cur_step += 1
if cur_step % self.print_steps == 0:
print(f"{cur_step}: loss: {loss:.3f}, avg per step: {(time.time() - start) / self.print_steps:.3f} sec", end='\r', flush=True)
train_writer.add_summary(summary, cur_step + 1)
start = time.time()
if cur_step % self.steps_to_save == 0:
saver.save(sess, self.ckpt_file, global_step=global_step)
print("\n")

View File

@@ -0,0 +1,125 @@
import tensorflow as tf
import numpy as np
import os, sys, time, glob, re
from anchors import generate_anchors
from model import ssd_common, ssd_vgg_300
from datautil.parser import get_parser_func
from datautil.ssd_vgg_preprocessing import preprocess_for_eval, preprocess_for_train
from tfutil import endpoints, tf_utils
import tfextended as tfe
from azureml.accel.models import SsdVgg
slim = tf.contrib.slim
class TrainerBase:
'''
Base class for fine-tuning: owns the checkpoint, data pipeline and session.
'''
def __init__(self, ckpt_dir, recordset_files,
steps_to_save = 1000, num_steps = 1000, num_classes = 21, print_steps = 10, batch_size=2, is_training=True):
'''
ckpt_dir - directory of the checkpoint metagraph
recordset_files - list of files representing the recordset for training or evaluation
'''
self.is_training = is_training
# This will pull the model with its weights
# And seed the checkpoint
self.ssd_net_graph = SsdVgg(ckpt_dir)
self.ckpt_dir = self.ssd_net_graph.model_path
self.ckpt_file = tf.train.latest_checkpoint(self.ssd_net_graph.model_path)
try:
self.latest_ckpt_step = int(re.findall("-[0-9]+$", self.ckpt_file)[0][1:])
except (IndexError, TypeError):  # no step suffix, or no checkpoint found
self.latest_ckpt_step = 0
self.recordset = recordset_files
self.ckpt_prefix = os.path.split(self.ssd_net_graph.model_ref + "_bw")[1]
self.pb_graph_path = os.path.join(self.ckpt_dir, self.ckpt_prefix + ".graph.pb")
#if self.is_training:
self.graph_file = os.path.join(self.ckpt_dir, self.ckpt_prefix + ".meta")
#else:
# self.graph_file = self.ckpt_file + ".meta"
# anchors
self.anchors = generate_anchors.ssd_anchors_all_layers()
# shuffle
self.n_shuffle = 1000
self.num_steps = num_steps
# num of classes
# NOTE: the pretrained graph was built for 21 (Pascal VOC) classes,
# so this must stay 21 regardless of the num_classes argument
self.num_classes = 21
# initialize data pipeline
self.batch_size = batch_size
self.iterator = None
self.prep_dataset_and_iterator()
self.steps_to_save = steps_to_save
self.print_steps = print_steps
# for losses etc
self.ssd_net = ssd_vgg_300.SSDNet()
# input placeholder
self.input_tensor_name = self.ssd_net_graph.input_tensor_list[0]
def __enter__(self):
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
self.sess = tf.Session(config=config)
return self
def __exit__(self, exc_type, exc_val, exc_tb):
self.sess.close()
tf.reset_default_graph()
def prep_dataset_and_iterator(self):
'''
Create datasets for training or validation
'''
var_scope = "training" if self.is_training else "eval"
parse_func = get_parser_func(self.anchors, self.num_classes, self.is_training, var_scope)
with tf.variable_scope(var_scope):
# data pipeline
dataset = tf.data.TFRecordDataset(self.recordset)
if self.is_training:
dataset = dataset.shuffle(self.n_shuffle)
dataset = dataset.map(parse_func)
dataset = dataset.repeat()
dataset = dataset.batch(self.batch_size)
dataset = dataset.prefetch(1)
self.iterator = dataset.make_initializable_iterator()
def get_output_tensors(self, image):
is_training = tf.constant(self.is_training, dtype=tf.bool, shape=())
input_map = {self.input_tensor_name: image, "is_training": is_training}
saver = tf.train.import_meta_graph(self.graph_file, input_map=input_map)
graph = tf.get_default_graph()
logits = [graph.get_tensor_by_name(tensor_name) for tensor_name in endpoints.logit_names]
localizations = [graph.get_tensor_by_name(tensor_name) for tensor_name in endpoints.localizations_names]
predictions = [graph.get_tensor_by_name(tensor_name) for tensor_name in endpoints.predictions_names]
return predictions, localizations, logits, saver
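# Sketch (assumed semantics of tf_utils.reshape_list, used by the trainers):
# the parser emits one flat list [image, classes..., localizations..., scores...],
# and reshape_list regroups it according to a shape such as [1, N, N, N].
# A pure-Python equivalent of that regrouping would be:
#
#   def regroup(flat, shape):
#       out, i = [], 0
#       for n in shape:
#           out.append(flat[i] if n == 1 else list(flat[i:i + n]))
#           i += n
#       return out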

View File

@@ -0,0 +1,164 @@
# Copyright 2015 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implement some custom layers, not provided by TensorFlow.
Trying to follow as much as possible the style/standards used in
tf.contrib.layers
"""
import tensorflow as tf
from tensorflow.contrib.framework.python.ops import add_arg_scope
from tensorflow.contrib.layers.python.layers import initializers
from tensorflow.contrib.framework.python.ops import variables
from tensorflow.contrib.layers.python.layers import utils
from tensorflow.python.ops import nn
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import variable_scope
def abs_smooth(x):
"""Smoothed absolute function. Useful to compute an L1 smooth error.
Define as:
x^2 / 2 if abs(x) < 1
abs(x) - 0.5 if abs(x) > 1
We use here a differentiable definition using min(x) and abs(x). Clearly
not optimal, but good enough for our purpose!
"""
absx = tf.abs(x)
minx = tf.minimum(absx, 1)
r = 0.5 * ((absx - 1) * minx + absx)
return r
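# Quick check of the closed form above (illustration only):
#   for |x| <= 1: minx = |x|, so r = 0.5 * ((|x| - 1) * |x| + |x|) = 0.5 * x^2
#   for |x| >  1: minx = 1,   so r = 0.5 * ((|x| - 1) + |x|)       = |x| - 0.5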
@add_arg_scope
def l2_normalization(
inputs,
scaling=False,
scale_initializer=init_ops.ones_initializer(),
reuse=None,
variables_collections=None,
outputs_collections=None,
data_format='NHWC',
trainable=True,
scope=None):
"""Implement L2 normalization on every feature (i.e. spatial normalization).
Should be extended in some near future to other dimensions, providing a more
flexible normalization framework.
Args:
inputs: a 4-D tensor with dimensions [batch_size, height, width, channels].
scaling: whether or not to add a post scaling operation along the dimensions
which have been normalized.
scale_initializer: An initializer for the weights.
reuse: whether or not the layer and its variables should be reused. To be
able to reuse the layer scope must be given.
variables_collections: optional list of collections for all the variables or
a dictionary containing a different list of collection per variable.
outputs_collections: collection to add the outputs.
data_format: NHWC or NCHW data format.
trainable: If `True` also add variables to the graph collection
`GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
scope: Optional scope for `variable_scope`.
Returns:
A `Tensor` representing the output of the operation.
"""
with variable_scope.variable_scope(
scope, 'L2Normalization', [inputs], reuse=reuse) as sc:
inputs_shape = inputs.get_shape()
inputs_rank = inputs_shape.ndims
dtype = inputs.dtype.base_dtype
if data_format == 'NHWC':
# norm_dim = tf.range(1, inputs_rank-1)
norm_dim = tf.range(inputs_rank-1, inputs_rank)
params_shape = inputs_shape[-1:]
elif data_format == 'NCHW':
# norm_dim = tf.range(2, inputs_rank)
norm_dim = tf.range(1, 2)
params_shape = inputs_shape[1:2]
# Normalize along the channel dimension.
outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
# Additional scaling.
if scaling:
scale_collections = utils.get_variable_collections(
variables_collections, 'scale')
scale = variables.model_variable('gamma',
shape=params_shape,
dtype=dtype,
initializer=scale_initializer,
collections=scale_collections,
trainable=trainable)
if data_format == 'NHWC':
outputs = tf.multiply(outputs, scale)
elif data_format == 'NCHW':
scale = tf.expand_dims(scale, axis=-1)
scale = tf.expand_dims(scale, axis=-1)
outputs = tf.multiply(outputs, scale)
# outputs = tf.transpose(outputs, perm=(0, 2, 3, 1))
return utils.collect_named_outputs(outputs_collections,
sc.original_name_scope, outputs)
@add_arg_scope
def pad2d(inputs,
pad=(0, 0),
mode='CONSTANT',
data_format='NHWC',
trainable=True,
scope=None):
"""2D Padding layer, adding a symmetric padding to H and W dimensions.
Aims to mimic padding in Caffe and MXNet, helping the port of models to
TensorFlow. Tries to follow the naming convention of `tf.contrib.layers`.
Args:
inputs: 4D input Tensor;
pad: 2-Tuple with padding values for H and W dimensions;
mode: Padding mode. C.f. `tf.pad`
data_format: NHWC or NCHW data format.
"""
with tf.name_scope(scope, 'pad2d', [inputs]):
# Padding shape.
if data_format == 'NHWC':
paddings = [[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]]
elif data_format == 'NCHW':
paddings = [[0, 0], [0, 0], [pad[0], pad[0]], [pad[1], pad[1]]]
net = tf.pad(inputs, paddings, mode=mode)
return net
@add_arg_scope
def channel_to_last(inputs,
data_format='NHWC',
scope=None):
"""Move the channel axis to the last dimension. Allows to
provide a single output format whatever the input data format.
Args:
inputs: Input Tensor;
data_format: NHWC or NCHW.
Return:
Input in NHWC format.
"""
with tf.name_scope(scope, 'channel_to_last', [inputs]):
if data_format == 'NHWC':
net = inputs
elif data_format == 'NCHW':
net = tf.transpose(inputs, perm=(0, 2, 3, 1))
return net
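# Usage sketch: pad2d followed by a VALID stride-2 convolution reproduces
# Caffe's pad=1, 3x3/stride-2 convolution; this is how blocks 8 and 9 of the
# SSD net (see ssd_vgg_300.ssd_net) use it:
#
#   net = pad2d(net, pad=(1, 1))
#   net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')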

View File

@@ -0,0 +1,252 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Additional Numpy methods. Big mess of many things!
"""
import numpy as np
# =========================================================================== #
# Numpy implementations of SSD boxes functions.
# =========================================================================== #
def ssd_bboxes_decode(feat_localizations,
anchor_bboxes,
prior_scaling=[0.1, 0.1, 0.2, 0.2]):
"""Compute the relative bounding boxes from the layer features and
reference anchor bounding boxes.
Return:
numpy array Nx4: ymin, xmin, ymax, xmax
"""
# Reshape for easier broadcasting.
l_shape = feat_localizations.shape
feat_localizations = np.reshape(feat_localizations,
(-1, l_shape[-2], l_shape[-1]))
yref, xref, href, wref = anchor_bboxes
xref = np.reshape(xref, [-1, 1])
yref = np.reshape(yref, [-1, 1])
# Compute center, height and width
cx = feat_localizations[:, :, 0] * wref * prior_scaling[0] + xref
cy = feat_localizations[:, :, 1] * href * prior_scaling[1] + yref
w = wref * np.exp(feat_localizations[:, :, 2] * prior_scaling[2])
h = href * np.exp(feat_localizations[:, :, 3] * prior_scaling[3])
# bboxes: ymin, xmin, xmax, ymax.
bboxes = np.zeros_like(feat_localizations)
bboxes[:, :, 0] = cy - h / 2.
bboxes[:, :, 1] = cx - w / 2.
bboxes[:, :, 2] = cy + h / 2.
bboxes[:, :, 3] = cx + w / 2.
# Back to original shape.
bboxes = np.reshape(bboxes, l_shape)
return bboxes
def ssd_bboxes_select_layer(predictions_layer,
localizations_layer,
anchors_layer,
select_threshold=0.5,
img_shape=(300, 300),
num_classes=21,
decode=True):
"""Extract classes, scores and bounding boxes from features in one layer.
Return:
classes, scores, bboxes: Numpy arrays...
"""
# First decode localizations features if necessary.
if decode:
localizations_layer = ssd_bboxes_decode(localizations_layer, anchors_layer)
# Reshape features to: Batches x N x N_labels | 4.
p_shape = predictions_layer.shape
batch_size = p_shape[0] if len(p_shape) == 5 else 1
predictions_layer = np.reshape(predictions_layer,
(batch_size, -1, p_shape[-1]))
l_shape = localizations_layer.shape
localizations_layer = np.reshape(localizations_layer,
(batch_size, -1, l_shape[-1]))
# Boxes selection: use threshold or score > no-label criteria.
if select_threshold is None or select_threshold == 0:
# Class prediction and scores: assign 0. to 0-class
classes = np.argmax(predictions_layer, axis=2)
scores = np.amax(predictions_layer, axis=2)
mask = (classes > 0)
classes = classes[mask]
scores = scores[mask]
bboxes = localizations_layer[mask]
else:
sub_predictions = predictions_layer[:, :, 1:]
idxes = np.where(sub_predictions > select_threshold)
classes = idxes[-1]+1
scores = sub_predictions[idxes]
bboxes = localizations_layer[idxes[:-1]]
return classes, scores, bboxes
def ssd_bboxes_select(predictions_net,
localizations_net,
anchors_net,
select_threshold=0.5,
img_shape=(300, 300),
num_classes=21,
decode=True):
"""Extract classes, scores and bounding boxes from network output layers.
Return:
classes, scores, bboxes: Numpy arrays...
"""
l_classes = []
l_scores = []
l_bboxes = []
# l_layers = []
# l_idxes = []
for i in range(len(predictions_net)):
classes, scores, bboxes = ssd_bboxes_select_layer(
predictions_net[i], localizations_net[i], anchors_net[i],
select_threshold, img_shape, num_classes, decode)
l_classes.append(classes)
l_scores.append(scores)
l_bboxes.append(bboxes)
# Debug information.
# l_layers.append(i)
# l_idxes.append((i, idxes))
classes = np.concatenate(l_classes, 0)
scores = np.concatenate(l_scores, 0)
bboxes = np.concatenate(l_bboxes, 0)
return classes, scores, bboxes
# =========================================================================== #
# Common functions for bboxes handling and selection.
# =========================================================================== #
def bboxes_sort(classes, scores, bboxes, top_k=400):
"""Sort bounding boxes by decreasing order and keep only the top_k
"""
# if priority_inside:
# inside = (bboxes[:, 0] > margin) & (bboxes[:, 1] > margin) & \
# (bboxes[:, 2] < 1-margin) & (bboxes[:, 3] < 1-margin)
# idxes = np.argsort(-scores)
# inside = inside[idxes]
# idxes = np.concatenate([idxes[inside], idxes[~inside]])
idxes = np.argsort(-scores)
classes = classes[idxes][:top_k]
scores = scores[idxes][:top_k]
bboxes = bboxes[idxes][:top_k]
return classes, scores, bboxes
def bboxes_clip(bbox_ref, bboxes):
"""Clip bounding boxes with respect to reference bbox.
"""
bboxes = np.copy(bboxes)
bboxes = np.transpose(bboxes)
bbox_ref = np.transpose(bbox_ref)
bboxes[0] = np.maximum(bboxes[0], bbox_ref[0])
bboxes[1] = np.maximum(bboxes[1], bbox_ref[1])
bboxes[2] = np.minimum(bboxes[2], bbox_ref[2])
bboxes[3] = np.minimum(bboxes[3], bbox_ref[3])
bboxes = np.transpose(bboxes)
return bboxes
def bboxes_resize(bbox_ref, bboxes):
"""Resize bounding boxes based on a reference bounding box,
assuming that the latter is [0, 0, 1, 1] after transform.
"""
bboxes = np.copy(bboxes)
# Translate.
bboxes[:, 0] -= bbox_ref[0]
bboxes[:, 1] -= bbox_ref[1]
bboxes[:, 2] -= bbox_ref[0]
bboxes[:, 3] -= bbox_ref[1]
# Resize.
resize = [bbox_ref[2] - bbox_ref[0], bbox_ref[3] - bbox_ref[1]]
bboxes[:, 0] /= resize[0]
bboxes[:, 1] /= resize[1]
bboxes[:, 2] /= resize[0]
bboxes[:, 3] /= resize[1]
return bboxes
def bboxes_jaccard(bboxes1, bboxes2):
"""Computing jaccard index between bboxes1 and bboxes2.
Note: bboxes1 and bboxes2 can be multi-dimensional, but should broacastable.
"""
bboxes1 = np.transpose(bboxes1)
bboxes2 = np.transpose(bboxes2)
# Intersection bbox and volume.
int_ymin = np.maximum(bboxes1[0], bboxes2[0])
int_xmin = np.maximum(bboxes1[1], bboxes2[1])
int_ymax = np.minimum(bboxes1[2], bboxes2[2])
int_xmax = np.minimum(bboxes1[3], bboxes2[3])
int_h = np.maximum(int_ymax - int_ymin, 0.)
int_w = np.maximum(int_xmax - int_xmin, 0.)
int_vol = int_h * int_w
# Union volume.
vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1])
vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1])
jaccard = int_vol / (vol1 + vol2 - int_vol)
return jaccard
def bboxes_intersection(bboxes_ref, bboxes2):
"""Computing jaccard index between bboxes1 and bboxes2.
Note: bboxes1 and bboxes2 can be multi-dimensional, but should broacastable.
"""
bboxes_ref = np.transpose(bboxes_ref)
bboxes2 = np.transpose(bboxes2)
# Intersection bbox and volume.
int_ymin = np.maximum(bboxes_ref[0], bboxes2[0])
int_xmin = np.maximum(bboxes_ref[1], bboxes2[1])
int_ymax = np.minimum(bboxes_ref[2], bboxes2[2])
int_xmax = np.minimum(bboxes_ref[3], bboxes2[3])
int_h = np.maximum(int_ymax - int_ymin, 0.)
int_w = np.maximum(int_xmax - int_xmin, 0.)
int_vol = int_h * int_w
# Union volume.
vol = (bboxes_ref[2] - bboxes_ref[0]) * (bboxes_ref[3] - bboxes_ref[1])
score = int_vol / vol
return score
def bboxes_nms(classes, scores, bboxes, nms_threshold=0.45):
"""Apply non-maximum selection to bounding boxes.
"""
keep_bboxes = np.ones(scores.shape, dtype=np.bool)
for i in range(scores.size-1):
if keep_bboxes[i]:
# Compute overlap with the bboxes that follow.
overlap = bboxes_jaccard(bboxes[i], bboxes[(i+1):])
# Overlap threshold for keeping + checking part of the same class
keep_overlap = np.logical_or(overlap < nms_threshold, classes[(i+1):] != classes[i])
keep_bboxes[(i+1):] = np.logical_and(keep_bboxes[(i+1):], keep_overlap)
idxes = np.where(keep_bboxes)
return classes[idxes], scores[idxes], bboxes[idxes]
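# Toy example (illustration only): two same-class boxes with IoU 0.81.
#
#   classes = np.array([1, 1]); scores = np.array([0.9, 0.8])
#   bboxes = np.array([[0., 0., 1., 1.], [0., 0., .9, .9]])
#   bboxes_nms(classes, scores, bboxes, nms_threshold=0.45)
#   # -> keeps only the first box: overlap 0.81 > 0.45 suppresses the second.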
def bboxes_nms_fast(classes, scores, bboxes, threshold=0.45):
"""Apply non-maximum selection to bounding boxes.
"""
pass

View File

@@ -0,0 +1,408 @@
# Copyright 2015 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Shared function between different SSD implementations.
"""
import numpy as np
import tensorflow as tf
import tfextended as tfe
# =========================================================================== #
# TensorFlow implementation of boxes SSD encoding / decoding.
# =========================================================================== #
def tf_ssd_bboxes_encode_layer(labels,
bboxes,
anchors_layer,
num_classes,
ignore_threshold=0.5,
prior_scaling=[0.1, 0.1, 0.2, 0.2],
dtype=tf.float32):
"""Encode groundtruth labels and bounding boxes using SSD anchors from
one layer.
Arguments:
labels: 1D Tensor(int64) containing groundtruth labels;
bboxes: Nx4 Tensor(float) with bboxes relative coordinates;
anchors_layer: Numpy array with layer anchors;
ignore_threshold: Threshold for positive match with groundtruth bboxes;
prior_scaling: Scaling of encoded coordinates.
Return:
(target_labels, target_localizations, target_scores): Target Tensors.
"""
# Anchors coordinates and volume.
yref, xref, href, wref = anchors_layer
ymin = yref - href / 2.
xmin = xref - wref / 2.
ymax = yref + href / 2.
xmax = xref + wref / 2.
vol_anchors = (xmax - xmin) * (ymax - ymin)
# Initialize tensors...
shape = (yref.shape[0], yref.shape[1], href.size)
feat_labels = tf.zeros(shape, dtype=tf.int64)
feat_scores = tf.zeros(shape, dtype=dtype)
feat_ymin = tf.zeros(shape, dtype=dtype)
feat_xmin = tf.zeros(shape, dtype=dtype)
feat_ymax = tf.ones(shape, dtype=dtype)
feat_xmax = tf.ones(shape, dtype=dtype)
def jaccard_with_anchors(bbox):
"""Compute jaccard score between a box and the anchors.
"""
int_ymin = tf.maximum(ymin, bbox[0])
int_xmin = tf.maximum(xmin, bbox[1])
int_ymax = tf.minimum(ymax, bbox[2])
int_xmax = tf.minimum(xmax, bbox[3])
h = tf.maximum(int_ymax - int_ymin, 0.)
w = tf.maximum(int_xmax - int_xmin, 0.)
# Volumes.
inter_vol = h * w
union_vol = vol_anchors - inter_vol \
+ (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
jaccard = tf.divide(inter_vol, union_vol)
return jaccard
def intersection_with_anchors(bbox):
"""Compute intersection between score a box and the anchors.
"""
int_ymin = tf.maximum(ymin, bbox[0])
int_xmin = tf.maximum(xmin, bbox[1])
int_ymax = tf.minimum(ymax, bbox[2])
int_xmax = tf.minimum(xmax, bbox[3])
h = tf.maximum(int_ymax - int_ymin, 0.)
w = tf.maximum(int_xmax - int_xmin, 0.)
inter_vol = h * w
scores = tf.divide(inter_vol, vol_anchors)
return scores
def condition(i, feat_labels, feat_scores,
feat_ymin, feat_xmin, feat_ymax, feat_xmax):
"""Condition: check label index.
"""
r = tf.less(i, tf.shape(labels))
return r[0]
def body(i, feat_labels, feat_scores,
feat_ymin, feat_xmin, feat_ymax, feat_xmax):
"""Body: update feature labels, scores and bboxes.
Follow the original SSD paper for that purpose:
- assign values when jaccard > 0.5;
- only update if beat the score of other bboxes.
"""
# Jaccard score.
label = labels[i]
bbox = bboxes[i]
jaccard = jaccard_with_anchors(bbox)
# Mask: check threshold + scores + no annotations + num_classes.
mask = tf.greater(jaccard, feat_scores)
# mask = tf.logical_and(mask, tf.greater(jaccard, matching_threshold))
mask = tf.logical_and(mask, feat_scores > -0.5)
mask = tf.logical_and(mask, label < num_classes)
imask = tf.cast(mask, tf.int64)
fmask = tf.cast(mask, dtype)
# Update values using mask.
feat_labels = imask * label + (1 - imask) * feat_labels
feat_scores = tf.where(mask, jaccard, feat_scores)
feat_ymin = fmask * bbox[0] + (1 - fmask) * feat_ymin
feat_xmin = fmask * bbox[1] + (1 - fmask) * feat_xmin
feat_ymax = fmask * bbox[2] + (1 - fmask) * feat_ymax
feat_xmax = fmask * bbox[3] + (1 - fmask) * feat_xmax
# Check no annotation label: ignore these anchors...
# interscts = intersection_with_anchors(bbox)
# mask = tf.logical_and(interscts > ignore_threshold,
# label == no_annotation_label)
# # Replace scores by -1.
# feat_scores = tf.where(mask, -tf.cast(mask, dtype), feat_scores)
return [i+1, feat_labels, feat_scores,
feat_ymin, feat_xmin, feat_ymax, feat_xmax]
# Main loop definition.
i = 0
[i, feat_labels, feat_scores,
feat_ymin, feat_xmin,
feat_ymax, feat_xmax] = tf.while_loop(condition, body,
[i, feat_labels, feat_scores,
feat_ymin, feat_xmin,
feat_ymax, feat_xmax])
# Transform to center / size.
feat_cy = (feat_ymax + feat_ymin) / 2.
feat_cx = (feat_xmax + feat_xmin) / 2.
feat_h = feat_ymax - feat_ymin
feat_w = feat_xmax - feat_xmin
# Encode features.
feat_cy = (feat_cy - yref) / href / prior_scaling[0]
feat_cx = (feat_cx - xref) / wref / prior_scaling[1]
feat_h = tf.log(feat_h / href) / prior_scaling[2]
feat_w = tf.log(feat_w / wref) / prior_scaling[3]
# Use SSD ordering: x / y / w / h instead of ours.
feat_localizations = tf.stack([feat_cx, feat_cy, feat_w, feat_h], axis=-1)
return feat_labels, feat_localizations, feat_scores
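# Worked example (one anchor, one ground-truth box; illustration only):
# anchor yref = xref = 0.5, href = wref = 0.2, ground truth [0.4, 0.4, 0.6, 0.6]
#   -> cy = cx = 0.5, h = w = 0.2
#   -> t_cy = (0.5 - 0.5) / 0.2 / 0.1 = 0, t_h = log(0.2 / 0.2) / 0.2 = 0
# i.e. a box that coincides with its matched anchor encodes to all zeros.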
def tf_ssd_bboxes_encode(labels,
bboxes,
anchors,
num_classes,
ignore_threshold=0.5,
prior_scaling=[0.1, 0.1, 0.2, 0.2],
dtype=tf.float32,
scope='ssd_bboxes_encode'):
"""Encode groundtruth labels and bounding boxes using SSD net anchors.
Encoding boxes for all feature layers.
Arguments:
labels: 1D Tensor(int64) containing groundtruth labels;
bboxes: Nx4 Tensor(float) with bboxes relative coordinates;
anchors: List of Numpy array with layer anchors;
ignore_threshold: Threshold for positive match with groundtruth bboxes;
prior_scaling: Scaling of encoded coordinates.
Return:
(target_labels, target_localizations, target_scores):
Each element is a list of target Tensors.
"""
with tf.name_scope(scope):
target_labels = []
target_localizations = []
target_scores = []
for i, anchors_layer in enumerate(anchors):
with tf.name_scope('bboxes_encode_block_%i' % i):
t_labels, t_loc, t_scores = \
tf_ssd_bboxes_encode_layer(labels, bboxes, anchors_layer,
num_classes,
ignore_threshold,
prior_scaling, dtype)
target_labels.append(t_labels)
target_localizations.append(t_loc)
target_scores.append(t_scores)
return target_labels, target_localizations, target_scores
def tf_ssd_bboxes_decode_layer(feat_localizations,
anchors_layer,
prior_scaling=[0.1, 0.1, 0.2, 0.2]):
"""Compute the relative bounding boxes from the layer features and
reference anchor bounding boxes.
Arguments:
feat_localizations: Tensor containing localization features.
anchors: List of numpy array containing anchor boxes.
Return:
Tensor Nx4: ymin, xmin, ymax, xmax
"""
yref, xref, href, wref = anchors_layer
# Compute center, height and width
cx = feat_localizations[:, :, :, :, 0] * wref * prior_scaling[0] + xref
cy = feat_localizations[:, :, :, :, 1] * href * prior_scaling[1] + yref
w = wref * tf.exp(feat_localizations[:, :, :, :, 2] * prior_scaling[2])
h = href * tf.exp(feat_localizations[:, :, :, :, 3] * prior_scaling[3])
# Boxes coordinates.
ymin = cy - h / 2.
xmin = cx - w / 2.
ymax = cy + h / 2.
xmax = cx + w / 2.
bboxes = tf.stack([ymin, xmin, ymax, xmax], axis=-1)
return bboxes
def tf_ssd_bboxes_decode(feat_localizations,
anchors,
prior_scaling=[0.1, 0.1, 0.2, 0.2],
scope='ssd_bboxes_decode'):
"""Compute the relative bounding boxes from the SSD net features and
reference anchors bounding boxes.
Arguments:
feat_localizations: List of Tensors containing localization features.
anchors: List of numpy array containing anchor boxes.
Return:
List of Tensors Nx4: ymin, xmin, ymax, xmax
"""
with tf.name_scope(scope):
bboxes = []
for i, anchors_layer in enumerate(anchors):
bboxes.append(
tf_ssd_bboxes_decode_layer(feat_localizations[i],
anchors_layer,
prior_scaling))
return bboxes
# =========================================================================== #
# SSD boxes selection.
# =========================================================================== #
def tf_ssd_bboxes_select_layer(predictions_layer, localizations_layer,
select_threshold=None,
num_classes=21,
ignore_class=0,
scope=None):
"""Extract classes, scores and bounding boxes from features in one layer.
Batch-compatible: inputs are supposed to have batch-type shapes.
Args:
predictions_layer: A SSD prediction layer;
localizations_layer: A SSD localization layer;
select_threshold: Classification threshold for selecting a box. All boxes
under the threshold are set to 'zero'. If None, no threshold applied.
Return:
d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
size Batches X N x 1 | 4. Each key corresponding to a class.
"""
select_threshold = 0.0 if select_threshold is None else select_threshold
with tf.name_scope(scope, 'ssd_bboxes_select_layer',
[predictions_layer, localizations_layer]):
# Reshape features: Batches x N x N_labels | 4
p_shape = tfe.get_shape(predictions_layer)
predictions_layer = tf.reshape(predictions_layer,
tf.stack([p_shape[0], -1, p_shape[-1]]))
l_shape = tfe.get_shape(localizations_layer)
localizations_layer = tf.reshape(localizations_layer,
tf.stack([l_shape[0], -1, l_shape[-1]]))
d_scores = {}
d_bboxes = {}
for c in range(0, num_classes):
if c != ignore_class:
# Remove boxes under the threshold.
scores = predictions_layer[:, :, c]
fmask = tf.cast(tf.greater_equal(scores, select_threshold), scores.dtype)
scores = scores * fmask
bboxes = localizations_layer * tf.expand_dims(fmask, axis=-1)
# Append to dictionary.
d_scores[c] = scores
d_bboxes[c] = bboxes
return d_scores, d_bboxes
def tf_ssd_bboxes_select(predictions_net, localizations_net,
select_threshold=None,
num_classes=21,
ignore_class=0,
scope=None):
"""Extract classes, scores and bounding boxes from network output layers.
Batch-compatible: inputs are supposed to have batch-type shapes.
Args:
predictions_net: List of SSD prediction layers;
localizations_net: List of localization layers;
select_threshold: Classification threshold for selecting a box. All boxes
under the threshold are set to 'zero'. If None, no threshold applied.
Return:
d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
size Batches X N x 1 | 4. Each key corresponding to a class.
"""
with tf.name_scope(scope, 'ssd_bboxes_select',
[predictions_net, localizations_net]):
l_scores = []
l_bboxes = []
for i in range(len(predictions_net)):
scores, bboxes = tf_ssd_bboxes_select_layer(predictions_net[i],
localizations_net[i],
select_threshold,
num_classes,
ignore_class)
l_scores.append(scores)
l_bboxes.append(bboxes)
# Concat results.
d_scores = {}
d_bboxes = {}
for c in l_scores[0].keys():
ls = [s[c] for s in l_scores]
lb = [b[c] for b in l_bboxes]
d_scores[c] = tf.concat(ls, axis=1)
d_bboxes[c] = tf.concat(lb, axis=1)
return d_scores, d_bboxes
def tf_ssd_bboxes_select_layer_all_classes(predictions_layer, localizations_layer,
select_threshold=None):
"""Extract classes, scores and bounding boxes from features in one layer.
Batch-compatible: inputs are supposed to have batch-type shapes.
Args:
predictions_layer: A SSD prediction layer;
localizations_layer: A SSD localization layer;
select_threshold: Classification threshold for selecting a box. If None,
select boxes whose classification score is higher than 'no class'.
Return:
classes, scores, bboxes: Input Tensors.
"""
# Reshape features: Batches x N x N_labels | 4
p_shape = tfe.get_shape(predictions_layer)
predictions_layer = tf.reshape(predictions_layer,
tf.stack([p_shape[0], -1, p_shape[-1]]))
l_shape = tfe.get_shape(localizations_layer)
localizations_layer = tf.reshape(localizations_layer,
tf.stack([l_shape[0], -1, l_shape[-1]]))
# Boxes selection: use threshold or score > no-label criteria.
if select_threshold is None or select_threshold == 0:
# Class prediction and scores: assign 0. to 0-class
classes = tf.argmax(predictions_layer, axis=2)
scores = tf.reduce_max(predictions_layer, axis=2)
scores = scores * tf.cast(classes > 0, scores.dtype)
else:
sub_predictions = predictions_layer[:, :, 1:]
classes = tf.argmax(sub_predictions, axis=2) + 1
scores = tf.reduce_max(sub_predictions, axis=2)
# Only keep predictions higher than threshold.
mask = tf.greater(scores, select_threshold)
classes = classes * tf.cast(mask, classes.dtype)
scores = scores * tf.cast(mask, scores.dtype)
# Assume localization layer already decoded.
bboxes = localizations_layer
return classes, scores, bboxes
def tf_ssd_bboxes_select_all_classes(predictions_net, localizations_net,
select_threshold=None,
scope=None):
"""Extract classes, scores and bounding boxes from network output layers.
Batch-compatible: inputs are supposed to have batch-type shapes.
Args:
predictions_net: List of SSD prediction layers;
localizations_net: List of localization layers;
select_threshold: Classification threshold for selecting a box. If None,
select boxes whose classification score is higher than 'no class'.
Return:
classes, scores, bboxes: Tensors.
"""
with tf.name_scope(scope, 'ssd_bboxes_select',
[predictions_net, localizations_net]):
l_classes = []
l_scores = []
l_bboxes = []
for i in range(len(predictions_net)):
classes, scores, bboxes = \
tf_ssd_bboxes_select_layer_all_classes(predictions_net[i],
localizations_net[i],
select_threshold)
l_classes.append(classes)
l_scores.append(scores)
l_bboxes.append(bboxes)
classes = tf.concat(l_classes, axis=1)
scores = tf.concat(l_scores, axis=1)
bboxes = tf.concat(l_bboxes, axis=1)
return classes, scores, bboxes

View File

@@ -0,0 +1,660 @@
# Copyright 2016 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Definition of 300 VGG-based SSD network.
This model was initially introduced in:
SSD: Single Shot MultiBox Detector
Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
Cheng-Yang Fu, Alexander C. Berg
https://arxiv.org/abs/1512.02325
Two variants of the model are defined: the 300x300 and 512x512 models, the
latter obtaining a slightly better accuracy on Pascal VOC.
Usage:
with slim.arg_scope(ssd_vgg.ssd_vgg()):
outputs, end_points = ssd_vgg.ssd_vgg(inputs)
This network is a port of the original Caffe model. The padding in TF and
Caffe is slightly different, and can lead to a severe accuracy drop if not
handled correctly!
In Caffe, the output size of convolution and pooling layers is computed as
follows: h_o = (h_i + 2 * pad_h - kernel_h) / stride_h + 1
There is, however, a subtle difference between the two for stride > 1. In
the case of convolution:
top_size = floor((bottom_size + 2*pad - kernel_size) / stride) + 1
whereas for pooling:
top_size = ceil((bottom_size + 2*pad - kernel_size) / stride) + 1
Pooling thus implicitly allows some additional padding even if pad = 0. This
behaviour explains why pooling with stride and kernel of size 2 behaves the
same way in TensorFlow and Caffe.
This is no longer the case for other kernel sizes, which motivates the use of
a special padding layer to control these side effects.
@@ssd_vgg_300
"""
import math
from collections import namedtuple
import numpy as np
import tensorflow as tf
import tfextended as tfe
from model import custom_layers, ssd_common
slim = tf.contrib.slim
# =========================================================================== #
# SSD class definition.
# =========================================================================== #
SSDParams = namedtuple('SSDParameters', ['img_shape',
'num_classes',
'no_annotation_label',
'feat_layers',
'feat_shapes',
'anchor_size_bounds',
'anchor_sizes',
'anchor_ratios',
'anchor_steps',
'anchor_offset',
'normalizations',
'prior_scaling'
])
class SSDNet(object):
"""Implementation of the SSD VGG-based 300 network.
The default features layers with 300x300 image input are:
conv4 ==> 38 x 38
conv7 ==> 19 x 19
conv8 ==> 10 x 10
conv9 ==> 5 x 5
conv10 ==> 3 x 3
conv11 ==> 1 x 1
The default image size used to train this network is 300x300.
"""
default_params = SSDParams(
img_shape=(300, 300),
num_classes=21,
no_annotation_label=21,
feat_layers=['block4', 'block7', 'block8', 'block9', 'block10', 'block11'],
feat_shapes=[(37, 37), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
anchor_size_bounds=[0.15, 0.90],
# anchor_size_bounds=[0.20, 0.90],
anchor_sizes=[(21., 45.),
(45., 99.),
(99., 153.),
(153., 207.),
(207., 261.),
(261., 315.)],
# anchor_sizes=[(30., 60.),
# (60., 111.),
# (111., 162.),
# (162., 213.),
# (213., 264.),
# (264., 315.)],
anchor_ratios=[[2, .5],
[2, .5, 3, 1./3],
[2, .5, 3, 1./3],
[2, .5, 3, 1./3],
[2, .5],
[2, .5]],
anchor_steps=[8, 16, 32, 64, 100, 300],
anchor_offset=0.5,
normalizations=[20, -1, -1, -1, -1, -1],
prior_scaling=[0.1, 0.1, 0.2, 0.2]
)
def __init__(self, params=None):
"""Init the SSD net with some parameters. Use the default ones
if none provided.
"""
if isinstance(params, SSDParams):
self.params = params
else:
self.params = SSDNet.default_params
# ======================================================================= #
def net(self, inputs,
is_training=True,
update_feat_shapes=True,
dropout_keep_prob=0.5,
prediction_fn=slim.softmax,
reuse=None,
scope='ssd_300_vgg'):
"""SSD network definition.
"""
r = ssd_net(inputs,
num_classes=self.params.num_classes,
feat_layers=self.params.feat_layers,
anchor_sizes=self.params.anchor_sizes,
anchor_ratios=self.params.anchor_ratios,
normalizations=self.params.normalizations,
is_training=is_training,
dropout_keep_prob=dropout_keep_prob,
prediction_fn=prediction_fn,
reuse=reuse,
scope=scope)
# Update feature shapes (try at least!)
if update_feat_shapes:
shapes = ssd_feat_shapes_from_net(r[0], self.params.feat_shapes)
self.params = self.params._replace(feat_shapes=shapes)
return r
def arg_scope(self, weight_decay=0.0005, data_format='NHWC'):
"""Network arg_scope.
"""
return ssd_arg_scope(weight_decay, data_format=data_format)
def arg_scope_caffe(self, caffe_scope):
"""Caffe arg_scope used for weights importing.
"""
return ssd_arg_scope_caffe(caffe_scope)
# ======================================================================= #
def update_feature_shapes(self, predictions):
"""Update feature shapes from predictions collection (Tensor or Numpy
array).
"""
shapes = ssd_feat_shapes_from_net(predictions, self.params.feat_shapes)
self.params = self.params._replace(feat_shapes=shapes)
def anchors(self, img_shape, dtype=np.float32):
"""Compute the default anchor boxes, given an image shape.
"""
return ssd_anchors_all_layers(img_shape,
self.params.feat_shapes,
self.params.anchor_sizes,
self.params.anchor_ratios,
self.params.anchor_steps,
self.params.anchor_offset,
dtype)
def bboxes_encode(self, labels, bboxes, anchors,
scope=None):
"""Encode labels and bounding boxes.
"""
return ssd_common.tf_ssd_bboxes_encode(
labels, bboxes, anchors,
self.params.num_classes,
ignore_threshold=0.5,
prior_scaling=self.params.prior_scaling,
scope=scope)
def bboxes_decode(self, feat_localizations, anchors,
scope='ssd_bboxes_decode'):
"""Encode labels and bounding boxes.
"""
return ssd_common.tf_ssd_bboxes_decode(
feat_localizations, anchors,
prior_scaling=self.params.prior_scaling,
scope=scope)
def detected_bboxes(self, predictions, localisations,
select_threshold=None, nms_threshold=0.5,
clipping_bbox=None, top_k=400, keep_top_k=200):
"""Get the detected bounding boxes from the SSD network output.
"""
# Select top_k bboxes from predictions, and clip
rscores, rbboxes = \
ssd_common.tf_ssd_bboxes_select(predictions, localisations,
select_threshold=select_threshold,
num_classes=self.params.num_classes)
rscores, rbboxes = \
tfe.bboxes_sort(rscores, rbboxes, top_k=top_k)
# Apply NMS algorithm.
rscores, rbboxes = \
tfe.bboxes_nms_batch(rscores, rbboxes,
nms_threshold=nms_threshold,
keep_top_k=keep_top_k)
if clipping_bbox is not None:
rbboxes = tfe.bboxes_clip(clipping_bbox, rbboxes)
return rscores, rbboxes
def losses(self, logits, localisations,
gclasses, glocalisations, gscores,
match_threshold=0.5,
negative_ratio=3.,
alpha=1.,
label_smoothing=0.,
scope='ssd_losses'):
"""Define the SSD network losses.
"""
return ssd_losses(logits, localisations,
gclasses, glocalisations, gscores,
match_threshold=match_threshold,
negative_ratio=negative_ratio,
alpha=alpha,
label_smoothing=label_smoothing,
scope=scope)
# =========================================================================== #
# SSD tools...
# =========================================================================== #
def ssd_size_bounds_to_values(size_bounds,
n_feat_layers,
img_shape=(300, 300)):
"""Compute the reference sizes of the anchor boxes from relative bounds.
The absolute values are measured in pixels, based on the network
default size (300 pixels).
This function follows the computation performed in the original
implementation of SSD in Caffe.
Return:
list of lists containing the absolute sizes at each scale. For each scale,
the ratios only apply to the first value.
"""
assert img_shape[0] == img_shape[1]
img_size = img_shape[0]
min_ratio = int(size_bounds[0] * 100)
max_ratio = int(size_bounds[1] * 100)
step = int(math.floor((max_ratio - min_ratio) / (n_feat_layers - 2)))
# Start with the following smallest sizes.
sizes = [[img_size * size_bounds[0] / 2, img_size * size_bounds[0]]]
for ratio in range(min_ratio, max_ratio + 1, step):
sizes.append((img_size * ratio / 100.,
img_size * (ratio + step) / 100.))
return sizes
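# Worked example (defaults; illustration only): size_bounds=[0.15, 0.90],
# n_feat_layers=6 and a 300x300 image give:
#   min_ratio = 15, max_ratio = 90, step = floor(75 / 4) = 18
#   -> sizes = [[22.5, 45.], (45., 99.), (99., 153.), (153., 207.),
#               (207., 261.), (261., 315.)]
# which reproduces default_params.anchor_sizes above, except that the
# smallest size is hand-tuned to 21. in the defaults.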
def ssd_feat_shapes_from_net(predictions, default_shapes=None):
"""Try to obtain the feature shapes from the prediction layers. The latter
can be either a Tensor or Numpy ndarray.
Return:
list of feature shapes. Default values if predictions shape not fully
determined.
"""
feat_shapes = []
for l in predictions:
# Get the shape, from either a np array or a tensor.
if isinstance(l, np.ndarray):
shape = l.shape
else:
shape = l.get_shape().as_list()
shape = shape[1:4]
# Problem: undetermined shape...
if None in shape:
return default_shapes
else:
feat_shapes.append(shape)
return feat_shapes
def ssd_anchor_one_layer(img_shape,
feat_shape,
sizes,
ratios,
step,
offset=0.5,
dtype=np.float32):
"""Computer SSD default anchor boxes for one feature layer.
Determine the relative position grid of the centers, and the relative
width and height.
Arguments:
feat_shape: Feature shape, used for computing relative position grids;
sizes: Absolute reference sizes;
ratios: Ratios to use on these features;
img_shape: Image shape, used for computing height, width relatively to the
former;
offset: Grid offset.
Return:
y, x, h, w: Relative x and y grids, and height and width.
"""
# Compute the position grid: simple way.
# y, x = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
# y = (y.astype(dtype) + offset) / feat_shape[0]
# x = (x.astype(dtype) + offset) / feat_shape[1]
# Weird SSD-Caffe computation using steps values...
y, x = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
y = (y.astype(dtype) + offset) * step / img_shape[0]
x = (x.astype(dtype) + offset) * step / img_shape[1]
# Expand dims to support easy broadcasting.
y = np.expand_dims(y, axis=-1)
x = np.expand_dims(x, axis=-1)
# Compute relative height and width.
# Tries to follow the original implementation of SSD for the order.
num_anchors = len(sizes) + len(ratios)
h = np.zeros((num_anchors, ), dtype=dtype)
w = np.zeros((num_anchors, ), dtype=dtype)
# Add first anchor boxes with ratio=1.
h[0] = sizes[0] / img_shape[0]
w[0] = sizes[0] / img_shape[1]
di = 1
if len(sizes) > 1:
h[1] = math.sqrt(sizes[0] * sizes[1]) / img_shape[0]
w[1] = math.sqrt(sizes[0] * sizes[1]) / img_shape[1]
di += 1
for i, r in enumerate(ratios):
h[i+di] = sizes[0] / img_shape[0] / math.sqrt(r)
w[i+di] = sizes[0] / img_shape[1] * math.sqrt(r)
return y, x, h, w
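# Worked example (block4 defaults; illustration only): feat_shape=(37, 37),
# sizes=(21., 45.), ratios=[2, .5], step=8 give num_anchors = 4 and:
#   h[0] = w[0] = 21/300 = 0.07
#   h[1] = w[1] = sqrt(21 * 45)/300 ~= 0.1025
#   h[2] = 0.07/sqrt(2) ~= 0.0495, w[2] = 0.07*sqrt(2) ~= 0.0990  (r = 2)
#   h[3] ~= 0.0990,                w[3] ~= 0.0495                 (r = 0.5)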
def ssd_anchors_all_layers(img_shape,
layers_shape,
anchor_sizes,
anchor_ratios,
anchor_steps,
offset=0.5,
dtype=np.float32):
"""Compute anchor boxes for all feature layers.
"""
layers_anchors = []
for i, s in enumerate(layers_shape):
anchor_bboxes = ssd_anchor_one_layer(img_shape, s,
anchor_sizes[i],
anchor_ratios[i],
anchor_steps[i],
offset=offset, dtype=dtype)
layers_anchors.append(anchor_bboxes)
return layers_anchors
# =========================================================================== #
# Functional definition of VGG-based SSD 300.
# =========================================================================== #
def tensor_shape(x, rank=3):
"""Returns the dimensions of a tensor.
Args:
x: An N-D Tensor.
Returns:
A list of dimensions. Dimensions that are statically known are python
integers, otherwise they are integer scalar tensors.
"""
if x.get_shape().is_fully_defined():
return x.get_shape().as_list()
else:
static_shape = x.get_shape().with_rank(rank).as_list()
dynamic_shape = tf.unstack(tf.shape(x), rank)
return [s if s is not None else d
for s, d in zip(static_shape, dynamic_shape)]
def ssd_multibox_layer(inputs,
num_classes,
sizes,
ratios=[1],
normalization=-1,
bn_normalization=False):
"""Construct a multibox layer, return a class and localization predictions.
"""
net = inputs
if normalization > 0:
net = custom_layers.l2_normalization(net, scaling=True)
# Number of anchors.
num_anchors = len(sizes) + len(ratios)
# Location.
num_loc_pred = num_anchors * 4
loc_pred = slim.conv2d(net, num_loc_pred, [3, 3], activation_fn=None,
scope='conv_loc')
loc_pred = custom_layers.channel_to_last(loc_pred)
loc_pred = tf.reshape(loc_pred,
tensor_shape(loc_pred, 4)[:-1]+[num_anchors, 4])
# Class prediction.
num_cls_pred = num_anchors * num_classes
cls_pred = slim.conv2d(net, num_cls_pred, [3, 3], activation_fn=None,
scope='conv_cls')
cls_pred = custom_layers.channel_to_last(cls_pred)
cls_pred = tf.reshape(cls_pred,
tensor_shape(cls_pred, 4)[:-1]+[num_anchors, num_classes])
return cls_pred, loc_pred
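# Shape sketch (block4 defaults; illustration only): with 4 anchors and
# 21 classes, an N x H x W x 512 input yields
#   loc_pred: [N, H, W, 4, 4] and cls_pred: [N, H, W, 4, 21].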
def ssd_net(inputs,
num_classes=SSDNet.default_params.num_classes,
feat_layers=SSDNet.default_params.feat_layers,
anchor_sizes=SSDNet.default_params.anchor_sizes,
anchor_ratios=SSDNet.default_params.anchor_ratios,
normalizations=SSDNet.default_params.normalizations,
is_training=True,
dropout_keep_prob=0.5,
prediction_fn=slim.softmax,
reuse=None,
scope='ssd_300_vgg'):
"""SSD net definition.
"""
# if data_format == 'NCHW':
# inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))
# End_points collect relevant activations for external use.
end_points = {}
with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
# Original VGG-16 blocks.
net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
end_points['block1'] = net
net = slim.max_pool2d(net, [2, 2], scope='pool1')
# Block 2.
net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
end_points['block2'] = net
net = slim.max_pool2d(net, [2, 2], scope='pool2')
# Block 3.
net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
end_points['block3'] = net
net = slim.max_pool2d(net, [2, 2], scope='pool3')
# Block 4.
net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
end_points['block4'] = net
net = slim.max_pool2d(net, [2, 2], scope='pool4')
# Block 5.
net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
end_points['block5'] = net
net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')
# Additional SSD blocks.
# Block 6: 3x3 convolution with a dilation rate of 6.
net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
end_points['block6'] = net
# tf.layers.dropout takes a *drop* rate, so convert from the keep probability.
net = tf.layers.dropout(net, rate=1.0 - dropout_keep_prob, training=is_training)
# Block 7: 1x1 convolution.
net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
end_points['block7'] = net
net = tf.layers.dropout(net, rate=1.0 - dropout_keep_prob, training=is_training)
# Block 8/9/10/11: 1x1 and 3x3 convolutions stride 2 (except lasts).
end_point = 'block8'
with tf.variable_scope(end_point):
net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
net = custom_layers.pad2d(net, pad=(1, 1))
net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
end_points[end_point] = net
end_point = 'block9'
with tf.variable_scope(end_point):
net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
net = custom_layers.pad2d(net, pad=(1, 1))
net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
end_points[end_point] = net
end_point = 'block10'
with tf.variable_scope(end_point):
net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
end_points[end_point] = net
end_point = 'block11'
with tf.variable_scope(end_point):
net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
end_points[end_point] = net
# Prediction and localisations layers.
predictions = []
logits = []
localisations = []
for i, layer in enumerate(feat_layers):
with tf.variable_scope(layer + '_box'):
p, l = ssd_multibox_layer(end_points[layer],
num_classes,
anchor_sizes[i],
anchor_ratios[i],
normalizations[i])
predictions.append(prediction_fn(p))
logits.append(p)
localisations.append(l)
return predictions, localisations, logits, end_points
ssd_net.default_image_size = 300
def ssd_arg_scope(weight_decay=0.0005, data_format='NHWC'):
"""Defines the VGG arg scope.
Args:
weight_decay: The l2 regularization coefficient.
Returns:
An arg_scope.
"""
with slim.arg_scope([slim.conv2d, slim.fully_connected],
activation_fn=tf.nn.relu,
weights_regularizer=slim.l2_regularizer(weight_decay),
weights_initializer=tf.contrib.layers.xavier_initializer(),
biases_initializer=tf.zeros_initializer()):
with slim.arg_scope([slim.conv2d, slim.max_pool2d],
padding='SAME',
data_format=data_format):
with slim.arg_scope([custom_layers.pad2d,
custom_layers.l2_normalization,
custom_layers.channel_to_last],
data_format=data_format) as sc:
return sc
# =========================================================================== #
# Caffe scope: importing weights at initialization.
# =========================================================================== #
def ssd_arg_scope_caffe(caffe_scope):
"""Caffe scope definition.
Args:
caffe_scope: Caffe scope object with loaded weights.
Returns:
An arg_scope.
"""
# Default network arg scope.
with slim.arg_scope([slim.conv2d],
activation_fn=tf.nn.relu,
weights_initializer=caffe_scope.conv_weights_init(),
biases_initializer=caffe_scope.conv_biases_init()):
with slim.arg_scope([slim.fully_connected],
activation_fn=tf.nn.relu):
with slim.arg_scope([custom_layers.l2_normalization],
scale_initializer=caffe_scope.l2_norm_scale_init()):
with slim.arg_scope([slim.conv2d, slim.max_pool2d],
padding='SAME') as sc:
return sc
# =========================================================================== #
# SSD loss function.
# =========================================================================== #
def ssd_losses(logits, localisations,
gclasses, glocalisations, gscores,
match_threshold=0.5,
negative_ratio=3.,
alpha=1.,
label_smoothing=0.,
device='/cpu:0',
scope=None):
with tf.name_scope(scope, 'ssd_losses'):
lshape = tfe.get_shape(logits[0], 5)
num_classes = lshape[-1]
batch_size = lshape[0]
# Flatten out all vectors!
flogits = []
fgclasses = []
fgscores = []
flocalisations = []
fglocalisations = []
for i in range(len(logits)):
flogits.append(tf.reshape(logits[i], [-1, num_classes]))
fgclasses.append(tf.reshape(gclasses[i], [-1]))
fgscores.append(tf.reshape(gscores[i], [-1]))
flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
# Concatenate everything into single tensors.
logits = tf.concat(flogits, axis=0)
gclasses = tf.concat(fgclasses, axis=0)
gscores = tf.concat(fgscores, axis=0)
localisations = tf.concat(flocalisations, axis=0)
glocalisations = tf.concat(fglocalisations, axis=0)
dtype = logits.dtype
# Compute positive matching mask...
pmask = gscores > match_threshold
fpmask = tf.cast(pmask, dtype)
n_positives = tf.reduce_sum(fpmask)
# Hard negative mining...
no_classes = tf.cast(pmask, tf.int32)
predictions = slim.softmax(logits)
nmask = tf.logical_and(tf.logical_not(pmask),
gscores > -0.5)
fnmask = tf.cast(nmask, dtype)
nvalues = tf.where(nmask,
predictions[:, 0],
1. - fnmask)
nvalues_flat = tf.reshape(nvalues, [-1])
# Number of negative entries to select.
max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
n_neg = tf.minimum(n_neg, max_neg_entries)
val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
max_hard_pred = -val[-1]
# Final negative mask.
nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
fnmask = tf.cast(nmask, dtype)
batch_float = tf.cast(batch_size, tf.float32)
# Add cross-entropy loss.
with tf.name_scope('cross_entropy_pos'):
cross_entropy_pos_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
labels=gclasses)
cross_entropy_pos_loss = tf.divide(tf.reduce_sum(cross_entropy_pos_loss * fpmask), batch_float, name='value')
tf.losses.add_loss(cross_entropy_pos_loss)
with tf.name_scope('cross_entropy_neg'):
cross_entropy_neg_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
labels=no_classes)
cross_entropy_neg_loss = tf.divide(tf.reduce_sum(cross_entropy_neg_loss * fnmask), batch_float, name='value')
tf.losses.add_loss(cross_entropy_neg_loss)
# Add localization loss: smooth L1, L2, ...
with tf.name_scope('localization'):
# Weights Tensor: positive mask + random negative.
weights = tf.expand_dims(alpha * fpmask, axis=-1)
localization_loss = custom_layers.abs_smooth(localisations - glocalisations)
localization_loss = tf.divide(tf.reduce_sum(localization_loss * weights), batch_float, name='value')
tf.losses.add_loss(localization_loss)
regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
all_losses = [cross_entropy_neg_loss, cross_entropy_pos_loss, localization_loss] + (regularization_losses if regularization_losses else [])
return tf.add_n(all_losses)
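# Worked example of the hard-negative budget above (illustration only):
# with batch_size = 2, negative_ratio = 3 and 10 positive anchors,
# n_neg = 3 * 10 + 2 = 32 negatives are kept (capped by the number of
# candidates with score > -0.5), chosen as the hardest ones, i.e. those
# with the lowest background confidence predictions[:, 0].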

View File

@@ -0,0 +1,24 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TF Extended: additional metrics.
"""
# pylint: disable=unused-import,line-too-long,g-importing-member,wildcard-import
from tfextended.metrics import *
from tfextended.tensors import *
from tfextended.bboxes import *
from tfextended.image import *
from tfextended.math import *

View File

@@ -0,0 +1,508 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TF Extended: additional bounding boxes methods.
"""
import numpy as np
import tensorflow as tf
from tfextended import tensors as tfe_tensors
from tfextended import math as tfe_math
# =========================================================================== #
# Standard boxes algorithms.
# =========================================================================== #
def bboxes_sort_all_classes(classes, scores, bboxes, top_k=400, scope=None):
"""Sort bounding boxes by decreasing order and keep only the top_k.
Assume the input Tensors mix up objects with different classes.
Assume a batch-type input.
Args:
classes: Batch x N Tensor containing integer classes.
scores: Batch x N Tensor containing float scores.
bboxes: Batch x N x 4 Tensor containing boxes coordinates.
top_k: Top_k boxes to keep.
Return:
classes, scores, bboxes: Sorted tensors of shape Batch x Top_k.
"""
with tf.name_scope(scope, 'bboxes_sort', [classes, scores, bboxes]):
scores, idxes = tf.nn.top_k(scores, k=top_k, sorted=True)
# Trick to be able to use tf.gather: map for each element in the batch.
def fn_gather(classes, bboxes, idxes):
cl = tf.gather(classes, idxes)
bb = tf.gather(bboxes, idxes)
return [cl, bb]
r = tf.map_fn(lambda x: fn_gather(x[0], x[1], x[2]),
[classes, bboxes, idxes],
dtype=[classes.dtype, bboxes.dtype],
parallel_iterations=10,
back_prop=False,
swap_memory=False,
infer_shape=True)
classes = r[0]
bboxes = r[1]
return classes, scores, bboxes
def bboxes_sort(scores, bboxes, top_k=400, scope=None):
"""Sort bounding boxes by decreasing order and keep only the top_k.
If inputs are dictionaries, assume every key is a different class.
Assume a batch-type input.
Args:
scores: Batch x N Tensor/Dictionary containing float scores.
bboxes: Batch x N x 4 Tensor/Dictionary containing boxes coordinates.
top_k: Top_k boxes to keep.
Return:
scores, bboxes: Sorted Tensors/Dictionaries of shape Batch x Top_k x 1|4.
"""
# Dictionaries as inputs.
if isinstance(scores, dict) or isinstance(bboxes, dict):
with tf.name_scope(scope, 'bboxes_sort_dict'):
d_scores = {}
d_bboxes = {}
for c in scores.keys():
s, b = bboxes_sort(scores[c], bboxes[c], top_k=top_k)
d_scores[c] = s
d_bboxes[c] = b
return d_scores, d_bboxes
# Tensors inputs.
with tf.name_scope(scope, 'bboxes_sort', [scores, bboxes]):
# Sort scores...
scores, idxes = tf.nn.top_k(scores, k=top_k, sorted=True)
# Trick to be able to use tf.gather: map for each element in the first dim.
def fn_gather(bboxes, idxes):
bb = tf.gather(bboxes, idxes)
return [bb]
r = tf.map_fn(lambda x: fn_gather(x[0], x[1]),
[bboxes, idxes],
dtype=[bboxes.dtype],
parallel_iterations=10,
back_prop=False,
swap_memory=False,
infer_shape=True)
bboxes = r[0]
return scores, bboxes
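# Example (illustrative): per-class dictionaries keep the top_k
# highest-scoring boxes for every class independently.
#   d_scores = {1: tf.constant([[0.1, 0.9, 0.5]])}
#   d_bboxes = {1: tf.random_uniform([1, 3, 4])}
#   d_scores, d_bboxes = bboxes_sort(d_scores, d_bboxes, top_k=2)
#   # d_scores[1] -> [[0.9, 0.5]], with boxes gathered accordingly.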
def bboxes_clip(bbox_ref, bboxes, scope=None):
"""Clip bounding boxes to a reference box.
Batch-compatible if the first dimension of `bbox_ref` and `bboxes`
can be broadcasted.
Args:
bbox_ref: Reference bounding box. Nx4 or 4 shaped-Tensor;
bboxes: Bounding boxes to clip. Nx4 or 4 shaped-Tensor or dictionary.
Return:
Clipped bboxes.
"""
# Bboxes is dictionary.
if isinstance(bboxes, dict):
with tf.name_scope(scope, 'bboxes_clip_dict'):
d_bboxes = {}
for c in bboxes.keys():
d_bboxes[c] = bboxes_clip(bbox_ref, bboxes[c])
return d_bboxes
# Tensors inputs.
with tf.name_scope(scope, 'bboxes_clip'):
# Easier with transposed bboxes. Especially for broadcasting.
bbox_ref = tf.transpose(bbox_ref)
bboxes = tf.transpose(bboxes)
# Intersection bboxes and reference bbox.
ymin = tf.maximum(bboxes[0], bbox_ref[0])
xmin = tf.maximum(bboxes[1], bbox_ref[1])
ymax = tf.minimum(bboxes[2], bbox_ref[2])
xmax = tf.minimum(bboxes[3], bbox_ref[3])
        # Ensure well-formed (possibly empty) boxes when there is no intersection.
        ymin = tf.minimum(ymin, ymax)
        xmin = tf.minimum(xmin, xmax)
bboxes = tf.transpose(tf.stack([ymin, xmin, ymax, xmax], axis=0))
return bboxes
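# Example (illustrative): clipping a box to the unit square.
#   ref = tf.constant([0., 0., 1., 1.])
#   boxes = tf.constant([[-0.1, 0.2, 0.5, 1.3]])
#   bboxes_clip(ref, boxes)  # -> [[0., 0.2, 0.5, 1.]]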
def bboxes_resize(bbox_ref, bboxes, name=None):
"""Resize bounding boxes based on a reference bounding box,
assuming that the latter is [0, 0, 1, 1] after transform. Useful for
updating a collection of boxes after cropping an image.
"""
# Bboxes is dictionary.
if isinstance(bboxes, dict):
with tf.name_scope(name, 'bboxes_resize_dict'):
d_bboxes = {}
for c in bboxes.keys():
d_bboxes[c] = bboxes_resize(bbox_ref, bboxes[c])
return d_bboxes
# Tensors inputs.
with tf.name_scope(name, 'bboxes_resize'):
# Translate.
v = tf.stack([bbox_ref[0], bbox_ref[1], bbox_ref[0], bbox_ref[1]])
bboxes = bboxes - v
# Scale.
s = tf.stack([bbox_ref[2] - bbox_ref[0],
bbox_ref[3] - bbox_ref[1],
bbox_ref[2] - bbox_ref[0],
bbox_ref[3] - bbox_ref[1]])
bboxes = bboxes / s
return bboxes
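# Example (illustrative): after cropping an image to bbox_ref, re-express a
# box in the coordinates of the crop.
#   ref = tf.constant([0.2, 0.2, 0.7, 0.7])
#   bboxes_resize(ref, tf.constant([[0.45, 0.45, 0.7, 0.7]]))
#   # -> [[0.5, 0.5, 1., 1.]]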
def bboxes_nms(scores, bboxes, nms_threshold=0.5, keep_top_k=200, scope=None):
"""Apply non-maximum selection to bounding boxes. In comparison to TF
implementation, use classes information for matching.
Should only be used on single-entries. Use batch version otherwise.
Args:
scores: N Tensor containing float scores.
bboxes: N x 4 Tensor containing boxes coordinates.
nms_threshold: Matching threshold in NMS algorithm;
keep_top_k: Number of total object to keep after NMS.
    Return:
      scores, bboxes: Tensors sorted by score, padded with zeros if necessary.
"""
with tf.name_scope(scope, 'bboxes_nms_single', [scores, bboxes]):
# Apply NMS algorithm.
idxes = tf.image.non_max_suppression(bboxes, scores,
keep_top_k, nms_threshold)
scores = tf.gather(scores, idxes)
bboxes = tf.gather(bboxes, idxes)
# Pad results.
scores = tfe_tensors.pad_axis(scores, 0, keep_top_k, axis=0)
bboxes = tfe_tensors.pad_axis(bboxes, 0, keep_top_k, axis=0)
return scores, bboxes
def bboxes_nms_batch(scores, bboxes, nms_threshold=0.5, keep_top_k=200,
scope=None):
"""Apply non-maximum selection to bounding boxes. In comparison to TF
implementation, use classes information for matching.
Use only on batched-inputs. Use zero-padding in order to batch output
results.
Args:
scores: Batch x N Tensor/Dictionary containing float scores.
bboxes: Batch x N x 4 Tensor/Dictionary containing boxes coordinates.
nms_threshold: Matching threshold in NMS algorithm;
keep_top_k: Number of total object to keep after NMS.
Return:
scores, bboxes Tensors/Dictionaries, sorted by score.
Padded with zero if necessary.
"""
# Dictionaries as inputs.
if isinstance(scores, dict) or isinstance(bboxes, dict):
with tf.name_scope(scope, 'bboxes_nms_batch_dict'):
d_scores = {}
d_bboxes = {}
for c in scores.keys():
s, b = bboxes_nms_batch(scores[c], bboxes[c],
nms_threshold=nms_threshold,
keep_top_k=keep_top_k)
d_scores[c] = s
d_bboxes[c] = b
return d_scores, d_bboxes
# Tensors inputs.
with tf.name_scope(scope, 'bboxes_nms_batch'):
r = tf.map_fn(lambda x: bboxes_nms(x[0], x[1],
nms_threshold, keep_top_k),
(scores, bboxes),
dtype=(scores.dtype, bboxes.dtype),
parallel_iterations=10,
back_prop=False,
swap_memory=False,
infer_shape=True)
scores, bboxes = r
return scores, bboxes
# def bboxes_fast_nms(classes, scores, bboxes,
# nms_threshold=0.5, eta=3., num_classes=21,
# pad_output=True, scope=None):
# with tf.name_scope(scope, 'bboxes_fast_nms',
# [classes, scores, bboxes]):
# nms_classes = tf.zeros((0,), dtype=classes.dtype)
# nms_scores = tf.zeros((0,), dtype=scores.dtype)
# nms_bboxes = tf.zeros((0, 4), dtype=bboxes.dtype)
def bboxes_matching(label, scores, bboxes,
glabels, gbboxes, gdifficults,
matching_threshold=0.5, scope=None):
"""Matching a collection of detected boxes with groundtruth values.
Does not accept batched-inputs.
The algorithm goes as follows: for every detected box, check
if one grountruth box is matching. If none, then considered as False Positive.
If the grountruth box is already matched with another one, it also counts
as a False Positive. We refer the Pascal VOC documentation for the details.
Args:
rclasses, rscores, rbboxes: N(x4) Tensors. Detected objects, sorted by score;
glabels, gbboxes: Groundtruth bounding boxes. May be zero padded, hence
zero-class objects are ignored.
matching_threshold: Threshold for a positive match.
Return: Tuple of:
n_gbboxes: Scalar Tensor with number of groundtruth boxes (may difer from
size because of zero padding).
tp_match: (N,)-shaped boolean Tensor containing with True Positives.
fp_match: (N,)-shaped boolean Tensor containing with False Positives.
"""
with tf.name_scope(scope, 'bboxes_matching_single',
[scores, bboxes, glabels, gbboxes]):
rsize = tf.size(scores)
rshape = tf.shape(scores)
rlabel = tf.cast(label, glabels.dtype)
# Number of groundtruth boxes.
gdifficults = tf.cast(gdifficults, tf.bool)
n_gbboxes = tf.count_nonzero(tf.logical_and(tf.equal(glabels, label),
tf.logical_not(gdifficults)))
        # Groundtruth matching arrays.
gmatch = tf.zeros(tf.shape(glabels), dtype=tf.bool)
grange = tf.range(tf.size(glabels), dtype=tf.int32)
# True/False positive matching TensorArrays.
sdtype = tf.bool
ta_tp_bool = tf.TensorArray(sdtype, size=rsize, dynamic_size=False, infer_shape=True)
ta_fp_bool = tf.TensorArray(sdtype, size=rsize, dynamic_size=False, infer_shape=True)
# Loop over returned objects.
def m_condition(i, ta_tp, ta_fp, gmatch):
r = tf.less(i, rsize)
return r
def m_body(i, ta_tp, ta_fp, gmatch):
# Jaccard score with groundtruth bboxes.
rbbox = bboxes[i]
jaccard = bboxes_jaccard(rbbox, gbboxes)
jaccard = jaccard * tf.cast(tf.equal(glabels, rlabel), dtype=jaccard.dtype)
# Best fit, checking it's above threshold.
idxmax = tf.cast(tf.argmax(jaccard, axis=0), tf.int32)
jcdmax = jaccard[idxmax]
match = jcdmax > matching_threshold
existing_match = gmatch[idxmax]
not_difficult = tf.logical_not(gdifficults[idxmax])
# TP: match & no previous match and FP: previous match | no match.
# If difficult: no record, i.e FP=False and TP=False.
tp = tf.logical_and(not_difficult,
tf.logical_and(match, tf.logical_not(existing_match)))
ta_tp = ta_tp.write(i, tp)
fp = tf.logical_and(not_difficult,
tf.logical_or(existing_match, tf.logical_not(match)))
ta_fp = ta_fp.write(i, fp)
            # Update groundtruth match.
mask = tf.logical_and(tf.equal(grange, idxmax),
tf.logical_and(not_difficult, match))
gmatch = tf.logical_or(gmatch, mask)
return [i+1, ta_tp, ta_fp, gmatch]
# Main loop definition.
i = 0
[i, ta_tp_bool, ta_fp_bool, gmatch] = \
tf.while_loop(m_condition, m_body,
[i, ta_tp_bool, ta_fp_bool, gmatch],
parallel_iterations=1,
back_prop=False)
# TensorArrays to Tensors and reshape.
tp_match = tf.reshape(ta_tp_bool.stack(), rshape)
fp_match = tf.reshape(ta_fp_bool.stack(), rshape)
# Some debugging information...
# tp_match = tf.Print(tp_match,
# [n_gbboxes,
# tf.reduce_sum(tf.cast(tp_match, tf.int64)),
# tf.reduce_sum(tf.cast(fp_match, tf.int64)),
# tf.reduce_sum(tf.cast(gmatch, tf.int64))],
# 'Matching (NG, TP, FP, GM): ')
return n_gbboxes, tp_match, fp_match
def bboxes_matching_batch(labels, scores, bboxes,
glabels, gbboxes, gdifficults,
matching_threshold=0.5, scope=None):
"""Matching a collection of detected boxes with groundtruth values.
Batched-inputs version.
Args:
rclasses, rscores, rbboxes: BxN(x4) Tensors. Detected objects, sorted by score;
glabels, gbboxes: Groundtruth bounding boxes. May be zero padded, hence
zero-class objects are ignored.
matching_threshold: Threshold for a positive match.
Return: Tuple or Dictionaries with:
n_gbboxes: Scalar Tensor with number of groundtruth boxes (may difer from
size because of zero padding).
tp: (B, N)-shaped boolean Tensor containing with True Positives.
fp: (B, N)-shaped boolean Tensor containing with False Positives.
"""
# Dictionaries as inputs.
if isinstance(scores, dict) or isinstance(bboxes, dict):
with tf.name_scope(scope, 'bboxes_matching_batch_dict'):
d_n_gbboxes = {}
d_tp = {}
d_fp = {}
for c in labels:
n, tp, fp, _ = bboxes_matching_batch(c, scores[c], bboxes[c],
glabels, gbboxes, gdifficults,
matching_threshold)
d_n_gbboxes[c] = n
d_tp[c] = tp
d_fp[c] = fp
return d_n_gbboxes, d_tp, d_fp, scores
with tf.name_scope(scope, 'bboxes_matching_batch',
[scores, bboxes, glabels, gbboxes]):
r = tf.map_fn(lambda x: bboxes_matching(labels, x[0], x[1],
x[2], x[3], x[4],
matching_threshold),
(scores, bboxes, glabels, gbboxes, gdifficults),
dtype=(tf.int64, tf.bool, tf.bool),
parallel_iterations=10,
back_prop=False,
swap_memory=True,
infer_shape=True)
return r[0], r[1], r[2], scores
# =========================================================================== #
# Some filtering methods.
# =========================================================================== #
def bboxes_filter_center(labels, bboxes, margins=[0., 0., 0., 0.],
scope=None):
"""Filter out bounding boxes whose center are not in
the rectangle [0, 0, 1, 1] + margins. The margin Tensor
can be used to enforce or loosen this condition.
Return:
labels, bboxes: Filtered elements.
"""
with tf.name_scope(scope, 'bboxes_filter', [labels, bboxes]):
cy = (bboxes[:, 0] + bboxes[:, 2]) / 2.
cx = (bboxes[:, 1] + bboxes[:, 3]) / 2.
mask = tf.greater(cy, margins[0])
mask = tf.logical_and(mask, tf.greater(cx, margins[1]))
        mask = tf.logical_and(mask, tf.less(cy, 1. + margins[2]))
mask = tf.logical_and(mask, tf.less(cx, 1. + margins[3]))
# Boolean masking...
labels = tf.boolean_mask(labels, mask)
bboxes = tf.boolean_mask(bboxes, mask)
return labels, bboxes
def bboxes_filter_overlap(labels, bboxes,
threshold=0.5, assign_negative=False,
scope=None):
"""Filter out bounding boxes based on (relative )overlap with reference
box [0, 0, 1, 1]. Remove completely bounding boxes, or assign negative
labels to the one outside (useful for latter processing...).
Return:
labels, bboxes: Filtered (or newly assigned) elements.
"""
with tf.name_scope(scope, 'bboxes_filter', [labels, bboxes]):
scores = bboxes_intersection(tf.constant([0, 0, 1, 1], bboxes.dtype),
bboxes)
mask = scores > threshold
if assign_negative:
labels = tf.where(mask, labels, -labels)
# bboxes = tf.where(mask, bboxes, bboxes)
else:
labels = tf.boolean_mask(labels, mask)
bboxes = tf.boolean_mask(bboxes, mask)
return labels, bboxes
def bboxes_filter_labels(labels, bboxes,
out_labels=[], num_classes=np.inf,
scope=None):
"""Filter out labels from a collection. Typically used to get
of DontCare elements. Also remove elements based on the number of classes.
Return:
labels, bboxes: Filtered elements.
"""
with tf.name_scope(scope, 'bboxes_filter_labels', [labels, bboxes]):
        # Keep labels below num_classes (guard against the np.inf default,
        # which cannot be compared against integer labels), and drop any label
        # listed in out_labels. The original draft looped over the `labels`
        # Tensor and inverted the class test, contradicting the docstring.
        if np.isinf(num_classes):
            mask = tf.ones(tf.shape(labels), dtype=tf.bool)
        else:
            mask = tf.less(labels, num_classes)
        for l in out_labels:
            mask = tf.logical_and(mask, tf.not_equal(labels, l))
labels = tf.boolean_mask(labels, mask)
bboxes = tf.boolean_mask(bboxes, mask)
return labels, bboxes
# =========================================================================== #
# Standard boxes computation.
# =========================================================================== #
def bboxes_jaccard(bbox_ref, bboxes, name=None):
"""Compute jaccard score between a reference box and a collection
of bounding boxes.
Args:
bbox_ref: (N, 4) or (4,) Tensor with reference bounding box(es).
bboxes: (N, 4) Tensor, collection of bounding boxes.
Return:
(N,) Tensor with Jaccard scores.
"""
with tf.name_scope(name, 'bboxes_jaccard'):
# Should be more efficient to first transpose.
bboxes = tf.transpose(bboxes)
bbox_ref = tf.transpose(bbox_ref)
# Intersection bbox and volume.
int_ymin = tf.maximum(bboxes[0], bbox_ref[0])
int_xmin = tf.maximum(bboxes[1], bbox_ref[1])
int_ymax = tf.minimum(bboxes[2], bbox_ref[2])
int_xmax = tf.minimum(bboxes[3], bbox_ref[3])
h = tf.maximum(int_ymax - int_ymin, 0.)
w = tf.maximum(int_xmax - int_xmin, 0.)
# Volumes.
inter_vol = h * w
union_vol = -inter_vol \
+ (bboxes[2] - bboxes[0]) * (bboxes[3] - bboxes[1]) \
+ (bbox_ref[2] - bbox_ref[0]) * (bbox_ref[3] - bbox_ref[1])
jaccard = tfe_math.safe_divide(inter_vol, union_vol, 'jaccard')
return jaccard
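# Worked example (illustrative): a box covering the top-left quarter of the
# unit reference box has intersection 0.25 and union 1.0, so its Jaccard
# score is 0.25.
#   bboxes_jaccard(tf.constant([0., 0., 1., 1.]),
#                  tf.constant([[0., 0., 0.5, 0.5]]))  # -> [0.25]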
def bboxes_intersection(bbox_ref, bboxes, name=None):
"""Compute relative intersection between a reference box and a
collection of bounding boxes. Namely, compute the quotient between
intersection area and box area.
Args:
bbox_ref: (N, 4) or (4,) Tensor with reference bounding box(es).
bboxes: (N, 4) Tensor, collection of bounding boxes.
Return:
(N,) Tensor with relative intersection.
"""
with tf.name_scope(name, 'bboxes_intersection'):
# Should be more efficient to first transpose.
bboxes = tf.transpose(bboxes)
bbox_ref = tf.transpose(bbox_ref)
# Intersection bbox and volume.
int_ymin = tf.maximum(bboxes[0], bbox_ref[0])
int_xmin = tf.maximum(bboxes[1], bbox_ref[1])
int_ymax = tf.minimum(bboxes[2], bbox_ref[2])
int_xmax = tf.minimum(bboxes[3], bbox_ref[3])
h = tf.maximum(int_ymax - int_ymin, 0.)
w = tf.maximum(int_xmax - int_xmin, 0.)
# Volumes.
inter_vol = h * w
bboxes_vol = (bboxes[2] - bboxes[0]) * (bboxes[3] - bboxes[1])
scores = tfe_math.safe_divide(inter_vol, bboxes_vol, 'intersection')
return scores
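# ----------------------------------------------------------------------------
# Illustrative post-processing sketch (shapes and thresholds below are
# assumptions, not values mandated by this module): clip boxes to the image,
# sort them by score, then apply batched NMS.
# ----------------------------------------------------------------------------
#   rbbox_img = tf.constant([0., 0., 1., 1.])
#   bboxes = bboxes_clip(rbbox_img, bboxes)                 # Batch x N x 4
#   scores, bboxes = bboxes_sort(scores, bboxes, top_k=400)
#   scores, bboxes = bboxes_nms_batch(scores, bboxes,
#                                     nms_threshold=0.45, keep_top_k=200)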

View File

@@ -0,0 +1,63 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TF Extended: additional math functions.
"""
import tensorflow as tf
from tensorflow.python.framework import ops
def safe_divide(numerator, denominator, name):
"""Divides two values, returning 0 if the denominator is <= 0.
Args:
numerator: A real `Tensor`.
denominator: A real `Tensor`, with dtype matching `numerator`.
name: Name for the returned op.
Returns:
0 if `denominator` <= 0, else `numerator` / `denominator`
"""
return tf.where(
tf.greater(denominator, 0),
tf.divide(numerator, denominator),
tf.zeros_like(numerator),
name=name)
def cummax(x, reverse=False, name=None):
"""Compute the cumulative maximum of the tensor `x` along `axis`. This
operation is similar to the more classic `cumsum`. Only support 1D Tensor
for now.
Args:
x: A `Tensor`. Must be one of the following types: `float32`, `float64`,
`int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,
`complex128`, `qint8`, `quint8`, `qint32`, `half`.
axis: A `Tensor` of type `int32` (default: 0).
reverse: A `bool` (default: False).
name: A name for the operation (optional).
Returns:
A `Tensor`. Has the same type as `x`.
"""
with ops.name_scope(name, "Cummax", [x]) as name:
x = ops.convert_to_tensor(x, name="x")
# Not very optimal: should directly integrate reverse into tf.scan.
if reverse:
x = tf.reverse(x, axis=[0])
        # 'Accumulating' maximum: ensure it is always increasing.
cmax = tf.scan(tf.maximum, x,
initializer=None, parallel_iterations=1,
back_prop=False, swap_memory=False)
if reverse:
cmax = tf.reverse(cmax, axis=[0])
return cmax
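# A quick, illustrative sanity check of the cummax semantics (only runs when
# the module is executed directly, never on import):
if __name__ == '__main__':
    with tf.Session() as sess:
        _x = tf.constant([1., 3., 2., 5., 4.])
        print(sess.run(cummax(_x)))                # [1. 3. 3. 5. 5.]
        print(sess.run(cummax(_x, reverse=True)))  # [5. 5. 5. 5. 4.]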

View File

@@ -0,0 +1,397 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TF Extended: additional metrics.
"""
import tensorflow as tf
import numpy as np
from tensorflow.contrib.framework.python.ops import variables as contrib_variables
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables
from tfextended import math as tfe_math
# =========================================================================== #
# TensorFlow utils
# =========================================================================== #
def _create_local(name, shape, collections=None, validate_shape=False,
dtype=dtypes.float32):
"""Creates a new local variable.
Args:
name: The name of the new or existing variable.
shape: Shape of the new or existing variable.
collections: A list of collection names to which the Variable will be added.
validate_shape: Whether to validate the shape of the variable.
dtype: Data type of the variables.
Returns:
The created variable.
"""
# Make sure local variables are added to tf.GraphKeys.LOCAL_VARIABLES
collections = list(collections or [])
collections += [ops.GraphKeys.LOCAL_VARIABLES]
return tf.Variable(
initial_value=array_ops.zeros(shape, dtype=dtype),
name=name,
trainable=False,
collections=collections,
validate_shape=validate_shape)
def _safe_div(numerator, denominator, name):
"""Divides two values, returning 0 if the denominator is <= 0.
Args:
numerator: A real `Tensor`.
denominator: A real `Tensor`, with dtype matching `numerator`.
name: Name for the returned op.
Returns:
0 if `denominator` <= 0, else `numerator` / `denominator`
"""
return tf.where(
tf.math.greater(denominator, 0),
tf.math.divide(numerator, denominator),
tf.zeros_like(numerator),
name=name)
def _broadcast_weights(weights, values):
"""Broadcast `weights` to the same shape as `values`.
This returns a version of `weights` following the same broadcast rules as
`mul(weights, values)`. When computing a weighted average, use this function
to broadcast `weights` before summing them; e.g.,
`reduce_sum(w * v) / reduce_sum(_broadcast_weights(w, v))`.
Args:
weights: `Tensor` whose shape is broadcastable to `values`.
values: `Tensor` of any shape.
Returns:
`weights` broadcast to `values` shape.
"""
weights_shape = weights.get_shape()
values_shape = values.get_shape()
if(weights_shape.is_fully_defined() and
values_shape.is_fully_defined() and
weights_shape.is_compatible_with(values_shape)):
return weights
return tf.math.multiply(
weights, array_ops.ones_like(values), name='broadcast_weights')
# =========================================================================== #
# TF Extended metrics: TP and FP arrays.
# =========================================================================== #
def precision_recall(num_gbboxes, num_detections, tp, fp, scores,
dtype=tf.float64, scope=None):
"""Compute precision and recall from scores, true positives and false
positives booleans arrays
"""
# Input dictionaries: dict outputs as streaming metrics.
if isinstance(scores, dict):
d_precision = {}
d_recall = {}
for c in num_gbboxes.keys():
scope = 'precision_recall_%s' % c
p, r = precision_recall(num_gbboxes[c], num_detections[c],
tp[c], fp[c], scores[c],
dtype, scope)
d_precision[c] = p
d_recall[c] = r
return d_precision, d_recall
# Sort by score.
with tf.name_scope(scope, 'precision_recall',
[num_gbboxes, num_detections, tp, fp, scores]):
# Sort detections by score.
scores, idxes = tf.nn.top_k(scores, k=num_detections, sorted=True)
tp = tf.gather(tp, idxes)
fp = tf.gather(fp, idxes)
        # Compute recall and precision.
tp = tf.cumsum(tf.cast(tp, dtype), axis=0)
fp = tf.cumsum(tf.cast(fp, dtype), axis=0)
recall = _safe_div(tp, tf.cast(num_gbboxes, dtype), 'recall')
precision = _safe_div(tp, tp + fp, 'precision')
return tf.tuple([precision, recall])
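# Worked example (illustrative): with num_gbboxes=2, tp=[1, 0, 1] and
# fp=[0, 1, 0] (already sorted by score), the cumulative sums are
# tp=[1, 1, 2] and fp=[0, 1, 1], giving precision=[1., 0.5, 2/3] and
# recall=[0.5, 0.5, 1.].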
def streaming_tp_fp_arrays(num_gbboxes, tp, fp, scores,
remove_zero_scores=True,
metrics_collections=None,
updates_collections=None,
name=None):
"""Streaming computation of True and False Positive arrays. This metrics
also keeps track of scores and number of grountruth objects.
"""
# Input dictionaries: dict outputs as streaming metrics.
if isinstance(scores, dict) or isinstance(fp, dict):
d_values = {}
d_update_ops = {}
for c in num_gbboxes.keys():
scope = 'streaming_tp_fp_%s' % c
v, up = streaming_tp_fp_arrays(num_gbboxes[c], tp[c], fp[c], scores[c],
remove_zero_scores,
metrics_collections,
updates_collections,
name=scope)
d_values[c] = v
d_update_ops[c] = up
return d_values, d_update_ops
# Input Tensors...
with variable_scope.variable_scope(name, 'streaming_tp_fp',
[num_gbboxes, tp, fp, scores]):
num_gbboxes = tf.cast(num_gbboxes, dtype=tf.int64)
scores = tf.cast(scores, dtype=tf.float32)
stype = tf.bool
tp = tf.cast(tp, stype)
fp = tf.cast(fp, stype)
# Reshape TP and FP tensors and clean away 0 class values.
scores = tf.reshape(scores, [-1])
tp = tf.reshape(tp, [-1])
fp = tf.reshape(fp, [-1])
# Remove TP and FP both false.
mask = tf.logical_or(tp, fp)
if remove_zero_scores:
rm_threshold = 1e-4
mask = tf.logical_and(mask, tf.greater(scores, rm_threshold))
scores = tf.boolean_mask(scores, mask)
tp = tf.boolean_mask(tp, mask)
fp = tf.boolean_mask(fp, mask)
        # Local variables accumulating information over batches.
v_nobjects = _create_local('v_num_gbboxes', shape=[], dtype=tf.int64)
v_ndetections = _create_local('v_num_detections', shape=[], dtype=tf.int32)
v_scores = _create_local('v_scores', shape=[0, ])
v_tp = _create_local('v_tp', shape=[0, ], dtype=stype)
v_fp = _create_local('v_fp', shape=[0, ], dtype=stype)
# Update operations.
nobjects_op = state_ops.assign_add(v_nobjects,
tf.reduce_sum(num_gbboxes))
ndetections_op = state_ops.assign_add(v_ndetections,
tf.size(scores, out_type=tf.int32))
scores_op = state_ops.assign(v_scores, tf.concat([v_scores, scores], axis=0),
validate_shape=False)
tp_op = state_ops.assign(v_tp, tf.concat([v_tp, tp], axis=0),
validate_shape=False)
fp_op = state_ops.assign(v_fp, tf.concat([v_fp, fp], axis=0),
validate_shape=False)
# Value and update ops.
val = (v_nobjects, v_ndetections, v_tp, v_fp, v_scores)
with ops.control_dependencies([nobjects_op, ndetections_op,
scores_op, tp_op, fp_op]):
update_op = (nobjects_op, ndetections_op, tp_op, fp_op, scores_op)
if metrics_collections:
ops.add_to_collections(metrics_collections, val)
if updates_collections:
ops.add_to_collections(updates_collections, update_op)
return val, update_op
# =========================================================================== #
# Average precision computations.
# =========================================================================== #
def average_precision_voc12(precision, recall, name=None):
"""Compute (interpolated) average precision from precision and recall Tensors.
The implementation follows Pascal 2012 and ILSVRC guidelines.
See also: https://sanchom.wordpress.com/tag/average-precision/
"""
with tf.name_scope(name, 'average_precision_voc12', [precision, recall]):
# Convert to float64 to decrease error on Riemann sums.
precision = tf.cast(precision, dtype=tf.float64)
recall = tf.cast(recall, dtype=tf.float64)
# Add bounds values to precision and recall.
precision = tf.concat([[0.], precision, [0.]], axis=0)
recall = tf.concat([[0.], recall, [1.]], axis=0)
# Ensures precision is increasing in reverse order.
precision = tfe_math.cummax(precision, reverse=True)
# Riemann sums for estimating the integral.
# mean_pre = (precision[1:] + precision[:-1]) / 2.
mean_pre = precision[1:]
diff_rec = recall[1:] - recall[:-1]
ap = tf.reduce_sum(mean_pre * diff_rec)
return ap
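# Worked example (illustrative): for precision=[1., 0.5] and recall=[0.5, 1.],
# padding gives precision=[0., 1., 0.5, 0.] and recall=[0., 0.5, 1., 1.];
# the reverse cummax turns precision into [1., 1., 0.5, 0.] and the Riemann
# sum yields AP = 1.*0.5 + 0.5*0.5 + 0.*0. = 0.75.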
def average_precision_voc07(precision, recall, name=None):
"""Compute (interpolated) average precision from precision and recall Tensors.
The implementation follows Pascal 2007 guidelines.
See also: https://sanchom.wordpress.com/tag/average-precision/
"""
with tf.name_scope(name, 'average_precision_voc07', [precision, recall]):
# Convert to float64 to decrease error on cumulated sums.
precision = tf.cast(precision, dtype=tf.float64)
recall = tf.cast(recall, dtype=tf.float64)
# Add zero-limit value to avoid any boundary problem...
precision = tf.concat([precision, [0.]], axis=0)
recall = tf.concat([recall, [np.inf]], axis=0)
        # Split the integral into 11 recall bins (VOC07 11-point interpolation).
l_aps = []
for t in np.arange(0., 1.1, 0.1):
mask = tf.greater_equal(recall, t)
v = tf.reduce_max(tf.boolean_mask(precision, mask))
l_aps.append(v / 11.)
ap = tf.add_n(l_aps)
return ap
def precision_recall_values(xvals, precision, recall, name=None):
"""Compute values on the precision/recall curve.
    Args:
      xvals: Python list of floats (recall values at which to sample);
      precision: 1D Tensor, decreasing;
      recall: 1D Tensor, increasing.
Return:
list of precision values.
"""
with ops.name_scope(name, "precision_recall_values",
[precision, recall]) as name:
# Add bounds values to precision and recall.
precision = tf.concat([[0.], precision, [0.]], axis=0)
recall = tf.concat([[0.], recall, [1.]], axis=0)
precision = tfe_math.cummax(precision, reverse=True)
prec_values = []
for x in xvals:
mask = tf.less_equal(recall, x)
val = tf.reduce_min(tf.boolean_mask(precision, mask))
prec_values.append(val)
return tf.tuple(prec_values)
# =========================================================================== #
# TF Extended metrics: old stuff!
# =========================================================================== #
def _precision_recall(n_gbboxes, n_detections, scores, tp, fp, scope=None):
"""Compute precision and recall from scores, true positives and false
positives booleans arrays
"""
# Sort by score.
with tf.name_scope(scope, 'prec_rec', [n_gbboxes, scores, tp, fp]):
# Sort detections by score.
scores, idxes = tf.nn.top_k(scores, k=n_detections, sorted=True)
tp = tf.gather(tp, idxes)
fp = tf.gather(fp, idxes)
        # Compute recall and precision.
dtype = tf.float64
tp = tf.cumsum(tf.cast(tp, dtype), axis=0)
fp = tf.cumsum(tf.cast(fp, dtype), axis=0)
recall = _safe_div(tp, tf.cast(n_gbboxes, dtype), 'recall')
precision = _safe_div(tp, tp + fp, 'precision')
return tf.tuple([precision, recall])
def streaming_precision_recall_arrays(n_gbboxes, rclasses, rscores,
tp_tensor, fp_tensor,
remove_zero_labels=True,
metrics_collections=None,
updates_collections=None,
name=None):
"""Streaming computation of precision / recall arrays. This metrics
keeps tracks of boolean True positives and False positives arrays.
"""
with variable_scope.variable_scope(name, 'stream_precision_recall',
[n_gbboxes, rclasses, tp_tensor, fp_tensor]):
n_gbboxes = tf.cast(n_gbboxes, tf.int64)
rclasses = tf.cast(rclasses, tf.int64)
        rscores = tf.cast(rscores, tf.float32)
stype = tf.int32
tp_tensor = tf.cast(tp_tensor, stype)
fp_tensor = tf.cast(fp_tensor, stype)
# Reshape TP and FP tensors and clean away 0 class values.
rclasses = tf.reshape(rclasses, [-1])
rscores = tf.reshape(rscores, [-1])
tp_tensor = tf.reshape(tp_tensor, [-1])
fp_tensor = tf.reshape(fp_tensor, [-1])
if remove_zero_labels:
mask = tf.greater(rclasses, 0)
rclasses = tf.boolean_mask(rclasses, mask)
rscores = tf.boolean_mask(rscores, mask)
tp_tensor = tf.boolean_mask(tp_tensor, mask)
fp_tensor = tf.boolean_mask(fp_tensor, mask)
        # Local variables accumulating information over batches.
v_nobjects = _create_local('v_nobjects', shape=[], dtype=tf.int64)
v_ndetections = _create_local('v_ndetections', shape=[], dtype=tf.int32)
v_scores = _create_local('v_scores', shape=[0, ])
v_tp = _create_local('v_tp', shape=[0, ], dtype=stype)
v_fp = _create_local('v_fp', shape=[0, ], dtype=stype)
# Update operations.
nobjects_op = state_ops.assign_add(v_nobjects,
tf.reduce_sum(n_gbboxes))
ndetections_op = state_ops.assign_add(v_ndetections,
tf.size(rscores, out_type=tf.int32))
scores_op = state_ops.assign(v_scores, tf.concat([v_scores, rscores], axis=0),
validate_shape=False)
tp_op = state_ops.assign(v_tp, tf.concat([v_tp, tp_tensor], axis=0),
validate_shape=False)
fp_op = state_ops.assign(v_fp, tf.concat([v_fp, fp_tensor], axis=0),
validate_shape=False)
# Precision and recall computations.
# r = _precision_recall(nobjects_op, scores_op, tp_op, fp_op, 'value')
r = _precision_recall(v_nobjects, v_ndetections, v_scores,
v_tp, v_fp, 'value')
with ops.control_dependencies([nobjects_op, ndetections_op,
scores_op, tp_op, fp_op]):
update_op = _precision_recall(nobjects_op, ndetections_op,
scores_op, tp_op, fp_op, 'update_op')
# update_op = tf.Print(update_op,
# [tf.reduce_sum(tf.cast(mask, tf.int64)),
# tf.reduce_sum(tf.cast(mask2, tf.int64)),
# tf.reduce_min(rscores),
# tf.reduce_sum(n_gbboxes)],
# 'Metric: ')
# Some debugging stuff!
# update_op = tf.Print(update_op,
# [tf.shape(tp_op),
# tf.reduce_sum(tf.cast(tp_op, tf.int64), axis=0)],
# 'TP and FP shape: ')
# update_op[0] = tf.Print(update_op,
# [nobjects_op],
# '# Groundtruth bboxes: ')
# update_op = tf.Print(update_op,
# [update_op[0][0],
# update_op[0][-1],
# tf.reduce_min(update_op[0]),
# tf.reduce_max(update_op[0]),
# tf.reduce_min(update_op[1]),
# tf.reduce_max(update_op[1])],
# 'Precision and recall :')
if metrics_collections:
ops.add_to_collections(metrics_collections, r)
if updates_collections:
ops.add_to_collections(updates_collections, update_op)
return r, update_op
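# ----------------------------------------------------------------------------
# Driving a streaming metric (illustrative sketch; the input tensors are
# assumed to come from the detection pipeline):
# ----------------------------------------------------------------------------
#   value, update = streaming_tp_fp_arrays(n_gbboxes, tp, fp, scores)
#   with tf.Session() as sess:
#       sess.run(tf.local_variables_initializer())
#       for _ in range(num_batches):
#           sess.run(update)
#       n_obj, n_det, v_tp, v_fp, v_scores = sess.run(value)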

View File

@@ -0,0 +1,95 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TF Extended: additional tensors operations.
"""
import tensorflow as tf
from tensorflow.contrib.framework.python.ops import variables as contrib_variables
from tensorflow.contrib.metrics.python.ops import set_ops
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables
def get_shape(x, rank=None):
"""Returns the dimensions of a Tensor as list of integers or scale tensors.
Args:
x: N-d Tensor;
rank: Rank of the Tensor. If None, will try to guess it.
Returns:
A list of `[d1, d2, ..., dN]` corresponding to the dimensions of the
input tensor. Dimensions that are statically known are python integers,
otherwise they are integer scalar tensors.
"""
if x.get_shape().is_fully_defined():
return x.get_shape().as_list()
else:
static_shape = x.get_shape()
if rank is None:
static_shape = static_shape.as_list()
rank = len(static_shape)
else:
static_shape = x.get_shape().with_rank(rank).as_list()
dynamic_shape = tf.unstack(tf.shape(x), rank)
return [s if s is not None else d
for s, d in zip(static_shape, dynamic_shape)]
def pad_axis(x, offset, size, axis=0, name=None):
"""Pad a tensor on an axis, with a given offset and output size.
    The tensor is padded with zeros (i.e. CONSTANT mode). Note that if `size`
    is smaller than the existing size plus `offset`, no truncation is
    performed: the output dimension on `axis` stays at `offset` plus the
    existing size.
Args:
x: Tensor to pad;
offset: Offset to add on the dimension chosen;
size: Final size of the dimension.
Return:
Padded tensor whose dimension on `axis` is `size`, or greater if
the input vector was larger.
"""
with tf.name_scope(name, 'pad_axis'):
shape = get_shape(x)
rank = len(shape)
# Padding description.
new_size = tf.maximum(size-offset-shape[axis], 0)
pad1 = tf.stack([0]*axis + [offset] + [0]*(rank-axis-1))
pad2 = tf.stack([0]*axis + [new_size] + [0]*(rank-axis-1))
paddings = tf.stack([pad1, pad2], axis=1)
x = tf.pad(x, paddings, mode='CONSTANT')
# Reshape, to get fully defined shape if possible.
# TODO: fix with tf.slice
shape[axis] = size
x = tf.reshape(x, tf.stack(shape))
return x
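# Example (illustrative): pad a length-2 vector to length 4 with offset 1.
#   pad_axis(tf.ones([2]), offset=1, size=4)  # -> [0., 1., 1., 0.]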
# def select_at_index(idx, val, t):
# """Return a tensor.
# """
# idx = tf.expand_dims(tf.expand_dims(idx, 0), 0)
# val = tf.expand_dims(val, 0)
# t = t + tf.scatter_nd(idx, val, tf.shape(t))
# return t

View File

@@ -0,0 +1,20 @@
'''
Endpoint names to look for in the graph
'''
from anchors import generate_anchors
feat_layers = generate_anchors.feat_layers
sub_feats = ['']
localizations_names = [f'ssd_300_vgg/{feature}_box/Reshape:0' for feature in feat_layers]
predictions_names = ['ssd_300_vgg/softmax/Reshape_1:0'] \
+ [f'ssd_300_vgg/softmax_{n}/Reshape_1:0' for n in range(1, len(feat_layers))]
logit_names = [f'ssd_300_vgg/{feature}_box/Reshape_1:0' for feature in feat_layers]
endpoint_names = ['ssd_300_vgg/conv1/conv1_2/Relu:0'] \
+ [f'ssd_300_vgg/conv{n}/conv{n}_3/Relu:0' for n in range(4, 6)] \
+ [f'ssd_300_vgg/conv{n}/conv{n}_{n}/Relu:0' for n in range(2, 4)] \
+ [f'ssd_300_vgg/conv{n}/Relu:0' for n in range(6, 8)] \
+ [f'ssd_300_vgg/{feature}/conv3x3/Relu:0' for feature in feat_layers if feature != 'block4' and feature != 'block7']
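# Example (illustrative, assuming feat_layers contains 'block4'): the names
# generated above look like 'ssd_300_vgg/block4_box/Reshape:0' for the
# localizations and 'ssd_300_vgg/block4_box/Reshape_1:0' for the logits.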

View File

@@ -0,0 +1,158 @@
# Copyright 2016 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""Diverse TensorFlow utils, for training, evaluation and so on!
"""
import os
import tensorflow as tf
# =========================================================================== #
# General tools.
# =========================================================================== #
def reshape_list(l, shape=None):
"""Reshape list of (list): 1D to 2D or the other way around.
Args:
l: List or List of list.
shape: 1D or 2D shape.
Return
Reshaped list.
"""
r = []
if shape is None:
# Flatten everything.
for a in l:
if isinstance(a, (list, tuple)):
r = r + list(a)
else:
r.append(a)
else:
# Reshape to list of list.
i = 0
for s in shape:
if s == 1:
r.append(l[i])
else:
r.append(l[i:i+s])
i += s
return r
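# Example (illustrative): flattening and un-flattening nested lists.
#   reshape_list([a, (b, c), d])           # -> [a, b, c, d]
#   reshape_list([a, b, c, d], [1, 2, 1])  # -> [a, [b, c], d]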
def configure_learning_rate(flags, num_samples_per_epoch, global_step):
"""Configures the learning rate.
Args:
num_samples_per_epoch: The number of samples in each epoch of training.
global_step: The global_step tensor.
Returns:
A `Tensor` representing the learning rate.
"""
decay_steps = int(num_samples_per_epoch / flags.batch_size *
flags.num_epochs_per_decay)
if flags.learning_rate_decay_type == 'exponential':
return tf.train.exponential_decay(flags.learning_rate,
global_step,
decay_steps,
flags.learning_rate_decay_factor,
staircase=True,
name='exponential_decay_learning_rate')
elif flags.learning_rate_decay_type == 'fixed':
return tf.constant(flags.learning_rate, name='fixed_learning_rate')
elif flags.learning_rate_decay_type == 'polynomial':
return tf.train.polynomial_decay(flags.learning_rate,
global_step,
decay_steps,
flags.end_learning_rate,
power=1.0,
cycle=False,
name='polynomial_decay_learning_rate')
else:
        raise ValueError('learning_rate_decay_type [%s] was not recognized'
                         % flags.learning_rate_decay_type)
def configure_optimizer(flags, learning_rate):
"""Configures the optimizer used for training.
Args:
learning_rate: A scalar or `Tensor` learning rate.
Returns:
An instance of an optimizer.
"""
if flags.optimizer == 'adadelta':
optimizer = tf.train.AdadeltaOptimizer(
learning_rate,
rho=flags.adadelta_rho,
epsilon=flags.opt_epsilon)
elif flags.optimizer == 'adagrad':
optimizer = tf.train.AdagradOptimizer(
learning_rate,
initial_accumulator_value=flags.adagrad_initial_accumulator_value)
elif flags.optimizer == 'adam':
optimizer = tf.train.AdamOptimizer(
learning_rate,
beta1=flags.adam_beta1,
beta2=flags.adam_beta2,
epsilon=flags.opt_epsilon)
elif flags.optimizer == 'ftrl':
optimizer = tf.train.FtrlOptimizer(
learning_rate,
learning_rate_power=flags.ftrl_learning_rate_power,
initial_accumulator_value=flags.ftrl_initial_accumulator_value,
l1_regularization_strength=flags.ftrl_l1,
l2_regularization_strength=flags.ftrl_l2)
elif flags.optimizer == 'momentum':
optimizer = tf.train.MomentumOptimizer(
learning_rate,
momentum=flags.momentum,
name='Momentum')
elif flags.optimizer == 'rmsprop':
optimizer = tf.train.RMSPropOptimizer(
learning_rate,
decay=flags.rmsprop_decay,
momentum=flags.rmsprop_momentum,
epsilon=flags.opt_epsilon)
elif flags.optimizer == 'sgd':
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
else:
        raise ValueError('Optimizer [%s] was not recognized' % flags.optimizer)
return optimizer
def update_model_scope(var, ckpt_scope, new_scope):
    # Map a variable name from the training scope back to the checkpoint's
    # original 'vgg_16' scope (note: ckpt_scope is currently unused).
    return var.op.name.replace(new_scope, 'vgg_16')
def get_variables_to_train(flags):
"""Returns a list of variables to train.
Returns:
A list of variables to train by the optimizer.
"""
if flags.trainable_scopes is None:
return tf.trainable_variables()
else:
scopes = [scope.strip() for scope in flags.trainable_scopes.split(',')]
variables_to_train = []
for scope in scopes:
variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
variables_to_train.extend(variables)
return variables_to_train
# =========================================================================== #
# Evaluation utils.
# =========================================================================== #

View File

@@ -0,0 +1,114 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import cv2
import random
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.cm as mpcm
# =========================================================================== #
# Some colormaps.
# =========================================================================== #
def colors_subselect(colors, num_classes=21):
dt = len(colors) // num_classes
sub_colors = []
for i in range(num_classes):
color = colors[i*dt]
if isinstance(color[0], float):
sub_colors.append([int(c * 255) for c in color])
else:
sub_colors.append([c for c in color])
return sub_colors
colors_plasma = colors_subselect(mpcm.plasma.colors, num_classes=21)
colors_tableau = [(255, 255, 255), (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
(44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
(148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
(227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
(188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]
# =========================================================================== #
# OpenCV drawing.
# =========================================================================== #
def draw_lines(img, lines, color=[255, 0, 0], thickness=2):
"""Draw a collection of lines on an image.
"""
for line in lines:
for x1, y1, x2, y2 in line:
cv2.line(img, (x1, y1), (x2, y2), color, thickness)
def draw_rectangle(img, p1, p2, color=[255, 0, 0], thickness=2):
cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)
def draw_bbox(img, bbox, shape, label, color=[255, 0, 0], thickness=2):
p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1]))
p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1]))
cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)
p1 = (p1[0]+15, p1[1])
cv2.putText(img, str(label), p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.5, color, 1)
def bboxes_draw_on_img(img, classes, scores, bboxes, colors, thickness=2):
shape = img.shape
for i in range(bboxes.shape[0]):
bbox = bboxes[i]
color = colors[classes[i]]
# Draw bounding box...
p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1]))
p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1]))
cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)
# Draw text...
s = '%s/%.3f' % (classes[i], scores[i])
p1 = (p1[0]-5, p1[1])
cv2.putText(img, s, p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1)
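# Example usage (illustrative sketch; `img` is an HxWx3 uint8 array, the
# other arrays come from the detection pipeline):
#   bboxes_draw_on_img(img, classes, scores, bboxes, colors_tableau)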
# =========================================================================== #
# Matplotlib show...
# =========================================================================== #
def plt_bboxes(img, classes, scores, bboxes, figsize=(10,10), linewidth=1.5):
"""Visualize bounding boxes. Largely inspired by SSD-MXNET!
"""
fig = plt.figure(figsize=figsize)
plt.imshow(img)
height = img.shape[0]
width = img.shape[1]
colors = dict()
for i in range(classes.shape[0]):
cls_id = int(classes[i])
if cls_id >= 0:
score = scores[i]
if cls_id not in colors:
colors[cls_id] = (random.random(), random.random(), random.random())
ymin = int(bboxes[i, 0] * height)
xmin = int(bboxes[i, 1] * width)
ymax = int(bboxes[i, 2] * height)
xmax = int(bboxes[i, 3] * width)
rect = plt.Rectangle((xmin, ymin), xmax - xmin,
ymax - ymin, fill=False,
edgecolor=colors[cls_id],
linewidth=linewidth)
plt.gca().add_patch(rect)
class_name = str(cls_id)
plt.gca().text(xmin, ymin - 2,
'{:s} | {:.3f}'.format(class_name, score),
bbox=dict(facecolor=colors[cls_id], alpha=0.5),
fontsize=12, color='white')
plt.show()