mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-25 01:00:11 -05:00
ssd vgg
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1,5 @@
{
    "subscription_id": "93177b32-3f08-4530-a61e-d1775d2480ad",
    "resource_group": "MSRBrainwave",
    "workspace_name": "brainwave"
}
Binary file not shown.
After Width: | Height: | Size: 83 KiB |
@@ -0,0 +1,26 @@
<annotation>
    <folder>runeightft1</folder>
    <filename>1555394321.8154433.jpg</filename>
    <path>E:/Image grocerydemostills/runeightft1/1555394321.8154433.jpg</path>
    <source>
        <database>Unknown</database>
    </source>
    <size>
        <width>852</width>
        <height>506</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>stockout</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>660</xmin>
            <ymin>201</ymin>
            <xmax>712</xmax>
            <ymax>294</ymax>
        </bndbox>
    </object>
</annotation>
@@ -0,0 +1,92 @@
import numpy as np
import math
from model.ssd_vgg_300 import SSDNet, SSDParams

_R_MEAN = 123.
_G_MEAN = 117.
_B_MEAN = 104.
EVAL_SIZE = (300, 300)

defaults = SSDNet.default_params

img_shape = defaults.img_shape
num_classes = defaults.num_classes
feat_layers = defaults.feat_layers
feat_shapes = defaults.feat_shapes
anchor_size_bounds = defaults.anchor_size_bounds
anchor_sizes = defaults.anchor_sizes
anchor_ratios = defaults.anchor_ratios
anchor_steps = defaults.anchor_steps
anchor_offset = defaults.anchor_offset
normalizations = defaults.normalizations
prior_scaling = defaults.prior_scaling


def ssd_anchor_one_layer(img_shape,
                         feat_shape,
                         sizes,
                         ratios,
                         step,
                         offset=0.5,
                         dtype=np.float32):
    """Compute SSD default anchor boxes for one feature layer.

    Determine the relative position grid of the centers, and the relative
    width and height.

    Arguments:
      img_shape: Image shape, used for computing height and width relative
        to the image;
      feat_shape: Feature shape, used for computing relative position grids;
      sizes: Absolute reference sizes;
      ratios: Ratios to use on these features;
      step: Anchor grid step, in pixels;
      offset: Grid offset.

    Return:
      y, x, h, w: Relative x and y grids, and height and width.
    """
    # Compute the position grid: simple way.
    # y, x = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
    # y = (y.astype(dtype) + offset) / feat_shape[0]
    # x = (x.astype(dtype) + offset) / feat_shape[1]
    # Weird SSD-Caffe computation using steps values...
    y, x = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
    y = (y.astype(dtype) + offset) * step / img_shape[0]
    x = (x.astype(dtype) + offset) * step / img_shape[1]

    # Expand dims to support easy broadcasting.
    y = np.expand_dims(y, axis=-1)
    x = np.expand_dims(x, axis=-1)

    # Compute relative height and width.
    # Tries to follow the original implementation of SSD for the order.
    num_anchors = len(sizes) + len(ratios)
    h = np.zeros((num_anchors, ), dtype=dtype)
    w = np.zeros((num_anchors, ), dtype=dtype)
    # Add first anchor boxes with ratio=1.
    h[0] = sizes[0] / img_shape[0]
    w[0] = sizes[0] / img_shape[1]
    di = 1
    if len(sizes) > 1:
        h[1] = math.sqrt(sizes[0] * sizes[1]) / img_shape[0]
        w[1] = math.sqrt(sizes[0] * sizes[1]) / img_shape[1]
        di += 1
    for i, r in enumerate(ratios):
        h[i+di] = sizes[0] / img_shape[0] / math.sqrt(r)
        w[i+di] = sizes[0] / img_shape[1] * math.sqrt(r)
    return y, x, h, w


def ssd_anchors_all_layers(img_shape=img_shape,
                           offset=0.5,
                           dtype=np.float32):
    """Compute anchor boxes for all feature layers."""
    layers_anchors = []
    for i, s in enumerate(feat_shapes):
        anchor_bboxes = ssd_anchor_one_layer(img_shape, s,
                                             anchor_sizes[i],
                                             anchor_ratios[i],
                                             anchor_steps[i],
                                             offset=offset, dtype=dtype)
        layers_anchors.append(anchor_bboxes)
    return layers_anchors
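For orientation, here is a minimal usage sketch (not part of the commit) of what these helpers return, assuming the stock SSD-300 defaults where the first feature layer is a 38x38 grid with sizes (21., 45.) and ratios [2, .5]:

```python
# Hypothetical usage sketch, assuming stock SSD-300 default parameters.
anchors = ssd_anchors_all_layers()
y, x, h, w = anchors[0]      # first feature layer
print(y.shape, x.shape)      # (38, 38, 1) each: relative anchor center grid
print(h.shape, w.shape)      # (4,) each: one height/width per anchor shape
```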
@@ -0,0 +1,114 @@
import os
import sys
import tarfile

from six.moves import urllib
import tensorflow as tf

LABELS_FILENAME = 'labels.txt'

import shutil
from os import path


def check_labelmatch(images, annotations):
    data_dir_images = os.path.split(images[0])[0]
    data_dir_annot = os.path.split(annotations[0])[0]

    im_files = {os.path.splitext(os.path.split(f)[1])[0] for f in images}
    annot_files = {os.path.splitext(os.path.split(f)[1])[0] for f in annotations}

    extra_ims = im_files.difference(annot_files)
    extra_annots = annot_files.difference(im_files)
    mismatch = len(extra_ims) > 0 or len(extra_annots) > 0

    if mismatch:
        print("The following files will be removed from the training process:")

        if len(extra_ims) > 0:
            print(f"images without annotations: {extra_ims}")

        if len(extra_annots) > 0:
            print(f"annotations without images: {extra_annots}")

    if not mismatch:
        print(f"{len(images)} images found and {len(annotations)} matching annotations found.")
        return (images, annotations)

    im_files = im_files.difference(extra_ims)
    annot_files = annot_files.difference(extra_annots)

    im_files = [os.path.join(data_dir_images, f + ".jpg") for f in im_files]
    annot_files = [os.path.join(data_dir_annot, f + ".xml") for f in annot_files]

    return (im_files, annot_files)


def create_dir(path):
    # Note: this parameter shadows the `path` imported from os above; it is
    # not used inside this function, so the shadowing is harmless here.
    try:
        path_annotations = path + '/Annotations'
        path_images = path + '/JPEGImages'

        os.makedirs(path_annotations)
        os.makedirs(path_images)

    except OSError:
        print("Creation of folders in directory %s failed. Folder may already exist." % path)
    else:
        print("Successfully created images and annotations folders at %s" % path)


def move_images(data_dir, train_images, train_annotations,
                test_images, test_annotations):

    source = data_dir + '/'

    for image in train_images:
        image = data_dir + '/' + image
        dst = source + 'train/JPEGImages'

        if path.exists(image):
            shutil.copy(image, dst)

    for image in test_images:
        image = data_dir + '/' + image
        dst = source + 'test/JPEGImages'

        if path.exists(image):
            shutil.copy(image, dst)

    for annot in train_annotations:
        annot = data_dir + '/' + annot
        dst = source + 'train/Annotations'

        if path.exists(annot):
            shutil.copy(annot, dst)

    for annot in test_annotations:
        annot = data_dir + '/' + annot
        dst = source + 'test/Annotations'

        if path.exists(annot):
            shutil.copy(annot, dst)

    print('Images and annotations have been copied to directories: ' + source + 'train' + ' and ' + source + 'test')


def int64_feature(value):
    """Wrapper for inserting int64 features into Example proto."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=value))


def float_feature(value):
    """Wrapper for inserting float features into Example proto."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(float_list=tf.train.FloatList(value=value))


def bytes_feature(value):
    """Wrapper for inserting bytes features into Example proto."""
    if not isinstance(value, list):
        value = [value]
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
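The three wrappers at the end of this file compose directly into a `tf.train.Example`; a small illustrative sketch (field values are made up):

```python
# Illustrative sketch only; the feature values here are invented.
example = tf.train.Example(features=tf.train.Features(feature={
    'image/height': int64_feature(506),
    'image/object/bbox/xmin': float_feature([0.77, 0.12]),
    'image/format': bytes_feature(b'JPEG'),
}))
print(example.features.feature['image/height'].int64_list.value)  # [506]
```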
@@ -0,0 +1,112 @@
# Copyright 2015 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides data for the Pascal VOC Dataset (images + annotations).
"""
import os

import tensorflow as tf
from dataprep import dataset_utils

slim = tf.contrib.slim

VOC_LABELS = {
    'none': (0, 'Background'),
    'aeroplane': (1, 'Vehicle'),
    'bicycle': (2, 'Vehicle'),
    'bird': (3, 'Animal'),
    'boat': (4, 'Vehicle'),
    'bottle': (5, 'Indoor'),
    'bus': (6, 'Vehicle'),
    'car': (7, 'Vehicle'),
    'cat': (8, 'Animal'),
    'chair': (9, 'Indoor'),
    'cow': (10, 'Animal'),
    'diningtable': (11, 'Indoor'),
    'dog': (12, 'Animal'),
    'horse': (13, 'Animal'),
    'motorbike': (14, 'Vehicle'),
    'person': (15, 'Person'),
    'pottedplant': (16, 'Indoor'),
    'sheep': (17, 'Animal'),
    'sofa': (18, 'Indoor'),
    'train': (19, 'Vehicle'),
    'tvmonitor': (20, 'Indoor'),
}


def get_split(split_name, dataset_dir, file_pattern, reader,
              split_to_sizes, items_to_descriptions, num_classes):
    """Gets a dataset tuple with instructions for reading Pascal VOC dataset.

    Args:
      split_name: A train/test split name.
      dataset_dir: The base directory of the dataset sources.
      file_pattern: The file pattern to use when matching the dataset sources.
        It is assumed that the pattern contains a '%s' string so that the split
        name can be inserted.
      reader: The TensorFlow reader type.
      split_to_sizes: Mapping from split name to number of samples.
      items_to_descriptions: Mapping from item name to its description.
      num_classes: Number of classes in the dataset.

    Returns:
      A `Dataset` namedtuple.

    Raises:
      ValueError: if `split_name` is not a valid train/test split.
    """
    if split_name not in split_to_sizes:
        raise ValueError('split name %s was not recognized.' % split_name)
    file_pattern = os.path.join(dataset_dir, file_pattern % split_name)

    # Allowing None in the signature so that dataset_factory can use the default.
    if reader is None:
        reader = tf.TFRecordReader
    # Features in Pascal VOC TFRecords.
    keys_to_features = {
        'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
        'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
        'image/height': tf.FixedLenFeature([1], tf.int64),
        'image/width': tf.FixedLenFeature([1], tf.int64),
        'image/channels': tf.FixedLenFeature([1], tf.int64),
        'image/shape': tf.FixedLenFeature([3], tf.int64),
        'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
        'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
    }
    items_to_handlers = {
        'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
        'shape': slim.tfexample_decoder.Tensor('image/shape'),
        'object/bbox': slim.tfexample_decoder.BoundingBox(
            ['ymin', 'xmin', 'ymax', 'xmax'], 'image/object/bbox/'),
        'object/label': slim.tfexample_decoder.Tensor('image/object/bbox/label'),
        'object/difficult': slim.tfexample_decoder.Tensor('image/object/bbox/difficult'),
        'object/truncated': slim.tfexample_decoder.Tensor('image/object/bbox/truncated'),
    }
    decoder = slim.tfexample_decoder.TFExampleDecoder(
        keys_to_features, items_to_handlers)

    labels_to_names = None
    if dataset_utils.has_labels(dataset_dir):
        labels_to_names = dataset_utils.read_label_file(dataset_dir)

    return slim.dataset.Dataset(
        data_sources=file_pattern,
        reader=reader,
        decoder=decoder,
        num_samples=split_to_sizes[split_name],
        items_to_descriptions=items_to_descriptions,
        num_classes=num_classes,
        labels_to_names=labels_to_names)
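A hedged sketch of how the returned `Dataset` tuple might be consumed with slim's data provider (TF 1.x contrib API; paths, sizes and the file pattern below are illustrative only):

```python
# Hypothetical sketch: reading one decoded sample from the Dataset returned
# by get_split. The names passed to provider.get() match items_to_handlers.
dataset = get_split('train', '/data/voc', 'voc_%s_*.tfrecord', None,
                    split_to_sizes={'train': 5011, 'test': 4952},
                    items_to_descriptions={}, num_classes=21)
provider = slim.dataset_data_provider.DatasetDataProvider(dataset, num_readers=2)
image, shape, bboxes, labels = provider.get(
    ['image', 'shape', 'object/bbox', 'object/label'])
```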
@@ -0,0 +1,223 @@
# Copyright 2015 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Converts Pascal VOC data to TFRecords file format with Example protos.

The raw Pascal VOC data set is expected to reside in JPEG files located in the
'JPEGImages' directory. Similarly, bounding box annotations are expected to be
stored in the 'Annotations' directory.

This TensorFlow script converts the training and evaluation data into
a sharded data set consisting of 1024 and 128 TFRecord files, respectively.

Each validation TFRecord file contains ~500 records. Each training TFRecord
file contains ~1000 records. Each record within the TFRecord file is a
serialized Example proto. The Example proto contains the following fields:

    image/encoded: string containing JPEG encoded image in RGB colorspace
    image/height: integer, image height in pixels
    image/width: integer, image width in pixels
    image/channels: integer, specifying the number of channels, always 3
    image/format: string, specifying the format, always 'JPEG'

    image/object/bbox/xmin: list of float specifying the 0+ human annotated
        bounding boxes
    image/object/bbox/xmax: list of float specifying the 0+ human annotated
        bounding boxes
    image/object/bbox/ymin: list of float specifying the 0+ human annotated
        bounding boxes
    image/object/bbox/ymax: list of float specifying the 0+ human annotated
        bounding boxes
    image/object/bbox/label: list of integer specifying the classification index.
    image/object/bbox/label_text: list of string descriptions.

Note that the length of xmin is identical to the length of xmax, ymin and ymax
for each example.
"""
import os
import sys
import random

import numpy as np
import tensorflow as tf

import xml.etree.ElementTree as ET

from dataprep.dataset_utils import int64_feature, float_feature, bytes_feature

# TFRecords conversion parameters.
RANDOM_SEED = 4242
SAMPLES_PER_FILES = 100


def _set_voc_labels_map(class_list):
    return dict(**{'none': 0}, **{cl: i + 1 for i, cl in enumerate(class_list)})


def _process_image(img_name, annot_name, class_list):
    """Process an image and its annotation file.

    Args:
      img_name: string, path to an image file e.g., '/path/to/example.JPG'.
      annot_name: string, path to the matching Pascal VOC XML annotation file.
      class_list: list of class names used to build the label map.
    Returns:
      image_buffer: string, JPEG encoding of RGB image.
      height: integer, image height in pixels.
      width: integer, image width in pixels.
    """
    # Read the image file.
    image_data = tf.gfile.FastGFile(img_name, 'rb').read()
    class_dict = _set_voc_labels_map(class_list)

    # Read the XML annotation file.
    filename = annot_name
    tree = ET.parse(filename)
    root = tree.getroot()

    # Image shape.
    size = root.find('size')
    shape = [int(size.find('height').text),
             int(size.find('width').text),
             int(size.find('depth').text)]
    # Find annotations.
    bboxes = []
    labels = []
    labels_text = []
    difficult = []
    truncated = []
    for obj in root.findall('object'):
        label = obj.find('name').text
        labels.append(class_dict[label])
        labels_text.append(label.encode('ascii'))

        # Explicit `is not None` checks: an Element with no children is falsy,
        # so truth-testing the result of find() would silently skip elements.
        if obj.find('difficult') is not None:
            difficult.append(int(obj.find('difficult').text))
        else:
            difficult.append(0)
        if obj.find('truncated') is not None:
            truncated.append(int(obj.find('truncated').text))
        else:
            truncated.append(0)

        bbox = obj.find('bndbox')
        bboxes.append((to_valid_range(float(bbox.find('ymin').text) / shape[0]),
                       to_valid_range(float(bbox.find('xmin').text) / shape[1]),
                       to_valid_range(float(bbox.find('ymax').text) / shape[0]),
                       to_valid_range(float(bbox.find('xmax').text) / shape[1])
                       ))
    return image_data, shape, np.clip(bboxes, a_min=0., a_max=1.), labels, labels_text, difficult, truncated


def to_valid_range(v):
    if v < 0.0:
        return 0.0
    if v > 1.0:
        return 1.0
    return v


def _convert_to_example(image_data, labels, labels_text, bboxes, shape,
                        difficult, truncated):
    """Build an Example proto for an image example.

    Args:
      image_data: string, JPEG encoding of RGB image;
      labels: list of integers, identifier for the ground truth;
      labels_text: list of strings, human-readable labels;
      bboxes: list of bounding boxes; each box is a tuple of floats
        specifying [ymin, xmin, ymax, xmax]. All boxes are assumed to belong
        to the same label as the image label.
      shape: 3 integers, image shape in pixels.
    Returns:
      Example proto
    """
    xmin = []
    ymin = []
    xmax = []
    ymax = []
    for b in bboxes:
        assert len(b) == 4
        # pylint: disable=expression-not-assigned
        [l.append(point) for l, point in zip([ymin, xmin, ymax, xmax], b)]
        # pylint: enable=expression-not-assigned

    image_format = b'JPEG'
    example = tf.train.Example(features=tf.train.Features(feature={
            'image/height': int64_feature(shape[0]),
            'image/width': int64_feature(shape[1]),
            'image/channels': int64_feature(shape[2]),
            'image/shape': int64_feature(shape),
            'image/object/bbox/xmin': float_feature(xmin),
            'image/object/bbox/xmax': float_feature(xmax),
            'image/object/bbox/ymin': float_feature(ymin),
            'image/object/bbox/ymax': float_feature(ymax),
            'image/object/bbox/label': int64_feature(labels),
            'image/object/bbox/label_text': bytes_feature(labels_text),
            'image/object/bbox/difficult': int64_feature(difficult),
            'image/object/bbox/truncated': int64_feature(truncated),
            'image/format': bytes_feature(image_format),
            'image/encoded': bytes_feature(image_data)}))
    return example


def _add_to_tfrecord(img_name, annot_name, class_list, tfrecord_writer):
    """Loads data from image and annotation files and adds them to a TFRecord.

    Args:
      img_name: Path of the image to add to the TFRecord;
      annot_name: Path of the matching annotation file;
      class_list: List of class names;
      tfrecord_writer: The TFRecord writer to use for writing.
    """
    image_data, shape, bboxes, labels, labels_text, difficult, truncated = \
        _process_image(img_name, annot_name, class_list)

    example = _convert_to_example(image_data, labels, labels_text,
                                  bboxes, shape, difficult, truncated)
    tfrecord_writer.write(example.SerializeToString())


def _get_output_filename(output_dir, name, idx):
    return os.path.join(output_dir, f"{name}_{idx:04d}.tfrecord")


def run(output_dir, classes_list, images_list, annotations_list, output_name):
    """Runs the conversion operation.

    Args:
      output_dir: Output directory.
      classes_list: List of class names;
      images_list, annotations_list: Matched image and annotation file paths;
      output_name: Prefix for the generated TFRecord shards.
    """

    if not tf.gfile.Exists(output_dir):
        tf.gfile.MakeDirs(output_dir)

    if len(images_list) != len(annotations_list):
        raise ValueError("Images and annotations lists are of different lengths!")

    # Process dataset files.
    fidx = 0
    i = 0
    im_annot = list(zip(images_list, annotations_list))

    while i < len(im_annot):
        # Open new TFRecord file.
        tf_filename = _get_output_filename(output_dir, output_name, fidx)
        with tf.python_io.TFRecordWriter(tf_filename) as tfrecord_writer:
            j = 0
            while i < len(im_annot) and j < SAMPLES_PER_FILES:
                sys.stdout.write('\r>> Converting image %d/%d' % (i+1, len(im_annot)))
                sys.stdout.flush()

                img_name, annot_name = im_annot[i]
                _add_to_tfrecord(img_name, annot_name, classes_list, tfrecord_writer)
                i += 1
                j += 1
        fidx += 1

    print('\nFinished converting the Pascal VOC dataset!')
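A hedged end-to-end sketch of the conversion flow this file implements, using `check_labelmatch` from `dataprep.dataset_utils` above (paths and the class list are illustrative only):

```python
# Hypothetical usage sketch: pair up files, then shard them into TFRecords
# of SAMPLES_PER_FILES records each. All paths here are invented.
import glob
images = sorted(glob.glob('/data/voc/JPEGImages/*.jpg'))
annots = sorted(glob.glob('/data/voc/Annotations/*.xml'))
images, annots = check_labelmatch(images, annots)
run('/data/voc/tfrecords', ['stockout'], images, annots, 'voc_train')
```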
@@ -0,0 +1,65 @@
import tensorflow as tf
import numpy as np
import os

from datautil.ssd_vgg_preprocessing import preprocess_for_train, preprocess_for_eval
from model import ssd_common
from tfutil import tf_utils

features = {
    'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
    'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
    'image/height': tf.FixedLenFeature([1], tf.int64),
    'image/width': tf.FixedLenFeature([1], tf.int64),
    'image/channels': tf.FixedLenFeature([1], tf.int64),
    'image/shape': tf.FixedLenFeature([3], tf.int64),
    'image/object/bbox/xmin': tf.VarLenFeature(dtype=tf.float32),
    'image/object/bbox/ymin': tf.VarLenFeature(dtype=tf.float32),
    'image/object/bbox/xmax': tf.VarLenFeature(dtype=tf.float32),
    'image/object/bbox/ymax': tf.VarLenFeature(dtype=tf.float32),
    'image/object/bbox/label': tf.VarLenFeature(dtype=tf.int64),
    'image/object/bbox/difficult': tf.VarLenFeature(dtype=tf.int64),
    'image/object/bbox/truncated': tf.VarLenFeature(dtype=tf.int64),
}


def get_parser_func(anchors, num_classes, is_training, var_scope):
    '''
    Dataset parser function for training and evaluation.

    Arguments:
        anchors - SSD anchor boxes, needed for ground-truth encoding
        num_classes - number of object classes
        is_training - selects the train or eval preprocessing function
        var_scope - variable scope to wrap the parsing ops in
    '''

    preprocess_fn = preprocess_for_train if is_training else preprocess_for_eval

    def parse_tfrec_data(example_proto):
        with tf.variable_scope(var_scope):
            parsed_features = tf.parse_single_example(example_proto, features)

            image_string = parsed_features['image/encoded']
            image_decoded = tf.image.decode_jpeg(image_string)

            labels = tf.sparse.to_dense(parsed_features['image/object/bbox/label'])

            xmin = tf.sparse.to_dense(parsed_features['image/object/bbox/xmin'])
            xmax = tf.sparse.to_dense(parsed_features['image/object/bbox/xmax'])
            ymin = tf.sparse.to_dense(parsed_features['image/object/bbox/ymin'])
            ymax = tf.sparse.to_dense(parsed_features['image/object/bbox/ymax'])
            bboxes = tf.stack([ymin, xmin, ymax, xmax], axis=1)

            if is_training:
                image, labels, bboxes = preprocess_fn(image_decoded, labels, bboxes)
            else:
                image, labels, bboxes, _ = preprocess_fn(image_decoded, labels, bboxes)

            # ground truth encoding;
            # each of the returns is a list of tensors
            if is_training:
                classes, localisations, scores = \
                    ssd_common.tf_ssd_bboxes_encode(labels, bboxes, anchors, num_classes)
                return tf_utils.reshape_list([image, classes, localisations, scores])
            else:
                return tf_utils.reshape_list([image, labels, bboxes])

    return parse_tfrec_data
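A hedged sketch of wiring the returned parser into a TF 1.x `tf.data` input pipeline (the shard filename below is illustrative, and `anchors` is assumed to come from `ssd_anchors_all_layers()` in the anchors module above):

```python
# Hypothetical sketch: parse, batch and prefetch TFRecord shards.
parse_fn = get_parser_func(anchors, num_classes=21,
                           is_training=True, var_scope='parser')
dataset = (tf.data.TFRecordDataset(['voc_train_0000.tfrecord'])
           .map(parse_fn, num_parallel_calls=4)
           .batch(8)
           .prefetch(1))
iterator = dataset.make_initializable_iterator()
```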
@@ -0,0 +1,397 @@
# Copyright 2015 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Pre-processing images for SSD-type networks.
"""
from enum import IntEnum
import numpy as np

import tensorflow as tf

from tensorflow.python.ops import control_flow_ops

import tfextended as tfe
from datautil import tf_image

# Resizing strategies.
Resize = IntEnum('Resize', ('NONE',            # Nothing!
                            'CENTRAL_CROP',    # Crop (and pad if necessary).
                            'PAD_AND_RESIZE',  # Pad, and resize to output shape.
                            'WARP_RESIZE'))    # Warp resize.

# VGG mean parameters.
_R_MEAN = 123.
_G_MEAN = 117.
_B_MEAN = 104.

# Some training pre-processing parameters.
BBOX_CROP_OVERLAP = 0.5         # Minimum overlap to keep a bbox after cropping.
MIN_OBJECT_COVERED = 0.25
CROP_RATIO_RANGE = (0.6, 1.67)  # Distortion ratio during cropping.
EVAL_SIZE = (300, 300)


def tf_image_whitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN]):
    """Subtracts the given means from each image channel.

    Returns:
        The centered image.
    """
    if image.get_shape().ndims != 3:
        raise ValueError('Input must be of size [height, width, C>0]')

    mean = tf.constant(means, dtype=image.dtype)
    image = image - mean
    return image


def tf_image_unwhitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN], to_int=True):
    """Re-converts to the original image distribution, and converts to int if
    necessary.

    Returns:
        The un-centered image.
    """
    mean = tf.constant(means, dtype=image.dtype)
    image = image + mean
    if to_int:
        image = tf.cast(image, tf.int32)
    return image


def np_image_unwhitened(image, means=[_R_MEAN, _G_MEAN, _B_MEAN], to_int=True):
    """Re-converts to the original image distribution, and converts to int if
    necessary. Numpy version.

    Returns:
        The un-centered image.
    """
    img = np.copy(image)
    img += np.array(means, dtype=img.dtype)
    if to_int:
        img = img.astype(np.uint8)
    return img


def tf_summary_image(image, bboxes, name='image', unwhitened=False):
    """Add image with bounding boxes to summary.
    """
    if unwhitened:
        image = tf_image_unwhitened(image)
    image = tf.expand_dims(image, 0)
    bboxes = tf.expand_dims(bboxes, 0)
    image_with_box = tf.image.draw_bounding_boxes(image, bboxes)
    tf.summary.image(name, image_with_box)


def apply_with_random_selector(x, func, num_cases):
    """Computes func(x, sel), with sel sampled from [0...num_cases-1].

    Args:
        x: input Tensor.
        func: Python function to apply.
        num_cases: Python int32, number of cases to sample sel from.

    Returns:
        The result of func(x, sel), where func receives the value of the
        selector as a python integer, but sel is sampled dynamically.
    """
    sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
    # Pass the real x only to one of the func calls.
    return control_flow_ops.merge([
        func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case)
        for case in range(num_cases)])[0]


def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
    """Distort the color of a Tensor image.

    Each color distortion is non-commutative and thus ordering of the color ops
    matters. Ideally we would randomly permute the ordering of the color ops.
    Rather than adding that level of complication, we select a distinct ordering
    of color ops for each preprocessing thread.

    Args:
        image: 3-D Tensor containing a single image in [0, 1].
        color_ordering: Python int, a type of distortion (valid values: 0-3).
        fast_mode: Avoids slower ops (random_hue and random_contrast).
        scope: Optional scope for name_scope.
    Returns:
        3-D Tensor color-distorted image in range [0, 1].
    Raises:
        ValueError: if color_ordering is not in [0, 3].
    """
    with tf.name_scope(scope, 'distort_color', [image]):
        if fast_mode:
            if color_ordering == 0:
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
            else:
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
        else:
            if color_ordering == 0:
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
                image = tf.image.random_hue(image, max_delta=0.2)
                image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
            elif color_ordering == 1:
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
                image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
                image = tf.image.random_hue(image, max_delta=0.2)
            elif color_ordering == 2:
                image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
                image = tf.image.random_hue(image, max_delta=0.2)
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
            elif color_ordering == 3:
                image = tf.image.random_hue(image, max_delta=0.2)
                image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
                image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
                image = tf.image.random_brightness(image, max_delta=32. / 255.)
            else:
                raise ValueError('color_ordering must be in [0, 3]')
        # The random_* ops do not necessarily clamp.
        return tf.clip_by_value(image, 0.0, 1.0)


def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                min_object_covered=0.3,
                                aspect_ratio_range=(0.9, 1.1),
                                area_range=(0.1, 1.0),
                                max_attempts=200,
                                clip_bboxes=True,
                                scope=None):
    """Generates a cropped_image using one of the bboxes randomly distorted.

    See `tf.image.sample_distorted_bounding_box` for more documentation.

    Args:
        image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
        bboxes: Nx4 float Tensor of bounding boxes, where each coordinate is
            in [0, 1) and the coordinates are arranged as
            [ymin, xmin, ymax, xmax]. If N is 0 the whole image is used.
        min_object_covered: An optional `float`. Defaults to `0.3`. The cropped
            area of the image must contain at least this fraction of any
            bounding box supplied.
        aspect_ratio_range: An optional list of `floats`. The cropped area of
            the image must have an aspect ratio = width / height within this
            range.
        area_range: An optional list of `floats`. The cropped area of the image
            must contain a fraction of the supplied image within this range.
        max_attempts: An optional `int`. Number of attempts at generating a
            cropped region of the image of the specified constraints. After
            `max_attempts` failures, return the entire image.
        scope: Optional scope for name_scope.
    Returns:
        A tuple: the 3-D Tensor cropped_image, the filtered labels and bboxes,
        and the distorted bbox.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes]):
        # Each bounding box has shape [1, num_boxes, box coords] and
        # the coordinates are ordered [ymin, xmin, ymax, xmax].
        bbox_begin, bbox_size, distort_bbox = tf.image.sample_distorted_bounding_box(
                tf.shape(image),
                bounding_boxes=tf.expand_dims(bboxes, 0),
                min_object_covered=min_object_covered,
                aspect_ratio_range=aspect_ratio_range,
                area_range=area_range,
                max_attempts=max_attempts,
                use_image_if_no_bounding_boxes=True)
        distort_bbox = distort_bbox[0, 0]

        # Crop the image to the specified bounding box.
        cropped_image = tf.slice(image, bbox_begin, bbox_size)
        # Restore the shape since the dynamic slice loses the 3rd dimension.
        cropped_image.set_shape([None, None, 3])

        # Update bounding boxes: resize and filter out.
        bboxes = tfe.bboxes_resize(distort_bbox, bboxes)
        labels, bboxes = tfe.bboxes_filter_overlap(labels, bboxes,
                                                   threshold=BBOX_CROP_OVERLAP,
                                                   assign_negative=False)
        return cropped_image, labels, bboxes, distort_bbox


def preprocess_for_train(image, labels, bboxes,
                         out_shape=(300, 300), data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Preprocesses the given image for training: random crop with bbox
    filtering, resize to `out_shape`, random horizontal flip, random color
    distortions, and VGG mean subtraction.

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        labels: 1-D `Tensor` of object labels.
        bboxes: Nx4 `Tensor` of relative boxes [ymin, xmin, ymax, xmax].
        out_shape: Output (height, width) after pre-processing.
        data_format: 'NHWC' or 'NCHW'.

    Returns:
        A preprocessed image, with the filtered labels and bounding boxes.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # # Remove DontCare labels.
        # labels, bboxes = ssd_common.tf_bboxes_filter_labels(out_label,
        #                                                     labels,
        #                                                     bboxes)

        # Distort image and bounding boxes.
        dst_image = image
        dst_image, labels, bboxes, distort_bbox = \
            distorted_bounding_box_crop(image, labels, bboxes,
                                        min_object_covered=MIN_OBJECT_COVERED,
                                        aspect_ratio_range=CROP_RATIO_RANGE)
        # Resize image to output size.
        dst_image = tf_image.resize_image(dst_image, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
        tf_summary_image(dst_image, bboxes, 'image_shape_distorted')

        # Randomly flip the image horizontally.
        dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)

        # Randomly distort the colors. There are 4 ways to do it.
        dst_image = apply_with_random_selector(
                dst_image,
                lambda x, ordering: distort_color(x, ordering, fast_mode),
                num_cases=4)
        tf_summary_image(dst_image, bboxes, 'image_color_distorted')

        # Rescale to VGG input scale.
        image = dst_image * 255.
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes


def preprocess_for_eval(image, labels, bboxes,
                        out_shape=EVAL_SIZE, data_format='NHWC',
                        difficults=None, resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_eval'):
    """Preprocess an image for evaluation.

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        out_shape: Output shape after pre-processing (if resize != None).
        resize: Resize strategy.

    Returns:
        A preprocessed image.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.cast(image, tf.float32)
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Add image rectangle to bboxes.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = bbox_img
        else:
            bboxes = tf.concat([bbox_img, bboxes], axis=0)

        if resize == Resize.NONE:
            # No resizing...
            pass
        elif resize == Resize.CENTRAL_CROP:
            # Central cropping of the image.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Resize image first: find the correct factor...
            shape = tf.shape(image)
            factor = tf.minimum(tf.to_double(1.0),
                                tf.minimum(tf.to_double(out_shape[0] / shape[0]),
                                           tf.to_double(out_shape[1] / shape[1])))
            resize_shape = factor * tf.to_double(shape[0:2])
            resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)

            image = tf_image.resize_image(image, resize_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)
            # Pad to expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.WARP_RESIZE:
            # Warp resize of the image.
            image = tf_image.resize_image(image, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)

        # Split back bounding boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]
        # Remove difficult boxes.
        if difficults is not None:
            mask = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, mask)
            bboxes = tf.boolean_mask(bboxes, mask)
        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, bbox_img


def preprocess_image(image,
                     labels,
                     bboxes,
                     out_shape,
                     data_format,
                     is_training=False,
                     **kwargs):
    """Pre-processes a given image, dispatching to the train or eval pipeline.

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        labels: 1-D `Tensor` of object labels.
        bboxes: Nx4 `Tensor` of relative bounding boxes.
        out_shape: Output shape after pre-processing.
        data_format: 'NHWC' or 'NCHW'.
        is_training: `True` if we're preprocessing the image for training and
            `False` otherwise.

    Returns:
        A preprocessed image (with labels and bboxes; the eval pipeline also
        returns the full-image bbox).
    """
    if is_training:
        return preprocess_for_train(image, labels, bboxes,
                                    out_shape=out_shape,
                                    data_format=data_format)
    else:
        return preprocess_for_eval(image, labels, bboxes,
                                   out_shape=out_shape,
                                   data_format=data_format,
                                   **kwargs)
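A hedged sketch of the eval path of `preprocess_image` on a single decoded JPEG (TF 1.x; the file path and box values below are illustrative only):

```python
# Hypothetical sketch: eval-time preprocessing of one decoded JPEG tensor.
image = tf.image.decode_jpeg(tf.read_file('example.jpg'), channels=3)
labels = tf.constant([1], dtype=tf.int64)
bboxes = tf.constant([[0.40, 0.77, 0.58, 0.84]], dtype=tf.float32)
img, labels, bboxes, bbox_img = preprocess_image(
    image, labels, bboxes, out_shape=EVAL_SIZE,
    data_format='NHWC', is_training=False)
# img is now whitened (VGG channel means subtracted) and warp-resized to 300x300.
```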
@@ -0,0 +1,306 @@
# Copyright 2015 The TensorFlow Authors and Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Custom image operations.
Most of the following methods extend the TensorFlow image library, and part of
the code is shameless copy-paste of the former!
"""
import tensorflow as tf

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_image_ops
from tensorflow.python.ops import gen_nn_ops
from tensorflow.python.ops import string_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import variables


# =========================================================================== #
# Modification of TensorFlow image routines.
# =========================================================================== #
def _assert(cond, ex_type, msg):
    """A polymorphic assert that works with tensors and boolean expressions.
    If `cond` is not a tensor, behave like an ordinary assert statement, except
    that an empty list is returned. If `cond` is a tensor, return a list
    containing a single TensorFlow assert op.
    Args:
        cond: Something that evaluates to a boolean value. May be a tensor.
        ex_type: The exception class to use.
        msg: The error message.
    Returns:
        A list, containing at most one assert op.
    """
    if _is_tensor(cond):
        return [control_flow_ops.Assert(cond, [msg])]
    else:
        if not cond:
            raise ex_type(msg)
        else:
            return []


def _is_tensor(x):
    """Returns `True` if `x` is a symbolic tensor-like object.
    Args:
        x: A python object to check.
    Returns:
        `True` if `x` is a `tf.Tensor` or `tf.Variable`, otherwise `False`.
    """
    return isinstance(x, (ops.Tensor, variables.Variable))


def _ImageDimensions(image):
    """Returns the dimensions of an image tensor.
    Args:
        image: A 3-D Tensor of shape `[height, width, channels]`.
    Returns:
        A list of `[height, width, channels]` corresponding to the dimensions
        of the input image. Dimensions that are statically known are python
        integers, otherwise they are integer scalar tensors.
    """
    if image.get_shape().is_fully_defined():
        return image.get_shape().as_list()
    else:
        static_shape = image.get_shape().with_rank(3).as_list()
        dynamic_shape = array_ops.unstack(array_ops.shape(image), 3)
        return [s if s is not None else d
                for s, d in zip(static_shape, dynamic_shape)]


def _Check3DImage(image, require_static=True):
    """Assert that we are working with a properly shaped image.
    Args:
        image: 3-D Tensor of shape [height, width, channels].
        require_static: If `True`, requires that all dimensions of `image` are
            known and non-zero.
    Raises:
        ValueError: if `image.shape` is not a 3-vector.
    Returns:
        An empty list, if `image` has fully defined dimensions. Otherwise, a
        list containing an assert op is returned.
    """
    try:
        image_shape = image.get_shape().with_rank(3)
    except ValueError:
        raise ValueError("'image' must be three-dimensional.")
    if require_static and not image_shape.is_fully_defined():
        raise ValueError("'image' must be fully defined.")
    if any(x == 0 for x in image_shape):
        raise ValueError("all dims of 'image.shape' must be > 0: %s" %
                         image_shape)
    if not image_shape.is_fully_defined():
        return [check_ops.assert_positive(array_ops.shape(image),
                                          ["all dims of 'image.shape' "
                                           "must be > 0."])]
    else:
        return []


def fix_image_flip_shape(image, result):
    """Set the shape to 3 dimensional if we don't know anything else.
    Args:
        image: original image size
        result: flipped or transformed image
    Returns:
        An image whose shape is at least None,None,None.
    """
    image_shape = image.get_shape()
    if image_shape == tensor_shape.unknown_shape():
        result.set_shape([None, None, None])
    else:
        result.set_shape(image_shape)
    return result


# =========================================================================== #
# Image + BBoxes methods: cropping, resizing, flipping, ...
# =========================================================================== #
def bboxes_crop_or_pad(bboxes,
                       height, width,
                       offset_y, offset_x,
                       target_height, target_width):
    """Adapt bounding boxes to crop or pad operations.
    Coordinates are always supposed to be relative to the image.

    Arguments:
        bboxes: Tensor Nx4 with bboxes coordinates [y_min, x_min, y_max, x_max];
        height, width: Original image dimensions;
        offset_y, offset_x: Offset to apply,
            negative if cropping, positive if padding;
        target_height, target_width: Target dimensions after cropping / padding.
    """
    with tf.name_scope('bboxes_crop_or_pad'):
        # Rescale bounding boxes in pixels.
        scale = tf.cast(tf.stack([height, width, height, width]), bboxes.dtype)
        bboxes = bboxes * scale
        # Add offset.
        offset = tf.cast(tf.stack([offset_y, offset_x, offset_y, offset_x]), bboxes.dtype)
        bboxes = bboxes + offset
        # Rescale to target dimension.
        scale = tf.cast(tf.stack([target_height, target_width,
                                  target_height, target_width]), bboxes.dtype)
        bboxes = bboxes / scale
        return bboxes


def resize_image_bboxes_with_crop_or_pad(image, bboxes,
                                         target_height, target_width):
    """Crops and/or pads an image to a target width and height.
    Resizes an image to a target width and height by either centrally
    cropping the image or padding it evenly with zeros.

    If `width` or `height` is greater than the specified `target_width` or
    `target_height` respectively, this op centrally crops along that dimension.
    If `width` or `height` is smaller than the specified `target_width` or
    `target_height` respectively, this op centrally pads with 0 along that
    dimension.
    Args:
        image: 3-D tensor of shape `[height, width, channels]`.
        target_height: Target height.
        target_width: Target width.
    Raises:
        ValueError: if `target_height` or `target_width` are zero or negative.
    Returns:
        Cropped and/or padded image of shape
        `[target_height, target_width, channels]`.
    """
    with tf.name_scope('resize_with_crop_or_pad'):
        image = ops.convert_to_tensor(image, name='image')

        assert_ops = []
        assert_ops += _Check3DImage(image, require_static=False)
        assert_ops += _assert(target_width > 0, ValueError,
                              'target_width must be > 0.')
        assert_ops += _assert(target_height > 0, ValueError,
                              'target_height must be > 0.')

        image = control_flow_ops.with_dependencies(assert_ops, image)
        # `crop_to_bounding_box` and `pad_to_bounding_box` have their own checks.
        # Make sure our checks come first, so that error messages are clearer.
        if _is_tensor(target_height):
            target_height = control_flow_ops.with_dependencies(
                assert_ops, target_height)
        if _is_tensor(target_width):
            target_width = control_flow_ops.with_dependencies(assert_ops, target_width)

        def max_(x, y):
            if _is_tensor(x) or _is_tensor(y):
                return math_ops.maximum(x, y)
            else:
                return max(x, y)

        def min_(x, y):
            if _is_tensor(x) or _is_tensor(y):
                return math_ops.minimum(x, y)
            else:
                return min(x, y)

        def equal_(x, y):
            if _is_tensor(x) or _is_tensor(y):
                return math_ops.equal(x, y)
            else:
                return x == y

        height, width, _ = _ImageDimensions(image)
        width_diff = target_width - width
        offset_crop_width = max_(-width_diff // 2, 0)
        offset_pad_width = max_(width_diff // 2, 0)

        height_diff = target_height - height
        offset_crop_height = max_(-height_diff // 2, 0)
        offset_pad_height = max_(height_diff // 2, 0)

        # Maybe crop if needed.
        height_crop = min_(target_height, height)
        width_crop = min_(target_width, width)
        cropped = tf.image.crop_to_bounding_box(image, offset_crop_height, offset_crop_width,
                                                height_crop, width_crop)
        bboxes = bboxes_crop_or_pad(bboxes,
                                    height, width,
                                    -offset_crop_height, -offset_crop_width,
                                    height_crop, width_crop)
        # Maybe pad if needed.
        resized = tf.image.pad_to_bounding_box(cropped, offset_pad_height, offset_pad_width,
                                               target_height, target_width)
        bboxes = bboxes_crop_or_pad(bboxes,
                                    height_crop, width_crop,
                                    offset_pad_height, offset_pad_width,
                                    target_height, target_width)

        # In theory all the checks below are redundant.
        if resized.get_shape().ndims is None:
            raise ValueError('resized contains no shape.')

        resized_height, resized_width, _ = _ImageDimensions(resized)

        assert_ops = []
        assert_ops += _assert(equal_(resized_height, target_height), ValueError,
                              'resized height is not correct.')
        assert_ops += _assert(equal_(resized_width, target_width), ValueError,
                              'resized width is not correct.')

        resized = control_flow_ops.with_dependencies(assert_ops, resized)
        return resized, bboxes


def resize_image(image, size,
                 method=tf.image.ResizeMethod.BILINEAR,
                 align_corners=False):
    """Resize an image to the given size.
    """
    # Resize image.
    with tf.name_scope('resize_image'):
        height, width, channels = _ImageDimensions(image)
        image = tf.expand_dims(image, 0)
        image = tf.image.resize_images(image, size,
                                       method, align_corners)
        image = tf.reshape(image, tf.stack([size[0], size[1], channels]))
        return image


def random_flip_left_right(image, bboxes, seed=None):
    """Random flip left-right of an image and its bounding boxes.
    """
    def flip_bboxes(bboxes):
        """Flip bounding boxes coordinates.
        """
        bboxes = tf.stack([bboxes[:, 0], 1 - bboxes[:, 3],
                           bboxes[:, 2], 1 - bboxes[:, 1]], axis=-1)
        return bboxes

    # Random flip. TensorFlow implementation.
    with tf.name_scope('random_flip_left_right'):
        image = ops.convert_to_tensor(image, name='image')
        _Check3DImage(image, require_static=False)
        uniform_random = random_ops.random_uniform([], 0, 1.0, seed=seed)
        mirror_cond = math_ops.less(uniform_random, .5)
        # Flip image.
        result = control_flow_ops.cond(mirror_cond,
                                       lambda: array_ops.reverse_v2(image, [1]),
                                       lambda: image)
        # Flip bboxes.
        bboxes = control_flow_ops.cond(mirror_cond,
                                       lambda: flip_bboxes(bboxes),
                                       lambda: bboxes)
        return fix_image_flip_shape(image, result), bboxes
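To make the `flip_bboxes` math concrete: mirroring horizontally keeps the y coordinates and swaps-and-complements the x coordinates. A small hedged numeric check (not part of the commit):

```python
# Hedged check: a box near the left edge ends up near the right edge.
with tf.Session() as sess:
    boxes = tf.constant([[0.2, 0.1, 0.6, 0.3]])  # [ymin, xmin, ymax, xmax]
    flipped = tf.stack([boxes[:, 0], 1 - boxes[:, 3],
                        boxes[:, 2], 1 - boxes[:, 1]], axis=-1)
    print(sess.run(flipped))  # [[0.2, 0.7, 0.6, 0.9]]
```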
@@ -0,0 +1,159 @@
import tensorflow as tf
import numpy as np

import os, sys, time

from anchors import generate_anchors
from model import ssd_common, ssd_vgg_300
from datautil.parser import get_parser_func
from datautil.ssd_vgg_preprocessing import preprocess_for_eval, preprocess_for_train
from tfutil import endpoints, tf_utils
import tfextended as tfe
from finetune.train_eval_base import TrainerBase


class EvalVggSsd(TrainerBase):
    '''
    Run evaluation of a fine-tuned SSD VGG model
    against a validation recordset
    '''

    def __init__(self, ckpt_dir, validation_recordset_files, steps_to_save=1000, num_steps=1000, num_classes=21, print_steps=10):
        '''
        ckpt_dir - directory of checkpoint metagraph
        validation_recordset_files - list of files representing the validation recordset
        '''
        super().__init__(ckpt_dir, validation_recordset_files, steps_to_save, num_steps, num_classes, print_steps, 1, is_training=False)
        self.eval_classes = num_classes

    def get_eval_ops(self, b_labels, b_bboxes, predictions, localizations):
        '''
        Create evaluation operations
        '''
        b_difficults = tf.zeros(tf.shape(b_labels), dtype=tf.int64)

        # Performing post-processing on CPU: loop-intensive, usually more efficient.
        with tf.device('/device:CPU:0'):
            # Detected objects from SSD output.
            detected_localizations = self.ssd_net.bboxes_decode(localizations, self.anchors)

            rscores, rbboxes = \
                self.ssd_net.detected_bboxes(predictions, detected_localizations,
                                             select_threshold=0.01,
                                             nms_threshold=0.45,
                                             clipping_bbox=None,
                                             top_k=400,
                                             keep_top_k=20)

            # Compute TP and FP statistics.
            num_gbboxes, tp, fp, rscores = \
                tfe.bboxes_matching_batch(rscores.keys(), rscores, rbboxes,
                                          b_labels, b_bboxes, b_difficults,
                                          matching_threshold=0.5)

        # =================================================================== #
        # Evaluation metrics.
        # =================================================================== #
        dict_metrics = {}
        metrics_scope = 'ssd_metrics_scope'

        # First add all losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES):
            dict_metrics[loss.op.name] = tf.metrics.mean(loss, name=metrics_scope)
        # Extra losses as well.
        for loss in tf.get_collection('EXTRA_LOSSES'):
            dict_metrics[loss.op.name] = tf.metrics.mean(loss, name=metrics_scope)

        # Add metrics to summaries and print on screen.
        for name, metric in dict_metrics.items():
            # summary_name = 'eval/%s' % name
            summary_name = name
            tf.summary.scalar(summary_name, metric[0])

        # FP and TP metrics.
        tp_fp_metric = tfe.streaming_tp_fp_arrays(num_gbboxes, tp, fp, rscores, name=metrics_scope)

        for c in tp_fp_metric[0].keys():
            dict_metrics['tp_fp_%s' % c] = (tp_fp_metric[0][c],
                                            tp_fp_metric[1][c])

        # Add precision/recall values to summaries.
        aps_voc12 = {}
        # TODO: We cut it short by the actual number of classes we have
        for c in list(tp_fp_metric[0].keys())[:self.eval_classes - 1]:
            # Precision and recall values.
            prec, rec = tfe.precision_recall(*tp_fp_metric[0][c])

            # Average precision VOC12.
            v = tfe.average_precision_voc12(prec, rec)
            summary_name = 'AP_VOC12/%s' % c
            tf.summary.scalar(summary_name, v)

            aps_voc12[c] = v

        # Mean average precision VOC12.
        summary_name = 'AP_VOC12/mAP'
        mAP = tf.add_n(list(aps_voc12.values())) / len(aps_voc12)
        tf.summary.scalar(summary_name, mAP)

        names_to_values, names_to_updates = tf.contrib.metrics.aggregate_metric_map(dict_metrics)

        # Split into values and updates ops.
        return (names_to_values, names_to_updates, mAP)

    def eval(self):

        tf.logging.set_verbosity(tf.logging.INFO)

        # shorthand
        sess = self.sess

        sess.run(self.iterator.initializer)
        batch_data = self.iterator.get_next()

        # image, labels and bboxes are neatly packed into a flat list.
        # This is how we will slice it to extract the data we need:
        # we will convert the flat list into a list of lists, where each sub-list
        # is as long as each slice dimension.
        slice_shape = [1] * 3

        b_image, b_labels, b_bboxes = tf_utils.reshape_list(batch_data, slice_shape)

        # network endpoints
        predictions, localizations, _, _ = self.get_output_tensors(b_image)

        # branch to create evaluation operations
        _, names_to_updates, mAP = \
            self.get_eval_ops(b_labels, b_bboxes, predictions, localizations)

        eval_update_ops = tf_utils.reshape_list(list(names_to_updates.values()))

        # summaries
        summary_op = tf.summary.merge_all()
        saver = tf.train.Saver()

        eval_writer = tf.summary.FileWriter(self.ckpt_dir + '/eval')

        # initialize globals
        sess.run(tf.global_variables_initializer())

        saver.restore(self.sess, self.ckpt_file)
        sess.run(tf.local_variables_initializer())

        tf.logging.info(f"Starting evaluation for {self.num_steps} steps")
        cur_step = self.latest_ckpt_step

        for step in range(self.num_steps):
            print(f"Evaluation step: {step + 1}", end='\r', flush=True)
            _, summary = sess.run([eval_update_ops, summary_op])

            if (step + 1) % self.print_steps == 0 or step == self.num_steps - 1:
                eval_writer.add_summary(summary, cur_step + step + 1)

        summary_final, mAP_val = sess.run([summary_op, mAP])

        print(f"\nmAP: {mAP_val:.4f}")

        if (step + 1) % self.print_steps != 0:
            eval_writer.add_summary(summary_final, self.num_steps + cur_step)
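A usage sketch for the evaluator above (module path and file names are assumptions; the session is opened by `TrainerBase.__enter__`):

from finetune.eval_vgg_ssd import EvalVggSsd  # module name assumed

# Evaluate a fine-tuned checkpoint on a validation recordset.
with EvalVggSsd("models/ssdvgg", ["data/val.tfrecord"], num_steps=500) as evaluator:
    evaluator.eval()  # prints per-step progress and the final VOC12 mAP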
@@ -0,0 +1,95 @@
import tensorflow as tf

import os, time

from anchors import generate_anchors
from model import np_methods
from tfutil import endpoints, tf_utils
from datautil.ssd_vgg_preprocessing import preprocess_for_eval
import tfextended as tfe
from azureml.accel.models import SsdVgg


class InferVggSsd:
    '''
    Run single-image inference
    with an SSD VGG model restored from a checkpoint
    '''

    def __init__(self, ckpt_dir, ckpt_file=None, gpu=True):
        '''
        ckpt_dir - directory of checkpoint metagraph
        '''
        if gpu:
            gpu_options = tf.GPUOptions(allow_growth=True)
            config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
        else:
            config = tf.ConfigProto(log_device_placement=False, device_count={'GPU': 0})

        self.sess = tf.Session(config=config)

        ssd_net_graph = SsdVgg(ckpt_dir)
        self.ckpt_dir = ssd_net_graph.model_path

        self.img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))

        # Evaluation pre-processing: resize to SSD net shape.
        image_pre, _, _, self.bbox_img = preprocess_for_eval(
            self.img_input, None, None, generate_anchors.img_shape, "NHWC")
        self.image_4d = tf.expand_dims(image_pre, 0)

        # import the graph
        ssd_net_graph.import_graph_def(self.image_4d, is_training=False)

        graph = tf.get_default_graph()
        self.localizations = [graph.get_tensor_by_name(tensor_name) for tensor_name in endpoints.localizations_names]
        self.predictions = [graph.get_tensor_by_name(tensor_name) for tensor_name in endpoints.predictions_names]

        # Restore SSD model.
        self.sess.run(tf.global_variables_initializer())

        if ckpt_file is None:
            ssd_net_graph.restore_weights(self.sess)
        else:
            saver = tf.train.Saver()
            saver.restore(self.sess, os.path.join(self.ckpt_dir, ckpt_file))

        # SSD default anchor boxes.
        self.ssd_anchors = generate_anchors.ssd_anchors_all_layers()

    def close(self):
        self.sess.close()
        tf.reset_default_graph()

    def process_image(self, img, select_threshold=0.4, nms_threshold=.45, net_shape=(300, 300)):
        # Run SSD network.
        rpredictions, rlocalisations, rbbox_img = \
            self.sess.run([self.predictions, self.localizations, self.bbox_img],
                          feed_dict={self.img_input: img})
        # Get classes and bboxes from the net outputs.
        rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
            rpredictions, rlocalisations, self.ssd_anchors,
            select_threshold=select_threshold, img_shape=net_shape, num_classes=21, decode=True)

        rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
        rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
        rclasses, rscores, rbboxes = np_methods.bboxes_nms(rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
        return rclasses, rscores, rbboxes

    def infer(self, img, visualize):
        rclasses, rscores, rbboxes = self.process_image(img)

        if visualize:
            from tfutil import visualization
            visualization.plt_bboxes(img, rclasses, rscores, rbboxes)

        return rclasses, rscores, rbboxes

    def infer_file(self, im_file, visualize=False):
        import cv2

        img = cv2.imread(im_file)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        return self.infer(img, visualize)
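A usage sketch for the inference wrapper above (module path and image path are assumptions):

from finetune.infer_vgg_ssd import InferVggSsd  # module name assumed

# Detect objects in a single image file on CPU.
infer = InferVggSsd("models/ssdvgg", gpu=False)
classes, scores, boxes = infer.infer_file("images/shelf.jpg")
infer.close()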
@@ -0,0 +1,57 @@
import tensorflow as tf

import os, time

from azureml.accel.models import SsdVgg
import azureml.accel.models.utils as utils


class SaverVggSsd:
    '''
    Export a frozen SSD VGG model
    as a SavedModel ready for deployment
    '''

    def __init__(self, ckpt_dir):
        '''
        ckpt_dir - directory of checkpoint metagraph
        '''
        config = tf.ConfigProto(log_device_placement=False, device_count={'GPU': 0})

        self.sess = tf.Session(config=config)

        ssd_net_graph = SsdVgg(ckpt_dir, is_frozen=True)
        self.ckpt_dir = ssd_net_graph.model_path

        self.in_images = tf.placeholder(tf.string)
        self.image_tensors = utils.preprocess_array(self.in_images, output_width=300, output_height=300,
                                                    preserve_aspect_ratio=False)

        self.output_tensors = ssd_net_graph.import_graph_def(self.image_tensors, is_training=False)

        self.output_names = ssd_net_graph.output_tensor_list
        self.input_name_str = self.in_images.name

        # Restore SSD model.
        ssd_net_graph.restore_weights(self.sess)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        self.sess.close()
        tf.reset_default_graph()

    def save_for_deployment(self, saved_path):

        output_map = {'out_{}'.format(i): output for i, output in enumerate(self.output_tensors)}

        tf.saved_model.simple_save(self.sess,
                                   saved_path,
                                   inputs={"images": self.in_images},
                                   outputs=output_map)
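A usage sketch for the exporter above (paths are assumptions):

from finetune.save_vgg_ssd import SaverVggSsd  # module name assumed

# Export the frozen model as a SavedModel for service deployment.
with SaverVggSsd("models/ssdvgg") as saver:
    saver.save_for_deployment("outputs/saved_model")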
@@ -0,0 +1,144 @@
import tensorflow as tf
import numpy as np

import os, sys, time, re

from anchors import generate_anchors
from model import ssd_common, ssd_vgg_300
from datautil.parser import get_parser_func
from datautil.ssd_vgg_preprocessing import preprocess_for_eval, preprocess_for_train
from tfutil import endpoints, tf_utils
import tfextended as tfe
from finetune.train_eval_base import TrainerBase


class TrainVggSsd(TrainerBase):
    '''
    Run fine-tuning
    against a training recordset
    '''

    def __init__(self, ckpt_dir, train_recordset_files,
                 steps_to_save=1000, num_steps=1000, num_classes=21,
                 print_steps=10, batch_size=2,
                 learning_rate=1e-4, learning_rate_decay_steps=None, learning_rate_decay_value=None,
                 adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-8):
        '''
        ckpt_dir - directory of checkpoint metagraph
        train_recordset_files - list of files representing the recordset for training
        '''
        super().__init__(ckpt_dir, train_recordset_files, steps_to_save, num_steps, num_classes, print_steps, batch_size)

        # optimizer parameters
        self.learning_rate = learning_rate
        self.learning_rate_decay_steps = learning_rate_decay_steps
        self.learning_rate_decay_value = learning_rate_decay_value

        if self.learning_rate <= 0 \
                or (self.learning_rate_decay_value is not None and self.learning_rate_decay_value <= 0) \
                or (self.learning_rate_decay_steps is not None and self.learning_rate_decay_steps <= 0) \
                or (self.learning_rate_decay_steps is None and self.learning_rate_decay_value is not None) \
                or (self.learning_rate_decay_steps is not None and self.learning_rate_decay_value is None):
            raise ValueError("learning rate, learning rate decay steps and learning rate decay value "
                             "must be positive; decay steps and decay value must be both present or both absent")

        self.adam_beta1 = adam_beta1
        self.adam_beta2 = adam_beta2
        self.adam_epsilon = adam_epsilon

    def get_optimizer(self, learning_rate):
        optimizer = tf.train.AdamOptimizer(
            learning_rate,
            beta1=self.adam_beta1,
            beta2=self.adam_beta2,
            epsilon=self.adam_epsilon)
        return optimizer

    def get_learning_rate(self, global_step):
        '''
        Configure learning rate based on decay specifications
        '''
        if self.learning_rate_decay_steps is None:
            return tf.constant(self.learning_rate, name='fixed_learning_rate')
        else:
            return tf.train.exponential_decay(self.learning_rate, global_step,
                                              self.learning_rate_decay_steps, self.learning_rate_decay_value,
                                              staircase=True, name="exponential_decay_learning_rate")

    def train(self):

        tf.logging.set_verbosity(tf.logging.INFO)

        # shorthand
        sess = self.sess

        batch_data = self.iterator.get_next()

        # image, classes, localizations and scores are neatly packed into a flat list.
        # This is how we will slice it to extract the data we need:
        # we will convert the flat list into a list of lists, where each sub-list
        # is as long as each slice dimension.
        slice_shape = [1] + [len(self.anchors)] * 3

        b_image, b_classes, b_localizations, b_scores = tf_utils.reshape_list(batch_data, slice_shape)
        # network endpoints
        _, localizations, logits, bw_saver = self.get_output_tensors(b_image)

        variables_to_train = tf.trainable_variables()
        sess.run(tf.variables_initializer(variables_to_train))

        # add losses
        total_loss = self.ssd_net.losses(logits, localizations, b_classes, b_localizations, b_scores)
        tf.summary.scalar("total_loss", total_loss)

        global_step = tf.train.get_or_create_global_step()
        learning_rate = self.get_learning_rate(global_step)

        # configure learning rate now that we have the global step;
        # add optimizer
        optimizer = self.get_optimizer(learning_rate)

        tf.summary.scalar("learning_rate", learning_rate)

        grads_and_vars = optimizer.compute_gradients(total_loss, var_list=variables_to_train)
        grad_updates = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

        # initialize all the variables we should initialize;
        # weights will be restored right after
        sess.run(tf.global_variables_initializer())

        # after the first restore, we want the global step in our checkpoint
        saver = tf.train.Saver(variables_to_train + [global_step])
        if self.latest_ckpt_step == 0:
            bw_saver.restore(sess, self.ckpt_file)
        else:
            saver.restore(sess, self.ckpt_file)
        self.ckpt_file = os.path.join(self.ckpt_dir, self.ckpt_prefix)

        # summaries
        train_summary_op = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(self.ckpt_dir + '/train', tf.get_default_graph())

        tf.logging.info(f"Starting training for {self.num_steps} steps")

        sess.run(self.iterator.initializer)

        # training loop
        start = time.time()

        for _ in range(self.num_steps):

            loss, _, cur_step, summary = sess.run([total_loss, grad_updates, global_step, train_summary_op])
            cur_step += 1

            if cur_step % self.print_steps == 0:
                print(f"{cur_step}: loss: {loss:.3f}, avg per step: {(time.time() - start) / self.print_steps:.3f} sec", end='\r', flush=True)
                train_writer.add_summary(summary, cur_step + 1)
                start = time.time()

            if cur_step % self.steps_to_save == 0:
                saver.save(sess, self.ckpt_file, global_step=global_step)
        print("\n")
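A usage sketch for the trainer above (module path and file names are assumptions):

from finetune.train_vgg_ssd import TrainVggSsd  # module name assumed

# Fine-tune with a step-wise decaying learning rate.
with TrainVggSsd("models/ssdvgg", ["data/train.tfrecord"],
                 num_steps=10000, batch_size=8,
                 learning_rate=1e-4,
                 learning_rate_decay_steps=2000,
                 learning_rate_decay_value=0.5) as trainer:
    trainer.train()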
@@ -0,0 +1,125 @@
import tensorflow as tf
import numpy as np

import os, sys, time, glob, re

from anchors import generate_anchors
from model import ssd_common, ssd_vgg_300
from datautil.parser import get_parser_func
from datautil.ssd_vgg_preprocessing import preprocess_for_eval, preprocess_for_train
from tfutil import endpoints, tf_utils
import tfextended as tfe
from azureml.accel.models import SsdVgg

slim = tf.contrib.slim


class TrainerBase:
    '''
    Base class for fine-tuning and evaluation:
    owns the checkpoint, the recordset pipeline and the imported graph
    '''

    def __init__(self, ckpt_dir, recordset_files,
                 steps_to_save=1000, num_steps=1000, num_classes=21, print_steps=10, batch_size=2, is_training=True):
        '''
        ckpt_dir - directory of checkpoint metagraph
        recordset_files - list of files representing the recordset for training or validation
        '''
        self.is_training = is_training

        # This will pull the model with its weights
        # and seed the checkpoint
        self.ssd_net_graph = SsdVgg(ckpt_dir)
        self.ckpt_dir = self.ssd_net_graph.model_path
        self.ckpt_file = tf.train.latest_checkpoint(self.ssd_net_graph.model_path)

        try:
            self.latest_ckpt_step = int(re.findall("-[0-9]+$", self.ckpt_file)[0][1:])
        except:
            self.latest_ckpt_step = 0

        self.recordset = recordset_files
        self.ckpt_prefix = os.path.split(self.ssd_net_graph.model_ref + "_bw")[1]

        self.pb_graph_path = os.path.join(self.ckpt_dir, self.ckpt_prefix + ".graph.pb")
        #if self.is_training:
        self.graph_file = os.path.join(self.ckpt_dir, self.ckpt_prefix + ".meta")
        #else:
        #    self.graph_file = self.ckpt_file + ".meta"

        # anchors
        self.anchors = generate_anchors.ssd_anchors_all_layers()

        # shuffle
        self.n_shuffle = 1000
        self.num_steps = num_steps

        # number of classes
        # REVIEW: this has to be 21!
        self.num_classes = 21

        # initialize data pipeline
        self.batch_size = batch_size
        self.iterator = None
        self.prep_dataset_and_iterator()

        self.steps_to_save = steps_to_save

        self.print_steps = print_steps
        # for losses etc.
        self.ssd_net = ssd_vgg_300.SSDNet()

        # input placeholder
        self.input_tensor_name = self.ssd_net_graph.input_tensor_list[0]

    def __enter__(self):
        gpu_options = tf.GPUOptions(allow_growth=True)
        config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)

        self.sess = tf.Session(config=config)

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.sess.close()
        tf.reset_default_graph()

    def prep_dataset_and_iterator(self):
        '''
        Create datasets for training or validation
        '''
        var_scope = "training" if self.is_training else "eval"

        parse_func = get_parser_func(self.anchors, self.num_classes, self.is_training, var_scope)

        with tf.variable_scope(var_scope):
            # data pipeline
            dataset = tf.data.TFRecordDataset(self.recordset)
            if self.is_training:
                dataset = dataset.shuffle(self.n_shuffle)
            dataset = dataset.map(parse_func)
            dataset = dataset.repeat()
            dataset = dataset.batch(self.batch_size)
            dataset = dataset.prefetch(1)

            self.iterator = dataset.make_initializable_iterator()

    def get_output_tensors(self, image):

        is_training = tf.constant(self.is_training, dtype=tf.bool, shape=())
        input_map = {self.input_tensor_name: image, "is_training": is_training}

        saver = tf.train.import_meta_graph(self.graph_file, input_map=input_map)
        graph = tf.get_default_graph()

        logits = [graph.get_tensor_by_name(tensor_name) for tensor_name in endpoints.logit_names]
        localizations = [graph.get_tensor_by_name(tensor_name) for tensor_name in endpoints.localizations_names]
        predictions = [graph.get_tensor_by_name(tensor_name) for tensor_name in endpoints.predictions_names]

        return predictions, localizations, logits, saver
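A sketch of the pattern the subclasses follow (the assumption that the first slice of the parsed batch is the image tensor is consistent with the `slice_shape` used in the trainer and evaluator above):

# Hypothetical: wire a batch into the imported metagraph, as the subclasses do.
with TrainerBase("models/ssdvgg", ["data/train.tfrecord"]) as base:
    base.sess.run(base.iterator.initializer)
    batch_data = base.iterator.get_next()
    b_image = batch_data[0]  # assumed: first slice is the image tensor
    predictions, localizations, logits, saver = base.get_output_tensors(b_image)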
@@ -0,0 +1,164 @@
# Copyright 2015 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implement some custom layers, not provided by TensorFlow.

Trying to follow as much as possible the style/standards used in
tf.contrib.layers
"""
import tensorflow as tf

from tensorflow.contrib.framework.python.ops import add_arg_scope
from tensorflow.contrib.layers.python.layers import initializers
from tensorflow.contrib.framework.python.ops import variables
from tensorflow.contrib.layers.python.layers import utils
from tensorflow.python.ops import nn
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import variable_scope


def abs_smooth(x):
    """Smoothed absolute function. Useful to compute an L1 smooth error.

    Defined as:
        x^2 / 2        if abs(x) < 1
        abs(x) - 0.5   if abs(x) >= 1
    We use here a differentiable definition using min(x) and abs(x). Clearly
    not optimal, but good enough for our purpose!
    """
    absx = tf.abs(x)
    minx = tf.minimum(absx, 1)
    r = 0.5 * ((absx - 1) * minx + absx)
    return r


@add_arg_scope
def l2_normalization(
        inputs,
        scaling=False,
        scale_initializer=init_ops.ones_initializer(),
        reuse=None,
        variables_collections=None,
        outputs_collections=None,
        data_format='NHWC',
        trainable=True,
        scope=None):
    """Implement L2 normalization on every feature (i.e. spatial normalization).

    Should be extended in some near future to other dimensions, providing a more
    flexible normalization framework.

    Args:
      inputs: a 4-D tensor with dimensions [batch_size, height, width, channels].
      scaling: whether or not to add a post scaling operation along the dimensions
        which have been normalized.
      scale_initializer: An initializer for the weights.
      reuse: whether or not the layer and its variables should be reused. To be
        able to reuse the layer scope must be given.
      variables_collections: optional list of collections for all the variables or
        a dictionary containing a different list of collection per variable.
      outputs_collections: collection to add the outputs.
      data_format: NHWC or NCHW data format.
      trainable: If `True` also add variables to the graph collection
        `GraphKeys.TRAINABLE_VARIABLES` (see tf.Variable).
      scope: Optional scope for `variable_scope`.
    Returns:
      A `Tensor` representing the output of the operation.
    """
    with variable_scope.variable_scope(
            scope, 'L2Normalization', [inputs], reuse=reuse) as sc:
        inputs_shape = inputs.get_shape()
        inputs_rank = inputs_shape.ndims
        dtype = inputs.dtype.base_dtype
        if data_format == 'NHWC':
            # norm_dim = tf.range(1, inputs_rank-1)
            norm_dim = tf.range(inputs_rank-1, inputs_rank)
            params_shape = inputs_shape[-1:]
        elif data_format == 'NCHW':
            # norm_dim = tf.range(2, inputs_rank)
            norm_dim = tf.range(1, 2)
            params_shape = (inputs_shape[1])

        # Normalize along spatial dimensions.
        outputs = nn.l2_normalize(inputs, norm_dim, epsilon=1e-12)
        # Additional scaling.
        if scaling:
            scale_collections = utils.get_variable_collections(
                variables_collections, 'scale')
            scale = variables.model_variable('gamma',
                                             shape=params_shape,
                                             dtype=dtype,
                                             initializer=scale_initializer,
                                             collections=scale_collections,
                                             trainable=trainable)
            if data_format == 'NHWC':
                outputs = tf.multiply(outputs, scale)
            elif data_format == 'NCHW':
                scale = tf.expand_dims(scale, axis=-1)
                scale = tf.expand_dims(scale, axis=-1)
                outputs = tf.multiply(outputs, scale)
                # outputs = tf.transpose(outputs, perm=(0, 2, 3, 1))

        return utils.collect_named_outputs(outputs_collections,
                                           sc.original_name_scope, outputs)


@add_arg_scope
def pad2d(inputs,
          pad=(0, 0),
          mode='CONSTANT',
          data_format='NHWC',
          trainable=True,
          scope=None):
    """2D Padding layer, adding a symmetric padding to H and W dimensions.

    Aims to mimic padding in Caffe and MXNet, helping the port of models to
    TensorFlow. Tries to follow the naming convention of `tf.contrib.layers`.

    Args:
      inputs: 4D input Tensor;
      pad: 2-Tuple with padding values for H and W dimensions;
      mode: Padding mode. C.f. `tf.pad`
      data_format: NHWC or NCHW data format.
    """
    with tf.name_scope(scope, 'pad2d', [inputs]):
        # Padding shape.
        if data_format == 'NHWC':
            paddings = [[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]]
        elif data_format == 'NCHW':
            paddings = [[0, 0], [0, 0], [pad[0], pad[0]], [pad[1], pad[1]]]
        net = tf.pad(inputs, paddings, mode=mode)
        return net


@add_arg_scope
def channel_to_last(inputs,
                    data_format='NHWC',
                    scope=None):
    """Move the channel axis to the last dimension. Allows providing a
    single output format regardless of the input data format.

    Args:
      inputs: Input Tensor;
      data_format: NHWC or NCHW.
    Return:
      Input in NHWC format.
    """
    with tf.name_scope(scope, 'channel_to_last', [inputs]):
        if data_format == 'NHWC':
            net = inputs
        elif data_format == 'NCHW':
            net = tf.transpose(inputs, perm=(0, 2, 3, 1))
        return net
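A quick numeric check of `abs_smooth` (a sketch): for |x| < 1, minx = |x| so r = 0.5*x^2; for |x| >= 1, minx = 1 so r = |x| - 0.5.

import tensorflow as tf
from model.custom_layers import abs_smooth

x = tf.constant([-2.0, -0.5, 0.0, 0.5, 2.0])
with tf.Session() as sess:
    print(sess.run(abs_smooth(x)))  # expected: [1.5, 0.125, 0.0, 0.125, 1.5]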
@@ -0,0 +1,252 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Additional Numpy methods. Big mess of many things!
"""
import numpy as np


# =========================================================================== #
# Numpy implementations of SSD boxes functions.
# =========================================================================== #
def ssd_bboxes_decode(feat_localizations,
                      anchor_bboxes,
                      prior_scaling=[0.1, 0.1, 0.2, 0.2]):
    """Compute the relative bounding boxes from the layer features and
    reference anchor bounding boxes.

    Return:
      numpy array Nx4: ymin, xmin, ymax, xmax
    """
    # Reshape for easier broadcasting.
    l_shape = feat_localizations.shape
    feat_localizations = np.reshape(feat_localizations,
                                    (-1, l_shape[-2], l_shape[-1]))
    yref, xref, href, wref = anchor_bboxes
    xref = np.reshape(xref, [-1, 1])
    yref = np.reshape(yref, [-1, 1])

    # Compute center, height and width
    cx = feat_localizations[:, :, 0] * wref * prior_scaling[0] + xref
    cy = feat_localizations[:, :, 1] * href * prior_scaling[1] + yref
    w = wref * np.exp(feat_localizations[:, :, 2] * prior_scaling[2])
    h = href * np.exp(feat_localizations[:, :, 3] * prior_scaling[3])
    # bboxes: ymin, xmin, ymax, xmax.
    bboxes = np.zeros_like(feat_localizations)
    bboxes[:, :, 0] = cy - h / 2.
    bboxes[:, :, 1] = cx - w / 2.
    bboxes[:, :, 2] = cy + h / 2.
    bboxes[:, :, 3] = cx + w / 2.
    # Back to original shape.
    bboxes = np.reshape(bboxes, l_shape)
    return bboxes


def ssd_bboxes_select_layer(predictions_layer,
                            localizations_layer,
                            anchors_layer,
                            select_threshold=0.5,
                            img_shape=(300, 300),
                            num_classes=21,
                            decode=True):
    """Extract classes, scores and bounding boxes from features in one layer.

    Return:
      classes, scores, bboxes: Numpy arrays...
    """
    # First decode localizations features if necessary.
    if decode:
        localizations_layer = ssd_bboxes_decode(localizations_layer, anchors_layer)

    # Reshape features to: Batches x N x N_labels | 4.
    p_shape = predictions_layer.shape
    batch_size = p_shape[0] if len(p_shape) == 5 else 1
    predictions_layer = np.reshape(predictions_layer,
                                   (batch_size, -1, p_shape[-1]))
    l_shape = localizations_layer.shape
    localizations_layer = np.reshape(localizations_layer,
                                     (batch_size, -1, l_shape[-1]))

    # Boxes selection: use threshold or score > no-label criteria.
    if select_threshold is None or select_threshold == 0:
        # Class prediction and scores: assign 0. to 0-class
        classes = np.argmax(predictions_layer, axis=2)
        scores = np.amax(predictions_layer, axis=2)
        mask = (classes > 0)
        classes = classes[mask]
        scores = scores[mask]
        bboxes = localizations_layer[mask]
    else:
        sub_predictions = predictions_layer[:, :, 1:]
        idxes = np.where(sub_predictions > select_threshold)
        classes = idxes[-1]+1
        scores = sub_predictions[idxes]
        bboxes = localizations_layer[idxes[:-1]]

    return classes, scores, bboxes


def ssd_bboxes_select(predictions_net,
                      localizations_net,
                      anchors_net,
                      select_threshold=0.5,
                      img_shape=(300, 300),
                      num_classes=21,
                      decode=True):
    """Extract classes, scores and bounding boxes from network output layers.

    Return:
      classes, scores, bboxes: Numpy arrays...
    """
    l_classes = []
    l_scores = []
    l_bboxes = []
    # l_layers = []
    # l_idxes = []
    for i in range(len(predictions_net)):
        classes, scores, bboxes = ssd_bboxes_select_layer(
            predictions_net[i], localizations_net[i], anchors_net[i],
            select_threshold, img_shape, num_classes, decode)
        l_classes.append(classes)
        l_scores.append(scores)
        l_bboxes.append(bboxes)
        # Debug information.
        # l_layers.append(i)
        # l_idxes.append((i, idxes))

    classes = np.concatenate(l_classes, 0)
    scores = np.concatenate(l_scores, 0)
    bboxes = np.concatenate(l_bboxes, 0)
    return classes, scores, bboxes


# =========================================================================== #
# Common functions for bboxes handling and selection.
# =========================================================================== #
def bboxes_sort(classes, scores, bboxes, top_k=400):
    """Sort bounding boxes by decreasing score and keep only the top_k.
    """
    # if priority_inside:
    #     inside = (bboxes[:, 0] > margin) & (bboxes[:, 1] > margin) & \
    #         (bboxes[:, 2] < 1-margin) & (bboxes[:, 3] < 1-margin)
    #     idxes = np.argsort(-scores)
    #     inside = inside[idxes]
    #     idxes = np.concatenate([idxes[inside], idxes[~inside]])
    idxes = np.argsort(-scores)
    classes = classes[idxes][:top_k]
    scores = scores[idxes][:top_k]
    bboxes = bboxes[idxes][:top_k]
    return classes, scores, bboxes


def bboxes_clip(bbox_ref, bboxes):
    """Clip bounding boxes with respect to reference bbox.
    """
    bboxes = np.copy(bboxes)
    bboxes = np.transpose(bboxes)
    bbox_ref = np.transpose(bbox_ref)
    bboxes[0] = np.maximum(bboxes[0], bbox_ref[0])
    bboxes[1] = np.maximum(bboxes[1], bbox_ref[1])
    bboxes[2] = np.minimum(bboxes[2], bbox_ref[2])
    bboxes[3] = np.minimum(bboxes[3], bbox_ref[3])
    bboxes = np.transpose(bboxes)
    return bboxes


def bboxes_resize(bbox_ref, bboxes):
    """Resize bounding boxes based on a reference bounding box,
    assuming that the latter is [0, 0, 1, 1] after transform.
    """
    bboxes = np.copy(bboxes)
    # Translate.
    bboxes[:, 0] -= bbox_ref[0]
    bboxes[:, 1] -= bbox_ref[1]
    bboxes[:, 2] -= bbox_ref[0]
    bboxes[:, 3] -= bbox_ref[1]
    # Resize.
    resize = [bbox_ref[2] - bbox_ref[0], bbox_ref[3] - bbox_ref[1]]
    bboxes[:, 0] /= resize[0]
    bboxes[:, 1] /= resize[1]
    bboxes[:, 2] /= resize[0]
    bboxes[:, 3] /= resize[1]
    return bboxes


def bboxes_jaccard(bboxes1, bboxes2):
    """Compute the jaccard index between bboxes1 and bboxes2.
    Note: bboxes1 and bboxes2 can be multi-dimensional, but should be broadcastable.
    """
    bboxes1 = np.transpose(bboxes1)
    bboxes2 = np.transpose(bboxes2)
    # Intersection bbox and volume.
    int_ymin = np.maximum(bboxes1[0], bboxes2[0])
    int_xmin = np.maximum(bboxes1[1], bboxes2[1])
    int_ymax = np.minimum(bboxes1[2], bboxes2[2])
    int_xmax = np.minimum(bboxes1[3], bboxes2[3])

    int_h = np.maximum(int_ymax - int_ymin, 0.)
    int_w = np.maximum(int_xmax - int_xmin, 0.)
    int_vol = int_h * int_w
    # Union volume.
    vol1 = (bboxes1[2] - bboxes1[0]) * (bboxes1[3] - bboxes1[1])
    vol2 = (bboxes2[2] - bboxes2[0]) * (bboxes2[3] - bboxes2[1])
    jaccard = int_vol / (vol1 + vol2 - int_vol)
    return jaccard


def bboxes_intersection(bboxes_ref, bboxes2):
    """Compute the intersection score between bboxes_ref and bboxes2, i.e. the
    fraction of each reference box covered by bboxes2.
    Note: bboxes_ref and bboxes2 can be multi-dimensional, but should be broadcastable.
    """
    bboxes_ref = np.transpose(bboxes_ref)
    bboxes2 = np.transpose(bboxes2)
    # Intersection bbox and volume.
    int_ymin = np.maximum(bboxes_ref[0], bboxes2[0])
    int_xmin = np.maximum(bboxes_ref[1], bboxes2[1])
    int_ymax = np.minimum(bboxes_ref[2], bboxes2[2])
    int_xmax = np.minimum(bboxes_ref[3], bboxes2[3])

    int_h = np.maximum(int_ymax - int_ymin, 0.)
    int_w = np.maximum(int_xmax - int_xmin, 0.)
    int_vol = int_h * int_w
    # Reference boxes volume.
    vol = (bboxes_ref[2] - bboxes_ref[0]) * (bboxes_ref[3] - bboxes_ref[1])
    score = int_vol / vol
    return score


def bboxes_nms(classes, scores, bboxes, nms_threshold=0.45):
    """Apply non-maximum suppression to bounding boxes
    (inputs are expected to be sorted by decreasing score).
    """
    keep_bboxes = np.ones(scores.shape, dtype=np.bool)
    for i in range(scores.size-1):
        if keep_bboxes[i]:
            # Compute overlap with the following bboxes.
            overlap = bboxes_jaccard(bboxes[i], bboxes[(i+1):])
            # Overlap threshold for keeping + checking part of the same class
            keep_overlap = np.logical_or(overlap < nms_threshold, classes[(i+1):] != classes[i])
            keep_bboxes[(i+1):] = np.logical_and(keep_bboxes[(i+1):], keep_overlap)

    idxes = np.where(keep_bboxes)
    return classes[idxes], scores[idxes], bboxes[idxes]


def bboxes_nms_fast(classes, scores, bboxes, threshold=0.45):
    """Apply non-maximum suppression to bounding boxes.
    """
    pass
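A small worked example of the numpy NMS above (values chosen by hand; inputs must already be sorted by decreasing score, as `bboxes_sort` guarantees): a heavily overlapping same-class box is suppressed, while an equally overlapping box of another class survives.

import numpy as np
from model.np_methods import bboxes_nms

classes = np.array([1, 1, 2])
scores = np.array([0.9, 0.8, 0.7])
bboxes = np.array([[0.10, 0.10, 0.50, 0.50],   # kept: highest score
                   [0.12, 0.12, 0.50, 0.50],   # dropped: same class, IoU > 0.45
                   [0.11, 0.11, 0.50, 0.50]])  # kept: different class
print(bboxes_nms(classes, scores, bboxes, nms_threshold=0.45))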
@@ -0,0 +1,408 @@
# Copyright 2015 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Shared functions between different SSD implementations.
"""
import numpy as np
import tensorflow as tf
import tfextended as tfe


# =========================================================================== #
# TensorFlow implementation of boxes SSD encoding / decoding.
# =========================================================================== #
def tf_ssd_bboxes_encode_layer(labels,
                               bboxes,
                               anchors_layer,
                               num_classes,
                               ignore_threshold=0.5,
                               prior_scaling=[0.1, 0.1, 0.2, 0.2],
                               dtype=tf.float32):
    """Encode groundtruth labels and bounding boxes using SSD anchors from
    one layer.

    Arguments:
      labels: 1D Tensor(int64) containing groundtruth labels;
      bboxes: Nx4 Tensor(float) with bboxes relative coordinates;
      anchors_layer: Numpy array with layer anchors;
      ignore_threshold: Threshold for positive match with groundtruth bboxes;
      prior_scaling: Scaling of encoded coordinates.

    Return:
      (target_labels, target_localizations, target_scores): Target Tensors.
    """
    # Anchors coordinates and volume.
    yref, xref, href, wref = anchors_layer
    ymin = yref - href / 2.
    xmin = xref - wref / 2.
    ymax = yref + href / 2.
    xmax = xref + wref / 2.
    vol_anchors = (xmax - xmin) * (ymax - ymin)

    # Initialize tensors...
    shape = (yref.shape[0], yref.shape[1], href.size)
    feat_labels = tf.zeros(shape, dtype=tf.int64)
    feat_scores = tf.zeros(shape, dtype=dtype)

    feat_ymin = tf.zeros(shape, dtype=dtype)
    feat_xmin = tf.zeros(shape, dtype=dtype)
    feat_ymax = tf.ones(shape, dtype=dtype)
    feat_xmax = tf.ones(shape, dtype=dtype)

    def jaccard_with_anchors(bbox):
        """Compute jaccard score between a box and the anchors.
        """
        int_ymin = tf.maximum(ymin, bbox[0])
        int_xmin = tf.maximum(xmin, bbox[1])
        int_ymax = tf.minimum(ymax, bbox[2])
        int_xmax = tf.minimum(xmax, bbox[3])
        h = tf.maximum(int_ymax - int_ymin, 0.)
        w = tf.maximum(int_xmax - int_xmin, 0.)
        # Volumes.
        inter_vol = h * w
        union_vol = vol_anchors - inter_vol \
            + (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
        jaccard = tf.divide(inter_vol, union_vol)
        return jaccard

    def intersection_with_anchors(bbox):
        """Compute intersection score between a box and the anchors.
        """
        int_ymin = tf.maximum(ymin, bbox[0])
        int_xmin = tf.maximum(xmin, bbox[1])
        int_ymax = tf.minimum(ymax, bbox[2])
        int_xmax = tf.minimum(xmax, bbox[3])
        h = tf.maximum(int_ymax - int_ymin, 0.)
        w = tf.maximum(int_xmax - int_xmin, 0.)
        inter_vol = h * w
        scores = tf.divide(inter_vol, vol_anchors)
        return scores

    def condition(i, feat_labels, feat_scores,
                  feat_ymin, feat_xmin, feat_ymax, feat_xmax):
        """Condition: check label index.
        """
        r = tf.less(i, tf.shape(labels))
        return r[0]

    def body(i, feat_labels, feat_scores,
             feat_ymin, feat_xmin, feat_ymax, feat_xmax):
        """Body: update feature labels, scores and bboxes.
        Follow the original SSD paper for that purpose:
          - assign values when jaccard > 0.5;
          - only update if it beats the score of other bboxes.
        """
        # Jaccard score.
        label = labels[i]
        bbox = bboxes[i]
        jaccard = jaccard_with_anchors(bbox)
        # Mask: check threshold + scores + no annotations + num_classes.
        mask = tf.greater(jaccard, feat_scores)
        # mask = tf.logical_and(mask, tf.greater(jaccard, matching_threshold))
        mask = tf.logical_and(mask, feat_scores > -0.5)
        mask = tf.logical_and(mask, label < num_classes)
        imask = tf.cast(mask, tf.int64)
        fmask = tf.cast(mask, dtype)
        # Update values using mask.
        feat_labels = imask * label + (1 - imask) * feat_labels
        feat_scores = tf.where(mask, jaccard, feat_scores)

        feat_ymin = fmask * bbox[0] + (1 - fmask) * feat_ymin
        feat_xmin = fmask * bbox[1] + (1 - fmask) * feat_xmin
        feat_ymax = fmask * bbox[2] + (1 - fmask) * feat_ymax
        feat_xmax = fmask * bbox[3] + (1 - fmask) * feat_xmax

        # Check no annotation label: ignore these anchors...
        # interscts = intersection_with_anchors(bbox)
        # mask = tf.logical_and(interscts > ignore_threshold,
        #                       label == no_annotation_label)
        # # Replace scores by -1.
        # feat_scores = tf.where(mask, -tf.cast(mask, dtype), feat_scores)

        return [i+1, feat_labels, feat_scores,
                feat_ymin, feat_xmin, feat_ymax, feat_xmax]
    # Main loop definition.
    i = 0
    [i, feat_labels, feat_scores,
     feat_ymin, feat_xmin,
     feat_ymax, feat_xmax] = tf.while_loop(condition, body,
                                           [i, feat_labels, feat_scores,
                                            feat_ymin, feat_xmin,
                                            feat_ymax, feat_xmax])
    # Transform to center / size.
    feat_cy = (feat_ymax + feat_ymin) / 2.
    feat_cx = (feat_xmax + feat_xmin) / 2.
    feat_h = feat_ymax - feat_ymin
    feat_w = feat_xmax - feat_xmin
    # Encode features.
    feat_cy = (feat_cy - yref) / href / prior_scaling[0]
    feat_cx = (feat_cx - xref) / wref / prior_scaling[1]
    feat_h = tf.log(feat_h / href) / prior_scaling[2]
    feat_w = tf.log(feat_w / wref) / prior_scaling[3]
    # Use SSD ordering: x / y / w / h instead of ours.
    feat_localizations = tf.stack([feat_cx, feat_cy, feat_w, feat_h], axis=-1)
    return feat_labels, feat_localizations, feat_scores


def tf_ssd_bboxes_encode(labels,
                         bboxes,
                         anchors,
                         num_classes,
                         ignore_threshold=0.5,
                         prior_scaling=[0.1, 0.1, 0.2, 0.2],
                         dtype=tf.float32,
                         scope='ssd_bboxes_encode'):
    """Encode groundtruth labels and bounding boxes using SSD net anchors.
    Encoding boxes for all feature layers.

    Arguments:
      labels: 1D Tensor(int64) containing groundtruth labels;
      bboxes: Nx4 Tensor(float) with bboxes relative coordinates;
      anchors: List of Numpy array with layer anchors;
      ignore_threshold: Threshold for positive match with groundtruth bboxes;
      prior_scaling: Scaling of encoded coordinates.

    Return:
      (target_labels, target_localizations, target_scores):
        Each element is a list of target Tensors.
    """
    with tf.name_scope(scope):
        target_labels = []
        target_localizations = []
        target_scores = []
        for i, anchors_layer in enumerate(anchors):
            with tf.name_scope('bboxes_encode_block_%i' % i):
                t_labels, t_loc, t_scores = \
                    tf_ssd_bboxes_encode_layer(labels, bboxes, anchors_layer,
                                               num_classes,
                                               ignore_threshold,
                                               prior_scaling, dtype)
                target_labels.append(t_labels)
                target_localizations.append(t_loc)
                target_scores.append(t_scores)
        return target_labels, target_localizations, target_scores


def tf_ssd_bboxes_decode_layer(feat_localizations,
                               anchors_layer,
                               prior_scaling=[0.1, 0.1, 0.2, 0.2]):
    """Compute the relative bounding boxes from the layer features and
    reference anchor bounding boxes.

    Arguments:
      feat_localizations: Tensor containing localization features.
      anchors_layer: Numpy array containing anchor boxes.

    Return:
      Tensor Nx4: ymin, xmin, ymax, xmax
    """
    yref, xref, href, wref = anchors_layer

    # Compute center, height and width
    cx = feat_localizations[:, :, :, :, 0] * wref * prior_scaling[0] + xref
    cy = feat_localizations[:, :, :, :, 1] * href * prior_scaling[1] + yref
    w = wref * tf.exp(feat_localizations[:, :, :, :, 2] * prior_scaling[2])
    h = href * tf.exp(feat_localizations[:, :, :, :, 3] * prior_scaling[3])
    # Boxes coordinates.
    ymin = cy - h / 2.
    xmin = cx - w / 2.
    ymax = cy + h / 2.
    xmax = cx + w / 2.
    bboxes = tf.stack([ymin, xmin, ymax, xmax], axis=-1)
    return bboxes


def tf_ssd_bboxes_decode(feat_localizations,
                         anchors,
                         prior_scaling=[0.1, 0.1, 0.2, 0.2],
                         scope='ssd_bboxes_decode'):
    """Compute the relative bounding boxes from the SSD net features and
    reference anchors bounding boxes.

    Arguments:
      feat_localizations: List of Tensors containing localization features.
      anchors: List of numpy array containing anchor boxes.

    Return:
      List of Tensors Nx4: ymin, xmin, ymax, xmax
    """
    with tf.name_scope(scope):
        bboxes = []
        for i, anchors_layer in enumerate(anchors):
            bboxes.append(
                tf_ssd_bboxes_decode_layer(feat_localizations[i],
                                           anchors_layer,
                                           prior_scaling))
        return bboxes


# =========================================================================== #
# SSD boxes selection.
# =========================================================================== #
def tf_ssd_bboxes_select_layer(predictions_layer, localizations_layer,
                               select_threshold=None,
                               num_classes=21,
                               ignore_class=0,
                               scope=None):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: A SSD prediction layer;
      localizations_layer: A SSD localization layer;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
        size Batches X N x 1 | 4. Each key corresponding to a class.
    """
    select_threshold = 0.0 if select_threshold is None else select_threshold
    with tf.name_scope(scope, 'ssd_bboxes_select_layer',
                       [predictions_layer, localizations_layer]):
        # Reshape features: Batches x N x N_labels | 4
        p_shape = tfe.get_shape(predictions_layer)
        predictions_layer = tf.reshape(predictions_layer,
                                       tf.stack([p_shape[0], -1, p_shape[-1]]))
        l_shape = tfe.get_shape(localizations_layer)
        localizations_layer = tf.reshape(localizations_layer,
                                         tf.stack([l_shape[0], -1, l_shape[-1]]))

        d_scores = {}
        d_bboxes = {}
        for c in range(0, num_classes):
            if c != ignore_class:
                # Remove boxes under the threshold.
                scores = predictions_layer[:, :, c]
                fmask = tf.cast(tf.greater_equal(scores, select_threshold), scores.dtype)
                scores = scores * fmask
                bboxes = localizations_layer * tf.expand_dims(fmask, axis=-1)
                # Append to dictionary.
                d_scores[c] = scores
                d_bboxes[c] = bboxes

        return d_scores, d_bboxes


def tf_ssd_bboxes_select(predictions_net, localizations_net,
                         select_threshold=None,
                         num_classes=21,
                         ignore_class=0,
                         scope=None):
    """Extract classes, scores and bounding boxes from network output layers.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_net: List of SSD prediction layers;
      localizations_net: List of localization layers;
      select_threshold: Classification threshold for selecting a box. All boxes
        under the threshold are set to 'zero'. If None, no threshold applied.
    Return:
      d_scores, d_bboxes: Dictionary of scores and bboxes Tensors of
        size Batches X N x 1 | 4. Each key corresponding to a class.
    """
    with tf.name_scope(scope, 'ssd_bboxes_select',
                       [predictions_net, localizations_net]):
        l_scores = []
        l_bboxes = []
        for i in range(len(predictions_net)):
            scores, bboxes = tf_ssd_bboxes_select_layer(predictions_net[i],
                                                        localizations_net[i],
                                                        select_threshold,
                                                        num_classes,
                                                        ignore_class)
            l_scores.append(scores)
            l_bboxes.append(bboxes)
        # Concat results.
        d_scores = {}
        d_bboxes = {}
        for c in l_scores[0].keys():
            ls = [s[c] for s in l_scores]
            lb = [b[c] for b in l_bboxes]
            d_scores[c] = tf.concat(ls, axis=1)
            d_bboxes[c] = tf.concat(lb, axis=1)
        return d_scores, d_bboxes


def tf_ssd_bboxes_select_layer_all_classes(predictions_layer, localizations_layer,
                                           select_threshold=None):
    """Extract classes, scores and bounding boxes from features in one layer.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_layer: A SSD prediction layer;
      localizations_layer: A SSD localization layer;
      select_threshold: Classification threshold for selecting a box. If None,
        select boxes whose classification score is higher than 'no class'.
    Return:
      classes, scores, bboxes: Input Tensors.
    """
    # Reshape features: Batches x N x N_labels | 4
    p_shape = tfe.get_shape(predictions_layer)
    predictions_layer = tf.reshape(predictions_layer,
                                   tf.stack([p_shape[0], -1, p_shape[-1]]))
    l_shape = tfe.get_shape(localizations_layer)
    localizations_layer = tf.reshape(localizations_layer,
                                     tf.stack([l_shape[0], -1, l_shape[-1]]))
    # Boxes selection: use threshold or score > no-label criteria.
    if select_threshold is None or select_threshold == 0:
        # Class prediction and scores: assign 0. to 0-class
        classes = tf.argmax(predictions_layer, axis=2)
        scores = tf.reduce_max(predictions_layer, axis=2)
        scores = scores * tf.cast(classes > 0, scores.dtype)
    else:
        sub_predictions = predictions_layer[:, :, 1:]
        classes = tf.argmax(sub_predictions, axis=2) + 1
        scores = tf.reduce_max(sub_predictions, axis=2)
        # Only keep predictions higher than threshold.
        mask = tf.greater(scores, select_threshold)
        classes = classes * tf.cast(mask, classes.dtype)
        scores = scores * tf.cast(mask, scores.dtype)
    # Assume localization layer already decoded.
    bboxes = localizations_layer
    return classes, scores, bboxes


def tf_ssd_bboxes_select_all_classes(predictions_net, localizations_net,
                                     select_threshold=None,
                                     scope=None):
    """Extract classes, scores and bounding boxes from network output layers.
    Batch-compatible: inputs are supposed to have batch-type shapes.

    Args:
      predictions_net: List of SSD prediction layers;
      localizations_net: List of localization layers;
      select_threshold: Classification threshold for selecting a box. If None,
        select boxes whose classification score is higher than 'no class'.
    Return:
      classes, scores, bboxes: Tensors.
    """
    with tf.name_scope(scope, 'ssd_bboxes_select',
                       [predictions_net, localizations_net]):
        l_classes = []
        l_scores = []
        l_bboxes = []
        for i in range(len(predictions_net)):
            classes, scores, bboxes = \
                tf_ssd_bboxes_select_layer_all_classes(predictions_net[i],
                                                       localizations_net[i],
                                                       select_threshold)
            l_classes.append(classes)
            l_scores.append(scores)
            l_bboxes.append(bboxes)

        classes = tf.concat(l_classes, axis=1)
        scores = tf.concat(l_scores, axis=1)
        bboxes = tf.concat(l_bboxes, axis=1)
        return classes, scores, bboxes
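A plain-numpy sketch of the decode arithmetic in `tf_ssd_bboxes_decode_layer` above, for a single anchor (values chosen by hand):

import numpy as np

# One anchor centered at (0.5, 0.5) with height/width 0.2.
yref, xref, href, wref = 0.5, 0.5, 0.2, 0.2
prior_scaling = [0.1, 0.1, 0.2, 0.2]
# Encoded offsets (cx, cy, w, h) as produced by the localization head.
lcx, lcy, lw, lh = 1.0, -1.0, 0.0, 0.0

cx = lcx * wref * prior_scaling[0] + xref   # 0.52
cy = lcy * href * prior_scaling[1] + yref   # 0.48
w = wref * np.exp(lw * prior_scaling[2])    # 0.2
h = href * np.exp(lh * prior_scaling[3])    # 0.2
print([cy - h / 2., cx - w / 2., cy + h / 2., cx + w / 2.])
# [0.38, 0.42, 0.58, 0.62] -> ymin, xmin, ymax, xmax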
@@ -0,0 +1,660 @@
# Copyright 2016 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Definition of 300 VGG-based SSD network.

This model was initially introduced in:
SSD: Single Shot MultiBox Detector
Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
Cheng-Yang Fu, Alexander C. Berg
https://arxiv.org/abs/1512.02325

Two variants of the model are defined: the 300x300 and 512x512 models, the
latter obtaining a slightly better accuracy on Pascal VOC.

Usage:
    with slim.arg_scope(ssd_vgg.ssd_vgg()):
        outputs, end_points = ssd_vgg.ssd_vgg(inputs)

This network is a port of the original Caffe model. The padding in TF and
Caffe is slightly different, and can lead to a severe accuracy drop if not
handled in a correct way!

In Caffe, the output size of convolution and pooling layers is computed as
follows: h_o = (h_i + 2 * pad_h - kernel_h) / stride_h + 1

Nevertheless, there is a subtle difference between the two for stride > 1. In
the case of convolution:
    top_size = floor((bottom_size + 2*pad - kernel_size) / stride) + 1
whereas for pooling:
    top_size = ceil((bottom_size + 2*pad - kernel_size) / stride) + 1
Hence implicitly allowing some additional padding even if pad = 0. This
behaviour explains why pooling with stride and kernel of size 2 behaves
the same way in TensorFlow and Caffe.

Nevertheless, this is not the case anymore for other kernel sizes, hence
motivating the use of a special padding layer for controlling these
side-effects.

@@ssd_vgg_300
"""
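A quick arithmetic check of the Caffe vs. TensorFlow pooling discrepancy described above (3x3 pooling, stride 2, pad 0, on a 10-pixel side):

import math

bottom, kernel, stride, pad = 10, 3, 2, 0
tf_valid = math.floor((bottom + 2 * pad - kernel) / stride) + 1  # 4
caffe = math.ceil((bottom + 2 * pad - kernel) / stride) + 1      # 5
print(tf_valid, caffe)  # Caffe implicitly rounds up, yielding one extra output.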
import math
|
||||
from collections import namedtuple
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
import tfextended as tfe
|
||||
from model import custom_layers, ssd_common
|
||||
|
||||
slim = tf.contrib.slim
|
||||
|
||||
|
||||
# =========================================================================== #
|
||||
# SSD class definition.
|
||||
# =========================================================================== #
|
||||
SSDParams = namedtuple('SSDParameters', ['img_shape',
|
||||
'num_classes',
|
||||
'no_annotation_label',
|
||||
'feat_layers',
|
||||
'feat_shapes',
|
||||
'anchor_size_bounds',
|
||||
'anchor_sizes',
|
||||
'anchor_ratios',
|
||||
'anchor_steps',
|
||||
'anchor_offset',
|
||||
'normalizations',
|
||||
'prior_scaling'
|
||||
])
|
||||
|
||||
|
||||
class SSDNet(object):
|
||||
"""Implementation of the SSD VGG-based 300 network.
|
||||
|
||||
The default features layers with 300x300 image input are:
|
||||
conv4 ==> 38 x 38
|
||||
conv7 ==> 19 x 19
|
||||
conv8 ==> 10 x 10
|
||||
conv9 ==> 5 x 5
|
||||
conv10 ==> 3 x 3
|
||||
conv11 ==> 1 x 1
|
||||
The default image size used to train this network is 300x300.
|
||||
"""
|
||||
default_params = SSDParams(
|
||||
img_shape=(300, 300),
|
||||
num_classes=21,
|
||||
no_annotation_label=21,
|
||||
feat_layers=['block4', 'block7', 'block8', 'block9', 'block10', 'block11'],
|
||||
feat_shapes=[(37, 37), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)],
|
||||
anchor_size_bounds=[0.15, 0.90],
|
||||
# anchor_size_bounds=[0.20, 0.90],
|
||||
anchor_sizes=[(21., 45.),
|
||||
(45., 99.),
|
||||
(99., 153.),
|
||||
(153., 207.),
|
||||
(207., 261.),
|
||||
(261., 315.)],
|
||||
# anchor_sizes=[(30., 60.),
|
||||
# (60., 111.),
|
||||
# (111., 162.),
|
||||
# (162., 213.),
|
||||
# (213., 264.),
|
||||
# (264., 315.)],
|
||||
anchor_ratios=[[2, .5],
|
||||
[2, .5, 3, 1./3],
|
||||
[2, .5, 3, 1./3],
|
||||
[2, .5, 3, 1./3],
|
||||
[2, .5],
|
||||
[2, .5]],
|
||||
anchor_steps=[8, 16, 32, 64, 100, 300],
|
||||
anchor_offset=0.5,
|
||||
normalizations=[20, -1, -1, -1, -1, -1],
|
||||
prior_scaling=[0.1, 0.1, 0.2, 0.2]
|
||||
)
|
||||
|
||||
    def __init__(self, params=None):
        """Init the SSD net with some parameters. Use the default ones
        if none provided.
        """
        if isinstance(params, SSDParams):
            self.params = params
        else:
            self.params = SSDNet.default_params

    # ======================================================================= #
    def net(self, inputs,
            is_training=True,
            update_feat_shapes=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
        """SSD network definition.
        """
        r = ssd_net(inputs,
                    num_classes=self.params.num_classes,
                    feat_layers=self.params.feat_layers,
                    anchor_sizes=self.params.anchor_sizes,
                    anchor_ratios=self.params.anchor_ratios,
                    normalizations=self.params.normalizations,
                    is_training=is_training,
                    dropout_keep_prob=dropout_keep_prob,
                    prediction_fn=prediction_fn,
                    reuse=reuse,
                    scope=scope)
        # Update feature shapes (try at least!)
        if update_feat_shapes:
            shapes = ssd_feat_shapes_from_net(r[0], self.params.feat_shapes)
            self.params = self.params._replace(feat_shapes=shapes)
        return r

    def arg_scope(self, weight_decay=0.0005, data_format='NHWC'):
        """Network arg_scope.
        """
        return ssd_arg_scope(weight_decay, data_format=data_format)

    def arg_scope_caffe(self, caffe_scope):
        """Caffe arg_scope used for weights importing.
        """
        return ssd_arg_scope_caffe(caffe_scope)

    # ======================================================================= #
    def update_feature_shapes(self, predictions):
        """Update feature shapes from predictions collection (Tensor or Numpy
        array).
        """
        shapes = ssd_feat_shapes_from_net(predictions, self.params.feat_shapes)
        self.params = self.params._replace(feat_shapes=shapes)

    def anchors(self, img_shape, dtype=np.float32):
        """Compute the default anchor boxes, given an image shape.
        """
        return ssd_anchors_all_layers(img_shape,
                                      self.params.feat_shapes,
                                      self.params.anchor_sizes,
                                      self.params.anchor_ratios,
                                      self.params.anchor_steps,
                                      self.params.anchor_offset,
                                      dtype)

    def bboxes_encode(self, labels, bboxes, anchors,
                      scope=None):
        """Encode labels and bounding boxes.
        """
        return ssd_common.tf_ssd_bboxes_encode(
            labels, bboxes, anchors,
            self.params.num_classes,
            ignore_threshold=0.5,
            prior_scaling=self.params.prior_scaling,
            scope=scope)

    def bboxes_decode(self, feat_localizations, anchors,
                      scope='ssd_bboxes_decode'):
        """Decode labels and bounding boxes.
        """
        return ssd_common.tf_ssd_bboxes_decode(
            feat_localizations, anchors,
            prior_scaling=self.params.prior_scaling,
            scope=scope)

    def detected_bboxes(self, predictions, localisations,
                        select_threshold=None, nms_threshold=0.5,
                        clipping_bbox=None, top_k=400, keep_top_k=200):
        """Get the detected bounding boxes from the SSD network output.
        """
        # Select top_k bboxes from predictions, and clip.
        rscores, rbboxes = \
            ssd_common.tf_ssd_bboxes_select(predictions, localisations,
                                            select_threshold=select_threshold,
                                            num_classes=self.params.num_classes)
        rscores, rbboxes = \
            tfe.bboxes_sort(rscores, rbboxes, top_k=top_k)
        # Apply NMS algorithm.
        rscores, rbboxes = \
            tfe.bboxes_nms_batch(rscores, rbboxes,
                                 nms_threshold=nms_threshold,
                                 keep_top_k=keep_top_k)
        if clipping_bbox is not None:
            rbboxes = tfe.bboxes_clip(clipping_bbox, rbboxes)
        return rscores, rbboxes

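    # Minimal inference sketch (illustrative only; `image_4d` is an assumed
    # preprocessed [1, 300, 300, 3] input tensor):
    #     ssd = SSDNet()
    #     with slim.arg_scope(ssd.arg_scope()):
    #         predictions, localisations, _, _ = ssd.net(image_4d,
    #                                                    is_training=False)
    #     anchors = ssd.anchors((300, 300))
    #     rbboxes = ssd.bboxes_decode(localisations, anchors)
    #     rscores, rbboxes = ssd.detected_bboxes(predictions, rbboxes,
    #                                            select_threshold=0.5)
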
    def losses(self, logits, localisations,
               gclasses, glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               scope='ssd_losses'):
        """Define the SSD network losses.
        """
        return ssd_losses(logits, localisations,
                          gclasses, glocalisations, gscores,
                          match_threshold=match_threshold,
                          negative_ratio=negative_ratio,
                          alpha=alpha,
                          label_smoothing=label_smoothing,
                          scope=scope)


# =========================================================================== #
# SSD tools...
# =========================================================================== #
def ssd_size_bounds_to_values(size_bounds,
                              n_feat_layers,
                              img_shape=(300, 300)):
    """Compute the reference sizes of the anchor boxes from relative bounds.
    The absolute values are measured in pixels, based on the network
    default size (300 pixels).

    This function follows the computation performed in the original
    implementation of SSD in Caffe.

    Return:
      list of lists containing the absolute sizes at each scale. For each
      scale, the ratios only apply to the first value.
    """
    assert img_shape[0] == img_shape[1]

    img_size = img_shape[0]
    min_ratio = int(size_bounds[0] * 100)
    max_ratio = int(size_bounds[1] * 100)
    step = int(math.floor((max_ratio - min_ratio) / (n_feat_layers - 2)))
    # Start with the following smallest sizes.
    sizes = [[img_size * size_bounds[0] / 2, img_size * size_bounds[0]]]
    for ratio in range(min_ratio, max_ratio + 1, step):
        sizes.append((img_size * ratio / 100.,
                      img_size * (ratio + step) / 100.))
    return sizes


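# Illustrative walk-through with the default bounds (assumed call):
#     ssd_size_bounds_to_values([0.15, 0.90], n_feat_layers=6)
# gives min_ratio=15, max_ratio=90, step=18, and hence
#     [[22.5, 45.0], (45.0, 99.0), (99.0, 153.0),
#      (153.0, 207.0), (207.0, 261.0), (261.0, 315.0)]
# which is close to the hand-tuned `anchor_sizes` defaults above.
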
def ssd_feat_shapes_from_net(predictions, default_shapes=None):
    """Try to obtain the feature shapes from the prediction layers. The latter
    can be either a Tensor or Numpy ndarray.

    Return:
      list of feature shapes. Default values if predictions shape not fully
      determined.
    """
    feat_shapes = []
    for l in predictions:
        # Get the shape, from either a np array or a tensor.
        if isinstance(l, np.ndarray):
            shape = l.shape
        else:
            shape = l.get_shape().as_list()
        shape = shape[1:4]
        # Problem: undetermined shape...
        if None in shape:
            return default_shapes
        else:
            feat_shapes.append(shape)
    return feat_shapes


def ssd_anchor_one_layer(img_shape,
                         feat_shape,
                         sizes,
                         ratios,
                         step,
                         offset=0.5,
                         dtype=np.float32):
    """Compute SSD default anchor boxes for one feature layer.

    Determine the relative position grid of the centers, and the relative
    width and height.

    Arguments:
      feat_shape: Feature shape, used for computing relative position grids;
      sizes: Absolute reference sizes;
      ratios: Ratios to use on these features;
      img_shape: Image shape, used for computing height and width relatively
        to the former;
      offset: Grid offset.

    Return:
      y, x, h, w: Relative x and y grids, and height and width.
    """
    # Compute the position grid: simple way.
    # y, x = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
    # y = (y.astype(dtype) + offset) / feat_shape[0]
    # x = (x.astype(dtype) + offset) / feat_shape[1]
    # Weird SSD-Caffe computation using steps values...
    y, x = np.mgrid[0:feat_shape[0], 0:feat_shape[1]]
    y = (y.astype(dtype) + offset) * step / img_shape[0]
    x = (x.astype(dtype) + offset) * step / img_shape[1]

    # Expand dims to support easy broadcasting.
    y = np.expand_dims(y, axis=-1)
    x = np.expand_dims(x, axis=-1)

    # Compute relative height and width.
    # Tries to follow the original implementation of SSD for the order.
    num_anchors = len(sizes) + len(ratios)
    h = np.zeros((num_anchors, ), dtype=dtype)
    w = np.zeros((num_anchors, ), dtype=dtype)
    # Add first anchor boxes with ratio=1.
    h[0] = sizes[0] / img_shape[0]
    w[0] = sizes[0] / img_shape[1]
    di = 1
    if len(sizes) > 1:
        h[1] = math.sqrt(sizes[0] * sizes[1]) / img_shape[0]
        w[1] = math.sqrt(sizes[0] * sizes[1]) / img_shape[1]
        di += 1
    for i, r in enumerate(ratios):
        h[i+di] = sizes[0] / img_shape[0] / math.sqrt(r)
        w[i+di] = sizes[0] / img_shape[1] * math.sqrt(r)
    return y, x, h, w


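# Worked example for the first default layer (assumed parameters:
# img_shape=(300, 300), sizes=(21., 45.), ratios=[2, .5], step=8):
#   num_anchors = 2 + 2 = 4
#   h[0] = w[0] = 21/300 ~ 0.070           (ratio 1, small size)
#   h[1] = w[1] = sqrt(21*45)/300 ~ 0.102  (ratio 1, intermediate size)
#   h[2] ~ 0.070/sqrt(2) ~ 0.049, w[2] ~ 0.070*sqrt(2) ~ 0.099  (ratio 2)
# and the first grid center sits at (0.5*8/300, 0.5*8/300) ~ (0.013, 0.013).
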
def ssd_anchors_all_layers(img_shape,
                           layers_shape,
                           anchor_sizes,
                           anchor_ratios,
                           anchor_steps,
                           offset=0.5,
                           dtype=np.float32):
    """Compute anchor boxes for all feature layers.
    """
    layers_anchors = []
    for i, s in enumerate(layers_shape):
        anchor_bboxes = ssd_anchor_one_layer(img_shape, s,
                                             anchor_sizes[i],
                                             anchor_ratios[i],
                                             anchor_steps[i],
                                             offset=offset, dtype=dtype)
        layers_anchors.append(anchor_bboxes)
    return layers_anchors


# =========================================================================== #
# Functional definition of VGG-based SSD 300.
# =========================================================================== #
def tensor_shape(x, rank=3):
    """Returns the dimensions of a tensor.
    Args:
      x: A N-D Tensor of rank `rank`.
    Returns:
      A list of dimensions. Dimensions that are statically known are python
      integers, otherwise they are integer scalar tensors.
    """
    if x.get_shape().is_fully_defined():
        return x.get_shape().as_list()
    else:
        static_shape = x.get_shape().with_rank(rank).as_list()
        dynamic_shape = tf.unstack(tf.shape(x), rank)
        return [s if s is not None else d
                for s, d in zip(static_shape, dynamic_shape)]


def ssd_multibox_layer(inputs,
                       num_classes,
                       sizes,
                       ratios=[1],
                       normalization=-1,
                       bn_normalization=False):
    """Construct a multibox layer, return a class and localization predictions.
    """
    net = inputs
    if normalization > 0:
        net = custom_layers.l2_normalization(net, scaling=True)
    # Number of anchors.
    num_anchors = len(sizes) + len(ratios)

    # Location.
    num_loc_pred = num_anchors * 4
    loc_pred = slim.conv2d(net, num_loc_pred, [3, 3], activation_fn=None,
                           scope='conv_loc')
    loc_pred = custom_layers.channel_to_last(loc_pred)
    loc_pred = tf.reshape(loc_pred,
                          tensor_shape(loc_pred, 4)[:-1]+[num_anchors, 4])
    # Class prediction.
    num_cls_pred = num_anchors * num_classes
    cls_pred = slim.conv2d(net, num_cls_pred, [3, 3], activation_fn=None,
                           scope='conv_cls')
    cls_pred = custom_layers.channel_to_last(cls_pred)
    cls_pred = tf.reshape(cls_pred,
                          tensor_shape(cls_pred, 4)[:-1]+[num_anchors, num_classes])
    return cls_pred, loc_pred


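# Shape sketch for the first feature layer (assumed: batch N, block4 features
# of shape [N, 37, 37, 512], 4 anchors, 21 classes):
#   loc_pred: [N, 37, 37, 4, 4]   -- 4 box coordinates per anchor
#   cls_pred: [N, 37, 37, 4, 21]  -- 1 logit per class per anchor
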
def ssd_net(inputs,
            num_classes=SSDNet.default_params.num_classes,
            feat_layers=SSDNet.default_params.feat_layers,
            anchor_sizes=SSDNet.default_params.anchor_sizes,
            anchor_ratios=SSDNet.default_params.anchor_ratios,
            normalizations=SSDNet.default_params.normalizations,
            is_training=True,
            dropout_keep_prob=0.5,
            prediction_fn=slim.softmax,
            reuse=None,
            scope='ssd_300_vgg'):
    """SSD net definition.
    """
    # if data_format == 'NCHW':
    #     inputs = tf.transpose(inputs, perm=(0, 3, 1, 2))

    # End_points collect relevant activations for external use.
    end_points = {}
    with tf.variable_scope(scope, 'ssd_300_vgg', [inputs], reuse=reuse):
        # Original VGG-16 blocks.
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        end_points['block1'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        # Block 2.
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        end_points['block2'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        # Block 3.
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        end_points['block3'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        # Block 4.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        end_points['block4'] = net
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        # Block 5.
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        end_points['block5'] = net
        net = slim.max_pool2d(net, [3, 3], stride=1, scope='pool5')

        # Additional SSD blocks.
        # Block 6: 3x3 convolution with dilation rate 6.
        net = slim.conv2d(net, 1024, [3, 3], rate=6, scope='conv6')
        end_points['block6'] = net
        # Note: tf.layers.dropout expects a drop rate, not a keep probability.
        net = tf.layers.dropout(net, rate=1. - dropout_keep_prob,
                                training=is_training)
        # Block 7: 1x1 convolution.
        net = slim.conv2d(net, 1024, [1, 1], scope='conv7')
        end_points['block7'] = net
        net = tf.layers.dropout(net, rate=1. - dropout_keep_prob,
                                training=is_training)

        # Block 8/9/10/11: 1x1 and 3x3 convolutions with stride 2 (except the
        # last two blocks).
        end_point = 'block8'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 256, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 512, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block9'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = custom_layers.pad2d(net, pad=(1, 1))
            net = slim.conv2d(net, 256, [3, 3], stride=2, scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block10'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net
        end_point = 'block11'
        with tf.variable_scope(end_point):
            net = slim.conv2d(net, 128, [1, 1], scope='conv1x1')
            net = slim.conv2d(net, 256, [3, 3], scope='conv3x3', padding='VALID')
        end_points[end_point] = net

        # Prediction and localisations layers.
        predictions = []
        logits = []
        localisations = []
        for i, layer in enumerate(feat_layers):
            with tf.variable_scope(layer + '_box'):
                p, l = ssd_multibox_layer(end_points[layer],
                                          num_classes,
                                          anchor_sizes[i],
                                          anchor_ratios[i],
                                          normalizations[i])
            predictions.append(prediction_fn(p))
            logits.append(p)
            localisations.append(l)

        return predictions, localisations, logits, end_points
ssd_net.default_image_size = 300


def ssd_arg_scope(weight_decay=0.0005, data_format='NHWC'):
    """Defines the VGG arg scope.

    Args:
      weight_decay: The l2 regularization coefficient.

    Returns:
      An arg_scope.
    """
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        biases_initializer=tf.zeros_initializer()):
        with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                            padding='SAME',
                            data_format=data_format):
            with slim.arg_scope([custom_layers.pad2d,
                                 custom_layers.l2_normalization,
                                 custom_layers.channel_to_last],
                                data_format=data_format) as sc:
                return sc


# =========================================================================== #
# Caffe scope: importing weights at initialization.
# =========================================================================== #
def ssd_arg_scope_caffe(caffe_scope):
    """Caffe scope definition.

    Args:
      caffe_scope: Caffe scope object with loaded weights.

    Returns:
      An arg_scope.
    """
    # Default network arg scope.
    with slim.arg_scope([slim.conv2d],
                        activation_fn=tf.nn.relu,
                        weights_initializer=caffe_scope.conv_weights_init(),
                        biases_initializer=caffe_scope.conv_biases_init()):
        with slim.arg_scope([slim.fully_connected],
                            activation_fn=tf.nn.relu):
            with slim.arg_scope([custom_layers.l2_normalization],
                                scale_initializer=caffe_scope.l2_norm_scale_init()):
                with slim.arg_scope([slim.conv2d, slim.max_pool2d],
                                    padding='SAME') as sc:
                    return sc


# =========================================================================== #
# SSD loss function.
# =========================================================================== #
def ssd_losses(logits, localisations,
               gclasses, glocalisations, gscores,
               match_threshold=0.5,
               negative_ratio=3.,
               alpha=1.,
               label_smoothing=0.,
               device='/cpu:0',
               scope=None):
    """Define the SSD network losses: cross-entropy on positive and hard
    negative anchors, plus a weighted smooth-L1 localization loss.
    """
    with tf.name_scope(scope, 'ssd_losses'):
        lshape = tfe.get_shape(logits[0], 5)
        num_classes = lshape[-1]
        batch_size = lshape[0]

        # Flatten out all vectors!
        flogits = []
        fgclasses = []
        fgscores = []
        flocalisations = []
        fglocalisations = []
        for i in range(len(logits)):
            flogits.append(tf.reshape(logits[i], [-1, num_classes]))
            fgclasses.append(tf.reshape(gclasses[i], [-1]))
            fgscores.append(tf.reshape(gscores[i], [-1]))
            flocalisations.append(tf.reshape(localisations[i], [-1, 4]))
            fglocalisations.append(tf.reshape(glocalisations[i], [-1, 4]))
        # Concatenate everything along the anchor dimension.
        logits = tf.concat(flogits, axis=0)
        gclasses = tf.concat(fgclasses, axis=0)
        gscores = tf.concat(fgscores, axis=0)
        localisations = tf.concat(flocalisations, axis=0)
        glocalisations = tf.concat(fglocalisations, axis=0)
        dtype = logits.dtype

        # Compute positive matching mask...
        pmask = gscores > match_threshold
        fpmask = tf.cast(pmask, dtype)
        n_positives = tf.reduce_sum(fpmask)

        # Hard negative mining...
        no_classes = tf.cast(pmask, tf.int32)
        predictions = slim.softmax(logits)
        nmask = tf.logical_and(tf.logical_not(pmask),
                               gscores > -0.5)
        fnmask = tf.cast(nmask, dtype)
        nvalues = tf.where(nmask,
                           predictions[:, 0],
                           1. - fnmask)
        nvalues_flat = tf.reshape(nvalues, [-1])
        # Number of negative entries to select.
        max_neg_entries = tf.cast(tf.reduce_sum(fnmask), tf.int32)
        n_neg = tf.cast(negative_ratio * n_positives, tf.int32) + batch_size
        n_neg = tf.minimum(n_neg, max_neg_entries)

        val, idxes = tf.nn.top_k(-nvalues_flat, k=n_neg)
        max_hard_pred = -val[-1]
        # Final negative mask.
        nmask = tf.logical_and(nmask, nvalues < max_hard_pred)
        fnmask = tf.cast(nmask, dtype)

        batch_float = tf.cast(batch_size, tf.float32)

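        # Illustrative count (assumed values): with negative_ratio=3,
        # n_positives=10 and batch_size=8, n_neg = 3 * 10 + 8 = 38 negatives
        # are kept (capped at max_neg_entries), i.e. the 38 negatives whose
        # background score is lowest -- the "hardest" ones.
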
        # Add cross-entropy loss.
        with tf.name_scope('cross_entropy_pos'):
            cross_entropy_pos_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                                    labels=gclasses)
            cross_entropy_pos_loss = tf.divide(tf.reduce_sum(cross_entropy_pos_loss * fpmask), batch_float, name='value')
            tf.losses.add_loss(cross_entropy_pos_loss)

        with tf.name_scope('cross_entropy_neg'):
            cross_entropy_neg_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,
                                                                                    labels=no_classes)
            cross_entropy_neg_loss = tf.divide(tf.reduce_sum(cross_entropy_neg_loss * fnmask), batch_float, name='value')
            tf.losses.add_loss(cross_entropy_neg_loss)

        # Add localization loss: smooth L1, L2, ...
        with tf.name_scope('localization'):
            # Weights Tensor: positive mask + random negative.
            weights = tf.expand_dims(alpha * fpmask, axis=-1)
            localization_loss = custom_layers.abs_smooth(localisations - glocalisations)
            localization_loss = tf.divide(tf.reduce_sum(localization_loss * weights), batch_float, name='value')
            tf.losses.add_loss(localization_loss)

        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        all_losses = [cross_entropy_neg_loss, cross_entropy_pos_loss, localization_loss] + (regularization_losses if regularization_losses else [])
        return tf.add_n(all_losses)
@@ -0,0 +1,24 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TF Extended: additional TensorFlow operators (bboxes, image, math, metrics,
tensors).
"""

# pylint: disable=unused-import,line-too-long,g-importing-member,wildcard-import
from tfextended.metrics import *
from tfextended.tensors import *
from tfextended.bboxes import *
from tfextended.image import *
from tfextended.math import *

@@ -0,0 +1,508 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TF Extended: additional bounding boxes methods.
"""
import numpy as np
import tensorflow as tf

from tfextended import tensors as tfe_tensors
from tfextended import math as tfe_math


# =========================================================================== #
# Standard boxes algorithms.
# =========================================================================== #
def bboxes_sort_all_classes(classes, scores, bboxes, top_k=400, scope=None):
    """Sort bounding boxes by decreasing order and keep only the top_k.
    Assume the input Tensors mix up objects with different classes.
    Assume a batch-type input.

    Args:
      classes: Batch x N Tensor containing integer classes.
      scores: Batch x N Tensor containing float scores.
      bboxes: Batch x N x 4 Tensor containing boxes coordinates.
      top_k: Top_k boxes to keep.
    Return:
      classes, scores, bboxes: Sorted tensors of shape Batch x Top_k.
    """
    with tf.name_scope(scope, 'bboxes_sort', [classes, scores, bboxes]):
        scores, idxes = tf.nn.top_k(scores, k=top_k, sorted=True)

        # Trick to be able to use tf.gather: map for each element in the batch.
        def fn_gather(classes, bboxes, idxes):
            cl = tf.gather(classes, idxes)
            bb = tf.gather(bboxes, idxes)
            return [cl, bb]
        r = tf.map_fn(lambda x: fn_gather(x[0], x[1], x[2]),
                      [classes, bboxes, idxes],
                      dtype=[classes.dtype, bboxes.dtype],
                      parallel_iterations=10,
                      back_prop=False,
                      swap_memory=False,
                      infer_shape=True)
        classes = r[0]
        bboxes = r[1]
        return classes, scores, bboxes


def bboxes_sort(scores, bboxes, top_k=400, scope=None):
    """Sort bounding boxes by decreasing order and keep only the top_k.
    If inputs are dictionaries, assume every key is a different class.
    Assume a batch-type input.

    Args:
      scores: Batch x N Tensor/Dictionary containing float scores.
      bboxes: Batch x N x 4 Tensor/Dictionary containing boxes coordinates.
      top_k: Top_k boxes to keep.
    Return:
      scores, bboxes: Sorted Tensors/Dictionaries of shape Batch x Top_k x 1|4.
    """
    # Dictionaries as inputs.
    if isinstance(scores, dict) or isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_sort_dict'):
            d_scores = {}
            d_bboxes = {}
            for c in scores.keys():
                s, b = bboxes_sort(scores[c], bboxes[c], top_k=top_k)
                d_scores[c] = s
                d_bboxes[c] = b
            return d_scores, d_bboxes

    # Tensors inputs.
    with tf.name_scope(scope, 'bboxes_sort', [scores, bboxes]):
        # Sort scores...
        scores, idxes = tf.nn.top_k(scores, k=top_k, sorted=True)

        # Trick to be able to use tf.gather: map for each element in the first dim.
        def fn_gather(bboxes, idxes):
            bb = tf.gather(bboxes, idxes)
            return [bb]
        r = tf.map_fn(lambda x: fn_gather(x[0], x[1]),
                      [bboxes, idxes],
                      dtype=[bboxes.dtype],
                      parallel_iterations=10,
                      back_prop=False,
                      swap_memory=False,
                      infer_shape=True)
        bboxes = r[0]
        return scores, bboxes


def bboxes_clip(bbox_ref, bboxes, scope=None):
    """Clip bounding boxes to a reference box.
    Batch-compatible if the first dimension of `bbox_ref` and `bboxes`
    can be broadcasted.

    Args:
      bbox_ref: Reference bounding box. Nx4 or 4 shaped-Tensor;
      bboxes: Bounding boxes to clip. Nx4 or 4 shaped-Tensor or dictionary.
    Return:
      Clipped bboxes.
    """
    # Bboxes is dictionary.
    if isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_clip_dict'):
            d_bboxes = {}
            for c in bboxes.keys():
                d_bboxes[c] = bboxes_clip(bbox_ref, bboxes[c])
            return d_bboxes

    # Tensors inputs.
    with tf.name_scope(scope, 'bboxes_clip'):
        # Easier with transposed bboxes. Especially for broadcasting.
        bbox_ref = tf.transpose(bbox_ref)
        bboxes = tf.transpose(bboxes)
        # Intersection bboxes and reference bbox.
        ymin = tf.maximum(bboxes[0], bbox_ref[0])
        xmin = tf.maximum(bboxes[1], bbox_ref[1])
        ymax = tf.minimum(bboxes[2], bbox_ref[2])
        xmax = tf.minimum(bboxes[3], bbox_ref[3])
        # Double check! Empty boxes when no-intersection.
        ymin = tf.minimum(ymin, ymax)
        xmin = tf.minimum(xmin, xmax)
        bboxes = tf.transpose(tf.stack([ymin, xmin, ymax, xmax], axis=0))
        return bboxes


def bboxes_resize(bbox_ref, bboxes, name=None):
    """Resize bounding boxes based on a reference bounding box,
    assuming that the latter is [0, 0, 1, 1] after transform. Useful for
    updating a collection of boxes after cropping an image.
    """
    # Bboxes is dictionary.
    if isinstance(bboxes, dict):
        with tf.name_scope(name, 'bboxes_resize_dict'):
            d_bboxes = {}
            for c in bboxes.keys():
                d_bboxes[c] = bboxes_resize(bbox_ref, bboxes[c])
            return d_bboxes

    # Tensors inputs.
    with tf.name_scope(name, 'bboxes_resize'):
        # Translate.
        v = tf.stack([bbox_ref[0], bbox_ref[1], bbox_ref[0], bbox_ref[1]])
        bboxes = bboxes - v
        # Scale.
        s = tf.stack([bbox_ref[2] - bbox_ref[0],
                      bbox_ref[3] - bbox_ref[1],
                      bbox_ref[2] - bbox_ref[0],
                      bbox_ref[3] - bbox_ref[1]])
        bboxes = bboxes / s
        return bboxes


def bboxes_nms(scores, bboxes, nms_threshold=0.5, keep_top_k=200, scope=None):
    """Apply non-maximum selection to bounding boxes. In comparison to the TF
    implementation, use classes information for matching.
    Should only be used on single entries. Use the batch version otherwise.

    Args:
      scores: N Tensor containing float scores.
      bboxes: N x 4 Tensor containing boxes coordinates.
      nms_threshold: Matching threshold in NMS algorithm;
      keep_top_k: Number of total objects to keep after NMS.
    Return:
      scores, bboxes Tensors, sorted by score.
        Padded with zero if necessary.
    """
    with tf.name_scope(scope, 'bboxes_nms_single', [scores, bboxes]):
        # Apply NMS algorithm.
        idxes = tf.image.non_max_suppression(bboxes, scores,
                                             keep_top_k, nms_threshold)
        scores = tf.gather(scores, idxes)
        bboxes = tf.gather(bboxes, idxes)
        # Pad results.
        scores = tfe_tensors.pad_axis(scores, 0, keep_top_k, axis=0)
        bboxes = tfe_tensors.pad_axis(bboxes, 0, keep_top_k, axis=0)
        return scores, bboxes


def bboxes_nms_batch(scores, bboxes, nms_threshold=0.5, keep_top_k=200,
                     scope=None):
    """Apply non-maximum selection to bounding boxes. In comparison to the TF
    implementation, use classes information for matching.
    Use only on batched inputs. Use zero-padding in order to batch output
    results.

    Args:
      scores: Batch x N Tensor/Dictionary containing float scores.
      bboxes: Batch x N x 4 Tensor/Dictionary containing boxes coordinates.
      nms_threshold: Matching threshold in NMS algorithm;
      keep_top_k: Number of total objects to keep after NMS.
    Return:
      scores, bboxes Tensors/Dictionaries, sorted by score.
        Padded with zero if necessary.
    """
    # Dictionaries as inputs.
    if isinstance(scores, dict) or isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_nms_batch_dict'):
            d_scores = {}
            d_bboxes = {}
            for c in scores.keys():
                s, b = bboxes_nms_batch(scores[c], bboxes[c],
                                        nms_threshold=nms_threshold,
                                        keep_top_k=keep_top_k)
                d_scores[c] = s
                d_bboxes[c] = b
            return d_scores, d_bboxes

    # Tensors inputs.
    with tf.name_scope(scope, 'bboxes_nms_batch'):
        r = tf.map_fn(lambda x: bboxes_nms(x[0], x[1],
                                           nms_threshold, keep_top_k),
                      (scores, bboxes),
                      dtype=(scores.dtype, bboxes.dtype),
                      parallel_iterations=10,
                      back_prop=False,
                      swap_memory=False,
                      infer_shape=True)
        scores, bboxes = r
        return scores, bboxes


# def bboxes_fast_nms(classes, scores, bboxes,
#                     nms_threshold=0.5, eta=3., num_classes=21,
#                     pad_output=True, scope=None):
#     with tf.name_scope(scope, 'bboxes_fast_nms',
#                        [classes, scores, bboxes]):

#         nms_classes = tf.zeros((0,), dtype=classes.dtype)
#         nms_scores = tf.zeros((0,), dtype=scores.dtype)
#         nms_bboxes = tf.zeros((0, 4), dtype=bboxes.dtype)


def bboxes_matching(label, scores, bboxes,
                    glabels, gbboxes, gdifficults,
                    matching_threshold=0.5, scope=None):
    """Matching a collection of detected boxes with groundtruth values.
    Does not accept batched inputs.
    The algorithm goes as follows: for every detected box, check
    if one groundtruth box is matching. If none, then considered as False Positive.
    If the groundtruth box is already matched with another one, it also counts
    as a False Positive. We refer to the Pascal VOC documentation for the details.

    Args:
      rclasses, rscores, rbboxes: N(x4) Tensors. Detected objects, sorted by score;
      glabels, gbboxes: Groundtruth bounding boxes. May be zero padded, hence
        zero-class objects are ignored.
      matching_threshold: Threshold for a positive match.
    Return: Tuple of:
      n_gbboxes: Scalar Tensor with number of groundtruth boxes (may differ from
        size because of zero padding).
      tp_match: (N,)-shaped boolean Tensor containing True Positives.
      fp_match: (N,)-shaped boolean Tensor containing False Positives.
    """
    with tf.name_scope(scope, 'bboxes_matching_single',
                       [scores, bboxes, glabels, gbboxes]):
        rsize = tf.size(scores)
        rshape = tf.shape(scores)
        rlabel = tf.cast(label, glabels.dtype)
        # Number of groundtruth boxes.
        gdifficults = tf.cast(gdifficults, tf.bool)
        n_gbboxes = tf.count_nonzero(tf.logical_and(tf.equal(glabels, label),
                                                    tf.logical_not(gdifficults)))
        # Groundtruth matching arrays.
        gmatch = tf.zeros(tf.shape(glabels), dtype=tf.bool)
        grange = tf.range(tf.size(glabels), dtype=tf.int32)
        # True/False positive matching TensorArrays.
        sdtype = tf.bool
        ta_tp_bool = tf.TensorArray(sdtype, size=rsize, dynamic_size=False, infer_shape=True)
        ta_fp_bool = tf.TensorArray(sdtype, size=rsize, dynamic_size=False, infer_shape=True)

        # Loop over returned objects.
        def m_condition(i, ta_tp, ta_fp, gmatch):
            r = tf.less(i, rsize)
            return r

        def m_body(i, ta_tp, ta_fp, gmatch):
            # Jaccard score with groundtruth bboxes.
            rbbox = bboxes[i]
            jaccard = bboxes_jaccard(rbbox, gbboxes)
            jaccard = jaccard * tf.cast(tf.equal(glabels, rlabel), dtype=jaccard.dtype)

            # Best fit, checking it's above threshold.
            idxmax = tf.cast(tf.argmax(jaccard, axis=0), tf.int32)
            jcdmax = jaccard[idxmax]
            match = jcdmax > matching_threshold
            existing_match = gmatch[idxmax]
            not_difficult = tf.logical_not(gdifficults[idxmax])

            # TP: match & no previous match and FP: previous match | no match.
            # If difficult: no record, i.e. FP=False and TP=False.
            tp = tf.logical_and(not_difficult,
                                tf.logical_and(match, tf.logical_not(existing_match)))
            ta_tp = ta_tp.write(i, tp)
            fp = tf.logical_and(not_difficult,
                                tf.logical_or(existing_match, tf.logical_not(match)))
            ta_fp = ta_fp.write(i, fp)
            # Update groundtruth match.
            mask = tf.logical_and(tf.equal(grange, idxmax),
                                  tf.logical_and(not_difficult, match))
            gmatch = tf.logical_or(gmatch, mask)

            return [i+1, ta_tp, ta_fp, gmatch]
        # Main loop definition.
        i = 0
        [i, ta_tp_bool, ta_fp_bool, gmatch] = \
            tf.while_loop(m_condition, m_body,
                          [i, ta_tp_bool, ta_fp_bool, gmatch],
                          parallel_iterations=1,
                          back_prop=False)
        # TensorArrays to Tensors and reshape.
        tp_match = tf.reshape(ta_tp_bool.stack(), rshape)
        fp_match = tf.reshape(ta_fp_bool.stack(), rshape)

        # Some debugging information...
        # tp_match = tf.Print(tp_match,
        #                     [n_gbboxes,
        #                      tf.reduce_sum(tf.cast(tp_match, tf.int64)),
        #                      tf.reduce_sum(tf.cast(fp_match, tf.int64)),
        #                      tf.reduce_sum(tf.cast(gmatch, tf.int64))],
        #                     'Matching (NG, TP, FP, GM): ')
        return n_gbboxes, tp_match, fp_match


def bboxes_matching_batch(labels, scores, bboxes,
                          glabels, gbboxes, gdifficults,
                          matching_threshold=0.5, scope=None):
    """Matching a collection of detected boxes with groundtruth values.
    Batched-inputs version.

    Args:
      rclasses, rscores, rbboxes: BxN(x4) Tensors. Detected objects, sorted by score;
      glabels, gbboxes: Groundtruth bounding boxes. May be zero padded, hence
        zero-class objects are ignored.
      matching_threshold: Threshold for a positive match.
    Return: Tuple or Dictionaries with:
      n_gbboxes: Scalar Tensor with number of groundtruth boxes (may differ from
        size because of zero padding).
      tp: (B, N)-shaped boolean Tensor containing True Positives.
      fp: (B, N)-shaped boolean Tensor containing False Positives.
    """
    # Dictionaries as inputs.
    if isinstance(scores, dict) or isinstance(bboxes, dict):
        with tf.name_scope(scope, 'bboxes_matching_batch_dict'):
            d_n_gbboxes = {}
            d_tp = {}
            d_fp = {}
            for c in labels:
                n, tp, fp, _ = bboxes_matching_batch(c, scores[c], bboxes[c],
                                                     glabels, gbboxes, gdifficults,
                                                     matching_threshold)
                d_n_gbboxes[c] = n
                d_tp[c] = tp
                d_fp[c] = fp
            return d_n_gbboxes, d_tp, d_fp, scores

    with tf.name_scope(scope, 'bboxes_matching_batch',
                       [scores, bboxes, glabels, gbboxes]):
        r = tf.map_fn(lambda x: bboxes_matching(labels, x[0], x[1],
                                                x[2], x[3], x[4],
                                                matching_threshold),
                      (scores, bboxes, glabels, gbboxes, gdifficults),
                      dtype=(tf.int64, tf.bool, tf.bool),
                      parallel_iterations=10,
                      back_prop=False,
                      swap_memory=True,
                      infer_shape=True)
        return r[0], r[1], r[2], scores


# =========================================================================== #
# Some filtering methods.
# =========================================================================== #
def bboxes_filter_center(labels, bboxes, margins=[0., 0., 0., 0.],
                         scope=None):
    """Filter out bounding boxes whose centers are not in
    the rectangle [0, 0, 1, 1] + margins. The margin Tensor
    can be used to enforce or loosen this condition.

    Return:
      labels, bboxes: Filtered elements.
    """
    with tf.name_scope(scope, 'bboxes_filter', [labels, bboxes]):
        cy = (bboxes[:, 0] + bboxes[:, 2]) / 2.
        cx = (bboxes[:, 1] + bboxes[:, 3]) / 2.
        mask = tf.greater(cy, margins[0])
        mask = tf.logical_and(mask, tf.greater(cx, margins[1]))
        mask = tf.logical_and(mask, tf.less(cy, 1. + margins[2]))
        mask = tf.logical_and(mask, tf.less(cx, 1. + margins[3]))
        # Boolean masking...
        labels = tf.boolean_mask(labels, mask)
        bboxes = tf.boolean_mask(bboxes, mask)
        return labels, bboxes


def bboxes_filter_overlap(labels, bboxes,
                          threshold=0.5, assign_negative=False,
                          scope=None):
    """Filter out bounding boxes based on (relative) overlap with the reference
    box [0, 0, 1, 1]. Either remove bounding boxes completely, or assign
    negative labels to the ones outside (useful for later processing...).

    Return:
      labels, bboxes: Filtered (or newly assigned) elements.
    """
    with tf.name_scope(scope, 'bboxes_filter', [labels, bboxes]):
        scores = bboxes_intersection(tf.constant([0, 0, 1, 1], bboxes.dtype),
                                     bboxes)
        mask = scores > threshold
        if assign_negative:
            labels = tf.where(mask, labels, -labels)
            # bboxes = tf.where(mask, bboxes, bboxes)
        else:
            labels = tf.boolean_mask(labels, mask)
            bboxes = tf.boolean_mask(bboxes, mask)
        return labels, bboxes


def bboxes_filter_labels(labels, bboxes,
                         out_labels=[], num_classes=np.inf,
                         scope=None):
    """Filter out labels from a collection. Typically used to get rid
    of DontCare elements. Also removes elements based on the number of classes.

    Return:
      labels, bboxes: Filtered elements.
    """
    with tf.name_scope(scope, 'bboxes_filter_labels', [labels, bboxes]):
        # Keep labels below num_classes and not listed in out_labels.
        mask = tf.less(labels, num_classes)
        for l in out_labels:
            mask = tf.logical_and(mask, tf.not_equal(labels, l))
        labels = tf.boolean_mask(labels, mask)
        bboxes = tf.boolean_mask(bboxes, mask)
        return labels, bboxes


# =========================================================================== #
# Standard boxes computation.
# =========================================================================== #
def bboxes_jaccard(bbox_ref, bboxes, name=None):
    """Compute jaccard score between a reference box and a collection
    of bounding boxes.

    Args:
      bbox_ref: (N, 4) or (4,) Tensor with reference bounding box(es).
      bboxes: (N, 4) Tensor, collection of bounding boxes.
    Return:
      (N,) Tensor with Jaccard scores.
    """
    with tf.name_scope(name, 'bboxes_jaccard'):
        # Should be more efficient to first transpose.
        bboxes = tf.transpose(bboxes)
        bbox_ref = tf.transpose(bbox_ref)
        # Intersection bbox and volume.
        int_ymin = tf.maximum(bboxes[0], bbox_ref[0])
        int_xmin = tf.maximum(bboxes[1], bbox_ref[1])
        int_ymax = tf.minimum(bboxes[2], bbox_ref[2])
        int_xmax = tf.minimum(bboxes[3], bbox_ref[3])
        h = tf.maximum(int_ymax - int_ymin, 0.)
        w = tf.maximum(int_xmax - int_xmin, 0.)
        # Volumes.
        inter_vol = h * w
        union_vol = -inter_vol \
            + (bboxes[2] - bboxes[0]) * (bboxes[3] - bboxes[1]) \
            + (bbox_ref[2] - bbox_ref[0]) * (bbox_ref[3] - bbox_ref[1])
        jaccard = tfe_math.safe_divide(inter_vol, union_vol, 'jaccard')
        return jaccard


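# Numeric check (assumed boxes): with bbox_ref = [0., 0., 1., 1.] and a box
# [0.5, 0.5, 1.5, 1.5], the intersection is 0.5 * 0.5 = 0.25 and the union is
# 1 + 1 - 0.25 = 1.75, so the Jaccard score is 0.25 / 1.75 ~ 0.143.
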
def bboxes_intersection(bbox_ref, bboxes, name=None):
    """Compute relative intersection between a reference box and a
    collection of bounding boxes. Namely, compute the quotient between
    intersection area and box area.

    Args:
      bbox_ref: (N, 4) or (4,) Tensor with reference bounding box(es).
      bboxes: (N, 4) Tensor, collection of bounding boxes.
    Return:
      (N,) Tensor with relative intersection.
    """
    with tf.name_scope(name, 'bboxes_intersection'):
        # Should be more efficient to first transpose.
        bboxes = tf.transpose(bboxes)
        bbox_ref = tf.transpose(bbox_ref)
        # Intersection bbox and volume.
        int_ymin = tf.maximum(bboxes[0], bbox_ref[0])
        int_xmin = tf.maximum(bboxes[1], bbox_ref[1])
        int_ymax = tf.minimum(bboxes[2], bbox_ref[2])
        int_xmax = tf.minimum(bboxes[3], bbox_ref[3])
        h = tf.maximum(int_ymax - int_ymin, 0.)
        w = tf.maximum(int_xmax - int_xmin, 0.)
        # Volumes.
        inter_vol = h * w
        bboxes_vol = (bboxes[2] - bboxes[0]) * (bboxes[3] - bboxes[1])
        scores = tfe_math.safe_divide(inter_vol, bboxes_vol, 'intersection')
        return scores
@@ -0,0 +1,63 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TF Extended: additional math functions.
"""
import tensorflow as tf

from tensorflow.python.framework import ops


def safe_divide(numerator, denominator, name):
    """Divides two values, returning 0 if the denominator is <= 0.
    Args:
      numerator: A real `Tensor`.
      denominator: A real `Tensor`, with dtype matching `numerator`.
      name: Name for the returned op.
    Returns:
      0 if `denominator` <= 0, else `numerator` / `denominator`
    """
    return tf.where(
        tf.greater(denominator, 0),
        tf.divide(numerator, denominator),
        tf.zeros_like(numerator),
        name=name)


def cummax(x, reverse=False, name=None):
    """Compute the cumulative maximum of the tensor `x` along its first axis.
    This operation is similar to the more classic `cumsum`. Only supports
    1D Tensors for now.

    Args:
      x: A `Tensor`. Must be one of the following types: `float32`, `float64`,
        `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`,
        `complex128`, `qint8`, `quint8`, `qint32`, `half`.
      reverse: A `bool` (default: False).
      name: A name for the operation (optional).
    Returns:
      A `Tensor`. Has the same type as `x`.
    """
    with ops.name_scope(name, "Cummax", [x]) as name:
        x = ops.convert_to_tensor(x, name="x")
        # Not very optimal: should directly integrate reverse into tf.scan.
        if reverse:
            x = tf.reverse(x, axis=[0])
        # 'Accumulating' maximum: ensure it is always increasing.
        cmax = tf.scan(tf.maximum, x,
                       initializer=None, parallel_iterations=1,
                       back_prop=False, swap_memory=False)
        if reverse:
            cmax = tf.reverse(cmax, axis=[0])
        return cmax
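
# Quick example (assumed input): for x = [2, 1, 3, 0],
#   cummax(x)               -> [2, 2, 3, 3]
#   cummax(x, reverse=True) -> [3, 3, 3, 0]
# The reversed variant is what makes a precision curve non-increasing when
# reading it from high recall to low recall.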
@@ -0,0 +1,397 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TF Extended: additional metrics.
"""
import tensorflow as tf
import numpy as np

from tensorflow.contrib.framework.python.ops import variables as contrib_variables
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables

from tfextended import math as tfe_math


# =========================================================================== #
# TensorFlow utils
# =========================================================================== #
def _create_local(name, shape, collections=None, validate_shape=False,
                  dtype=dtypes.float32):
    """Creates a new local variable.
    Args:
      name: The name of the new or existing variable.
      shape: Shape of the new or existing variable.
      collections: A list of collection names to which the Variable will be added.
      validate_shape: Whether to validate the shape of the variable.
      dtype: Data type of the variables.
    Returns:
      The created variable.
    """
    # Make sure local variables are added to tf.GraphKeys.LOCAL_VARIABLES
    collections = list(collections or [])
    collections += [ops.GraphKeys.LOCAL_VARIABLES]
    return tf.Variable(
        initial_value=array_ops.zeros(shape, dtype=dtype),
        name=name,
        trainable=False,
        collections=collections,
        validate_shape=validate_shape)


def _safe_div(numerator, denominator, name):
    """Divides two values, returning 0 if the denominator is <= 0.
    Args:
      numerator: A real `Tensor`.
      denominator: A real `Tensor`, with dtype matching `numerator`.
      name: Name for the returned op.
    Returns:
      0 if `denominator` <= 0, else `numerator` / `denominator`
    """
    return tf.where(
        tf.math.greater(denominator, 0),
        tf.math.divide(numerator, denominator),
        tf.zeros_like(numerator),
        name=name)


def _broadcast_weights(weights, values):
    """Broadcast `weights` to the same shape as `values`.
    This returns a version of `weights` following the same broadcast rules as
    `mul(weights, values)`. When computing a weighted average, use this function
    to broadcast `weights` before summing them; e.g.,
    `reduce_sum(w * v) / reduce_sum(_broadcast_weights(w, v))`.
    Args:
      weights: `Tensor` whose shape is broadcastable to `values`.
      values: `Tensor` of any shape.
    Returns:
      `weights` broadcast to `values` shape.
    """
    weights_shape = weights.get_shape()
    values_shape = values.get_shape()
    if (weights_shape.is_fully_defined() and
            values_shape.is_fully_defined() and
            weights_shape.is_compatible_with(values_shape)):
        return weights
    return tf.math.multiply(
        weights, array_ops.ones_like(values), name='broadcast_weights')


# =========================================================================== #
# TF Extended metrics: TP and FP arrays.
# =========================================================================== #
def precision_recall(num_gbboxes, num_detections, tp, fp, scores,
                     dtype=tf.float64, scope=None):
    """Compute precision and recall from scores, true positives and false
    positives boolean arrays.
    """
    # Input dictionaries: dict outputs as streaming metrics.
    if isinstance(scores, dict):
        d_precision = {}
        d_recall = {}
        for c in num_gbboxes.keys():
            scope = 'precision_recall_%s' % c
            p, r = precision_recall(num_gbboxes[c], num_detections[c],
                                    tp[c], fp[c], scores[c],
                                    dtype, scope)
            d_precision[c] = p
            d_recall[c] = r
        return d_precision, d_recall

    # Sort by score.
    with tf.name_scope(scope, 'precision_recall',
                       [num_gbboxes, num_detections, tp, fp, scores]):
        # Sort detections by score.
        scores, idxes = tf.nn.top_k(scores, k=num_detections, sorted=True)
        tp = tf.gather(tp, idxes)
        fp = tf.gather(fp, idxes)
        # Compute recall and precision.
        tp = tf.cumsum(tf.cast(tp, dtype), axis=0)
        fp = tf.cumsum(tf.cast(fp, dtype), axis=0)
        recall = _safe_div(tp, tf.cast(num_gbboxes, dtype), 'recall')
        precision = _safe_div(tp, tp + fp, 'precision')
        return tf.tuple([precision, recall])


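# Worked example (assumed values): with score-sorted tp = [1, 0, 1],
# fp = [0, 1, 0] and num_gbboxes = 2, the cumulative sums give
# tp = [1, 1, 2] and fp = [0, 1, 1], hence
#   precision = [1.0, 0.5, 0.667]
#   recall    = [0.5, 0.5, 1.0]
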
def streaming_tp_fp_arrays(num_gbboxes, tp, fp, scores,
                           remove_zero_scores=True,
                           metrics_collections=None,
                           updates_collections=None,
                           name=None):
    """Streaming computation of True and False Positive arrays. This metric
    also keeps track of scores and the number of groundtruth objects.
    """
    # Input dictionaries: dict outputs as streaming metrics.
    if isinstance(scores, dict) or isinstance(fp, dict):
        d_values = {}
        d_update_ops = {}
        for c in num_gbboxes.keys():
            scope = 'streaming_tp_fp_%s' % c
            v, up = streaming_tp_fp_arrays(num_gbboxes[c], tp[c], fp[c], scores[c],
                                           remove_zero_scores,
                                           metrics_collections,
                                           updates_collections,
                                           name=scope)
            d_values[c] = v
            d_update_ops[c] = up
        return d_values, d_update_ops

    # Input Tensors...
    with variable_scope.variable_scope(name, 'streaming_tp_fp',
                                       [num_gbboxes, tp, fp, scores]):
        num_gbboxes = tf.cast(num_gbboxes, dtype=tf.int64)
        scores = tf.cast(scores, dtype=tf.float32)
        stype = tf.bool
        tp = tf.cast(tp, stype)
        fp = tf.cast(fp, stype)
        # Reshape TP and FP tensors and clean away 0 class values.
        scores = tf.reshape(scores, [-1])
        tp = tf.reshape(tp, [-1])
        fp = tf.reshape(fp, [-1])
        # Remove entries where TP and FP are both false.
        mask = tf.logical_or(tp, fp)
        if remove_zero_scores:
            rm_threshold = 1e-4
            mask = tf.logical_and(mask, tf.greater(scores, rm_threshold))
            scores = tf.boolean_mask(scores, mask)
            tp = tf.boolean_mask(tp, mask)
            fp = tf.boolean_mask(fp, mask)

        # Local variables accumulating information over batches.
        v_nobjects = _create_local('v_num_gbboxes', shape=[], dtype=tf.int64)
        v_ndetections = _create_local('v_num_detections', shape=[], dtype=tf.int32)
        v_scores = _create_local('v_scores', shape=[0, ])
        v_tp = _create_local('v_tp', shape=[0, ], dtype=stype)
        v_fp = _create_local('v_fp', shape=[0, ], dtype=stype)

        # Update operations.
        nobjects_op = state_ops.assign_add(v_nobjects,
                                           tf.reduce_sum(num_gbboxes))
        ndetections_op = state_ops.assign_add(v_ndetections,
                                              tf.size(scores, out_type=tf.int32))
        scores_op = state_ops.assign(v_scores, tf.concat([v_scores, scores], axis=0),
                                     validate_shape=False)
        tp_op = state_ops.assign(v_tp, tf.concat([v_tp, tp], axis=0),
                                 validate_shape=False)
        fp_op = state_ops.assign(v_fp, tf.concat([v_fp, fp], axis=0),
                                 validate_shape=False)

        # Value and update ops.
        val = (v_nobjects, v_ndetections, v_tp, v_fp, v_scores)
        with ops.control_dependencies([nobjects_op, ndetections_op,
                                       scores_op, tp_op, fp_op]):
            update_op = (nobjects_op, ndetections_op, tp_op, fp_op, scores_op)

        if metrics_collections:
            ops.add_to_collections(metrics_collections, val)
        if updates_collections:
            ops.add_to_collections(updates_collections, update_op)
        return val, update_op


# =========================================================================== #
# Average precision computations.
# =========================================================================== #
def average_precision_voc12(precision, recall, name=None):
    """Compute (interpolated) average precision from precision and recall Tensors.

    The implementation follows Pascal 2012 and ILSVRC guidelines.
    See also: https://sanchom.wordpress.com/tag/average-precision/
    """
    with tf.name_scope(name, 'average_precision_voc12', [precision, recall]):
        # Convert to float64 to decrease error on Riemann sums.
        precision = tf.cast(precision, dtype=tf.float64)
        recall = tf.cast(recall, dtype=tf.float64)

        # Add bounds values to precision and recall.
        precision = tf.concat([[0.], precision, [0.]], axis=0)
        recall = tf.concat([[0.], recall, [1.]], axis=0)
        # Ensures precision is increasing in reverse order.
        precision = tfe_math.cummax(precision, reverse=True)

        # Riemann sums for estimating the integral.
        # mean_pre = (precision[1:] + precision[:-1]) / 2.
        mean_pre = precision[1:]
        diff_rec = recall[1:] - recall[:-1]
        ap = tf.reduce_sum(mean_pre * diff_rec)
        return ap


def average_precision_voc07(precision, recall, name=None):
    """Compute (interpolated) average precision from precision and recall Tensors.

    The implementation follows Pascal 2007 guidelines.
    See also: https://sanchom.wordpress.com/tag/average-precision/
    """
    with tf.name_scope(name, 'average_precision_voc07', [precision, recall]):
        # Convert to float64 to decrease error on cumulative sums.
        precision = tf.cast(precision, dtype=tf.float64)
        recall = tf.cast(recall, dtype=tf.float64)
        # Add zero-limit value to avoid any boundary problem...
        precision = tf.concat([precision, [0.]], axis=0)
        recall = tf.concat([recall, [np.inf]], axis=0)

        # Sample the curve at 11 recall points (0.0, 0.1, ..., 1.0).
        l_aps = []
        for t in np.arange(0., 1.1, 0.1):
            mask = tf.greater_equal(recall, t)
            v = tf.reduce_max(tf.boolean_mask(precision, mask))
            l_aps.append(v / 11.)
        ap = tf.add_n(l_aps)
        return ap


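# Sketch of the 11-point rule (assumed toy curve): if the maximum precision
# at recall >= t is 1.0 for t in {0.0, ..., 0.5} and 0.5 for t in
# {0.6, ..., 1.0}, then AP = (6 * 1.0 + 5 * 0.5) / 11 ~ 0.773.
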
def precision_recall_values(xvals, precision, recall, name=None):
|
||||
"""Compute values on the precision/recall curve.
|
||||
|
||||
Args:
|
||||
x: Python list of floats;
|
||||
precision: 1D Tensor decreasing.
|
||||
recall: 1D Tensor increasing.
|
||||
Return:
|
||||
list of precision values.
|
||||
"""
|
||||
with ops.name_scope(name, "precision_recall_values",
|
||||
[precision, recall]) as name:
|
||||
# Add bounds values to precision and recall.
|
||||
precision = tf.concat([[0.], precision, [0.]], axis=0)
|
||||
recall = tf.concat([[0.], recall, [1.]], axis=0)
|
||||
precision = tfe_math.cummax(precision, reverse=True)
|
||||
|
||||
prec_values = []
|
||||
for x in xvals:
|
||||
mask = tf.less_equal(recall, x)
|
||||
val = tf.reduce_min(tf.boolean_mask(precision, mask))
|
||||
prec_values.append(val)
|
||||
return tf.tuple(prec_values)
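

# Example usage (sketch): tabulate interpolated precision at fixed recall
# points; assumes `precision`/`recall` come from the streaming metrics above
# and a `tf.Session` named `sess` is available.
# prec_at = precision_recall_values([0.25, 0.5, 0.75], precision, recall)
# print(sess.run(prec_at))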


# =========================================================================== #
# TF Extended metrics: old stuff!
# =========================================================================== #
def _precision_recall(n_gbboxes, n_detections, scores, tp, fp, scope=None):
    """Compute precision and recall from scores and boolean arrays of
    true positives and false positives.
    """
    with tf.name_scope(scope, 'prec_rec', [n_gbboxes, scores, tp, fp]):
        # Sort detections by score.
        scores, idxes = tf.nn.top_k(scores, k=n_detections, sorted=True)
        tp = tf.gather(tp, idxes)
        fp = tf.gather(fp, idxes)
        # Compute recall and precision.
        dtype = tf.float64
        tp = tf.cumsum(tf.cast(tp, dtype), axis=0)
        fp = tf.cumsum(tf.cast(fp, dtype), axis=0)
        recall = _safe_div(tp, tf.cast(n_gbboxes, dtype), 'recall')
        precision = _safe_div(tp, tp + fp, 'precision')

        return tf.tuple([precision, recall])
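

# Worked illustration of the cumulative sums above: with n_gbboxes = 2 and
# score-sorted tp = [1, 0, 1], fp = [0, 1, 0], the cumsums are tp = [1, 1, 2]
# and fp = [0, 1, 1], giving precision = [1., .5, .667] and recall = [.5, .5, 1.].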


def streaming_precision_recall_arrays(n_gbboxes, rclasses, rscores,
                                      tp_tensor, fp_tensor,
                                      remove_zero_labels=True,
                                      metrics_collections=None,
                                      updates_collections=None,
                                      name=None):
    """Streaming computation of precision / recall arrays. This metric
    keeps track of boolean True positives and False positives arrays.
    """
    with variable_scope.variable_scope(name, 'stream_precision_recall',
                                       [n_gbboxes, rclasses, tp_tensor, fp_tensor]):
        n_gbboxes = tf.cast(n_gbboxes, tf.int64)
        rclasses = tf.cast(rclasses, tf.int64)
        rscores = tf.cast(rscores, tf.float32)

        stype = tf.int32
        tp_tensor = tf.cast(tp_tensor, stype)
        fp_tensor = tf.cast(fp_tensor, stype)

        # Reshape TP and FP tensors and clean away 0 class values.
        rclasses = tf.reshape(rclasses, [-1])
        rscores = tf.reshape(rscores, [-1])
        tp_tensor = tf.reshape(tp_tensor, [-1])
        fp_tensor = tf.reshape(fp_tensor, [-1])
        if remove_zero_labels:
            mask = tf.greater(rclasses, 0)
            rclasses = tf.boolean_mask(rclasses, mask)
            rscores = tf.boolean_mask(rscores, mask)
            tp_tensor = tf.boolean_mask(tp_tensor, mask)
            fp_tensor = tf.boolean_mask(fp_tensor, mask)

        # Local variables accumulating information over batches.
        v_nobjects = _create_local('v_nobjects', shape=[], dtype=tf.int64)
        v_ndetections = _create_local('v_ndetections', shape=[], dtype=tf.int32)
        v_scores = _create_local('v_scores', shape=[0, ])
        v_tp = _create_local('v_tp', shape=[0, ], dtype=stype)
        v_fp = _create_local('v_fp', shape=[0, ], dtype=stype)

        # Update operations.
        nobjects_op = state_ops.assign_add(v_nobjects,
                                           tf.reduce_sum(n_gbboxes))
        ndetections_op = state_ops.assign_add(v_ndetections,
                                              tf.size(rscores, out_type=tf.int32))
        scores_op = state_ops.assign(v_scores, tf.concat([v_scores, rscores], axis=0),
                                     validate_shape=False)
        tp_op = state_ops.assign(v_tp, tf.concat([v_tp, tp_tensor], axis=0),
                                 validate_shape=False)
        fp_op = state_ops.assign(v_fp, tf.concat([v_fp, fp_tensor], axis=0),
                                 validate_shape=False)

        # Precision and recall computations.
        # r = _precision_recall(nobjects_op, scores_op, tp_op, fp_op, 'value')
        r = _precision_recall(v_nobjects, v_ndetections, v_scores,
                              v_tp, v_fp, 'value')

        with ops.control_dependencies([nobjects_op, ndetections_op,
                                       scores_op, tp_op, fp_op]):
            update_op = _precision_recall(nobjects_op, ndetections_op,
                                          scores_op, tp_op, fp_op, 'update_op')

        if metrics_collections:
            ops.add_to_collections(metrics_collections, r)
        if updates_collections:
            ops.add_to_collections(updates_collections, update_op)
        return r, update_op
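

# Typical usage (sketch; `num_eval_batches` and the input tensors are
# placeholders for the caller's evaluation pipeline): run `update_op` once
# per batch, then feed the accumulated arrays to an AP function above.
# r, update_op = streaming_precision_recall_arrays(
#     n_gbboxes, rclasses, rscores, tp_tensor, fp_tensor)
# with tf.Session() as sess:
#     sess.run(tf.local_variables_initializer())
#     for _ in range(num_eval_batches):
#         sess.run(update_op)
#     precision, recall = r
#     ap = sess.run(average_precision_voc12(precision, recall))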

@@ -0,0 +1,95 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TF Extended: additional tensors operations.
"""
import tensorflow as tf

from tensorflow.contrib.framework.python.ops import variables as contrib_variables
from tensorflow.contrib.metrics.python.ops import set_ops
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import check_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables


def get_shape(x, rank=None):
    """Returns the dimensions of a Tensor as a list of integers or scalar tensors.

    Args:
      x: N-d Tensor;
      rank: Rank of the Tensor. If None, will try to guess it.
    Returns:
      A list of `[d1, d2, ..., dN]` corresponding to the dimensions of the
      input tensor. Dimensions that are statically known are python integers,
      otherwise they are integer scalar tensors.
    """
    if x.get_shape().is_fully_defined():
        return x.get_shape().as_list()
    else:
        static_shape = x.get_shape()
        if rank is None:
            static_shape = static_shape.as_list()
            rank = len(static_shape)
        else:
            static_shape = x.get_shape().with_rank(rank).as_list()
        dynamic_shape = tf.unstack(tf.shape(x), rank)
        return [s if s is not None else d
                for s, d in zip(static_shape, dynamic_shape)]
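

# Example (sketch): with a partially defined placeholder, statically known
# dimensions come back as Python ints, unknown ones as scalar tensors.
# x = tf.placeholder(tf.float32, shape=(None, 300, 300, 3))
# n, h, w, c = get_shape(x, rank=4)   # n is a Tensor; h, w, c are 300, 300, 3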


def pad_axis(x, offset, size, axis=0, name=None):
    """Pad a tensor on an axis, with a given offset and output size.
    The tensor is padded with zeros (i.e. CONSTANT mode). Note that if
    `size` is smaller than the existing size plus `offset`, the output
    dimension keeps the latter, larger value.

    Args:
      x: Tensor to pad;
      offset: Offset to add on the dimension chosen;
      size: Final size of the dimension.
    Return:
      Padded tensor whose dimension on `axis` is `size`, or greater if
      the input vector was larger.
    """
    with tf.name_scope(name, 'pad_axis'):
        shape = get_shape(x)
        rank = len(shape)
        # Padding description.
        new_size = tf.maximum(size-offset-shape[axis], 0)
        pad1 = tf.stack([0]*axis + [offset] + [0]*(rank-axis-1))
        pad2 = tf.stack([0]*axis + [new_size] + [0]*(rank-axis-1))
        paddings = tf.stack([pad1, pad2], axis=1)
        x = tf.pad(x, paddings, mode='CONSTANT')
        # Reshape, to get fully defined shape if possible.
        # TODO: fix with tf.slice
        shape[axis] = size
        x = tf.reshape(x, tf.stack(shape))
        return x


# def select_at_index(idx, val, t):
#     """Return a tensor.
#     """
#     idx = tf.expand_dims(tf.expand_dims(idx, 0), 0)
#     val = tf.expand_dims(val, 0)
#     t = t + tf.scatter_nd(idx, val, tf.shape(t))
#     return t
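

# Example (sketch): pad a variable-length scores vector to a fixed per-image
# size, as when batching SSD detection outputs.
# scores = tf.constant([0.9, 0.8, 0.7])
# padded = pad_axis(scores, offset=0, size=10, axis=0)   # shape: [10]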
@@ -0,0 +1,20 @@
'''
Endpoint names to look for in the graph.
'''

from anchors import generate_anchors

feat_layers = generate_anchors.feat_layers
sub_feats = ['']
localizations_names = [f'ssd_300_vgg/{feature}_box/Reshape:0' for feature in feat_layers]

predictions_names = ['ssd_300_vgg/softmax/Reshape_1:0'] \
    + [f'ssd_300_vgg/softmax_{n}/Reshape_1:0' for n in range(1, len(feat_layers))]

logit_names = [f'ssd_300_vgg/{feature}_box/Reshape_1:0' for feature in feat_layers]

endpoint_names = ['ssd_300_vgg/conv1/conv1_2/Relu:0'] \
    + [f'ssd_300_vgg/conv{n}/conv{n}_3/Relu:0' for n in range(4, 6)] \
    + [f'ssd_300_vgg/conv{n}/conv{n}_{n}/Relu:0' for n in range(2, 4)] \
    + [f'ssd_300_vgg/conv{n}/Relu:0' for n in range(6, 8)] \
    + [f'ssd_300_vgg/{feature}/conv3x3/Relu:0' for feature in feat_layers if feature != 'block4' and feature != 'block7']
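
# Example (sketch): once the frozen ssd_300_vgg graph has been imported into
# the default graph, these names resolve to concrete tensors.
# graph = tf.get_default_graph()
# localizations = [graph.get_tensor_by_name(n) for n in localizations_names]
# predictions = [graph.get_tensor_by_name(n) for n in predictions_names]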
@@ -0,0 +1,158 @@
# Copyright 2016 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
"""Diverse TensorFlow utils, for training, evaluation and so on!
"""
import os

import tensorflow as tf


# =========================================================================== #
# General tools.
# =========================================================================== #
def reshape_list(l, shape=None):
    """Reshape a list of (lists): 1D to 2D, or the other way around.

    Args:
      l: List, or list of lists.
      shape: 1D or 2D shape.
    Return:
      Reshaped list.
    """
    r = []
    if shape is None:
        # Flatten everything.
        for a in l:
            if isinstance(a, (list, tuple)):
                r = r + list(a)
            else:
                r.append(a)
    else:
        # Reshape to list of list.
        i = 0
        for s in shape:
            if s == 1:
                r.append(l[i])
            else:
                r.append(l[i:i+s])
            i += s
    return r
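

# Example: flatten a nested list, then restore its structure with a shape.
# reshape_list([1, [2, 3], 4])           -> [1, 2, 3, 4]
# reshape_list([1, 2, 3, 4], [1, 2, 1])  -> [1, [2, 3], 4]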


def configure_learning_rate(flags, num_samples_per_epoch, global_step):
    """Configures the learning rate.

    Args:
      num_samples_per_epoch: The number of samples in each epoch of training.
      global_step: The global_step tensor.
    Returns:
      A `Tensor` representing the learning rate.
    Raises:
      ValueError: if the decay type is not recognized.
    """
    decay_steps = int(num_samples_per_epoch / flags.batch_size *
                      flags.num_epochs_per_decay)

    if flags.learning_rate_decay_type == 'exponential':
        return tf.train.exponential_decay(flags.learning_rate,
                                          global_step,
                                          decay_steps,
                                          flags.learning_rate_decay_factor,
                                          staircase=True,
                                          name='exponential_decay_learning_rate')
    elif flags.learning_rate_decay_type == 'fixed':
        return tf.constant(flags.learning_rate, name='fixed_learning_rate')
    elif flags.learning_rate_decay_type == 'polynomial':
        return tf.train.polynomial_decay(flags.learning_rate,
                                         global_step,
                                         decay_steps,
                                         flags.end_learning_rate,
                                         power=1.0,
                                         cycle=False,
                                         name='polynomial_decay_learning_rate')
    else:
        raise ValueError('learning_rate_decay_type [%s] was not recognized' %
                         flags.learning_rate_decay_type)


def configure_optimizer(flags, learning_rate):
    """Configures the optimizer used for training.

    Args:
      learning_rate: A scalar or `Tensor` learning rate.
    Returns:
      An instance of an optimizer.
    Raises:
      ValueError: if the optimizer is not recognized.
    """
    if flags.optimizer == 'adadelta':
        optimizer = tf.train.AdadeltaOptimizer(
            learning_rate,
            rho=flags.adadelta_rho,
            epsilon=flags.opt_epsilon)
    elif flags.optimizer == 'adagrad':
        optimizer = tf.train.AdagradOptimizer(
            learning_rate,
            initial_accumulator_value=flags.adagrad_initial_accumulator_value)
    elif flags.optimizer == 'adam':
        optimizer = tf.train.AdamOptimizer(
            learning_rate,
            beta1=flags.adam_beta1,
            beta2=flags.adam_beta2,
            epsilon=flags.opt_epsilon)
    elif flags.optimizer == 'ftrl':
        optimizer = tf.train.FtrlOptimizer(
            learning_rate,
            learning_rate_power=flags.ftrl_learning_rate_power,
            initial_accumulator_value=flags.ftrl_initial_accumulator_value,
            l1_regularization_strength=flags.ftrl_l1,
            l2_regularization_strength=flags.ftrl_l2)
    elif flags.optimizer == 'momentum':
        optimizer = tf.train.MomentumOptimizer(
            learning_rate,
            momentum=flags.momentum,
            name='Momentum')
    elif flags.optimizer == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate,
            decay=flags.rmsprop_decay,
            momentum=flags.rmsprop_momentum,
            epsilon=flags.opt_epsilon)
    elif flags.optimizer == 'sgd':
        optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    else:
        raise ValueError('Optimizer [%s] was not recognized' % flags.optimizer)
    return optimizer


def update_model_scope(var, ckpt_scope, new_scope):
    # Map a variable name from the model scope back to the hardcoded
    # 'vgg_16' checkpoint scope.
    return var.op.name.replace(new_scope, 'vgg_16')


def get_variables_to_train(flags):
    """Returns a list of variables to train.

    Returns:
      A list of variables to train by the optimizer.
    """
    if flags.trainable_scopes is None:
        return tf.trainable_variables()
    else:
        scopes = [scope.strip() for scope in flags.trainable_scopes.split(',')]

    variables_to_train = []
    for scope in scopes:
        variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
        variables_to_train.extend(variables)
    return variables_to_train
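

# Example (sketch; scope names are illustrative): train only the added SSD
# feature layers while keeping the VGG backbone frozen.
# flags.trainable_scopes = 'ssd_300_vgg/block8,ssd_300_vgg/block9'
# vars_to_train = get_variables_to_train(flags)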


# =========================================================================== #
# Evaluation utils.
# =========================================================================== #
@@ -0,0 +1,114 @@
# Copyright 2017 Paul Balanca. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import cv2
import random

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.cm as mpcm


# =========================================================================== #
# Some colormaps.
# =========================================================================== #
def colors_subselect(colors, num_classes=21):
    dt = len(colors) // num_classes
    sub_colors = []
    for i in range(num_classes):
        color = colors[i*dt]
        if isinstance(color[0], float):
            sub_colors.append([int(c * 255) for c in color])
        else:
            sub_colors.append([c for c in color])
    return sub_colors


colors_plasma = colors_subselect(mpcm.plasma.colors, num_classes=21)
colors_tableau = [(255, 255, 255), (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
                  (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
                  (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
                  (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
                  (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]

# =========================================================================== #
# OpenCV drawing.
# =========================================================================== #
def draw_lines(img, lines, color=[255, 0, 0], thickness=2):
    """Draw a collection of lines on an image.
    """
    for line in lines:
        for x1, y1, x2, y2 in line:
            cv2.line(img, (x1, y1), (x2, y2), color, thickness)


def draw_rectangle(img, p1, p2, color=[255, 0, 0], thickness=2):
    # Points are given as (y, x); reverse for OpenCV's (x, y) convention.
    cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)


def draw_bbox(img, bbox, shape, label, color=[255, 0, 0], thickness=2):
    # Relative [ymin, xmin, ymax, xmax] box scaled to pixel coordinates.
    p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1]))
    p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1]))
    cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)
    p1 = (p1[0]+15, p1[1])
    cv2.putText(img, str(label), p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.5, color, 1)


def bboxes_draw_on_img(img, classes, scores, bboxes, colors, thickness=2):
    shape = img.shape
    for i in range(bboxes.shape[0]):
        bbox = bboxes[i]
        color = colors[classes[i]]
        # Draw bounding box...
        p1 = (int(bbox[0] * shape[0]), int(bbox[1] * shape[1]))
        p2 = (int(bbox[2] * shape[0]), int(bbox[3] * shape[1]))
        cv2.rectangle(img, p1[::-1], p2[::-1], color, thickness)
        # Draw text...
        s = '%s/%.3f' % (classes[i], scores[i])
        p1 = (p1[0]-5, p1[1])
        cv2.putText(img, s, p1[::-1], cv2.FONT_HERSHEY_DUPLEX, 0.4, color, 1)


# =========================================================================== #
# Matplotlib show...
# =========================================================================== #
def plt_bboxes(img, classes, scores, bboxes, figsize=(10, 10), linewidth=1.5):
    """Visualize bounding boxes. Largely inspired by SSD-MXNET!
    """
    fig = plt.figure(figsize=figsize)
    plt.imshow(img)
    height = img.shape[0]
    width = img.shape[1]
    colors = dict()
    for i in range(classes.shape[0]):
        cls_id = int(classes[i])
        if cls_id >= 0:
            score = scores[i]
            if cls_id not in colors:
                colors[cls_id] = (random.random(), random.random(), random.random())
            ymin = int(bboxes[i, 0] * height)
            xmin = int(bboxes[i, 1] * width)
            ymax = int(bboxes[i, 2] * height)
            xmax = int(bboxes[i, 3] * width)
            rect = plt.Rectangle((xmin, ymin), xmax - xmin,
                                 ymax - ymin, fill=False,
                                 edgecolor=colors[cls_id],
                                 linewidth=linewidth)
            plt.gca().add_patch(rect)
            class_name = str(cls_id)
            plt.gca().text(xmin, ymin - 2,
                           '{:s} | {:.3f}'.format(class_name, score),
                           bbox=dict(facecolor=colors[cls_id], alpha=0.5),
                           fontsize=12, color='white')
    plt.show()
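

# Example usage (sketch): `rclasses`, `rscores`, `rbboxes` are assumed to be
# post-processed SSD outputs for one image (NumPy arrays, boxes in relative
# [ymin, xmin, ymax, xmax] coordinates).
# plt_bboxes(img, rclasses, rscores, rbboxes, figsize=(10, 10))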