# Mirror of https://github.com/Azure/MachineLearningNotebooks.git
# Synced 2025-12-19 17:17:04 -05:00 — 214 lines, 7.6 KiB, Python
import argparse
|
|
import os
|
|
import json
|
|
import numpy as np
|
|
import PIL.Image as Image
|
|
import xml.etree.ElementTree as ET
|
|
|
|
from simplification.cutil import simplify_coords
|
|
from skimage import measure
|
|
|
|
|
|
def convert_mask_to_polygon(
    mask,
    max_polygon_points=100,
    score_threshold=0.5,
    max_refinement_iterations=25,
    edge_safety_padding=1,
):
    """Convert a numpy mask to a polygon outline in normalized coordinates.

    :param mask: Pixel mask, where each pixel has an object (float) score in [0, 1], in size ([1, height, width])
    :type: mask: <class 'numpy.array'>
    :param max_polygon_points: Maximum number of (x, y) coordinate pairs in polygon
    :type: max_polygon_points: Int
    :param score_threshold: Score cutoff for considering a pixel as in object.
    :type: score_threshold: Float
    :param max_refinement_iterations: Maximum number of times to refine the polygon
        trying to reduce the number of pixels to meet max polygon points.
    :type: max_refinement_iterations: Int
    :param edge_safety_padding: Number of pixels to pad the mask with
    :type edge_safety_padding: Int
    :return: normalized polygon coordinates
    :rtype: list of list
    """
    # Threshold the per-pixel scores into a binary uint8 bitmask.
    binary_mask = np.array((mask[0] > score_threshold), dtype=np.uint8)
    original_shape = binary_mask.shape

    # Zero-pad the bitmask so contours touching the image border stay closed.
    padded_mask = np.pad(
        binary_mask, pad_width=edge_safety_padding, mode="constant", constant_values=0
    )

    # Trace iso-contours at the 0.5 level of the padded bitmask.
    polygons = []
    for contour in measure.find_contours(padded_mask, 0.5):

        # Iteratively simplify the polyline with an increasing tolerance until it
        # fits within max_polygon_points (bounded by max_refinement_iterations).
        if max_polygon_points is not None:
            tolerance = 0
            while (
                len(contour) > max_polygon_points
                and tolerance < max_refinement_iterations
            ):
                contour = simplify_coords(contour, tolerance)
                tolerance += 1

        # Interleave into [x, y, x, y, ...] and undo the padding offset.
        # find_contours yields (row, col) points, so column -> x and row -> y.
        flattened = [0] * (2 * len(contour))
        flattened[::2] = np.ceil(contour[:, 1]) - edge_safety_padding
        flattened[1::2] = np.ceil(contour[:, 0]) - edge_safety_padding

        polygons.append(flattened)

    return _normalize_contour(polygons, original_shape)
|
|
|
|
|
|
def _normalize_contour(contours, image_shape):
|
|
|
|
height, width = image_shape[0], image_shape[1]
|
|
|
|
for contour in contours:
|
|
contour[::2] = [x * 1.0 / width for x in contour[::2]]
|
|
contour[1::2] = [y * 1.0 / height for y in contour[1::2]]
|
|
|
|
return contours
|
|
|
|
|
|
def binarise_mask(mask_fname):
    """Split a color-encoded instance mask image into per-object binary masks.

    :param mask_fname: path to a mask image where each instance has a distinct pixel value
    :return: boolean array of shape (num_objects, height, width), one plane per instance
    """
    mask_array = np.array(Image.open(mask_fname))
    # Instances are encoded as different colors; np.unique returns them sorted,
    # and the first (smallest) value is the background, so drop it.
    instance_ids = np.unique(mask_array)[1:]

    # Broadcast-compare the mask against every id at once: one boolean
    # (height, width) plane per instance.
    return mask_array == instance_ids[:, None, None]
|
|
|
|
|
|
def parsing_mask(mask_fname):
    """Extract one normalized polygon per object from a merged instance mask.

    :param mask_fname: path to the color-encoded mask image
    :return: list of normalized polygons, one per object, in annotation-file order
    """
    # For this particular dataset, each mask was merged (from the binary mask of
    # each object) in the order of the bounding boxes in the corresponding PASCAL
    # VOC annotation file, so the extracted binary masks line up index-for-index
    # with the objects in that file.
    # https://github.com/microsoft/computervision-recipes/blob/master/utils_cv/detection/dataset.py
    polygons = []
    for binary_mask in binarise_mask(mask_fname):

        # convert_mask_to_polygon expects a (1, height, width) array.
        if len(binary_mask.shape) == 2:
            binary_mask = binary_mask[np.newaxis, :]
        polygons.append(convert_mask_to_polygon(binary_mask))

    return polygons
|
|
|
|
|
|
def convert_mask_in_VOC_to_jsonl(base_dir, workspace):
    """Convert a PASCAL VOC dataset (annotations + masks) to AutoML JSONL files.

    Reads every ``.xml`` file under ``<base_dir>/annotations``, converts the
    matching mask in ``<base_dir>/segmentation-masks`` to normalized polygons,
    and writes one JSON line per image into ``train_annotations.jsonl`` and
    ``validation_annotations.jsonl`` in ``base_dir`` (every 5th listed file
    goes to validation).

    :param base_dir: directory containing "annotations", "segmentation-masks",
        and the uploaded "images" folder
    :param workspace: AzureML workspace whose default datastore hosts the images
    """
    src = base_dir
    # Every train_validation_ratio-th listed file becomes a validation sample.
    train_validation_ratio = 5

    # Retrieving default datastore that got automatically created when we setup a workspace
    workspaceblobstore = workspace.get_default_datastore().name

    # Path to the annotations
    annotations_folder = os.path.join(src, "annotations")
    mask_folder = os.path.join(src, "segmentation-masks")

    # Path to the training and validation files
    train_annotations_file = os.path.join(src, "train_annotations.jsonl")
    validation_annotations_file = os.path.join(src, "validation_annotations.jsonl")

    # Datastore URL prefix shared by every image in the dataset.
    image_url_base = (
        "AmlDatastore://"
        + workspaceblobstore
        + "/"
        + os.path.basename(os.path.dirname(src))
        + "/"
        + "images"
    )

    # Read each annotation and convert it to a jsonl line
    with open(train_annotations_file, "w") as train_f:
        with open(validation_annotations_file, "w") as validation_f:
            for i, filename in enumerate(os.listdir(annotations_folder)):
                if filename.endswith(".xml"):
                    print("Parsing " + os.path.join(src, filename))

                    root = ET.parse(
                        os.path.join(annotations_folder, filename)
                    ).getroot()

                    width = int(root.find("size/width").text)
                    height = int(root.find("size/height").text)
                    # convert mask into polygon
                    mask_fname = os.path.join(mask_folder, filename[:-4] + ".png")
                    polygons = parsing_mask(mask_fname)

                    labels = []
                    # Objects in the XML are in the same order as the extracted
                    # polygons (see parsing_mask).
                    for index, obj in enumerate(root.findall("object")):
                        name = obj.find("name").text
                        isCrowd = int(obj.find("difficult").text)
                        labels.append(
                            {
                                "label": name,
                                "bbox": "null",
                                "isCrowd": isCrowd,
                                "polygon": polygons[index],
                            }
                        )

                    # build the jsonl line. Build a fresh record per image:
                    # the previous shallow dict(sample) copy shared a single
                    # nested "image_details" dict across all lines.
                    image_filename = root.find("filename").text
                    _, file_extension = os.path.splitext(image_filename)
                    json_line = {
                        "image_url": image_url_base + "/" + image_filename,
                        "image_details": {
                            "format": file_extension[1:],
                            "width": width,
                            "height": height,
                        },
                        "label": labels,
                    }

                    if i % train_validation_ratio == 0:
                        # validation annotation
                        validation_f.write(json.dumps(json_line) + "\n")
                    else:
                        # train annotation
                        train_f.write(json.dumps(json_line) + "\n")
                else:
                    print("Skipping unknown file: {}".format(filename))
|
|
|
|
|
|
if __name__ == "__main__":
    # CLI entry point: --data_path points at the VOC-style dataset root
    # (containing "annotations" and "segmentation-masks" folders).
    parser = argparse.ArgumentParser(allow_abbrev=False)
    parser.add_argument(
        "--data_path",
        type=str,
        help="the directory contains images, annotations, and masks",
    )

    # Unknown arguments are tolerated (parse_known_args) and silently ignored.
    args, remaining_args = parser.parse_known_args()
    data_path = args.data_path

    # NOTE(review): convert_mask_in_VOC_to_jsonl requires a second `workspace`
    # argument, but only one is passed here — this call raises TypeError at
    # runtime. The caller must supply an AzureML Workspace (e.g. from
    # Workspace.from_config()); confirm the intended invocation.
    convert_mask_in_VOC_to_jsonl(data_path)
|