mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-21 10:05:09 -05:00
updated pipeline notebooks with expanded tutorial
pipeline/batch_scoring.py (new file, 119 lines)
@@ -0,0 +1,119 @@
# Copyright (c) Microsoft. All rights reserved.
# Licensed under the MIT license.

import os
import argparse
import datetime
import time
import tensorflow as tf
from math import ceil
import numpy as np
import shutil
from tensorflow.contrib.slim.python.slim.nets import inception_v3
from azureml.core.model import Model

slim = tf.contrib.slim

parser = argparse.ArgumentParser(description="Start a tensorflow model serving")
parser.add_argument('--model_name', dest="model_name", required=True)
parser.add_argument('--label_dir', dest="label_dir", required=True)
parser.add_argument('--dataset_path', dest="dataset_path", required=True)
parser.add_argument('--output_dir', dest="output_dir", required=True)
parser.add_argument('--batch_size', dest="batch_size", type=int, required=True)

args = parser.parse_args()

image_size = 299
num_channel = 3

# create output directory if it does not exist
os.makedirs(args.output_dir, exist_ok=True)


def get_class_label_dict(label_file):
    # read the class labels, one per line, from labels.txt
    label = []
    proto_as_ascii_lines = tf.gfile.GFile(label_file).readlines()
    for l in proto_as_ascii_lines:
        label.append(l.rstrip())
    return label


class DataIterator:
    # iterates over the image files in data_dir; labels are dummy values
    # because this script only scores images, it does not evaluate accuracy
    def __init__(self, data_dir):
        self.file_paths = []
        image_list = os.listdir(data_dir)
        # total_size = len(image_list)
        self.file_paths = [data_dir + '/' + file_name.rstrip() for file_name in image_list]

        self.labels = [1 for file_name in self.file_paths]

    @property
    def size(self):
        return len(self.labels)

    def input_pipeline(self, batch_size):
        images_tensor = tf.convert_to_tensor(self.file_paths, dtype=tf.string)
        labels_tensor = tf.convert_to_tensor(self.labels, dtype=tf.int64)
        input_queue = tf.train.slice_input_producer([images_tensor, labels_tensor], shuffle=False)
        labels = input_queue[1]
        images_content = tf.read_file(input_queue[0])

        image_reader = tf.image.decode_jpeg(images_content, channels=num_channel, name="jpeg_reader")
        float_caster = tf.cast(image_reader, tf.float32)
        new_size = tf.constant([image_size, image_size], dtype=tf.int32)
        images = tf.image.resize_images(float_caster, new_size)
        # scale pixel values from [0, 255] to [0, 1]
        images = tf.divide(tf.subtract(images, [0]), [255])

        image_batch, label_batch = tf.train.batch([images, labels], batch_size=batch_size, capacity=5 * batch_size)
        return image_batch


def main(_):
    # start_time = datetime.datetime.now()
    label_file_name = os.path.join(args.label_dir, "labels.txt")
    label_dict = get_class_label_dict(label_file_name)
    classes_num = len(label_dict)
    test_feeder = DataIterator(data_dir=args.dataset_path)
    total_size = len(test_feeder.labels)
    count = 0
    # get model from model registry
    model_path = Model.get_model_path(args.model_name)
    with tf.Session() as sess:
        test_images = test_feeder.input_pipeline(batch_size=args.batch_size)
        with slim.arg_scope(inception_v3.inception_v3_arg_scope()):
            input_images = tf.placeholder(tf.float32, [args.batch_size, image_size, image_size, num_channel])
            logits, _ = inception_v3.inception_v3(input_images,
                                                  num_classes=classes_num,
                                                  is_training=False)
            # index of the highest-scoring class for each image
            probabilities = tf.argmax(logits, 1)

        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        saver = tf.train.Saver()
        saver.restore(sess, model_path)
        out_filename = os.path.join(args.output_dir, "result-labels.txt")
        with open(out_filename, "w") as result_file:
            i = 0
            while count < total_size and not coord.should_stop():
                test_images_batch = sess.run(test_images)
                file_names_batch = test_feeder.file_paths[i * args.batch_size:
                                                          min(test_feeder.size, (i + 1) * args.batch_size)]
                results = sess.run(probabilities, feed_dict={input_images: test_images_batch})
                new_add = min(args.batch_size, total_size - count)
                count += new_add
                i += 1
                for j in range(new_add):
                    result_file.write(os.path.basename(file_names_batch[j]) + ": " + label_dict[results[j]] + "\n")
                result_file.flush()
        coord.request_stop()
        coord.join(threads)

        # copy the file to artifacts
        shutil.copy(out_filename, "./outputs/")
        # Move the processed data out of the blob so that the next run can process the data.


if __name__ == "__main__":
    tf.app.run()
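The commit message refers to the expanded pipeline tutorial; the sketch below shows one way batch_scoring.py could be wired into an Azure ML pipeline as a PythonScriptStep. It is a minimal sketch, not part of this commit: the datastore paths, the registered model name "inception", the compute target "gpu-cluster", the experiment name, and the batch size are all illustrative assumptions, and the run configuration would still need TensorFlow and the Azure ML SDK added to its environment.

# Minimal sketch, assuming a workspace config file on disk, a registered model
# named "inception", an AmlCompute cluster called "gpu-cluster", and sample
# images/labels already uploaded to the default datastore (all assumptions).
from azureml.core import Workspace, Experiment
from azureml.core.runconfig import RunConfiguration
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep

ws = Workspace.from_config()
datastore = ws.get_default_datastore()

# directory of JPEG images to score and the directory containing labels.txt
input_images = DataReference(datastore=datastore,
                             data_reference_name="input_images",
                             path_on_datastore="batchscoring/images")
label_dir = DataReference(datastore=datastore,
                          data_reference_name="input_labels",
                          path_on_datastore="batchscoring/labels")

# the script writes result-labels.txt into --output_dir
output_dir = PipelineData(name="scores", datastore=datastore)

# environment dependencies (tensorflow, azureml-sdk) would be added here
run_config = RunConfiguration()

batch_score_step = PythonScriptStep(
    name="batch_scoring",
    script_name="batch_scoring.py",
    source_directory="pipeline",
    arguments=["--dataset_path", input_images,
               "--model_name", "inception",   # assumed registered model name
               "--label_dir", label_dir,
               "--output_dir", output_dir,
               "--batch_size", 20],           # assumed batch size
    inputs=[input_images, label_dir],
    outputs=[output_dir],
    compute_target="gpu-cluster",             # assumed AmlCompute cluster
    runconfig=run_config)

pipeline = Pipeline(workspace=ws, steps=[batch_score_step])
run = Experiment(ws, "batch_scoring").submit(pipeline)
run.wait_for_completion(show_output=True)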