# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import argparse
import os

import numpy as np
import tensorflow as tf
from azureml.core import Run
from utils import load_data

print("TensorFlow version:", tf.VERSION)

# parse hyperparameters passed in by the Azure ML run configuration
parser = argparse.ArgumentParser()
parser.add_argument('--data-folder', type=str, dest='data_folder',
                    help='data folder mounting point')
parser.add_argument('--batch-size', type=int, dest='batch_size', default=50,
                    help='mini batch size for training')
parser.add_argument('--first-layer-neurons', type=int, dest='n_hidden_1', default=100,
                    help='# of neurons in the first layer')
parser.add_argument('--second-layer-neurons', type=int, dest='n_hidden_2', default=100,
                    help='# of neurons in the second layer')
parser.add_argument('--learning-rate', type=float, dest='learning_rate', default=0.01,
                    help='learning rate')
args = parser.parse_args()

data_folder = os.path.join(args.data_folder, 'mnist')
print('training dataset is stored here:', data_folder)

# load MNIST; normalize pixel values to [0, 1] and flatten the label arrays
X_train = load_data(os.path.join(data_folder, 'train-images.gz'), False) / 255.0
X_test = load_data(os.path.join(data_folder, 'test-images.gz'), False) / 255.0
y_train = load_data(os.path.join(data_folder, 'train-labels.gz'), True).reshape(-1)
y_test = load_data(os.path.join(data_folder, 'test-labels.gz'), True).reshape(-1)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep='\n')

training_set_size = X_train.shape[0]

n_inputs = 28 * 28
n_h1 = args.n_hidden_1
n_h2 = args.n_hidden_2
n_outputs = 10
learning_rate = args.learning_rate
n_epochs = 20
batch_size = args.batch_size

with tf.name_scope('network'):
    # construct the DNN: two fully connected ReLU hidden layers and a logits layer
    X = tf.placeholder(tf.float32, shape=(None, n_inputs), name='X')
    y = tf.placeholder(tf.int64, shape=(None), name='y')
    h1 = tf.layers.dense(X, n_h1, activation=tf.nn.relu, name='h1')
    h2 = tf.layers.dense(h1, n_h2, activation=tf.nn.relu, name='h2')
    output = tf.layers.dense(h2, n_outputs, name='output')

with tf.name_scope('train'):
    # softmax cross-entropy loss minimized with plain SGD
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=output)
    loss = tf.reduce_mean(cross_entropy, name='loss')
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_op = optimizer.minimize(loss)

with tf.name_scope('eval'):
    # accuracy: fraction of examples whose top-1 prediction matches the label
    correct = tf.nn.in_top_k(output, y, 1)
    acc_op = tf.reduce_mean(tf.cast(correct, tf.float32))

init = tf.global_variables_initializer()
saver = tf.train.Saver()

# start an Azure ML run
run = Run.get_context()

with tf.Session() as sess:
    init.run()
    for epoch in range(n_epochs):

        # randomly shuffle the training set at the start of each epoch
        indices = np.random.permutation(training_set_size)
        X_train = X_train[indices]
        y_train = y_train[indices]

        # batch index
        b_start = 0
        b_end = b_start + batch_size
        for _ in range(training_set_size // batch_size):
            # get a mini batch
            X_batch, y_batch = X_train[b_start: b_end], y_train[b_start: b_end]

            # update batch index for the next batch
            b_start = b_start + batch_size
            b_end = min(b_start + batch_size, training_set_size)

            # train
            sess.run(train_op, feed_dict={X: X_batch, y: y_batch})

        # evaluate training accuracy on the last mini batch of the epoch
        acc_train = acc_op.eval(feed_dict={X: X_batch, y: y_batch})

        # evaluate validation accuracy on the full test set
        acc_val = acc_op.eval(feed_dict={X: X_test, y: y_test})

        # log accuracies to the Azure ML run
        run.log('training_acc', float(acc_train))
        run.log('validation_acc', float(acc_val))

        print(epoch, '-- Training accuracy:', acc_train, 'Validation accuracy:', acc_val)

    y_hat = np.argmax(output.eval(feed_dict={X: X_test}), axis=1)

    run.log('final_acc', float(acc_val))

    os.makedirs('./outputs/model', exist_ok=True)
    # files saved in the "./outputs" folder are automatically uploaded into run history
    saver.save(sess, './outputs/model/mnist-tf.model')