Update samples from Release-166 as part of the 1.49.0 SDK stable release

amlrelsa-ms
2023-02-14 02:46:24 +00:00
parent 9b1e130d18
commit 0ecbbbce75
36 changed files with 48 additions and 1822 deletions

View File

@@ -103,7 +103,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -6,7 +6,8 @@ dependencies:
- fairlearn>=0.6.2
- joblib
- liac-arff
- raiwidgets~=0.23.0
- raiwidgets~=0.24.0
- itsdangerous==2.0.1
- markupsafe<2.1.0
- protobuf==3.20.0
- numpy<1.24.0

View File

@@ -6,7 +6,8 @@ dependencies:
- fairlearn>=0.6.2
- joblib
- liac-arff
- raiwidgets~=0.23.0
- raiwidgets~=0.24.0
- itsdangerous==2.0.1
- markupsafe<2.1.0
- protobuf==3.20.0
- numpy<1.24.0

View File

@@ -8,13 +8,17 @@ dependencies:
# Azure ML only supports 3.7.0 and later.
- pip==22.3.1
- python>=3.7,<3.9
- conda-forge::fbprophet==0.7.1
- pandas==1.1.5
- scipy==1.5.3
- Cython==0.29.14
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.48.0
- azureml-defaults~=1.48.0
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.48.0/validated_win32_requirements.txt [--no-deps]
- azureml-widgets~=1.49.0
- azureml-defaults~=1.49.0
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.49.0/validated_win32_requirements.txt [--no-deps]
- matplotlib==3.6.2
- xgboost==1.3.3
- arch==4.14
- mlflow-skinny==1.30.0
- cmdstanpy==0.9.5
- setuptools-git==1.2

View File

@@ -6,7 +6,7 @@ channels:
dependencies:
# The python interpreter version.
# Azure ML only supports 3.7 and later.
- pip==20.1.1
- pip==22.3.1
- python>=3.7,<3.9
- matplotlib==3.2.1
- numpy>=1.21.6,<=1.22.3
@@ -20,16 +20,13 @@ dependencies:
- pytorch::pytorch=1.11.0
- cudatoolkit=10.1.243
- notebook
- jinja2<=2.11.2
- markupsafe<2.1.0
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.48.0
- azureml-defaults~=1.48.0
- azureml-widgets~=1.49.0
- azureml-defaults~=1.49.0
- pytorch-transformers==1.0.0
- spacy==2.2.4
- pystan==2.19.1.1
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.48.0/validated_linux_requirements.txt [--no-deps]
- arch==4.14
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.49.0/validated_linux_requirements.txt [--no-deps]

View File

@@ -6,7 +6,7 @@ channels:
dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.7 and later.
- pip==20.1.1
- pip==22.3.1
- python>=3.7,<3.9
- matplotlib==3.2.1
- numpy>=1.21.6,<=1.22.3
@@ -20,16 +20,13 @@ dependencies:
- pytorch::pytorch=1.11.0
- cudatoolkit=9.0
- notebook
- jinja2<=2.11.2
- markupsafe<2.1.0
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.48.0
- azureml-defaults~=1.48.0
- azureml-widgets~=1.49.0
- azureml-defaults~=1.49.0
- pytorch-transformers==1.0.0
- spacy==2.2.4
- pystan==2.19.1.1
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.48.0/validated_darwin_requirements.txt [--no-deps]
- arch==4.14
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.49.0/validated_darwin_requirements.txt [--no-deps]

View File

@@ -97,7 +97,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -97,7 +97,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -3,7 +3,7 @@ dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.7.0 and later.
- pip<=22.3.1
- python>=3.7.0,<3.10
- python>=3.7.0,<3.11
- pip:
# Required packages for AzureML execution, history, and data preparation.

View File

@@ -4,10 +4,10 @@ channels:
- main
dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.6.0 and later.
# Currently Azure ML only supports 3.7.0 and later.
- pip<=20.2.4
- nomkl
- python>=3.6.0,<3.10
- python>=3.7.0,<3.11
- urllib3==1.26.7
- PyJWT < 2.0.0
- numpy>=1.21.6,<=1.22.3

View File

@@ -92,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -91,7 +91,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -106,7 +106,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
@@ -298,8 +298,9 @@
"pip uninstall -y xgboost && \\\n",
"conda install py-xgboost==1.3.3 && \\\n",
"pip uninstall -y numpy && \\\n",
"pip install {numpy_dep} &&\\\n",
"pip install {sklearn_dep} \\\n",
"pip install {numpy_dep} && \\\n",
"pip install {sklearn_dep} && \\\n",
"pip install chardet \\\n",
"\"\"\"\n",
"\n",
"env.python.user_managed_dependencies = True\n",

View File

@@ -10,7 +10,7 @@ dependencies:
- ipython
- matplotlib
- ipywidgets
- raiwidgets~=0.23.0
- raiwidgets~=0.24.0
- itsdangerous==2.0.1
- markupsafe<2.1.0
- scipy>=1.5.3

View File

@@ -10,7 +10,7 @@ dependencies:
- matplotlib
- azureml-dataset-runtime
- ipywidgets
- raiwidgets~=0.23.0
- raiwidgets~=0.24.0
- itsdangerous==2.0.1
- markupsafe<2.1.0
- scipy>=1.5.3

View File

@@ -9,7 +9,7 @@ dependencies:
- ipython
- matplotlib
- ipywidgets
- raiwidgets~=0.23.0
- raiwidgets~=0.24.0
- packaging>=20.9
- itsdangerous==2.0.1
- markupsafe<2.1.0

View File

@@ -9,7 +9,7 @@ dependencies:
- ipython
- matplotlib
- ipywidgets
- raiwidgets~=0.23.0
- raiwidgets~=0.24.0
- packaging>=20.9
- itsdangerous==2.0.1
- markupsafe<2.1.0

View File

@@ -11,7 +11,7 @@ dependencies:
- azureml-dataset-runtime
- azureml-core
- ipywidgets
- raiwidgets~=0.23.0
- raiwidgets~=0.24.0
- itsdangerous==2.0.1
- markupsafe<2.1.0
- scipy>=1.5.3

View File

@@ -330,7 +330,7 @@
"- **inputs:** List of input connections for data consumed by this step. Fetch this inside the notebook using dbutils.widgets.get(\"input\")\n",
"- **outputs:** List of output port definitions for outputs produced by this step. Fetch this inside the notebook using dbutils.widgets.get(\"output\")\n",
"- **existing_cluster_id:** Cluster ID of an existing Interactive cluster on the Databricks workspace. If you are providing this, do not provide any of the parameters below that are used to create a new cluster such as spark_version, node_type, etc.\n",
"- **spark_version:** Version of spark for the databricks run cluster. You can refer to [DataBricks runtime version](https://learn.microsoft.com/azure/databricks/dev-tools/api/#--runtime-version-strings) to specify the spark version. default value: 4.0.x-scala2.11\n",
"- **spark_version:** Version of spark for the databricks run cluster. You can refer to [DataBricks runtime version](https://learn.microsoft.com/azure/databricks/dev-tools/api/#--runtime-version-strings) to specify the spark version. default value: 10.4.x-scala2.12\n",
"- **node_type:** Azure vm node types for the databricks run cluster. default value: Standard_D3_v2\n",
"- **num_workers:** Specifies a static number of workers for the databricks run cluster\n",
"- **min_workers:** Specifies a min number of workers to use for auto-scaling the databricks run cluster\n",

View File

@@ -86,7 +86,7 @@
"import requests\n",
"\n",
"oj_sales_path = \"./oj.csv\"\n",
"r = requests.get(\"http://www.cs.unitn.it/~taufer/Data/oj.csv\")\n",
"r = requests.get(\"https://raw.githubusercontent.com/Azure/azureml-examples/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-orange-juice-sales/data/dominicks_OJ.csv\")\n",
"open(oj_sales_path, \"wb\").write(r.content)"
]
},
@@ -140,7 +140,7 @@
"metadata": {},
"outputs": [],
"source": [
"partitioned_dataset = dataset.partition_by(partition_keys=['store', 'brand'], target=(datastore, \"partition_by_key_res\"), name=\"partitioned_oj_data\")\n",
"partitioned_dataset = dataset.partition_by(partition_keys=['Store', 'Brand'], target=(datastore, \"partition_by_key_res\"), name=\"partitioned_oj_data\")\n",
"partitioned_dataset.partition_keys"
]
},
@@ -274,7 +274,7 @@
"parallel_run_config = ParallelRunConfig(\n",
" source_directory=scripts_folder,\n",
" entry_script=script_file, # the user script to run against each input\n",
" partition_keys=['store', 'brand'],\n",
" partition_keys=['Store', 'Brand'],\n",
" error_threshold=5,\n",
" output_action='append_row',\n",
" append_row_file_name=\"revenue_outputs.txt\",\n",
@@ -362,8 +362,8 @@
"result_file = os.path.join(target_dir, batch_output.path_on_datastore, parallel_run_config.append_row_file_name)\n",
"\n",
"df = pd.read_csv(result_file, delimiter=\" \", header=None)\n",
"df.columns=[\"WeekStarting\", \"Quantity\", \"logQuantity\", \"Advert\", \"Price\", \"Age60\", \"COLLEGE\", \"INCOME\", \"Hincome150\", \"Large HH\", \"Minorities\", \"WorkingWoman\", \"SSTRDIST\", \"SSTRVOL\", \"CPDIST5\", \"CPWVOL5\", \"Store\", \"Brand\", \"total_income\"]\n",
"\n",
"df.columns = [\"week\", \"logmove\", \"feat\", \"price\", \"AGE60\", \"EDUC\", \"ETHNIC\", \"INCOME\", \"HHLARGE\", \"WORKWOM\", \"HVAL150\", \"SSTRDIST\", \"SSTRVOL\", \"CPDIST5\", \"CPWVOL5\", \"store\", \"brand\", \"total_income\"]\n",
"print(\"Prediction has \", df.shape[0], \" rows\")\n",
"df.head(10)"
]
@@ -413,7 +413,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.8.13"
}
},
"nbformat": 4,

View File

@@ -36,8 +36,6 @@ Using these samples, you will learn how to do the following.
| [cartpole_ci.ipynb](cartpole-on-compute-instance/cartpole_ci.ipynb) | Notebook to train a Cartpole playing agent on an Azure Machine Learning Compute Instance |
| [cartpole_sc.ipynb](cartpole-on-single-compute/cartpole_sc.ipynb) | Notebook to train a Cartpole playing agent on an Azure Machine Learning Compute Cluster (single node) |
| [pong_rllib.ipynb](atari-on-distributed-compute/pong_rllib.ipynb) | Notebook for distributed training of Pong agent using RLlib on multiple compute targets |
| [minecraft.ipynb](minecraft-on-distributed-compute/minecraft.ipynb) | Notebook to train an agent to navigate through a lava maze in the Minecraft game |
| [particle.ipynb](multiagent-particle-envs/particle.ipynb) | Notebook to train policies in a multiagent cooperative navigation scenario based on OpenAI's Particle environments |
## Prerequisites

View File

@@ -1,39 +0,0 @@
# DisableDockerDetector "Disabled to unblock PRs until the owner can fix the file. Not used in any prod deployments - only as a documentation for the customers"
FROM akdmsft/particle-cpu
RUN conda install -c anaconda python=3.7
# Install required pip packages
RUN pip3 install --upgrade pip setuptools && pip3 install --upgrade \
pandas \
matplotlib \
psutil \
numpy \
scipy \
gym \
azureml-defaults \
tensorboardX \
tensorflow==1.15 \
tensorflow-probability==0.8.0 \
onnxruntime \
tf2onnx \
cloudpickle==1.1.1 \
tabulate \
dm_tree \
lz4 \
opencv-python
RUN cd multiagent-particle-envs && \
pip3 install -e . && \
pip3 install --upgrade pyglet==1.3.2
RUN pip3 install ray-on-aml==0.1.6
RUN pip install protobuf==3.20.0
RUN pip3 install --upgrade \
ray==0.8.7 \
ray[rllib]==0.8.7 \
ray[tune]==0.8.7
RUN pip install 'msrest<0.7.0'

View File

@@ -1,70 +0,0 @@
# MIT License
# Copyright (c) 2018 OpenAI
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import numpy as np
import gym
class MultiDiscrete(gym.Space):
"""
- The multi-discrete action space consists of a series of discrete action spaces with different
parameters
- It can be adapted to both a Discrete action space or a continuous (Box) action space
- It is useful to represent game controllers or keyboards where each key can be represented as
a discrete action space
- It is parametrized by passing an array of arrays containing [min, max] for each discrete action
space where the discrete action space can take any integers from `min` to `max` (both inclusive)
Note: A value of 0 always need to represent the NOOP action.
e.g. Nintendo Game Controller
- Can be conceptualized as 3 discrete action spaces:
1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4
2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
- Can be initialized as
MultiDiscrete([ [0,4], [0,1], [0,1] ])
"""
def __init__(self, array_of_param_array):
self.low = np.array([x[0] for x in array_of_param_array])
self.high = np.array([x[1] for x in array_of_param_array])
self.num_discrete_space = self.low.shape[0]
def sample(self):
""" Returns a array with one sample from each discrete action space """
# For each row: round(random .* (max - min) + min, 0)
# random_array = prng.np_random.rand(self.num_discrete_space)
random_array = np.random.RandomState().rand(self.num_discrete_space)
return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)]
def contains(self, x):
return len(x) == self.num_discrete_space \
and (np.array(x) >= self.low).all() \
and (np.array(x) <= self.high).all()
@property
def shape(self):
return self.num_discrete_space
def __repr__(self):
return "MultiDiscrete" + str(self.num_discrete_space)
def __eq__(self, other):
return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high)

View File

@@ -1,413 +0,0 @@
# MIT License
# Copyright (c) 2018 OpenAI
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""
2D rendering framework
"""
from __future__ import division
import os
import six
import sys
from gym import error
import math
import numpy as np
import pyglet
from pyglet.gl import glEnable, glHint, glLineWidth, glBlendFunc, glClearColor, glPushMatrix, \
glTranslatef, glRotatef, glScalef, glPopMatrix, glColor4f, glBegin, glVertex3f, glEnd, glLineStipple, \
glDisable, glVertex2f, GL_BLEND, GL_LINE_SMOOTH, GL_LINE_SMOOTH_HINT, GL_NICEST, GL_SRC_ALPHA, \
GL_ONE_MINUS_SRC_ALPHA, GL_LINE_STIPPLE, GL_POINTS, GL_QUADS, GL_TRIANGLES, GL_POLYGON, GL_LINE_LOOP, \
GL_LINE_STRIP, GL_LINES
if "Apple" in sys.version:
if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ:
os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib'
# (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite
RAD2DEG = 57.29577951308232
def get_display(spec):
"""Convert a display specification (such as :0) into an actual Display
object.
Pyglet only supports multiple Displays on Linux.
"""
if spec is None:
return None
elif isinstance(spec, six.string_types):
return pyglet.canvas.Display(spec)
else:
raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
class Viewer(object):
def __init__(self, width, height, display=None):
display = get_display(display)
self.width = width
self.height = height
self.window = pyglet.window.Window(width=width, height=height, display=display)
self.window.on_close = self.window_closed_by_user
self.geoms = []
self.onetime_geoms = []
self.transform = Transform()
glEnable(GL_BLEND)
# glEnable(GL_MULTISAMPLE)
glEnable(GL_LINE_SMOOTH)
# glHint(GL_LINE_SMOOTH_HINT, GL_DONT_CARE)
glHint(GL_LINE_SMOOTH_HINT, GL_NICEST)
glLineWidth(2.0)
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
def close(self):
self.window.close()
def window_closed_by_user(self):
self.close()
def set_bounds(self, left, right, bottom, top):
assert right > left and top > bottom
scalex = self.width / (right - left)
scaley = self.height / (top - bottom)
self.transform = Transform(
translation=(-left * scalex, -bottom * scaley),
scale=(scalex, scaley))
def add_geom(self, geom):
self.geoms.append(geom)
def add_onetime(self, geom):
self.onetime_geoms.append(geom)
def render(self, return_rgb_array=False):
glClearColor(1, 1, 1, 1)
self.window.clear()
self.window.switch_to()
self.window.dispatch_events()
self.transform.enable()
for geom in self.geoms:
geom.render()
for geom in self.onetime_geoms:
geom.render()
self.transform.disable()
arr = None
if return_rgb_array:
buffer = pyglet.image.get_buffer_manager().get_color_buffer()
image_data = buffer.get_image_data()
arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
# In https://github.com/openai/gym-http-api/issues/2, we
# discovered that someone using Xmonad on Arch was having
# a window of size 598 x 398, though a 600 x 400 window
# was requested. (Guess Xmonad was preserving a pixel for
# the boundary.) So we use the buffer height/width rather
# than the requested one.
arr = arr.reshape(buffer.height, buffer.width, 4)
arr = arr[::-1, :, 0:3]
self.window.flip()
self.onetime_geoms = []
return arr
# Convenience
def draw_circle(self, radius=10, res=30, filled=True, **attrs):
geom = make_circle(radius=radius, res=res, filled=filled)
_add_attrs(geom, attrs)
self.add_onetime(geom)
return geom
def draw_polygon(self, v, filled=True, **attrs):
geom = make_polygon(v=v, filled=filled)
_add_attrs(geom, attrs)
self.add_onetime(geom)
return geom
def draw_polyline(self, v, **attrs):
geom = make_polyline(v=v)
_add_attrs(geom, attrs)
self.add_onetime(geom)
return geom
def draw_line(self, start, end, **attrs):
geom = Line(start, end)
_add_attrs(geom, attrs)
self.add_onetime(geom)
return geom
def get_array(self):
self.window.flip()
image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
self.window.flip()
arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
arr = arr.reshape(self.height, self.width, 4)
return arr[::-1, :, 0:3]
def _add_attrs(geom, attrs):
if "color" in attrs:
geom.set_color(*attrs["color"])
if "linewidth" in attrs:
geom.set_linewidth(attrs["linewidth"])
class Geom(object):
def __init__(self):
self._color = Color((0, 0, 0, 1.0))
self.attrs = [self._color]
def render(self):
for attr in reversed(self.attrs):
attr.enable()
self.render1()
for attr in self.attrs:
attr.disable()
def render1(self):
raise NotImplementedError
def add_attr(self, attr):
self.attrs.append(attr)
def set_color(self, r, g, b, alpha=1):
self._color.vec4 = (r, g, b, alpha)
class Attr(object):
def enable(self):
raise NotImplementedError
def disable(self):
pass
class Transform(Attr):
def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1, 1)):
self.set_translation(*translation)
self.set_rotation(rotation)
self.set_scale(*scale)
def enable(self):
glPushMatrix()
glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL loc ppint
glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0)
glScalef(self.scale[0], self.scale[1], 1)
def disable(self):
glPopMatrix()
def set_translation(self, newx, newy):
self.translation = (float(newx), float(newy))
def set_rotation(self, new):
self.rotation = float(new)
def set_scale(self, newx, newy):
self.scale = (float(newx), float(newy))
class Color(Attr):
def __init__(self, vec4):
self.vec4 = vec4
def enable(self):
glColor4f(*self.vec4)
class LineStyle(Attr):
def __init__(self, style):
self.style = style
def enable(self):
glEnable(GL_LINE_STIPPLE)
glLineStipple(1, self.style)
def disable(self):
glDisable(GL_LINE_STIPPLE)
class LineWidth(Attr):
def __init__(self, stroke):
self.stroke = stroke
def enable(self):
glLineWidth(self.stroke)
class Point(Geom):
def __init__(self):
Geom.__init__(self)
def render1(self):
glBegin(GL_POINTS) # draw point
glVertex3f(0.0, 0.0, 0.0)
glEnd()
class FilledPolygon(Geom):
def __init__(self, v):
Geom.__init__(self)
self.v = v
def render1(self):
if len(self.v) == 4:
glBegin(GL_QUADS)
elif len(self.v) > 4:
glBegin(GL_POLYGON)
else:
glBegin(GL_TRIANGLES)
for p in self.v:
glVertex3f(p[0], p[1], 0) # draw each vertex
glEnd()
color = (
self._color.vec4[0] * 0.5,
self._color.vec4[1] * 0.5,
self._color.vec4[2] * 0.5,
self._color.vec4[3] * 0.5)
glColor4f(*color)
glBegin(GL_LINE_LOOP)
for p in self.v:
glVertex3f(p[0], p[1], 0) # draw each vertex
glEnd()
def make_circle(radius=10, res=30, filled=True):
points = []
for i in range(res):
ang = 2 * math.pi * i / res
points.append((math.cos(ang) * radius, math.sin(ang) * radius))
if filled:
return FilledPolygon(points)
else:
return PolyLine(points, True)
def make_polygon(v, filled=True):
if filled:
return FilledPolygon(v)
else:
return PolyLine(v, True)
def make_polyline(v):
return PolyLine(v, False)
def make_capsule(length, width):
l, r, t, b = 0, length, width / 2, -width / 2
box = make_polygon([(l, b), (l, t), (r, t), (r, b)])
circ0 = make_circle(width / 2)
circ1 = make_circle(width / 2)
circ1.add_attr(Transform(translation=(length, 0)))
geom = Compound([box, circ0, circ1])
return geom
class Compound(Geom):
def __init__(self, gs):
Geom.__init__(self)
self.gs = gs
for g in self.gs:
g.attrs = [a for a in g.attrs if not isinstance(a, Color)]
def render1(self):
for g in self.gs:
g.render()
class PolyLine(Geom):
def __init__(self, v, close):
Geom.__init__(self)
self.v = v
self.close = close
self.linewidth = LineWidth(1)
self.add_attr(self.linewidth)
def render1(self):
glBegin(GL_LINE_LOOP if self.close else GL_LINE_STRIP)
for p in self.v:
glVertex3f(p[0], p[1], 0) # draw each vertex
glEnd()
def set_linewidth(self, x):
self.linewidth.stroke = x
class Line(Geom):
def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)):
Geom.__init__(self)
self.start = start
self.end = end
self.linewidth = LineWidth(1)
self.add_attr(self.linewidth)
def render1(self):
glBegin(GL_LINES)
glVertex2f(*self.start)
glVertex2f(*self.end)
glEnd()
class Image(Geom):
def __init__(self, fname, width, height):
Geom.__init__(self)
self.width = width
self.height = height
img = pyglet.image.load(fname)
self.img = img
self.flip = False
def render1(self):
self.img.blit(-self.width / 2, -self.height / 2, width=self.width, height=self.height)
class SimpleImageViewer(object):
def __init__(self, display=None):
self.window = None
self.isopen = False
self.display = display
def imshow(self, arr):
if self.window is None:
height, width, channels = arr.shape
self.window = pyglet.window.Window(width=width, height=height, display=self.display)
self.width = width
self.height = height
self.isopen = True
assert arr.shape == (self.height, self.width, 3), "You passed in an image with the wrong number shape"
image = pyglet.image.ImageData(self.width, self.height, 'RGB', arr.tobytes(), pitch=self.width * -3)
self.window.clear()
self.window.switch_to()
self.window.dispatch_events()
image.blit(0, 0)
self.window.flip()
def close(self):
if self.isopen:
self.window.close()
self.isopen = False
def __del__(self):
self.close()

View File

@@ -1,123 +0,0 @@
import os
from ray_on_aml.core import Ray_On_AML
from ray.tune import run_experiments
from ray.tune.registry import register_trainable, register_env, get_trainable_cls
import ray.rllib.contrib.maddpg.maddpg as maddpg
from rllib_multiagent_particle_env import env_creator
from util import parse_args
def setup_ray():
ray_on_aml = Ray_On_AML()
ray_on_aml.getRay()
register_env('particle', env_creator)
def gen_policy(args, env, id):
use_local_critic = [
args.adv_policy == 'ddpg' if id < args.num_adversaries else
args.good_policy == 'ddpg' for id in range(env.num_agents)
]
return (
None,
env.observation_space_dict[id],
env.action_space_dict[id],
{
'agent_id': id,
'use_local_critic': use_local_critic[id],
'obs_space_dict': env.observation_space_dict,
'act_space_dict': env.action_space_dict,
}
)
def gen_policies(args, env_config):
env = env_creator(env_config)
return {'policy_%d' % i: gen_policy(args, env, i) for i in range(len(env.observation_space_dict))}
def to_multiagent_config(policies):
policy_ids = list(policies.keys())
return {
'policies': policies,
'policy_mapping_fn': lambda index: policy_ids[index]
}
def train(args, env_config):
def stop(trial_id, result):
max_train_time = int(os.environ.get('AML_MAX_TRAIN_TIME_SECONDS', 2 * 60 * 60))
return result['episode_reward_mean'] >= args.final_reward \
or result['time_total_s'] >= max_train_time
run_experiments({
'MADDPG_RLLib': {
'run': 'contrib/MADDPG',
'env': 'particle',
'stop': stop,
# Uncomment to enable more frequent checkpoints:
# 'checkpoint_freq': args.checkpoint_freq,
'checkpoint_at_end': True,
'local_dir': args.local_dir,
'restore': args.restore,
'config': {
# === Log ===
'log_level': 'ERROR',
# === Environment ===
'env_config': env_config,
'num_envs_per_worker': args.num_envs_per_worker,
'horizon': args.max_episode_len,
# === Policy Config ===
# --- Model ---
'good_policy': args.good_policy,
'adv_policy': args.adv_policy,
'actor_hiddens': [args.num_units] * 2,
'actor_hidden_activation': 'relu',
'critic_hiddens': [args.num_units] * 2,
'critic_hidden_activation': 'relu',
'n_step': args.n_step,
'gamma': args.gamma,
# --- Exploration ---
'tau': 0.01,
# --- Replay buffer ---
'buffer_size': int(1e6),
# --- Optimization ---
'actor_lr': args.lr,
'critic_lr': args.lr,
'learning_starts': args.train_batch_size * args.max_episode_len,
'sample_batch_size': args.sample_batch_size,
'train_batch_size': args.train_batch_size,
'batch_mode': 'truncate_episodes',
# --- Parallelism ---
'num_workers': args.num_workers,
'num_gpus': args.num_gpus,
'num_gpus_per_worker': 0,
# === Multi-agent setting ===
'multiagent': to_multiagent_config(gen_policies(args, env_config)),
},
},
}, verbose=1)
if __name__ == '__main__':
args = parse_args()
setup_ray()
env_config = {
'scenario_name': args.scenario,
'horizon': args.max_episode_len,
'video_frequency': args.checkpoint_freq,
}
train(args, env_config)

View File

@@ -1,113 +0,0 @@
# Some code taken from: https://github.com/wsjeon/maddpg-rllib/
import imp
import os
import gym
from gym import wrappers
from ray import rllib
from multiagent.environment import MultiAgentEnv
import multiagent.scenarios as scenarios
CUSTOM_SCENARIOS = ['simple_switch']
class ParticleEnvRenderWrapper(gym.Wrapper):
def __init__(self, env, horizon):
super().__init__(env)
self.horizon = horizon
def reset(self):
self._num_steps = 0
return self.env.reset()
def render(self, mode):
if mode == 'human':
self.env.render(mode=mode)
else:
return self.env.render(mode=mode)[0]
def step(self, actions):
obs_list, rew_list, done_list, info_list = self.env.step(actions)
self._num_steps += 1
done = (all(done_list) or self._num_steps >= self.horizon)
# Gym monitor expects reward to be an int. This is only used for its
# stats reporter, which we're not interested in. To make video recording
# work, we package the rewards in the info object and extract it below.
return obs_list, 0, done, [rew_list, done_list, info_list]
class RLlibMultiAgentParticleEnv(rllib.MultiAgentEnv):
def __init__(self, scenario_name, horizon, monitor_enabled=False, video_frequency=500):
self._env = _make_env(scenario_name, horizon, monitor_enabled, video_frequency)
self.num_agents = self._env.n
self.agent_ids = list(range(self.num_agents))
self.observation_space_dict = self._make_dict(self._env.observation_space)
self.action_space_dict = self._make_dict(self._env.action_space)
def reset(self):
obs_dict = self._make_dict(self._env.reset())
return obs_dict
def step(self, action_dict):
actions = list(action_dict.values())
obs_list, _, _, infos = self._env.step(actions)
rew_list, done_list, _ = infos
obs_dict = self._make_dict(obs_list)
rew_dict = self._make_dict(rew_list)
done_dict = self._make_dict(done_list)
done_dict['__all__'] = all(done_list)
info_dict = self._make_dict([{'done': done} for done in done_list])
return obs_dict, rew_dict, done_dict, info_dict
def render(self, mode='human'):
self._env.render(mode=mode)
def _make_dict(self, values):
return dict(zip(self.agent_ids, values))
def _video_callable(video_frequency):
def should_record_video(episode_id):
if episode_id % video_frequency == 0:
return True
return False
return should_record_video
def _make_env(scenario_name, horizon, monitor_enabled, video_frequency):
if scenario_name in CUSTOM_SCENARIOS:
# Scenario file must exist locally
file_path = os.path.join(os.path.dirname(__file__), scenario_name + '.py')
scenario = imp.load_source('', file_path).Scenario()
else:
scenario = scenarios.load(scenario_name + '.py').Scenario()
world = scenario.make_world()
env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation)
env.metadata['video.frames_per_second'] = 8
env = ParticleEnvRenderWrapper(env, horizon)
if not monitor_enabled:
return env
return wrappers.Monitor(env, './logs/videos', resume=True, video_callable=_video_callable(video_frequency))
def env_creator(config):
monitor_enabled = False
if hasattr(config, 'worker_index') and hasattr(config, 'vector_index'):
monitor_enabled = (config.worker_index == 1 and config.vector_index == 0)
return RLlibMultiAgentParticleEnv(**config, monitor_enabled=monitor_enabled)

View File

@@ -1,358 +0,0 @@
import numpy as np
import random
from multiagent.core import World, Agent, Landmark
from multiagent.scenario import BaseScenario
class SwitchWorld(World):
""" Extended World with hills and switches """
def __init__(self, hills, switches):
super().__init__()
# add hills and switches
self.hills = hills
self.switches = switches
self.landmarks.extend(self.hills)
self.landmarks.extend(self.switches)
def step(self):
super().step()
# if all hills are activated, reset the switches and hills
if all([hill.active for hill in self.hills]):
self.reset_hills()
self.reset_switches()
else:
# Update switches
for switch in self.switches:
switch.step(self)
# Update hills
for hill in self.hills:
hill.step(self)
def reset_hills(self):
possible_hill_positions = [np.array([-0.8, 0]), np.array([0, 0.8]), np.array([0.8, 0]), np.array([0, -0.8])]
hill_positions = random.sample(possible_hill_positions, k=len(self.hills))
for i, hill in enumerate(self.hills):
hill.state.p_pos = hill_positions[i]
hill.deactivate()
def reset_switches(self):
possible_switch_positions = [
np.array([-0.8, -0.8]),
np.array([-0.8, 0.8]),
np.array([0.8, -0.8]),
np.array([0.8, 0.8])]
switch_positions = random.sample(possible_switch_positions, k=len(self.switches))
for i, switch in enumerate(self.switches):
switch.state.p_pos = switch_positions[i]
switch.deactivate()
class Scenario(BaseScenario):
def make_world(self):
# main configurations
num_agents = 2
num_hills = 2
num_switches = 1
self.max_episode_length = 100
# create hills (on edges)
possible_hill_positions = [np.array([-0.8, 0]), np.array([0, 0.8]), np.array([0.8, 0]), np.array([0, -0.8])]
hill_positions = random.sample(possible_hill_positions, k=num_hills)
hills = [Hill(hill_positions[i]) for i in range(num_hills)]
# create switches (in corners)
possible_switch_positions = [
np.array([-0.8, -0.8]),
np.array([-0.8, 0.8]),
np.array([0.8, -0.8]),
np.array([0.8, 0.8])]
switch_positions = random.sample(possible_switch_positions, k=num_switches)
switches = [Switch(switch_positions[i]) for i in range(num_switches)]
# make world and set basic properties
world = SwitchWorld(hills, switches)
world.dim_c = 2
world.collaborative = True
# add agents
world.agents = [Agent() for i in range(num_agents)]
for i, agent in enumerate(world.agents):
agent.name = 'agent %d' % i
agent.collide = True
agent.silent = True
agent.size = 0.1
agent.accel = 5.0
agent.max_speed = 5.0
if i == 0:
agent.color = np.array([0.35, 0.35, 0.85])
else:
agent.color = np.array([0.35, 0.85, 0.85])
# make initial conditions
self.reset_world(world)
return world
def reset_world(self, world):
# set random initial states
for agent in world.agents:
agent.state.p_pos = np.array([random.uniform(-1, +1) for _ in range(world.dim_p)])
agent.state.p_vel = np.zeros(world.dim_p)
agent.state.c = np.zeros(world.dim_c)
# set hills randomly
world.reset_hills()
# set switches randomly
world.reset_switches()
def is_collision(self, agent1, agent2):
delta_pos = agent1.state.p_pos - agent2.state.p_pos
dist = np.sqrt(np.sum(np.square(delta_pos)))
dist_min = agent1.size + agent2.size
return True if dist < dist_min else False
def reward(self, agent, world):
# Agents are rewarded based on number of landmarks activated
rew = 0
if all([h.active for h in world.hills]):
rew += 100
else:
# give bonus each time a hill is activated
for hill in world.hills:
if hill.activated_just_now:
rew += 50
# penalise timesteps where nothing is happening
if rew == 0:
rew -= 0.1
# add collision penalty
if agent.collide:
for a in world.agents:
# note: this also counts collision with "itself", so gives -1 at every timestep
# would be good to tune the reward function and use (not a == agent) here
if self.is_collision(a, agent):
rew -= 1
return rew
def observation(self, agent, world):
# get positions of all entities in this agent's reference frame
entity_pos = []
for entity in world.landmarks: # world.entities:
entity_pos.append(entity.state.p_pos - agent.state.p_pos)
# entity colors
entity_color = []
for entity in world.landmarks: # world.entities:
entity_color.append(entity.color)
# communication of all other agents
comm = []
other_pos = []
for other in world.agents:
if other is agent:
continue
comm.append(other.state.c)
other_pos.append(other.state.p_pos - agent.state.p_pos)
return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + comm)
class Hill(Landmark):
"""
A hill that can be captured by an agent.
To be captured, a team must occupy a hill for a fixed amount of time.
"""
def __init__(self,
pos=None,
size=0.08,
capture_time=2
):
# Initialize Landmark super class
super().__init__()
self.movable = False
self.collide = False
self.state.p_pos = pos
self.size = size
# Set static configurations
self.capture_time = capture_time
# Initialize all hills to be inactive
self.active = False
self.color = np.array([0.5, 0.5, 0.5])
self.capture_timer = 0
self.activated_just_now = False
def activate(self):
self.active = True
self.color = np.array([0.1, 0.1, 0.9])
def deactivate(self):
self.active = False
self.color = np.array([0.5, 0.5, 0.5])
def _is_occupied(self, agents):
# a hill is occupied if an agent stands on it
for agent in agents:
dist = np.sqrt(np.sum(np.square(agent.state.p_pos - self.state.p_pos)))
if dist < agent.size + self.size:
return True
return False
def step(self, world):
self.activated_just_now = False
# If hill isn't activated yet, check if an agent activates it
# if (not self.active) and (world.switch.is_active()):
if (not self.active):
# Check if an agent is on the hill and all switches are active
if (self._is_occupied(world.agents)) and all([switch.active for switch in world.switches]):
self.capture_timer += 1
# activate hill (this is irreversible)
if self.capture_timer > self.capture_time:
self.activate()
self.activated_just_now = True
# Reset capture timer if hill is not occupied
else:
self.capture_timer = 0
class Switch(Landmark):
"""
A switch that can be activated by an agent.
The agent has to stay on the switch for it to be active.
"""
def __init__(self,
pos=None,
size=0.03,
):
# Initialize Landmark super class
super().__init__()
self.movable = False
self.collide = False
self.state.p_pos = pos
self.size = size
# Initialize all hills to be inactive
self.active = False
self.color = np.array([0.8, 0.05, 0.3])
self.capture_timer = 0
def activate(self):
self.active = True
self.color = np.array([0.1, 0.9, 0.4])
def deactivate(self):
self.active = False
self.color = np.array([0.8, 0.05, 0.3])
def _is_occupied(self, agents):
# a switch is active if an agent stands on it
for agent in agents:
dist = np.sqrt(np.sum(np.square(agent.state.p_pos - self.state.p_pos)))
if dist < agent.size + self.size:
return True
return False
def step(self, world):
# check if an agent is on the switch and activate/deactive accordingly
if self._is_occupied(world.agents):
self.activate()
else:
self.deactivate()
class SwitchExpertPolicy():
"""
Hand-coded expert policy for the simple switch environment.
Types of possible experts:
- always go to the switch
- always go to the hills
"""
def __init__(self, dim_c, agent, world, expert_type=None, discrete_action_input=True):
self.dim_c = dim_c
self.discrete_action_input = discrete_action_input
# the agent we control and world we're in
self.agent = agent
self.world = world
if expert_type is None:
self.expert_type = random.choice(['switch', 'hill'])
else:
self.expert_type = expert_type
if self.expert_type == 'switch':
self.target_switch = self.select_inital_target_switch()
elif self.expert_type == 'hill':
self.target_hill = self.select_inital_target_hill()
else:
raise NotImplementedError
self.step_count = 0
def select_inital_target_switch(self):
return random.choice(self.world.switches)
def select_inital_target_hill(self):
return random.choice(self.world.hills)
def action(self):
# select a target!
if self.expert_type == 'switch':
# if agent is not already on a switch, choose target switch
if not any([switch._is_occupied([self.agent]) for switch in self.world.switches]):
# select a target switch if there's an inactive one
inactive_switches = [switch for switch in self.world.switches if not switch.active]
if len(inactive_switches) > 0 and (self.target_switch not in inactive_switches):
self.target_switch = random.choice(inactive_switches)
target = self.target_switch.state.p_pos
elif self.expert_type == 'hill':
# select a target hill if we haven't done so yet, or the current target switch is inactive
inactive_hills = [hill for hill in self.world.hills if not hill.active]
if len(inactive_hills) > 0 and (self.target_hill not in inactive_hills):
self.target_hill = random.choice(inactive_hills)
target = self.target_hill.state.p_pos
self.step_count += 1
impulse = np.clip(target - self.agent.state.p_pos, -self.agent.u_range, self.agent.u_range)
if self.discrete_action_input:
u_idx = np.argmax(np.abs(impulse))
if u_idx == 0 and impulse[u_idx] < 0:
u = 1
elif u_idx == 0 and impulse[u_idx] > 0:
u = 2
elif u_idx == 1 and impulse[u_idx] < 0:
u = 3
elif u_idx == 1 and impulse[u_idx] > 0:
u = 4
else:
u = 0
else:
u = np.zeros(5)
if (impulse[0] == impulse[1] == 0) \
or (self.step_count < self.burn_in) \
or (self.burn_step != 0 and self.step_count % self.burn_step != 0):
u[0] = 0.1
else:
pass
# u: noop (?), right, left, down, up
if impulse[0] > 0: # x-direction (- left/right + )
u[1] = impulse[0] # right
elif impulse[0] < 0:
u[2] = -impulse[0]
if impulse[1] > 0: # y-direction (- up/down + )
u[3] = impulse[1]
elif impulse[1] < 0:
u[4] = -impulse[1]
return u

View File

@@ -1,82 +0,0 @@
import argparse
import os
import re
from rllib_multiagent_particle_env import CUSTOM_SCENARIOS
def parse_args():
parser = argparse.ArgumentParser('MADDPG with OpenAI MPE')
# Environment
parser.add_argument('--scenario', type=str, default='simple',
choices=['simple', 'simple_speaker_listener',
'simple_crypto', 'simple_push',
'simple_tag', 'simple_spread', 'simple_adversary'
] + CUSTOM_SCENARIOS,
help='name of the scenario script')
parser.add_argument('--max-episode-len', type=int, default=25,
help='maximum episode length')
parser.add_argument('--num-episodes', type=int, default=60000,
help='number of episodes')
parser.add_argument('--num-adversaries', type=int, default=0,
help='number of adversaries')
parser.add_argument('--good-policy', type=str, default='maddpg',
help='policy for good agents')
parser.add_argument('--adv-policy', type=str, default='maddpg',
help='policy of adversaries')
# Core training parameters
parser.add_argument('--lr', type=float, default=1e-2,
help='learning rate for Adam optimizer')
parser.add_argument('--gamma', type=float, default=0.95,
help='discount factor')
# NOTE: 1 iteration = sample_batch_size * num_workers timesteps * num_envs_per_worker
parser.add_argument('--sample-batch-size', type=int, default=25,
help='number of data points sampled /update /worker')
parser.add_argument('--train-batch-size', type=int, default=1024,
help='number of data points /update')
parser.add_argument('--n-step', type=int, default=1,
help='length of multistep value backup')
parser.add_argument('--num-units', type=int, default=64,
help='number of units in the mlp')
parser.add_argument('--final-reward', type=int, default=-400,
help='final reward after which to stop training')
# Checkpoint
parser.add_argument('--checkpoint-freq', type=int, default=200,
help='save model once every time this many iterations are completed')
parser.add_argument('--local-dir', type=str, default='./logs',
help='path to save checkpoints')
parser.add_argument('--restore', type=str, default=None,
help='directory in which training state and model are loaded')
# Parallelism
parser.add_argument('--num-workers', type=int, default=1)
parser.add_argument('--num-envs-per-worker', type=int, default=4)
parser.add_argument('--num-gpus', type=int, default=0)
return parser.parse_args()
def find_final_checkpoint(start_dir):
def find(pattern, path):
result = []
for root, _, files in os.walk(path):
for name in files:
if pattern.match(name):
result.append(os.path.join(root, name))
return result
cp_pattern = re.compile('.*checkpoint-\\d+$')
checkpoint_files = find(cp_pattern, start_dir)
checkpoint_numbers = []
for file in checkpoint_files:
checkpoint_numbers.append(int(file.split('-')[-1]))
final_checkpoint_number = max(checkpoint_numbers)
return next(
checkpoint_file for checkpoint_file in checkpoint_files
if checkpoint_file.endswith(str(final_checkpoint_number)))

View File

@@ -1,566 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"![Impressions](https://PixelServer20190423114238.azurewebsites.net/api/impressions/MachineLearningNotebooks/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/particle.png)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Reinforcement Learning in Azure Machine Learning - Training multiple agents on collaborative ParticleEnv tasks\n",
"\n",
"This tutorial will show you how to train policies in a multi-agent scenario.\n",
"We use OpenAI Gym's [Particle environments](https://github.com/openai/multiagent-particle-envs),\n",
"which model agents and landmarks in a two-dimensional world. Particle comes with\n",
"several predefined scenarios, both competitive and collaborative, and with or without communication.\n",
"\n",
"For this tutorial, we pick a cooperative navigation scenario where N agents are in a world with N\n",
"landmarks. The agents' goal is to cover all the landmarks without collisions,\n",
"so agents must learn to avoid each other (social distancing!). The video below shows training\n",
"results for N=3 agents/landmarks:\n",
"\n",
"<table style=\"width:50%\">\n",
" <tr>\n",
" <th style=\"text-align: center;\">\n",
" <img src=\"./images/particle_simple_spread.gif\" alt=\"Particle video\" align=\"middle\" margin-left=\"auto\" margin-right=\"auto\"/>\n",
" </th>\n",
" </tr>\n",
" <tr style=\"text-align: center;\">\n",
" <th>Fig 1. Video of 3 agents covering 3 landmarks in a multiagent Particle scenario.</th>\n",
" </tr>\n",
"</table>\n",
"\n",
"The tutorial will cover the following steps:\n",
"- Initializing Azure Machine Learning resources for training\n",
"- Training policies in a multi-agent environment with Azure Machine Learning service\n",
"- Monitoring training progress\n",
"\n",
"## Prerequisites\n",
"\n",
"The user should have completed the Azure Machine Learning introductory tutorial. You will need to make sure that you have a valid subscription id, a resource group and a workspace. For detailed instructions see [Tutorial: Get started creating your first ML experiment](https://docs.microsoft.com/en-us/azure/machine-learning/tutorial-1st-experiment-sdk-setup).\n",
"\n",
"Please ensure that you have a current version of IPython (>= 7.15) installed.\n",
"\n",
"While this is a standalone notebook, we highly recommend going over the introductory notebooks for RL first.\n",
"- Getting started:\n",
" - [RL using a compute instance with Azure Machine Learning](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/reinforcement-learning/cartpole-on-compute-instance/cartpole_ci.ipynb)\n",
" - [RL using Azure Machine Learning compute](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/reinforcement-learning/cartpole-on-single-compute/cartpole_sc.ipynb)\n",
"- [Scaling RL training runs with Azure Machine Learning](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/reinforcement-learning/atari-on-distributed-compute/pong_rllib.ipynb)\n",
"\n",
"## Initialize resources\n",
"\n",
"All required Azure Machine Learning service resources for this tutorial can be set up from Jupyter. This includes:\n",
"\n",
"- Connecting to your existing Azure Machine Learning workspace.\n",
"- Creating an experiment to track runs.\n",
"- Creating remote compute targets for [Ray](https://docs.ray.io/en/latest/index.html).\n",
"\n",
"\n",
"### Azure Machine Learning SDK\n",
"\n",
"Display the Azure Machine Learning SDK version."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1646249589452
}
},
"outputs": [],
"source": [
"import azureml.core\n",
"print('Azure Machine Learning SDK version: ', azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Connect to workspace\n",
"\n",
"Get a reference to an existing Azure Machine Learning workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1646250284486
}
},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.location, ws.resource_group, sep=' | ')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create an experiment\n",
"\n",
"Create an experiment to track the runs in your workspace. A\n",
"workspace can have multiple experiments and each experiment\n",
"can be used to track multiple runs (see [documentation](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.experiment.experiment?view=azure-ml-py)\n",
"for details)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1646250342411
}
},
"outputs": [],
"source": [
"from azureml.core import Experiment\n",
"\n",
"exp = Experiment(workspace=ws, name='particle-multiagent')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create or attach an existing compute resource\n",
"\n",
"A compute target is a designated compute resource where you run your training script. For more information, see [What are compute targets in Azure Machine Learning service?](https://docs.microsoft.com/en-us/azure/machine-learning/concept-compute-target).\n",
"\n",
"> Note that if you have an AzureML Data Scientist role, you will not have permission to create compute resources. Talk to your workspace or IT admin to create the compute targets described in this section, if they do not already exist.\n",
"\n",
"#### CPU target for Ray head\n",
"\n",
"In the experiment setup for this tutorial, the Ray head node will\n",
"run on a CPU node (D3 type). A maximum cluster size of 1 node is\n",
"therefore sufficient. If you wish to run multiple experiments in\n",
"parallel using the same CPU cluster, you may elect to increase this\n",
"number. The cluster will automatically scale down to 0 nodes when\n",
"no training jobs are scheduled (see min_nodes).\n",
"\n",
"The code below creates a compute cluster of D3 type nodes.\n",
"If the cluster with the specified name is already in your workspace\n",
"the code will skip the creation process.\n",
"\n",
"**Note: Creation of a compute resource can take several minutes**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1646250346756
}
},
"outputs": [],
"source": [
"from azureml.core.compute import AmlCompute, ComputeTarget\n",
"\n",
"cpu_cluster_name = 'cpu-cl-d3'\n",
"\n",
"if cpu_cluster_name in ws.compute_targets:\n",
" cpu_cluster = ws.compute_targets[cpu_cluster_name]\n",
" if cpu_cluster and type(cpu_cluster) is AmlCompute:\n",
" if cpu_cluster.provisioning_state == 'Succeeded':\n",
" print('Found existing compute target for {}. Using it.'.format(cpu_cluster_name))\n",
" else: \n",
" raise Exception('Found existing compute target for {} '.format(cpu_cluster_name)\n",
" + 'but it is in state {}'.format(cpu_cluster.provisioning_state))\n",
"else:\n",
" print('Creating a new compute target for {}...'.format(cpu_cluster_name))\n",
" provisioning_config = AmlCompute.provisioning_configuration(\n",
" vm_size='STANDARD_D3',\n",
" min_nodes=0, \n",
" max_nodes=1)\n",
"\n",
" cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, provisioning_config)\n",
" cpu_cluster.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
" \n",
" print('Cluster created.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Training the policies\n",
"\n",
"### Training environment\n",
"\n",
"This tutorial uses a custom docker image\n",
"with the necessary software installed. The [Environment](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-use-environments)\n",
"class stores the configuration for the training environment. The\n",
"docker image is set via `env.docker.base_image`.\n",
"`user_managed_dependencies` is set so that\n",
"the preinstalled Python packages in the image are preserved.\n",
"\n",
"Note that since we want to capture videos of the training runs requiring a display, we set the interpreter_path such that the Python process is started via **xvfb-run**."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1646257481631
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"from azureml.core import Environment\n",
"import os\n",
"\n",
"ray_environment_name = 'particle-cpu'\n",
"ray_environment_dockerfile_path = os.path.join(os.getcwd(), 'docker', 'cpu', 'Dockerfile')\n",
"ray_environment = Environment. \\\n",
" from_dockerfile(name=ray_environment_name, dockerfile=ray_environment_dockerfile_path). \\\n",
" register(workspace=ws)\n",
"ray_cpu_build_details = ray_environment.build(workspace=ws)\n",
"\n",
"ray_cpu_build_details.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Training script\n",
"\n",
"This tutorial uses the multiagent algorithm [Multi-Agent Deep Deterministic Policy Gradient (MADDPG)](https://docs.ray.io/en/latest/rllib-algorithms.html?highlight=maddpg#multi-agent-deep-deterministic-policy-gradient-contrib-maddpg).\n",
"For training policies in a multiagent scenario, Ray's RLlib also\n",
"requires the `multiagent` configuration section to be specified. You\n",
"can find more information in the [common parameters](https://docs.ray.io/en/latest/rllib-training.html?highlight=multiagent#common-parameters)\n",
"documentation.\n",
"\n",
"The stopping criteria are set such that the training run is\n",
"terminated after either a mean reward of -450 is observed, or\n",
"training has run for over 2 hours.\n",
"\n",
"### Submitting a training run\n",
"\n",
"You can submit the training run using a `ScriptRunConfig`. By providing the\n",
"command to run the training, and a `RunConfig` object configured with your\n",
"compute target, number of nodes, and environment image to use.\n",
"\n",
"Note that you can use the same notebook and scripts to experiment with\n",
"different Particle environments. You can find a list of supported\n",
"environments [here](https://github.com/openai/multiagent-particle-envs/tree/master#list-of-environments).\n",
"Simply change the `--scenario` parameter to a supported scenario.\n",
"\n",
"In order to get the best training results, you can also adjust the\n",
"`--final-reward` parameter to determine when to stop training. A greater\n",
"reward means longer running time, but improved results. By default,\n",
"the final reward will be -450, which should show good progress after\n",
"about one hour of run time.\n",
"\n",
"For this notebook, we use a single D3 nodes, giving us a total of 4 CPUs and\n",
"0 GPUs. One CPU is used by the MADDPG trainer, and an additional CPU is\n",
"consumed by the RLlib rollout worker. The other 2 CPUs are not used, though\n",
"smaller node types will run out of memory for this task."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1646275371701
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"from azureml.core import RunConfiguration, ScriptRunConfig, Experiment\n",
"from azureml.core.runconfig import DockerConfiguration, RunConfiguration\n",
"from azureml.widgets import RunDetails\n",
"\n",
"experiment_name = 'particle-multiagent'\n",
"\n",
"experiment = Experiment(workspace=ws, name=experiment_name)\n",
"\n",
"aml_run_config_ml = RunConfiguration(communicator='OpenMpi')\n",
"aml_run_config_ml.target = cpu_cluster\n",
"aml_run_config_ml.node_count = 1\n",
"aml_run_config_ml.environment = ray_environment\n",
"\n",
"config = ScriptRunConfig(source_directory='./files',\n",
" command=[\n",
" 'xvfb-run -s \"-screen 0 640x480x16 -ac +extension GLX +render\" python',\n",
" 'particle_train.py',\n",
" '--scenario', 'simple_spread',\n",
" '--final-reward', '-450'\n",
" ],\n",
" run_config = aml_run_config_ml\n",
" )\n",
"train_run = experiment.submit(config)\n",
"\n",
"RunDetails(train_run).show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Job cancellation\n",
"\n",
"You may cancel the job by uncommenting and running the cell below."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# If you wish to cancel the run before it completes, uncomment and execute:\n",
"# train_run.cancel()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Monitoring training progress\n",
"\n",
"### View the Tensorboard\n",
"\n",
"The Tensorboard can be displayed via the Azure Machine Learning\n",
"service's [Tensorboard API](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-monitor-tensorboard).\n",
"When running locally, please make sure to follow the instructions\n",
"in the link and install required packages. Running this cell will output a URL for the Tensorboard.\n",
"\n",
"Note that the training script sets the log directory when\n",
"starting RLlib via the local_dir parameter. ./logs will automatically\n",
"appear in the downloadable files for a run. Since this script is\n",
"executed on the Ray head node run, we need to get a reference to it\n",
"as shown below.\n",
"\n",
"The Tensorboard API will continuously stream logs from the run.\n",
"\n",
"**Note: It may take a couple of minutes after the run is in \"Running\"\n",
"state before Tensorboard files are available and the board will refresh automatically**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# from azureml.tensorboard import Tensorboard\n",
"\n",
"# tb = Tensorboard([train_run])\n",
"# tb.start()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### View training videos\n",
"\n",
"As mentioned above, we record videos of the agents interacting with the\n",
"Particle world. These videos are often a crucial indicator for training\n",
"success. The code below downloads the latest video as it becomes available\n",
"and displays it in-line.\n",
"\n",
"Over time, the agents learn to cooperate and avoid collisions while\n",
"traveling to all landmarks.\n",
"\n",
"**Note: It can take several minutes for a video to appear after the run\n",
"was started.**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from azureml.core import Dataset\n",
"from azureml.data.dataset_error_handling import DatasetValidationError\n",
"\n",
"from IPython.display import clear_output\n",
"from IPython.core.display import display, Video\n",
"\n",
"datastore = ws.datastores['workspaceartifactstore']\n",
"path_prefix = './tmp_videos'\n",
"\n",
"def download_latest_training_video(run, video_checkpoint_counter):\n",
" run_artifacts_path = os.path.join('ExperimentRun', f'dcid.{run.id}', 'logs', 'videos')\n",
" \n",
" try:\n",
" run_artifacts_ds = Dataset.File.from_files(datastore.path(os.path.join(run_artifacts_path, '**')))\n",
" except DatasetValidationError as e:\n",
" # This happens at the start of the run when there is no data available\n",
" # in the run's artifacts\n",
" return None, video_checkpoint_counter\n",
" \n",
" video_files = [file for file in run_artifacts_ds.to_path() if file.endswith('.mp4')]\n",
" if len(video_files) == video_checkpoint_counter:\n",
" return None, video_checkpoint_counter\n",
" \n",
" iteration_numbers = [int(vf[vf.rindex('video') + len('video') : vf.index('.mp4')]) for vf in video_files]\n",
" latest_video = next(vf for vf in video_files if vf.endswith('{num}.mp4'.format(num=max(iteration_numbers))))\n",
" latest_video = os.path.join(run_artifacts_path, os.path.normpath(latest_video[1:]))\n",
" \n",
" datastore.download(\n",
" target_path=path_prefix,\n",
" prefix=latest_video.replace('\\\\', '/'),\n",
" show_progress=False)\n",
" \n",
" return os.path.join(path_prefix, latest_video), len(video_files)\n",
"\n",
"\n",
"def render_video(vf):\n",
" clear_output(wait=True)\n",
" display(Video(data=vf, embed=True, html_attributes='loop autoplay controls width=50%'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import shutil, time\n",
"\n",
"terminal_statuses = ['Canceled', 'Completed', 'Failed']\n",
"video_checkpoint_counter = 0\n",
"\n",
"while train_run.get_status() not in terminal_statuses:\n",
" video_file, video_checkpoint_counter = download_latest_training_video(train_run, video_checkpoint_counter)\n",
" if video_file is not None:\n",
" render_video(video_file)\n",
" \n",
" print('Displaying video number {}'.format(video_checkpoint_counter))\n",
" shutil.rmtree(path_prefix)\n",
" \n",
" # Interrupting the kernel can take up to 15 seconds\n",
" # depending on when time.sleep started\n",
" time.sleep(15)\n",
" \n",
"train_run.wait_for_completion()\n",
"print('The training run has reached a terminal status.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Cleaning up\n",
"\n",
"Below, you can find code snippets for your convenience to clean up any resources created as part of this tutorial you don't wish to retain."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# to stop the Tensorboard, uncomment and run\n",
"# tb.stop()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# to delete the cpu compute target, uncomment and run\n",
"# cpu_cluster.delete()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Next steps\n",
"\n",
"We would love to hear your feedback! Please let us know what you think of Reinforcement Learning in Azure Machine Learning and what features you are looking forward to."
]
}
],
"metadata": {
"authors": [
{
"name": "andress"
}
],
"categories": [
"how-to-use-azureml",
"reinforcement-learning"
],
"interpreter": {
"hash": "13382f70c1d0595120591d2e358c8d446daf961bf951d1fba9a32631e205d5ab"
},
"kernel_info": {
"name": "python38-azureml"
},
"kernelspec": {
"display_name": "Python 3.8 - AzureML",
"language": "python",
"name": "python38-azureml"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
},
"notice": "Copyright (c) Microsoft Corporation. All rights reserved.\u00c3\u0192\u00c2\u00a2\u00c3\u00a2\u00e2\u20ac\u0161\u00c2\u00ac\u00c3\u201a\u00c2\u00afLicensed under the MIT License.\u00c3\u0192\u00c2\u00a2\u00c3\u00a2\u00e2\u20ac\u0161\u00c2\u00ac\u00c3\u201a\u00c2\u00af ",
"nteract": {
"version": "nteract-front-end@1.0.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@@ -1,9 +0,0 @@
name: particle
dependencies:
- pip:
- azureml-sdk
- azureml-contrib-reinforcementlearning
- azureml-widgets
- tensorboard
- azureml-tensorboard
- ipython

View File

@@ -8,7 +8,7 @@ dependencies:
- matplotlib
- azureml-dataset-runtime
- ipywidgets
- raiwidgets~=0.23.0
- raiwidgets~=0.24.0
- liac-arff
- packaging>=20.9
- itsdangerous==2.0.1

View File

@@ -101,7 +101,7 @@
"\n",
"# Check core SDK version number\n",
"\n",
"print(\"This notebook was created using SDK version 1.48.0, you are currently running version\", azureml.core.VERSION)"
"print(\"This notebook was created using SDK version 1.49.0, you are currently running version\", azureml.core.VERSION)"
]
},
{

View File

@@ -3,5 +3,6 @@ dependencies:
- pip:
- azureml-sdk
- azureml-tensorboard
- tensorboard
- tensorflow
- setuptools>=41.0.0

View File

@@ -138,7 +138,6 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
| [pong_rllib](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/reinforcement-learning/atari-on-distributed-compute/pong_rllib.ipynb) | | | | | | |
| [cartpole_ci](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/reinforcement-learning/cartpole-on-compute-instance/cartpole_ci.ipynb) | | | | | | |
| [cartpole_sc](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/reinforcement-learning/cartpole-on-single-compute/cartpole_sc.ipynb) | | | | | | |
| [particle](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/particle.ipynb) | | | | | | |
| [rai-loan-decision](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/responsible-ai/visualize-upload-loan-decision/rai-loan-decision.ipynb) | | | | | | |
| [Logging APIs](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/logging-api/logging-api.ipynb) | Logging APIs and analyzing results | None | None | None | None | None |
| [configuration](https://github.com/Azure/MachineLearningNotebooks/blob/master//setup-environment/configuration.ipynb) | | | | | | |

View File

@@ -102,7 +102,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},