Mirror of https://github.com/Azure/MachineLearningNotebooks.git (synced 2025-12-19 17:17:04 -05:00)

Commit: update samples from Release-166 as a part of 1.49.0 SDK stable release
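Most of this commit bumps the pinned SDK version from 1.48.0 to 1.49.0 in the samples' version-check cell and in the sample conda environments. For reference, the version-check pattern that recurs throughout the hunks below looks like this (a minimal sketch assembled from the changed notebook cells, not a new API):

    import azureml.core

    # Each sample notebook records the SDK version it was authored against
    # and prints the version actually installed, so mismatches are easy to spot.
    print("This notebook was created using version 1.49.0 of the Azure ML SDK")
    print("You are currently using version", azureml.core.VERSION, "of the Azure ML SDK")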
@@ -103,7 +103,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

@@ -6,7 +6,8 @@ dependencies:
- fairlearn>=0.6.2
- joblib
- liac-arff
- raiwidgets~=0.23.0
- raiwidgets~=0.24.0
- itsdangerous==2.0.1
- markupsafe<2.1.0
- protobuf==3.20.0
- numpy<1.24.0

@@ -6,7 +6,8 @@ dependencies:
- fairlearn>=0.6.2
- joblib
- liac-arff
- raiwidgets~=0.23.0
- raiwidgets~=0.24.0
- itsdangerous==2.0.1
- markupsafe<2.1.0
- protobuf==3.20.0
- numpy<1.24.0

@@ -8,13 +8,17 @@ dependencies:
# Azure ML only supports 3.7.0 and later.
- pip==22.3.1
- python>=3.7,<3.9
- conda-forge::fbprophet==0.7.1
- pandas==1.1.5
- scipy==1.5.3
- Cython==0.29.14

- pip:
  # Required packages for AzureML execution, history, and data preparation.
  - azureml-widgets~=1.48.0
  - azureml-defaults~=1.48.0
  - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.48.0/validated_win32_requirements.txt [--no-deps]
  - azureml-widgets~=1.49.0
  - azureml-defaults~=1.49.0
  - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.49.0/validated_win32_requirements.txt [--no-deps]
  - matplotlib==3.6.2
  - xgboost==1.3.3
  - arch==4.14
  - mlflow-skinny==1.30.0
  - cmdstanpy==0.9.5
  - setuptools-git==1.2

@@ -6,7 +6,7 @@ channels:
dependencies:
# The python interpreter version.
# Azure ML only supports 3.7 and later.
- pip==20.1.1
- pip==22.3.1
- python>=3.7,<3.9
- matplotlib==3.2.1
- numpy>=1.21.6,<=1.22.3

@@ -20,16 +20,13 @@ dependencies:
- pytorch::pytorch=1.11.0
- cudatoolkit=10.1.243
- notebook
- jinja2<=2.11.2
- markupsafe<2.1.0

- pip:
  # Required packages for AzureML execution, history, and data preparation.
  - azureml-widgets~=1.48.0
  - azureml-defaults~=1.48.0
  - azureml-widgets~=1.49.0
  - azureml-defaults~=1.49.0
  - pytorch-transformers==1.0.0
  - spacy==2.2.4
  - pystan==2.19.1.1
  - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
  - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.48.0/validated_linux_requirements.txt [--no-deps]
  - arch==4.14
  - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.49.0/validated_linux_requirements.txt [--no-deps]

@@ -6,7 +6,7 @@ channels:
dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.7 and later.
- pip==20.1.1
- pip==22.3.1
- python>=3.7,<3.9
- matplotlib==3.2.1
- numpy>=1.21.6,<=1.22.3

@@ -20,16 +20,13 @@ dependencies:
- pytorch::pytorch=1.11.0
- cudatoolkit=9.0
- notebook
- jinja2<=2.11.2
- markupsafe<2.1.0

- pip:
  # Required packages for AzureML execution, history, and data preparation.
  - azureml-widgets~=1.48.0
  - azureml-defaults~=1.48.0
  - azureml-widgets~=1.49.0
  - azureml-defaults~=1.49.0
  - pytorch-transformers==1.0.0
  - spacy==2.2.4
  - pystan==2.19.1.1
  - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
  - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.48.0/validated_darwin_requirements.txt [--no-deps]
  - arch==4.14
  - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.49.0/validated_darwin_requirements.txt [--no-deps]

@@ -97,7 +97,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

@@ -97,7 +97,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

@@ -3,7 +3,7 @@ dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.7.0 and later.
- pip<=22.3.1
- python>=3.7.0,<3.10
- python>=3.7.0,<3.11

- pip:
  # Required packages for AzureML execution, history, and data preparation.

@@ -4,10 +4,10 @@ channels:
- main
dependencies:
# The python interpreter version.
# Currently Azure ML only supports 3.6.0 and later.
# Currently Azure ML only supports 3.7.0 and later.
- pip<=20.2.4
- nomkl
- python>=3.6.0,<3.10
- python>=3.7.0,<3.11
- urllib3==1.26.7
- PyJWT < 2.0.0
- numpy>=1.21.6,<=1.22.3

@@ -92,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

@@ -91,7 +91,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

@@ -106,7 +106,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

@@ -298,8 +298,9 @@
"pip uninstall -y xgboost && \\\n",
"conda install py-xgboost==1.3.3 && \\\n",
"pip uninstall -y numpy && \\\n",
"pip install {numpy_dep} &&\\\n",
"pip install {sklearn_dep} \\\n",
"pip install {numpy_dep} && \\\n",
"pip install {sklearn_dep} && \\\n",
"pip install chardet \\\n",
"\"\"\"\n",
"\n",
"env.python.user_managed_dependencies = True\n",

@@ -10,7 +10,7 @@ dependencies:
- ipython
- matplotlib
- ipywidgets
- raiwidgets~=0.23.0
- raiwidgets~=0.24.0
- itsdangerous==2.0.1
- markupsafe<2.1.0
- scipy>=1.5.3

@@ -10,7 +10,7 @@ dependencies:
- matplotlib
- azureml-dataset-runtime
- ipywidgets
- raiwidgets~=0.23.0
- raiwidgets~=0.24.0
- itsdangerous==2.0.1
- markupsafe<2.1.0
- scipy>=1.5.3

@@ -9,7 +9,7 @@ dependencies:
- ipython
- matplotlib
- ipywidgets
- raiwidgets~=0.23.0
- raiwidgets~=0.24.0
- packaging>=20.9
- itsdangerous==2.0.1
- markupsafe<2.1.0

@@ -9,7 +9,7 @@ dependencies:
- ipython
- matplotlib
- ipywidgets
- raiwidgets~=0.23.0
- raiwidgets~=0.24.0
- packaging>=20.9
- itsdangerous==2.0.1
- markupsafe<2.1.0

@@ -11,7 +11,7 @@ dependencies:
- azureml-dataset-runtime
- azureml-core
- ipywidgets
- raiwidgets~=0.23.0
- raiwidgets~=0.24.0
- itsdangerous==2.0.1
- markupsafe<2.1.0
- scipy>=1.5.3

@@ -330,7 +330,7 @@
"- **inputs:** List of input connections for data consumed by this step. Fetch this inside the notebook using dbutils.widgets.get(\"input\")\n",
"- **outputs:** List of output port definitions for outputs produced by this step. Fetch this inside the notebook using dbutils.widgets.get(\"output\")\n",
"- **existing_cluster_id:** Cluster ID of an existing Interactive cluster on the Databricks workspace. If you are providing this, do not provide any of the parameters below that are used to create a new cluster such as spark_version, node_type, etc.\n",
"- **spark_version:** Version of spark for the databricks run cluster. You can refer to [DataBricks runtime version](https://learn.microsoft.com/azure/databricks/dev-tools/api/#--runtime-version-strings) to specify the spark version. default value: 4.0.x-scala2.11\n",
"- **spark_version:** Version of spark for the databricks run cluster. You can refer to [DataBricks runtime version](https://learn.microsoft.com/azure/databricks/dev-tools/api/#--runtime-version-strings) to specify the spark version. default value: 10.4.x-scala2.12\n",
"- **node_type:** Azure vm node types for the databricks run cluster. default value: Standard_D3_v2\n",
"- **num_workers:** Specifies a static number of workers for the databricks run cluster\n",
"- **min_workers:** Specifies a min number of workers to use for auto-scaling the databricks run cluster\n",

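The hunk above only changes the documented default spark_version. As a rough, hypothetical sketch of how the documented parameters map onto a pipeline step (the cluster ID, notebook path, and compute-target name are placeholders and the exact keyword set should be verified against the DatabricksStep reference, not taken from this commit):

    from azureml.core import Workspace
    from azureml.core.compute import DatabricksCompute
    from azureml.pipeline.steps import DatabricksStep

    ws = Workspace.from_config()
    # Placeholder name for an already-attached Databricks compute target.
    databricks_compute = DatabricksCompute(workspace=ws, name="my-databricks")

    db_step = DatabricksStep(
        name="process_on_databricks",
        notebook_path="/Users/example/notebooks/prepare_data",  # placeholder notebook
        run_name="databricks_notebook_run",
        existing_cluster_id="0123-456789-abcde123",  # placeholder; omit to create a new cluster
        # Without existing_cluster_id, new-cluster settings apply instead, e.g.:
        # spark_version="10.4.x-scala2.12", node_type="Standard_D3_v2", num_workers=2
        compute_target=databricks_compute,
        allow_reuse=True,
    )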
@@ -86,7 +86,7 @@
"import requests\n",
"\n",
"oj_sales_path = \"./oj.csv\"\n",
"r = requests.get(\"http://www.cs.unitn.it/~taufer/Data/oj.csv\")\n",
"r = requests.get(\"https://raw.githubusercontent.com/Azure/azureml-examples/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-orange-juice-sales/data/dominicks_OJ.csv\")\n",
"open(oj_sales_path, \"wb\").write(r.content)"
]
},

@@ -140,7 +140,7 @@
"metadata": {},
"outputs": [],
"source": [
"partitioned_dataset = dataset.partition_by(partition_keys=['store', 'brand'], target=(datastore, \"partition_by_key_res\"), name=\"partitioned_oj_data\")\n",
"partitioned_dataset = dataset.partition_by(partition_keys=['Store', 'Brand'], target=(datastore, \"partition_by_key_res\"), name=\"partitioned_oj_data\")\n",
"partitioned_dataset.partition_keys"
]
},

@@ -274,7 +274,7 @@
"parallel_run_config = ParallelRunConfig(\n",
" source_directory=scripts_folder,\n",
" entry_script=script_file, # the user script to run against each input\n",
" partition_keys=['store', 'brand'],\n",
" partition_keys=['Store', 'Brand'],\n",
" error_threshold=5,\n",
" output_action='append_row',\n",
" append_row_file_name=\"revenue_outputs.txt\",\n",

@@ -362,8 +362,8 @@
"result_file = os.path.join(target_dir, batch_output.path_on_datastore, parallel_run_config.append_row_file_name)\n",
"\n",
"df = pd.read_csv(result_file, delimiter=\" \", header=None)\n",
"df.columns=[\"WeekStarting\", \"Quantity\", \"logQuantity\", \"Advert\", \"Price\", \"Age60\", \"COLLEGE\", \"INCOME\", \"Hincome150\", \"Large HH\", \"Minorities\", \"WorkingWoman\", \"SSTRDIST\", \"SSTRVOL\", \"CPDIST5\", \"CPWVOL5\", \"Store\", \"Brand\", \"total_income\"]\n",
"\n",
"df.columns = [\"week\", \"logmove\", \"feat\", \"price\", \"AGE60\", \"EDUC\", \"ETHNIC\", \"INCOME\", \"HHLARGE\", \"WORKWOM\", \"HVAL150\", \"SSTRDIST\", \"SSTRVOL\", \"CPDIST5\", \"CPWVOL5\", \"store\", \"brand\", \"total_income\"]\n",
"print(\"Prediction has \", df.shape[0], \" rows\")\n",
"df.head(10)"
]

@@ -413,7 +413,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
"version": "3.8.13"
}
},
"nbformat": 4,

@@ -36,8 +36,6 @@ Using these samples, you will learn how to do the following.
| [cartpole_ci.ipynb](cartpole-on-compute-instance/cartpole_ci.ipynb) | Notebook to train a Cartpole playing agent on an Azure Machine Learning Compute Instance |
| [cartpole_sc.ipynb](cartpole-on-single-compute/cartpole_sc.ipynb) | Notebook to train a Cartpole playing agent on an Azure Machine Learning Compute Cluster (single node) |
| [pong_rllib.ipynb](atari-on-distributed-compute/pong_rllib.ipynb) | Notebook for distributed training of Pong agent using RLlib on multiple compute targets |
| [minecraft.ipynb](minecraft-on-distributed-compute/minecraft.ipynb) | Notebook to train an agent to navigate through a lava maze in the Minecraft game |
| [particle.ipynb](multiagent-particle-envs/particle.ipynb) | Notebook to train policies in a multiagent cooperative navigation scenario based on OpenAI's Particle environments |

## Prerequisites

@@ -1,39 +0,0 @@
# DisableDockerDetector "Disabled to unblock PRs until the owner can fix the file. Not used in any prod deployments - only as a documentation for the customers"
FROM akdmsft/particle-cpu

RUN conda install -c anaconda python=3.7

# Install required pip packages
RUN pip3 install --upgrade pip setuptools && pip3 install --upgrade \
    pandas \
    matplotlib \
    psutil \
    numpy \
    scipy \
    gym \
    azureml-defaults \
    tensorboardX \
    tensorflow==1.15 \
    tensorflow-probability==0.8.0 \
    onnxruntime \
    tf2onnx \
    cloudpickle==1.1.1 \
    tabulate \
    dm_tree \
    lz4 \
    opencv-python

RUN cd multiagent-particle-envs && \
    pip3 install -e . && \
    pip3 install --upgrade pyglet==1.3.2

RUN pip3 install ray-on-aml==0.1.6

RUN pip install protobuf==3.20.0

RUN pip3 install --upgrade \
    ray==0.8.7 \
    ray[rllib]==0.8.7 \
    ray[tune]==0.8.7

RUN pip install 'msrest<0.7.0'

@@ -1,70 +0,0 @@
# MIT License

# Copyright (c) 2018 OpenAI

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import numpy as np
import gym


class MultiDiscrete(gym.Space):
    """
    - The multi-discrete action space consists of a series of discrete action spaces with different
      parameters
    - It can be adapted to both a Discrete action space or a continuous (Box) action space
    - It is useful to represent game controllers or keyboards where each key can be represented as
      a discrete action space
    - It is parametrized by passing an array of arrays containing [min, max] for each discrete action
      space where the discrete action space can take any integers from `min` to `max` (both inclusive)
    Note: A value of 0 always need to represent the NOOP action.
    e.g. Nintendo Game Controller
    - Can be conceptualized as 3 discrete action spaces:
        1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4
        2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
        3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1
    - Can be initialized as
        MultiDiscrete([ [0,4], [0,1], [0,1] ])
    """
    def __init__(self, array_of_param_array):
        self.low = np.array([x[0] for x in array_of_param_array])
        self.high = np.array([x[1] for x in array_of_param_array])
        self.num_discrete_space = self.low.shape[0]

    def sample(self):
        """ Returns a array with one sample from each discrete action space """
        # For each row: round(random .* (max - min) + min, 0)
        # random_array = prng.np_random.rand(self.num_discrete_space)
        random_array = np.random.RandomState().rand(self.num_discrete_space)
        return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)]

    def contains(self, x):
        return len(x) == self.num_discrete_space \
            and (np.array(x) >= self.low).all() \
            and (np.array(x) <= self.high).all()

    @property
    def shape(self):
        return self.num_discrete_space

    def __repr__(self):
        return "MultiDiscrete" + str(self.num_discrete_space)

    def __eq__(self, other):
        return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high)

@@ -1,413 +0,0 @@
|
||||
# MIT License
|
||||
|
||||
# Copyright (c) 2018 OpenAI
|
||||
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
# in the Software without restriction, including without limitation the rights
|
||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
# copies of the Software, and to permit persons to whom the Software is
|
||||
# furnished to do so, subject to the following conditions:
|
||||
|
||||
# The above copyright notice and this permission notice shall be included in all
|
||||
# copies or substantial portions of the Software.
|
||||
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
"""
|
||||
2D rendering framework
|
||||
"""
|
||||
from __future__ import division
|
||||
import os
|
||||
import six
|
||||
import sys
|
||||
from gym import error
|
||||
import math
|
||||
import numpy as np
|
||||
import pyglet
|
||||
|
||||
from pyglet.gl import glEnable, glHint, glLineWidth, glBlendFunc, glClearColor, glPushMatrix, \
|
||||
glTranslatef, glRotatef, glScalef, glPopMatrix, glColor4f, glBegin, glVertex3f, glEnd, glLineStipple, \
|
||||
glDisable, glVertex2f, GL_BLEND, GL_LINE_SMOOTH, GL_LINE_SMOOTH_HINT, GL_NICEST, GL_SRC_ALPHA, \
|
||||
GL_ONE_MINUS_SRC_ALPHA, GL_LINE_STIPPLE, GL_POINTS, GL_QUADS, GL_TRIANGLES, GL_POLYGON, GL_LINE_LOOP, \
|
||||
GL_LINE_STRIP, GL_LINES
|
||||
|
||||
|
||||
if "Apple" in sys.version:
|
||||
if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ:
|
||||
os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib'
|
||||
# (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite
|
||||
|
||||
|
||||
RAD2DEG = 57.29577951308232
|
||||
|
||||
|
||||
def get_display(spec):
|
||||
"""Convert a display specification (such as :0) into an actual Display
|
||||
object.
|
||||
|
||||
Pyglet only supports multiple Displays on Linux.
|
||||
"""
|
||||
if spec is None:
|
||||
return None
|
||||
elif isinstance(spec, six.string_types):
|
||||
return pyglet.canvas.Display(spec)
|
||||
else:
|
||||
raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec))
|
||||
|
||||
|
||||
class Viewer(object):
|
||||
def __init__(self, width, height, display=None):
|
||||
display = get_display(display)
|
||||
|
||||
self.width = width
|
||||
self.height = height
|
||||
|
||||
self.window = pyglet.window.Window(width=width, height=height, display=display)
|
||||
self.window.on_close = self.window_closed_by_user
|
||||
self.geoms = []
|
||||
self.onetime_geoms = []
|
||||
self.transform = Transform()
|
||||
|
||||
glEnable(GL_BLEND)
|
||||
# glEnable(GL_MULTISAMPLE)
|
||||
glEnable(GL_LINE_SMOOTH)
|
||||
# glHint(GL_LINE_SMOOTH_HINT, GL_DONT_CARE)
|
||||
glHint(GL_LINE_SMOOTH_HINT, GL_NICEST)
|
||||
glLineWidth(2.0)
|
||||
glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
|
||||
|
||||
def close(self):
|
||||
self.window.close()
|
||||
|
||||
def window_closed_by_user(self):
|
||||
self.close()
|
||||
|
||||
def set_bounds(self, left, right, bottom, top):
|
||||
assert right > left and top > bottom
|
||||
scalex = self.width / (right - left)
|
||||
scaley = self.height / (top - bottom)
|
||||
self.transform = Transform(
|
||||
translation=(-left * scalex, -bottom * scaley),
|
||||
scale=(scalex, scaley))
|
||||
|
||||
def add_geom(self, geom):
|
||||
self.geoms.append(geom)
|
||||
|
||||
def add_onetime(self, geom):
|
||||
self.onetime_geoms.append(geom)
|
||||
|
||||
def render(self, return_rgb_array=False):
|
||||
glClearColor(1, 1, 1, 1)
|
||||
self.window.clear()
|
||||
self.window.switch_to()
|
||||
self.window.dispatch_events()
|
||||
self.transform.enable()
|
||||
for geom in self.geoms:
|
||||
geom.render()
|
||||
for geom in self.onetime_geoms:
|
||||
geom.render()
|
||||
self.transform.disable()
|
||||
arr = None
|
||||
if return_rgb_array:
|
||||
buffer = pyglet.image.get_buffer_manager().get_color_buffer()
|
||||
image_data = buffer.get_image_data()
|
||||
arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
|
||||
# In https://github.com/openai/gym-http-api/issues/2, we
|
||||
# discovered that someone using Xmonad on Arch was having
|
||||
# a window of size 598 x 398, though a 600 x 400 window
|
||||
# was requested. (Guess Xmonad was preserving a pixel for
|
||||
# the boundary.) So we use the buffer height/width rather
|
||||
# than the requested one.
|
||||
arr = arr.reshape(buffer.height, buffer.width, 4)
|
||||
arr = arr[::-1, :, 0:3]
|
||||
self.window.flip()
|
||||
self.onetime_geoms = []
|
||||
return arr
|
||||
|
||||
# Convenience
|
||||
def draw_circle(self, radius=10, res=30, filled=True, **attrs):
|
||||
geom = make_circle(radius=radius, res=res, filled=filled)
|
||||
_add_attrs(geom, attrs)
|
||||
self.add_onetime(geom)
|
||||
return geom
|
||||
|
||||
def draw_polygon(self, v, filled=True, **attrs):
|
||||
geom = make_polygon(v=v, filled=filled)
|
||||
_add_attrs(geom, attrs)
|
||||
self.add_onetime(geom)
|
||||
return geom
|
||||
|
||||
def draw_polyline(self, v, **attrs):
|
||||
geom = make_polyline(v=v)
|
||||
_add_attrs(geom, attrs)
|
||||
self.add_onetime(geom)
|
||||
return geom
|
||||
|
||||
def draw_line(self, start, end, **attrs):
|
||||
geom = Line(start, end)
|
||||
_add_attrs(geom, attrs)
|
||||
self.add_onetime(geom)
|
||||
return geom
|
||||
|
||||
def get_array(self):
|
||||
self.window.flip()
|
||||
image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data()
|
||||
self.window.flip()
|
||||
arr = np.fromstring(image_data.data, dtype=np.uint8, sep='')
|
||||
arr = arr.reshape(self.height, self.width, 4)
|
||||
return arr[::-1, :, 0:3]
|
||||
|
||||
|
||||
def _add_attrs(geom, attrs):
|
||||
if "color" in attrs:
|
||||
geom.set_color(*attrs["color"])
|
||||
if "linewidth" in attrs:
|
||||
geom.set_linewidth(attrs["linewidth"])
|
||||
|
||||
|
||||
class Geom(object):
|
||||
def __init__(self):
|
||||
self._color = Color((0, 0, 0, 1.0))
|
||||
self.attrs = [self._color]
|
||||
|
||||
def render(self):
|
||||
for attr in reversed(self.attrs):
|
||||
attr.enable()
|
||||
self.render1()
|
||||
for attr in self.attrs:
|
||||
attr.disable()
|
||||
|
||||
def render1(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def add_attr(self, attr):
|
||||
self.attrs.append(attr)
|
||||
|
||||
def set_color(self, r, g, b, alpha=1):
|
||||
self._color.vec4 = (r, g, b, alpha)
|
||||
|
||||
|
||||
class Attr(object):
|
||||
def enable(self):
|
||||
raise NotImplementedError
|
||||
|
||||
def disable(self):
|
||||
pass
|
||||
|
||||
|
||||
class Transform(Attr):
|
||||
def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1, 1)):
|
||||
self.set_translation(*translation)
|
||||
self.set_rotation(rotation)
|
||||
self.set_scale(*scale)
|
||||
|
||||
def enable(self):
|
||||
glPushMatrix()
|
||||
glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL loc ppint
|
||||
glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0)
|
||||
glScalef(self.scale[0], self.scale[1], 1)
|
||||
|
||||
def disable(self):
|
||||
glPopMatrix()
|
||||
|
||||
def set_translation(self, newx, newy):
|
||||
self.translation = (float(newx), float(newy))
|
||||
|
||||
def set_rotation(self, new):
|
||||
self.rotation = float(new)
|
||||
|
||||
def set_scale(self, newx, newy):
|
||||
self.scale = (float(newx), float(newy))
|
||||
|
||||
|
||||
class Color(Attr):
|
||||
def __init__(self, vec4):
|
||||
self.vec4 = vec4
|
||||
|
||||
def enable(self):
|
||||
glColor4f(*self.vec4)
|
||||
|
||||
|
||||
class LineStyle(Attr):
|
||||
def __init__(self, style):
|
||||
self.style = style
|
||||
|
||||
def enable(self):
|
||||
glEnable(GL_LINE_STIPPLE)
|
||||
glLineStipple(1, self.style)
|
||||
|
||||
def disable(self):
|
||||
glDisable(GL_LINE_STIPPLE)
|
||||
|
||||
|
||||
class LineWidth(Attr):
|
||||
def __init__(self, stroke):
|
||||
self.stroke = stroke
|
||||
|
||||
def enable(self):
|
||||
glLineWidth(self.stroke)
|
||||
|
||||
|
||||
class Point(Geom):
|
||||
def __init__(self):
|
||||
Geom.__init__(self)
|
||||
|
||||
def render1(self):
|
||||
glBegin(GL_POINTS) # draw point
|
||||
glVertex3f(0.0, 0.0, 0.0)
|
||||
glEnd()
|
||||
|
||||
|
||||
class FilledPolygon(Geom):
|
||||
def __init__(self, v):
|
||||
Geom.__init__(self)
|
||||
self.v = v
|
||||
|
||||
def render1(self):
|
||||
if len(self.v) == 4:
|
||||
glBegin(GL_QUADS)
|
||||
elif len(self.v) > 4:
|
||||
glBegin(GL_POLYGON)
|
||||
else:
|
||||
glBegin(GL_TRIANGLES)
|
||||
for p in self.v:
|
||||
glVertex3f(p[0], p[1], 0) # draw each vertex
|
||||
glEnd()
|
||||
|
||||
color = (
|
||||
self._color.vec4[0] * 0.5,
|
||||
self._color.vec4[1] * 0.5,
|
||||
self._color.vec4[2] * 0.5,
|
||||
self._color.vec4[3] * 0.5)
|
||||
glColor4f(*color)
|
||||
glBegin(GL_LINE_LOOP)
|
||||
for p in self.v:
|
||||
glVertex3f(p[0], p[1], 0) # draw each vertex
|
||||
glEnd()
|
||||
|
||||
|
||||
def make_circle(radius=10, res=30, filled=True):
|
||||
points = []
|
||||
for i in range(res):
|
||||
ang = 2 * math.pi * i / res
|
||||
points.append((math.cos(ang) * radius, math.sin(ang) * radius))
|
||||
if filled:
|
||||
return FilledPolygon(points)
|
||||
else:
|
||||
return PolyLine(points, True)
|
||||
|
||||
|
||||
def make_polygon(v, filled=True):
|
||||
if filled:
|
||||
return FilledPolygon(v)
|
||||
else:
|
||||
return PolyLine(v, True)
|
||||
|
||||
|
||||
def make_polyline(v):
|
||||
return PolyLine(v, False)
|
||||
|
||||
|
||||
def make_capsule(length, width):
|
||||
l, r, t, b = 0, length, width / 2, -width / 2
|
||||
box = make_polygon([(l, b), (l, t), (r, t), (r, b)])
|
||||
circ0 = make_circle(width / 2)
|
||||
circ1 = make_circle(width / 2)
|
||||
circ1.add_attr(Transform(translation=(length, 0)))
|
||||
geom = Compound([box, circ0, circ1])
|
||||
return geom
|
||||
|
||||
|
||||
class Compound(Geom):
|
||||
def __init__(self, gs):
|
||||
Geom.__init__(self)
|
||||
self.gs = gs
|
||||
for g in self.gs:
|
||||
g.attrs = [a for a in g.attrs if not isinstance(a, Color)]
|
||||
|
||||
def render1(self):
|
||||
for g in self.gs:
|
||||
g.render()
|
||||
|
||||
|
||||
class PolyLine(Geom):
|
||||
def __init__(self, v, close):
|
||||
Geom.__init__(self)
|
||||
self.v = v
|
||||
self.close = close
|
||||
self.linewidth = LineWidth(1)
|
||||
self.add_attr(self.linewidth)
|
||||
|
||||
def render1(self):
|
||||
glBegin(GL_LINE_LOOP if self.close else GL_LINE_STRIP)
|
||||
for p in self.v:
|
||||
glVertex3f(p[0], p[1], 0) # draw each vertex
|
||||
glEnd()
|
||||
|
||||
def set_linewidth(self, x):
|
||||
self.linewidth.stroke = x
|
||||
|
||||
|
||||
class Line(Geom):
|
||||
def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)):
|
||||
Geom.__init__(self)
|
||||
self.start = start
|
||||
self.end = end
|
||||
self.linewidth = LineWidth(1)
|
||||
self.add_attr(self.linewidth)
|
||||
|
||||
def render1(self):
|
||||
glBegin(GL_LINES)
|
||||
glVertex2f(*self.start)
|
||||
glVertex2f(*self.end)
|
||||
glEnd()
|
||||
|
||||
|
||||
class Image(Geom):
|
||||
def __init__(self, fname, width, height):
|
||||
Geom.__init__(self)
|
||||
self.width = width
|
||||
self.height = height
|
||||
img = pyglet.image.load(fname)
|
||||
self.img = img
|
||||
self.flip = False
|
||||
|
||||
def render1(self):
|
||||
self.img.blit(-self.width / 2, -self.height / 2, width=self.width, height=self.height)
|
||||
|
||||
|
||||
class SimpleImageViewer(object):
|
||||
def __init__(self, display=None):
|
||||
self.window = None
|
||||
self.isopen = False
|
||||
self.display = display
|
||||
|
||||
def imshow(self, arr):
|
||||
if self.window is None:
|
||||
height, width, channels = arr.shape
|
||||
self.window = pyglet.window.Window(width=width, height=height, display=self.display)
|
||||
self.width = width
|
||||
self.height = height
|
||||
self.isopen = True
|
||||
assert arr.shape == (self.height, self.width, 3), "You passed in an image with the wrong number shape"
|
||||
image = pyglet.image.ImageData(self.width, self.height, 'RGB', arr.tobytes(), pitch=self.width * -3)
|
||||
self.window.clear()
|
||||
self.window.switch_to()
|
||||
self.window.dispatch_events()
|
||||
image.blit(0, 0)
|
||||
self.window.flip()
|
||||
|
||||
def close(self):
|
||||
if self.isopen:
|
||||
self.window.close()
|
||||
self.isopen = False
|
||||
|
||||
def __del__(self):
|
||||
self.close()
|
||||
@@ -1,123 +0,0 @@
import os

from ray_on_aml.core import Ray_On_AML

from ray.tune import run_experiments
from ray.tune.registry import register_trainable, register_env, get_trainable_cls
import ray.rllib.contrib.maddpg.maddpg as maddpg

from rllib_multiagent_particle_env import env_creator
from util import parse_args


def setup_ray():
    ray_on_aml = Ray_On_AML()
    ray_on_aml.getRay()

    register_env('particle', env_creator)


def gen_policy(args, env, id):
    use_local_critic = [
        args.adv_policy == 'ddpg' if id < args.num_adversaries else
        args.good_policy == 'ddpg' for id in range(env.num_agents)
    ]
    return (
        None,
        env.observation_space_dict[id],
        env.action_space_dict[id],
        {
            'agent_id': id,
            'use_local_critic': use_local_critic[id],
            'obs_space_dict': env.observation_space_dict,
            'act_space_dict': env.action_space_dict,
        }
    )


def gen_policies(args, env_config):
    env = env_creator(env_config)
    return {'policy_%d' % i: gen_policy(args, env, i) for i in range(len(env.observation_space_dict))}


def to_multiagent_config(policies):
    policy_ids = list(policies.keys())
    return {
        'policies': policies,
        'policy_mapping_fn': lambda index: policy_ids[index]
    }


def train(args, env_config):
    def stop(trial_id, result):
        max_train_time = int(os.environ.get('AML_MAX_TRAIN_TIME_SECONDS', 2 * 60 * 60))

        return result['episode_reward_mean'] >= args.final_reward \
            or result['time_total_s'] >= max_train_time

    run_experiments({
        'MADDPG_RLLib': {
            'run': 'contrib/MADDPG',
            'env': 'particle',
            'stop': stop,
            # Uncomment to enable more frequent checkpoints:
            # 'checkpoint_freq': args.checkpoint_freq,
            'checkpoint_at_end': True,
            'local_dir': args.local_dir,
            'restore': args.restore,
            'config': {
                # === Log ===
                'log_level': 'ERROR',

                # === Environment ===
                'env_config': env_config,
                'num_envs_per_worker': args.num_envs_per_worker,
                'horizon': args.max_episode_len,

                # === Policy Config ===
                # --- Model ---
                'good_policy': args.good_policy,
                'adv_policy': args.adv_policy,
                'actor_hiddens': [args.num_units] * 2,
                'actor_hidden_activation': 'relu',
                'critic_hiddens': [args.num_units] * 2,
                'critic_hidden_activation': 'relu',
                'n_step': args.n_step,
                'gamma': args.gamma,

                # --- Exploration ---
                'tau': 0.01,

                # --- Replay buffer ---
                'buffer_size': int(1e6),

                # --- Optimization ---
                'actor_lr': args.lr,
                'critic_lr': args.lr,
                'learning_starts': args.train_batch_size * args.max_episode_len,
                'sample_batch_size': args.sample_batch_size,
                'train_batch_size': args.train_batch_size,
                'batch_mode': 'truncate_episodes',

                # --- Parallelism ---
                'num_workers': args.num_workers,
                'num_gpus': args.num_gpus,
                'num_gpus_per_worker': 0,

                # === Multi-agent setting ===
                'multiagent': to_multiagent_config(gen_policies(args, env_config)),
            },
        },
    }, verbose=1)


if __name__ == '__main__':
    args = parse_args()
    setup_ray()

    env_config = {
        'scenario_name': args.scenario,
        'horizon': args.max_episode_len,
        'video_frequency': args.checkpoint_freq,
    }
    train(args, env_config)

@@ -1,113 +0,0 @@
# Some code taken from: https://github.com/wsjeon/maddpg-rllib/

import imp
import os

import gym
from gym import wrappers
from ray import rllib

from multiagent.environment import MultiAgentEnv
import multiagent.scenarios as scenarios


CUSTOM_SCENARIOS = ['simple_switch']


class ParticleEnvRenderWrapper(gym.Wrapper):
    def __init__(self, env, horizon):
        super().__init__(env)
        self.horizon = horizon

    def reset(self):
        self._num_steps = 0

        return self.env.reset()

    def render(self, mode):
        if mode == 'human':
            self.env.render(mode=mode)
        else:
            return self.env.render(mode=mode)[0]

    def step(self, actions):
        obs_list, rew_list, done_list, info_list = self.env.step(actions)

        self._num_steps += 1
        done = (all(done_list) or self._num_steps >= self.horizon)

        # Gym monitor expects reward to be an int. This is only used for its
        # stats reporter, which we're not interested in. To make video recording
        # work, we package the rewards in the info object and extract it below.
        return obs_list, 0, done, [rew_list, done_list, info_list]


class RLlibMultiAgentParticleEnv(rllib.MultiAgentEnv):
    def __init__(self, scenario_name, horizon, monitor_enabled=False, video_frequency=500):
        self._env = _make_env(scenario_name, horizon, monitor_enabled, video_frequency)
        self.num_agents = self._env.n
        self.agent_ids = list(range(self.num_agents))

        self.observation_space_dict = self._make_dict(self._env.observation_space)
        self.action_space_dict = self._make_dict(self._env.action_space)

    def reset(self):
        obs_dict = self._make_dict(self._env.reset())
        return obs_dict

    def step(self, action_dict):
        actions = list(action_dict.values())
        obs_list, _, _, infos = self._env.step(actions)
        rew_list, done_list, _ = infos

        obs_dict = self._make_dict(obs_list)
        rew_dict = self._make_dict(rew_list)
        done_dict = self._make_dict(done_list)
        done_dict['__all__'] = all(done_list)
        info_dict = self._make_dict([{'done': done} for done in done_list])

        return obs_dict, rew_dict, done_dict, info_dict

    def render(self, mode='human'):
        self._env.render(mode=mode)

    def _make_dict(self, values):
        return dict(zip(self.agent_ids, values))


def _video_callable(video_frequency):
    def should_record_video(episode_id):
        if episode_id % video_frequency == 0:
            return True
        return False

    return should_record_video


def _make_env(scenario_name, horizon, monitor_enabled, video_frequency):
    if scenario_name in CUSTOM_SCENARIOS:
        # Scenario file must exist locally
        file_path = os.path.join(os.path.dirname(__file__), scenario_name + '.py')
        scenario = imp.load_source('', file_path).Scenario()
    else:
        scenario = scenarios.load(scenario_name + '.py').Scenario()

    world = scenario.make_world()

    env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation)
    env.metadata['video.frames_per_second'] = 8

    env = ParticleEnvRenderWrapper(env, horizon)

    if not monitor_enabled:
        return env

    return wrappers.Monitor(env, './logs/videos', resume=True, video_callable=_video_callable(video_frequency))


def env_creator(config):
    monitor_enabled = False
    if hasattr(config, 'worker_index') and hasattr(config, 'vector_index'):
        monitor_enabled = (config.worker_index == 1 and config.vector_index == 0)

    return RLlibMultiAgentParticleEnv(**config, monitor_enabled=monitor_enabled)

@@ -1,358 +0,0 @@
|
||||
import numpy as np
|
||||
import random
|
||||
|
||||
from multiagent.core import World, Agent, Landmark
|
||||
from multiagent.scenario import BaseScenario
|
||||
|
||||
|
||||
class SwitchWorld(World):
|
||||
""" Extended World with hills and switches """
|
||||
def __init__(self, hills, switches):
|
||||
super().__init__()
|
||||
# add hills and switches
|
||||
self.hills = hills
|
||||
self.switches = switches
|
||||
self.landmarks.extend(self.hills)
|
||||
self.landmarks.extend(self.switches)
|
||||
|
||||
def step(self):
|
||||
|
||||
super().step()
|
||||
|
||||
# if all hills are activated, reset the switches and hills
|
||||
if all([hill.active for hill in self.hills]):
|
||||
self.reset_hills()
|
||||
self.reset_switches()
|
||||
else:
|
||||
# Update switches
|
||||
for switch in self.switches:
|
||||
switch.step(self)
|
||||
# Update hills
|
||||
for hill in self.hills:
|
||||
hill.step(self)
|
||||
|
||||
def reset_hills(self):
|
||||
possible_hill_positions = [np.array([-0.8, 0]), np.array([0, 0.8]), np.array([0.8, 0]), np.array([0, -0.8])]
|
||||
hill_positions = random.sample(possible_hill_positions, k=len(self.hills))
|
||||
for i, hill in enumerate(self.hills):
|
||||
hill.state.p_pos = hill_positions[i]
|
||||
hill.deactivate()
|
||||
|
||||
def reset_switches(self):
|
||||
possible_switch_positions = [
|
||||
np.array([-0.8, -0.8]),
|
||||
np.array([-0.8, 0.8]),
|
||||
np.array([0.8, -0.8]),
|
||||
np.array([0.8, 0.8])]
|
||||
switch_positions = random.sample(possible_switch_positions, k=len(self.switches))
|
||||
for i, switch in enumerate(self.switches):
|
||||
switch.state.p_pos = switch_positions[i]
|
||||
switch.deactivate()
|
||||
|
||||
|
||||
class Scenario(BaseScenario):
|
||||
def make_world(self):
|
||||
|
||||
# main configurations
|
||||
num_agents = 2
|
||||
num_hills = 2
|
||||
num_switches = 1
|
||||
self.max_episode_length = 100
|
||||
|
||||
# create hills (on edges)
|
||||
possible_hill_positions = [np.array([-0.8, 0]), np.array([0, 0.8]), np.array([0.8, 0]), np.array([0, -0.8])]
|
||||
hill_positions = random.sample(possible_hill_positions, k=num_hills)
|
||||
hills = [Hill(hill_positions[i]) for i in range(num_hills)]
|
||||
# create switches (in corners)
|
||||
possible_switch_positions = [
|
||||
np.array([-0.8, -0.8]),
|
||||
np.array([-0.8, 0.8]),
|
||||
np.array([0.8, -0.8]),
|
||||
np.array([0.8, 0.8])]
|
||||
switch_positions = random.sample(possible_switch_positions, k=num_switches)
|
||||
switches = [Switch(switch_positions[i]) for i in range(num_switches)]
|
||||
|
||||
# make world and set basic properties
|
||||
world = SwitchWorld(hills, switches)
|
||||
world.dim_c = 2
|
||||
world.collaborative = True
|
||||
|
||||
# add agents
|
||||
world.agents = [Agent() for i in range(num_agents)]
|
||||
for i, agent in enumerate(world.agents):
|
||||
agent.name = 'agent %d' % i
|
||||
agent.collide = True
|
||||
agent.silent = True
|
||||
agent.size = 0.1
|
||||
agent.accel = 5.0
|
||||
agent.max_speed = 5.0
|
||||
if i == 0:
|
||||
agent.color = np.array([0.35, 0.35, 0.85])
|
||||
else:
|
||||
agent.color = np.array([0.35, 0.85, 0.85])
|
||||
|
||||
# make initial conditions
|
||||
self.reset_world(world)
|
||||
|
||||
return world
|
||||
|
||||
def reset_world(self, world):
|
||||
# set random initial states
|
||||
for agent in world.agents:
|
||||
agent.state.p_pos = np.array([random.uniform(-1, +1) for _ in range(world.dim_p)])
|
||||
agent.state.p_vel = np.zeros(world.dim_p)
|
||||
agent.state.c = np.zeros(world.dim_c)
|
||||
# set hills randomly
|
||||
world.reset_hills()
|
||||
# set switches randomly
|
||||
world.reset_switches()
|
||||
|
||||
def is_collision(self, agent1, agent2):
|
||||
delta_pos = agent1.state.p_pos - agent2.state.p_pos
|
||||
dist = np.sqrt(np.sum(np.square(delta_pos)))
|
||||
dist_min = agent1.size + agent2.size
|
||||
return True if dist < dist_min else False
|
||||
|
||||
def reward(self, agent, world):
|
||||
# Agents are rewarded based on number of landmarks activated
|
||||
rew = 0
|
||||
if all([h.active for h in world.hills]):
|
||||
rew += 100
|
||||
else:
|
||||
# give bonus each time a hill is activated
|
||||
for hill in world.hills:
|
||||
if hill.activated_just_now:
|
||||
rew += 50
|
||||
# penalise timesteps where nothing is happening
|
||||
if rew == 0:
|
||||
rew -= 0.1
|
||||
# add collision penalty
|
||||
if agent.collide:
|
||||
for a in world.agents:
|
||||
# note: this also counts collision with "itself", so gives -1 at every timestep
|
||||
# would be good to tune the reward function and use (not a == agent) here
|
||||
if self.is_collision(a, agent):
|
||||
rew -= 1
|
||||
return rew
|
||||
|
||||
def observation(self, agent, world):
|
||||
# get positions of all entities in this agent's reference frame
|
||||
entity_pos = []
|
||||
for entity in world.landmarks: # world.entities:
|
||||
entity_pos.append(entity.state.p_pos - agent.state.p_pos)
|
||||
# entity colors
|
||||
entity_color = []
|
||||
for entity in world.landmarks: # world.entities:
|
||||
entity_color.append(entity.color)
|
||||
# communication of all other agents
|
||||
comm = []
|
||||
other_pos = []
|
||||
for other in world.agents:
|
||||
if other is agent:
|
||||
continue
|
||||
comm.append(other.state.c)
|
||||
other_pos.append(other.state.p_pos - agent.state.p_pos)
|
||||
return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + comm)
|
||||
|
||||
|
||||
class Hill(Landmark):
|
||||
"""
|
||||
A hill that can be captured by an agent.
|
||||
To be captured, a team must occupy a hill for a fixed amount of time.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
pos=None,
|
||||
size=0.08,
|
||||
capture_time=2
|
||||
):
|
||||
|
||||
# Initialize Landmark super class
|
||||
super().__init__()
|
||||
self.movable = False
|
||||
self.collide = False
|
||||
self.state.p_pos = pos
|
||||
self.size = size
|
||||
|
||||
# Set static configurations
|
||||
self.capture_time = capture_time
|
||||
|
||||
# Initialize all hills to be inactive
|
||||
self.active = False
|
||||
self.color = np.array([0.5, 0.5, 0.5])
|
||||
self.capture_timer = 0
|
||||
|
||||
self.activated_just_now = False
|
||||
|
||||
def activate(self):
|
||||
self.active = True
|
||||
self.color = np.array([0.1, 0.1, 0.9])
|
||||
|
||||
def deactivate(self):
|
||||
self.active = False
|
||||
self.color = np.array([0.5, 0.5, 0.5])
|
||||
|
||||
def _is_occupied(self, agents):
|
||||
# a hill is occupied if an agent stands on it
|
||||
for agent in agents:
|
||||
dist = np.sqrt(np.sum(np.square(agent.state.p_pos - self.state.p_pos)))
|
||||
if dist < agent.size + self.size:
|
||||
return True
|
||||
return False
|
||||
|
||||
def step(self, world):
|
||||
|
||||
self.activated_just_now = False
|
||||
|
||||
# If hill isn't activated yet, check if an agent activates it
|
||||
# if (not self.active) and (world.switch.is_active()):
|
||||
if (not self.active):
|
||||
|
||||
# Check if an agent is on the hill and all switches are active
|
||||
if (self._is_occupied(world.agents)) and all([switch.active for switch in world.switches]):
|
||||
self.capture_timer += 1
|
||||
|
||||
# activate hill (this is irreversible)
|
||||
if self.capture_timer > self.capture_time:
|
||||
self.activate()
|
||||
self.activated_just_now = True
|
||||
|
||||
# Reset capture timer if hill is not occupied
|
||||
else:
|
||||
self.capture_timer = 0
|
||||
|
||||
|
||||
class Switch(Landmark):
|
||||
"""
|
||||
A switch that can be activated by an agent.
|
||||
The agent has to stay on the switch for it to be active.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
pos=None,
|
||||
size=0.03,
|
||||
):
|
||||
|
||||
# Initialize Landmark super class
|
||||
super().__init__()
|
||||
self.movable = False
|
||||
self.collide = False
|
||||
self.state.p_pos = pos
|
||||
self.size = size
|
||||
|
||||
# Initialize all hills to be inactive
|
||||
self.active = False
|
||||
self.color = np.array([0.8, 0.05, 0.3])
|
||||
self.capture_timer = 0
|
||||
|
||||
def activate(self):
|
||||
self.active = True
|
||||
self.color = np.array([0.1, 0.9, 0.4])
|
||||
|
||||
def deactivate(self):
|
||||
self.active = False
|
||||
self.color = np.array([0.8, 0.05, 0.3])
|
||||
|
||||
def _is_occupied(self, agents):
|
||||
# a switch is active if an agent stands on it
|
||||
for agent in agents:
|
||||
dist = np.sqrt(np.sum(np.square(agent.state.p_pos - self.state.p_pos)))
|
||||
if dist < agent.size + self.size:
|
||||
return True
|
||||
return False
|
||||
|
||||
def step(self, world):
|
||||
# check if an agent is on the switch and activate/deactive accordingly
|
||||
if self._is_occupied(world.agents):
|
||||
self.activate()
|
||||
else:
|
||||
self.deactivate()
|
||||
|
||||
|
||||
class SwitchExpertPolicy():
|
||||
"""
|
||||
Hand-coded expert policy for the simple switch environment.
|
||||
Types of possible experts:
|
||||
- always go to the switch
|
||||
- always go to the hills
|
||||
"""
|
||||
def __init__(self, dim_c, agent, world, expert_type=None, discrete_action_input=True):
|
||||
|
||||
self.dim_c = dim_c
|
||||
self.discrete_action_input = discrete_action_input
|
||||
# the agent we control and world we're in
|
||||
self.agent = agent
|
||||
self.world = world
|
||||
|
||||
if expert_type is None:
|
||||
self.expert_type = random.choice(['switch', 'hill'])
|
||||
else:
|
||||
self.expert_type = expert_type
|
||||
if self.expert_type == 'switch':
|
||||
self.target_switch = self.select_inital_target_switch()
|
||||
elif self.expert_type == 'hill':
|
||||
self.target_hill = self.select_inital_target_hill()
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
self.step_count = 0
|
||||
|
||||
def select_inital_target_switch(self):
|
||||
return random.choice(self.world.switches)
|
||||
|
||||
def select_inital_target_hill(self):
|
||||
return random.choice(self.world.hills)
|
||||
|
||||
def action(self):
|
||||
|
||||
# select a target!
|
||||
if self.expert_type == 'switch':
|
||||
# if agent is not already on a switch, choose target switch
|
||||
if not any([switch._is_occupied([self.agent]) for switch in self.world.switches]):
|
||||
# select a target switch if there's an inactive one
|
||||
inactive_switches = [switch for switch in self.world.switches if not switch.active]
|
||||
if len(inactive_switches) > 0 and (self.target_switch not in inactive_switches):
|
||||
self.target_switch = random.choice(inactive_switches)
|
||||
target = self.target_switch.state.p_pos
|
||||
elif self.expert_type == 'hill':
|
||||
# select a target hill if we haven't done so yet, or the current target switch is inactive
|
||||
inactive_hills = [hill for hill in self.world.hills if not hill.active]
|
||||
if len(inactive_hills) > 0 and (self.target_hill not in inactive_hills):
|
||||
self.target_hill = random.choice(inactive_hills)
|
||||
target = self.target_hill.state.p_pos
|
||||
|
||||
self.step_count += 1
|
||||
|
||||
impulse = np.clip(target - self.agent.state.p_pos, -self.agent.u_range, self.agent.u_range)
|
||||
|
||||
if self.discrete_action_input:
|
||||
u_idx = np.argmax(np.abs(impulse))
|
||||
if u_idx == 0 and impulse[u_idx] < 0:
|
||||
u = 1
|
||||
elif u_idx == 0 and impulse[u_idx] > 0:
|
||||
u = 2
|
||||
elif u_idx == 1 and impulse[u_idx] < 0:
|
||||
u = 3
|
||||
elif u_idx == 1 and impulse[u_idx] > 0:
|
||||
u = 4
|
||||
else:
|
||||
u = 0
|
||||
else:
|
||||
u = np.zeros(5)
|
||||
if (impulse[0] == impulse[1] == 0) \
|
||||
or (self.step_count < self.burn_in) \
|
||||
or (self.burn_step != 0 and self.step_count % self.burn_step != 0):
|
||||
u[0] = 0.1
|
||||
else:
|
||||
pass
|
||||
# u: noop (?), right, left, down, up
|
||||
if impulse[0] > 0: # x-direction (- left/right + )
|
||||
u[1] = impulse[0] # right
|
||||
elif impulse[0] < 0:
|
||||
u[2] = -impulse[0]
|
||||
if impulse[1] > 0: # y-direction (- up/down + )
|
||||
u[3] = impulse[1]
|
||||
elif impulse[1] < 0:
|
||||
u[4] = -impulse[1]
|
||||
|
||||
return u
|
||||
@@ -1,82 +0,0 @@
import argparse
import os
import re

from rllib_multiagent_particle_env import CUSTOM_SCENARIOS


def parse_args():
    parser = argparse.ArgumentParser('MADDPG with OpenAI MPE')

    # Environment
    parser.add_argument('--scenario', type=str, default='simple',
                        choices=['simple', 'simple_speaker_listener',
                                 'simple_crypto', 'simple_push',
                                 'simple_tag', 'simple_spread', 'simple_adversary'
                                 ] + CUSTOM_SCENARIOS,
                        help='name of the scenario script')
    parser.add_argument('--max-episode-len', type=int, default=25,
                        help='maximum episode length')
    parser.add_argument('--num-episodes', type=int, default=60000,
                        help='number of episodes')
    parser.add_argument('--num-adversaries', type=int, default=0,
                        help='number of adversaries')
    parser.add_argument('--good-policy', type=str, default='maddpg',
                        help='policy for good agents')
    parser.add_argument('--adv-policy', type=str, default='maddpg',
                        help='policy of adversaries')

    # Core training parameters
    parser.add_argument('--lr', type=float, default=1e-2,
                        help='learning rate for Adam optimizer')
    parser.add_argument('--gamma', type=float, default=0.95,
                        help='discount factor')
    # NOTE: 1 iteration = sample_batch_size * num_workers timesteps * num_envs_per_worker
    parser.add_argument('--sample-batch-size', type=int, default=25,
                        help='number of data points sampled /update /worker')
    parser.add_argument('--train-batch-size', type=int, default=1024,
                        help='number of data points /update')
    parser.add_argument('--n-step', type=int, default=1,
                        help='length of multistep value backup')
    parser.add_argument('--num-units', type=int, default=64,
                        help='number of units in the mlp')
    parser.add_argument('--final-reward', type=int, default=-400,
                        help='final reward after which to stop training')

    # Checkpoint
    parser.add_argument('--checkpoint-freq', type=int, default=200,
                        help='save model once every time this many iterations are completed')
    parser.add_argument('--local-dir', type=str, default='./logs',
                        help='path to save checkpoints')
    parser.add_argument('--restore', type=str, default=None,
                        help='directory in which training state and model are loaded')

    # Parallelism
    parser.add_argument('--num-workers', type=int, default=1)
    parser.add_argument('--num-envs-per-worker', type=int, default=4)
    parser.add_argument('--num-gpus', type=int, default=0)

    return parser.parse_args()


def find_final_checkpoint(start_dir):
    def find(pattern, path):
        result = []
        for root, _, files in os.walk(path):
            for name in files:
                if pattern.match(name):
                    result.append(os.path.join(root, name))
        return result

    cp_pattern = re.compile('.*checkpoint-\\d+$')
    checkpoint_files = find(cp_pattern, start_dir)

    checkpoint_numbers = []
    for file in checkpoint_files:
        checkpoint_numbers.append(int(file.split('-')[-1]))

    final_checkpoint_number = max(checkpoint_numbers)

    return next(
        checkpoint_file for checkpoint_file in checkpoint_files
        if checkpoint_file.endswith(str(final_checkpoint_number)))

Binary file not shown. (Deleted binary file, 350 KiB.)
@@ -1,566 +0,0 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Copyright (c) Microsoft Corporation. All rights reserved.\n",
"\n",
"Licensed under the MIT License."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Reinforcement Learning in Azure Machine Learning - Training multiple agents on collaborative ParticleEnv tasks\n",
"\n",
"This tutorial will show you how to train policies in a multi-agent scenario.\n",
"We use OpenAI Gym's [Particle environments](https://github.com/openai/multiagent-particle-envs),\n",
"which model agents and landmarks in a two-dimensional world. Particle comes with\n",
"several predefined scenarios, both competitive and collaborative, and with or without communication.\n",
"\n",
"For this tutorial, we pick a cooperative navigation scenario where N agents are in a world with N\n",
"landmarks. The agents' goal is to cover all the landmarks without collisions,\n",
"so agents must learn to avoid each other (social distancing!). The video below shows training\n",
"results for N=3 agents/landmarks:\n",
"\n",
"<table style=\"width:50%\">\n",
" <tr>\n",
" <th style=\"text-align: center;\">\n",
" <img src=\"./images/particle_simple_spread.gif\" alt=\"Particle video\" align=\"middle\" margin-left=\"auto\" margin-right=\"auto\"/>\n",
" </th>\n",
" </tr>\n",
" <tr style=\"text-align: center;\">\n",
" <th>Fig 1. Video of 3 agents covering 3 landmarks in a multiagent Particle scenario.</th>\n",
" </tr>\n",
"</table>\n",
"\n",
"The tutorial will cover the following steps:\n",
"- Initializing Azure Machine Learning resources for training\n",
"- Training policies in a multi-agent environment with Azure Machine Learning service\n",
"- Monitoring training progress\n",
"\n",
"## Prerequisites\n",
"\n",
"The user should have completed the Azure Machine Learning introductory tutorial. You will need to make sure that you have a valid subscription id, a resource group and a workspace. For detailed instructions see [Tutorial: Get started creating your first ML experiment](https://docs.microsoft.com/en-us/azure/machine-learning/tutorial-1st-experiment-sdk-setup).\n",
"\n",
"Please ensure that you have a current version of IPython (>= 7.15) installed.\n",
"\n",
"While this is a standalone notebook, we highly recommend going over the introductory notebooks for RL first.\n",
"- Getting started:\n",
" - [RL using a compute instance with Azure Machine Learning](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/reinforcement-learning/cartpole-on-compute-instance/cartpole_ci.ipynb)\n",
" - [RL using Azure Machine Learning compute](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/reinforcement-learning/cartpole-on-single-compute/cartpole_sc.ipynb)\n",
"- [Scaling RL training runs with Azure Machine Learning](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/reinforcement-learning/atari-on-distributed-compute/pong_rllib.ipynb)\n",
"\n",
"## Initialize resources\n",
"\n",
"All required Azure Machine Learning service resources for this tutorial can be set up from Jupyter. This includes:\n",
"\n",
"- Connecting to your existing Azure Machine Learning workspace.\n",
"- Creating an experiment to track runs.\n",
"- Creating remote compute targets for [Ray](https://docs.ray.io/en/latest/index.html).\n",
"\n",
"\n",
"### Azure Machine Learning SDK\n",
"\n",
"Display the Azure Machine Learning SDK version."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1646249589452
}
},
"outputs": [],
"source": [
"import azureml.core\n",
"print('Azure Machine Learning SDK version: ', azureml.core.VERSION)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Connect to workspace\n",
"\n",
"Get a reference to an existing Azure Machine Learning workspace."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1646250284486
}
},
"outputs": [],
"source": [
"from azureml.core import Workspace\n",
"\n",
"ws = Workspace.from_config()\n",
"print(ws.name, ws.location, ws.resource_group, sep=' | ')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create an experiment\n",
"\n",
"Create an experiment to track the runs in your workspace. A\n",
"workspace can have multiple experiments and each experiment\n",
"can be used to track multiple runs (see [documentation](https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.experiment.experiment?view=azure-ml-py)\n",
"for details)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1646250342411
}
},
"outputs": [],
"source": [
"from azureml.core import Experiment\n",
"\n",
"exp = Experiment(workspace=ws, name='particle-multiagent')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create or attach an existing compute resource\n",
"\n",
"A compute target is a designated compute resource where you run your training script. For more information, see [What are compute targets in Azure Machine Learning service?](https://docs.microsoft.com/en-us/azure/machine-learning/concept-compute-target).\n",
"\n",
"> Note that if you have an AzureML Data Scientist role, you will not have permission to create compute resources. Talk to your workspace or IT admin to create the compute targets described in this section, if they do not already exist.\n",
"\n",
"#### CPU target for Ray head\n",
"\n",
"In the experiment setup for this tutorial, the Ray head node will\n",
"run on a CPU node (D3 type). A maximum cluster size of 1 node is\n",
"therefore sufficient. If you wish to run multiple experiments in\n",
"parallel using the same CPU cluster, you may elect to increase this\n",
"number. The cluster will automatically scale down to 0 nodes when\n",
"no training jobs are scheduled (see min_nodes).\n",
"\n",
"The code below creates a compute cluster of D3 type nodes.\n",
"If the cluster with the specified name is already in your workspace\n",
"the code will skip the creation process.\n",
"\n",
"**Note: Creation of a compute resource can take several minutes**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1646250346756
}
},
"outputs": [],
"source": [
"from azureml.core.compute import AmlCompute, ComputeTarget\n",
"\n",
"cpu_cluster_name = 'cpu-cl-d3'\n",
"\n",
"if cpu_cluster_name in ws.compute_targets:\n",
"    cpu_cluster = ws.compute_targets[cpu_cluster_name]\n",
"    if cpu_cluster and type(cpu_cluster) is AmlCompute:\n",
"        if cpu_cluster.provisioning_state == 'Succeeded':\n",
"            print('Found existing compute target for {}. Using it.'.format(cpu_cluster_name))\n",
"        else: \n",
"            raise Exception('Found existing compute target for {} '.format(cpu_cluster_name)\n",
"                + 'but it is in state {}'.format(cpu_cluster.provisioning_state))\n",
"else:\n",
"    print('Creating a new compute target for {}...'.format(cpu_cluster_name))\n",
"    provisioning_config = AmlCompute.provisioning_configuration(\n",
"        vm_size='STANDARD_D3',\n",
"        min_nodes=0, \n",
"        max_nodes=1)\n",
"\n",
"    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, provisioning_config)\n",
"    cpu_cluster.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)\n",
"    \n",
"    print('Cluster created.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Training the policies\n",
"\n",
"### Training environment\n",
"\n",
"This tutorial uses a custom docker image\n",
"with the necessary software installed. The [Environment](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-use-environments)\n",
"class stores the configuration for the training environment. The\n",
"docker image is set via `env.docker.base_image`.\n",
"`user_managed_dependencies` is set so that\n",
"the preinstalled Python packages in the image are preserved.\n",
"\n",
"Note that since we want to capture videos of the training runs requiring a display, we set the interpreter_path such that the Python process is started via **xvfb-run**."
|
||||
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"gather": {
"logged": 1646257481631
},
"jupyter": {
"outputs_hidden": false,
"source_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
},
"outputs": [],
"source": [
"from azureml.core import Environment\n",
"import os\n",
"\n",
"ray_environment_name = 'particle-cpu'\n",
"ray_environment_dockerfile_path = os.path.join(os.getcwd(), 'docker', 'cpu', 'Dockerfile')\n",
"ray_environment = Environment. \\\n",
"    from_dockerfile(name=ray_environment_name, dockerfile=ray_environment_dockerfile_path). \\\n",
"    register(workspace=ws)\n",
"ray_cpu_build_details = ray_environment.build(workspace=ws)\n",
"\n",
"ray_cpu_build_details.wait_for_completion(show_output=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Training script\n",
"\n",
"This tutorial uses the multiagent algorithm [Multi-Agent Deep Deterministic Policy Gradient (MADDPG)](https://docs.ray.io/en/latest/rllib-algorithms.html?highlight=maddpg#multi-agent-deep-deterministic-policy-gradient-contrib-maddpg).\n",
"For training policies in a multiagent scenario, Ray's RLlib also\n",
"requires the `multiagent` configuration section to be specified. You\n",
"can find more information in the [common parameters](https://docs.ray.io/en/latest/rllib-training.html?highlight=multiagent#common-parameters)\n",
"documentation.\n",
"\n",
"The stopping criteria are set such that the training run is\n",
"terminated after either a mean reward of -450 is observed, or\n",
"training has run for over 2 hours.\n",
"\n",
"### Submitting a training run\n",
"\n",
"You can submit the training run using a `ScriptRunConfig`. By providing the\n",
|
||||
"command to run the training, and a `RunConfig` object configured with your\n",
|
||||
"compute target, number of nodes, and environment image to use.\n",
|
||||
"\n",
|
||||
"Note that you can use the same notebook and scripts to experiment with\n",
|
||||
"different Particle environments. You can find a list of supported\n",
|
||||
"environments [here](https://github.com/openai/multiagent-particle-envs/tree/master#list-of-environments).\n",
|
||||
"Simply change the `--scenario` parameter to a supported scenario.\n",
|
||||
"\n",
|
||||
"In order to get the best training results, you can also adjust the\n",
|
||||
"`--final-reward` parameter to determine when to stop training. A greater\n",
|
||||
"reward means longer running time, but improved results. By default,\n",
|
||||
"the final reward will be -450, which should show good progress after\n",
|
||||
"about one hour of run time.\n",
|
||||
"\n",
|
||||
"For this notebook, we use a single D3 nodes, giving us a total of 4 CPUs and\n",
|
||||
"0 GPUs. One CPU is used by the MADDPG trainer, and an additional CPU is\n",
|
||||
"consumed by the RLlib rollout worker. The other 2 CPUs are not used, though\n",
|
||||
"smaller node types will run out of memory for this task."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"gather": {
|
||||
"logged": 1646275371701
|
||||
},
|
||||
"jupyter": {
|
||||
"outputs_hidden": false,
|
||||
"source_hidden": false
|
||||
},
|
||||
"nteract": {
|
||||
"transient": {
|
||||
"deleting": false
|
||||
}
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from azureml.core import RunConfiguration, ScriptRunConfig, Experiment\n",
|
||||
"from azureml.core.runconfig import DockerConfiguration, RunConfiguration\n",
|
||||
"from azureml.widgets import RunDetails\n",
|
||||
"\n",
|
||||
"experiment_name = 'particle-multiagent'\n",
|
||||
"\n",
|
||||
"experiment = Experiment(workspace=ws, name=experiment_name)\n",
|
||||
"\n",
|
||||
"aml_run_config_ml = RunConfiguration(communicator='OpenMpi')\n",
|
||||
"aml_run_config_ml.target = cpu_cluster\n",
|
||||
"aml_run_config_ml.node_count = 1\n",
|
||||
"aml_run_config_ml.environment = ray_environment\n",
|
||||
"\n",
|
||||
"config = ScriptRunConfig(source_directory='./files',\n",
|
||||
" command=[\n",
|
||||
" 'xvfb-run -s \"-screen 0 640x480x16 -ac +extension GLX +render\" python',\n",
|
||||
" 'particle_train.py',\n",
|
||||
" '--scenario', 'simple_spread',\n",
|
||||
" '--final-reward', '-450'\n",
|
||||
" ],\n",
|
||||
" run_config = aml_run_config_ml\n",
|
||||
" )\n",
|
||||
"train_run = experiment.submit(config)\n",
|
||||
"\n",
|
||||
"RunDetails(train_run).show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Job cancellation\n",
|
||||
"\n",
|
||||
"You may cancel the job by uncommenting and running the cell below."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# If you wish to cancel the run before it completes, uncomment and execute:\n",
|
||||
"# train_run.cancel()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Monitoring training progress\n",
|
||||
"\n",
|
||||
"### View the Tensorboard\n",
|
||||
"\n",
|
||||
"The Tensorboard can be displayed via the Azure Machine Learning\n",
|
||||
"service's [Tensorboard API](https://docs.microsoft.com/en-us/azure/machine-learning/how-to-monitor-tensorboard).\n",
|
||||
"When running locally, please make sure to follow the instructions\n",
|
||||
"in the link and install required packages. Running this cell will output a URL for the Tensorboard.\n",
|
||||
"\n",
|
||||
"Note that the training script sets the log directory when\n",
|
||||
"starting RLlib via the local_dir parameter. ./logs will automatically\n",
|
||||
"appear in the downloadable files for a run. Since this script is\n",
|
||||
"executed on the Ray head node run, we need to get a reference to it\n",
|
||||
"as shown below.\n",
|
||||
"\n",
|
||||
"The Tensorboard API will continuously stream logs from the run.\n",
|
||||
"\n",
|
||||
"**Note: It may take a couple of minutes after the run is in \"Running\"\n",
|
||||
"state before Tensorboard files are available and the board will refresh automatically**"
|
||||
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# from azureml.tensorboard import Tensorboard\n",
"\n",
"# tb = Tensorboard([train_run])\n",
"# tb.start()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### View training videos\n",
"\n",
"As mentioned above, we record videos of the agents interacting with the\n",
"Particle world. These videos are often a crucial indicator for training\n",
"success. The code below downloads the latest video as it becomes available\n",
"and displays it in-line.\n",
"\n",
"Over time, the agents learn to cooperate and avoid collisions while\n",
"traveling to all landmarks.\n",
"\n",
"**Note: It can take several minutes for a video to appear after the run\n",
"was started.**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from azureml.core import Dataset\n",
"from azureml.data.dataset_error_handling import DatasetValidationError\n",
"\n",
"from IPython.display import clear_output\n",
"from IPython.core.display import display, Video\n",
"\n",
"datastore = ws.datastores['workspaceartifactstore']\n",
"path_prefix = './tmp_videos'\n",
"\n",
"def download_latest_training_video(run, video_checkpoint_counter):\n",
"    run_artifacts_path = os.path.join('ExperimentRun', f'dcid.{run.id}', 'logs', 'videos')\n",
"    \n",
"    try:\n",
"        run_artifacts_ds = Dataset.File.from_files(datastore.path(os.path.join(run_artifacts_path, '**')))\n",
"    except DatasetValidationError as e:\n",
"        # This happens at the start of the run when there is no data available\n",
"        # in the run's artifacts\n",
"        return None, video_checkpoint_counter\n",
"    \n",
"    video_files = [file for file in run_artifacts_ds.to_path() if file.endswith('.mp4')]\n",
"    if len(video_files) == video_checkpoint_counter:\n",
"        return None, video_checkpoint_counter\n",
"    \n",
"    iteration_numbers = [int(vf[vf.rindex('video') + len('video') : vf.index('.mp4')]) for vf in video_files]\n",
"    latest_video = next(vf for vf in video_files if vf.endswith('{num}.mp4'.format(num=max(iteration_numbers))))\n",
"    latest_video = os.path.join(run_artifacts_path, os.path.normpath(latest_video[1:]))\n",
"    \n",
"    datastore.download(\n",
"        target_path=path_prefix,\n",
"        prefix=latest_video.replace('\\\\', '/'),\n",
"        show_progress=False)\n",
"    \n",
"    return os.path.join(path_prefix, latest_video), len(video_files)\n",
"\n",
"\n",
"def render_video(vf):\n",
"    clear_output(wait=True)\n",
"    display(Video(data=vf, embed=True, html_attributes='loop autoplay controls width=50%'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import shutil, time\n",
"\n",
"terminal_statuses = ['Canceled', 'Completed', 'Failed']\n",
"video_checkpoint_counter = 0\n",
"\n",
"while train_run.get_status() not in terminal_statuses:\n",
"    video_file, video_checkpoint_counter = download_latest_training_video(train_run, video_checkpoint_counter)\n",
"    if video_file is not None:\n",
"        render_video(video_file)\n",
"        \n",
"        print('Displaying video number {}'.format(video_checkpoint_counter))\n",
"        shutil.rmtree(path_prefix)\n",
"    \n",
"    # Interrupting the kernel can take up to 15 seconds\n",
"    # depending on when time.sleep started\n",
"    time.sleep(15)\n",
"    \n",
"train_run.wait_for_completion()\n",
"print('The training run has reached a terminal status.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Cleaning up\n",
"\n",
"Below, you can find code snippets for your convenience to clean up any resources created as part of this tutorial you don't wish to retain."
|
||||
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# to stop the Tensorboard, uncomment and run\n",
"# tb.stop()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# to delete the cpu compute target, uncomment and run\n",
"# cpu_cluster.delete()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Next steps\n",
"\n",
"We would love to hear your feedback! Please let us know what you think of Reinforcement Learning in Azure Machine Learning and what features you are looking forward to."
]
}
],
"metadata": {
"authors": [
{
"name": "andress"
}
],
"categories": [
"how-to-use-azureml",
"reinforcement-learning"
],
"interpreter": {
"hash": "13382f70c1d0595120591d2e358c8d446daf961bf951d1fba9a32631e205d5ab"
},
"kernel_info": {
"name": "python38-azureml"
},
"kernelspec": {
"display_name": "Python 3.8 - AzureML",
"language": "python",
"name": "python38-azureml"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.9"
},
"notice": "Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT License.",
"nteract": {
"version": "nteract-front-end@1.0.0"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
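The notebook's "Training script" section above notes that RLlib requires a `multiagent` configuration section. As a minimal sketch of that section's shape only (the policy names, agent count, and observation/action spaces below are assumptions for illustration, not the actual contents of particle_train.py):

from gym.spaces import Box, Discrete

# Assumed per-agent spaces for a small Particle scenario; illustrative only.
obs_space = Box(low=-1e6, high=1e6, shape=(18,))
act_space = Discrete(5)
num_agents = 3

# Each entry maps a policy id to (policy_class_or_None, obs_space, act_space, per-policy config);
# multi-agent training such as MADDPG typically defines one policy per agent.
multiagent_section = {
    'policies': {
        'agent_{}'.format(i): (None, obs_space, act_space, {}) for i in range(num_agents)
    },
    # Assumes integer agent ids 0..num_agents-1 from the environment wrapper.
    'policy_mapping_fn': lambda agent_id: 'agent_{}'.format(agent_id),
}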
@@ -1,9 +0,0 @@
name: particle
dependencies:
- pip:
  - azureml-sdk
  - azureml-contrib-reinforcementlearning
  - azureml-widgets
  - tensorboard
  - azureml-tensorboard
  - ipython
@@ -8,7 +8,7 @@ dependencies:
- matplotlib
- azureml-dataset-runtime
- ipywidgets
- raiwidgets~=0.23.0
- raiwidgets~=0.24.0
- liac-arff
- packaging>=20.9
- itsdangerous==2.0.1

@@ -101,7 +101,7 @@
"\n",
"# Check core SDK version number\n",
"\n",
"print(\"This notebook was created using SDK version 1.48.0, you are currently running version\", azureml.core.VERSION)"
"print(\"This notebook was created using SDK version 1.49.0, you are currently running version\", azureml.core.VERSION)"
]
},
{

@@ -3,5 +3,6 @@ dependencies:
- pip:
  - azureml-sdk
  - azureml-tensorboard
  - tensorboard
  - tensorflow
  - setuptools>=41.0.0

@@ -138,7 +138,6 @@ Machine Learning notebook samples and encourage efficient retrieval of topics an
| [pong_rllib](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/reinforcement-learning/atari-on-distributed-compute/pong_rllib.ipynb) | | | | | | |
| [cartpole_ci](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/reinforcement-learning/cartpole-on-compute-instance/cartpole_ci.ipynb) | | | | | | |
| [cartpole_sc](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/reinforcement-learning/cartpole-on-single-compute/cartpole_sc.ipynb) | | | | | | |
| [particle](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/particle.ipynb) | | | | | | |
| [rai-loan-decision](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/responsible-ai/visualize-upload-loan-decision/rai-loan-decision.ipynb) | | | | | | |
| [Logging APIs](https://github.com/Azure/MachineLearningNotebooks/blob/master//how-to-use-azureml/track-and-monitor-experiments/logging-api/logging-api.ipynb) | Logging APIs and analyzing results | None | None | None | None | None |
| [configuration](https://github.com/Azure/MachineLearningNotebooks/blob/master//setup-environment/configuration.ipynb) | | | | | | |

@@ -102,7 +102,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},
