diff --git a/configuration.ipynb b/configuration.ipynb index af172e0a..2b4f7069 100644 --- a/configuration.ipynb +++ b/configuration.ipynb @@ -103,7 +103,7 @@ "source": [ "import azureml.core\n", "\n", - "print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n", + "print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n", "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")" ] }, diff --git a/contrib/fairness/fairlearn-azureml-mitigation.yml b/contrib/fairness/fairlearn-azureml-mitigation.yml index 7cb5b437..8d8d753d 100644 --- a/contrib/fairness/fairlearn-azureml-mitigation.yml +++ b/contrib/fairness/fairlearn-azureml-mitigation.yml @@ -6,7 +6,8 @@ dependencies: - fairlearn>=0.6.2 - joblib - liac-arff - - raiwidgets~=0.23.0 + - raiwidgets~=0.24.0 - itsdangerous==2.0.1 - markupsafe<2.1.0 - protobuf==3.20.0 + - numpy<1.24.0 diff --git a/contrib/fairness/upload-fairness-dashboard.yml b/contrib/fairness/upload-fairness-dashboard.yml index 07e4b988..8af4a762 100644 --- a/contrib/fairness/upload-fairness-dashboard.yml +++ b/contrib/fairness/upload-fairness-dashboard.yml @@ -6,7 +6,8 @@ dependencies: - fairlearn>=0.6.2 - joblib - liac-arff - - raiwidgets~=0.23.0 + - raiwidgets~=0.24.0 - itsdangerous==2.0.1 - markupsafe<2.1.0 - protobuf==3.20.0 + - numpy<1.24.0 diff --git a/how-to-use-azureml/automated-machine-learning/automl_env.yml b/how-to-use-azureml/automated-machine-learning/automl_env.yml index aa8def8a..bc7ebcd7 100644 --- a/how-to-use-azureml/automated-machine-learning/automl_env.yml +++ b/how-to-use-azureml/automated-machine-learning/automl_env.yml @@ -8,13 +8,17 @@ dependencies: # Azure ML only supports 3.7.0 and later. - pip==22.3.1 - python>=3.7,<3.9 +- conda-forge::fbprophet==0.7.1 +- pandas==1.1.5 +- scipy==1.5.3 +- Cython==0.29.14 - pip: # Required packages for AzureML execution, history, and data preparation. - - azureml-widgets~=1.48.0 - - azureml-defaults~=1.48.0 - - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.48.0/validated_win32_requirements.txt [--no-deps] + - azureml-widgets~=1.49.0 + - azureml-defaults~=1.49.0 + - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.49.0/validated_win32_requirements.txt [--no-deps] - matplotlib==3.6.2 - xgboost==1.3.3 - - arch==4.14 - - mlflow-skinny==1.30.0 + - cmdstanpy==0.9.5 + - setuptools-git==1.2 diff --git a/how-to-use-azureml/automated-machine-learning/automl_env_linux.yml b/how-to-use-azureml/automated-machine-learning/automl_env_linux.yml index 36a8c6ce..cfb3dcc2 100644 --- a/how-to-use-azureml/automated-machine-learning/automl_env_linux.yml +++ b/how-to-use-azureml/automated-machine-learning/automl_env_linux.yml @@ -6,7 +6,7 @@ channels: dependencies: # The python interpreter version. # Azure ML only supports 3.7 and later. -- pip==20.1.1 +- pip==22.3.1 - python>=3.7,<3.9 - matplotlib==3.2.1 - numpy>=1.21.6,<=1.22.3 @@ -20,16 +20,13 @@ dependencies: - pytorch::pytorch=1.11.0 - cudatoolkit=10.1.243 - notebook -- jinja2<=2.11.2 -- markupsafe<2.1.0 - pip: # Required packages for AzureML execution, history, and data preparation. 
- - azureml-widgets~=1.48.0 - - azureml-defaults~=1.48.0 + - azureml-widgets~=1.49.0 + - azureml-defaults~=1.49.0 - pytorch-transformers==1.0.0 - spacy==2.2.4 - pystan==2.19.1.1 - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz - - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.48.0/validated_linux_requirements.txt [--no-deps] - - arch==4.14 + - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.49.0/validated_linux_requirements.txt [--no-deps] diff --git a/how-to-use-azureml/automated-machine-learning/automl_env_mac.yml b/how-to-use-azureml/automated-machine-learning/automl_env_mac.yml index 019aa437..df5e46b3 100644 --- a/how-to-use-azureml/automated-machine-learning/automl_env_mac.yml +++ b/how-to-use-azureml/automated-machine-learning/automl_env_mac.yml @@ -6,7 +6,7 @@ channels: dependencies: # The python interpreter version. # Currently Azure ML only supports 3.7 and later. -- pip==20.1.1 +- pip==22.3.1 - python>=3.7,<3.9 - matplotlib==3.2.1 - numpy>=1.21.6,<=1.22.3 @@ -20,16 +20,13 @@ dependencies: - pytorch::pytorch=1.11.0 - cudatoolkit=9.0 - notebook -- jinja2<=2.11.2 -- markupsafe<2.1.0 - pip: # Required packages for AzureML execution, history, and data preparation. - - azureml-widgets~=1.48.0 - - azureml-defaults~=1.48.0 + - azureml-widgets~=1.49.0 + - azureml-defaults~=1.49.0 - pytorch-transformers==1.0.0 - spacy==2.2.4 - pystan==2.19.1.1 - https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz - - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.48.0/validated_darwin_requirements.txt [--no-deps] - - arch==4.14 + - -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.49.0/validated_darwin_requirements.txt [--no-deps] diff --git a/how-to-use-azureml/automated-machine-learning/experimental/autofeaturization-codegen/codegen-for-autofeaturization.ipynb b/how-to-use-azureml/automated-machine-learning/experimental/autofeaturization-codegen/codegen-for-autofeaturization.ipynb index 5d1e2744..5b7793eb 100644 --- a/how-to-use-azureml/automated-machine-learning/experimental/autofeaturization-codegen/codegen-for-autofeaturization.ipynb +++ b/how-to-use-azureml/automated-machine-learning/experimental/autofeaturization-codegen/codegen-for-autofeaturization.ipynb @@ -97,7 +97,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n", + "print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n", "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")" ] }, diff --git a/how-to-use-azureml/automated-machine-learning/experimental/autofeaturization-custom-model-training/custom-model-training-from-autofeaturization-run.ipynb b/how-to-use-azureml/automated-machine-learning/experimental/autofeaturization-custom-model-training/custom-model-training-from-autofeaturization-run.ipynb index 1d90a534..955e2843 100644 --- a/how-to-use-azureml/automated-machine-learning/experimental/autofeaturization-custom-model-training/custom-model-training-from-autofeaturization-run.ipynb +++ b/how-to-use-azureml/automated-machine-learning/experimental/autofeaturization-custom-model-training/custom-model-training-from-autofeaturization-run.ipynb @@ -97,7 +97,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n", + "print(\"This notebook was created using 
version 1.49.0 of the Azure ML SDK\")\n", "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")" ] }, diff --git a/how-to-use-azureml/automated-machine-learning/experimental/automl_thin_client_env.yml b/how-to-use-azureml/automated-machine-learning/experimental/automl_thin_client_env.yml index b8739cba..76acd2b5 100644 --- a/how-to-use-azureml/automated-machine-learning/experimental/automl_thin_client_env.yml +++ b/how-to-use-azureml/automated-machine-learning/experimental/automl_thin_client_env.yml @@ -3,7 +3,7 @@ dependencies: # The python interpreter version. # Currently Azure ML only supports 3.7.0 and later. - pip<=22.3.1 -- python>=3.7.0,<3.10 +- python>=3.7.0,<3.11 - pip: # Required packages for AzureML execution, history, and data preparation. diff --git a/how-to-use-azureml/automated-machine-learning/experimental/automl_thin_client_env_mac.yml b/how-to-use-azureml/automated-machine-learning/experimental/automl_thin_client_env_mac.yml index 2473ad65..61191beb 100644 --- a/how-to-use-azureml/automated-machine-learning/experimental/automl_thin_client_env_mac.yml +++ b/how-to-use-azureml/automated-machine-learning/experimental/automl_thin_client_env_mac.yml @@ -4,10 +4,10 @@ channels: - main dependencies: # The python interpreter version. - # Currently Azure ML only supports 3.6.0 and later. + # Currently Azure ML only supports 3.7.0 and later. - pip<=20.2.4 - nomkl -- python>=3.6.0,<3.10 +- python>=3.7.0,<3.11 - urllib3==1.26.7 - PyJWT < 2.0.0 - numpy>=1.21.6,<=1.22.3 diff --git a/how-to-use-azureml/automated-machine-learning/experimental/classification-credit-card-fraud-local-managed/auto-ml-classification-credit-card-fraud-local-managed.ipynb b/how-to-use-azureml/automated-machine-learning/experimental/classification-credit-card-fraud-local-managed/auto-ml-classification-credit-card-fraud-local-managed.ipynb index 410d4bef..182d3535 100644 --- a/how-to-use-azureml/automated-machine-learning/experimental/classification-credit-card-fraud-local-managed/auto-ml-classification-credit-card-fraud-local-managed.ipynb +++ b/how-to-use-azureml/automated-machine-learning/experimental/classification-credit-card-fraud-local-managed/auto-ml-classification-credit-card-fraud-local-managed.ipynb @@ -92,7 +92,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n", + "print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n", "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")" ] }, diff --git a/how-to-use-azureml/automated-machine-learning/experimental/regression-model-proxy/auto-ml-regression-model-proxy.ipynb b/how-to-use-azureml/automated-machine-learning/experimental/regression-model-proxy/auto-ml-regression-model-proxy.ipynb index 3d185286..bf305ce6 100644 --- a/how-to-use-azureml/automated-machine-learning/experimental/regression-model-proxy/auto-ml-regression-model-proxy.ipynb +++ b/how-to-use-azureml/automated-machine-learning/experimental/regression-model-proxy/auto-ml-regression-model-proxy.ipynb @@ -91,7 +91,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n", + "print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n", "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")" ] }, diff --git 
a/how-to-use-azureml/explain-model/azure-integration/gpu-explanation/train-explain-model-gpu-tree-explainer.ipynb b/how-to-use-azureml/explain-model/azure-integration/gpu-explanation/train-explain-model-gpu-tree-explainer.ipynb index 1f73923b..a0602f57 100644 --- a/how-to-use-azureml/explain-model/azure-integration/gpu-explanation/train-explain-model-gpu-tree-explainer.ipynb +++ b/how-to-use-azureml/explain-model/azure-integration/gpu-explanation/train-explain-model-gpu-tree-explainer.ipynb @@ -106,7 +106,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(\"This notebook was created using version 1.48.0 of the Azure ML SDK\")\n", + "print(\"This notebook was created using version 1.49.0 of the Azure ML SDK\")\n", "print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")" ] }, @@ -298,8 +298,9 @@ "pip uninstall -y xgboost && \\\n", "conda install py-xgboost==1.3.3 && \\\n", "pip uninstall -y numpy && \\\n", - "pip install {numpy_dep} &&\\\n", - "pip install {sklearn_dep} \\\n", + "pip install {numpy_dep} && \\\n", + "pip install {sklearn_dep} && \\\n", + "pip install chardet \\\n", "\"\"\"\n", "\n", "env.python.user_managed_dependencies = True\n", diff --git a/how-to-use-azureml/explain-model/azure-integration/gpu-explanation/train-explain-model-gpu-tree-explainer.yml b/how-to-use-azureml/explain-model/azure-integration/gpu-explanation/train-explain-model-gpu-tree-explainer.yml index 3a5841bd..0c761710 100644 --- a/how-to-use-azureml/explain-model/azure-integration/gpu-explanation/train-explain-model-gpu-tree-explainer.yml +++ b/how-to-use-azureml/explain-model/azure-integration/gpu-explanation/train-explain-model-gpu-tree-explainer.yml @@ -10,7 +10,7 @@ dependencies: - ipython - matplotlib - ipywidgets - - raiwidgets~=0.23.0 + - raiwidgets~=0.24.0 - itsdangerous==2.0.1 - markupsafe<2.1.0 - scipy>=1.5.3 diff --git a/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.yml b/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.yml index 27ea0321..7f953d05 100644 --- a/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.yml +++ b/how-to-use-azureml/explain-model/azure-integration/remote-explanation/explain-model-on-amlcompute.yml @@ -10,7 +10,7 @@ dependencies: - matplotlib - azureml-dataset-runtime - ipywidgets - - raiwidgets~=0.23.0 + - raiwidgets~=0.24.0 - itsdangerous==2.0.1 - markupsafe<2.1.0 - scipy>=1.5.3 diff --git a/how-to-use-azureml/explain-model/azure-integration/run-history/save-retrieve-explanations-run-history.yml b/how-to-use-azureml/explain-model/azure-integration/run-history/save-retrieve-explanations-run-history.yml index f67b96fe..47a8848c 100644 --- a/how-to-use-azureml/explain-model/azure-integration/run-history/save-retrieve-explanations-run-history.yml +++ b/how-to-use-azureml/explain-model/azure-integration/run-history/save-retrieve-explanations-run-history.yml @@ -9,7 +9,7 @@ dependencies: - ipython - matplotlib - ipywidgets - - raiwidgets~=0.23.0 + - raiwidgets~=0.24.0 - packaging>=20.9 - itsdangerous==2.0.1 - markupsafe<2.1.0 diff --git a/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-locally-and-deploy.yml b/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-locally-and-deploy.yml index 82673928..1cb9714b 100644 --- a/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-locally-and-deploy.yml 
+++ b/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-locally-and-deploy.yml @@ -9,7 +9,7 @@ dependencies: - ipython - matplotlib - ipywidgets - - raiwidgets~=0.23.0 + - raiwidgets~=0.24.0 - packaging>=20.9 - itsdangerous==2.0.1 - markupsafe<2.1.0 diff --git a/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.yml b/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.yml index 67753f22..a0945c71 100644 --- a/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.yml +++ b/how-to-use-azureml/explain-model/azure-integration/scoring-time/train-explain-model-on-amlcompute-and-deploy.yml @@ -11,7 +11,7 @@ dependencies: - azureml-dataset-runtime - azureml-core - ipywidgets - - raiwidgets~=0.23.0 + - raiwidgets~=0.24.0 - itsdangerous==2.0.1 - markupsafe<2.1.0 - scipy>=1.5.3 diff --git a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-databricks-as-compute-target.ipynb b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-databricks-as-compute-target.ipynb index a83ac97c..b0a4a923 100644 --- a/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-databricks-as-compute-target.ipynb +++ b/how-to-use-azureml/machine-learning-pipelines/intro-to-pipelines/aml-pipelines-use-databricks-as-compute-target.ipynb @@ -330,7 +330,7 @@ "- **inputs:** List of input connections for data consumed by this step. Fetch this inside the notebook using dbutils.widgets.get(\"input\")\n", "- **outputs:** List of output port definitions for outputs produced by this step. Fetch this inside the notebook using dbutils.widgets.get(\"output\")\n", "- **existing_cluster_id:** Cluster ID of an existing Interactive cluster on the Databricks workspace. If you are providing this, do not provide any of the parameters below that are used to create a new cluster such as spark_version, node_type, etc.\n", - "- **spark_version:** Version of spark for the databricks run cluster. You can refer to [DataBricks runtime version](https://learn.microsoft.com/azure/databricks/dev-tools/api/#--runtime-version-strings) to specify the spark version. default value: 4.0.x-scala2.11\n", + "- **spark_version:** Version of spark for the databricks run cluster. You can refer to [DataBricks runtime version](https://learn.microsoft.com/azure/databricks/dev-tools/api/#--runtime-version-strings) to specify the spark version. default value: 10.4.x-scala2.12\n", "- **node_type:** Azure vm node types for the databricks run cluster. 
default value: Standard_D3_v2\n", "- **num_workers:** Specifies a static number of workers for the databricks run cluster\n", "- **min_workers:** Specifies a min number of workers to use for auto-scaling the databricks run cluster\n", diff --git a/how-to-use-azureml/machine-learning-pipelines/parallel-run/tabular-dataset-partition-per-column.ipynb b/how-to-use-azureml/machine-learning-pipelines/parallel-run/tabular-dataset-partition-per-column.ipynb index 74343a87..726ea9ef 100644 --- a/how-to-use-azureml/machine-learning-pipelines/parallel-run/tabular-dataset-partition-per-column.ipynb +++ b/how-to-use-azureml/machine-learning-pipelines/parallel-run/tabular-dataset-partition-per-column.ipynb @@ -86,7 +86,7 @@ "import requests\n", "\n", "oj_sales_path = \"./oj.csv\"\n", - "r = requests.get(\"http://www.cs.unitn.it/~taufer/Data/oj.csv\")\n", + "r = requests.get(\"https://raw.githubusercontent.com/Azure/azureml-examples/main/sdk/python/jobs/automl-standalone-jobs/automl-forecasting-orange-juice-sales/data/dominicks_OJ.csv\")\n", "open(oj_sales_path, \"wb\").write(r.content)" ] }, @@ -140,7 +140,7 @@ "metadata": {}, "outputs": [], "source": [ - "partitioned_dataset = dataset.partition_by(partition_keys=['store', 'brand'], target=(datastore, \"partition_by_key_res\"), name=\"partitioned_oj_data\")\n", + "partitioned_dataset = dataset.partition_by(partition_keys=['Store', 'Brand'], target=(datastore, \"partition_by_key_res\"), name=\"partitioned_oj_data\")\n", "partitioned_dataset.partition_keys" ] }, @@ -274,7 +274,7 @@ "parallel_run_config = ParallelRunConfig(\n", " source_directory=scripts_folder,\n", " entry_script=script_file, # the user script to run against each input\n", - " partition_keys=['store', 'brand'],\n", + " partition_keys=['Store', 'Brand'],\n", " error_threshold=5,\n", " output_action='append_row',\n", " append_row_file_name=\"revenue_outputs.txt\",\n", @@ -362,8 +362,8 @@ "result_file = os.path.join(target_dir, batch_output.path_on_datastore, parallel_run_config.append_row_file_name)\n", "\n", "df = pd.read_csv(result_file, delimiter=\" \", header=None)\n", + "df.columns=[\"WeekStarting\", \"Quantity\", \"logQuantity\", \"Advert\", \"Price\", \"Age60\", \"COLLEGE\", \"INCOME\", \"Hincome150\", \"Large HH\", \"Minorities\", \"WorkingWoman\", \"SSTRDIST\", \"SSTRVOL\", \"CPDIST5\", \"CPWVOL5\", \"Store\", \"Brand\", \"total_income\"]\n", "\n", - "df.columns = [\"week\", \"logmove\", \"feat\", \"price\", \"AGE60\", \"EDUC\", \"ETHNIC\", \"INCOME\", \"HHLARGE\", \"WORKWOM\", \"HVAL150\", \"SSTRDIST\", \"SSTRVOL\", \"CPDIST5\", \"CPWVOL5\", \"store\", \"brand\", \"total_income\"]\n", "print(\"Prediction has \", df.shape[0], \" rows\")\n", "df.head(10)" ] @@ -413,7 +413,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.8.13" } }, "nbformat": 4, diff --git a/how-to-use-azureml/reinforcement-learning/README.md b/how-to-use-azureml/reinforcement-learning/README.md index d7965894..cb775c30 100644 --- a/how-to-use-azureml/reinforcement-learning/README.md +++ b/how-to-use-azureml/reinforcement-learning/README.md @@ -36,8 +36,6 @@ Using these samples, you will learn how to do the following. 
| [cartpole_ci.ipynb](cartpole-on-compute-instance/cartpole_ci.ipynb) | Notebook to train a Cartpole playing agent on an Azure Machine Learning Compute Instance | | [cartpole_sc.ipynb](cartpole-on-single-compute/cartpole_sc.ipynb) | Notebook to train a Cartpole playing agent on an Azure Machine Learning Compute Cluster (single node) | | [pong_rllib.ipynb](atari-on-distributed-compute/pong_rllib.ipynb) | Notebook for distributed training of Pong agent using RLlib on multiple compute targets | -| [minecraft.ipynb](minecraft-on-distributed-compute/minecraft.ipynb) | Notebook to train an agent to navigate through a lava maze in the Minecraft game | -| [particle.ipynb](multiagent-particle-envs/particle.ipynb) | Notebook to train policies in a multiagent cooperative navigation scenario based on OpenAI's Particle environments | ## Prerequisites diff --git a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/docker/cpu/Dockerfile b/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/docker/cpu/Dockerfile deleted file mode 100644 index b03de989..00000000 --- a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/docker/cpu/Dockerfile +++ /dev/null @@ -1,39 +0,0 @@ -# DisableDockerDetector "Disabled to unblock PRs until the owner can fix the file. Not used in any prod deployments - only as a documentation for the customers" -FROM akdmsft/particle-cpu - -RUN conda install -c anaconda python=3.7 - -# Install required pip packages -RUN pip3 install --upgrade pip setuptools && pip3 install --upgrade \ - pandas \ - matplotlib \ - psutil \ - numpy \ - scipy \ - gym \ - azureml-defaults \ - tensorboardX \ - tensorflow==1.15 \ - tensorflow-probability==0.8.0 \ - onnxruntime \ - tf2onnx \ - cloudpickle==1.1.1 \ - tabulate \ - dm_tree \ - lz4 \ - opencv-python - -RUN cd multiagent-particle-envs && \ - pip3 install -e . && \ - pip3 install --upgrade pyglet==1.3.2 - -RUN pip3 install ray-on-aml==0.1.6 - -RUN pip install protobuf==3.20.0 - -RUN pip3 install --upgrade \ - ray==0.8.7 \ - ray[rllib]==0.8.7 \ - ray[tune]==0.8.7 - -RUN pip install 'msrest<0.7.0' \ No newline at end of file diff --git a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/docker/cpu/patch_files/multi_discrete.py b/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/docker/cpu/patch_files/multi_discrete.py deleted file mode 100644 index 8980ba36..00000000 --- a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/docker/cpu/patch_files/multi_discrete.py +++ /dev/null @@ -1,70 +0,0 @@ -# MIT License - -# Copyright (c) 2018 OpenAI - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -import numpy as np -import gym - - -class MultiDiscrete(gym.Space): - """ - - The multi-discrete action space consists of a series of discrete action spaces with different - parameters - - It can be adapted to both a Discrete action space or a continuous (Box) action space - - It is useful to represent game controllers or keyboards where each key can be represented as - a discrete action space - - It is parametrized by passing an array of arrays containing [min, max] for each discrete action - space where the discrete action space can take any integers from `min` to `max` (both inclusive) - Note: A value of 0 always need to represent the NOOP action. - e.g. Nintendo Game Controller - - Can be conceptualized as 3 discrete action spaces: - 1) Arrow Keys: Discrete 5 - NOOP[0], UP[1], RIGHT[2], DOWN[3], LEFT[4] - params: min: 0, max: 4 - 2) Button A: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 - 3) Button B: Discrete 2 - NOOP[0], Pressed[1] - params: min: 0, max: 1 - - Can be initialized as - MultiDiscrete([ [0,4], [0,1], [0,1] ]) - """ - def __init__(self, array_of_param_array): - self.low = np.array([x[0] for x in array_of_param_array]) - self.high = np.array([x[1] for x in array_of_param_array]) - self.num_discrete_space = self.low.shape[0] - - def sample(self): - """ Returns a array with one sample from each discrete action space """ - # For each row: round(random .* (max - min) + min, 0) - # random_array = prng.np_random.rand(self.num_discrete_space) - random_array = np.random.RandomState().rand(self.num_discrete_space) - return [int(x) for x in np.floor(np.multiply((self.high - self.low + 1.), random_array) + self.low)] - - def contains(self, x): - return len(x) == self.num_discrete_space \ - and (np.array(x) >= self.low).all() \ - and (np.array(x) <= self.high).all() - - @property - def shape(self): - return self.num_discrete_space - - def __repr__(self): - return "MultiDiscrete" + str(self.num_discrete_space) - - def __eq__(self, other): - return np.array_equal(self.low, other.low) and np.array_equal(self.high, other.high) diff --git a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/docker/cpu/patch_files/rendering.py b/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/docker/cpu/patch_files/rendering.py deleted file mode 100644 index e2b2a405..00000000 --- a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/docker/cpu/patch_files/rendering.py +++ /dev/null @@ -1,413 +0,0 @@ -# MIT License - -# Copyright (c) 2018 OpenAI - -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. 
- -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -""" -2D rendering framework -""" -from __future__ import division -import os -import six -import sys -from gym import error -import math -import numpy as np -import pyglet - -from pyglet.gl import glEnable, glHint, glLineWidth, glBlendFunc, glClearColor, glPushMatrix, \ - glTranslatef, glRotatef, glScalef, glPopMatrix, glColor4f, glBegin, glVertex3f, glEnd, glLineStipple, \ - glDisable, glVertex2f, GL_BLEND, GL_LINE_SMOOTH, GL_LINE_SMOOTH_HINT, GL_NICEST, GL_SRC_ALPHA, \ - GL_ONE_MINUS_SRC_ALPHA, GL_LINE_STIPPLE, GL_POINTS, GL_QUADS, GL_TRIANGLES, GL_POLYGON, GL_LINE_LOOP, \ - GL_LINE_STRIP, GL_LINES - - -if "Apple" in sys.version: - if 'DYLD_FALLBACK_LIBRARY_PATH' in os.environ: - os.environ['DYLD_FALLBACK_LIBRARY_PATH'] += ':/usr/lib' - # (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite - - -RAD2DEG = 57.29577951308232 - - -def get_display(spec): - """Convert a display specification (such as :0) into an actual Display - object. - - Pyglet only supports multiple Displays on Linux. - """ - if spec is None: - return None - elif isinstance(spec, six.string_types): - return pyglet.canvas.Display(spec) - else: - raise error.Error('Invalid display specification: {}. (Must be a string like :0 or None.)'.format(spec)) - - -class Viewer(object): - def __init__(self, width, height, display=None): - display = get_display(display) - - self.width = width - self.height = height - - self.window = pyglet.window.Window(width=width, height=height, display=display) - self.window.on_close = self.window_closed_by_user - self.geoms = [] - self.onetime_geoms = [] - self.transform = Transform() - - glEnable(GL_BLEND) - # glEnable(GL_MULTISAMPLE) - glEnable(GL_LINE_SMOOTH) - # glHint(GL_LINE_SMOOTH_HINT, GL_DONT_CARE) - glHint(GL_LINE_SMOOTH_HINT, GL_NICEST) - glLineWidth(2.0) - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA) - - def close(self): - self.window.close() - - def window_closed_by_user(self): - self.close() - - def set_bounds(self, left, right, bottom, top): - assert right > left and top > bottom - scalex = self.width / (right - left) - scaley = self.height / (top - bottom) - self.transform = Transform( - translation=(-left * scalex, -bottom * scaley), - scale=(scalex, scaley)) - - def add_geom(self, geom): - self.geoms.append(geom) - - def add_onetime(self, geom): - self.onetime_geoms.append(geom) - - def render(self, return_rgb_array=False): - glClearColor(1, 1, 1, 1) - self.window.clear() - self.window.switch_to() - self.window.dispatch_events() - self.transform.enable() - for geom in self.geoms: - geom.render() - for geom in self.onetime_geoms: - geom.render() - self.transform.disable() - arr = None - if return_rgb_array: - buffer = pyglet.image.get_buffer_manager().get_color_buffer() - image_data = buffer.get_image_data() - arr = np.fromstring(image_data.data, dtype=np.uint8, sep='') - # In https://github.com/openai/gym-http-api/issues/2, we - # discovered that someone using Xmonad on Arch was having - # a window of size 598 x 398, though a 600 x 400 window - # was requested. 
(Guess Xmonad was preserving a pixel for - # the boundary.) So we use the buffer height/width rather - # than the requested one. - arr = arr.reshape(buffer.height, buffer.width, 4) - arr = arr[::-1, :, 0:3] - self.window.flip() - self.onetime_geoms = [] - return arr - - # Convenience - def draw_circle(self, radius=10, res=30, filled=True, **attrs): - geom = make_circle(radius=radius, res=res, filled=filled) - _add_attrs(geom, attrs) - self.add_onetime(geom) - return geom - - def draw_polygon(self, v, filled=True, **attrs): - geom = make_polygon(v=v, filled=filled) - _add_attrs(geom, attrs) - self.add_onetime(geom) - return geom - - def draw_polyline(self, v, **attrs): - geom = make_polyline(v=v) - _add_attrs(geom, attrs) - self.add_onetime(geom) - return geom - - def draw_line(self, start, end, **attrs): - geom = Line(start, end) - _add_attrs(geom, attrs) - self.add_onetime(geom) - return geom - - def get_array(self): - self.window.flip() - image_data = pyglet.image.get_buffer_manager().get_color_buffer().get_image_data() - self.window.flip() - arr = np.fromstring(image_data.data, dtype=np.uint8, sep='') - arr = arr.reshape(self.height, self.width, 4) - return arr[::-1, :, 0:3] - - -def _add_attrs(geom, attrs): - if "color" in attrs: - geom.set_color(*attrs["color"]) - if "linewidth" in attrs: - geom.set_linewidth(attrs["linewidth"]) - - -class Geom(object): - def __init__(self): - self._color = Color((0, 0, 0, 1.0)) - self.attrs = [self._color] - - def render(self): - for attr in reversed(self.attrs): - attr.enable() - self.render1() - for attr in self.attrs: - attr.disable() - - def render1(self): - raise NotImplementedError - - def add_attr(self, attr): - self.attrs.append(attr) - - def set_color(self, r, g, b, alpha=1): - self._color.vec4 = (r, g, b, alpha) - - -class Attr(object): - def enable(self): - raise NotImplementedError - - def disable(self): - pass - - -class Transform(Attr): - def __init__(self, translation=(0.0, 0.0), rotation=0.0, scale=(1, 1)): - self.set_translation(*translation) - self.set_rotation(rotation) - self.set_scale(*scale) - - def enable(self): - glPushMatrix() - glTranslatef(self.translation[0], self.translation[1], 0) # translate to GL loc ppint - glRotatef(RAD2DEG * self.rotation, 0, 0, 1.0) - glScalef(self.scale[0], self.scale[1], 1) - - def disable(self): - glPopMatrix() - - def set_translation(self, newx, newy): - self.translation = (float(newx), float(newy)) - - def set_rotation(self, new): - self.rotation = float(new) - - def set_scale(self, newx, newy): - self.scale = (float(newx), float(newy)) - - -class Color(Attr): - def __init__(self, vec4): - self.vec4 = vec4 - - def enable(self): - glColor4f(*self.vec4) - - -class LineStyle(Attr): - def __init__(self, style): - self.style = style - - def enable(self): - glEnable(GL_LINE_STIPPLE) - glLineStipple(1, self.style) - - def disable(self): - glDisable(GL_LINE_STIPPLE) - - -class LineWidth(Attr): - def __init__(self, stroke): - self.stroke = stroke - - def enable(self): - glLineWidth(self.stroke) - - -class Point(Geom): - def __init__(self): - Geom.__init__(self) - - def render1(self): - glBegin(GL_POINTS) # draw point - glVertex3f(0.0, 0.0, 0.0) - glEnd() - - -class FilledPolygon(Geom): - def __init__(self, v): - Geom.__init__(self) - self.v = v - - def render1(self): - if len(self.v) == 4: - glBegin(GL_QUADS) - elif len(self.v) > 4: - glBegin(GL_POLYGON) - else: - glBegin(GL_TRIANGLES) - for p in self.v: - glVertex3f(p[0], p[1], 0) # draw each vertex - glEnd() - - color = ( - self._color.vec4[0] * 0.5, 
- self._color.vec4[1] * 0.5, - self._color.vec4[2] * 0.5, - self._color.vec4[3] * 0.5) - glColor4f(*color) - glBegin(GL_LINE_LOOP) - for p in self.v: - glVertex3f(p[0], p[1], 0) # draw each vertex - glEnd() - - -def make_circle(radius=10, res=30, filled=True): - points = [] - for i in range(res): - ang = 2 * math.pi * i / res - points.append((math.cos(ang) * radius, math.sin(ang) * radius)) - if filled: - return FilledPolygon(points) - else: - return PolyLine(points, True) - - -def make_polygon(v, filled=True): - if filled: - return FilledPolygon(v) - else: - return PolyLine(v, True) - - -def make_polyline(v): - return PolyLine(v, False) - - -def make_capsule(length, width): - l, r, t, b = 0, length, width / 2, -width / 2 - box = make_polygon([(l, b), (l, t), (r, t), (r, b)]) - circ0 = make_circle(width / 2) - circ1 = make_circle(width / 2) - circ1.add_attr(Transform(translation=(length, 0))) - geom = Compound([box, circ0, circ1]) - return geom - - -class Compound(Geom): - def __init__(self, gs): - Geom.__init__(self) - self.gs = gs - for g in self.gs: - g.attrs = [a for a in g.attrs if not isinstance(a, Color)] - - def render1(self): - for g in self.gs: - g.render() - - -class PolyLine(Geom): - def __init__(self, v, close): - Geom.__init__(self) - self.v = v - self.close = close - self.linewidth = LineWidth(1) - self.add_attr(self.linewidth) - - def render1(self): - glBegin(GL_LINE_LOOP if self.close else GL_LINE_STRIP) - for p in self.v: - glVertex3f(p[0], p[1], 0) # draw each vertex - glEnd() - - def set_linewidth(self, x): - self.linewidth.stroke = x - - -class Line(Geom): - def __init__(self, start=(0.0, 0.0), end=(0.0, 0.0)): - Geom.__init__(self) - self.start = start - self.end = end - self.linewidth = LineWidth(1) - self.add_attr(self.linewidth) - - def render1(self): - glBegin(GL_LINES) - glVertex2f(*self.start) - glVertex2f(*self.end) - glEnd() - - -class Image(Geom): - def __init__(self, fname, width, height): - Geom.__init__(self) - self.width = width - self.height = height - img = pyglet.image.load(fname) - self.img = img - self.flip = False - - def render1(self): - self.img.blit(-self.width / 2, -self.height / 2, width=self.width, height=self.height) - - -class SimpleImageViewer(object): - def __init__(self, display=None): - self.window = None - self.isopen = False - self.display = display - - def imshow(self, arr): - if self.window is None: - height, width, channels = arr.shape - self.window = pyglet.window.Window(width=width, height=height, display=self.display) - self.width = width - self.height = height - self.isopen = True - assert arr.shape == (self.height, self.width, 3), "You passed in an image with the wrong number shape" - image = pyglet.image.ImageData(self.width, self.height, 'RGB', arr.tobytes(), pitch=self.width * -3) - self.window.clear() - self.window.switch_to() - self.window.dispatch_events() - image.blit(0, 0) - self.window.flip() - - def close(self): - if self.isopen: - self.window.close() - self.isopen = False - - def __del__(self): - self.close() diff --git a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/files/particle_train.py b/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/files/particle_train.py deleted file mode 100644 index 34e25a5d..00000000 --- a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/files/particle_train.py +++ /dev/null @@ -1,123 +0,0 @@ -import os - -from ray_on_aml.core import Ray_On_AML - -from ray.tune import run_experiments -from ray.tune.registry import 
register_trainable, register_env, get_trainable_cls -import ray.rllib.contrib.maddpg.maddpg as maddpg - -from rllib_multiagent_particle_env import env_creator -from util import parse_args - - -def setup_ray(): - ray_on_aml = Ray_On_AML() - ray_on_aml.getRay() - - register_env('particle', env_creator) - - -def gen_policy(args, env, id): - use_local_critic = [ - args.adv_policy == 'ddpg' if id < args.num_adversaries else - args.good_policy == 'ddpg' for id in range(env.num_agents) - ] - return ( - None, - env.observation_space_dict[id], - env.action_space_dict[id], - { - 'agent_id': id, - 'use_local_critic': use_local_critic[id], - 'obs_space_dict': env.observation_space_dict, - 'act_space_dict': env.action_space_dict, - } - ) - - -def gen_policies(args, env_config): - env = env_creator(env_config) - return {'policy_%d' % i: gen_policy(args, env, i) for i in range(len(env.observation_space_dict))} - - -def to_multiagent_config(policies): - policy_ids = list(policies.keys()) - return { - 'policies': policies, - 'policy_mapping_fn': lambda index: policy_ids[index] - } - - -def train(args, env_config): - def stop(trial_id, result): - max_train_time = int(os.environ.get('AML_MAX_TRAIN_TIME_SECONDS', 2 * 60 * 60)) - - return result['episode_reward_mean'] >= args.final_reward \ - or result['time_total_s'] >= max_train_time - - run_experiments({ - 'MADDPG_RLLib': { - 'run': 'contrib/MADDPG', - 'env': 'particle', - 'stop': stop, - # Uncomment to enable more frequent checkpoints: - # 'checkpoint_freq': args.checkpoint_freq, - 'checkpoint_at_end': True, - 'local_dir': args.local_dir, - 'restore': args.restore, - 'config': { - # === Log === - 'log_level': 'ERROR', - - # === Environment === - 'env_config': env_config, - 'num_envs_per_worker': args.num_envs_per_worker, - 'horizon': args.max_episode_len, - - # === Policy Config === - # --- Model --- - 'good_policy': args.good_policy, - 'adv_policy': args.adv_policy, - 'actor_hiddens': [args.num_units] * 2, - 'actor_hidden_activation': 'relu', - 'critic_hiddens': [args.num_units] * 2, - 'critic_hidden_activation': 'relu', - 'n_step': args.n_step, - 'gamma': args.gamma, - - # --- Exploration --- - 'tau': 0.01, - - # --- Replay buffer --- - 'buffer_size': int(1e6), - - # --- Optimization --- - 'actor_lr': args.lr, - 'critic_lr': args.lr, - 'learning_starts': args.train_batch_size * args.max_episode_len, - 'sample_batch_size': args.sample_batch_size, - 'train_batch_size': args.train_batch_size, - 'batch_mode': 'truncate_episodes', - - # --- Parallelism --- - 'num_workers': args.num_workers, - 'num_gpus': args.num_gpus, - 'num_gpus_per_worker': 0, - - # === Multi-agent setting === - 'multiagent': to_multiagent_config(gen_policies(args, env_config)), - }, - }, - }, verbose=1) - - -if __name__ == '__main__': - args = parse_args() - setup_ray() - - env_config = { - 'scenario_name': args.scenario, - 'horizon': args.max_episode_len, - 'video_frequency': args.checkpoint_freq, - } - train(args, env_config) diff --git a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/files/rllib_multiagent_particle_env.py b/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/files/rllib_multiagent_particle_env.py deleted file mode 100644 index d2954daa..00000000 --- a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/files/rllib_multiagent_particle_env.py +++ /dev/null @@ -1,113 +0,0 @@ -# Some code taken from: https://github.com/wsjeon/maddpg-rllib/ - -import imp -import os - -import gym -from gym import wrappers -from ray import 
rllib - -from multiagent.environment import MultiAgentEnv -import multiagent.scenarios as scenarios - - -CUSTOM_SCENARIOS = ['simple_switch'] - - -class ParticleEnvRenderWrapper(gym.Wrapper): - def __init__(self, env, horizon): - super().__init__(env) - self.horizon = horizon - - def reset(self): - self._num_steps = 0 - - return self.env.reset() - - def render(self, mode): - if mode == 'human': - self.env.render(mode=mode) - else: - return self.env.render(mode=mode)[0] - - def step(self, actions): - obs_list, rew_list, done_list, info_list = self.env.step(actions) - - self._num_steps += 1 - done = (all(done_list) or self._num_steps >= self.horizon) - - # Gym monitor expects reward to be an int. This is only used for its - # stats reporter, which we're not interested in. To make video recording - # work, we package the rewards in the info object and extract it below. - return obs_list, 0, done, [rew_list, done_list, info_list] - - -class RLlibMultiAgentParticleEnv(rllib.MultiAgentEnv): - def __init__(self, scenario_name, horizon, monitor_enabled=False, video_frequency=500): - self._env = _make_env(scenario_name, horizon, monitor_enabled, video_frequency) - self.num_agents = self._env.n - self.agent_ids = list(range(self.num_agents)) - - self.observation_space_dict = self._make_dict(self._env.observation_space) - self.action_space_dict = self._make_dict(self._env.action_space) - - def reset(self): - obs_dict = self._make_dict(self._env.reset()) - return obs_dict - - def step(self, action_dict): - actions = list(action_dict.values()) - obs_list, _, _, infos = self._env.step(actions) - rew_list, done_list, _ = infos - - obs_dict = self._make_dict(obs_list) - rew_dict = self._make_dict(rew_list) - done_dict = self._make_dict(done_list) - done_dict['__all__'] = all(done_list) - info_dict = self._make_dict([{'done': done} for done in done_list]) - - return obs_dict, rew_dict, done_dict, info_dict - - def render(self, mode='human'): - self._env.render(mode=mode) - - def _make_dict(self, values): - return dict(zip(self.agent_ids, values)) - - -def _video_callable(video_frequency): - def should_record_video(episode_id): - if episode_id % video_frequency == 0: - return True - return False - - return should_record_video - - -def _make_env(scenario_name, horizon, monitor_enabled, video_frequency): - if scenario_name in CUSTOM_SCENARIOS: - # Scenario file must exist locally - file_path = os.path.join(os.path.dirname(__file__), scenario_name + '.py') - scenario = imp.load_source('', file_path).Scenario() - else: - scenario = scenarios.load(scenario_name + '.py').Scenario() - - world = scenario.make_world() - - env = MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation) - env.metadata['video.frames_per_second'] = 8 - - env = ParticleEnvRenderWrapper(env, horizon) - - if not monitor_enabled: - return env - - return wrappers.Monitor(env, './logs/videos', resume=True, video_callable=_video_callable(video_frequency)) - - -def env_creator(config): - monitor_enabled = False - if hasattr(config, 'worker_index') and hasattr(config, 'vector_index'): - monitor_enabled = (config.worker_index == 1 and config.vector_index == 0) - - return RLlibMultiAgentParticleEnv(**config, monitor_enabled=monitor_enabled) diff --git a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/files/simple_switch.py b/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/files/simple_switch.py deleted file mode 100644 index ca419810..00000000 --- 
a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/files/simple_switch.py +++ /dev/null @@ -1,358 +0,0 @@ -import numpy as np -import random - -from multiagent.core import World, Agent, Landmark -from multiagent.scenario import BaseScenario - - -class SwitchWorld(World): - """ Extended World with hills and switches """ - def __init__(self, hills, switches): - super().__init__() - # add hills and switches - self.hills = hills - self.switches = switches - self.landmarks.extend(self.hills) - self.landmarks.extend(self.switches) - - def step(self): - - super().step() - - # if all hills are activated, reset the switches and hills - if all([hill.active for hill in self.hills]): - self.reset_hills() - self.reset_switches() - else: - # Update switches - for switch in self.switches: - switch.step(self) - # Update hills - for hill in self.hills: - hill.step(self) - - def reset_hills(self): - possible_hill_positions = [np.array([-0.8, 0]), np.array([0, 0.8]), np.array([0.8, 0]), np.array([0, -0.8])] - hill_positions = random.sample(possible_hill_positions, k=len(self.hills)) - for i, hill in enumerate(self.hills): - hill.state.p_pos = hill_positions[i] - hill.deactivate() - - def reset_switches(self): - possible_switch_positions = [ - np.array([-0.8, -0.8]), - np.array([-0.8, 0.8]), - np.array([0.8, -0.8]), - np.array([0.8, 0.8])] - switch_positions = random.sample(possible_switch_positions, k=len(self.switches)) - for i, switch in enumerate(self.switches): - switch.state.p_pos = switch_positions[i] - switch.deactivate() - - -class Scenario(BaseScenario): - def make_world(self): - - # main configurations - num_agents = 2 - num_hills = 2 - num_switches = 1 - self.max_episode_length = 100 - - # create hills (on edges) - possible_hill_positions = [np.array([-0.8, 0]), np.array([0, 0.8]), np.array([0.8, 0]), np.array([0, -0.8])] - hill_positions = random.sample(possible_hill_positions, k=num_hills) - hills = [Hill(hill_positions[i]) for i in range(num_hills)] - # create switches (in corners) - possible_switch_positions = [ - np.array([-0.8, -0.8]), - np.array([-0.8, 0.8]), - np.array([0.8, -0.8]), - np.array([0.8, 0.8])] - switch_positions = random.sample(possible_switch_positions, k=num_switches) - switches = [Switch(switch_positions[i]) for i in range(num_switches)] - - # make world and set basic properties - world = SwitchWorld(hills, switches) - world.dim_c = 2 - world.collaborative = True - - # add agents - world.agents = [Agent() for i in range(num_agents)] - for i, agent in enumerate(world.agents): - agent.name = 'agent %d' % i - agent.collide = True - agent.silent = True - agent.size = 0.1 - agent.accel = 5.0 - agent.max_speed = 5.0 - if i == 0: - agent.color = np.array([0.35, 0.35, 0.85]) - else: - agent.color = np.array([0.35, 0.85, 0.85]) - - # make initial conditions - self.reset_world(world) - - return world - - def reset_world(self, world): - # set random initial states - for agent in world.agents: - agent.state.p_pos = np.array([random.uniform(-1, +1) for _ in range(world.dim_p)]) - agent.state.p_vel = np.zeros(world.dim_p) - agent.state.c = np.zeros(world.dim_c) - # set hills randomly - world.reset_hills() - # set switches randomly - world.reset_switches() - - def is_collision(self, agent1, agent2): - delta_pos = agent1.state.p_pos - agent2.state.p_pos - dist = np.sqrt(np.sum(np.square(delta_pos))) - dist_min = agent1.size + agent2.size - return True if dist < dist_min else False - - def reward(self, agent, world): - # Agents are rewarded based on number of landmarks 
activated - rew = 0 - if all([h.active for h in world.hills]): - rew += 100 - else: - # give bonus each time a hill is activated - for hill in world.hills: - if hill.activated_just_now: - rew += 50 - # penalise timesteps where nothing is happening - if rew == 0: - rew -= 0.1 - # add collision penalty - if agent.collide: - for a in world.agents: - # note: this also counts collision with "itself", so gives -1 at every timestep - # would be good to tune the reward function and use (not a == agent) here - if self.is_collision(a, agent): - rew -= 1 - return rew - - def observation(self, agent, world): - # get positions of all entities in this agent's reference frame - entity_pos = [] - for entity in world.landmarks: # world.entities: - entity_pos.append(entity.state.p_pos - agent.state.p_pos) - # entity colors - entity_color = [] - for entity in world.landmarks: # world.entities: - entity_color.append(entity.color) - # communication of all other agents - comm = [] - other_pos = [] - for other in world.agents: - if other is agent: - continue - comm.append(other.state.c) - other_pos.append(other.state.p_pos - agent.state.p_pos) - return np.concatenate([agent.state.p_vel] + [agent.state.p_pos] + entity_pos + other_pos + comm) - - -class Hill(Landmark): - """ - A hill that can be captured by an agent. - To be captured, a team must occupy a hill for a fixed amount of time. - """ - - def __init__(self, - pos=None, - size=0.08, - capture_time=2 - ): - - # Initialize Landmark super class - super().__init__() - self.movable = False - self.collide = False - self.state.p_pos = pos - self.size = size - - # Set static configurations - self.capture_time = capture_time - - # Initialize all hills to be inactive - self.active = False - self.color = np.array([0.5, 0.5, 0.5]) - self.capture_timer = 0 - - self.activated_just_now = False - - def activate(self): - self.active = True - self.color = np.array([0.1, 0.1, 0.9]) - - def deactivate(self): - self.active = False - self.color = np.array([0.5, 0.5, 0.5]) - - def _is_occupied(self, agents): - # a hill is occupied if an agent stands on it - for agent in agents: - dist = np.sqrt(np.sum(np.square(agent.state.p_pos - self.state.p_pos))) - if dist < agent.size + self.size: - return True - return False - - def step(self, world): - - self.activated_just_now = False - - # If hill isn't activated yet, check if an agent activates it - # if (not self.active) and (world.switch.is_active()): - if (not self.active): - - # Check if an agent is on the hill and all switches are active - if (self._is_occupied(world.agents)) and all([switch.active for switch in world.switches]): - self.capture_timer += 1 - - # activate hill (this is irreversible) - if self.capture_timer > self.capture_time: - self.activate() - self.activated_just_now = True - - # Reset capture timer if hill is not occupied - else: - self.capture_timer = 0 - - -class Switch(Landmark): - """ - A switch that can be activated by an agent. - The agent has to stay on the switch for it to be active. 
- """ - - def __init__(self, - pos=None, - size=0.03, - ): - - # Initialize Landmark super class - super().__init__() - self.movable = False - self.collide = False - self.state.p_pos = pos - self.size = size - - # Initialize all hills to be inactive - self.active = False - self.color = np.array([0.8, 0.05, 0.3]) - self.capture_timer = 0 - - def activate(self): - self.active = True - self.color = np.array([0.1, 0.9, 0.4]) - - def deactivate(self): - self.active = False - self.color = np.array([0.8, 0.05, 0.3]) - - def _is_occupied(self, agents): - # a switch is active if an agent stands on it - for agent in agents: - dist = np.sqrt(np.sum(np.square(agent.state.p_pos - self.state.p_pos))) - if dist < agent.size + self.size: - return True - return False - - def step(self, world): - # check if an agent is on the switch and activate/deactive accordingly - if self._is_occupied(world.agents): - self.activate() - else: - self.deactivate() - - -class SwitchExpertPolicy(): - """ - Hand-coded expert policy for the simple switch environment. - Types of possible experts: - - always go to the switch - - always go to the hills - """ - def __init__(self, dim_c, agent, world, expert_type=None, discrete_action_input=True): - - self.dim_c = dim_c - self.discrete_action_input = discrete_action_input - # the agent we control and world we're in - self.agent = agent - self.world = world - - if expert_type is None: - self.expert_type = random.choice(['switch', 'hill']) - else: - self.expert_type = expert_type - if self.expert_type == 'switch': - self.target_switch = self.select_inital_target_switch() - elif self.expert_type == 'hill': - self.target_hill = self.select_inital_target_hill() - else: - raise NotImplementedError - - self.step_count = 0 - - def select_inital_target_switch(self): - return random.choice(self.world.switches) - - def select_inital_target_hill(self): - return random.choice(self.world.hills) - - def action(self): - - # select a target! 
- if self.expert_type == 'switch': - # if agent is not already on a switch, choose target switch - if not any([switch._is_occupied([self.agent]) for switch in self.world.switches]): - # select a target switch if there's an inactive one - inactive_switches = [switch for switch in self.world.switches if not switch.active] - if len(inactive_switches) > 0 and (self.target_switch not in inactive_switches): - self.target_switch = random.choice(inactive_switches) - target = self.target_switch.state.p_pos - elif self.expert_type == 'hill': - # select a target hill if we haven't done so yet, or the current target switch is inactive - inactive_hills = [hill for hill in self.world.hills if not hill.active] - if len(inactive_hills) > 0 and (self.target_hill not in inactive_hills): - self.target_hill = random.choice(inactive_hills) - target = self.target_hill.state.p_pos - - self.step_count += 1 - - impulse = np.clip(target - self.agent.state.p_pos, -self.agent.u_range, self.agent.u_range) - - if self.discrete_action_input: - u_idx = np.argmax(np.abs(impulse)) - if u_idx == 0 and impulse[u_idx] < 0: - u = 1 - elif u_idx == 0 and impulse[u_idx] > 0: - u = 2 - elif u_idx == 1 and impulse[u_idx] < 0: - u = 3 - elif u_idx == 1 and impulse[u_idx] > 0: - u = 4 - else: - u = 0 - else: - u = np.zeros(5) - if (impulse[0] == impulse[1] == 0) \ - or (self.step_count < self.burn_in) \ - or (self.burn_step != 0 and self.step_count % self.burn_step != 0): - u[0] = 0.1 - else: - pass - # u: noop (?), right, left, down, up - if impulse[0] > 0: # x-direction (- left/right + ) - u[1] = impulse[0] # right - elif impulse[0] < 0: - u[2] = -impulse[0] - if impulse[1] > 0: # y-direction (- up/down + ) - u[3] = impulse[1] - elif impulse[1] < 0: - u[4] = -impulse[1] - - return u diff --git a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/files/util.py b/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/files/util.py deleted file mode 100644 index 75c0c619..00000000 --- a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/files/util.py +++ /dev/null @@ -1,82 +0,0 @@ -import argparse -import os -import re - -from rllib_multiagent_particle_env import CUSTOM_SCENARIOS - - -def parse_args(): - parser = argparse.ArgumentParser('MADDPG with OpenAI MPE') - - # Environment - parser.add_argument('--scenario', type=str, default='simple', - choices=['simple', 'simple_speaker_listener', - 'simple_crypto', 'simple_push', - 'simple_tag', 'simple_spread', 'simple_adversary' - ] + CUSTOM_SCENARIOS, - help='name of the scenario script') - parser.add_argument('--max-episode-len', type=int, default=25, - help='maximum episode length') - parser.add_argument('--num-episodes', type=int, default=60000, - help='number of episodes') - parser.add_argument('--num-adversaries', type=int, default=0, - help='number of adversaries') - parser.add_argument('--good-policy', type=str, default='maddpg', - help='policy for good agents') - parser.add_argument('--adv-policy', type=str, default='maddpg', - help='policy of adversaries') - - # Core training parameters - parser.add_argument('--lr', type=float, default=1e-2, - help='learning rate for Adam optimizer') - parser.add_argument('--gamma', type=float, default=0.95, - help='discount factor') - # NOTE: 1 iteration = sample_batch_size * num_workers timesteps * num_envs_per_worker - parser.add_argument('--sample-batch-size', type=int, default=25, - help='number of data points sampled /update /worker') - parser.add_argument('--train-batch-size', type=int, 
default=1024, - help='number of data points /update') - parser.add_argument('--n-step', type=int, default=1, - help='length of multistep value backup') - parser.add_argument('--num-units', type=int, default=64, - help='number of units in the mlp') - parser.add_argument('--final-reward', type=int, default=-400, - help='final reward after which to stop training') - - # Checkpoint - parser.add_argument('--checkpoint-freq', type=int, default=200, - help='save model once every time this many iterations are completed') - parser.add_argument('--local-dir', type=str, default='./logs', - help='path to save checkpoints') - parser.add_argument('--restore', type=str, default=None, - help='directory in which training state and model are loaded') - - # Parallelism - parser.add_argument('--num-workers', type=int, default=1) - parser.add_argument('--num-envs-per-worker', type=int, default=4) - parser.add_argument('--num-gpus', type=int, default=0) - - return parser.parse_args() - - -def find_final_checkpoint(start_dir): - def find(pattern, path): - result = [] - for root, _, files in os.walk(path): - for name in files: - if pattern.match(name): - result.append(os.path.join(root, name)) - return result - - cp_pattern = re.compile('.*checkpoint-\\d+$') - checkpoint_files = find(cp_pattern, start_dir) - - checkpoint_numbers = [] - for file in checkpoint_files: - checkpoint_numbers.append(int(file.split('-')[-1])) - - final_checkpoint_number = max(checkpoint_numbers) - - return next( - checkpoint_file for checkpoint_file in checkpoint_files - if checkpoint_file.endswith(str(final_checkpoint_number))) diff --git a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/images/particle_simple_spread.gif b/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/images/particle_simple_spread.gif deleted file mode 100644 index 520e0c0e..00000000 Binary files a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/images/particle_simple_spread.gif and /dev/null differ diff --git a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/particle.ipynb b/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/particle.ipynb deleted file mode 100644 index 6f3351dc..00000000 --- a/how-to-use-azureml/reinforcement-learning/multiagent-particle-envs/particle.ipynb +++ /dev/null @@ -1,566 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Copyright (c) Microsoft Corporation. All rights reserved.\n", - "\n", - "Licensed under the MIT License." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Reinforcement Learning in Azure Machine Learning - Training multiple agents on collaborative ParticleEnv tasks\n", - "\n", - "This tutorial will show you how to train policies in a multi-agent scenario.\n", - "We use OpenAI Gym's [Particle environments](https://github.com/openai/multiagent-particle-envs),\n", - "which model agents and landmarks in a two-dimensional world. Particle comes with\n", - "several predefined scenarios, both competitive and collaborative, and with or without communication.\n", - "\n", - "For this tutorial, we pick a cooperative navigation scenario where N agents are in a world with N\n", - "landmarks. The agents' goal is to cover all the landmarks without collisions,\n", - "so agents must learn to avoid each other (social distancing!). 
The video below shows training\n", - "results for N=3 agents/landmarks:\n", - "\n", - "
\n",
- " \n",
- " | \n",
- "
|---|
| Fig 1. Video of 3 agents covering 3 landmarks in a multiagent Particle scenario. | \n", - "