Compare commits

..

1 Commits

Author SHA1 Message Date
amlrelsa-ms
9a43384938 update samples from Release-134 as a part of SDK release 2022-04-01 12:00:36 +00:00
22 changed files with 105 additions and 71 deletions

View File

@@ -103,7 +103,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.41.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version Latest of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -8,24 +8,23 @@ dependencies:
# Currently Azure ML only supports 3.6.0 and later.
- pip==20.2.4
- python>=3.6,<3.9
- matplotlib==3.2.1
- matplotlib==3.3.4
- py-xgboost==1.3.3
- pytorch::pytorch=1.4.0
- conda-forge::fbprophet==0.7.1
- cudatoolkit=10.1.243
- scipy==1.5.2
- tqdm==4.63.1
- notebook
- pywin32==227
- pywin32==225
- PySocks==1.7.1
- Pygments==2.11.2
- conda-forge::pyqt==5.12.3
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.41.0
- azureml-widgets~=Latest
- pytorch-transformers==1.0.0
- spacy==2.2.4
- pystan==2.19.1.1
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.41.0/validated_win32_requirements.txt [--no-deps]
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/Latest/validated_win32_requirements.txt [--no-deps]
- arch==4.14

View File

@@ -10,7 +10,7 @@ dependencies:
- python>=3.6,<3.9
- boto3==1.20.19
- botocore<=1.23.19
- matplotlib==3.2.1
- matplotlib==3.3.4
- numpy==1.19.5
- cython==0.29.14
- urllib3==1.26.7
@@ -24,10 +24,10 @@ dependencies:
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.41.0
- azureml-widgets~=Latest
- pytorch-transformers==1.0.0
- spacy==2.2.4
- pystan==2.19.1.1
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.41.0/validated_linux_requirements.txt [--no-deps]
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/Latest/validated_linux_requirements.txt [--no-deps]
- arch==4.14

View File

@@ -11,7 +11,7 @@ dependencies:
- python>=3.6,<3.9
- boto3==1.20.19
- botocore<=1.23.19
- matplotlib==3.2.1
- matplotlib==3.3.4
- numpy==1.19.5
- cython==0.29.14
- urllib3==1.26.7
@@ -25,10 +25,10 @@ dependencies:
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azureml-widgets~=1.41.0
- azureml-widgets~=Latest
- pytorch-transformers==1.0.0
- spacy==2.2.4
- pystan==2.19.1.1
- https://aka.ms/automl-resources/packages/en_core_web_sm-2.1.0.tar.gz
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/1.41.0/validated_darwin_requirements.txt [--no-deps]
- -r https://automlsdkdataresources.blob.core.windows.net/validated-requirements/Latest/validated_darwin_requirements.txt [--no-deps]
- arch==4.14

View File

@@ -134,7 +134,6 @@
"output[\"Resource Group\"] = ws.resource_group\n",
"output[\"Location\"] = ws.location\n",
"output[\"Experiment Name\"] = experiment.name\n",
"output[\"SDK Version\"] = azureml.core.VERSION\n",
"pd.set_option(\"display.max_colwidth\", None)\n",
"outputDf = pd.DataFrame(data=output, index=[\"\"])\n",
"outputDf.T"

View File

@@ -8,12 +8,9 @@ dependencies:
- urllib3==1.26.7
- PyJWT < 2.0.0
- numpy==1.18.5
- pywin32==227
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azure-core==1.21.1
- azure-identity==1.7.0
- azureml-defaults
- azureml-sdk
- azureml-widgets

View File

@@ -14,8 +14,6 @@ dependencies:
- pip:
# Required packages for AzureML execution, history, and data preparation.
- azure-core==1.21.1
- azure-identity==1.7.0
- azureml-defaults
- azureml-sdk
- azureml-widgets

View File

@@ -92,7 +92,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.41.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version Latest of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -91,7 +91,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.41.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version Latest of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -121,7 +121,7 @@ def calculate_scores_and_build_plots(
input_dir: str, output_dir: str, automl_settings: Dict[str, Any]
):
os.makedirs(output_dir, exist_ok=True)
grains = automl_settings.get(constants.TimeSeries.TIME_SERIES_ID_COLUMN_NAMES)
grains = automl_settings.get(constants.TimeSeries.GRAIN_COLUMN_NAMES)
time_column_name = automl_settings.get(constants.TimeSeries.TIME_COLUMN_NAME)
if grains is None:
grains = []

View File

@@ -322,10 +322,10 @@
"| **iterations** | Number of models to train. This is optional but provides customers with greater control on exit criteria. |\n",
"| **experiment_timeout_hours** | Maximum amount of time in hours that the experiment can take before it terminates. This is optional but provides customers with greater control on exit criteria. |\n",
"| **label_column_name** | The name of the label column. |\n",
"| **forecast_horizon** | The forecast horizon is how many periods forward you would like to forecast. This integer horizon is in units of the timeseries frequency (e.g. daily, weekly). Periods are inferred from your data. |\n",
"| **max_horizon** | The forecast horizon is how many periods forward you would like to forecast. This integer horizon is in units of the timeseries frequency (e.g. daily, weekly). Periods are inferred from your data. |\n",
"| **n_cross_validations** | Number of cross validation splits. Rolling Origin Validation is used to split time-series in a temporally consistent way. |\n",
"| **time_column_name** | The name of your time column. |\n",
"| **time_series_id_column_names** | The column names used to uniquely identify timeseries in data that has multiple rows with the same timestamp. |\n",
"| **grain_column_names** | The column names used to uniquely identify timeseries in data that has multiple rows with the same timestamp. |\n",
"| **track_child_runs** | Flag to disable tracking of child runs. Only best run is tracked if the flag is set to False (this includes the model and metrics of the run). |\n",
"| **partition_column_names** | The names of columns used to group your models. For timeseries, the groups must not split up individual time-series. That is, each group must contain one or more whole time-series. |"
]
@@ -354,8 +354,8 @@
" \"label_column_name\": TARGET_COLNAME,\n",
" \"n_cross_validations\": 3,\n",
" \"time_column_name\": TIME_COLNAME,\n",
" \"forecast_horizon\": 6,\n",
" \"time_series_id_column_names\": partition_column_names,\n",
" \"max_horizon\": 6,\n",
" \"grain_column_names\": partition_column_names,\n",
" \"track_child_runs\": False,\n",
"}\n",
"\n",

View File

@@ -57,7 +57,7 @@
"Notebook synopsis:\n",
"\n",
"1. Creating an Experiment in an existing Workspace\n",
"2. Configuration and remote run of AutoML for a time-series model exploring DNNs\n",
"2. Configuration and remote run of AutoML for a time-series model exploring Regression learners, Arima, Prophet and DNNs\n",
"4. Evaluating the fitted model using a rolling test "
]
},
@@ -92,7 +92,8 @@
"# Squash warning messages for cleaner output in the notebook\n",
"warnings.showwarning = lambda *args, **kwargs: None\n",
"\n",
"from azureml.core import Workspace, Experiment, Dataset\n",
"from azureml.core.workspace import Workspace\n",
"from azureml.core.experiment import Experiment\n",
"from azureml.train.automl import AutoMLConfig\n",
"from matplotlib import pyplot as plt\n",
"from sklearn.metrics import mean_absolute_error, mean_squared_error\n",
@@ -297,21 +298,40 @@
"from helper import split_full_for_forecasting\n",
"\n",
"train, valid = split_full_for_forecasting(df, time_column_name)\n",
"\n",
"# Reset index to create a Tabualr Dataset.\n",
"train.reset_index(inplace=True)\n",
"valid.reset_index(inplace=True)\n",
"test_df.reset_index(inplace=True)\n",
"train.to_csv(\"train.csv\")\n",
"valid.to_csv(\"valid.csv\")\n",
"test_df.to_csv(\"test.csv\")\n",
"\n",
"datastore = ws.get_default_datastore()\n",
"train_dataset = Dataset.Tabular.register_pandas_dataframe(\n",
" train, target=(datastore, \"dataset/\"), name=\"Github_DAU_train\"\n",
"datastore.upload_files(\n",
" files=[\"./train.csv\"],\n",
" target_path=\"github-dataset/tabular/\",\n",
" overwrite=True,\n",
" show_progress=True,\n",
")\n",
"valid_dataset = Dataset.Tabular.register_pandas_dataframe(\n",
" valid, target=(datastore, \"dataset/\"), name=\"Github_DAU_valid\"\n",
"datastore.upload_files(\n",
" files=[\"./valid.csv\"],\n",
" target_path=\"github-dataset/tabular/\",\n",
" overwrite=True,\n",
" show_progress=True,\n",
")\n",
"test_dataset = Dataset.Tabular.register_pandas_dataframe(\n",
" test_df, target=(datastore, \"dataset/\"), name=\"Github_DAU_test\"\n",
"datastore.upload_files(\n",
" files=[\"./test.csv\"],\n",
" target_path=\"github-dataset/tabular/\",\n",
" overwrite=True,\n",
" show_progress=True,\n",
")\n",
"\n",
"from azureml.core import Dataset\n",
"\n",
"train_dataset = Dataset.Tabular.from_delimited_files(\n",
" path=[(datastore, \"github-dataset/tabular/train.csv\")]\n",
")\n",
"valid_dataset = Dataset.Tabular.from_delimited_files(\n",
" path=[(datastore, \"github-dataset/tabular/valid.csv\")]\n",
")\n",
"test_dataset = Dataset.Tabular.from_delimited_files(\n",
" path=[(datastore, \"github-dataset/tabular/test.csv\")]\n",
")"
]
},
@@ -377,7 +397,7 @@
" freq=\"D\", # Set the forecast frequency to be daily\n",
")\n",
"\n",
"# To only allow the TCNForecaster we set the allowed_models parameter to reflect this.\n",
"# We will disable the enable_early_stopping flag to ensure the DNN model is recommended for demonstration purpose.\n",
"automl_config = AutoMLConfig(\n",
" task=\"forecasting\",\n",
" primary_metric=\"normalized_root_mean_squared_error\",\n",
@@ -390,7 +410,7 @@
" max_concurrent_iterations=4,\n",
" max_cores_per_iteration=-1,\n",
" enable_dnn=True,\n",
" allowed_models=[\"TCNForecaster\"],\n",
" enable_early_stopping=False,\n",
" forecasting_parameters=forecasting_parameters,\n",
")"
]
@@ -483,9 +503,7 @@
"if not forecast_model in summary_df[\"run_id\"]:\n",
" forecast_model = \"ForecastTCN\"\n",
"\n",
"best_dnn_run_id = summary_df[summary_df[\"Score\"] == summary_df[\"Score\"].min()][\n",
" \"run_id\"\n",
"][forecast_model]\n",
"best_dnn_run_id = summary_df[\"run_id\"][forecast_model]\n",
"best_dnn_run = Run(experiment, best_dnn_run_id)"
]
},
@@ -546,6 +564,11 @@
},
"outputs": [],
"source": [
"from azureml.core import Dataset\n",
"\n",
"test_dataset = Dataset.Tabular.from_delimited_files(\n",
" path=[(datastore, \"github-dataset/tabular/test.csv\")]\n",
")\n",
"# preview the first 3 rows of the dataset\n",
"test_dataset.take(5).to_pandas_dataframe()"
]

View File

@@ -79,7 +79,9 @@ def get_result_df(remote_run):
if "goal" in run.properties:
goal_minimize = run.properties["goal"].split("_")[-1] == "min"
summary_df = summary_df.T.sort_values("Score", ascending=goal_minimize)
summary_df = summary_df.T.sort_values(
"Score", ascending=goal_minimize
).drop_duplicates(["run_algorithm"])
summary_df = summary_df.set_index("run_algorithm")
return summary_df

View File

@@ -324,7 +324,7 @@
"| **enable_early_stopping** | Flag to enable early termination if the score is not improving in the short term. |\n",
"| **time_column_name** | The name of your time column. |\n",
"| **enable_engineered_explanations** | Engineered feature explanations will be downloaded if enable_engineered_explanations flag is set to True. By default it is set to False to save storage space. |\n",
"| **time_series_id_column_names** | The column names used to uniquely identify timeseries in data that has multiple rows with the same timestamp. |\n",
"| **time_series_id_column_name** | The column names used to uniquely identify timeseries in data that has multiple rows with the same timestamp. |\n",
"| **track_child_runs** | Flag to disable tracking of child runs. Only best run is tracked if the flag is set to False (this includes the model and metrics of the run). |\n",
"| **pipeline_fetch_max_batch_size** | Determines how many pipelines (training algorithms) to fetch at a time for training, this helps reduce throttling when training at large scale. |\n",
"| **partition_column_names** | The names of columns used to group your models. For timeseries, the groups must not split up individual time-series. That is, each group must contain one or more whole time-series. |"
@@ -355,8 +355,8 @@
" \"n_cross_validations\": 3,\n",
" \"time_column_name\": \"WeekStarting\",\n",
" \"drop_column_names\": \"Revenue\",\n",
" \"forecast_horizon\": 6,\n",
" \"time_series_id_column_names\": partition_column_names,\n",
" \"max_horizon\": 6,\n",
" \"grain_column_names\": partition_column_names,\n",
" \"track_child_runs\": False,\n",
"}\n",
"\n",

View File

@@ -106,7 +106,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.41.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version Latest of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -57,10 +57,6 @@ RUN pip install --no-cache-dir \
lz4 \
psutil \
setproctitle
# This is required for ray 0.8.7
RUN pip install -U aiohttp==3.7.4
# This is needed for mpi to locate libpython
ENV LD_LIBRARY_PATH $AZUREML_CONDA_ENVIRONMENT_PATH/lib:$LD_LIBRARY_PATH

View File

@@ -28,7 +28,7 @@ RUN conda install -y conda=4.7.12 python=3.7 && conda clean -ay && \
psutil \
setproctitle \
pygame \
gym[classic_control]==0.19.0 && \
gym[atari]==0.17.3 && \
conda install -y -c conda-forge x264='1!152.20180717' ffmpeg=4.0.2 && \
conda install -c anaconda opencv

View File

@@ -95,7 +95,7 @@
"metadata": {},
"outputs": [],
"source": [
"print(\"This notebook was created using version 1.41.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version Latest of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},

View File

@@ -30,7 +30,7 @@ _categorical_columns = [
def fetch_census_dataset():
"""Fetch the Adult Census Dataset
"""Fetch the Adult Census Dataset.
This uses a particular URL for the Adult Census dataset. The code
is a simplified version of fetch_openml() in sklearn.
@@ -39,25 +39,45 @@ def fetch_census_dataset():
https://openml.org/data/v1/download/1595261.gz
(as of 2021-03-31)
"""
dataset_path = "1595261.gz"
try:
file_stream = gzip.GzipFile(filename=dataset_path, mode='rb')
from urllib import urlretrieve
except ImportError:
from urllib.request import urlretrieve
with closing(file_stream):
def _stream_generator(response):
for line in response:
yield line.decode('utf-8')
filename = "1595261.gz"
data_url = "https://rainotebookscdn.blob.core.windows.net/datasets/"
stream = _stream_generator(file_stream)
data = arff.load(stream)
except Exception as exc:
raise Exception("Could not load dataset from {} with exception {}".format(dataset_path, exc))
remaining_attempts = 5
sleep_duration = 10
while remaining_attempts > 0:
try:
urlretrieve(data_url + filename, filename)
http_stream = gzip.GzipFile(filename=filename, mode='rb')
with closing(http_stream):
def _stream_generator(response):
for line in response:
yield line.decode('utf-8')
stream = _stream_generator(http_stream)
data = arff.load(stream)
except Exception as exc: # noqa: B902
remaining_attempts -= 1
print("Error downloading dataset from {} ({} attempt(s) remaining)"
.format(data_url, remaining_attempts))
print(exc)
sleep(sleep_duration)
sleep_duration *= 2
continue
else:
# dataset successfully downloaded
break
else:
raise Exception("Could not retrieve dataset from {}.".format(data_url))
attributes = OrderedDict(data['attributes'])
arff_columns = list(attributes)
raw_df = pd.DataFrame(data=data['data'], columns=arff_columns)
target_column_name = 'class'

View File

@@ -100,7 +100,7 @@
"\n",
"# Check core SDK version number\n",
"\n",
"print(\"This notebook was created using SDK version 1.41.0, you are currently running version\", azureml.core.VERSION)"
"print(\"This notebook was created using SDK version Latest, you are currently running version\", azureml.core.VERSION)"
]
},
{

View File

@@ -102,7 +102,7 @@
"source": [
"import azureml.core\n",
"\n",
"print(\"This notebook was created using version 1.41.0 of the Azure ML SDK\")\n",
"print(\"This notebook was created using version Latest of the Azure ML SDK\")\n",
"print(\"You are currently using version\", azureml.core.VERSION, \"of the Azure ML SDK\")"
]
},