diff --git a/work-with-data/datasets/datasets-tutorial/train-dataset/train.py b/work-with-data/datasets/datasets-tutorial/train-dataset/train.py deleted file mode 100644 index 9fbca891..00000000 --- a/work-with-data/datasets/datasets-tutorial/train-dataset/train.py +++ /dev/null @@ -1,37 +0,0 @@ -import azureml.dataprep as dprep -import azureml.core -import pandas as pd -import logging -import os -import datetime -import shutil - -from azureml.core import Workspace, Datastore, Dataset, Experiment, Run -from sklearn.model_selection import train_test_split -from azureml.core.compute import ComputeTarget, AmlCompute -from azureml.core.compute_target import ComputeTargetException -from sklearn.tree import DecisionTreeClassifier - -run = Run.get_context() -workspace = run.experiment.workspace - -dataset_name = 'clean_Titanic_tutorial' - -dataset = Dataset.get(workspace=workspace, name=dataset_name) -df = dataset.to_pandas_dataframe() - -x_col = ['Pclass', 'Sex', 'SibSp', 'Parch'] -y_col = ['Survived'] -x_df = df.loc[:, x_col] -y_df = df.loc[:, y_col] - -x_train, x_test, y_train, y_test = train_test_split(x_df, y_df, test_size=0.2, random_state=223) - -data = {"train": {"X": x_train, "y": y_train}, - - "test": {"X": x_test, "y": y_test}} - -clf = DecisionTreeClassifier().fit(data["train"]["X"], data["train"]["y"]) - -print('Accuracy of Decision Tree classifier on training set: {:.2f}'.format(clf.score(x_train, y_train))) -print('Accuracy of Decision Tree classifier on test set: {:.2f}'.format(clf.score(x_test, y_test)))