From c675c9ae04a085c1db710b87c74bc90140b1ed51 Mon Sep 17 00:00:00 2001 From: Keita Onabuta Date: Fri, 4 Oct 2019 13:37:42 +0900 Subject: [PATCH] add commen --- .../FactoryQC-classification-explainer.ipynb | 425 +++++++++++++----- 1 file changed, 322 insertions(+), 103 deletions(-) diff --git a/Sample/Automated-Machine-Learning/FactoryQC-classification-explainer.ipynb b/Sample/Automated-Machine-Learning/FactoryQC-classification-explainer.ipynb index e810ce5..31173b9 100644 --- a/Sample/Automated-Machine-Learning/FactoryQC-classification-explainer.ipynb +++ b/Sample/Automated-Machine-Learning/FactoryQC-classification-explainer.ipynb @@ -29,19 +29,7 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING: Logging before flag parsing goes to stderr.\n", - "W0923 09:51:41.159162 4564993472 deprecation_wrapper.py:119] From /Users/konabuta/miniconda3/envs/myenv/lib/python3.6/site-packages/azureml/automl/core/_vendor/automl/client/core/common/tf_wrappers.py:36: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead.\n", - "\n", - "W0923 09:51:41.160236 4564993472 deprecation_wrapper.py:119] From /Users/konabuta/miniconda3/envs/myenv/lib/python3.6/site-packages/azureml/automl/core/_vendor/automl/client/core/common/tf_wrappers.py:36: The name tf.logging.ERROR is deprecated. 
Please use tf.compat.v1.logging.ERROR instead.\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "import logging\n", "\n", @@ -86,7 +74,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 実験名の設定" + "### 実験名の設定\n", + "機械学習の実験の名称を指定します。後で記録されたメトリックなどを確認する際に利用します。" ] }, { @@ -104,23 +93,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 学習データの準備" + "### 学習データの準備\n", + "Pandas Dataframe や Numpy が利用できます。また、Azure Machine Learning に _Dataset_ として登録してある場合には、Python SDK 経由でそのデータを呼び出して、そのまま利用することもできます。 " ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, - "outputs": [], - "source": [ - "df = Dataset.get(ws, name='factory').to_pandas_dataframe()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/konabuta/miniconda3/envs/azureml/lib/python3.6/site-packages/azureml/dataprep/api/dataflow.py:681: UserWarning: Please install pyarrow>=0.11.0 for improved performance of to_pandas_dataframe. You can ensure the correct version is installed by running: pip install azureml-dataprep[pandas].\n", + " warnings.warn('Please install pyarrow>=0.11.0 for improved performance of to_pandas_dataframe. 
'\n" + ] + }, { "data": { "text/html": [ @@ -265,15 +254,40 @@ "4 1.00 3.19 0.40 9.90 " ] }, - "execution_count": 5, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Azure ML service Web Interface で Dataset が登録済みの場合\n", + "df = Dataset.get(ws, name='factory').to_pandas_dataframe() # Pandas Dataframe に変換\n", "df.head()" ] }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# # 本ノートブックで Dataset として登録する場合はこちら (ここでは、factory-dataset という名称)\n", + "# datastore = ws.get_default_datastore()\n", + "# datastore.upload_files(files = ['../data/Factory.csv'],\n", + "# target_path = 'dllab/',\n", + "# overwrite = True,\n", + "# show_progress = True)\n", + "# dataset = Dataset.Tabular.from_delimited_files(path = [(datastore, 'dllab/Factory.csv')])\n", + "\n", + "\n", + "# dataset = dataset.register(workspace = ws,\n", + "# name = 'factory-dataset',\n", + "# description='training dataset from client python',\n", + "# create_new_version=True)\n", + "# df = dataset.to_pandas_dataframe() # Pandas Dataframe に変換\n", + "# df.head()" + ] + }, { "cell_type": "code", "execution_count": 6, @@ -288,6 +302,172 @@ "X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.1,random_state=100,stratify=y)" ] }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ProcessA-PressureProcessA-HumidityProcessA-VibrationProcessB-LightProcessB-SkillProcessB-TempProcessB-RotationProcessC-DensityProcessC-PHProcessC-skewnessProcessC-Time
32947.300.250.281.500.0419.00113.000.993.380.5610.10
19358.800.340.339.700.0446.00172.001.003.080.4010.20
9177.700.300.321.600.0423.00124.000.992.930.3311.00
14787.900.220.244.600.0439.00159.000.992.990.2811.50
8586.700.220.3910.200.0460.00149.001.003.170.5410.00
\n", + "
" + ], + "text/plain": [ + " ProcessA-Pressure ProcessA-Humidity ProcessA-Vibration \\\n", + "3294 7.30 0.25 0.28 \n", + "1935 8.80 0.34 0.33 \n", + "917 7.70 0.30 0.32 \n", + "1478 7.90 0.22 0.24 \n", + "858 6.70 0.22 0.39 \n", + "\n", + " ProcessB-Light ProcessB-Skill ProcessB-Temp ProcessB-Rotation \\\n", + "3294 1.50 0.04 19.00 113.00 \n", + "1935 9.70 0.04 46.00 172.00 \n", + "917 1.60 0.04 23.00 124.00 \n", + "1478 4.60 0.04 39.00 159.00 \n", + "858 10.20 0.04 60.00 149.00 \n", + "\n", + " ProcessC-Density ProcessC-PH ProcessC-skewness ProcessC-Time \n", + "3294 0.99 3.38 0.56 10.10 \n", + "1935 1.00 3.08 0.40 10.20 \n", + "917 0.99 2.93 0.33 11.00 \n", + "1478 0.99 2.99 0.28 11.50 \n", + "858 1.00 3.17 0.54 10.00 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, ..., 1, 0, 0], dtype=int64)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_train" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -296,15 +476,34 @@ "### 学習事前設定" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "\n", + "|Property|Description|\n", + "|-|-|\n", + "|**task**|classification, regression or forecasting|\n", + "|**primary_metric**|精度指標の指定, 分類は下記のメトリックをサポート:
accuracy
AUC_weighted
average_precision_score_weighted
norm_macro_recall
precision_score_weighted
※ 詳細については、[主要なメトリック](https://docs.microsoft.com/ja-JP/azure/machine-learning/service/how-to-configure-auto-train#primary-metric) を参照|\n", + "|**iteration_timeout_minutes**|イテレーション毎の最大実行時間|\n", + "|**iterations**|イテレーション回数 (=試行するパイプライン数) |\n", + "|**X**|学習データ (説明変数)|\n", + "|**y**|学習データ (ターゲット変数)|\n", + " \n", + "
\n", + "\n" + ] + }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "automl_settings = {\n", " \"iteration_timeout_minutes\": 5,\n", - " \"iterations\": 10,\n", + " \"iterations\": 5,\n", " \"n_cross_validations\": 3,\n", " \"primary_metric\": 'AUC_weighted',\n", " \"preprocess\": True,\n", @@ -312,7 +511,7 @@ " \"enable_stack_ensemble\": False\n", "}\n", "\n", - "automl_config = AutoMLConfig(task = 'classification',\n", + "automl_config = AutoMLConfig(task = 'classification', # regression, forecasting\n", " X = X_train,\n", " y = y_train,\n", " **automl_settings\n", @@ -328,7 +527,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -336,7 +535,7 @@ "output_type": "stream", "text": [ "Running on local machine\n", - "Parent Run ID: AutoML_c6ca036d-5d67-48b8-b263-d1b687aaf4fc\n", + "Parent Run ID: AutoML_35685078-a4d6-46b0-b761-1cd0137037f1\n", "Current status: DatasetFeaturization. Beginning to featurize the dataset.\n", "Current status: DatasetEvaluation. Gathering dataset statistics.\n", "Current status: FeaturesGeneration. 
Generating features for the dataset.\n", @@ -347,6 +546,10 @@ "DATA GUARDRAILS SUMMARY:\n", "For more details, use API: run.get_guardrails()\n", "\n", + "TYPE: Class Balancing Detection\n", + "STATUS: PASSED\n", + "DESCRIPTION: Classes are balanced in the training data.\n", + "\n", "TYPE: Missing Values Imputation\n", "STATUS: PASSED\n", "DESCRIPTION: There were no missing values found in the training data.\n", @@ -367,16 +570,11 @@ "****************************************************************************************************\n", "\n", " ITERATION PIPELINE DURATION METRIC BEST\n", - " 0 StandardScalerWrapper SGD 0:00:23 0.7815 0.7815\n", - " 1 StandardScalerWrapper SGD 0:00:23 0.7880 0.7880\n", - " 2 MinMaxScaler LightGBM 0:00:23 0.8421 0.8421\n", - " 3 StandardScalerWrapper SGD 0:00:22 0.7834 0.8421\n", - " 4 StandardScalerWrapper ExtremeRandomTrees 0:00:24 0.8151 0.8421\n", - " 5 StandardScalerWrapper LightGBM 0:00:23 0.8482 0.8482\n", - " 6 StandardScalerWrapper SGD 0:00:23 0.7841 0.8482\n", - " 7 MinMaxScaler RandomForest 0:00:23 0.8166 0.8482\n", - " 8 StandardScalerWrapper SGD 0:00:22 0.7495 0.8482\n", - " 9 MinMaxScaler SGD 0:00:23 0.7648 0.8482\n" + " 0 StandardScalerWrapper SGD 0:00:24 0.7817 0.7817\n", + " 1 StandardScalerWrapper SGD 0:00:24 0.7841 0.7841\n", + " 2 MinMaxScaler LightGBM 0:00:24 0.8421 0.8421\n", + " 3 StandardScalerWrapper SGD 0:00:24 0.7831 0.8421\n", + " 4 StandardScalerWrapper ExtremeRandomTrees 0:00:28 0.8160 0.8421\n" ] } ], @@ -386,13 +584,13 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "cec82b729d024509aa061881c4f91322", + "model_id": "096df591596b4a9bbfa5e269d56cf782", "version_major": 2, "version_minor": 0 }, @@ -412,18 +610,18 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'runId': 
'AutoML_c6ca036d-5d67-48b8-b263-d1b687aaf4fc',\n", + "{'runId': 'AutoML_35685078-a4d6-46b0-b761-1cd0137037f1',\n", " 'target': 'local',\n", " 'status': 'Completed',\n", - " 'startTimeUtc': '2019-09-23T00:52:08.114743Z',\n", - " 'endTimeUtc': '2019-09-23T00:56:08.788078Z',\n", - " 'properties': {'num_iterations': '10',\n", + " 'startTimeUtc': '2019-10-04T04:33:00.602713Z',\n", + " 'endTimeUtc': '2019-10-04T04:35:14.483804Z',\n", + " 'properties': {'num_iterations': '5',\n", " 'training_type': 'TrainFull',\n", " 'acquisition_function': 'EI',\n", " 'primary_metric': 'AUC_weighted',\n", @@ -432,26 +630,27 @@ " 'acquisition_parameter': '0',\n", " 'num_cross_validation': '3',\n", " 'target': 'local',\n", - " 'RawAMLSettingsString': \"{'name': 'automl-classif-factoryQC', 'path': '.', 'subscription_id': '9c0f91b8-eb2f-484c-979c-15848c098a6b', 'resource_group': 'mlservice', 'workspace_name': 'azureml', 'region': 'eastus', 'compute_target': 'local', 'spark_service': None, 'iterations': 10, 'primary_metric': 'AUC_weighted', 'task_type': 'classification', 'data_script': None, 'validation_size': 0.0, 'n_cross_validations': 3, 'y_min': None, 'y_max': None, 'num_classes': 2, 'preprocess': True, 'lag_length': 0, 'is_timeseries': False, 'max_cores_per_iteration': 1, 'max_concurrent_iterations': 1, 'iteration_timeout_minutes': 5, 'mem_in_mb': None, 'enforce_time_on_windows': False, 'experiment_timeout_minutes': None, 'experiment_exit_score': None, 'whitelist_models': None, 'blacklist_algos': ['XGBoostClassifier', 'XGBoostClassifier'], 'supported_models': ['LogisticRegression', 'SGD', 'MultinomialNaiveBayes', 'BernoulliNaiveBayes', 'SVM', 'LinearSVM', 'KNN', 'DecisionTree', 'RandomForest', 'ExtremeRandomTrees', 'LightGBM', 'XGBoostClassifier', 'NimbusMLAveragedPerceptronClassifier', 'NimbusMLLinearSVMClassifier', 'CatBoostClassifier', 'GradientBoosting', 'TensorFlowDNN', 'TensorFlowLinearClassifier'], 'auto_blacklist': True, 'blacklist_samples_reached': False, 'exclude_nan_labels': 
True, 'verbosity': 20, 'debug_log': 'automl.log', 'show_warnings': False, 'model_explainability': False, 'service_url': None, 'sdk_url': None, 'sdk_packages': None, 'enable_onnx_compatible_models': False, 'enable_feature_sweeping': True, 'telemetry_verbosity': 'INFO', 'send_telemetry': True, 'enable_early_stopping': False, 'early_stopping_n_iters': 10, 'metrics': None, 'enable_ensembling': False, 'enable_stack_ensembling': False, 'ensemble_iterations': 10, 'enable_tf': False, 'enable_cache': True, 'enable_subsampling': False, 'subsample_seed': None, 'enable_nimbusml': False, 'use_incremental_learning': False, 'label_column_name': None, 'weight_column_name': None, 'cost_mode': 0, 'metric_operation': 'maximize'}\",\n", - " 'AMLSettingsJsonString': '{\\n \"name\": \"automl-classif-factoryQC\",\\n \"path\": \".\",\\n \"subscription_id\": \"9c0f91b8-eb2f-484c-979c-15848c098a6b\",\\n \"resource_group\": \"mlservice\",\\n \"workspace_name\": \"azureml\",\\n \"region\": \"eastus\",\\n \"compute_target\": \"local\",\\n \"spark_service\": null,\\n \"iterations\": 10,\\n \"primary_metric\": \"AUC_weighted\",\\n \"task_type\": \"classification\",\\n \"data_script\": null,\\n \"validation_size\": 0.0,\\n \"n_cross_validations\": 3,\\n \"y_min\": null,\\n \"y_max\": null,\\n \"num_classes\": 2,\\n \"preprocess\": true,\\n \"lag_length\": 0,\\n \"is_timeseries\": false,\\n \"max_cores_per_iteration\": 1,\\n \"max_concurrent_iterations\": 1,\\n \"iteration_timeout_minutes\": 5,\\n \"mem_in_mb\": null,\\n \"enforce_time_on_windows\": false,\\n \"experiment_timeout_minutes\": null,\\n \"experiment_exit_score\": null,\\n \"whitelist_models\": null,\\n \"blacklist_algos\": [\\n \"XGBoostClassifier\",\\n \"XGBoostClassifier\"\\n ],\\n \"supported_models\": [\\n \"LogisticRegression\",\\n \"SGD\",\\n \"MultinomialNaiveBayes\",\\n \"BernoulliNaiveBayes\",\\n \"SVM\",\\n \"LinearSVM\",\\n \"KNN\",\\n \"DecisionTree\",\\n \"RandomForest\",\\n \"ExtremeRandomTrees\",\\n \"LightGBM\",\\n 
\"XGBoostClassifier\",\\n \"NimbusMLAveragedPerceptronClassifier\",\\n \"NimbusMLLinearSVMClassifier\",\\n \"CatBoostClassifier\",\\n \"GradientBoosting\",\\n \"TensorFlowDNN\",\\n \"TensorFlowLinearClassifier\"\\n ],\\n \"auto_blacklist\": true,\\n \"blacklist_samples_reached\": false,\\n \"exclude_nan_labels\": true,\\n \"verbosity\": 20,\\n \"debug_log\": \"automl.log\",\\n \"show_warnings\": false,\\n \"model_explainability\": false,\\n \"service_url\": null,\\n \"sdk_url\": null,\\n \"sdk_packages\": null,\\n \"enable_onnx_compatible_models\": false,\\n \"enable_feature_sweeping\": true,\\n \"telemetry_verbosity\": \"INFO\",\\n \"send_telemetry\": true,\\n \"enable_early_stopping\": false,\\n \"early_stopping_n_iters\": 10,\\n \"metrics\": null,\\n \"enable_ensembling\": false,\\n \"enable_stack_ensembling\": false,\\n \"ensemble_iterations\": 10,\\n \"enable_tf\": false,\\n \"enable_cache\": true,\\n \"enable_subsampling\": false,\\n \"subsample_seed\": null,\\n \"enable_nimbusml\": false,\\n \"use_incremental_learning\": false,\\n \"label_column_name\": null,\\n \"weight_column_name\": null,\\n \"cost_mode\": 0,\\n \"metric_operation\": \"maximize\"\\n}',\n", + " 'RawAMLSettingsString': \"{'name': 'automl-classif-factoryQC', 'path': '.', 'subscription_id': '9c0f91b8-eb2f-484c-979c-15848c098a6b', 'resource_group': 'mlservice', 'workspace_name': 'azureml', 'region': 'eastus', 'compute_target': 'local', 'spark_service': None, 'azure_service': None, 'iterations': 5, 'primary_metric': 'AUC_weighted', 'task_type': 'classification', 'data_script': None, 'validation_size': 0.0, 'n_cross_validations': 3, 'y_min': None, 'y_max': None, 'num_classes': 2, 'featurization': 'off', 'preprocess': True, 'lag_length': 0, 'is_timeseries': False, 'max_cores_per_iteration': 1, 'max_concurrent_iterations': 1, 'iteration_timeout_minutes': 5, 'mem_in_mb': None, 'enforce_time_on_windows': False, 'experiment_timeout_minutes': None, 'experiment_exit_score': None, 'whitelist_models': 
None, 'blacklist_algos': ['XGBoostClassifier', 'XGBoostClassifier'], 'supported_models': ['LogisticRegression', 'SGD', 'MultinomialNaiveBayes', 'BernoulliNaiveBayes', 'SVM', 'LinearSVM', 'KNN', 'DecisionTree', 'RandomForest', 'ExtremeRandomTrees', 'LightGBM', 'XGBoostClassifier', 'NimbusMLAveragedPerceptronClassifier', 'NimbusMLLinearSVMClassifier', 'GradientBoosting', 'TensorFlowDNN', 'TensorFlowLinearClassifier'], 'auto_blacklist': True, 'blacklist_samples_reached': False, 'exclude_nan_labels': True, 'verbosity': 20, 'debug_log': 'automl.log', 'show_warnings': False, 'model_explainability': False, 'service_url': None, 'sdk_url': None, 'sdk_packages': None, 'enable_onnx_compatible_models': False, 'enable_feature_sweeping': True, 'vm_type': None, 'telemetry_verbosity': 'INFO', 'send_telemetry': True, 'enable_early_stopping': False, 'early_stopping_n_iters': 10, 'metrics': None, 'enable_ensembling': False, 'enable_stack_ensembling': False, 'ensemble_iterations': 5, 'enable_tf': False, 'enable_cache': True, 'enable_subsampling': False, 'subsample_seed': None, 'enable_nimbusml': False, 'enable_streaming': False, 'label_column_name': None, 'weight_column_name': None, 'cost_mode': 0, 'metric_operation': 'maximize'}\",\n", + " 'AMLSettingsJsonString': '{\"name\": \"automl-classif-factoryQC\", \"path\": \".\", \"subscription_id\": \"9c0f91b8-eb2f-484c-979c-15848c098a6b\", \"resource_group\": \"mlservice\", \"workspace_name\": \"azureml\", \"region\": \"eastus\", \"compute_target\": \"local\", \"spark_service\": null, \"azure_service\": null, \"iterations\": 5, \"primary_metric\": \"AUC_weighted\", \"task_type\": \"classification\", \"data_script\": null, \"validation_size\": 0.0, \"n_cross_validations\": 3, \"y_min\": null, \"y_max\": null, \"num_classes\": 2, \"featurization\": \"off\", \"preprocess\": true, \"lag_length\": 0, \"is_timeseries\": false, \"max_cores_per_iteration\": 1, \"max_concurrent_iterations\": 1, \"iteration_timeout_minutes\": 5, \"mem_in_mb\": null, 
\"enforce_time_on_windows\": false, \"experiment_timeout_minutes\": null, \"experiment_exit_score\": null, \"whitelist_models\": null, \"blacklist_algos\": [\"XGBoostClassifier\", \"XGBoostClassifier\"], \"supported_models\": [\"LogisticRegression\", \"SGD\", \"MultinomialNaiveBayes\", \"BernoulliNaiveBayes\", \"SVM\", \"LinearSVM\", \"KNN\", \"DecisionTree\", \"RandomForest\", \"ExtremeRandomTrees\", \"LightGBM\", \"XGBoostClassifier\", \"NimbusMLAveragedPerceptronClassifier\", \"NimbusMLLinearSVMClassifier\", \"GradientBoosting\", \"TensorFlowDNN\", \"TensorFlowLinearClassifier\"], \"auto_blacklist\": true, \"blacklist_samples_reached\": false, \"exclude_nan_labels\": true, \"verbosity\": 20, \"debug_log\": \"automl.log\", \"show_warnings\": false, \"model_explainability\": false, \"service_url\": null, \"sdk_url\": null, \"sdk_packages\": null, \"enable_onnx_compatible_models\": false, \"enable_feature_sweeping\": true, \"vm_type\": null, \"telemetry_verbosity\": \"INFO\", \"send_telemetry\": true, \"enable_early_stopping\": false, \"early_stopping_n_iters\": 10, \"metrics\": null, \"enable_ensembling\": false, \"enable_stack_ensembling\": false, \"ensemble_iterations\": 5, \"enable_tf\": false, \"enable_cache\": true, \"enable_subsampling\": false, \"subsample_seed\": null, \"enable_nimbusml\": false, \"enable_streaming\": false, \"label_column_name\": null, \"weight_column_name\": null, \"cost_mode\": 0, \"metric_operation\": \"maximize\"}',\n", " 'DataPrepJsonString': None,\n", " 'EnableSubsampling': 'False',\n", " 'runTemplate': 'AutoML',\n", " 'azureml.runsource': 'automl',\n", " 'display_task_type': 'classification',\n", - " 'dependencies_versions': '{\"azureml-widgets\": \"1.0.62\", \"azureml-train\": \"1.0.62\", \"azureml-train-restclients-hyperdrive\": \"1.0.62\", \"azureml-train-core\": \"1.0.62\", \"azureml-train-automl\": \"1.0.62\", \"azureml-telemetry\": \"1.0.62\", \"azureml-sdk\": \"1.0.62\", \"azureml-pipeline\": \"1.0.62\", 
\"azureml-pipeline-steps\": \"1.0.62\", \"azureml-pipeline-core\": \"1.0.62\", \"azureml-opendatasets\": \"1.0.55\", \"azureml-explain-model\": \"1.0.62\", \"azureml-dataprep\": \"1.1.18\", \"azureml-dataprep-native\": \"13.0.3\", \"azureml-core\": \"1.0.62\", \"azureml-contrib-services\": \"1.0.62\", \"azureml-contrib-server\": \"1.0.62\", \"azureml-contrib-notebook\": \"1.0.62\", \"azureml-contrib-explain-model\": \"1.0.62\", \"azureml-contrib-datadrift\": \"1.0.55\", \"azureml-automl-core\": \"1.0.62\"}',\n", + " 'dependencies_versions': '{\"azureml-widgets\": \"1.0.65\", \"azureml-train\": \"1.0.65\", \"azureml-train-restclients-hyperdrive\": \"1.0.65\", \"azureml-train-core\": \"1.0.65\", \"azureml-train-automl\": \"1.0.65\", \"azureml-telemetry\": \"1.0.65\", \"azureml-sdk\": \"1.0.65\", \"azureml-pipeline\": \"1.0.65\", \"azureml-pipeline-steps\": \"1.0.65\", \"azureml-pipeline-core\": \"1.0.65\", \"azureml-explain-model\": \"1.0.65\", \"azureml-dataprep\": \"1.1.19\", \"azureml-dataprep-native\": \"13.0.3\", \"azureml-core\": \"1.0.65\", \"azureml-contrib-services\": \"1.0.65\", \"azureml-contrib-server\": \"1.0.65\", \"azureml-contrib-notebook\": \"1.0.65\", \"azureml-contrib-explain-model\": \"1.0.65\", \"azureml-automl-core\": \"1.0.65.1\"}',\n", " 'ProblemInfoJsonString': '{\"dataset_num_categorical\": 0, \"is_sparse\": false, \"subsampling\": false, \"dataset_classes\": 2, \"dataset_features\": 11, \"dataset_samples\": 4408, \"single_frequency_class_detected\": false}',\n", " 'azureml.git.repository_uri': 'https://github.com/konabuta/DataExplore-Workshop.git',\n", " 'mlflow.source.git.repoURL': 'https://github.com/konabuta/DataExplore-Workshop.git',\n", " 'azureml.git.branch': 'master',\n", " 'mlflow.source.git.branch': 'master',\n", - " 'azureml.git.commit': '3a1518cde5efd2218e36e679b74886613d09bcf6',\n", - " 'mlflow.source.git.commit': '3a1518cde5efd2218e36e679b74886613d09bcf6',\n", + " 'azureml.git.commit': 
'754d84b491ecff02de2c4f52f4df3359531d94de',\n", + " 'mlflow.source.git.commit': '754d84b491ecff02de2c4f52f4df3359531d94de',\n", " 'azureml.git.dirty': 'True'},\n", + " 'inputDatasets': [],\n", " 'logFiles': {}}" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -461,24 +660,31 @@ "local_run.get_details()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### チャンピオンモデルの取得" + ] + }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
ExperimentIdTypeStatusDetails PageDocs Page
automl-classif-factoryQCAutoML_c6ca036d-5d67-48b8-b263-d1b687aaf4fc_5CompletedLink to Azure PortalLink to Documentation
" + "
ExperimentIdTypeStatusDetails PageDocs Page
automl-classif-factoryQCAutoML_35685078-a4d6-46b0-b761-1cd0137037f1_2CompletedLink to Azure PortalLink to Documentation
" ], "text/plain": [ "Run(Experiment: automl-classif-factoryQC,\n", - "Id: AutoML_c6ca036d-5d67-48b8-b263-d1b687aaf4fc_5,\n", + "Id: AutoML_35685078-a4d6-46b0-b761-1cd0137037f1_2,\n", "Type: None,\n", "Status: Completed)" ] }, - "execution_count": 11, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -492,12 +698,13 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### モデルの理解" + "### モデルの理解\n", + "参考 : [自動化された ML モデルを理解する](https://docs.microsoft.com/ja-JP/azure/machine-learning/service/how-to-configure-auto-train#understand-automated-ml-models)" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -560,7 +767,7 @@ " 'Tranformations': ['MeanImputer']}]" ] }, - "execution_count": 12, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -571,7 +778,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -581,38 +788,38 @@ "datatransformer\n", "{'enable_feature_sweeping': None,\n", " 'feature_sweeping_timeout': None,\n", + " 'featurization_config': None,\n", + " 'is_cross_validation': None,\n", " 'is_onnx_compatible': None,\n", + " 'jasmine_client': None,\n", " 'logger': None,\n", " 'observer': None,\n", + " 'parent_run_id': 'AutoML_35685078-a4d6-46b0-b761-1cd0137037f1',\n", " 'task': None}\n", "\n", - "StandardScalerWrapper\n", - "{'class_name': 'StandardScaler',\n", - " 'copy': True,\n", - " 'module_name': 'sklearn.preprocessing.data',\n", - " 'with_mean': False,\n", - " 'with_std': False}\n", + "MinMaxScaler\n", + "{'copy': True, 'feature_range': (0, 1)}\n", "\n", "LightGBMClassifier\n", - "{'boosting_type': 'gbdt',\n", + "{'boosting_type': 'goss',\n", " 'class_weight': None,\n", - " 'colsample_bytree': 0.6933333333333332,\n", + " 'colsample_bytree': 0.7922222222222222,\n", " 'importance_type': 'split',\n", - " 'learning_rate': 0.07894947368421053,\n", - " 'max_bin': 240,\n", - " 
'max_depth': 3,\n", - " 'min_child_samples': 77,\n", - " 'min_child_weight': 6,\n", - " 'min_split_gain': 0.631578947368421,\n", + " 'learning_rate': 0.1,\n", + " 'max_bin': 170,\n", + " 'max_depth': 4,\n", + " 'min_child_samples': 168,\n", + " 'min_child_weight': 4,\n", + " 'min_split_gain': 0.8421052631578947,\n", " 'n_estimators': 50,\n", " 'n_jobs': 1,\n", - " 'num_leaves': 65,\n", + " 'num_leaves': 62,\n", " 'objective': None,\n", " 'random_state': None,\n", - " 'reg_alpha': 0.5789473684210527,\n", - " 'reg_lambda': 0.631578947368421,\n", + " 'reg_alpha': 0.7894736842105263,\n", + " 'reg_lambda': 0.15789473684210525,\n", " 'silent': True,\n", - " 'subsample': 0.3963157894736842,\n", + " 'subsample': 1,\n", " 'subsample_for_bin': 200000,\n", " 'subsample_freq': 0,\n", " 'verbose': -10}\n", @@ -648,9 +855,16 @@ "## 3. モデルの解釈" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Azure Machine Learning Interpretability SDK は、Microsoftと主要な3rd Partyのライブラリ(LIME,SHAP etc)で構成されたモデル解釈のフレームワークで、統合APIをご提供しています。" + ] + }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -668,28 +882,23 @@ } ], "source": [ + "# Automated ML から情報を収集\n", "from azureml.train.automl.automl_explain_utilities import AutoMLExplainerSetupClass, automl_setup_model_explanations\n", - "\n", - "automl_explainer_setup_obj = automl_setup_model_explanations(fitted_model, X=X_train, X_test=X_test, y=y_train, task='classification')" + "automl_explainer_setup_obj = automl_setup_model_explanations(fitted_model, X=X_train, X_test=X_test, y=y_train, task='classification')\n", + "\n" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using older than supported version of lightgbm, please upgrade to version greater than 2.2.1\n" - ] - } - ], + "outputs": [], "source": [ + "# Automated ML のモデルを解釈する際は、MimicWrapper 
を利用\n", "from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel\n", "from azureml.explain.model.mimic_wrapper import MimicWrapper\n", - "explainer = MimicWrapper(ws, automl_explainer_setup_obj.automl_estimator, LGBMExplainableModel, \n", + "explainer = MimicWrapper(ws, automl_explainer_setup_obj.automl_estimator, \n", + " LGBMExplainableModel, \n", " init_dataset=automl_explainer_setup_obj.X_transform, run=best_run,\n", " features=automl_explainer_setup_obj.engineered_feature_names, \n", " feature_maps=[automl_explainer_setup_obj.feature_map],\n", @@ -698,7 +907,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -710,18 +919,20 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": {}, + "execution_count": 19, + "metadata": { + "scrolled": false + }, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "0053e22932164a87b68b33105f095952", + "model_id": "5b739bb51c9143678cfcc391c6131af9", "version_major": 2, "version_minor": 0 }, "text/plain": [ - "ExplanationWidget(value={'predictedY': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1…" + "ExplanationWidget(value={'predictedY': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…" ] }, "metadata": {}, @@ -730,19 +941,27 @@ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 17, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "# Global, Local なモデルの解釈専用のダッシュボード\n", "from azureml.contrib.explain.model.visualize import ExplanationDashboard\n", "ExplanationDashboard(raw_explanations, automl_explainer_setup_obj.automl_pipeline, automl_explainer_setup_obj.X_test_raw)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, @@ -753,9 +972,9 @@ ], "metadata": { "kernelspec": { - 
"display_name": "myenv", + "display_name": "azureml", "language": "python", - "name": "myenv" + "name": "azureml" }, "language_info": { "codemirror_mode": { @@ -767,7 +986,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.6.9" } }, "nbformat": 4,