|
32 | 32 | "# Vertex AI SDK : AutoML training image object detection model for batch prediction\n",
|
33 | 33 | "\n",
|
34 | 34 | "<table align=\"left\">\n",
|
| 35 | + " \n", |
35 | 36 | " <td>\n",
|
36 |
| - " <a href=\"https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/master/notebooks/official/automl/sdk_automl_image_object_detection_batch.ipynb\">\n", |
| 37 | + " <a href=\"https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/automl/sdk_automl_image_object_detection_batch.ipynb\">\n", |
37 | 38 | " <img src=\"https://cloud.google.com/ml-engine/images/colab-logo-32px.png\" alt=\"Colab logo\"> Run in Colab\n",
|
38 | 39 | " </a>\n",
|
39 | 40 | " </td>\n",
|
40 | 41 | " <td>\n",
|
41 |
| - " <a href=\"https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/master/notebooks/official/automl/sdk_automl_image_object_detection_batch.ipynb\">\n", |
| 42 | + " <a href=\"https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/automl/sdk_automl_image_object_detection_batch.ipynb\">\n", |
42 | 43 | " <img src=\"https://cloud.google.com/ml-engine/images/github-logo-32px.png\" alt=\"GitHub logo\">\n",
|
43 | 44 | " View on GitHub\n",
|
44 | 45 | " </a>\n",
|
45 | 46 | " </td>\n",
|
46 | 47 | " <td>\n",
|
47 |
| - " <a href=\"https://console.cloud.google.com/ai/platform/notebooks/deploy-notebook?download_url=https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/master/notebooks/official/automl/sdk_automl_image_object_detection_batch.ipynb\">\n", |
48 |
| - " <img src=\"https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32\" alt=\"Vertex AI logo\">\n", |
| 48 | + " <a href=\"https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/automl/sdk_automl_image_object_detection_batch.ipynb\">\n", |
| 49 | + " <img src=\"https://lh3.googleusercontent.com/UiNooY4LUgW_oTvpsNhPpQzsstV5W8F7rYgxgGBD85cWJoLmrOzhVs_ksK_vgx40SHs7jCqkTkCk=e14-rj-sc0xffffff-h130-w32\" alt=\"Vertex AI logo\">\n", |
49 | 50 | " Open in Vertex AI Workbench\n",
|
50 | 51 | " </a>\n",
|
51 |
| - " </td>\n", |
| 52 | + " </td> \n", |
52 | 53 | "</table>\n",
|
53 | 54 | "<br/><br/><br/>"
|
54 | 55 | ]
|
|
168 | 169 | "cell_type": "code",
|
169 | 170 | "execution_count": null,
|
170 | 171 | "metadata": {
|
171 |
| - "id": "install_aip:mbsdk" |
| 172 | + "id": "hw7H6ADSv5mI" |
172 | 173 | },
|
173 | 174 | "outputs": [],
|
174 | 175 | "source": [
|
|
189 | 190 | "id": "install_storage"
|
190 | 191 | },
|
191 | 192 | "source": [
|
192 |
| - "Install the latest GA version of *google-cloud-storage* library as well." |
| 193 | + "Install the latest GA version of *google-cloud-storage* library." |
193 | 194 | ]
|
194 | 195 | },
|
195 | 196 | {
|
196 | 197 | "cell_type": "code",
|
197 | 198 | "execution_count": null,
|
198 | 199 | "metadata": {
|
199 |
| - "id": "install_storage" |
| 200 | + "id": "d6Pa6Sybv5mK" |
200 | 201 | },
|
201 | 202 | "outputs": [],
|
202 | 203 | "source": [
|
203 | 204 | "! pip3 install -U google-cloud-storage $USER_FLAG"
|
204 | 205 | ]
|
205 | 206 | },
|
| 207 | + { |
| 208 | + "cell_type": "markdown", |
| 209 | + "metadata": { |
| 210 | + "id": "9_zWlX10v5mL" |
| 211 | + }, |
| 212 | + "source": [ |
| 213 | + "Install the latest version of *tensorflow* library." |
| 214 | + ] |
| 215 | + }, |
206 | 216 | {
|
207 | 217 | "cell_type": "code",
|
208 | 218 | "execution_count": null,
|
|
211 | 221 | },
|
212 | 222 | "outputs": [],
|
213 | 223 | "source": [
|
214 |
| - "if os.getenv(\"IS_TESTING\"):\n", |
215 |
| - " ! pip3 install --upgrade tensorflow $USER_FLAG" |
| 224 | + "! pip3 install --upgrade tensorflow $USER_FLAG" |
216 | 225 | ]
|
217 | 226 | },
|
218 | 227 | {
|
|
230 | 239 | "cell_type": "code",
|
231 | 240 | "execution_count": null,
|
232 | 241 | "metadata": {
|
233 |
| - "id": "restart" |
| 242 | + "id": "D-ZBOjErv5mM" |
234 | 243 | },
|
235 | 244 | "outputs": [],
|
236 | 245 | "source": [
|
|
282 | 291 | },
|
283 | 292 | "outputs": [],
|
284 | 293 | "source": [
|
285 |
| - "PROJECT_ID = \"[your-project-id]\" # @param {type:\"string\"}" |
| 294 | + "import os\n", |
| 295 | + "\n", |
| 296 | + "PROJECT_ID = \"\"" |
286 | 297 | ]
|
287 | 298 | },
|
288 | 299 | {
|
|
293 | 304 | },
|
294 | 305 | "outputs": [],
|
295 | 306 | "source": [
|
296 |
| - "if PROJECT_ID == \"\" or PROJECT_ID is None or PROJECT_ID == \"[your-project-id]\":\n", |
297 |
| - " # Get your GCP project id from gcloud\n", |
298 |
| - " shell_output = ! gcloud config list --format 'value(core.project)' 2>/dev/null\n", |
| 307 | + "# Get your Google Cloud project ID from gcloud\n", |
| 308 | + "if not os.getenv(\"IS_TESTING\"):\n", |
| 309 | + " shell_output = !gcloud config list --format 'value(core.project)' 2>/dev/null\n", |
299 | 310 | " PROJECT_ID = shell_output[0]\n",
|
300 |
| - " print(\"Project ID:\", PROJECT_ID)" |
| 311 | + " print(\"Project ID: \", PROJECT_ID)" |
| 312 | + ] |
| 313 | + }, |
| 314 | + { |
| 315 | + "cell_type": "markdown", |
| 316 | + "metadata": { |
| 317 | + "id": "W2F5WRyhv5mO" |
| 318 | + }, |
| 319 | + "source": [ |
| 320 | + "Otherwise, set your project ID here." |
| 321 | + ] |
| 322 | + }, |
| 323 | + { |
| 324 | + "cell_type": "code", |
| 325 | + "execution_count": null, |
| 326 | + "metadata": { |
| 327 | + "id": "d7-MjQafv5mO" |
| 328 | + }, |
| 329 | + "outputs": [], |
| 330 | + "source": [ |
| 331 | + "if PROJECT_ID == \"\" or PROJECT_ID is None:\n", |
| 332 | + " PROJECT_ID = \"[your-project-id]\" # @param {type:\"string\"}" |
301 | 333 | ]
|
302 | 334 | },
|
303 | 335 | {
|
|
335 | 367 | "cell_type": "code",
|
336 | 368 | "execution_count": null,
|
337 | 369 | "metadata": {
|
338 |
| - "id": "region" |
| 370 | + "id": "kAfG6tDAv5mQ" |
339 | 371 | },
|
340 | 372 | "outputs": [],
|
341 | 373 | "source": [
|
342 |
| - "REGION = \"us-central1\" # @param {type: \"string\"}" |
| 374 | + "REGION = \"[your-region]\" # @param {type:\"string\"}\n", |
| 375 | + "\n", |
| 376 | + "if REGION == \"[your-region]\":\n", |
| 377 | + " REGION = \"us-central1\"" |
343 | 378 | ]
|
344 | 379 | },
|
345 | 380 | {
|
|
357 | 392 | "cell_type": "code",
|
358 | 393 | "execution_count": null,
|
359 | 394 | "metadata": {
|
360 |
| - "id": "timestamp" |
| 395 | + "id": "PzKW-zT_v5mR" |
361 | 396 | },
|
362 | 397 | "outputs": [],
|
363 | 398 | "source": [
|
|
397 | 432 | "cell_type": "code",
|
398 | 433 | "execution_count": null,
|
399 | 434 | "metadata": {
|
400 |
| - "id": "gcp_authenticate" |
| 435 | + "id": "FvQeFm3Gv5mR" |
401 | 436 | },
|
402 | 437 | "outputs": [],
|
403 | 438 | "source": [
|
|
475 | 510 | "cell_type": "code",
|
476 | 511 | "execution_count": null,
|
477 | 512 | "metadata": {
|
478 |
| - "id": "create_bucket" |
| 513 | + "id": "09kHSsKmv5mT" |
479 | 514 | },
|
480 | 515 | "outputs": [],
|
481 | 516 | "source": [
|
|
495 | 530 | "cell_type": "code",
|
496 | 531 | "execution_count": null,
|
497 | 532 | "metadata": {
|
498 |
| - "id": "validate_bucket" |
| 533 | + "id": "N9JY-esPv5mU" |
499 | 534 | },
|
500 | 535 | "outputs": [],
|
501 | 536 | "source": [
|
|
540 | 575 | "cell_type": "code",
|
541 | 576 | "execution_count": null,
|
542 | 577 | "metadata": {
|
543 |
| - "id": "init_aip:mbsdk" |
| 578 | + "id": "w2Oa_jZSv5mV" |
544 | 579 | },
|
545 | 580 | "outputs": [],
|
546 | 581 | "source": [
|
|
597 | 632 | "cell_type": "code",
|
598 | 633 | "execution_count": null,
|
599 | 634 | "metadata": {
|
600 |
| - "id": "quick_peek:csv" |
| 635 | + "id": "ITshYFagv5mZ" |
601 | 636 | },
|
602 | 637 | "outputs": [],
|
603 | 638 | "source": [
|
|
629 | 664 | "cell_type": "code",
|
630 | 665 | "execution_count": null,
|
631 | 666 | "metadata": {
|
632 |
| - "id": "create_dataset:image,iod" |
| 667 | + "id": "FYyaPzfRv5mc" |
633 | 668 | },
|
634 | 669 | "outputs": [],
|
635 | 670 | "source": [
|
|
677 | 712 | "cell_type": "code",
|
678 | 713 | "execution_count": null,
|
679 | 714 | "metadata": {
|
680 |
| - "id": "create_automl_pipeline:image,iod" |
| 715 | + "id": "vtAgY1Nmv5md" |
681 | 716 | },
|
682 | 717 | "outputs": [],
|
683 | 718 | "source": [
|
|
719 | 754 | "cell_type": "code",
|
720 | 755 | "execution_count": null,
|
721 | 756 | "metadata": {
|
722 |
| - "id": "run_automl_pipeline:image" |
| 757 | + "id": "SvV1nFDTv5md" |
723 | 758 | },
|
724 | 759 | "outputs": [],
|
725 | 760 | "source": [
|
|
750 | 785 | "cell_type": "code",
|
751 | 786 | "execution_count": null,
|
752 | 787 | "metadata": {
|
753 |
| - "id": "evaluate_the_model:mbsdk" |
| 788 | + "id": "RcpDJMgev5me" |
754 | 789 | },
|
755 | 790 | "outputs": [],
|
756 | 791 | "source": [
|
|
833 | 868 | "cell_type": "code",
|
834 | 869 | "execution_count": null,
|
835 | 870 | "metadata": {
|
836 |
| - "id": "copy_test_items:batch_prediction" |
| 871 | + "id": "JXpg67zjv5mf" |
837 | 872 | },
|
838 | 873 | "outputs": [],
|
839 | 874 | "source": [
|
|
869 | 904 | "cell_type": "code",
|
870 | 905 | "execution_count": null,
|
871 | 906 | "metadata": {
|
872 |
| - "id": "make_batch_file:automl,image" |
| 907 | + "id": "hFjX62hvv5mg" |
873 | 908 | },
|
874 | 909 | "outputs": [],
|
875 | 910 | "source": [
|
|
901 | 936 | "- `job_display_name`: The human readable name for the batch prediction job.\n",
|
902 | 937 | "- `gcs_source`: A list of one or more batch request input files.\n",
|
903 | 938 | "- `gcs_destination_prefix`: The Cloud Storage location for storing the batch prediction resuls.\n",
|
| 939 | + "- `machine_type`: The type of machine for running batch prediction on dedicated resources. Not specifying machine type will result in batch prediction job being run with automatic resources.\n", |
| 940 | + "- `starting_replica_count`: The number of machine replicas used at the start of the batch operation. If not set, Vertex AI decides starting number, not greater than `max_replica_count`. Only used if `machine_type` is set.\n", |
| 941 | + "- `max_replica_count`: The maximum number of machine replicas the batch operation may be scaled to. Only used if `machine_type` is set. Default is 10.\n", |
904 | 942 | "- `sync`: If set to True, the call will block while waiting for the asynchronous batch job to complete."
|
905 | 943 | ]
|
906 | 944 | },
|
| 945 | + { |
| 946 | + "cell_type": "markdown", |
| 947 | + "metadata": { |
| 948 | + "id": "axn-CW7xv5mg" |
| 949 | + }, |
| 950 | + "source": [ |
| 951 | + "For AutoML models, only manual scaling is supported. In manual scaling both starting_replica_count and max_replica_count have the same value.\n", |
| 952 | + "For this batch job we are using manual scaling. Here we are setting both starting_replica_count and max_replica_count to the same value that is 1. " |
| 953 | + ] |
| 954 | + }, |
907 | 955 | {
|
908 | 956 | "cell_type": "code",
|
909 | 957 | "execution_count": null,
|
910 | 958 | "metadata": {
|
911 |
| - "id": "batch_request:mbsdk" |
| 959 | + "id": "5VMMaJhbv5mh" |
912 | 960 | },
|
913 | 961 | "outputs": [],
|
914 | 962 | "source": [
|
915 | 963 | "batch_predict_job = model.batch_predict(\n",
|
916 | 964 | " job_display_name=\"salads_\" + TIMESTAMP,\n",
|
917 | 965 | " gcs_source=gcs_input_uri,\n",
|
918 | 966 | " gcs_destination_prefix=BUCKET_URI,\n",
|
| 967 | + " machine_type=\"n1-standard-4\",\n", |
| 968 | + " starting_replica_count=1,\n", |
| 969 | + " max_replica_count=1,\n", |
919 | 970 | " sync=False,\n",
|
920 | 971 | ")\n",
|
921 | 972 | "\n",
|
|
937 | 988 | "cell_type": "code",
|
938 | 989 | "execution_count": null,
|
939 | 990 | "metadata": {
|
940 |
| - "id": "batch_request_wait:mbsdk" |
| 991 | + "id": "Yc3YaEqGv5mh" |
941 | 992 | },
|
942 | 993 | "outputs": [],
|
943 | 994 | "source": [
|
|
967 | 1018 | "cell_type": "code",
|
968 | 1019 | "execution_count": null,
|
969 | 1020 | "metadata": {
|
970 |
| - "id": "get_batch_prediction:mbsdk,iod" |
| 1021 | + "id": "UGGJxFjEv5mh" |
971 | 1022 | },
|
972 | 1023 | "outputs": [],
|
973 | 1024 | "source": [
|
|
986 | 1037 | " with tf.io.gfile.GFile(name=gfile_name, mode=\"r\") as gfile:\n",
|
987 | 1038 | " for line in gfile.readlines():\n",
|
988 | 1039 | " line = json.loads(line)\n",
|
989 |
| - " print(line)\n", |
990 |
| - " break" |
| 1040 | + " print(line)" |
991 | 1041 | ]
|
992 | 1042 | },
|
993 | 1043 | {
|
|
1014 | 1064 | "cell_type": "code",
|
1015 | 1065 | "execution_count": null,
|
1016 | 1066 | "metadata": {
|
1017 |
| - "id": "cleanup:mbsdk" |
| 1067 | + "id": "_Olkhs6xv5mi" |
1018 | 1068 | },
|
1019 | 1069 | "outputs": [],
|
1020 | 1070 | "source": [
|
1021 |
| - "delete_all = True\n", |
| 1071 | + "delete_bucket = False\n", |
1022 | 1072 | "\n",
|
1023 |
| - "if delete_all:\n", |
1024 |
| - " # Delete the dataset using the Vertex dataset object\n", |
1025 |
| - " dataset.delete()\n", |
| 1073 | + "# Delete the dataset using the Vertex dataset object\n", |
| 1074 | + "dataset.delete()\n", |
1026 | 1075 | "\n",
|
1027 |
| - " # Delete the model using the Vertex model object\n", |
1028 |
| - " model.delete()\n", |
| 1076 | + "# Delete the model using the Vertex model object\n", |
| 1077 | + "model.delete()\n", |
1029 | 1078 | "\n",
|
1030 |
| - " # Delete the AutoML or Pipeline trainig job\n", |
1031 |
| - " job.delete()\n", |
| 1079 | + "# Delete the AutoML or Pipeline trainig job\n", |
| 1080 | + "job.delete()\n", |
1032 | 1081 | "\n",
|
1033 |
| - " # Delete the batch prediction job using the Vertex batch prediction object\n", |
1034 |
| - " batch_predict_job.delete()\n", |
| 1082 | + "# Delete the batch prediction job using the Vertex batch prediction object\n", |
| 1083 | + "batch_predict_job.delete()\n", |
1035 | 1084 | "\n",
|
1036 |
| - " if os.getenv(\"IS_TESTING\"):\n", |
1037 |
| - " ! gsutil rm -r $BUCKET_URI" |
| 1085 | + "if delete_bucket or os.getenv(\"IS_TESTING\"):\n", |
| 1086 | + " ! gsutil rm -r $BUCKET_URI" |
1038 | 1087 | ]
|
1039 | 1088 | }
|
1040 | 1089 | ],
|
|