Skip to content

Commit

Permalink
fix: Update Toolbox Batch Notebook to use `from_batch_process_metadata()` (#849)
Browse files Browse the repository at this point in the history

- Workaround for googleapis/python-documentai-toolbox#285

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
holtskinner and gcf-owl-bot[bot] authored Jul 2, 2024
1 parent 46ea450 commit 2a506b8
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 13 deletions.
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
ISORT_VERSION = "isort==5.11.0"
LINT_PATHS = ["."]

DEFAULT_PYTHON_VERSION = "3.7"
DEFAULT_PYTHON_VERSION = "3.8"

UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11"]
UNIT_TEST_STANDARD_DEPENDENCIES = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@
"\n",
"<table align=\"left\">\n",
" <td style=\"text-align: center\">\n",
" <a href=\"https://colab.research.google.com/github/GoogleCloudPlatform/document-ai-samples/blob/main/toolbox-batch-processing/documentai-toolbox-batch-entity-extraction.ipynb\">\n",
" <img src=\"https://cloud.google.com/ml-engine/images/colab-logo-32px.png\" alt=\"Google Colaboratory logo\"><br> Open in Colab\n",
" </a>\n",
" </td>\n",
" <td style=\"text-align: center\">\n",
" <a href=\"https://console.cloud.google.com/vertex-ai/colab/import/https:%2F%2Fraw.githubusercontent.com%2FGoogleCloudPlatform%2Fdocument-ai-samples%2Fmain%2Ftoolbox-batch-processing%2Fdocumentai-toolbox-batch-entity-extraction.ipynb\">\n",
" <img width=\"32px\" src=\"https://storage.googleapis.com/github-repo/colab_enterprise.svg\" alt=\"Google Cloud Colab Enterprise logo\"><br> Run in Colab Enterprise\n",
" </a>\n",
Expand Down Expand Up @@ -288,7 +293,7 @@
" batch_size: int,\n",
" field_mask: Optional[str] = None,\n",
" skip_human_review: bool = True,\n",
") -> List[str]:\n",
") -> List:\n",
" client = documentai.DocumentProcessorServiceClient(\n",
" client_options=ClientOptions(\n",
" api_endpoint=f\"{location}-documentai.googleapis.com\"\n",
Expand Down Expand Up @@ -317,7 +322,7 @@
" gcs_bucket_name, gcs_prefix, batch_size=batch_size\n",
" )\n",
"\n",
" operation_names: List[str] = []\n",
" operations = []\n",
"\n",
" print(f\"{len(batches)} batches created.\")\n",
" for batch in batches:\n",
Expand All @@ -334,10 +339,9 @@
" skip_human_review=skip_human_review,\n",
" )\n",
" )\n",
" # Operation Name Format: `projects/{project_id}/locations/{location}/operations/{operation_id}`\n",
" operation_names.append(operation.operation.name)\n",
" operations.append(operation)\n",
"\n",
" return operation_names"
" return operations"
]
},
{
Expand Down Expand Up @@ -375,7 +379,7 @@
},
"outputs": [],
"source": [
"operation_names = batch_process_toolbox(\n",
"operations = batch_process_toolbox(\n",
" project_id,\n",
" location,\n",
" processor_id,\n",
Expand All @@ -389,12 +393,15 @@
"# Can do this asynchronously to avoid blocking\n",
"documents: List[documentai_toolbox.document.Document] = []\n",
"\n",
"for operation in operation_names:\n",
"TIMEOUT = 60\n",
"\n",
"for operation in operations:\n",
" # https://cloud.google.com/document-ai/docs/long-running-operations\n",
" print(f\"Waiting for operation {operation}\")\n",
" print(f\"Waiting for operation {operation.operation.name}\")\n",
" operation.result(timeout=TIMEOUT)\n",
" documents.extend(\n",
" documentai_toolbox.document.Document.from_batch_process_operation(\n",
" location=location, operation_name=operation\n",
" documentai_toolbox.document.Document.from_batch_process_metadata(\n",
" documentai.BatchProcessMetadata(operation.metadata)\n",
" )\n",
" )"
]
Expand All @@ -410,7 +417,7 @@
"\n",
"- Export extracted entities as dictionary\n",
"- Load into Pandas DataFrame\n",
"- Print Dataframe"
"- Print DataFrame"
]
},
{
Expand Down Expand Up @@ -451,7 +458,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
"version": "3.undefined.undefined"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 2a506b8

Please sign in to comment.