From b087a8e2730f08c97889315b58b9c70f81be4556 Mon Sep 17 00:00:00 2001 From: Simon Kurtz Date: Thu, 16 May 2024 20:17:01 -0400 Subject: [PATCH 01/10] Add openai-priority-loadbalancer --- app/backend/app.py | 14 +++++++++++++- app/backend/requirements.in | 1 + app/backend/requirements.txt | 1 + 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/app/backend/app.py b/app/backend/app.py index a1ebe5225a..fd85de2810 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -5,7 +5,7 @@ import mimetypes import os from pathlib import Path -from typing import Any, AsyncGenerator, Dict, Union, cast +from typing import Any, AsyncGenerator, Dict, Union, cast, List from azure.core.credentials import AzureKeyCredential from azure.core.credentials_async import AsyncTokenCredential @@ -19,7 +19,12 @@ from azure.storage.blob.aio import StorageStreamDownloader as BlobDownloader from azure.storage.filedatalake.aio import FileSystemClient from azure.storage.filedatalake.aio import StorageStreamDownloader as DatalakeDownloader +# Using httpx.Client and httpx.AsyncClient avoids having to update openai to 1.17.1 or newer. +# The openai properties for DefaultHttpxClient and DefaultAsyncHttpxClient are mere wrappers for httpx.Client and httpx.AsyncClient. +# https://github.com/openai/openai-python/releases/tag/v1.17.0 +import httpx from openai import AsyncAzureOpenAI, AsyncOpenAI +from openai_priority_loadbalancer import AsyncLoadBalancer, Backend from opentelemetry.instrumentation.aiohttp_client import AioHttpClientInstrumentor from opentelemetry.instrumentation.asgi import OpenTelemetryMiddleware from opentelemetry.instrumentation.httpx import ( @@ -77,6 +82,10 @@ mimetypes.add_type("application/javascript", ".js") mimetypes.add_type("text/css", ".css") +backends: List[Backend] = [ + Backend("cog-w2og7ojyhvoq6.openai.azure.com", 1), + #Backend("oai-k9b2z6f8x1v3q.openai.azure.com", 1) +] @bp.route("/") async def index(): @@ -452,10 +461,13 @@ async def setup_clients(): api_version = os.getenv("AZURE_OPENAI_API_VERSION") or "2024-03-01-preview" + lb = AsyncLoadBalancer(backends) + openai_client = AsyncAzureOpenAI( api_version=api_version, azure_endpoint=endpoint, azure_ad_token_provider=token_provider, + http_client = httpx.AsyncClient(transport = lb) # Inject the load balancer as the transport in a new default httpx client ) elif OPENAI_HOST == "local": openai_client = AsyncOpenAI( diff --git a/app/backend/requirements.in b/app/backend/requirements.in index b0147e2f02..df31c9d651 100644 --- a/app/backend/requirements.in +++ b/app/backend/requirements.in @@ -2,6 +2,7 @@ azure-identity quart quart-cors openai[datalib]>=1.3.7 +openai_priority_loadbalancer tiktoken tenacity azure-ai-documentintelligence diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index ffacba9668..cdb66a0dbf 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -200,6 +200,7 @@ oauthlib==3.2.2 # via requests-oauthlib openai[datalib]==1.13.3 # via -r requirements.in +openai-priority-loadbalancer>=1.0.5 opentelemetry-api==1.23.0 # via # azure-core-tracing-opentelemetry From 791e51a689c6a11037e696aa1a39096a08e63c80 Mon Sep 17 00:00:00 2001 From: Simon Kurtz Date: Fri, 17 May 2024 11:44:00 -0400 Subject: [PATCH 02/10] Add second working backend --- app/backend/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/backend/app.py b/app/backend/app.py index fd85de2810..ebdf0d9e76 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -84,7 +84,7 @@ backends: List[Backend] = [ Backend("cog-w2og7ojyhvoq6.openai.azure.com", 1), - #Backend("oai-k9b2z6f8x1v3q.openai.azure.com", 1) + Backend("cog-kfdf7d5q443bu.openai.azure.com", 1), ] @bp.route("/") From 77d45163164aeb2cc26b4abbd018645ebac2692f Mon Sep 17 00:00:00 2001 From: Simon Kurtz Date: Fri, 17 May 2024 11:47:48 -0400 Subject: [PATCH 03/10] Lock openai_priority_loadbalancer to 1.0.6 --- app/backend/requirements.in | 2 +- app/backend/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/backend/requirements.in b/app/backend/requirements.in index c7e1d13020..b1693bae1e 100644 --- a/app/backend/requirements.in +++ b/app/backend/requirements.in @@ -2,7 +2,7 @@ azure-identity quart quart-cors openai[datalib]>=1.3.7 -openai_priority_loadbalancer +openai_priority_loadbalancer==1.0.6 tiktoken tenacity azure-ai-documentintelligence diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index e35459c715..f11810d722 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -196,7 +196,7 @@ oauthlib==3.2.2 # via requests-oauthlib openai[datalib]==1.13.3 # via -r requirements.in -openai-priority-loadbalancer>=1.0.5 +openai-priority-loadbalancer==1.0.6 opentelemetry-api==1.23.0 # via # azure-core-tracing-opentelemetry From 64428932a7aff37f50ee3ce3204451da77083cd4 Mon Sep 17 00:00:00 2001 From: Simon Kurtz Date: Fri, 17 May 2024 11:59:28 -0400 Subject: [PATCH 04/10] Clean up --- app/backend/app.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index 8269ea83d5..6a66deb092 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -4,6 +4,7 @@ import logging import mimetypes import os +import httpx from pathlib import Path from typing import Any, AsyncGenerator, Dict, Union, cast, List @@ -16,10 +17,6 @@ from azure.storage.blob.aio import StorageStreamDownloader as BlobDownloader from azure.storage.filedatalake.aio import FileSystemClient from azure.storage.filedatalake.aio import StorageStreamDownloader as DatalakeDownloader -# Using httpx.Client and httpx.AsyncClient avoids having to update openai to 1.17.1 or newer. -# The openai properties for DefaultHttpxClient and DefaultAsyncHttpxClient are mere wrappers for httpx.Client and httpx.AsyncClient. -# https://github.com/openai/openai-python/releases/tag/v1.17.0 -import httpx from openai import AsyncAzureOpenAI, AsyncOpenAI from openai_priority_loadbalancer import AsyncLoadBalancer, Backend from opentelemetry.instrumentation.aiohttp_client import AioHttpClientInstrumentor @@ -79,6 +76,7 @@ mimetypes.add_type("application/javascript", ".js") mimetypes.add_type("text/css", ".css") +# Data for the backends could be supplied through config. This data is simply here to illustrate usage. backends: List[Backend] = [ Backend("cog-w2og7ojyhvoq6.openai.azure.com", 1), Backend("cog-kfdf7d5q443bu.openai.azure.com", 1), From 4239b03b96d2c44c472a0c0aa7dc1cc9738094d6 Mon Sep 17 00:00:00 2001 From: Simon Kurtz Date: Tue, 21 May 2024 12:39:07 -0400 Subject: [PATCH 05/10] Update openai-priority-loadbalancer to 1.0.8 --- app/backend/requirements.in | 2 +- app/backend/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/backend/requirements.in b/app/backend/requirements.in index b1693bae1e..fa9b3adf3f 100644 --- a/app/backend/requirements.in +++ b/app/backend/requirements.in @@ -2,7 +2,7 @@ azure-identity quart quart-cors openai[datalib]>=1.3.7 -openai_priority_loadbalancer==1.0.6 +openai_priority_loadbalancer==1.0.8 tiktoken tenacity azure-ai-documentintelligence diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index f11810d722..0bce5e284b 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -196,7 +196,7 @@ oauthlib==3.2.2 # via requests-oauthlib openai[datalib]==1.13.3 # via -r requirements.in -openai-priority-loadbalancer==1.0.6 +openai-priority-loadbalancer==1.0.8 opentelemetry-api==1.23.0 # via # azure-core-tracing-opentelemetry From f3857367ff54d54bd107e8d299b531ffb73e07cf Mon Sep 17 00:00:00 2001 From: Simon Kurtz Date: Tue, 28 May 2024 22:37:18 -0400 Subject: [PATCH 06/10] Update openai-priority-loadbalancer to 1.0.9 --- app/backend/requirements.in | 2 +- app/backend/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/backend/requirements.in b/app/backend/requirements.in index fa9b3adf3f..8211066689 100644 --- a/app/backend/requirements.in +++ b/app/backend/requirements.in @@ -2,7 +2,7 @@ azure-identity quart quart-cors openai[datalib]>=1.3.7 -openai_priority_loadbalancer==1.0.8 +openai_priority_loadbalancer==1.0.9 tiktoken tenacity azure-ai-documentintelligence diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index 0bce5e284b..5ff9bf260d 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -196,7 +196,7 @@ oauthlib==3.2.2 # via requests-oauthlib openai[datalib]==1.13.3 # via -r requirements.in -openai-priority-loadbalancer==1.0.8 +openai-priority-loadbalancer==1.0.9 opentelemetry-api==1.23.0 # via # azure-core-tracing-opentelemetry From fc1a681b71a041aa78c8a39449bbfcac99d2926e Mon Sep 17 00:00:00 2001 From: Pamela Fox Date: Mon, 3 Jun 2024 07:13:20 -0700 Subject: [PATCH 07/10] Use bicep and env vars for backends --- app/backend/app.py | 22 +++++++++++++--------- infra/main.bicep | 20 ++++++++++++++++++++ infra/main.parameters.json | 3 +++ 3 files changed, 36 insertions(+), 9 deletions(-) diff --git a/app/backend/app.py b/app/backend/app.py index 6a66deb092..dc509eafa6 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -4,10 +4,10 @@ import logging import mimetypes import os -import httpx from pathlib import Path -from typing import Any, AsyncGenerator, Dict, Union, cast, List +from typing import Any, AsyncGenerator, Dict, List, Union, cast +import httpx from azure.core.exceptions import ResourceNotFoundError from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider from azure.monitor.opentelemetry import configure_azure_monitor @@ -76,11 +76,6 @@ mimetypes.add_type("application/javascript", ".js") mimetypes.add_type("text/css", ".css") -# Data for the backends could be supplied through config. This data is simply here to illustrate usage. -backends: List[Backend] = [ - Backend("cog-w2og7ojyhvoq6.openai.azure.com", 1), - Backend("cog-kfdf7d5q443bu.openai.azure.com", 1), -] @bp.route("/") async def index(): @@ -438,13 +433,22 @@ async def setup_clients(): api_version = os.getenv("AZURE_OPENAI_API_VERSION") or "2024-03-01-preview" - lb = AsyncLoadBalancer(backends) + client_args = {} + if AZURE_OPENAI_SERVICE_BACKEND2 := os.environ.get("AZURE_OPENAI_SERVICE_BACKEND2"): + backends: List[Backend] = [ + Backend(f"{AZURE_OPENAI_SERVICE}.openai.azure.com", 1), + Backend(f"{AZURE_OPENAI_SERVICE_BACKEND2}.openai.azure.com", 1), + ] + + lb = AsyncLoadBalancer(backends) + # Inject the load balancer as the transport in a new default httpx client + client_args["http_client"] = httpx.AsyncClient(transport=lb) openai_client = AsyncAzureOpenAI( api_version=api_version, azure_endpoint=endpoint, azure_ad_token_provider=token_provider, - http_client = httpx.AsyncClient(transport = lb) # Inject the load balancer as the transport in a new default httpx client + **client_args, ) elif OPENAI_HOST == "local": openai_client = AsyncOpenAI( diff --git a/infra/main.bicep b/infra/main.bicep index 7ac59b7272..ae26235030 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -43,6 +43,7 @@ param appServiceSkuName string // Set in main.parameters.json @allowed([ 'azure', 'openai', 'azure_custom' ]) param openAiHost string // Set in main.parameters.json param isAzureOpenAiHost bool = startsWith(openAiHost, 'azure') +param deployAzureOpenAiBackendTwo bool = false param azureOpenAiCustomUrl string = '' param azureOpenAiApiVersion string = '' @@ -294,6 +295,7 @@ module backend 'core/host/appservice.bicep' = { AZURE_OPENAI_GPT4V_MODEL: gpt4vModelName // Specific to Azure OpenAI AZURE_OPENAI_SERVICE: isAzureOpenAiHost ? openAi.outputs.name : '' + AZURE_OPENAI_SERVICE_BACKEND2: isAzureOpenAiHost && deployAzureOpenAiBackendTwo ? openAiBackendTwo.outputs.name : '' AZURE_OPENAI_CHATGPT_DEPLOYMENT: chatGpt.deploymentName AZURE_OPENAI_EMB_DEPLOYMENT: embedding.deploymentName AZURE_OPENAI_GPT4V_DEPLOYMENT: useGPT4V ? gpt4vDeploymentName : '' @@ -385,6 +387,23 @@ module openAi 'core/ai/cognitiveservices.bicep' = if (isAzureOpenAiHost) { } } +module openAiBackendTwo 'core/ai/cognitiveservices.bicep' = if (isAzureOpenAiHost && deployAzureOpenAiBackendTwo) { + name: 'openai-backend-two' + scope: openAiResourceGroup + params: { + name: '${abbrs.cognitiveServicesAccounts}${resourceToken}-b2' + location: openAiResourceGroupLocation + tags: tags + publicNetworkAccess: publicNetworkAccess + bypass: bypass + sku: { + name: openAiSkuName + } + deployments: openAiDeployments + disableLocalAuth: true + } +} + // Formerly known as Form Recognizer // Does not support bypass module documentIntelligence 'core/ai/cognitiveservices.bicep' = { @@ -766,6 +785,7 @@ output AZURE_OPENAI_GPT4V_MODEL string = gpt4vModelName // Specific to Azure OpenAI output AZURE_OPENAI_SERVICE string = isAzureOpenAiHost ? openAi.outputs.name : '' +output AZURE_OPENAI_SERVICE_BACKEND2 string = isAzureOpenAiHost && deployAzureOpenAiBackendTwo ? openAiBackendTwo.outputs.name : '' output AZURE_OPENAI_API_VERSION string = isAzureOpenAiHost ? azureOpenAiApiVersion : '' output AZURE_OPENAI_RESOURCE_GROUP string = isAzureOpenAiHost ? openAiResourceGroup.name : '' output AZURE_OPENAI_CHATGPT_DEPLOYMENT string = isAzureOpenAiHost ? chatGpt.deploymentName : '' diff --git a/infra/main.parameters.json b/infra/main.parameters.json index bfaa0deff9..9b87e68f29 100644 --- a/infra/main.parameters.json +++ b/infra/main.parameters.json @@ -119,6 +119,9 @@ "openAiHost": { "value": "${OPENAI_HOST=azure}" }, + "deployAzureOpenAiBackendTwo": { + "value": "${DEPLOY_AZURE_OPENAI_BACKEND_TWO=false}" + }, "azureOpenAiCustomUrl":{ "value": "${AZURE_OPENAI_CUSTOM_URL}" }, From eecfac27889b57423dd6530c588c9adbad9eaf8d Mon Sep 17 00:00:00 2001 From: Simon Kurtz Date: Mon, 3 Jun 2024 13:18:49 -0400 Subject: [PATCH 08/10] Fix linter error --- app/backend/app.py | 1 + 1 file changed, 1 insertion(+) diff --git a/app/backend/app.py b/app/backend/app.py index a4b49c242e..6948a62cd1 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -7,6 +7,7 @@ import time from pathlib import Path from typing import Any, AsyncGenerator, Dict, List, Union, cast + import httpx from azure.cognitiveservices.speech import ( ResultReason, From d140570f7946eedab777bcf4dda4860066fc09d2 Mon Sep 17 00:00:00 2001 From: Simon Kurtz <84809797+simonkurtz-MSFT@users.noreply.github.com> Date: Fri, 7 Jun 2024 21:54:45 -0400 Subject: [PATCH 09/10] Update openai_priority_loadbalancer to 1.1.0 --- app/backend/requirements.in | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/backend/requirements.in b/app/backend/requirements.in index 289f3356ae..9a6f5b90a3 100644 --- a/app/backend/requirements.in +++ b/app/backend/requirements.in @@ -2,7 +2,7 @@ azure-identity quart quart-cors openai[datalib]>=1.3.7 -openai_priority_loadbalancer==1.0.9 +openai_priority_loadbalancer==1.1.0 tiktoken tenacity azure-ai-documentintelligence From b9f2e53de1b8ec0e263423bd6889c6b09789cedb Mon Sep 17 00:00:00 2001 From: Simon Kurtz <84809797+simonkurtz-MSFT@users.noreply.github.com> Date: Fri, 7 Jun 2024 21:55:22 -0400 Subject: [PATCH 10/10] Update openai_priority_loadbalancer to 1.1.0 --- app/backend/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index 7587be9674..660063239e 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -204,7 +204,7 @@ openai[datalib]==1.30.5 # openai-messages-token-helper openai-messages-token-helper==0.1.4 # via -r requirements.in -openai-priority-loadbalancer==1.0.9 +openai-priority-loadbalancer==1.1.0 opentelemetry-api==1.24.0 # via # azure-core-tracing-opentelemetry