diff --git a/app/backend/app.py b/app/backend/app.py index 9911560227..6948a62cd1 100644 --- a/app/backend/app.py +++ b/app/backend/app.py @@ -6,8 +6,9 @@ import os import time from pathlib import Path -from typing import Any, AsyncGenerator, Dict, Union, cast +from typing import Any, AsyncGenerator, Dict, List, Union, cast +import httpx from azure.cognitiveservices.speech import ( ResultReason, SpeechConfig, @@ -25,6 +26,7 @@ from azure.storage.filedatalake.aio import FileSystemClient from azure.storage.filedatalake.aio import StorageStreamDownloader as DatalakeDownloader from openai import AsyncAzureOpenAI, AsyncOpenAI +from openai_priority_loadbalancer import AsyncLoadBalancer, Backend from opentelemetry.instrumentation.aiohttp_client import AioHttpClientInstrumentor from opentelemetry.instrumentation.asgi import OpenTelemetryMiddleware from opentelemetry.instrumentation.httpx import ( @@ -510,10 +512,22 @@ async def setup_clients(): api_version = os.getenv("AZURE_OPENAI_API_VERSION") or "2024-03-01-preview" + client_args = {} + if AZURE_OPENAI_SERVICE_BACKEND2 := os.environ.get("AZURE_OPENAI_SERVICE_BACKEND2"): + backends: List[Backend] = [ + Backend(f"{AZURE_OPENAI_SERVICE}.openai.azure.com", 1), + Backend(f"{AZURE_OPENAI_SERVICE_BACKEND2}.openai.azure.com", 1), + ] + + lb = AsyncLoadBalancer(backends) + # Inject the load balancer as the transport in a new default httpx client + client_args["http_client"] = httpx.AsyncClient(transport=lb) + openai_client = AsyncAzureOpenAI( api_version=api_version, azure_endpoint=endpoint, azure_ad_token_provider=token_provider, + **client_args, ) elif OPENAI_HOST == "local": openai_client = AsyncOpenAI( diff --git a/app/backend/requirements.in b/app/backend/requirements.in index ad552cdae8..9a6f5b90a3 100644 --- a/app/backend/requirements.in +++ b/app/backend/requirements.in @@ -2,6 +2,7 @@ azure-identity quart quart-cors openai[datalib]>=1.3.7 +openai-priority-loadbalancer==1.1.0 tiktoken tenacity 
azure-ai-documentintelligence diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt index 2cf2c249b8..660063239e 100644 --- a/app/backend/requirements.txt +++ b/app/backend/requirements.txt @@ -204,6 +204,7 @@ openai[datalib]==1.30.5 # openai-messages-token-helper openai-messages-token-helper==0.1.4 # via -r requirements.in +openai-priority-loadbalancer==1.1.0 opentelemetry-api==1.24.0 # via # azure-core-tracing-opentelemetry diff --git a/infra/main.bicep b/infra/main.bicep index 57e5a0b891..5dfba260d2 100644 --- a/infra/main.bicep +++ b/infra/main.bicep @@ -43,6 +43,7 @@ param appServiceSkuName string // Set in main.parameters.json @allowed([ 'azure', 'openai', 'azure_custom' ]) param openAiHost string // Set in main.parameters.json param isAzureOpenAiHost bool = startsWith(openAiHost, 'azure') +param deployAzureOpenAiBackendTwo bool = false param azureOpenAiCustomUrl string = '' param azureOpenAiApiVersion string = '' @@ -310,6 +311,7 @@ module backend 'core/host/appservice.bicep' = { AZURE_OPENAI_GPT4V_MODEL: gpt4vModelName // Specific to Azure OpenAI AZURE_OPENAI_SERVICE: isAzureOpenAiHost ? openAi.outputs.name : '' + AZURE_OPENAI_SERVICE_BACKEND2: isAzureOpenAiHost && deployAzureOpenAiBackendTwo ? openAiBackendTwo.outputs.name : '' AZURE_OPENAI_CHATGPT_DEPLOYMENT: chatGpt.deploymentName AZURE_OPENAI_EMB_DEPLOYMENT: embedding.deploymentName AZURE_OPENAI_GPT4V_DEPLOYMENT: useGPT4V ? 
gpt4vDeploymentName : '' @@ -401,6 +403,23 @@ module openAi 'core/ai/cognitiveservices.bicep' = if (isAzureOpenAiHost) { } } +module openAiBackendTwo 'core/ai/cognitiveservices.bicep' = if (isAzureOpenAiHost && deployAzureOpenAiBackendTwo) { + name: 'openai-backend-two' + scope: openAiResourceGroup + params: { + name: '${abbrs.cognitiveServicesAccounts}${resourceToken}-b2' + location: openAiResourceGroupLocation + tags: tags + publicNetworkAccess: publicNetworkAccess + bypass: bypass + sku: { + name: openAiSkuName + } + deployments: openAiDeployments + disableLocalAuth: true + } +} + // Formerly known as Form Recognizer // Does not support bypass module documentIntelligence 'core/ai/cognitiveservices.bicep' = { @@ -827,6 +846,7 @@ output AZURE_OPENAI_GPT4V_MODEL string = gpt4vModelName // Specific to Azure OpenAI output AZURE_OPENAI_SERVICE string = isAzureOpenAiHost ? openAi.outputs.name : '' +output AZURE_OPENAI_SERVICE_BACKEND2 string = isAzureOpenAiHost && deployAzureOpenAiBackendTwo ? openAiBackendTwo.outputs.name : '' output AZURE_OPENAI_API_VERSION string = isAzureOpenAiHost ? azureOpenAiApiVersion : '' output AZURE_OPENAI_RESOURCE_GROUP string = isAzureOpenAiHost ? openAiResourceGroup.name : '' output AZURE_OPENAI_CHATGPT_DEPLOYMENT string = isAzureOpenAiHost ? chatGpt.deploymentName : '' diff --git a/infra/main.parameters.json b/infra/main.parameters.json index 541bc4cfff..9115cf28c2 100644 --- a/infra/main.parameters.json +++ b/infra/main.parameters.json @@ -119,6 +119,9 @@ "openAiHost": { "value": "${OPENAI_HOST=azure}" }, + "deployAzureOpenAiBackendTwo": { + "value": "${DEPLOY_AZURE_OPENAI_BACKEND_TWO=false}" + }, "azureOpenAiCustomUrl":{ "value": "${AZURE_OPENAI_CUSTOM_URL}" },