Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add OpenAI Priority Load Balancer for Azure OpenAI #1626

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions app/backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import os
import time
from pathlib import Path
from typing import Any, AsyncGenerator, Dict, Union, cast

from typing import Any, AsyncGenerator, Dict, List, Union, cast
import httpx
from azure.cognitiveservices.speech import (
ResultReason,
SpeechConfig,
Expand All @@ -25,6 +25,7 @@
from azure.storage.filedatalake.aio import FileSystemClient
from azure.storage.filedatalake.aio import StorageStreamDownloader as DatalakeDownloader
from openai import AsyncAzureOpenAI, AsyncOpenAI
from openai_priority_loadbalancer import AsyncLoadBalancer, Backend
from opentelemetry.instrumentation.aiohttp_client import AioHttpClientInstrumentor
from opentelemetry.instrumentation.asgi import OpenTelemetryMiddleware
from opentelemetry.instrumentation.httpx import (
Expand Down Expand Up @@ -510,10 +511,22 @@ async def setup_clients():

api_version = os.getenv("AZURE_OPENAI_API_VERSION") or "2024-03-01-preview"

client_args = {}
if AZURE_OPENAI_SERVICE_BACKEND2 := os.environ.get("AZURE_OPENAI_SERVICE_BACKEND2"):
backends: List[Backend] = [
Backend(f"{AZURE_OPENAI_SERVICE}.openai.azure.com", 1),
Backend(f"{AZURE_OPENAI_SERVICE_BACKEND2}.openai.azure.com", 1),
]

lb = AsyncLoadBalancer(backends)
# Inject the load balancer as the transport in a new default httpx client
client_args["http_client"] = httpx.AsyncClient(transport=lb)

openai_client = AsyncAzureOpenAI(
api_version=api_version,
azure_endpoint=endpoint,
azure_ad_token_provider=token_provider,
**client_args,
)
elif OPENAI_HOST == "local":
openai_client = AsyncOpenAI(
Expand Down
1 change: 1 addition & 0 deletions app/backend/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ azure-identity
quart
quart-cors
openai[datalib]>=1.3.7
openai_priority_loadbalancer==1.0.9
tiktoken
tenacity
azure-ai-documentintelligence
Expand Down
1 change: 1 addition & 0 deletions app/backend/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ openai[datalib]==1.30.5
# openai-messages-token-helper
openai-messages-token-helper==0.1.4
# via -r requirements.in
openai-priority-loadbalancer==1.0.9
opentelemetry-api==1.24.0
# via
# azure-core-tracing-opentelemetry
Expand Down
20 changes: 20 additions & 0 deletions infra/main.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ param appServiceSkuName string // Set in main.parameters.json
@allowed([ 'azure', 'openai', 'azure_custom' ])
param openAiHost string // Set in main.parameters.json
param isAzureOpenAiHost bool = startsWith(openAiHost, 'azure')
param deployAzureOpenAiBackendTwo bool = false
param azureOpenAiCustomUrl string = ''
param azureOpenAiApiVersion string = ''

Expand Down Expand Up @@ -310,6 +311,7 @@ module backend 'core/host/appservice.bicep' = {
AZURE_OPENAI_GPT4V_MODEL: gpt4vModelName
// Specific to Azure OpenAI
AZURE_OPENAI_SERVICE: isAzureOpenAiHost ? openAi.outputs.name : ''
AZURE_OPENAI_SERVICE_BACKEND2: isAzureOpenAiHost && deployAzureOpenAiBackendTwo ? openAiBackendTwo.outputs.name : ''
AZURE_OPENAI_CHATGPT_DEPLOYMENT: chatGpt.deploymentName
AZURE_OPENAI_EMB_DEPLOYMENT: embedding.deploymentName
AZURE_OPENAI_GPT4V_DEPLOYMENT: useGPT4V ? gpt4vDeploymentName : ''
Expand Down Expand Up @@ -401,6 +403,23 @@ module openAi 'core/ai/cognitiveservices.bicep' = if (isAzureOpenAiHost) {
}
}

module openAiBackendTwo 'core/ai/cognitiveservices.bicep' = if (isAzureOpenAiHost && deployAzureOpenAiBackendTwo) {
name: 'openai-backend-two'
scope: openAiResourceGroup
params: {
name: '${abbrs.cognitiveServicesAccounts}${resourceToken}-b2'
location: openAiResourceGroupLocation
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@simonkurtz-MSFT Do your customers typically deploy backends in multiple regions or the same region? @mattgotteiner is wondering if the location should be a second region.

Copy link
Contributor Author

@simonkurtz-MSFT simonkurtz-MSFT Jun 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pamelafox & @mattgotteiner, that's a very important question. My customers almost exclusively deploy to multiple regions. Being able to define a second region would be helpful. If not defined, we could fall back to setting the second region to the value of the first region, if need be.

tags: tags
publicNetworkAccess: publicNetworkAccess
bypass: bypass
sku: {
name: openAiSkuName
}
deployments: openAiDeployments
disableLocalAuth: true
}
}

// Formerly known as Form Recognizer
// Does not support bypass
module documentIntelligence 'core/ai/cognitiveservices.bicep' = {
Expand Down Expand Up @@ -818,6 +837,7 @@ output AZURE_OPENAI_GPT4V_MODEL string = gpt4vModelName

// Specific to Azure OpenAI
output AZURE_OPENAI_SERVICE string = isAzureOpenAiHost ? openAi.outputs.name : ''
output AZURE_OPENAI_SERVICE_BACKEND2 string = isAzureOpenAiHost && deployAzureOpenAiBackendTwo ? openAiBackendTwo.outputs.name : ''
output AZURE_OPENAI_API_VERSION string = isAzureOpenAiHost ? azureOpenAiApiVersion : ''
output AZURE_OPENAI_RESOURCE_GROUP string = isAzureOpenAiHost ? openAiResourceGroup.name : ''
output AZURE_OPENAI_CHATGPT_DEPLOYMENT string = isAzureOpenAiHost ? chatGpt.deploymentName : ''
Expand Down
3 changes: 3 additions & 0 deletions infra/main.parameters.json
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,9 @@
"openAiHost": {
"value": "${OPENAI_HOST=azure}"
},
"deployAzureOpenAiBackendTwo": {
"value": "${DEPLOY_AZURE_OPENAI_BACKEND_TWO=false}"
},
"azureOpenAiCustomUrl":{
"value": "${AZURE_OPENAI_CUSTOM_URL}"
},
Expand Down
Loading