From b087a8e2730f08c97889315b58b9c70f81be4556 Mon Sep 17 00:00:00 2001
From: Simon Kurtz <simonkurtz@microsoft.com>
Date: Thu, 16 May 2024 20:17:01 -0400
Subject: [PATCH 01/10] Add openai-priority-loadbalancer

---
 app/backend/app.py           | 14 +++++++++++++-
 app/backend/requirements.in  |  1 +
 app/backend/requirements.txt |  1 +
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/app/backend/app.py b/app/backend/app.py
index a1ebe5225a..fd85de2810 100644
--- a/app/backend/app.py
+++ b/app/backend/app.py
@@ -5,7 +5,7 @@
 import mimetypes
 import os
 from pathlib import Path
-from typing import Any, AsyncGenerator, Dict, Union, cast
+from typing import Any, AsyncGenerator, Dict, Union, cast, List
 
 from azure.core.credentials import AzureKeyCredential
 from azure.core.credentials_async import AsyncTokenCredential
@@ -19,7 +19,12 @@
 from azure.storage.blob.aio import StorageStreamDownloader as BlobDownloader
 from azure.storage.filedatalake.aio import FileSystemClient
 from azure.storage.filedatalake.aio import StorageStreamDownloader as DatalakeDownloader
+# Using httpx.Client and httpx.AsyncClient avoids having to update openai to 1.17.1 or newer.
+# The openai properties for DefaultHttpxClient and DefaultAsyncHttpxClient are mere wrappers for httpx.Client and httpx.AsyncClient.
+# https://github.com/openai/openai-python/releases/tag/v1.17.0
+import httpx
 from openai import AsyncAzureOpenAI, AsyncOpenAI
+from openai_priority_loadbalancer import AsyncLoadBalancer, Backend
 from opentelemetry.instrumentation.aiohttp_client import AioHttpClientInstrumentor
 from opentelemetry.instrumentation.asgi import OpenTelemetryMiddleware
 from opentelemetry.instrumentation.httpx import (
@@ -77,6 +82,10 @@
 mimetypes.add_type("application/javascript", ".js")
 mimetypes.add_type("text/css", ".css")
 
+backends: List[Backend] = [
+    Backend("cog-w2og7ojyhvoq6.openai.azure.com", 1),
+    #Backend("oai-k9b2z6f8x1v3q.openai.azure.com", 1)
+]
 
 @bp.route("/")
 async def index():
@@ -452,10 +461,13 @@ async def setup_clients():
 
         api_version = os.getenv("AZURE_OPENAI_API_VERSION") or "2024-03-01-preview"
 
+        lb = AsyncLoadBalancer(backends)
+
         openai_client = AsyncAzureOpenAI(
             api_version=api_version,
             azure_endpoint=endpoint,
             azure_ad_token_provider=token_provider,
+            http_client = httpx.AsyncClient(transport = lb)        # Inject the load balancer as the transport in a new default httpx client
         )
     elif OPENAI_HOST == "local":
         openai_client = AsyncOpenAI(
diff --git a/app/backend/requirements.in b/app/backend/requirements.in
index b0147e2f02..df31c9d651 100644
--- a/app/backend/requirements.in
+++ b/app/backend/requirements.in
@@ -2,6 +2,7 @@ azure-identity
 quart
 quart-cors
 openai[datalib]>=1.3.7
+openai_priority_loadbalancer
 tiktoken
 tenacity
 azure-ai-documentintelligence
diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt
index ffacba9668..cdb66a0dbf 100644
--- a/app/backend/requirements.txt
+++ b/app/backend/requirements.txt
@@ -200,6 +200,7 @@ oauthlib==3.2.2
     # via requests-oauthlib
 openai[datalib]==1.13.3
     # via -r requirements.in
+openai-priority-loadbalancer>=1.0.5
 opentelemetry-api==1.23.0
     # via
     #   azure-core-tracing-opentelemetry

From 791e51a689c6a11037e696aa1a39096a08e63c80 Mon Sep 17 00:00:00 2001
From: Simon Kurtz <simonkurtz@microsoft.com>
Date: Fri, 17 May 2024 11:44:00 -0400
Subject: [PATCH 02/10] Add second working backend

---
 app/backend/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/backend/app.py b/app/backend/app.py
index fd85de2810..ebdf0d9e76 100644
--- a/app/backend/app.py
+++ b/app/backend/app.py
@@ -84,7 +84,7 @@
 
 backends: List[Backend] = [
     Backend("cog-w2og7ojyhvoq6.openai.azure.com", 1),
-    #Backend("oai-k9b2z6f8x1v3q.openai.azure.com", 1)
+    Backend("cog-kfdf7d5q443bu.openai.azure.com", 1),
 ]
 
 @bp.route("/")

From 77d45163164aeb2cc26b4abbd018645ebac2692f Mon Sep 17 00:00:00 2001
From: Simon Kurtz <simonkurtz@microsoft.com>
Date: Fri, 17 May 2024 11:47:48 -0400
Subject: [PATCH 03/10] Lock openai_priority_loadbalancer to 1.0.6

---
 app/backend/requirements.in  | 2 +-
 app/backend/requirements.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/backend/requirements.in b/app/backend/requirements.in
index c7e1d13020..b1693bae1e 100644
--- a/app/backend/requirements.in
+++ b/app/backend/requirements.in
@@ -2,7 +2,7 @@ azure-identity
 quart
 quart-cors
 openai[datalib]>=1.3.7
-openai_priority_loadbalancer
+openai_priority_loadbalancer==1.0.6
 tiktoken
 tenacity
 azure-ai-documentintelligence
diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt
index e35459c715..f11810d722 100644
--- a/app/backend/requirements.txt
+++ b/app/backend/requirements.txt
@@ -196,7 +196,7 @@ oauthlib==3.2.2
     # via requests-oauthlib
 openai[datalib]==1.13.3
     # via -r requirements.in
-openai-priority-loadbalancer>=1.0.5
+openai-priority-loadbalancer==1.0.6
 opentelemetry-api==1.23.0
     # via
     #   azure-core-tracing-opentelemetry

From 64428932a7aff37f50ee3ce3204451da77083cd4 Mon Sep 17 00:00:00 2001
From: Simon Kurtz <simonkurtz@microsoft.com>
Date: Fri, 17 May 2024 11:59:28 -0400
Subject: [PATCH 04/10] Clean up httpx import and note that backends are sample data

---
 app/backend/app.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/app/backend/app.py b/app/backend/app.py
index 8269ea83d5..6a66deb092 100644
--- a/app/backend/app.py
+++ b/app/backend/app.py
@@ -4,6 +4,7 @@
 import logging
 import mimetypes
 import os
+import httpx
 from pathlib import Path
 from typing import Any, AsyncGenerator, Dict, Union, cast, List
 
@@ -16,10 +17,6 @@
 from azure.storage.blob.aio import StorageStreamDownloader as BlobDownloader
 from azure.storage.filedatalake.aio import FileSystemClient
 from azure.storage.filedatalake.aio import StorageStreamDownloader as DatalakeDownloader
-# Using httpx.Client and httpx.AsyncClient avoids having to update openai to 1.17.1 or newer.
-# The openai properties for DefaultHttpxClient and DefaultAsyncHttpxClient are mere wrappers for httpx.Client and httpx.AsyncClient.
-# https://github.com/openai/openai-python/releases/tag/v1.17.0
-import httpx
 from openai import AsyncAzureOpenAI, AsyncOpenAI
 from openai_priority_loadbalancer import AsyncLoadBalancer, Backend
 from opentelemetry.instrumentation.aiohttp_client import AioHttpClientInstrumentor
@@ -79,6 +76,7 @@
 mimetypes.add_type("application/javascript", ".js")
 mimetypes.add_type("text/css", ".css")
 
+# Data for the backends could be supplied through config. This data is simply here to illustrate usage.
 backends: List[Backend] = [
     Backend("cog-w2og7ojyhvoq6.openai.azure.com", 1),
     Backend("cog-kfdf7d5q443bu.openai.azure.com", 1),

From 4239b03b96d2c44c472a0c0aa7dc1cc9738094d6 Mon Sep 17 00:00:00 2001
From: Simon Kurtz <simonkurtz@microsoft.com>
Date: Tue, 21 May 2024 12:39:07 -0400
Subject: [PATCH 05/10] Update openai-priority-loadbalancer to 1.0.8

---
 app/backend/requirements.in  | 2 +-
 app/backend/requirements.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/backend/requirements.in b/app/backend/requirements.in
index b1693bae1e..fa9b3adf3f 100644
--- a/app/backend/requirements.in
+++ b/app/backend/requirements.in
@@ -2,7 +2,7 @@ azure-identity
 quart
 quart-cors
 openai[datalib]>=1.3.7
-openai_priority_loadbalancer==1.0.6
+openai_priority_loadbalancer==1.0.8
 tiktoken
 tenacity
 azure-ai-documentintelligence
diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt
index f11810d722..0bce5e284b 100644
--- a/app/backend/requirements.txt
+++ b/app/backend/requirements.txt
@@ -196,7 +196,7 @@ oauthlib==3.2.2
     # via requests-oauthlib
 openai[datalib]==1.13.3
     # via -r requirements.in
-openai-priority-loadbalancer==1.0.6
+openai-priority-loadbalancer==1.0.8
 opentelemetry-api==1.23.0
     # via
     #   azure-core-tracing-opentelemetry

From f3857367ff54d54bd107e8d299b531ffb73e07cf Mon Sep 17 00:00:00 2001
From: Simon Kurtz <simonkurtz@microsoft.com>
Date: Tue, 28 May 2024 22:37:18 -0400
Subject: [PATCH 06/10] Update openai-priority-loadbalancer to 1.0.9

---
 app/backend/requirements.in  | 2 +-
 app/backend/requirements.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/backend/requirements.in b/app/backend/requirements.in
index fa9b3adf3f..8211066689 100644
--- a/app/backend/requirements.in
+++ b/app/backend/requirements.in
@@ -2,7 +2,7 @@ azure-identity
 quart
 quart-cors
 openai[datalib]>=1.3.7
-openai_priority_loadbalancer==1.0.8
+openai_priority_loadbalancer==1.0.9
 tiktoken
 tenacity
 azure-ai-documentintelligence
diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt
index 0bce5e284b..5ff9bf260d 100644
--- a/app/backend/requirements.txt
+++ b/app/backend/requirements.txt
@@ -196,7 +196,7 @@ oauthlib==3.2.2
     # via requests-oauthlib
 openai[datalib]==1.13.3
     # via -r requirements.in
-openai-priority-loadbalancer==1.0.8
+openai-priority-loadbalancer==1.0.9
 opentelemetry-api==1.23.0
     # via
     #   azure-core-tracing-opentelemetry

From fc1a681b71a041aa78c8a39449bbfcac99d2926e Mon Sep 17 00:00:00 2001
From: Pamela Fox <pamela.fox@gmail.com>
Date: Mon, 3 Jun 2024 07:13:20 -0700
Subject: [PATCH 07/10] Use bicep and env vars for backends

---
 app/backend/app.py         | 22 +++++++++++++---------
 infra/main.bicep           | 20 ++++++++++++++++++++
 infra/main.parameters.json |  3 +++
 3 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/app/backend/app.py b/app/backend/app.py
index 6a66deb092..dc509eafa6 100644
--- a/app/backend/app.py
+++ b/app/backend/app.py
@@ -4,10 +4,10 @@
 import logging
 import mimetypes
 import os
-import httpx
 from pathlib import Path
-from typing import Any, AsyncGenerator, Dict, Union, cast, List
+from typing import Any, AsyncGenerator, Dict, List, Union, cast
 
+import httpx
 from azure.core.exceptions import ResourceNotFoundError
 from azure.identity.aio import DefaultAzureCredential, get_bearer_token_provider
 from azure.monitor.opentelemetry import configure_azure_monitor
@@ -76,11 +76,6 @@
 mimetypes.add_type("application/javascript", ".js")
 mimetypes.add_type("text/css", ".css")
 
-# Data for the backends could be supplied through config. This data is simply here to illustrate usage.
-backends: List[Backend] = [
-    Backend("cog-w2og7ojyhvoq6.openai.azure.com", 1),
-    Backend("cog-kfdf7d5q443bu.openai.azure.com", 1),
-]
 
 @bp.route("/")
 async def index():
@@ -438,13 +433,22 @@ async def setup_clients():
 
         api_version = os.getenv("AZURE_OPENAI_API_VERSION") or "2024-03-01-preview"
 
-        lb = AsyncLoadBalancer(backends)
+        client_args = {}
+        if AZURE_OPENAI_SERVICE_BACKEND2 := os.environ.get("AZURE_OPENAI_SERVICE_BACKEND2"):
+            backends: List[Backend] = [
+                Backend(f"{AZURE_OPENAI_SERVICE}.openai.azure.com", 1),
+                Backend(f"{AZURE_OPENAI_SERVICE_BACKEND2}.openai.azure.com", 1),
+            ]
+
+            lb = AsyncLoadBalancer(backends)
+            # Use the load balancer as the transport of a new httpx.AsyncClient that is passed to the OpenAI client
+            client_args["http_client"] = httpx.AsyncClient(transport=lb)
 
         openai_client = AsyncAzureOpenAI(
             api_version=api_version,
             azure_endpoint=endpoint,
             azure_ad_token_provider=token_provider,
-            http_client = httpx.AsyncClient(transport = lb)        # Inject the load balancer as the transport in a new default httpx client
+            **client_args,
         )
     elif OPENAI_HOST == "local":
         openai_client = AsyncOpenAI(
diff --git a/infra/main.bicep b/infra/main.bicep
index 7ac59b7272..ae26235030 100644
--- a/infra/main.bicep
+++ b/infra/main.bicep
@@ -43,6 +43,7 @@ param appServiceSkuName string // Set in main.parameters.json
 @allowed([ 'azure', 'openai', 'azure_custom' ])
 param openAiHost string // Set in main.parameters.json
 param isAzureOpenAiHost bool = startsWith(openAiHost, 'azure')
+param deployAzureOpenAiBackendTwo bool = false // Set in main.parameters.json
 param azureOpenAiCustomUrl string = ''
 param azureOpenAiApiVersion string = ''
 
@@ -294,6 +295,7 @@ module backend 'core/host/appservice.bicep' = {
       AZURE_OPENAI_GPT4V_MODEL: gpt4vModelName
       // Specific to Azure OpenAI
       AZURE_OPENAI_SERVICE: isAzureOpenAiHost ? openAi.outputs.name : ''
+      AZURE_OPENAI_SERVICE_BACKEND2: isAzureOpenAiHost && deployAzureOpenAiBackendTwo ? openAiBackendTwo.outputs.name : ''
       AZURE_OPENAI_CHATGPT_DEPLOYMENT: chatGpt.deploymentName
       AZURE_OPENAI_EMB_DEPLOYMENT: embedding.deploymentName
       AZURE_OPENAI_GPT4V_DEPLOYMENT: useGPT4V ? gpt4vDeploymentName : ''
@@ -385,6 +387,23 @@ module openAi 'core/ai/cognitiveservices.bicep' = if (isAzureOpenAiHost) {
   }
 }
 
+module openAiBackendTwo 'core/ai/cognitiveservices.bicep' = if (isAzureOpenAiHost && deployAzureOpenAiBackendTwo) { // Optional second Azure OpenAI account; its name is surfaced as AZURE_OPENAI_SERVICE_BACKEND2
+  name: 'openai-backend-two'
+  scope: openAiResourceGroup
+  params: {
+    name: '${abbrs.cognitiveServicesAccounts}${resourceToken}-b2' // '-b2' suffix appended to the abbreviation + resource token
+    location: openAiResourceGroupLocation
+    tags: tags
+    publicNetworkAccess: publicNetworkAccess
+    bypass: bypass
+    sku: {
+      name: openAiSkuName
+    }
+    deployments: openAiDeployments
+    disableLocalAuth: true // NOTE(review): presumably disables key-based auth on this account - confirm in core/ai/cognitiveservices.bicep
+  }
+}
+
 // Formerly known as Form Recognizer
 // Does not support bypass
 module documentIntelligence 'core/ai/cognitiveservices.bicep' = {
@@ -766,6 +785,7 @@ output AZURE_OPENAI_GPT4V_MODEL string = gpt4vModelName
 
 // Specific to Azure OpenAI
 output AZURE_OPENAI_SERVICE string = isAzureOpenAiHost ? openAi.outputs.name : ''
+output AZURE_OPENAI_SERVICE_BACKEND2 string = isAzureOpenAiHost && deployAzureOpenAiBackendTwo ? openAiBackendTwo.outputs.name : ''
 output AZURE_OPENAI_API_VERSION string = isAzureOpenAiHost ? azureOpenAiApiVersion : ''
 output AZURE_OPENAI_RESOURCE_GROUP string = isAzureOpenAiHost ? openAiResourceGroup.name : ''
 output AZURE_OPENAI_CHATGPT_DEPLOYMENT string = isAzureOpenAiHost ? chatGpt.deploymentName : ''
diff --git a/infra/main.parameters.json b/infra/main.parameters.json
index bfaa0deff9..9b87e68f29 100644
--- a/infra/main.parameters.json
+++ b/infra/main.parameters.json
@@ -119,6 +119,9 @@
     "openAiHost": {
       "value": "${OPENAI_HOST=azure}"
     },
+    "deployAzureOpenAiBackendTwo": {
+      "value": "${DEPLOY_AZURE_OPENAI_BACKEND_TWO=false}"
+    },
     "azureOpenAiCustomUrl":{
       "value": "${AZURE_OPENAI_CUSTOM_URL}"
     },

From eecfac27889b57423dd6530c588c9adbad9eaf8d Mon Sep 17 00:00:00 2001
From: Simon Kurtz <simonkurtz@microsoft.com>
Date: Mon, 3 Jun 2024 13:18:49 -0400
Subject: [PATCH 08/10] Fix linter error (blank line before third-party imports)

---
 app/backend/app.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/backend/app.py b/app/backend/app.py
index a4b49c242e..6948a62cd1 100644
--- a/app/backend/app.py
+++ b/app/backend/app.py
@@ -7,6 +7,7 @@
 import time
 from pathlib import Path
 from typing import Any, AsyncGenerator, Dict, List, Union, cast
+
 import httpx
 from azure.cognitiveservices.speech import (
     ResultReason,

From d140570f7946eedab777bcf4dda4860066fc09d2 Mon Sep 17 00:00:00 2001
From: Simon Kurtz <84809797+simonkurtz-MSFT@users.noreply.github.com>
Date: Fri, 7 Jun 2024 21:54:45 -0400
Subject: [PATCH 09/10] Update openai_priority_loadbalancer to 1.1.0 in requirements.in

---
 app/backend/requirements.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/backend/requirements.in b/app/backend/requirements.in
index 289f3356ae..9a6f5b90a3 100644
--- a/app/backend/requirements.in
+++ b/app/backend/requirements.in
@@ -2,7 +2,7 @@ azure-identity
 quart
 quart-cors
 openai[datalib]>=1.3.7
-openai_priority_loadbalancer==1.0.9
+openai_priority_loadbalancer==1.1.0
 tiktoken
 tenacity
 azure-ai-documentintelligence

From b9f2e53de1b8ec0e263423bd6889c6b09789cedb Mon Sep 17 00:00:00 2001
From: Simon Kurtz <84809797+simonkurtz-MSFT@users.noreply.github.com>
Date: Fri, 7 Jun 2024 21:55:22 -0400
Subject: [PATCH 10/10] Update openai_priority_loadbalancer to 1.1.0 in requirements.txt

---
 app/backend/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/backend/requirements.txt b/app/backend/requirements.txt
index 7587be9674..660063239e 100644
--- a/app/backend/requirements.txt
+++ b/app/backend/requirements.txt
@@ -204,7 +204,7 @@ openai[datalib]==1.30.5
     #   openai-messages-token-helper
 openai-messages-token-helper==0.1.4
     # via -r requirements.in
-openai-priority-loadbalancer==1.0.9
+openai-priority-loadbalancer==1.1.0
 opentelemetry-api==1.24.0
     # via
     #   azure-core-tracing-opentelemetry