Skip to content

Commit

Permalink
debug
Browse files Browse the repository at this point in the history
  • Loading branch information
muhammed-shihebi committed Mar 19, 2024
1 parent df6bd9a commit 097670a
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 13 deletions.
35 changes: 31 additions & 4 deletions model-inference/model_inference/tasks/config/model_config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import json
import os
import logging
from collections.abc import Mapping
from dataclasses import asdict, dataclass

Expand All @@ -17,6 +18,9 @@
IDENTIFIER = os.getenv("QUEUE")


logger = logging.getLogger(__name__)


@dataclass
class ModelConfig(Mapping):
# Corresponds to the Huggingface name for finetuned Transformers or the name of a finetuned SentenceTransformers
Expand Down Expand Up @@ -109,15 +113,36 @@ def update(self, identifier: str = IDENTIFIER):

@staticmethod
def load(path=".env"): # change .env filename to work on local

# REMOVE THIS:
# logging every env variable
logger.info(f"model_name: {os.getenv('MODEL_NAME')}")
logger.info(f"model_type: {os.getenv('MODEL_TYPE')}")
logger.info(f"model_path: {os.getenv('MODEL_PATH')}")
logger.info(f"decoder_path: {os.getenv('DECODER_PATH')}")
logger.info(f"is_encoder_decoder: {os.getenv('IS_ENCODER_DECODER')}")
logger.info(f"data_path: {os.getenv('DATA_PATH')}")
logger.info(f"preloaded_adapters: {os.getenv('PRELOADED_ADAPTERS')}")
logger.info(f"disable_gpu: {os.getenv('DISABLE_GPU')}")
logger.info(f"batch_size: {os.getenv('BATCH_SIZE')}")
logger.info(f"max_input_size: {os.getenv('MAX_INPUT_SIZE')}")
logger.info(f"transformers_cache: {os.getenv('TRANSFORMERS_CACHE')}")
logger.info(f"model_class: {os.getenv('MODEL_CLASS')}")
logger.info(
f"return_plaintext_arrays: {os.getenv('RETURN_PLAINTEXT_ARRAYS')}")
logger.info(f"onnx_use_quantized: {os.getenv('ONNX_USE_QUANTIZED')}")

config = Config(path)
model_config = ModelConfig(
model_name=config("MODEL_NAME", default=None),
model_type=config("MODEL_TYPE", default=None),
model_path=config("MODEL_PATH", default=None),
decoder_path=config("DECODER_PATH", default=None),
is_encoder_decoder=config("IS_ENCODER_DECODER", cast=bool, default=False),
is_encoder_decoder=config(
"IS_ENCODER_DECODER", cast=bool, default=False),
data_path=config("DATA_PATH", default=None),
preloaded_adapters=config("PRELOADED_ADAPTERS", cast=bool, default=True),
preloaded_adapters=config(
"PRELOADED_ADAPTERS", cast=bool, default=True),
disable_gpu=config("DISABLE_GPU", cast=bool, default=False),
batch_size=config("BATCH_SIZE", cast=int, default=32),
max_input_size=config("MAX_INPUT_SIZE", cast=int, default=1024),
Expand All @@ -126,7 +151,8 @@ def load(path=".env"): # change .env filename to work on local
return_plaintext_arrays=config(
"RETURN_PLAINTEXT_ARRAYS", cast=bool, default=False
),
onnx_use_quantized=config("ONNX_USE_QUANTIZED", cast=bool, default=False),
onnx_use_quantized=config(
"ONNX_USE_QUANTIZED", cast=bool, default=False),
)
model_config.save(IDENTIFIER)
return model_config
Expand All @@ -143,7 +169,8 @@ def save(self, identifier):
identifier = identifier.replace("/", "-")
if not os.path.exists(f"{CONFIG_PATH}/{identifier}.json"):
try:
os.makedirs(os.path.dirname(f"{CONFIG_PATH}/{identifier}.json"))
os.makedirs(os.path.dirname(
f"{CONFIG_PATH}/{identifier}.json"))
except OSError as err:
print(err)
with FileLock(f"{CONFIG_PATH}/{identifier}.lock"):
Expand Down
29 changes: 20 additions & 9 deletions model-manager/model_manager/docker_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@

# Named Docker volumes shared with the model-inference containers;
# overridable via environment for non-default compose setups.
ONNX_VOLUME = os.getenv("ONNX_VOLUME", "model-inference_onnx-models")
CONFIG_VOLUME = os.getenv("CONFIG_VOLUME", "model-inference_model_configs")
# For management server e.g. /api/models/deployed-models to list models etc.
MODELS_API_PATH = "models"
# Credentials for the management API; the defaults are development-only values.
USER = os.getenv("USERNAME", "user")
PASSWORD = os.getenv("PASSWORD", "user")

Expand Down Expand Up @@ -77,7 +78,8 @@ def start_new_model_container(identifier: str, uid: str, env):
logger.info("Found no running instance, so we try to create it")
# in order to obtain necessary information like the network id
# get the traefik container and read out the information
reference_container = docker_client.containers.list(filters={"name": "traefik"})[0]
reference_container = docker_client.containers.list(
filters={"name": "traefik"})[0]
logger.info("Reference Container: {}".format(reference_container))
network_id = list(
reference_container.attrs["NetworkSettings"]["Networks"].values()
Expand All @@ -91,7 +93,8 @@ def start_new_model_container(identifier: str, uid: str, env):

network = docker_client.networks.get(network_id)
worker_name = (
network.name + "-model-" + identifier.replace("/", "-") + "-worker-" + uid
network.name + "-model-" +
identifier.replace("/", "-") + "-worker-" + uid
)

env["WEB_CONCURRENCY"] = 1
Expand All @@ -106,7 +109,8 @@ def start_new_model_container(identifier: str, uid: str, env):
env["REDIS_PASSWORD"] = os.getenv("REDIS_PASSWORD", "secret")
env["CONFIG_PATH"] = os.getenv("CONFIG_PATH", "/model_configs")

model_api_base_image = os.getenv("MODEL_API_IMAGE", "ukpsquare/model-inference")
model_api_base_image = os.getenv(
"MODEL_API_IMAGE", "ukpsquare/model-inference")
image_tag = os.getenv("MODEL_API_IMAGE_TAG", "latest")

image = ""
Expand All @@ -122,8 +126,11 @@ def start_new_model_container(identifier: str, uid: str, env):

logger.info("Starting container with image: {}".format(image))

logger.info("Starting container with env: {}".format(env))

try:
random_cpus = random.sample(list(range(cpu_count())), k=os.getenv("CPU_COUNT", max(1,cpu_count() // 8)))
random_cpus = random.sample(list(range(cpu_count())), k=os.getenv(
"CPU_COUNT", max(1, cpu_count() // 8)))
random_cpus = ",".join(map(str, random_cpus))
cpuset_cpus = env.get("CPUS", random_cpus)
logger.info(f"Deploying {identifier} using CPUS={cpuset_cpus}")
Expand All @@ -136,7 +143,8 @@ def start_new_model_container(identifier: str, uid: str, env):
detach=True,
environment=env,
network=network.name,
volumes=["/:/usr/src/app", "/var/run/docker.sock:/var/run/docker.sock"],
volumes=["/:/usr/src/app",
"/var/run/docker.sock:/var/run/docker.sock"],
mounts=[
Mount(
target=env["CONFIG_PATH"],
Expand Down Expand Up @@ -187,7 +195,8 @@ def get_container(container_id):
"""
if len(docker_client.containers.list(all=True, filters={"id": container_id})) == 0:
return None
container = docker_client.containers.list(all=True, filters={"id": container_id})[0]
container = docker_client.containers.list(
all=True, filters={"id": container_id})[0]
return container


Expand All @@ -210,7 +219,8 @@ def get_all_model_prefixes():
"""
# assumes square is somewhere in the container name
lst_container = docker_client.containers.list(filters={"name": "square"})
reference_container = docker_client.containers.list(filters={"name": "traefik"})[0]
reference_container = docker_client.containers.list(
filters={"name": "traefik"})[0]
port = list(reference_container.attrs["NetworkSettings"]["Ports"].items())[0][1][0][
"HostPort"
]
Expand All @@ -233,7 +243,8 @@ def get_all_model_prefixes():


def get_port():
    """Return the published host port of the traefik reverse-proxy container.

    Looks up the first running container whose name matches "traefik" and
    reads the first published port mapping from its Docker network settings.

    Returns:
        str: the host port as reported by Docker (``HostPort`` is a string).

    Raises:
        IndexError: if no container named "traefik" is running, or if it
            publishes no ports.
    """
    reference_container = docker_client.containers.list(
        filters={"name": "traefik"}
    )[0]
    # "Ports" maps container ports to lists of host bindings; take the
    # first binding of the first mapping and return its HostPort.
    return list(reference_container.attrs["NetworkSettings"]["Ports"].items())[0][1][0][
        "HostPort"
    ]

0 comments on commit 097670a

Please sign in to comment.