From 059a4020557cb4aeaa23b7895a68a1f32c06f005 Mon Sep 17 00:00:00 2001
From: muhammed-shihebi <58932402+muhammed-shihebi@users.noreply.github.com>
Date: Wed, 8 May 2024 17:59:53 +0000
Subject: [PATCH 1/7] feat: init replicate service

---
 replicate/Dockerfile              |  19 +++
 replicate/Makefile                |   9 ++
 replicate/api.http                |  71 +++++++++
 replicate/app/app.py              | 136 +++++++++++++++++
 replicate/app/chat_templates.json |  23 +++
 replicate/app/models.json         | 236 ++++++++++++++++++++++++++++++
 replicate/docker-compose.yml      |  57 ++++++++
 replicate/requirements.txt        |   5 +
 8 files changed, 556 insertions(+)
 create mode 100644 replicate/Dockerfile
 create mode 100644 replicate/Makefile
 create mode 100644 replicate/api.http
 create mode 100644 replicate/app/app.py
 create mode 100644 replicate/app/chat_templates.json
 create mode 100644 replicate/app/models.json
 create mode 100644 replicate/docker-compose.yml
 create mode 100644 replicate/requirements.txt

diff --git a/replicate/Dockerfile b/replicate/Dockerfile
new file mode 100644
index 00000000..90da5a80
--- /dev/null
+++ b/replicate/Dockerfile
@@ -0,0 +1,19 @@
+FROM python:3.12.3-slim
+
+# used to avoid interactive prompts during build
+ARG DEBIAN_FRONTEND=noninteractive
+
+# used to avoid buffering output from python
+ENV PYTHONUNBUFFERED=1
+
+WORKDIR /app
+COPY ./requirements.txt /app/requirements.txt
+
+RUN apt-get update && apt-get install --no-install-recommends -y
+RUN pip3 install --no-cache-dir --upgrade -r requirements.txt
+
+COPY /app /app
+
+CMD ["python3", "app.py", "--port", "8000"]
+
+EXPOSE 8000
diff --git a/replicate/Makefile b/replicate/Makefile
new file mode 100644
index 00000000..daaa4c4a
--- /dev/null
+++ b/replicate/Makefile
@@ -0,0 +1,9 @@
+up:
+	docker compose up --remove-orphans --detach --build --force-recreate
+
+stop:
+	docker compose down
+	if [ -n "$(DOCKER_CONTAINER_LIST)" ]; \
+	then \
+		docker stop "$(DOCKER_CONTAINER_LIST)" || true ; \
+	fi
diff --git a/replicate/api.http b/replicate/api.http
new file mode 100644
index 00000000..f725288b
--- /dev/null
+++ b/replicate/api.http
@@ -0,0 +1,71 @@
+@token =
+@domain = https://localhost:8443/api/replicate
+
+###
+# @name test
+GET {{domain}}/test HTTP/1.1
+
+
+###
+# @name models
+GET {{domain}}/models HTTP/1.1
+
+###
+# @name hf_model
+POST {{domain}}/generate_stream HTTP/1.1
+Authorization: Bearer {{token}}
+
+{
+    "model_identifier": "meta/llama-2-7b-chat",
+    "top_p": 0.9,
+    "temperature": 0.7,
+    "max_new_tokens": 100,
+    "messages": [
+        {
+            "role": "system",
+            "text": "You are my helpful assistant"
+        },
+        {
+            "role": "human",
+            "text": "What is the theory of relativity?"
+        },
+        {
+            "role": "ai",
+            "text": "The theory of relativity, or simply relativity, encompasses two interrelated theories of Albert Einstein: special relativity and general relativity. However, the word relativity is sometimes used in reference to Galilean invariance."
+        },
+        {
+            "role": "human",
+            "text": "Who is Albert Einstein?"
+        }
+    ]
+}
+
+###
+# @name non_hf_model
+POST {{domain}}/generate_stream HTTP/1.1
+Authorization: Bearer {{token}}
+
+{
+    "model_identifier": "meta/meta-llama-3-8b",
+    "top_p": 0.9,
+    "temperature": 0.7,
+    "max_new_tokens": 100,
+    "messages": [
+        {
+            "role": "system",
+            "text": "You are my helpful assistant"
+        },
+        {
+            "role": "human",
+            "text": "What is the theory of relativity?"
+        },
+        {
+            "role": "ai",
+            "text": "The theory of relativity, or simply relativity, encompasses two interrelated theories of Albert Einstein: special relativity and general relativity. However, the word relativity is sometimes used in reference to Galilean invariance."
+        },
+        {
+            "role": "human",
+            "text": "Who is Albert Einstein?"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/replicate/app/app.py b/replicate/app/app.py
new file mode 100644
index 00000000..e6fb8aa3
--- /dev/null
+++ b/replicate/app/app.py
@@ -0,0 +1,136 @@
+import uvicorn
+import argparse
+import json
+
+from fastapi import FastAPI, Request, APIRouter, Header, HTTPException, Depends
+from fastapi.responses import JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
+from jinja2.exceptions import TemplateError
+from replicate.client import Client
+from jinja2 import Template
+
+
+router = APIRouter()
+with open('chat_templates.json', 'r') as file:
+    chat_templates = json.load(file)
+
+with open('models.json', 'r') as file:
+    models = json.load(file)['models']
+
+
+def default_formatter(messages):
+    formatted = []
+    for message in messages:
+        if message["role"] == "user":
+            formatted.append(f"User: {message['content']}")
+        elif message["role"] == "assistant":
+            formatted.append(f"Assistant: {message['content']}")
+        elif message["role"] == "system":
+            formatted.append(f"System: {message['content']}")
+    formatted.append("Assistant: ")
+    return "\n\n".join(formatted)
+
+
+def hf_format(messages):
+    for message in messages:
+        if message["role"] == "human":
+            message["role"] = "user"
+        elif message["role"] == "ai":
+            message["role"] = "assistant"
+        if "text" in message:
+            message["content"] = message.pop("text")
+    return messages
+
+
+def get_conversation_prompt(params) -> str:
+    model = params["model_identifier"]
+    messages = hf_format(params["messages"])
+    formatted_prompt = ""
+
+    if model in chat_templates:
+        template = Template(chat_templates[model])
+        try:
+            formatted_prompt = template.render(messages=messages)
+        except TemplateError:
+            # remove system message
+            new_messages = [message for message in messages if message["role"] != "system"]
+            formatted_prompt = template.render(messages=new_messages)
+    elif model in models: # default formatter in case of unknown model
+        formatted_prompt = default_formatter(messages)
+    else:
+        raise HTTPException(status_code=400, detail="Model identifier not supported.")
+
+    return formatted_prompt
+
+
+def get_token(authorization: str = Header(None)):
+    if authorization is None or not authorization.startswith("Bearer "):
+        raise HTTPException(status_code=401, detail="Invalid or missing token")
+    return authorization.split(" ")[1]
+
+
+
+@router.get("/test")
+async def api_test():
+    return JSONResponse({"test": "test"})
+
+
+@router.get("/models")
+async def get_models():
+    # return the list of supported models
+    return JSONResponse({"models": models})
+
+@router.post("/generate_stream")
+async def generate_stream(request: Request, token: str = Depends(get_token)):
+    params = await request.json()
+    replicate = Client(api_token=token)
+    model = params['model_identifier']
+
+    formatted_prompt = get_conversation_prompt(params)
+
+    print(f"Prompt: {formatted_prompt}")
+
+    input = {
+        "top_p": params.get("top_p", 0.9),
+        "prompt": formatted_prompt,
+        "max_new_tokens": params.get("max_new_tokens", 100),
+        "temperature": params.get("temperature", 0.7),
+        "prompt_template": "{prompt}", # the prompt is already fully formatted
+        "system_prompt": "" # this is included in the prompt
+    }
+
+    prediction = replicate.models.predictions.create(
+        model,
+        input=input,
+        stream=True,
+    )
+
+    return JSONResponse({
+        'url': prediction.urls['stream']
+    })
+
+
+def get_app() -> FastAPI:
+    fast_app = FastAPI(
+        title="Replicate.ai Client",
+        version="0.0.1",
+        openapi_url="/api/openapi.json",
+    )
+    fast_app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["*"],
+        allow_credentials=True,
+        allow_methods=["*"],
+        allow_headers=["*"],
+    )
+    fast_app.include_router(router, prefix="/api")
+    return fast_app
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--port", type=int, default=8000)
+    args = parser.parse_args()
+
+    fast_app = get_app()
+    uvicorn.run(fast_app, host='0.0.0.0', port=args.port, log_level="info")
diff --git a/replicate/app/chat_templates.json b/replicate/app/chat_templates.json
new file mode 100644
index 00000000..e2c7a01c
--- /dev/null
+++ b/replicate/app/chat_templates.json
@@ -0,0 +1,23 @@
+{
+    "meta/llama-2-70b-chat": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true and not '<<SYS>>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '<s>' + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<<SYS>>\\n' + content.strip() + '\\n<</SYS>>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '</s>' }}{% endif %}{% endfor %}",
+    "meta/llama-2-7b-chat": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true and not '<<SYS>>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.'
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '' + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\\n' + content.strip() + '\\n<>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '' }}{% endif %}{% endfor %}", + "meta/llama-2-13b-chat": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '' + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\\n' + content.strip() + '\\n<>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '' }}{% endif %}{% endfor %}", + "yorickvp/llava-v1.6-34b": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '' + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\\n' + content.strip() + '\\n<>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '' }}{% endif %}{% endfor %}", + "yorickvp/llava-v1.6-mistral-7b": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '' + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\\n' + content.strip() + '\\n<>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '' }}{% endif %}{% endfor %}", + "01-ai/yi-34b-chat": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif true == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '' + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\\n' + content.strip() + '\\n<>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '' }}{% endif %}{% endfor %}", + "nomagick/chatglm3-6b": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "andreasjansson/llama-2-13b-chat-gguf": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '' + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\\n' + content.strip() + '\\n<>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '' }}{% endif %}{% endfor %}", + "nomagick/qwen-14b-chat": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "01-ai/yi-6b-chat": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif true == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. 
If you don\\'t know the answer to a question, please don\\'t share false information.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '' + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\\n' + content.strip() + '\\n<>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '' }}{% endif %}{% endfor %}", + "andreasjansson/llama-2-70b-chat-gguf": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '' + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\\n' + content.strip() + '\\n<>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '' }}{% endif %}{% endfor %}", + "niron1/qwen-7b-chat": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "nwhitehead/llama2-7b-chat-gptq": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '' + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\\n' + content.strip() + '\\n<>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '' }}{% endif %}{% endfor %}", + "lucataco/tinyllama-1.1b-chat-v1.0": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '' + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\\n' + content.strip() + '\\n<>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '' }}{% endif %}{% endfor %}", + "nomagick/chatglm3-6b-32k": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "niron1/llama-2-7b-chat": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '' + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\\n' + content.strip() + '\\n<>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '' }}{% endif %}{% endfor %}", + "zeke/nyu-llama-2-7b-chat-training-test": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '' + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\\n' + content.strip() + '\\n<>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '' }}{% endif %}{% endfor %}", + "nateraw/llama-2-7b-chat-hf": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '' + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\\n' + content.strip() + '\\n<>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '' }}{% endif %}{% endfor %}", + "juanjaragavi/abby-llama-2-7b-chat": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '' + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\\n' + content.strip() + '\\n<>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '' }}{% endif %}{% endfor %}", + "juanjaragavi/abbot-llama-2-7b-chat": "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% elif false == true and not '<>' in messages[0]['content'] %}{% set loop_messages = messages %}{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' 
%}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '' + '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'system' %}{{ '<>\\n' + content.strip() + '\\n<>\\n\\n' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' + '' }}{% endif %}{% endfor %}", + "jquintanilla4/qwen1.5-32b-chat": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}" +} \ No newline at end of file diff --git a/replicate/app/models.json b/replicate/app/models.json new file mode 100644 index 00000000..6c947f2e --- /dev/null +++ b/replicate/app/models.json @@ -0,0 +1,236 @@ +{ + "models": [ + "meta/meta-llama-3-70b-instruct", + "meta/meta-llama-3-8b", + "yorickvp/llava-13b", + "mistralai/mixtral-8x7b-instruct-v0.1", + "meta/llama-2-70b-chat", + "meta/llama-2-7b-chat", + "meta/llama-2-13b-chat", + "mistralai/mistral-7b-instruct-v0.2", + "fofr/prompt-classifier", + "meta/meta-llama-3-8b-instruct", + "yorickvp/llava-v1.6-34b", + "mistralai/mistral-7b-v0.1", + "replicate-internal/llama-2-70b-triton", + "mistralai/mistral-7b-instruct-v0.1", + "yorickvp/llava-v1.6-vicuna-13b", + "replicate/dolly-v2-12b", + "yorickvp/llava-v1.6-mistral-7b", + "spuuntries/flatdolphinmaid-8x7b-gguf", + "joehoover/instructblip-vicuna13b", + "replicate/vicuna-13b", + "meta/llama-2-7b", + "01-ai/yi-34b-chat", + "snowflake/snowflake-arctic-instruct", + "andreasjansson/sheep-duck-llama-2-70b-v1-1-gguf", + "meta/llama-2-70b", + "nateraw/goliath-120b", + "01-ai/yi-6b", + "antoinelyset/openhermes-2-mistral-7b-awq", + "replicate/flan-t5-xl", + "meta/codellama-13b", + "stability-ai/stablelm-tuned-alpha-7b", + "replicate/llama-7b", + "meta/codellama-34b-instruct", + "nateraw/openchat_3.5-awq", + "google-deepmind/gemma-2b-it", + "lucataco/phi-3-mini-4k-instruct", + "nateraw/mistral-7b-openorca", + "meta/llama-2-13b", + "joehoover/mplug-owl", + "fofr/image-prompts", + "nateraw/nous-hermes-2-solar-10.7b", + "meta/meta-llama-3-70b", + "joehoover/falcon-40b-instruct", + "kcaverly/dolphin-2.5-mixtral-8x7b-gguf", + "meta/codellama-13b-instruct", + "google-deepmind/gemma-7b-it", + "replicate/oasst-sft-1-pythia-12b", + "meta/codellama-7b-instruct", + "lucataco/dolphin-2.2.1-mistral-7b", + "lucataco/moondream2", + "yorickvp/llava-v1.6-vicuna-7b", + "nateraw/defog-sqlcoder-7b-2", + "andreasjansson/codellama-7b-instruct-gguf", + "meta/codellama-70b-instruct", + "uwulewd/airoboros-llama-2-70b", + "replicate/lifeboat-70b", + "meta/codellama-7b", + "nomagick/chatglm3-6b", + "spuuntries/miqumaid-v1-70b-gguf", + "gregwdata/defog-sqlcoder-q8", + "lucataco/dolphin-2.1-mistral-7b", + "kcaverly/neuralbeagle14-7b-gguf", + "antoinelyset/openhermes-2.5-mistral-7b", + "nomagick/chatglm2-6b", + "lucataco/moondream1", + "kcaverly/openchat-3.5-1210-gguf", + "meta/codellama-34b", + "kcaverly/nous-hermes-2-yi-34b-gguf", + "replicate/mpt-7b-storywriter", + "replicate/gpt-j-6b", + "andreasjansson/llama-2-13b-chat-gguf", + 
"nateraw/nous-hermes-llama2-awq", + "joehoover/zephyr-7b-alpha", + "google-deepmind/gemma-7b", + "meta/codellama-34b-python", + "nateraw/zephyr-7b-beta", + "hikikomori-haven/solar-uncensored", + "replicate/llama-13b-lora", + "nomagick/qwen-14b-chat", + "kcaverly/dolphin-2.7-mixtral-8x7b-gguf", + "meta/codellama-13b-python", + "lucataco/qwen1.5-72b", + "meta/codellama-7b-python", + "01-ai/yi-6b-chat", + "joehoover/sql-generator", + "nateraw/salmonn", + "anotherjesse/llava-lies", + "kcaverly/dolphin-2.6-mixtral-8x7b-gguf", + "lucataco/qwen1.5-110b", + "organisciak/ocsai-llama2-7b", + "01-ai/yi-34b", + "andreasjansson/llama-2-70b-chat-gguf", + "kcaverly/deepseek-coder-33b-instruct-gguf", + "replit/replit-code-v1-3b", + "01-ai/yi-34b-200k", + "lucataco/deepseek-vl-7b-base", + "mattt/orca-2-13b", + "niron1/openorca-platypus2-13b", + "camenduru/wizardlm-2-8x22b", + "daanelson/flan-t5-large", + "lucataco/phi-3-mini-128k-instruct", + "lucataco/llama-3-vision-alpha", + "meta/codellama-70b", + "anotherjesse/sdxl-recur", + "niron1/qwen-7b-chat", + "nateraw/causallm-14b", + "meta/codellama-70b-python", + "deepseek-ai/deepseek-math-7b-instruct", + "nateraw/samsum-llama-2-13b", + "spuuntries/miqumaid-v2-2x70b-dpo-gguf", + "nomagick/qwen-vl-chat", + "andreasjansson/codellama-34b-instruct-gguf", + "andreasjansson/wizardcoder-python-34b-v1-gguf", + "nwhitehead/llama2-7b-chat-gptq", + "moinnadeem/vllm-engine-llama-7b", + "andreasjansson/llama-2-13b-gguf", + "charles-dyfis-net/llama-2-13b-hf--lmtp-8bit", + "ruben-svensson/llama2-aqua-test1", + "lucataco/wizardcoder-33b-v1.1-gguf", + "antoinelyset/openhermes-2.5-mistral-7b-awq", + "papermoose/llama-pajama", + "stability-ai/stablelm-base-alpha-7b", + "fofr/llama2-prompter", + "kcaverly/deepseek-coder-6.7b-instruct", + "google-deepmind/gemma-2b", + "fofr/star-trek-gpt-j-6b", + "replicate-internal/staging-llama-2-7b", + "andreasjansson/plasma", + "stability-ai/stablelm-base-alpha-3b", + "theghoul21/srl", + "kcaverly/dolphin-2.6-mistral-7b-gguf", + "spuuntries/erosumika-7b-v3-0.2-gguf", + "lucataco/tinyllama-1.1b-chat-v1.0", + "ignaciosgithub/pllava", + "nomagick/chatglm3-6b-32k", + "nateraw/axolotl-llama-2-7b-english-to-hinglish", + "kcaverly/nous-capybara-34b-gguf", + "peter65374/openbuddy-llemma-34b-gguf", + "niron1/llama-2-7b-chat", + "nateraw/sqlcoder-70b-alpha", + "antoinelyset/openhermes-2-mistral-7b", + "cbh123/dylan-lyrics", + "lucataco/qwen1.5-14b", + "camenduru/mixtral-8x22b-v0.1-instruct-oh", + "kcaverly/phind-codellama-34b-v2-gguf", + "hayooucom/llm-60k", + "nomagick/chatglm2-6b-int4", + "zeke/nyu-llama-2-7b-chat-training-test", + "deepseek-ai/deepseek-math-7b-base", + "hamelsmu/llama-3-70b-instruct-awq-with-tools", + "kcaverly/nous-hermes-2-solar-10.7b-gguf", + "xrunda/med", + "lucataco/phixtral-2x2_8", + "kcaverly/nexus-raven-v2-13b-gguf", + "adirik/mamba-2.8b", + "lucataco/wizard-vicuna-13b-uncensored", + "hamelsmu/honeycomb-2", + "fofr/star-trek-adventure", + "nateraw/stablecode-completion-alpha-3b-4k", + "zallesov/super-real-llama2", + "fofr/neuromancer-13b", + "swartype/lanne-m1-70b", + "m1guelpf/mario-gpt", + "camenduru/zephyr-orpo-141b-a35b-v0.1", + "nateraw/samsum-llama-7b", + "fofr/star-trek-flan", + "fofr/star-trek-llama", + "titocosta/notus-7b-v1", + "moinnadeem/fastervicuna_13b", + "nateraw/llama-2-7b-paraphrase-v1", + "camenduru/mixtral-8x22b-instruct-v0.1", + "rybens92/una-cybertron-7b-v2--lmtp-8bit", + "cjwbw/opencodeinterpreter-ds-6.7b", + "nateraw/wizardcoder-python-34b-v1.0", + "automorphic-ai/runhouse", + "crowdy/line-lang-3.6b", 
+ "cjwbw/starcoder2-15b", + "nateraw/llama-2-7b-chat-hf", + "tanzir11/merge", + "moinnadeem/codellama-34b-instruct-vllm", + "lucataco/olmo-7b", + "lucataco/qwen1.5-1.8b", + "spuuntries/borealis-10.7b-dpo-gguf", + "nateraw/codellama-7b-instruct-hf", + "juanjaragavi/abby-llama-2-7b-chat", + "adirik/mamba-130m", + "peter65374/openbuddy-mistral-7b", + "lucataco/qwen1.5-7b", + "nateraw/aidc-ai-business-marcoroni-13b", + "replicate-internal/mixtral-8x7b-instruct-v0.1-pget", + "martintmv-git/moondream2", + "chigozienri/llava-birds", + "cjwbw/c4ai-command-r-v01", + "lidarbtc/kollava-v1.5", + "dsingal0/mixtral-single-gpu", + "replicate-internal/staging-honeycomb-triton", + "cbh123/samsum", + "titocosta/starling", + "replicate/elixir-gen", + "cbh123/homerbot", + "technillogue/mixtral-instruct-nix", + "lucataco/nous-hermes-2-mixtral-8x7b-dpo", + "adirik/mamba-2.8b-slimpj", + "adirik/mamba-1.4b", + "sruthiselvaraj/finetuned-llama2", + "lucataco/qwen1.5-0.5b", + "hamelsmu/honeycomb", + "adirik/mamba-370m", + "seanoliver/bob-dylan-fun-tuning", + "nateraw/gairmath-abel-7b", + "adirik/mamba-790m", + "intentface/poro-34b-gguf-checkpoint", + "lucataco/hermes-2-pro-llama-3-8b", + "lucataco/qwen1.5-32b", + "fleshgordo/orni2-chat", + "nateraw/codellama-7b-instruct", + "charles-dyfis-net/llama-2-7b-hf--lmtp-4bit", + "lucataco/qwen1.5-4b", + "nateraw/llama-2-7b-samsum", + "juanjaragavi/abbot-llama-2-7b-chat", + "msamogh/iiu-generator-llama2-7b-2", + "divyavanmahajan/my-pet-llama", + "nateraw/codellama-7b", + "nateraw/codellama-34b", + "charles-dyfis-net/llama-2-13b-hf--lmtp", + "replicate-internal/gemma-2b-it", + "jquintanilla4/qwen1.5-32b-chat", + "halevi/sandbox1", + "demonpore-sys/llamaxine0.1", + "charles-dyfis-net/llama-2-13b-hf--lmtp-4bit", + "nateraw/codellama-13b", + "nateraw/codellama-13b-instruct" + ] +} \ No newline at end of file diff --git a/replicate/docker-compose.yml b/replicate/docker-compose.yml new file mode 100644 index 00000000..4815a277 --- /dev/null +++ b/replicate/docker-compose.yml @@ -0,0 +1,57 @@ +version: "3.9" + +services: + traefik: + image: traefik:v3.0 + container_name: traefik-square + command: + - --api.insecure=true + - --providers.docker=true + - --providers.docker.exposedbydefault=false + - --entrypoints.web.address=:80 + - --accesslog=true + - --accesslog.format=json + - --accesslog.filepath=/var/log/traefik/access.log.json + - --entrypoints.websecure.address=:443 + - --certificatesresolvers.le.acme.tlschallenge=true + - --certificatesresolvers.le.acme.caserver=https://acme-staging-v02.api.letsencrypt.org/directory + - --certificatesresolvers.le.acme.email=sachdeva@ukp.informatik.tu-darmstadt.de + - --certificatesresolvers.le.acme.storage=/certificates/acme.json + - --api.dashboard=true + - --log.level=DEBUG + ports: + - "8989:80" # http port + - "8443:443" # https port + - "8083:8080" # web UI port + labels: + - "traefik.enable=true" + # Global redirect to https + - "traefik.http.routers.http-catchall.rule=hostregexp(`{host:.+}`)" + - "traefik.http.routers.http-catchall.entrypoints=web" + - "traefik.http.routers.http-catchall.middlewares=https-redirect" + + # Middleware redirect from HTTP to HTTPS + - "traefik.http.middlewares.https-redirect.redirectscheme.scheme=https" + - "traefik.http.middlewares.https-redirect.redirectscheme.port=8443" + - "traefik.http.middlewares.https-redirect.redirectscheme.permanent=true" + volumes: + - /var/run/docker.sock:/var/run/docker.sock:ro + # mount volume to store certificates + - traefik-public-certificates:/certificates + + replicate: + 
build:
+      context: .
+    container_name: replicate
+    labels:
+      - "traefik.enable=true"
+      - "traefik.http.routers.replicate.rule=PathPrefix(`/api/replicate`)"
+      - "traefik.http.routers.replicate.entrypoints=websecure"
+      - "traefik.http.routers.replicate.tls=true"
+      - "traefik.http.routers.replicate.tls.certresolver=le"
+      - "traefik.http.routers.replicate.middlewares=replicate-stripprefix,replicate-addprefix"
+      - "traefik.http.middlewares.replicate-stripprefix.stripPrefixRegex.regex=/api/[a-zA-Z0-9_-]+"
+      - "traefik.http.middlewares.replicate-addprefix.addPrefix.prefix=/api"
+
+volumes:
+  traefik-public-certificates:
\ No newline at end of file
diff --git a/replicate/requirements.txt b/replicate/requirements.txt
new file mode 100644
index 00000000..490ce784
--- /dev/null
+++ b/replicate/requirements.txt
@@ -0,0 +1,5 @@
+replicate==0.25.2
+jinja2==3.1.4
+sentencepiece==0.2.0
+protobuf==5.26.1
+fastapi==0.111.0
\ No newline at end of file

From c0017f21ed85877dc30f74890cd2383dcbf71a6f Mon Sep 17 00:00:00 2001
From: muhammed-shihebi <58932402+muhammed-shihebi@users.noreply.github.com>
Date: Thu, 9 May 2024 18:26:47 +0000
Subject: [PATCH 2/7] feat: add completion support

---
 replicate/api.http   | 17 ++++++++++++++++-
 replicate/app/app.py | 32 +++++++++++++++++++++++++++++++-
 2 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/replicate/api.http b/replicate/api.http
index f725288b..51a00338 100644
--- a/replicate/api.http
+++ b/replicate/api.http
@@ -42,7 +42,7 @@ Authorization: Bearer {{token}}
 
 ###
 # @name non_hf_model
-POST {{domain}}/generate_stream HTTP/1.1
+POST {{domain}}/generate_chat_stream HTTP/1.1
 Authorization: Bearer {{token}}
 
 {
@@ -68,4 +68,19 @@ Authorization: Bearer {{token}}
             "text": "Who is Albert Einstein?"
         }
     ]
-}
\ No newline at end of file
+}
+
+
+###
+# @name completion
+POST {{domain}}/generate_completion_stream HTTP/1.1
+Authorization: Bearer {{token}}
+
+{
+    "model_identifier": "meta/meta-llama-3-8b",
+    "top_p": 0.9,
+    "temperature": 0.7,
+    "max_new_tokens": 100,
+    "prompt": "What is the theory of relativity?",
+    "system_prompt": "You are a helpful assistant."
+}
\ No newline at end of file
diff --git a/replicate/app/app.py b/replicate/app/app.py
index e6fb8aa3..e2581614 100644
--- a/replicate/app/app.py
+++ b/replicate/app/app.py
@@ -55,6 +55,8 @@ def get_conversation_prompt(params) -> str:
             # remove system message
             new_messages = [message for message in messages if message["role"] != "system"]
             formatted_prompt = template.render(messages=new_messages)
+        except Exception as e:
+            raise HTTPException(status_code=400, detail="Error rendering template: " + str(e))
     elif model in models: # default formatter in case of unknown model
         formatted_prompt = default_formatter(messages)
     else:
@@ -80,7 +82,7 @@ async def get_models():
     # return the list of supported models
     return JSONResponse({"models": models})
 
-@router.post("/generate_stream")
+@router.post("/generate_chat_stream")
 async def generate_stream(request: Request, token: str = Depends(get_token)):
     params = await request.json()
     replicate = Client(api_token=token)
@@ -110,6 +112,34 @@ async def generate_stream(request: Request, token: str = Depends(get_token)):
     })
 
 
+@router.post("/generate_completion_stream")
+async def generate_completion(request: Request, token: str = Depends(get_token)):
+    params = await request.json()
+    replicate = Client(api_token=token)
+    model = params['model_identifier']
+
+    print(f"Prompt: {params['prompt']}")
+
+    input = {
+        "top_p": params.get("top_p", 0.9),
+        "prompt": params['prompt'],
+        "max_new_tokens": params.get("max_new_tokens", 100),
+        "temperature": params.get("temperature", 0.7),
+        # "prompt_template": "{prompt}", # rely on the default behavior of the replicate lib
+        "system_prompt": params['system_prompt']
+    }
+
+    prediction = replicate.models.predictions.create(
+        model,
+        input=input,
+        stream=True,
+    )
+
+    return JSONResponse({
+        'url': prediction.urls['stream']
+    })
+
+
 def get_app() -> FastAPI:
     fast_app = FastAPI(
         title="Replicate.ai Client",

From 5ec535b8794fa3d7810206687b141af773b9f7b0 Mon Sep 17 00:00:00 2001
From: muhammed-shihebi <58932402+muhammed-shihebi@users.noreply.github.com>
Date: Thu, 9 May 2024 21:31:47 +0000
Subject: [PATCH 3/7] feat: better error handling

---
 replicate/api.http   |  3 +--
 replicate/app/app.py | 35 +++++++++++++++++++++++-------------
 2 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/replicate/api.http b/replicate/api.http
index 51a00338..90a0b47b 100644
--- a/replicate/api.http
+++ b/replicate/api.http
@@ -81,6 +81,5 @@ Authorization: Bearer {{token}}
     "top_p": 0.9,
     "temperature": 0.7,
     "max_new_tokens": 100,
-    "prompt": "What is the theory of relativity?",
-    "system_prompt": "You are a helpful assistant."
+    "prompt": "What is the theory of relativity?"
 }
\ No newline at end of file
diff --git a/replicate/app/app.py b/replicate/app/app.py
index e2581614..14cf563c 100644
--- a/replicate/app/app.py
+++ b/replicate/app/app.py
@@ -7,6 +7,7 @@
 from fastapi.middleware.cors import CORSMiddleware
 from jinja2.exceptions import TemplateError
 from replicate.client import Client
+from replicate.exceptions import ReplicateError
 from jinja2 import Template
 
 
@@ -101,11 +102,16 @@ async def generate_stream(request: Request, token: str = Depends(get_token)):
         "system_prompt": "" # this is included in the prompt
     }
 
-    prediction = replicate.models.predictions.create(
-        model,
-        input=input,
-        stream=True,
-    )
+    try:
+        prediction = replicate.models.predictions.create(
+            model,
+            input=input,
+            stream=True,
+        )
+    except ReplicateError as e:
+        if e.status == 401:
+            raise HTTPException(status_code=401, detail="You did not pass a valid authentication token")
+        raise HTTPException(status_code=e.status or 500, detail=str(e))
 
     return JSONResponse({
         'url': prediction.urls['stream']
@@ -125,15 +131,20 @@ async def generate_completion(request: Request, token: str = Depends(get_token)):
         "prompt": params['prompt'],
         "max_new_tokens": params.get("max_new_tokens", 100),
         "temperature": params.get("temperature", 0.7),
-        # "prompt_template": "{prompt}", # rely on the default behavior of the replicate lib
-        "system_prompt": params['system_prompt']
+        "prompt_template": "{prompt}",
+        # "system_prompt": params['system_prompt']
     }
 
-    prediction = replicate.models.predictions.create(
-        model,
-        input=input,
-        stream=True,
-    )
+    try:
+        prediction = replicate.models.predictions.create(
+            model,
+            input=input,
+            stream=True,
+        )
+    except ReplicateError as e:
+        if e.status == 401:
+            raise HTTPException(status_code=401, detail="You did not pass a valid authentication token")
+        raise HTTPException(status_code=e.status or 500, detail=str(e))
 
     return JSONResponse({
         'url': prediction.urls['stream']

From a7d83c7c4e9bac5e5bd8cbba87af20fa71261254 Mon Sep 17 00:00:00 2001
From: muhammed-shihebi <58932402+muhammed-shihebi@users.noreply.github.com>
Date: Thu, 9 May 2024 21:34:07 +0000
Subject: [PATCH 4/7] feat: add replicate to the interface

---
 frontend/src/api/index.js            |  37 +++-
 frontend/src/services/custom_llm.js  | 290 +++++++++++++++++++++------
 frontend/src/views/PromptingView.vue | 192 +++++++++++++++---
 3 files changed, 415 insertions(+), 104 deletions(-)

diff --git a/frontend/src/api/index.js b/frontend/src/api/index.js
index 6c160bb3..4e146e7b 100644
--- a/frontend/src/api/index.js
+++ b/frontend/src/api/index.js
@@ -12,7 +12,7 @@ const SKILL_MANAGER_URL = `${process.env.VUE_APP_SKILL_MANAGER_URL}`;
 const EVALUATOR_URL = `${process.env.VUE_APP_EVALUATOR_URL}`;
 const DATASTORES_URL = `${process.env.VUE_APP_DATASTORES_URL}`;
 const MODEL_MANAGER_URL = `${process.env.VUE_APP_MODEL_MANAGER_URL}`
-const LLM_MODELS_URL = MODEL_MANAGER_URL.replace('/models', '');
+const BASE_SQUARE_URL = MODEL_MANAGER_URL.replace('/models', '');
 
 /**
  * Get a list of available skill types.
@@ -263,7 +263,7 @@ export function getLocalLLMs() {
 export function generateText(params, streaming) {
   let url;
   if(!streaming){
-    url = `${LLM_MODELS_URL}/${params.model_identifier}/worker_generate`
+    url = `${BASE_SQUARE_URL}/${params.model_identifier}/worker_generate`
     const response = axios.post(url, params, {
       headers:{
         'Content-Type': 'application/json'
@@ -271,7 +271,7 @@ export function generateText(params, streaming) {
     });
     return response;
   }else{
-    url = `${LLM_MODELS_URL}/${params.model_identifier}/worker_generate_stream`
+    url = `${BASE_SQUARE_URL}/${params.model_identifier}/worker_generate_stream`
     const response = fetch(url, {
       method: 'POST',
       headers: {
@@ -288,10 +288,37 @@ export function getAlternatives(text){
   const params = {
     text: text
   }
-  const response = axios.post(`${LLM_MODELS_URL}/sensitivity/generate_alternatives`, params, {
+  const response = axios.post(`${BASE_SQUARE_URL}/sensitivity/generate_alternatives`, params, {
     headers:{
       'Content-Type': 'application/json'
     }
   });
   return response;
-}
\ No newline at end of file
+}
+
+export function getReplicateModels(){
+  return axios.get(`https://localhost:8443/api/replicate/models`, {}); // TODO: change to actual URL
+}
+
+export function generateChatStreamReplicate(params, token) {
+  let url = `https://localhost:8443/api/replicate/generate_chat_stream` // TODO: change to actual URL
+  const response = axios.post(url, params, {
+    headers:{
+      'Content-Type': 'application/json',
+      'Authorization': 'Bearer ' + token
+    }
+  });
+  return response;
+}
+
+
+export function generateCompletionStreamReplicate(params, token) {
+  let url = `https://localhost:8443/api/replicate/generate_completion_stream` // TODO: change to actual URL
+  const response = axios.post(url, params, {
+    headers:{
+      'Content-Type': 'application/json',
+      'Authorization': 'Bearer ' + token
+    }
+  });
+  return response;
+}
diff --git a/frontend/src/services/custom_llm.js b/frontend/src/services/custom_llm.js
index 088a2934..abb49e08 100644
--- a/frontend/src/services/custom_llm.js
+++ b/frontend/src/services/custom_llm.js
@@ -1,13 +1,17 @@
+/* eslint-disable no-unused-vars */
 import { BaseChatModel } from 'langchain/chat_models/base';
 import { BaseLLM } from 'langchain/llms/base';
 import {
   generateText,
+  generateChatStreamReplicate,
+  generateCompletionStreamReplicate,
 } from '@/api'
 import {
   AIMessage,
 } from 'langchain/schema';
 
 
+
 export class CustomChatModel extends BaseChatModel {
   model_identifier = "";
   temperature = 0.7;
@@ -62,68 +66,227 @@ export class CustomChatModel extends BaseChatModel {
       echo: false, // false will make model return only last message
       generation_mode: "chat"
     }
-    try {
-      if (this.streaming === false) {
-        const response = await generateText(bodyData, this.streaming)
-        const generations = [
-          {text: response.data.text, message: new AIMessage(response.data.text)}
-        ]
-        return {
-          generations: generations,
-          llmOutput: { tokenUsage: response.data.usage }
-        }
-      } else {
-        let readableStream = await generateText(bodyData, this.streaming);
-        readableStream = readableStream.body;
-        const decoder = new TextDecoder();
-        const reader = readableStream.getReader();
-        let buffer = '';
-        let receivedText = '';
-        let lastLength = 0;
-        let done, value;
-
-        while (!done){
-          ({done, value} = await reader.read());
-
-          const messageChunk = decoder.decode(value, { stream: true });
-          buffer += messageChunk;
-
-          // sometimes the buffer contains several messages so we split them
-          const parts = buffer.split('\u0000');
-          for (let i = 0; i < parts.length - 1; i++) {
-            try
{ - const jsonChunk = JSON.parse(parts[i]); - const newText = jsonChunk.text.substring(lastLength); - lastLength = jsonChunk.text.length; - receivedText += newText; - runManager.handleLLMNewToken(newText, jsonChunk.usage) - } catch (error) { - console.error(error); - } - } - // Keep the last incomplete part in the buffer - buffer = parts[parts.length - 1]; + if (this.streaming === false) { + const response = await generateText(bodyData, this.streaming) + const generations = [ + {text: response.data.text, message: new AIMessage(response.data.text)} + ] + return { + generations: generations, + llmOutput: { tokenUsage: response.data.usage } + } + } else { + let readableStream = await generateText(bodyData, this.streaming); + readableStream = readableStream.body; + const decoder = new TextDecoder(); + const reader = readableStream.getReader(); + let buffer = ''; + let receivedText = ''; + let lastLength = 0; + let done, value; + + while (!done){ + ({done, value} = await reader.read()); + + const messageChunk = decoder.decode(value, { stream: true }); + buffer += messageChunk; - if (options.signal.aborted){ - throw new Error("AbortError"); + // sometimes the buffer contains several messages so we split them + const parts = buffer.split('\u0000'); + for (let i = 0; i < parts.length - 1; i++) { + try { + const jsonChunk = JSON.parse(parts[i]); + const newText = jsonChunk.text.substring(lastLength); + lastLength = jsonChunk.text.length; + receivedText += newText; + runManager.handleLLMNewToken(newText, jsonChunk.usage) + } catch (error) { + console.error(error); } + } + // Keep the last incomplete part in the buffer + buffer = parts[parts.length - 1]; + if (options.signal.aborted){ + throw new Error("AbortError"); } - const generations = [ - {text: receivedText, message: new AIMessage(receivedText)} - ] - return { - generations: generations, - llmOutput: { tokenUsage: 0 } + + } + const generations = [ + {text: receivedText, message: new AIMessage(receivedText)} + ] + return { + generations: generations, + llmOutput: { tokenUsage: 0 } + } + } + } +} + + +export class ReplicateChatModel extends BaseChatModel { + model_identifier = ""; + temperature = 0.7; + top_p = 0.9; + max_new_tokens = 1000; + streaming = true; + replicateKey = ""; + + constructor(params) { + super(params); + this.model_identifier = params.model_identifier; + this.temperature = params.temperature; + this.top_p = params.top_p; + this.max_new_tokens = params.max_new_tokens; + this.streaming = params.streaming; + this.replicateKey = params.replicateKey; + } + + async _llmType() { + return 'replicate_model' + } + + _parseChatHistory(history){ + const chatHistory = []; + for (const message of history) { + if ("content" in message) { + if (message._getType() === "human") { + chatHistory.push({ role: "human", text: message.content }); + } else if (message._getType() === "ai") { + chatHistory.push({ role: "ai", text: message.content }); + } else if (message._getType() === "system" && message.content !== "") { + chatHistory.push({ role: "system", text: message.content }); } } - } catch (error) { - console.log(error) - return { error: error } + } + return chatHistory; + } + + async _generate( + messages, + options, + runManager + ) { + const messageHistory = this._parseChatHistory(messages); + const bodyData = { + model_identifier: this.model_identifier, + messages: messageHistory, + temperature: this.temperature, + top_p: this.top_p, + max_new_tokens: this.max_new_tokens, + } + + const response = await 
generateChatStreamReplicate(bodyData, this.replicateKey); + const streamUrl = response.data.url + + let receivedText = ''; + + const waitForDoneEvent = new Promise((resolve, reject) => { + const source = new EventSource(streamUrl); + + const onOutput = (event) => { + if (options.signal?.aborted) { + source.removeEventListener("output", onOutput); + source.removeEventListener("done", onDone); + source.close(); + reject(new Error("AbortError")); + return; + } + + const next_token = event.data; + if (this.streaming){ + runManager?.handleLLMNewToken(next_token); + } + receivedText += next_token; + }; + + const onDone = (_) => { + source.removeEventListener("output", onOutput); + source.removeEventListener("done", onDone); + source.close(); + resolve(); + }; + + source.addEventListener("output", onOutput); + source.addEventListener("done", onDone); + }); + + await waitForDoneEvent; + + const generations = [ + {text: receivedText, message: new AIMessage(receivedText)} + ] + return { + generations: generations, + llmOutput: { tokenUsage: 0 } } } } + +export class ReplicateGenerativeModel extends BaseLLM { + model_identifier = ""; + top_p = 0.9; + temperature = 0.7; + max_new_tokens = 1000; + streaming = false; + replicateKey = ""; + + constructor(params) { + super(params); + this.model_identifier = params.model_identifier; + this.top_p = params.top_p; + this.temperature = params.temperature; + this.max_new_tokens = params.max_new_tokens; + this.streaming = params.streaming; + this.replicateKey = params.replicateKey; + } + + async _llmType() { + return 'replicate_generative_model' + } + + async _generate(prompts) { + const bodyData = { + model_identifier: this.model_identifier, + top_p: this.top_p, + temperature: this.temperature, + max_new_tokens: this.max_new_tokens, + prompt: prompts[0], + } + const response = await generateCompletionStreamReplicate(bodyData, this.replicateKey); + const streamUrl = response.data.url + + let receivedText = ''; + + const waitForDoneEvent = new Promise((resolve, _) => { + const source = new EventSource(streamUrl); + source.addEventListener("output", (event) => { + const next_token = event.data; + receivedText += next_token; + }) + source.addEventListener("done", (_) => { + source.close(); + resolve(); + }); + }); + + await waitForDoneEvent; + + const generations = [[ + { + text: receivedText.trim(), + generationInfo: {} + } + ]] + return { + generations: generations, + llmOutput: { tokenUsage: 0 } + } + } +} + + export class CustomGenerativeModel extends BaseLLM { model_identifier = ""; temperature = 0.7; @@ -155,21 +318,16 @@ export class CustomGenerativeModel extends BaseLLM { echo: false, // false will make model return only last message generation_mode: "completion" } - try { - const response = await generateText(bodyData, this.streaming) - const generations = [[ - { - text: response.data.text.trim(), - generationInfo: {} - } - ]] - return { - generations: generations, - llmOutput: { tokenUsage: response.data.usage } + const response = await generateText(bodyData, this.streaming) + const generations = [[ + { + text: response.data.text.trim(), + generationInfo: {} } - } catch (error) { - console.log(error) - return { error: error } + ]] + return { + generations: generations, + llmOutput: { tokenUsage: response.data.usage } } } } diff --git a/frontend/src/views/PromptingView.vue b/frontend/src/views/PromptingView.vue index 13b8f642..3f06cdf1 100644 --- a/frontend/src/views/PromptingView.vue +++ b/frontend/src/views/PromptingView.vue @@ -10,24 +10,76 @@ 
:style="{height: ['normal_chat', 'agent_chat'].includes(chatConfig.chatMode) ? '77vh': '100%'}">
[template hunk body lost in extraction: only the +/- markers survived. Judging from the script-section changes below (replicateModels, replicateKey, the "Please enter your Replicate key first." toast), this hunk extends the model selector with Replicate models and adds a Replicate API key field alongside the existing OpenAI key input.]
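For context, both Replicate model classes added to custom_llm.js above follow the same two-step streaming contract: the generate_chat_stream / generate_completion_stream endpoints respond with a JSON body containing a stream URL (see response.data.url), and the referenced SSE stream emits one "output" event per token followed by a single "done" event. A minimal sketch of draining such a stream outside LangChain; the function name and the "error" listener are illustrative assumptions, not part of this patch:

    // Sketch only: consume a Replicate SSE stream, invoking onToken per "output" event.
    // Assumes the { url } response shape used by the endpoints above; the "error"
    // listener (standard EventSource behavior) is an added assumption for completeness.
    function consumeReplicateStream(streamUrl, onToken) {
      return new Promise((resolve, reject) => {
        const source = new EventSource(streamUrl);
        source.addEventListener("output", (event) => onToken(event.data));
        source.addEventListener("done", () => { source.close(); resolve(); });
        source.addEventListener("error", (err) => { source.close(); reject(err); });
      });
    }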
@@ -507,9 +559,15 @@ import VueTippy from "vue-tippy"; import { getOpenAIModels, getLocalLLMs, - getAlternatives + getAlternatives, + getReplicateModels } from '@/api'; -import { CustomChatModel, CustomGenerativeModel } from "../services/custom_llm"; +import { + CustomChatModel, + CustomGenerativeModel, + ReplicateChatModel, + ReplicateGenerativeModel, +} from "../services/custom_llm"; import {Popover} from "bootstrap"; @@ -561,6 +619,8 @@ export default { "gpt-3.5-turbo", "gpt-3.5-turbo-0301", ], + replicateModels: [], + replicateKey: "", localChatModels: [], availableTools: [], addingNewTool: false, @@ -577,12 +637,12 @@ export default { }, chatConfig: { - chatMode: "sensitivity", - selectedModel: "gpt-3.5-turbo-0613", + chatMode: "normal_chat", + selectedModel: "gpt-3.5-turbo-1106", temperature: 0.7, maxTokens: 256, top_p: 0.9, - systemPrompt: ``, + systemPrompt: "", tools: [], sensitivityPromptTemplate: 'SENTENCE: {sentence}\nQUESTION: Is this (0) unacceptable, or (1) acceptable?\nANSWER: {answer}', }, @@ -636,6 +696,7 @@ export default { created() { this.messages = []; this.openAIApiKey = localStorage.getItem("openAIApiKey"); + this.replicateKey = localStorage.getItem("replicateKey"); this.fetchModels(); this.initChatModel(); this.initGenerativeModel() @@ -983,6 +1044,16 @@ export default { top_p: this.chatConfig.top_p, }); } + else if (this.replicateModels.includes(this.chatConfig.selectedModel)) { + this.generativeModel = new ReplicateGenerativeModel({ + model_identifier: this.chatConfig.selectedModel, + top_p: this.chatConfig.top_p, + temperature: this.chatConfig.temperature, + max_new_tokens: 2, + streaming: false, + replicateKey: this.replicateKey, + }); + } }, async addNewTool() { @@ -1096,6 +1167,11 @@ export default { this.isGenerating = false; this.messages.splice(this.messages.length - 1, 1); return; + } else if (this.replicateModels.includes(this.chatConfig.selectedModel) && this.replicateKey === ""){ + this.showErrorToast("Please enter your Replicate key first."); + this.isGenerating = false; + this.messages.splice(this.messages.length - 1, 1); + return; } this.messages.push({ @@ -1123,7 +1199,9 @@ export default { this.isGenerating = false; this.scrollDown(); this.messages[this.messages.length - 1].done = true; + } else { // agent chat + const res = await this.chatModel.call({ input: text }); if (res.intermediateSteps.length > 0) { response += "```\n"; @@ -1148,13 +1226,14 @@ export default { } } catch (err) { - console.error(err); - console.log(err.error) - if (err.status === 401 && err.code === "invalid_api_key") { + console.error(`======== err: ${err}`); + if (err?.status === 401 && err?.code === "invalid_api_key") { this.showErrorToast("Please enter a valid OpenAI key."); - } else if (typeof err === "string" && err.includes("API key")){ + } else if (err?.response?.status == 401 && err?.response?.data?.detail?.startsWith("You did not pass")) { + this.showErrorToast("Please enter a valid Replicate key."); + } else if (typeof err === "string" && err?.includes("API key")){ this.showErrorToast("Please enter a valid OpenAI key."); - }else if(err.message === "AbortError"){ + }else if(err?.message === "AbortError"){ console.log("Request aborted") } else { this.showErrorToast("Something went wrong. 
Please try again."); @@ -1208,6 +1287,16 @@ export default { streaming: true, }); } + else if (this.replicateModels.includes(this.chatConfig.selectedModel) && this.replicateKey !== "") { + chat = new ReplicateChatModel({ + model_identifier: this.chatConfig.selectedModel, + temperature: this.chatConfig.temperature, + top_p: this.chatConfig.top_p, + max_new_tokens: this.chatConfig.maxTokens, + streaming: true, + replicateKey: this.replicateKey, + }); + } if (chat !== null) { const chatPrompt = ChatPromptTemplate.fromPromptMessages([ @@ -1227,6 +1316,11 @@ }); } else if (this.chatConfig.chatMode === "agent_chat") { + if (this.replicateModels.includes(this.chatConfig.selectedModel) && this.replicateKey !== "") { + chat.streaming = false; + } + + process.env.LANGCHAIN_HANDLER = "langchain"; // filter the tools that are checked @@ -1257,6 +1351,33 @@ }, async fetchModels() { + await this.fetchOpenAIModels(); + await this.fetchLocalModels(); + await this.fetchReplicateModels(); + }, + + async fetchReplicateModels(){ + try { + const response = await getReplicateModels(); + this.replicateModels = response.data.models; + } catch (e){ + console.error(e) + } + }, + + async fetchLocalModels(){ + try{ + let response = await getLocalLLMs(); + this.localChatModels = response.data.filter( + (model) => + model.model_type === "llm" + ).map((model) => model.identifier); + } catch (e){ + console.error(e) + } + }, + + async fetchOpenAIModels(){ if (this.openAIApiKey !== "") { try { let response = await getOpenAIModels(this.openAIApiKey); @@ -1275,12 +1396,6 @@ console.error(e) } } - - let response = await getLocalLLMs(); - this.localChatModels = response.data.filter( - (model) => - model.model_type === "llm" - ).map((model) => model.identifier); }, initTools() { @@ -1440,6 +1555,17 @@ } }, + 'replicateKey': { + /* eslint-disable no-unused-vars */ + async handler(newKey, oldKey) { + localStorage.setItem("replicateKey", newKey); + await this.fetchModels(); + await this.initChatModel(); + this.resetConv(); + this.initGenerativeModel(); + } + }, + 'errorToast.show': { /* eslint-disable no-unused-vars */ async handler(newErrorToastShow, oldErrorToastShow) { From 7d423306647acd50d1200646b53c05df7e208f06 Mon Sep 17 00:00:00 2001 From: muhammed-shihebi <58932402+muhammed-shihebi@users.noreply.github.com> Date: Fri, 10 May 2024 10:07:00 +0000 Subject: [PATCH 5/7] feat: add replicate to build workflow --- .github/workflows/build-and-deploy.yml | 3 +++ docker-compose.ytt.yaml | 14 ++++++++++++++ frontend/src/api/index.js | 6 +++--- replicate/Dockerfile | 4 +++- 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-and-deploy.yml b/.github/workflows/build-and-deploy.yml index 014de994..4e3e70cb 100644 --- a/.github/workflows/build-and-deploy.yml +++ b/.github/workflows/build-and-deploy.yml @@ -49,6 +49,7 @@ jobs: "skill-kgqa-kqapro", "evaluator", "sensitivity", + "replicate", ] include: - build-args: "" @@ -118,6 +119,8 @@ jobs: context: ./evaluator - service: sensitivity context: ./sensitivity + - service: replicate + context: ./replicate steps: - name: Checkout uses: actions/checkout@v3 diff --git a/docker-compose.ytt.yaml b/docker-compose.ytt.yaml index e315ce02..583d4b22 100644 --- a/docker-compose.ytt.yaml +++ b/docker-compose.ytt.yaml @@ -458,6 +458,20 @@ services: - "traefik.http.middlewares.sensitivity-stripprefix.stripPrefixRegex.regex=/api/[a-zA-Z0-9_-]+" - 
"traefik.http.middlewares.sensitivity-addprefix.addPrefix.prefix=/api" + replicate: + image: #@ "ukpsquare/replicate:" + data.values.tag + build: + context: ./replicate + labels: + - "traefik.enable=true" + - "traefik.http.routers.replicate.rule=PathPrefix(`/api/replicate`)" + - "traefik.http.routers.replicate.entrypoints=websecure" + - "traefik.http.routers.replicate.tls=true" + - "traefik.http.routers.replicate.tls.certresolver=le" + - "traefik.http.routers.replicate.middlewares=replicate-stripprefix,replicate-addprefix" + - "traefik.http.middlewares.replicate-stripprefix.stripPrefixRegex.regex=/api/[a-zA-Z0-9_-]+" + - "traefik.http.middlewares.sensitivity-addprefix.addPrefix.prefix=/api" + dpr_worker: image: #@ "ukpsquare/model-inference-transformer:" + data.values.tag build: diff --git a/frontend/src/api/index.js b/frontend/src/api/index.js index 4e146e7b..62cc36ea 100644 --- a/frontend/src/api/index.js +++ b/frontend/src/api/index.js @@ -297,11 +297,11 @@ export function getAlternatives(text){ } export function getReplicateModels(){ - return axios.get(`https://localhost:8443/api/replicate/models`, {}); // TODO: change to actual URL + return axios.get(`${BASE_SQUARE_URL}/replicate/models`, {}); } export function generateChatStreamReplicate(params, token) { - let url = `https://localhost:8443/api/replicate/generate_chat_stream` // TODO: change to actual URL + let url = `${BASE_SQUARE_URL}/replicate/generate_chat_stream` const response = axios.post(url, params, { headers:{ 'Content-Type': 'application/json', @@ -313,7 +313,7 @@ export function generateChatStreamReplicate(params, token) { export function generateCompletionStreamReplicate(params, token) { - let url = `https://localhost:8443/api/replicate/generate_completion_stream` // TODO: change to actual URL + let url = `${BASE_SQUARE_URL}/replicate/generate_completion_stream` const response = axios.post(url, params, { headers:{ 'Content-Type': 'application/json', diff --git a/replicate/Dockerfile b/replicate/Dockerfile index 90da5a80..ef1f2589 100644 --- a/replicate/Dockerfile +++ b/replicate/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.12.3-slim +FROM python:3.12.3-slim AS base # used to avoid interactive prompts during build ARG DEBIAN_FRONTEND=noninteractive @@ -12,6 +12,8 @@ COPY ./requirements.txt /app/requirements.txt RUN apt-get update && apt-get install --no-install-recommends -y RUN pip3 install --no-cache-dir --upgrade -r requirements.txt +FROM base AS build + COPY /app /app CMD ["python3", "app.py", "--port", "8000"] From a284aec6d46b983a2ac6dec5f045f258713851c9 Mon Sep 17 00:00:00 2001 From: muhammed-shihebi <58932402+muhammed-shihebi@users.noreply.github.com> Date: Fri, 10 May 2024 10:29:20 +0000 Subject: [PATCH 6/7] fix: add correct prefix --- docker-compose.ytt.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker-compose.ytt.yaml b/docker-compose.ytt.yaml index 583d4b22..f1e1ce5f 100644 --- a/docker-compose.ytt.yaml +++ b/docker-compose.ytt.yaml @@ -470,7 +470,7 @@ services: - "traefik.http.routers.replicate.tls.certresolver=le" - "traefik.http.routers.replicate.middlewares=replicate-stripprefix,replicate-addprefix" - "traefik.http.middlewares.replicate-stripprefix.stripPrefixRegex.regex=/api/[a-zA-Z0-9_-]+" - - "traefik.http.middlewares.sensitivity-addprefix.addPrefix.prefix=/api" + - "traefik.http.middlewares.replicate-addprefix.addPrefix.prefix=/api" dpr_worker: image: #@ "ukpsquare/model-inference-transformer:" + data.values.tag From a5417403b1e1fc3e3322d7be956b1b724f4bad6b Mon Sep 17 00:00:00 
2001 From: muhammed-shihebi <58932402+muhammed-shihebi@users.noreply.github.com> Date: Tue, 14 May 2024 13:39:31 +0000 Subject: [PATCH 7/7] feat: allow for adding new replicate models --- frontend/src/api/index.js | 5 ++++ frontend/src/views/PromptingView.vue | 37 +++++++++++++++++++++++++--- replicate/api.http | 8 +++++- replicate/app/app.py | 23 +++++++++++++++++ replicate/requirements.txt | 3 ++- 5 files changed, 70 insertions(+), 6 deletions(-) diff --git a/frontend/src/api/index.js b/frontend/src/api/index.js index 62cc36ea..a3345ec1 100644 --- a/frontend/src/api/index.js +++ b/frontend/src/api/index.js @@ -322,3 +322,8 @@ export function generateCompletionStreamReplicate(params, token) { }); return response; } + + +export function replicateModelHealthCheck(model_id){ + return axios.get(`${BASE_SQUARE_URL}/replicate/${model_id}/status`); +} diff --git a/frontend/src/views/PromptingView.vue b/frontend/src/views/PromptingView.vue index 3f06cdf1..9770b21a 100644 --- a/frontend/src/views/PromptingView.vue +++ b/frontend/src/views/PromptingView.vue @@ -53,10 +53,23 @@
[template hunk body lost in extraction: only the + markers survived. Per this patch's subject, the added markup presumably provides controls for registering additional Replicate model ids. The document is cut off here; the api.http, app.py, and requirements.txt hunks listed in the diffstat are not included.]
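Since the template hunk above is damaged and the rest of patch 7/7 is truncated, the frontend wiring for the new health check is not visible here. A hypothetical sketch of the likely flow; only replicateModelHealthCheck is from the patch, while the function shape, parameter names, and toast text are assumptions:

    // Hypothetical sketch, not part of the patch: validate a user-supplied Replicate
    // model id against the new /replicate/<model_id>/status endpoint before adding it
    // to the selectable model list. modelList / notify stand in for the component's
    // replicateModels array and showErrorToast method.
    import { replicateModelHealthCheck } from '@/api';

    async function addReplicateModel(modelId, modelList, notify) {
      try {
        await replicateModelHealthCheck(modelId); // a non-2xx status rejects here
        if (!modelList.includes(modelId)) modelList.push(modelId);
        return true;
      } catch (e) {
        notify("This model is not available on Replicate.");
        return false;
      }
    }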