Skip to content

Commit

Permalink
feat: 🎸 files
Browse files Browse the repository at this point in the history
link
  • Loading branch information
StanGirard committed Jan 27, 2024
1 parent 7043a28 commit 95912f3
Show file tree
Hide file tree
Showing 19 changed files with 40 additions and 35 deletions.
11 changes: 8 additions & 3 deletions backend/llm/knowledge_brain_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from llm.utils.format_chat_history import format_chat_history
from llm.utils.get_prompt_to_use import get_prompt_to_use
from llm.utils.get_prompt_to_use_id import get_prompt_to_use_id
from repository.files.generate_file_signed_url import generate_file_signed_url
from logger import get_logger
from models import BrainSettings
from modules.brain.service.brain_service import BrainService
Expand Down Expand Up @@ -81,7 +82,6 @@ def __init__(
streaming: bool = False,
prompt_id: Optional[UUID] = None,
metadata: Optional[dict] = None,

**kwargs,
):
super().__init__(
Expand Down Expand Up @@ -313,9 +313,14 @@ async def wrap_done(fn: Awaitable, event: asyncio.Event):
if "url" in doc.metadata
else doc.metadata["file_name"],
"type": "url" if "url" in doc.metadata else "file",
"source_url": doc.metadata["url"]
if "url" in doc.metadata
"source_url": generate_file_signed_url(
f"{brain.brain_id}/{doc.metadata['file_name']}"
).get("signedURL", "")
if "url" not in doc.metadata
else "",
"original_file_name": doc.metadata[
"original_file_name"
],
}
)
)
Expand Down
1 change: 1 addition & 0 deletions backend/modules/chat/dto/chats.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class Sources(BaseModel):
name: str
source_url: str
type: str
original_file_name: str

class Config:
json_encoders = {
Expand Down
5 changes: 1 addition & 4 deletions backend/packages/files/parsers/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,7 @@
from packages.files.file import compute_sha1_from_content


async def process_audio(
file: File,
user,
):
async def process_audio(file: File, user, original_file_name):
temp_filename = None
file_sha = ""
dateshort = time.strftime("%Y%m%d-%H%M%S")
Expand Down
3 changes: 2 additions & 1 deletion backend/packages/files/parsers/code_python.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from .common import process_file


async def process_python(file: File, brain_id):
async def process_python(file: File, brain_id, original_file_name):
    """Run a Python source file through ``process_file`` with ``PythonLoader``.

    Args:
        file: The ``File`` instance to process.
        brain_id: Forwarded unchanged to ``process_file``.
        original_file_name: Forwarded unchanged to ``process_file``.

    Returns:
        The awaited result of ``process_file``.
    """
    outcome = await process_file(
        file=file,
        brain_id=brain_id,
        original_file_name=original_file_name,
        loader_class=PythonLoader,
    )
    return outcome
9 changes: 2 additions & 7 deletions backend/packages/files/parsers/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,14 @@

from logger import get_logger
from models import File
from models.settings import get_supabase_db
from modules.brain.service.brain_vector_service import BrainVectorService
from packages.embeddings.vectors import Neurons
from repository.files.upload_file import DocumentSerializable

logger = get_logger(__name__)


async def process_file(
file: File,
loader_class,
brain_id,
):
database = get_supabase_db()
async def process_file(file: File, loader_class, brain_id, original_file_name):
dateshort = time.strftime("%Y%m%d")
neurons = Neurons()

Expand All @@ -28,6 +22,7 @@ async def process_file(
"chunk_size": file.chunk_size,
"chunk_overlap": file.chunk_overlap,
"date": dateshort,
"original_file_name": original_file_name or file.file_name,
}
docs = []

Expand Down
6 changes: 2 additions & 4 deletions backend/packages/files/parsers/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@
from .common import process_file


def process_csv(
file: File,
brain_id,
):
def process_csv(file: File, brain_id, original_file_name):
    """Hand a CSV file to ``process_file`` using ``CSVLoader``.

    This wrapper is a plain function: it returns whatever calling
    ``process_file`` yields without awaiting it, so awaiting is left to
    the caller.

    Args:
        file: The ``File`` instance to process.
        brain_id: Forwarded unchanged to ``process_file``.
        original_file_name: Forwarded unchanged to ``process_file``.
    """
    pending = process_file(
        file=file,
        brain_id=brain_id,
        original_file_name=original_file_name,
        loader_class=CSVLoader,
    )
    return pending
3 changes: 2 additions & 1 deletion backend/packages/files/parsers/docx.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from .common import process_file


def process_docx(file: File, brain_id):
def process_docx(file: File, brain_id, original_file_name):
    """Hand a DOCX file to ``process_file`` using ``Docx2txtLoader``.

    The result of the ``process_file`` call is returned as-is (not awaited
    here); the caller is expected to await it.

    Args:
        file: The ``File`` instance to process.
        brain_id: Forwarded unchanged to ``process_file``.
        original_file_name: Forwarded unchanged to ``process_file``.
    """
    pending = process_file(
        file=file,
        brain_id=brain_id,
        original_file_name=original_file_name,
        loader_class=Docx2txtLoader,
    )
    return pending
3 changes: 2 additions & 1 deletion backend/packages/files/parsers/epub.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from .common import process_file


def process_epub(file: File, brain_id):
def process_epub(file: File, brain_id, original_file_name):
    """Hand an EPUB file to ``process_file`` using ``UnstructuredEPubLoader``.

    The result of the ``process_file`` call is returned as-is (not awaited
    here); the caller is expected to await it.

    Args:
        file: The ``File`` instance to process.
        brain_id: Forwarded unchanged to ``process_file``.
        original_file_name: Forwarded unchanged to ``process_file``.
    """
    pending = process_file(
        file=file,
        brain_id=brain_id,
        original_file_name=original_file_name,
        loader_class=UnstructuredEPubLoader,
    )
    return pending
1 change: 1 addition & 0 deletions backend/packages/files/parsers/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ async def process_github(
"chunk_size": chunk_size,
"chunk_overlap": chunk_overlap,
"date": dateshort,
"original_file_name": doc.metadata["original_file_name"],
}
doc_with_metadata = Document(page_content=doc.page_content, metadata=metadata)

Expand Down
3 changes: 2 additions & 1 deletion backend/packages/files/parsers/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from .common import process_file


def process_html(file: File, brain_id):
def process_html(file: File, brain_id, original_file_name):
    """Hand an HTML file to ``process_file`` using ``UnstructuredHTMLLoader``.

    The result of the ``process_file`` call is returned as-is (not awaited
    here); the caller is expected to await it.

    Args:
        file: The ``File`` instance to process.
        brain_id: Forwarded unchanged to ``process_file``.
        original_file_name: Forwarded unchanged to ``process_file``.
    """
    pending = process_file(
        file=file,
        brain_id=brain_id,
        original_file_name=original_file_name,
        loader_class=UnstructuredHTMLLoader,
    )
    return pending
3 changes: 2 additions & 1 deletion backend/packages/files/parsers/markdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from .common import process_file


def process_markdown(file: File, brain_id):
def process_markdown(file: File, brain_id, original_file_name):
    """Hand a Markdown file to ``process_file`` using ``UnstructuredMarkdownLoader``.

    The result of the ``process_file`` call is returned as-is (not awaited
    here); the caller is expected to await it.

    Args:
        file: The ``File`` instance to process.
        brain_id: Forwarded unchanged to ``process_file``.
        original_file_name: Forwarded unchanged to ``process_file``.
    """
    pending = process_file(
        file=file,
        brain_id=brain_id,
        original_file_name=original_file_name,
        loader_class=UnstructuredMarkdownLoader,
    )
    return pending
3 changes: 2 additions & 1 deletion backend/packages/files/parsers/notebook.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from .common import process_file


def process_ipnyb(file: File, brain_id):
def process_ipnyb(file: File, brain_id, original_file_name):
    """Hand a notebook file to ``process_file`` using ``NotebookLoader``.

    The result of the ``process_file`` call is returned as-is (not awaited
    here); the caller is expected to await it.

    Args:
        file: The ``File`` instance to process.
        brain_id: Forwarded unchanged to ``process_file``.
        original_file_name: Forwarded unchanged to ``process_file``.
    """
    # NOTE(review): the function name is spelled "ipnyb" rather than "ipynb";
    # it is kept because renaming would change the public name.
    pending = process_file(
        file=file,
        brain_id=brain_id,
        original_file_name=original_file_name,
        loader_class=NotebookLoader,
    )
    return pending
3 changes: 2 additions & 1 deletion backend/packages/files/parsers/odt.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from .common import process_file


def process_odt(file: File, brain_id):
def process_odt(file: File, brain_id, original_file_name):
    """Hand an ODT file to ``process_file``.

    The result of the ``process_file`` call is returned as-is (not awaited
    here); the caller is expected to await it.

    Args:
        file: The ``File`` instance to process.
        brain_id: Forwarded unchanged to ``process_file``.
        original_file_name: Forwarded unchanged to ``process_file``.
    """
    # NOTE(review): this ODT entry point passes UnstructuredPDFLoader, the
    # same loader the PDF parser uses. Confirm this is intentional and not a
    # copy/paste slip; an ODT-specific loader may be what was meant.
    pending = process_file(
        file=file,
        brain_id=brain_id,
        original_file_name=original_file_name,
        loader_class=UnstructuredPDFLoader,
    )
    return pending
3 changes: 2 additions & 1 deletion backend/packages/files/parsers/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from .common import process_file


def process_pdf(file: File, brain_id):
def process_pdf(file: File, brain_id, original_file_name):
    """Hand a PDF file to ``process_file`` using ``UnstructuredPDFLoader``.

    The result of the ``process_file`` call is returned as-is (not awaited
    here); the caller is expected to await it.

    Args:
        file: The ``File`` instance to process.
        brain_id: Forwarded unchanged to ``process_file``.
        original_file_name: Forwarded unchanged to ``process_file``.
    """
    pending = process_file(
        file=file,
        brain_id=brain_id,
        original_file_name=original_file_name,
        loader_class=UnstructuredPDFLoader,
    )
    return pending
3 changes: 2 additions & 1 deletion backend/packages/files/parsers/powerpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@
from .common import process_file


def process_powerpoint(file: File, brain_id):
def process_powerpoint(file: File, brain_id, original_file_name):
    """Hand a PowerPoint file to ``process_file`` using ``UnstructuredPowerPointLoader``.

    The result of the ``process_file`` call is returned as-is (not awaited
    here); the caller is expected to await it.

    Args:
        file: The ``File`` instance to process.
        brain_id: Forwarded unchanged to ``process_file``.
        original_file_name: Forwarded unchanged to ``process_file``.
    """
    pending = process_file(
        file=file,
        brain_id=brain_id,
        original_file_name=original_file_name,
        loader_class=UnstructuredPowerPointLoader,
    )
    return pending
6 changes: 2 additions & 4 deletions backend/packages/files/parsers/telegram.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@
from .common import process_file


def process_telegram(
file: File,
brain_id,
):
def process_telegram(file: File, brain_id, original_file_name):
    """Hand a Telegram chat export to ``process_file`` using ``TelegramChatFileLoader``.

    The result of the ``process_file`` call is returned as-is (not awaited
    here); the caller is expected to await it.

    Args:
        file: The ``File`` instance to process.
        brain_id: Forwarded unchanged to ``process_file``.
        original_file_name: Forwarded unchanged to ``process_file``.
    """
    pending = process_file(
        file=file,
        brain_id=brain_id,
        original_file_name=original_file_name,
        loader_class=TelegramChatFileLoader,
    )
    return pending
2 changes: 2 additions & 0 deletions backend/packages/files/parsers/txt.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
async def process_txt(file: File, brain_id, original_file_name):
    """Run a plain-text file through ``process_file`` with ``TextLoader``.

    Args:
        file: The ``File`` instance to process.
        brain_id: Forwarded unchanged to ``process_file``.
        original_file_name: Forwarded unchanged to ``process_file``.

    Returns:
        The awaited result of ``process_file``.
    """
    outcome = await process_file(
        file=file,
        brain_id=brain_id,
        original_file_name=original_file_name,
        loader_class=TextLoader,
    )
    return outcome
6 changes: 2 additions & 4 deletions backend/packages/files/parsers/xlsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@
from .common import process_file


def process_xlsx(
file: File,
brain_id,
):
def process_xlsx(file: File, brain_id, original_file_name):
    """Hand an Excel file to ``process_file`` using ``UnstructuredExcelLoader``.

    The result of the ``process_file`` call is returned as-is (not awaited
    here); the caller is expected to await it.

    Args:
        file: The ``File`` instance to process.
        brain_id: Forwarded unchanged to ``process_file``.
        original_file_name: Forwarded unchanged to ``process_file``.
    """
    pending = process_file(
        file=file,
        brain_id=brain_id,
        original_file_name=original_file_name,
        loader_class=UnstructuredExcelLoader,
    )
    return pending
1 change: 1 addition & 0 deletions backend/packages/files/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ async def filter_file(
result = await file_processors[file.file_extension](
file=file,
brain_id=brain_id,
original_file_name=original_file_name,
)
if result is None or result == 0:
return create_response(
Expand Down

0 comments on commit 95912f3

Please sign in to comment.