Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: nlp query search #1531

Merged
merged 2 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs_website/docs/user_guide/ai_assistant.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ This feature is disabled by default. You can enable it in the Editor tab of user

![](/img/user_guide/sql_complete.png)

## Search Table by Natural Language
## Search Tables and Queries by Natural Language

If [vector store](../integrations/add_ai_assistant.mdx#vector-store) of the AI assistant plugin is also enabled, you'll be able to search the tables by natual language as well as keyword based search.
If the [vector store](../integrations/add_ai_assistant.mdx#vector-store) of the AI assistant plugin is enabled, you'll be able to search both tables and queries using natural language in addition to traditional keyword-based search.

![](/img/user_guide/table_vector_search.png)
3 changes: 3 additions & 0 deletions querybook/config/querybook_public_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ ai_assistant:
table_vector_search:
enabled: false

query_vector_search:
enabled: false

sql_complete:
enabled: false

Expand Down
2 changes: 2 additions & 0 deletions querybook/server/const/ai_assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,7 @@ class AICommandType(Enum):
DEFAULT_VECTOR_STORE_FETCH_LIMIT = 30
# how many tables to return from vector table search eventually
DEFAULT_TABLE_SEARCH_LIMIT = 10
# how many queries to return from vector query search eventually
DEFAULT_QUERY_SEARCH_LIMIT = 10
# how many tables to select for text-to-sql
DEFAULT_TABLE_SELECT_LIMIT = 3
14 changes: 14 additions & 0 deletions querybook/server/datasources/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,20 @@ def search_query(
return {"count": count, "results": results}


@register("/search/queries/vector/", methods=["GET"])
def vector_search_query(
    environment_id,
    keywords,
    filters=None,
):
    """Search queries by natural language, restricted to one environment.

    Args:
        environment_id: Environment whose access is verified and to which the
            search is restricted via an ``environment_id`` filter.
        keywords: Free text forwarded to the vector store search.
        filters: Optional list of filters; presumably ``[name, value]`` pairs,
            matching the ``["environment_id", environment_id]`` entry added
            here. Defaults to no extra filters.

    Returns:
        The value returned by ``logic.vector_store.search_query``.
    """
    # Function-scope import kept from the original implementation.
    from logic import vector_store as vs_logic

    verify_environment_permission([environment_id])

    # Build a fresh list on every call. The previous ``filters=[]`` default was
    # shared between calls and mutated with ``append``, so environment filters
    # piled up across requests; a caller-supplied list was also modified.
    scoped_filters = [*(filters or []), ["environment_id", environment_id]]

    return vs_logic.search_query(keywords, scoped_filters)


@register("/search/tables/", methods=["GET"])
def search_tables(
metastore_id,
Expand Down
16 changes: 16 additions & 0 deletions querybook/server/lib/elasticsearch/search_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,19 @@ def construct_query_search_query(
)

return query


def construct_query_search_by_query_cell_ids(ids, filters, limit):
    """Build an Elasticsearch body that fetches documents by their ``id``.

    Args:
        ids: Ids to match with a ``terms`` clause on the ``id`` field.
        filters: Optional filters; when truthy they are converted with
            ``match_filters`` and attached as the bool query's ``filter``.
        limit: Value used for the ``size`` of the request.

    Returns:
        A dict with ``query`` and ``size`` keys. When ``ids`` is empty the
        body is a ``match_all`` query with ``size`` 0, i.e. it asks for no hits.
    """
    if not ids:
        return {"query": {"match_all": {}}, "size": 0}

    clauses = {"must": [{"terms": {"id": ids}}]}

    # Only consult match_filters when there is something to translate.
    matched = (
        match_filters(filters, and_filter_names=FILTERS_TO_AND) if filters else None
    )
    if matched:
        clauses["filter"] = matched["filter"]

    return {"query": {"bool": clauses}, "size": limit}
40 changes: 40 additions & 0 deletions querybook/server/lib/vector_store/base_vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Literal, Optional

from const.ai_assistant import (
DEFAULT_QUERY_SEARCH_LIMIT,
DEFAULT_TABLE_SEARCH_LIMIT,
DEFAULT_VECTOR_STORE_FETCH_LIMIT,
DEFAULT_SIMILARITY_SCORE_THRESHOLD,
Expand Down Expand Up @@ -108,3 +109,42 @@ def search_tables(
table_score_dict[table_name] = table_score_dict.get(table_name, 0) + score

return sorted(table_score_dict.items(), key=lambda x: x[1], reverse=True)[:k]

def search_query(
    self,
    text: str,
    threshold: float = DEFAULT_SIMILARITY_SCORE_THRESHOLD,
    k=DEFAULT_QUERY_SEARCH_LIMIT,
    fetch_k=DEFAULT_VECTOR_STORE_FETCH_LIMIT,
) -> list[tuple[int, float]]:
    """
    Look up stored SQL queries that are similar to a natural language text.

    Args:
        text: The natural language description or keywords to search with.
        threshold: Scores must be strictly greater than this value to be kept.
        k: Maximum number of results returned.
        fetch_k: Number of documents requested from the vector store before
            thresholding and trimming.

    Returns:
        Up to ``k`` ``(query_cell_id, score)`` tuples, highest score first.
        ``query_cell_id`` is read from each document's metadata.
    """
    # Restrict the similarity search to documents whose metadata type is "query".
    only_query_docs = {"bool": {"must": [{"term": {"metadata.type": "query"}}]}}

    candidates = self.similarity_search_with_score(
        text,
        k=fetch_k,
        boolean_filter=only_query_docs,
    )

    scored_ids = [
        (doc.metadata.get("query_cell_id"), score)
        for doc, score in candidates
        if score > threshold
    ]

    # Re-sort locally so the descending order does not depend on the store.
    ranked = sorted(scored_ids, key=lambda pair: pair[1], reverse=True)
    return ranked[:k]
27 changes: 27 additions & 0 deletions querybook/server/logic/vector_store.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from app.db import with_session
from const.ai_assistant import (
DEFAULT_QUERY_SEARCH_LIMIT,
DEFAULT_TABLE_SEARCH_LIMIT,
MAX_SAMPLE_QUERY_COUNT_FOR_TABLE_SUMMARY,
)
Expand All @@ -13,6 +14,9 @@
from logic.elasticsearch import get_sample_query_cells_by_table_name
from logic.metastore import get_all_table, get_table_by_name
from models.metastore import DataTable
from lib.elasticsearch.search_query import (
construct_query_search_by_query_cell_ids,
)

LOG = get_logger(__file__)

Expand Down Expand Up @@ -175,6 +179,29 @@ def search_tables(
return {"count": len(sorted_docs), "results": sorted_docs}


def search_query(keywords, filters=None, limit=DEFAULT_QUERY_SEARCH_LIMIT):
    """Find SQL queries related to a natural language text.

    The vector store supplies a ranked list of query cell ids; the matching
    documents are then fetched from the query cells Elasticsearch index (with
    ``filters`` applied there) and returned in the vector store's order.

    Returns:
        A dict with ``count`` (number of returned documents) and ``results``.
    """
    ranked_ids = [
        hit[0] for hit in get_vector_store().search_query(keywords, k=limit)
    ]
    if not ranked_ids:
        return {"count": 0, "results": []}

    matches = get_matching_objects(
        construct_query_search_by_query_cell_ids(
            ids=ranked_ids, filters=filters, limit=limit
        ),
        ES_CONFIG["query_cells"]["index_name"],
    )

    # Index the Elasticsearch hits by id so they can be emitted in ranked order.
    doc_by_id = {}
    for doc in matches:
        doc_by_id[doc["id"]] = doc

    ordered = []
    for cell_id in ranked_ids:
        # Ids with no Elasticsearch hit (e.g. filtered out) are dropped.
        if cell_id in doc_by_id:
            ordered.append(doc_by_id[cell_id])

    return {"count": len(ordered), "results": ordered}


@with_session
def get_table_summary_by_name(
metastore_id: int, full_table_name: str, session=None
Expand Down
29 changes: 17 additions & 12 deletions querybook/webapp/components/Search/SearchOverview.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,12 @@ export const SearchOverview: React.FC<ISearchOverviewProps> = ({
? 'Search data docs'
: 'Search tables';

const showVectorSearch =
(searchType === SearchType.Table &&
isAIFeatureEnabled('table_vector_search')) ||
(searchType === SearchType.Query &&
isAIFeatureEnabled('query_vector_search'));

return (
<div className="search-bar-wrapper">
<SearchBar
Expand All @@ -311,18 +317,17 @@ export const SearchOverview: React.FC<ISearchOverviewProps> = ({
placeholder={placeholder}
autoFocus
/>
{searchType === SearchType.Table &&
isAIFeatureEnabled('table_vector_search') && (
<div className="mt8 flex-row">
<AccentText weight="bold" className="ml8 mr12">
Natural Language Search
</AccentText>
<ToggleSwitch
checked={useVectorSearch}
onChange={(val) => updateUseVectorSearch(val)}
/>
</div>
)}
{showVectorSearch && (
<div className="mt8 flex-row">
<AccentText weight="bold" className="ml8 mr12">
Natural Language Search
</AccentText>
<ToggleSwitch
checked={useVectorSearch}
onChange={(val) => updateUseVectorSearch(val)}
/>
</div>
)}
</div>
);
};
Expand Down
4 changes: 4 additions & 0 deletions querybook/webapp/config.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ declare module 'config/querybook_public_config.yaml' {
enabled: boolean;
};

query_vector_search: {
enabled: boolean;
};

sql_complete: {
enabled: boolean;
};
Expand Down
1 change: 1 addition & 0 deletions querybook/webapp/lib/public-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export const isAIFeatureEnabled = (
| 'query_generation'
| 'query_auto_fix'
| 'table_vector_search'
| 'query_vector_search'
| 'sql_complete'
): boolean => {
const aiAssistantConfig = PublicConfig.ai_assistant;
Expand Down
18 changes: 14 additions & 4 deletions querybook/webapp/redux/search/action.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,20 @@ export function performSearch(): ThunkResult<Promise<ISearchPreview[]>> {
}>;
switch (searchType) {
case SearchType.Query:
searchRequest = SearchQueryResource.search({
...searchParams,
environment_id: state.environment.currentEnvironmentId,
});
if (useVectorSearch) {
searchRequest = SearchQueryResource.vectorSearch({
environment_id:
state.environment.currentEnvironmentId,
keywords: searchString,
filters: searchParams.filters,
});
} else {
searchRequest = SearchQueryResource.search({
...searchParams,
environment_id:
state.environment.currentEnvironmentId,
});
}
break;
case SearchType.DataDoc:
searchRequest = SearchDataDocResource.search({
Expand Down
6 changes: 6 additions & 0 deletions querybook/webapp/resource/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ export const SearchQueryResource = {
results: IQueryPreview[];
count: number;
}>('/search/queries/', params as unknown as Record<string, unknown>),

vectorSearch: (params: ISearchQueryParams) =>
ds.fetch<{
results: IQueryPreview[];
count: number;
}>('/search/queries/vector/', { ...params }),
};

export const SearchDataDocResource = {
Expand Down
Loading