Skip to content

Commit

Permalink
feat: nlp query search
Browse files Browse the repository at this point in the history
  • Loading branch information
Victor committed Jan 3, 2025
1 parent 7ee8219 commit 6d2104c
Show file tree
Hide file tree
Showing 11 changed files with 143 additions and 16 deletions.
3 changes: 3 additions & 0 deletions querybook/config/querybook_public_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ ai_assistant:
table_vector_search:
enabled: false

query_vector_search:
enabled: false

sql_complete:
enabled: false

Expand Down
2 changes: 2 additions & 0 deletions querybook/server/const/ai_assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,7 @@ class AICommandType(Enum):
DEFAULT_VECTOR_STORE_FETCH_LIMIT = 30
# how many tables to return from vector table search eventually
DEFAULT_TABLE_SEARCH_LIMIT = 10
# how many queries to return from vector query search eventually
DEFAULT_QUERY_SEARCH_LIMIT = 10
# how many tables to select for text-to-sql
DEFAULT_TABLE_SELECT_LIMIT = 3
14 changes: 14 additions & 0 deletions querybook/server/datasources/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,20 @@ def search_query(
return {"count": count, "results": results}


@register("/search/queries/vector/", methods=["GET"])
def vector_search_query(
    environment_id,
    keywords,
    filters=None,
):
    """Search query cells with natural-language keywords via the vector store.

    Args:
        environment_id: Environment the search is scoped to; it is checked
            with ``verify_environment_permission`` and added as a filter.
        keywords: Natural-language text forwarded to the vector search.
        filters: Optional list of ``[name, value]`` filter pairs. The list
            passed in is never modified.

    Returns:
        Whatever ``logic.vector_store.search_query`` returns for the
        keywords and the combined filters.
    """
    # Imported lazily, as in the original implementation.
    from logic import vector_store as vs_logic

    verify_environment_permission([environment_id])

    # Build a fresh list on every call. The previous version appended to a
    # mutable default argument, so environment filters from earlier requests
    # leaked into later ones (and a caller-supplied list was mutated).
    scoped_filters = list(filters or [])
    scoped_filters.append(["environment_id", environment_id])

    return vs_logic.search_query(keywords, scoped_filters)


@register("/search/tables/", methods=["GET"])
def search_tables(
metastore_id,
Expand Down
16 changes: 16 additions & 0 deletions querybook/server/lib/elasticsearch/search_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,19 @@ def construct_query_search_query(
)

return query


def construct_query_search_by_query_cell_ids(ids, filters, limit):
    """Build an Elasticsearch body that fetches query cells by their ids.

    Args:
        ids: Query cell ids to match with a ``terms`` clause on ``id``.
        filters: Optional filters handed to ``match_filters``; skipped when
            empty.
        limit: Value used for the ``size`` of the search body.

    Returns:
        A dict usable as an Elasticsearch search body. When ``ids`` is empty
        the body is a ``match_all`` query with ``size`` 0.
    """
    # Nothing to look up: return a body that yields zero hits.
    if not ids:
        return {"query": {"match_all": {}}, "size": 0}

    clauses = {"must": [{"terms": {"id": ids}}]}

    # Only attach a filter clause when match_filters produced one.
    matched = (
        match_filters(filters, and_filter_names=FILTERS_TO_AND) if filters else None
    )
    if matched:
        clauses["filter"] = matched["filter"]

    return {"query": {"bool": clauses}, "size": limit}
39 changes: 39 additions & 0 deletions querybook/server/lib/vector_store/base_vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,42 @@ def search_tables(
table_score_dict[table_name] = table_score_dict.get(table_name, 0) + score

return sorted(table_score_dict.items(), key=lambda x: x[1], reverse=True)[:k]

def search_query(
    self,
    text: str,
    threshold: float = DEFAULT_SIMILARITY_SCORE_THRESHOLD,
    # NOTE(review): this reuses the table-search limit; const.ai_assistant
    # also defines DEFAULT_QUERY_SEARCH_LIMIT (same value) - consider
    # switching once it is imported in this module.
    k=DEFAULT_TABLE_SEARCH_LIMIT,
    fetch_k=DEFAULT_VECTOR_STORE_FETCH_LIMIT,
) -> list[tuple[int, float]]:
    """
    Finds similar SQL queries based on the given text (NLP query).
    Args:
        text: The natural language description or keywords.
        threshold: Only return queries with a similarity score above this value.
        k: Max number of matching queries to return.
        fetch_k: Number of queries to retrieve from vector store before trimming.
    Returns:
        A list of (query_cell_id, score) tuples in descending score order.
        Each query_cell_id appears at most once (with its best score), and
        documents whose metadata has no query_cell_id are skipped.
    """
    # Restrict the similarity search to documents tagged as queries.
    boolean_filter = {"bool": {"must": [{"term": {"metadata.type": "query"}}]}}

    docs_with_score = self.similarity_search_with_score(
        text,
        k=fetch_k,
        boolean_filter=boolean_filter,
    )

    best_score_by_id = {}
    for doc, score in docs_with_score:
        if not score > threshold:
            continue

        query_cell_id = doc.metadata.get("query_cell_id")
        # A missing id would break the declared return type and put a null
        # into any id lookup built from these results, so drop it here.
        if query_cell_id is None:
            continue

        # Keep only the best score per query cell so duplicates do not
        # consume slots in the final top-k.
        previous = best_score_by_id.get(query_cell_id)
        if previous is None or score > previous:
            best_score_by_id[query_cell_id] = score

    ranked = sorted(best_score_by_id.items(), key=lambda item: item[1], reverse=True)
    return ranked[:k]
27 changes: 27 additions & 0 deletions querybook/server/logic/vector_store.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from app.db import with_session
from const.ai_assistant import (
DEFAULT_QUERY_SEARCH_LIMIT,
DEFAULT_TABLE_SEARCH_LIMIT,
MAX_SAMPLE_QUERY_COUNT_FOR_TABLE_SUMMARY,
)
Expand All @@ -13,6 +14,9 @@
from logic.elasticsearch import get_sample_query_cells_by_table_name
from logic.metastore import get_all_table, get_table_by_name
from models.metastore import DataTable
from lib.elasticsearch.search_query import (
construct_query_search_by_query_cell_ids,
)

LOG = get_logger(__file__)

Expand Down Expand Up @@ -175,6 +179,29 @@ def search_tables(
return {"count": len(sorted_docs), "results": sorted_docs}


def search_query(keywords, filters=None, limit=DEFAULT_QUERY_SEARCH_LIMIT):
    """Find query cells related to a natural-language search text.

    The vector store supplies a ranked list of query cell ids; the matching
    documents are then fetched from the query cell Elasticsearch index (with
    ``filters`` applied) and returned in the vector store's order.

    Args:
        keywords: Natural-language text passed to the vector store search.
        filters: Optional filters forwarded to the Elasticsearch query.
        limit: Max number of vector matches requested and ES hits fetched.

    Returns:
        A dict ``{"count": <number of results>, "results": <list of hits>}``.
    """
    matches = get_vector_store().search_query(keywords, k=limit)
    ranked_ids = [match[0] for match in matches]

    # No vector matches means there is nothing to look up in Elasticsearch.
    if not ranked_ids:
        return {"count": 0, "results": []}

    hits = get_matching_objects(
        construct_query_search_by_query_cell_ids(
            ids=ranked_ids, filters=filters, limit=limit
        ),
        ES_CONFIG["query_cells"]["index_name"],
    )

    # Index the hits by id so they can be emitted in vector-ranking order;
    # ids that Elasticsearch did not return are dropped.
    hit_by_id = {}
    for hit in hits:
        hit_by_id[hit["id"]] = hit

    ordered = []
    for cell_id in ranked_ids:
        if cell_id in hit_by_id:
            ordered.append(hit_by_id[cell_id])

    return {"count": len(ordered), "results": ordered}


@with_session
def get_table_summary_by_name(
metastore_id: int, full_table_name: str, session=None
Expand Down
29 changes: 17 additions & 12 deletions querybook/webapp/components/Search/SearchOverview.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,12 @@ export const SearchOverview: React.FC<ISearchOverviewProps> = ({
? 'Search data docs'
: 'Search tables';

const showVectorSearch =
(searchType === SearchType.Table &&
isAIFeatureEnabled('table_vector_search')) ||
(searchType === SearchType.Query &&
isAIFeatureEnabled('query_vector_search'));

return (
<div className="search-bar-wrapper">
<SearchBar
Expand All @@ -311,18 +317,17 @@ export const SearchOverview: React.FC<ISearchOverviewProps> = ({
placeholder={placeholder}
autoFocus
/>
{searchType === SearchType.Table &&
isAIFeatureEnabled('table_vector_search') && (
<div className="mt8 flex-row">
<AccentText weight="bold" className="ml8 mr12">
Natural Language Search
</AccentText>
<ToggleSwitch
checked={useVectorSearch}
onChange={(val) => updateUseVectorSearch(val)}
/>
</div>
)}
{showVectorSearch && (
<div className="mt8 flex-row">
<AccentText weight="bold" className="ml8 mr12">
Natural Language Search
</AccentText>
<ToggleSwitch
checked={useVectorSearch}
onChange={(val) => updateUseVectorSearch(val)}
/>
</div>
)}
</div>
);
};
Expand Down
4 changes: 4 additions & 0 deletions querybook/webapp/config.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ declare module 'config/querybook_public_config.yaml' {
enabled: boolean;
};

query_vector_search: {
enabled: boolean;
};

sql_complete: {
enabled: boolean;
};
Expand Down
1 change: 1 addition & 0 deletions querybook/webapp/lib/public-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export const isAIFeatureEnabled = (
| 'query_generation'
| 'query_auto_fix'
| 'table_vector_search'
| 'query_vector_search'
| 'sql_complete'
): boolean => {
const aiAssistantConfig = PublicConfig.ai_assistant;
Expand Down
18 changes: 14 additions & 4 deletions querybook/webapp/redux/search/action.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,10 +125,20 @@ export function performSearch(): ThunkResult<Promise<ISearchPreview[]>> {
}>;
switch (searchType) {
case SearchType.Query:
searchRequest = SearchQueryResource.search({
...searchParams,
environment_id: state.environment.currentEnvironmentId,
});
if (useVectorSearch) {
searchRequest = SearchQueryResource.vectorSearch({
environment_id:
state.environment.currentEnvironmentId,
keywords: searchString,
filters: searchParams.filters,
});
} else {
searchRequest = SearchQueryResource.search({
...searchParams,
environment_id:
state.environment.currentEnvironmentId,
});
}
break;
case SearchType.DataDoc:
searchRequest = SearchDataDocResource.search({
Expand Down
6 changes: 6 additions & 0 deletions querybook/webapp/resource/search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,12 @@ export const SearchQueryResource = {
results: IQueryPreview[];
count: number;
}>('/search/queries/', params as unknown as Record<string, unknown>),

vectorSearch: (params: ISearchQueryParams) =>
ds.fetch<{
results: IQueryPreview[];
count: number;
}>('/search/queries/vector/', { ...params }),
};

export const SearchDataDocResource = {
Expand Down

0 comments on commit 6d2104c

Please sign in to comment.