diff --git a/core/config/config.exs b/core/config/config.exs
index fc7c55f8..7ee93792 100644
--- a/core/config/config.exs
+++ b/core/config/config.exs
@@ -56,7 +56,7 @@ config :canary, :root, File.cwd!()
config :canary, Oban,
engine: Oban.Engines.Basic,
- queues: [fetcher: 1, email: 2, updater: 5, stripe: 50, keyword: 20],
+ queues: [fetcher: 1, email: 2, updater: 5, stripe: 50, summary: 20],
repo: Canary.Repo,
plugins: [
{Oban.Plugins.Pruner, max_age: 60 * 60 * 24 * 7},
diff --git a/core/lib/canary/query/understander.ex b/core/lib/canary/query/understander.ex
index e15b8621..187a576b 100644
--- a/core/lib/canary/query/understander.ex
+++ b/core/lib/canary/query/understander.ex
@@ -42,15 +42,12 @@ defmodule Canary.Query.Understander.LLM do
Your job is to analyze the user's query and return a structured response like below:
- QUERY
KEYWORD_1,KEYWORD_2,KEYWORD_3
IMPORTANT NOTES:
- should contain comma separated list of keywords. MAX 3 keywords are allowed.
- - Each "keyword" should be one or two words. It will be used to run keyword based search. User '#{@keywords_section}' section for inspiration.
- - The "query" is typo-corrected version of the user's raw query for better search results, with minimal formatting applied.
- - Do not omit prepositions or details that are not mentioned in the user's query.
+ - Each "keyword" must be a single word. It will be used to run keyword based search. Use '#{@keywords_section}' section for inspiration.
Do not include any other text, just respond with the XML-like format that I provided.
If user's query is totally nonsense, just return .
diff --git a/core/lib/canary/reranker.ex b/core/lib/canary/reranker.ex
index 297d13dc..35639735 100644
--- a/core/lib/canary/reranker.ex
+++ b/core/lib/canary/reranker.ex
@@ -2,12 +2,12 @@ defmodule Canary.Reranker do
@callback run(
query :: String.t(),
docs :: list(any()),
- renderer :: function()
+ opts :: keyword()
) :: {:ok, list(any())} | {:error, any()}
- def run(_, _, _ \\ fn doc -> doc end)
+ def run(_, _, _ \\ [])
def run(_, [], _), do: {:ok, []}
- def run(query, docs, renderer), do: impl().run(query, docs, renderer)
+ def run(query, docs, opts), do: impl().run(query, docs, opts)
defp impl(), do: Application.get_env(:canary, :reranker, Canary.Reranker.Noop)
end
@@ -19,7 +19,10 @@ defmodule Canary.Reranker.Cohere do
use Retry
- def run(query, docs, renderer, threshold \\ 0.5) do
+ def run(query, docs, opts) do
+ renderer = opts[:renderer] || fn doc -> doc end
+ threshold = opts[:threshold] || 0
+
result =
retry with: exponential_backoff() |> randomize |> cap(1_000) |> expiry(4_000) do
request(query, docs, renderer)
diff --git a/core/lib/canary/searcher.ex b/core/lib/canary/searcher.ex
index 72918722..63e90b80 100644
--- a/core/lib/canary/searcher.ex
+++ b/core/lib/canary/searcher.ex
@@ -38,7 +38,7 @@ defmodule Canary.Searcher.Default do
keywords =
source.documents
- |> Enum.map(& &1.keywords)
+ |> Enum.map(fn doc -> if doc.summary, do: doc.summary.keywords, else: [] end)
|> Enum.flat_map(& &1)
|> Enum.frequencies()
|> Enum.map(fn {k, v} -> if v > 0.5 * docs_size, do: k, else: nil end)
@@ -48,14 +48,15 @@ defmodule Canary.Searcher.Default do
{:ok, docs} <- Canary.Index.batch_search_documents(source.id, analysis.keywords),
{:ok, reranked} <-
Canary.Reranker.run(
- analysis.query,
+ query,
Enum.dedup_by(docs, & &1.id),
- fn doc -> doc.content end
+ renderer: fn doc -> doc.content end,
+ threshold: 0.05
) do
{:ok,
%{
search: reranked,
- suggestion: %{questions: [analysis.query]}
+ suggestion: %{questions: [query]}
}}
end
end
diff --git a/core/lib/canary/sources/document.ex b/core/lib/canary/sources/document.ex
index 1d9bd035..183202c8 100644
--- a/core/lib/canary/sources/document.ex
+++ b/core/lib/canary/sources/document.ex
@@ -11,7 +11,7 @@ defmodule Canary.Sources.Document do
attribute :url, :string, allow_nil?: true
attribute :content, :binary, allow_nil?: false
attribute :chunks, {:array, Canary.Sources.Chunk}, default: []
- attribute :keywords, {:array, :string}, default: []
+ attribute :summary, Canary.Sources.DocumentSummary, allow_nil?: true
end
identities do
@@ -64,14 +64,14 @@ defmodule Canary.Sources.Document do
change Canary.Sources.Document.Changes.DestroyChunks
end
- update :update_kewords do
- argument :keywords, {:array, :string}, allow_nil?: false
- change set_attribute(:keywords, arg(:keywords))
+ update :update_summary do
+ argument :summary, Canary.Sources.DocumentSummary, allow_nil?: false
+ change set_attribute(:summary, arg(:summary))
end
end
code_interface do
- define :update_kewords, args: [:keywords], action: :update_kewords
+ define :update_summary, args: [:summary], action: :update_summary
define :find_by_chunk_index_id, args: [:id], action: :find_by_chunk_index_id
end
@@ -161,7 +161,7 @@ defmodule Canary.Sources.Document.Changes.CreateSummary do
changeset
|> Ash.Changeset.after_action(fn _changeset, record ->
%{"document_id" => record.id}
- |> Canary.Workers.Keywords.new()
+ |> Canary.Workers.Summary.new()
|> Oban.insert()
{:ok, record}
diff --git a/core/lib/canary/sources/document_summary.ex b/core/lib/canary/sources/document_summary.ex
new file mode 100644
index 00000000..c4e4eb5e
--- /dev/null
+++ b/core/lib/canary/sources/document_summary.ex
@@ -0,0 +1,16 @@
+defmodule Canary.Sources.DocumentSummary do
+  @moduledoc """
+  Embedded resource holding the generated summary of a document.
+
+  Currently it only carries the list of keywords, which are plain strings.
+  """
+
+  use Ash.Resource,
+    domain: Canary.Sources,
+    data_layer: :embedded
+
+  attributes do
+    # Keywords are plain strings, so the element type is :string, not :struct.
+    attribute :keywords, {:array, :string}, allow_nil?: false, default: []
+  end
+end
diff --git a/core/lib/canary/workers/keywords.ex b/core/lib/canary/workers/summary.ex
similarity index 69%
rename from core/lib/canary/workers/keywords.ex
rename to core/lib/canary/workers/summary.ex
index 1c47e793..783d979e 100644
--- a/core/lib/canary/workers/keywords.ex
+++ b/core/lib/canary/workers/summary.ex
@@ -1,5 +1,5 @@
-defmodule Canary.Workers.Keywords do
- use Oban.Worker, queue: :keyword, max_attempts: 3
+defmodule Canary.Workers.Summary do
+ use Oban.Worker, queue: :summary, max_attempts: 3
@impl true
def perform(%Oban.Job{args: %{"document_id" => id}}) do
@@ -18,7 +18,7 @@ defmodule Canary.Workers.Keywords do
]
with {:ok, completion} <- Canary.AI.chat(%{model: chat_model, messages: messages}),
- {:ok, _} <- Canary.Sources.Document.update_kewords(doc, transform(completion)) do
+ {:ok, _} <- Canary.Sources.Document.update_summary(doc, to_summary(completion)) do
:ok
end
end
@@ -45,15 +45,18 @@ defmodule Canary.Workers.Keywords do
}
end
- defp transform(completion) do
- ~r/(.*?)<\/keywords>/s
- |> Regex.scan(completion, capture: :all_but_first)
- |> Enum.flat_map(fn [keywords] ->
- keywords
- |> String.split(",")
- |> Enum.map(&String.trim/1)
- |> Enum.map(&String.downcase/1)
- end)
- |> Enum.uniq()
+  # Turns the raw chat completion into a DocumentSummary: keywords are split on
+  # commas, trimmed, lowercased and de-duplicated. Blank entries (e.g. from a
+  # trailing comma or an empty keywords section) are dropped.
+  defp to_summary(completion) do
+    keywords =
+      ~r/(.*?)<\/keywords>/s
+      |> Regex.scan(completion, capture: :all_but_first)
+      |> Enum.flat_map(fn [section] -> String.split(section, ",") end)
+      |> Enum.map(fn keyword -> keyword |> String.trim() |> String.downcase() end)
+      |> Enum.reject(&(&1 == ""))
+      |> Enum.uniq()
+
+    %Canary.Sources.DocumentSummary{keywords: keywords}
+  end
end
diff --git a/core/priv/repo/migrations/20240819053647_init_resources.exs b/core/priv/repo/migrations/20240820060848_init_resources.exs
similarity index 99%
rename from core/priv/repo/migrations/20240819053647_init_resources.exs
rename to core/priv/repo/migrations/20240820060848_init_resources.exs
index 0dbf924d..bf08a932 100644
--- a/core/priv/repo/migrations/20240819053647_init_resources.exs
+++ b/core/priv/repo/migrations/20240820060848_init_resources.exs
@@ -149,7 +149,7 @@ defmodule Canary.Repo.Migrations.InitResources do
add :url, :text
add :content, :binary, null: false
add :chunks, {:array, :map}, default: []
- add :keywords, {:array, :text}, default: []
+ add :summary, :map
add :source_id,
references(:sources,
diff --git a/core/priv/resource_snapshots/repo/account_invites/20240819053647.json b/core/priv/resource_snapshots/repo/account_invites/20240820060848.json
similarity index 97%
rename from core/priv/resource_snapshots/repo/account_invites/20240819053647.json
rename to core/priv/resource_snapshots/repo/account_invites/20240820060848.json
index abbdd81d..5cc89015 100644
--- a/core/priv/resource_snapshots/repo/account_invites/20240819053647.json
+++ b/core/priv/resource_snapshots/repo/account_invites/20240820060848.json
@@ -94,7 +94,7 @@
"custom_indexes": [],
"custom_statements": [],
"has_create_action": true,
- "hash": "98879BAC60B4ADD10E3B06B53567B604570352EE9C6327B6BF99833344A47811",
+ "hash": "383D341378EDC3629A559A480BD4847F2655F2DC4BFA5A068CE012FD1AFB7DE3",
"identities": [],
"multitenancy": {
"attribute": null,
diff --git a/core/priv/resource_snapshots/repo/account_users/20240819053647.json b/core/priv/resource_snapshots/repo/account_users/20240820060848.json
similarity index 96%
rename from core/priv/resource_snapshots/repo/account_users/20240819053647.json
rename to core/priv/resource_snapshots/repo/account_users/20240820060848.json
index 47496a6d..98decc2a 100644
--- a/core/priv/resource_snapshots/repo/account_users/20240819053647.json
+++ b/core/priv/resource_snapshots/repo/account_users/20240820060848.json
@@ -64,7 +64,7 @@
"custom_indexes": [],
"custom_statements": [],
"has_create_action": true,
- "hash": "AC6DF984994063A10E9B27B57B224D4DD8ED58DA0A6C769CAA300ED8C86E94BC",
+ "hash": "DD6CEC340B8F6643E4DD1BFA0B4A487E5AE4B9FFAA12E828AC4AA722285A1652",
"identities": [],
"multitenancy": {
"attribute": null,
diff --git a/core/priv/resource_snapshots/repo/accounts/20240819053647.json b/core/priv/resource_snapshots/repo/accounts/20240820060848.json
similarity index 90%
rename from core/priv/resource_snapshots/repo/accounts/20240819053647.json
rename to core/priv/resource_snapshots/repo/accounts/20240820060848.json
index 990dfe05..a4eedb14 100644
--- a/core/priv/resource_snapshots/repo/accounts/20240819053647.json
+++ b/core/priv/resource_snapshots/repo/accounts/20240820060848.json
@@ -26,7 +26,7 @@
"custom_indexes": [],
"custom_statements": [],
"has_create_action": true,
- "hash": "B35DD5FE904796F7D3D7C40719CF58869119425EF836447CB60CFDE2D54F310F",
+ "hash": "316149EB3CAEC59B7CFEA63AB0FF5D338AFD0337C427241F6EC7D0EB1CA121BB",
"identities": [],
"multitenancy": {
"attribute": null,
diff --git a/core/priv/resource_snapshots/repo/billings/20240819053647.json b/core/priv/resource_snapshots/repo/billings/20240820060848.json
similarity index 96%
rename from core/priv/resource_snapshots/repo/billings/20240819053647.json
rename to core/priv/resource_snapshots/repo/billings/20240820060848.json
index a3f19572..c2f7abe5 100644
--- a/core/priv/resource_snapshots/repo/billings/20240819053647.json
+++ b/core/priv/resource_snapshots/repo/billings/20240820060848.json
@@ -85,7 +85,7 @@
"custom_indexes": [],
"custom_statements": [],
"has_create_action": true,
- "hash": "A51E691C47FC92D0C1C60EF69B74497BFF914204249F0A776972EC6E70C48E9C",
+ "hash": "09AABB4F17080666362E79F9BFF51BBA47BDB1A0A1FA59BEC53142B29A30D717",
"identities": [],
"multitenancy": {
"attribute": null,
diff --git a/core/priv/resource_snapshots/repo/client_sources/20240819053647.json b/core/priv/resource_snapshots/repo/client_sources/20240820060848.json
similarity index 96%
rename from core/priv/resource_snapshots/repo/client_sources/20240819053647.json
rename to core/priv/resource_snapshots/repo/client_sources/20240820060848.json
index dc20c4d8..7e92c672 100644
--- a/core/priv/resource_snapshots/repo/client_sources/20240819053647.json
+++ b/core/priv/resource_snapshots/repo/client_sources/20240820060848.json
@@ -64,7 +64,7 @@
"custom_indexes": [],
"custom_statements": [],
"has_create_action": true,
- "hash": "5C051176E120A13AF02BCD4D2A1DCD854AB200C2F758FCB81830FA568897FD8A",
+ "hash": "EFDAC643A9F3A4935E330675ED67E5B570662B56B241333379604D9877B14B4C",
"identities": [],
"multitenancy": {
"attribute": null,
diff --git a/core/priv/resource_snapshots/repo/clients/20240819053647.json b/core/priv/resource_snapshots/repo/clients/20240820060848.json
similarity index 97%
rename from core/priv/resource_snapshots/repo/clients/20240819053647.json
rename to core/priv/resource_snapshots/repo/clients/20240820060848.json
index 92df9900..844351b0 100644
--- a/core/priv/resource_snapshots/repo/clients/20240819053647.json
+++ b/core/priv/resource_snapshots/repo/clients/20240820060848.json
@@ -105,7 +105,7 @@
"custom_indexes": [],
"custom_statements": [],
"has_create_action": true,
- "hash": "DCCD9248AFCC71CCB74AF01AC1FBC59D92DFA703BEC9C5FF1343200658033FCB",
+ "hash": "35DAC4B4059AAF23C1590A8C79553316ABBA5ED4096B5E374271D12D661A3F1D",
"identities": [
{
"all_tenants?": false,
diff --git a/core/priv/resource_snapshots/repo/documents/20240819053647.json b/core/priv/resource_snapshots/repo/documents/20240820060848.json
similarity index 94%
rename from core/priv/resource_snapshots/repo/documents/20240819053647.json
rename to core/priv/resource_snapshots/repo/documents/20240820060848.json
index 43d616da..1d5cc515 100644
--- a/core/priv/resource_snapshots/repo/documents/20240819053647.json
+++ b/core/priv/resource_snapshots/repo/documents/20240820060848.json
@@ -62,13 +62,13 @@
},
{
"allow_nil?": true,
- "default": "[]",
+ "default": "nil",
"generated?": false,
"primary_key?": false,
"references": null,
"size": null,
- "source": "keywords",
- "type": ["array", "text"]
+ "source": "summary",
+ "type": "map"
},
{
"allow_nil?": true,
@@ -105,7 +105,7 @@
"custom_indexes": [],
"custom_statements": [],
"has_create_action": true,
- "hash": "413666EB4B9889C919C94F09DC4ED831ECD16F86E61C56A238AABEA0C557CBA8",
+ "hash": "D80535A8B54308E72279E976190AD4A2A3EE9E99CA936B31F751848CA4BFE9C5",
"identities": [
{
"all_tenants?": false,
diff --git a/core/priv/resource_snapshots/repo/feedbacks/20240819053647.json b/core/priv/resource_snapshots/repo/feedbacks/20240820060848.json
similarity index 95%
rename from core/priv/resource_snapshots/repo/feedbacks/20240819053647.json
rename to core/priv/resource_snapshots/repo/feedbacks/20240820060848.json
index 3eacc109..29b7b61f 100644
--- a/core/priv/resource_snapshots/repo/feedbacks/20240819053647.json
+++ b/core/priv/resource_snapshots/repo/feedbacks/20240820060848.json
@@ -55,7 +55,7 @@
"custom_indexes": [],
"custom_statements": [],
"has_create_action": false,
- "hash": "713C62CB8FAC47ACAEEA948F98A97FD49476C8DD7B5DCDFAD3CB8C5AD32D6919",
+ "hash": "6B54196233F50E22ACF8F95D69DC4968D0DEDB58D6061CB3B3B2F0E85A6B9B64",
"identities": [],
"multitenancy": {
"attribute": null,
diff --git a/core/priv/resource_snapshots/repo/github_apps/20240819053647.json b/core/priv/resource_snapshots/repo/github_apps/20240820060848.json
similarity index 95%
rename from core/priv/resource_snapshots/repo/github_apps/20240819053647.json
rename to core/priv/resource_snapshots/repo/github_apps/20240820060848.json
index 70e28bb8..b1bbf08a 100644
--- a/core/priv/resource_snapshots/repo/github_apps/20240819053647.json
+++ b/core/priv/resource_snapshots/repo/github_apps/20240820060848.json
@@ -55,7 +55,7 @@
"custom_indexes": [],
"custom_statements": [],
"has_create_action": true,
- "hash": "5BAA7E570E0A1162D19F01D6E8A99198A0138B460740DD9B10890CA855E21572",
+ "hash": "2E341957FC52089503F12B7B6489F131C4BBEDDAAB24BDAC75AE0B17EA6516B8",
"identities": [],
"multitenancy": {
"attribute": null,
diff --git a/core/priv/resource_snapshots/repo/github_repos/20240819053647.json b/core/priv/resource_snapshots/repo/github_repos/20240820060848.json
similarity index 96%
rename from core/priv/resource_snapshots/repo/github_repos/20240819053647.json
rename to core/priv/resource_snapshots/repo/github_repos/20240820060848.json
index 0b1f1085..229ae9a8 100644
--- a/core/priv/resource_snapshots/repo/github_repos/20240819053647.json
+++ b/core/priv/resource_snapshots/repo/github_repos/20240820060848.json
@@ -55,7 +55,7 @@
"custom_indexes": [],
"custom_statements": [],
"has_create_action": true,
- "hash": "EE6332081152F2B571D07826708C5A55DA38AAC8CF086DD81D9DF3056459DE47",
+ "hash": "FEA2BF4D2BEA6C2FE2268550DC11C24094F408AE5D6BDFFA70C56A1735DB36AA",
"identities": [
{
"all_tenants?": false,
diff --git a/core/priv/resource_snapshots/repo/messages/20240819053647.json b/core/priv/resource_snapshots/repo/messages/20240820060848.json
similarity index 96%
rename from core/priv/resource_snapshots/repo/messages/20240819053647.json
rename to core/priv/resource_snapshots/repo/messages/20240820060848.json
index 73648819..bc4add64 100644
--- a/core/priv/resource_snapshots/repo/messages/20240819053647.json
+++ b/core/priv/resource_snapshots/repo/messages/20240820060848.json
@@ -75,7 +75,7 @@
"custom_indexes": [],
"custom_statements": [],
"has_create_action": true,
- "hash": "BCD1EFEF0CCA7B40A32546E39C79C72201CF910AE8812DA99B35701A00B5EF9E",
+ "hash": "475E0A6A723B37EAA2F064A7F9BF9EB303A11A8E38031405371870393B30245A",
"identities": [],
"multitenancy": {
"attribute": null,
diff --git a/core/priv/resource_snapshots/repo/sessions/20240819053647.json b/core/priv/resource_snapshots/repo/sessions/20240820060848.json
similarity index 97%
rename from core/priv/resource_snapshots/repo/sessions/20240819053647.json
rename to core/priv/resource_snapshots/repo/sessions/20240820060848.json
index e0049345..b778ceee 100644
--- a/core/priv/resource_snapshots/repo/sessions/20240819053647.json
+++ b/core/priv/resource_snapshots/repo/sessions/20240820060848.json
@@ -75,7 +75,7 @@
"custom_indexes": [],
"custom_statements": [],
"has_create_action": true,
- "hash": "0456C71B7218EC634EA5DCE52A9479E43236ECD5421B066DB8FA2F16E7F1DD38",
+ "hash": "64CCDBC6E40C48594E0D5E9AD9FDE69D56FD49D616171034F1F1F4E14922F3B6",
"identities": [
{
"all_tenants?": false,
diff --git a/core/priv/resource_snapshots/repo/sources/20240819053647.json b/core/priv/resource_snapshots/repo/sources/20240820060848.json
similarity index 96%
rename from core/priv/resource_snapshots/repo/sources/20240819053647.json
rename to core/priv/resource_snapshots/repo/sources/20240820060848.json
index 76b18e96..7c1e97de 100644
--- a/core/priv/resource_snapshots/repo/sources/20240819053647.json
+++ b/core/priv/resource_snapshots/repo/sources/20240820060848.json
@@ -95,7 +95,7 @@
"custom_indexes": [],
"custom_statements": [],
"has_create_action": true,
- "hash": "40D24862C7D33396BA0B110B7F0F4BDD0D69404E9B3EC0B8F771BD31AC98E762",
+ "hash": "9529EFFA4B5AB7FFD6809D4E6C2318229D658F9C131A24BAF9CE6C6AC3F1866B",
"identities": [],
"multitenancy": {
"attribute": null,
diff --git a/core/priv/resource_snapshots/repo/tokens/20240819053647.json b/core/priv/resource_snapshots/repo/tokens/20240820060848.json
similarity index 96%
rename from core/priv/resource_snapshots/repo/tokens/20240819053647.json
rename to core/priv/resource_snapshots/repo/tokens/20240820060848.json
index 3798479a..f28d684c 100644
--- a/core/priv/resource_snapshots/repo/tokens/20240819053647.json
+++ b/core/priv/resource_snapshots/repo/tokens/20240820060848.json
@@ -76,7 +76,7 @@
"custom_indexes": [],
"custom_statements": [],
"has_create_action": true,
- "hash": "EF5B1D84FF86890DE7641D25F20FA44786D7D4A28AC895113979A220BB8ECAB3",
+ "hash": "2DD9595A4B30CE001E665CA41FAE7FFA70DB5EE5B7E14AE364C0D7652417B4A7",
"identities": [],
"multitenancy": {
"attribute": null,
diff --git a/core/priv/resource_snapshots/repo/user_identities/20240819053647.json b/core/priv/resource_snapshots/repo/user_identities/20240820060848.json
similarity index 97%
rename from core/priv/resource_snapshots/repo/user_identities/20240819053647.json
rename to core/priv/resource_snapshots/repo/user_identities/20240820060848.json
index 951a96bc..eba063ee 100644
--- a/core/priv/resource_snapshots/repo/user_identities/20240819053647.json
+++ b/core/priv/resource_snapshots/repo/user_identities/20240820060848.json
@@ -95,7 +95,7 @@
"custom_indexes": [],
"custom_statements": [],
"has_create_action": true,
- "hash": "499697AC6010E054BC094EFE77BFE29D0F3F778D28C88D73159AD3A4505BF517",
+ "hash": "0B7BEC1405443921328364D5EB9EDB39808DEC6C70397759108C963AD7E909AB",
"identities": [
{
"all_tenants?": false,
diff --git a/core/priv/resource_snapshots/repo/users/20240819053647.json b/core/priv/resource_snapshots/repo/users/20240820060848.json
similarity index 94%
rename from core/priv/resource_snapshots/repo/users/20240819053647.json
rename to core/priv/resource_snapshots/repo/users/20240820060848.json
index 4ad698ee..8639486b 100644
--- a/core/priv/resource_snapshots/repo/users/20240819053647.json
+++ b/core/priv/resource_snapshots/repo/users/20240820060848.json
@@ -36,7 +36,7 @@
"custom_indexes": [],
"custom_statements": [],
"has_create_action": true,
- "hash": "4D7CECE9BCF9CDF6F17FE3D28FDB1F96A1A82AAA40B47519E2DACD24BA5091A2",
+ "hash": "F86810D4B686DD4D54A9993CE8F8F902FBF85DC16FE643E2093CA29F91FDE6D6",
"identities": [
{
"all_tenants?": false,