diff --git a/core/lib/canary/scraper.ex b/core/lib/canary/scraper.ex
index 16a7520b..bba46f69 100644
--- a/core/lib/canary/scraper.ex
+++ b/core/lib/canary/scraper.ex
@@ -3,59 +3,98 @@ defmodule Canary.Scraper.Item do
defstruct [:id, :level, :title, :content]
end
+# Inspired by: https://github.com/agoodway/html2markdown/blob/06e3587/lib/html2markdown.ex#L6-L32
defmodule Canary.Scraper do
alias Canary.Scraper.Item
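+ # Tags that never carry user-visible documentation content; stripped during preprocess.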
+ @non_content_tags [
+ "aside",
+ "audio",
+ "base",
+ "button",
+ "datalist",
+ "embed",
+ "form",
+ "iframe",
+ "input",
+ "keygen",
+ "nav",
+ "noscript",
+ "object",
+ "output",
+ "script",
+ "select",
+ "source",
+ "style",
+ "svg",
+ "template",
+ "textarea",
+ "track",
+ "video"
+ ]
+
def run(html) do
- with {:ok, doc} <- Floki.parse_document(html),
- [body] <- doc |> Floki.find("body") do
- items =
- process(body)
- |> Enum.reverse()
- |> Enum.reject(&(&1.level == nil || &1.level == 0))
- |> Enum.reject(&(&1.content == nil || &1.content == ""))
- |> Enum.map(&%Item{&1 | content: String.trim(&1.content)})
-
- {:ok, items}
- else
- error -> {:error, error}
- end
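+ # run/1 now returns the list of items directly; callers match on the list, not {:ok, _}.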
+ html
+ |> preprocess()
+ |> process()
+ |> postprocess()
end
- def run!(html) do
- {:ok, content} = run(html)
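+ # Items accumulate newest-first; restore document order, then drop anything
+ # without a real heading level or with empty content.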
+ defp postprocess(items) do
+ items
+ |> Enum.reverse()
+ |> Enum.reject(&(&1.level == nil || &1.level == 0))
+ |> Enum.reject(&(&1.content == nil || &1.content == ""))
+ |> Enum.map(&%Item{&1 | content: String.trim(&1.content)})
+ end
+
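+ # Parse the HTML, scope to <body>, and strip comments and non-content tags
+ # before extraction runs.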
+ defp preprocess(content) do
content
+ |> ensure_html()
+ |> Floki.parse_document!()
+ |> Floki.find("body")
+ |> Floki.filter_out(:comment)
+ |> remove_non_content_tags()
+ end
+
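+ # Bare fragments are wrapped so the parsed document always has a <body> to find.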
+ defp ensure_html(content) do
+ if is_html_document?(content), do: content, else: wrap_fragment(content)
end
- def print!(html) do
- run!(html)
- |> Enum.each(&IO.puts("#{&1.content}\n-----"))
+ defp is_html_document?(content) do
+ String.contains?(content, "<html")
+ end
+
+ defp wrap_fragment(fragment), do: "<html><body>#{fragment}</body></html>"
+
+ defp remove_non_content_tags(document) do
+ Enum.reduce(@non_content_tags, document, &Floki.filter_out(&2, &1))
end
defp process(_, acc \\ [])
- defp process({"script", _, _}, acc), do: acc
- defp process({"style", _, _}, acc), do: acc
- defp process({"nav", _, _}, acc), do: acc
- defp process({"header", _, _}, acc), do: acc
- defp process({"footer", _, _}, acc), do: acc
- defp process({:comment, _}, acc), do: acc
-
- defp process({"h" <> level, _, nodes} = node, acc) do
+
+ defp process(nodes, acc) when is_list(nodes) do
+ nodes
+ |> Enum.reduce(acc, &process/2)
+ end
+
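+ # Each <h1>..<h6> opens a new Item; subsequent nodes append to the newest one.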
+ defp process({"h" <> level, _, nodes} = node, acc)
+ when level in ["1", "2", "3", "4", "5", "6"] do
+ level = String.to_integer(level)
+
id =
node
|> Floki.attribute("id")
|> Enum.at(0)
- level = parse_integer(level)
-
title =
nodes
|> Enum.map(&to_text/1)
|> Enum.join(" ")
- |> trim_leading_hash()
-
- content = String.duplicate("#", level) <> " #{title}\n"
+ |> String.trim_leading("#")
+ |> String.trim()
+ content = "#{String.duplicate("#", level)} #{title}" <> "\n"
[%Item{id: id, level: level, title: title, content: content} | acc]
end
@@ -66,7 +105,7 @@ defmodule Canary.Scraper do
if String.trim(text) in ["Skip to content"] do
acc
else
- acc |> update_first(&%Item{&1 | content: &1.content <> "[#{text}](#{href})"})
+ acc |> append_content("[#{text}](#{href})")
end
end
@@ -75,29 +114,10 @@ defmodule Canary.Scraper do
classes(node)
|> Enum.any?(&String.contains?(&1, "VPLocalNav"))
- # code = nodes |> Enum.find(&(elem(&1, 0) == "pre"))
-
cond do
is_nav ->
acc
- # not is_nil(code) ->
- # lang =
- # classes(node)
- # |> Enum.find("", &String.contains?(&1, "language-"))
- # |> String.replace("language-", "")
-
- # is_diff =
- # classes(code)
- # |> Enum.any?(fn c -> String.contains?(c, "diff") end)
-
- # lang = if is_diff, do: "#{lang}-diff", else: lang
-
- # rendered_code = process(code) |> Enum.at(0) |> Map.get(:content)
- # content = "\n```#{lang}\n#{rendered_code}\n```\n"
-
- # acc |> update_first(&%Item{&1 | content: &1.content <> content})
-
true ->
nodes |> Enum.reduce(acc, &process(&1, &2))
end
@@ -115,11 +135,11 @@ defmodule Canary.Scraper do
end)
|> Enum.join("\n")
- acc |> update_first(&%Item{&1 | content: &1.content <> content})
+ acc |> append_content(content)
end
defp process({"code", _, [text]}, acc) when is_binary(text) do
- acc |> update_first(&%Item{&1 | content: &1.content <> "`#{text}`"})
+ acc |> append_content("`#{text}`")
end
defp process({"li", _, nodes}, acc) do
@@ -129,20 +149,20 @@ defmodule Canary.Scraper do
|> Enum.map(& &1.content)
|> Enum.join()
- acc |> update_first(&%Item{&1 | content: &1.content <> "\n- #{text}"})
+ acc |> append_content("\n- #{text}")
end
defp process({"tr", _, nodes}, acc) do
row = nodes |> Enum.map(&to_text/1) |> Enum.join(",")
- acc |> update_first(&%Item{&1 | content: &1.content <> "\n#{row}"})
+ acc |> append_content("\n#{row}")
end
defp process({_, _, [text]}, acc) when is_binary(text) do
- acc |> update_first(&%Item{&1 | content: &1.content <> text})
+ acc |> append_content(text)
end
defp process(text, acc) when is_binary(text) do
- acc |> update_first(&%Item{&1 | content: &1.content <> text})
+ acc |> append_content(text)
end
defp process({_, _, nodes}, acc) do
@@ -167,19 +187,10 @@ defmodule Canary.Scraper do
|> String.trim()
end
- defp update_first(list, fun) when length(list) == 0, do: [fun.(%Item{title: "", content: ""})]
- defp update_first(list, fun), do: List.update_at(list, 0, fun)
-
- defp parse_integer(s) do
- case Integer.parse(s) do
- {n, _} -> n
- _ -> 0
- end
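+ # Append text to the item currently being built (head of the list); text seen
+ # before the first heading has no item to attach to, so it is dropped.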
+ defp append_content(list, content) when length(list) > 0 do
+ list
+ |> List.update_at(0, &%Item{&1 | content: &1.content <> content})
end
- defp trim_leading_hash(s) do
- s
- |> String.trim_leading("#")
- |> String.trim()
- end
+ defp append_content(list, _content), do: list
end
diff --git a/core/lib/canary/sources/document/create_webpage.ex b/core/lib/canary/sources/document/create_webpage.ex
index 854b6649..a2641375 100644
--- a/core/lib/canary/sources/document/create_webpage.ex
+++ b/core/lib/canary/sources/document/create_webpage.ex
@@ -30,7 +30,7 @@ defmodule Canary.Sources.Document.CreateWebpage do
|> Ash.Changeset.change_attribute(opts[:meta_attribute], wrap_union(%Webpage.DocumentMeta{}))
|> Ash.Changeset.change_attribute(opts[:chunks_attribute], [])
|> Ash.Changeset.after_action(fn _, record ->
- items = Canary.Scraper.run!(html)
+ items = Canary.Scraper.run(html)
hash =
html
diff --git a/core/lib/canary_web/live/dev/reader.ex b/core/lib/canary_web/live/dev/reader.ex
index 2e36b009..3e856b0d 100644
--- a/core/lib/canary_web/live/dev/reader.ex
+++ b/core/lib/canary_web/live/dev/reader.ex
@@ -41,7 +41,7 @@ defmodule CanaryWeb.Dev.ReaderLive do
params["url"]
|> Req.get!()
|> Map.get(:body)
- |> Scraper.run!()
+ |> Scraper.run()
{:noreply, socket |> assign(url: params["url"], chunks: chunks)}
else
diff --git a/core/test/canary/document_test.exs b/core/test/canary/document_test.exs
index 962b34c9..1e8d9045 100644
--- a/core/test/canary/document_test.exs
+++ b/core/test/canary/document_test.exs
@@ -24,7 +24,7 @@ defmodule Canary.Test.Document do
|> Ash.Changeset.for_create(:create_webpage, %{
source_id: source.id,
url: "https://example.com/",
- html: "hello
"
+ html: "hello
"
})
|> Ash.create!()
diff --git a/core/test/canary/scraper_test.exs b/core/test/canary/scraper_test.exs
index b14f78e9..039b8af7 100644
--- a/core/test/canary/scraper_test.exs
+++ b/core/test/canary/scraper_test.exs
@@ -2,7 +2,6 @@ defmodule Canary.Test.Scraper do
use ExUnit.Case, async: true
alias Canary.Scraper
- alias Canary.Scraper.Item
describe "it works without crashing" do
for {name, params} <- %{
@@ -14,282 +13,39 @@ defmodule Canary.Test.Scraper do
@tag params: params
test name, %{params: params} do
html = Req.get!(params[:url]).body
- assert Scraper.run!(html) |> length() > 0
+ assert Scraper.run(html) |> length() > 1
end
end
end
test "canary-1" do
html = File.read!("test/fixtures/canary-1.html")
+ items = Scraper.run(html)
+ assert length(items) == 3
- assert Scraper.run!(html) == [
- %Item{
- content:
- """
- # Not everyone needs a hosted service.
- You can just use keyword-based search locally, and still benefit from our composable components.
- Feature,Local,Cloud
- Search,Only Keyword-based Search,AI Powered Hybrid Search
- Ask AI,X,OTIPWanna try it out? We made a [playground](/docs/local/playground.html) for you!
- """
- |> String.trim(),
- id: "not-everyone-needs-a-hosted-service",
- level: 1,
- title: "Not everyone needs a hosted service."
- },
- %Item{
- content:
- """
- ## Any documentation & Any search index
- Our UI components are decoupled from the actual operation layer.We currently support:
- - Any `Pagefind` based search using `canary-provider-pagefind`
- - `VitePress` with `Minisearch` using `canary-provider-vitepress-minisearch`
- """
- |> String.trim(),
- id: "any-documentation-any-search-index",
- level: 2,
- title: "Any documentation & Any search index"
- },
- %Item{
- content:
- """
- ## Migrate to cloud provider
- If you need more features, you can easily migrate.html
- -
- +
-
- +
- -
-
- """
- |> String.trim(),
- id: "migrate-to-cloud-provider",
- level: 2,
- title: "Migrate to cloud provider"
- }
- ]
+ assert Enum.at(items, 0).level == 1
+ assert Enum.at(items, 0).id == "not-everyone-needs-a-hosted-service"
+ assert Enum.at(items, 0).title == "Not everyone needs a hosted service."
end
test "hono-1" do
html = File.read!("test/fixtures/hono-1.html")
+ items = Scraper.run(html)
+ assert length(items) == 13
- assert Scraper.run!(html) == [
- %Item{
- id: "bearer-auth-middleware",
- level: 1,
- title: "Bearer Auth Middleware",
- content:
- "# Bearer Auth Middleware\nThe Bearer Auth Middleware provides authentication by verifying an API token in the Request header. The HTTP clients accessing the endpoint will add the `Authorization` header with `Bearer {token}` as the header value.Using `curl` from the terminal, it would look like this:shcurl -H 'Authorization: Bearer honoiscool' http://localhost:8787/auth/page"
- },
- %Item{
- id: "import",
- level: 2,
- title: "Import",
- content:
- "## Import\ntsimport { Hono } from 'hono'\nimport { bearerAuth } from 'hono/bearer-auth'"
- },
- %Item{
- id: "usage",
- level: 2,
- title: "Usage",
- content:
- "## Usage\ntsconst app = new Hono()\n\nconst token = 'honoiscool'\n\napp.use('/api/*', bearerAuth({ token }))\n\napp.get('/api/page', (c) => {\nreturn c.json({ message: 'You are authorized' })\n})To restrict to a specific route + method:tsconst app = new Hono()\n\nconst token = 'honoiscool'\n\napp.get('/api/page', (c) => {\nreturn c.json({ message: 'Read posts' })\n})\n\napp.post('/api/page', bearerAuth({ token }), (c) => {\nreturn c.json({ message: 'Created post!' }, 201)\n})To implement multiple tokens (E.g., any valid token can read but create/update/delete are restricted to a privileged token):tsconst app = new Hono()\n\nconst readToken = 'read'\nconst privilegedToken = 'read+write'\nconst privilegedMethods = ['POST', 'PUT', 'PATCH', 'DELETE']\n\napp.on('GET', '/api/page/*', async (c, next) => {\n// List of valid tokens\nconst bearer = bearerAuth({ token: [readToken, privilegedToken] })\nreturn bearer(c, next)\n})\napp.on(privilegedMethods, '/api/page/*', async (c, next) => {\n// Single valid privileged token\nconst bearer = bearerAuth({ token: privilegedToken })\nreturn bearer(c, next)\n})\n\n// Define handlers for GET, POST, etc.If you want to verify the value of the token yourself, specify the `verifyToken` option; returning `true` means it is accepted.tsconst app = new Hono()\n\napp.use(\n'/auth-verify-token/*',\nbearerAuth({\nverifyToken: async (token, c) => {\nreturn token === 'dynamic-token'\n},\n})\n)"
- },
- %Item{
- id: "options",
- level: 2,
- title: "Options",
- content: "## Options"
- },
- %Item{
- id: "token-string-string",
- level: 3,
- title: "required token: string | string[]",
- content:
- "### required token: string | string[]\nThe string to validate the incoming bearer token against."
- },
- %Item{
- id: "realm-string",
- level: 3,
- title: "optional realm: string",
- content:
- "### optional realm: string\nThe domain name of the realm, as part of the returned WWW-Authenticate challenge header. The default is `\"\"`. See more: [https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/WWW-Authenticate#directives](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/WWW-Authenticate#directives)"
- },
- %Item{
- id: "prefix-string",
- level: 3,
- title: "optional prefix: string",
- content:
- "### optional prefix: string\nThe prefix (or known as `schema`) for the Authorization header value. The default is `\"Bearer\"`."
- },
- %Item{
- id: "headername-string",
- level: 3,
- title: "optional headerName: string",
- content:
- "### optional headerName: string\nThe header name. The default value is `Authorization`."
- },
- %Item{
- id: "hashfunction-function",
- level: 3,
- title: "optional hashFunction: Function",
- content:
- "### optional hashFunction: Function\nA function to handle hashing for safe comparison of authentication tokens."
- },
- %Item{
- id: "verifytoken-token-string-c-context-boolean-promise-boolean",
- level: 3,
- title:
- "optional verifyToken: (token: string, c: Context) => boolean | Promise",
- content:
- "### optional verifyToken: (token: string, c: Context) => boolean | Promise\nThe function to verify the token."
- },
- %Item{
- id: "noauthenticationheadermessage-string-object-messagefunction",
- level: 3,
- title: "optional noAuthenticationHeaderMessage: string | object | MessageFunction",
- content:
- "### optional noAuthenticationHeaderMessage: string | object | MessageFunction\n`MessageFunction` is `(c: Context) => string | object | Promise`. The custom message if it does not have an authentication header."
- },
- %Item{
- id: "invalidauthenticationheadermessage-string-object-messagefunction",
- level: 3,
- title:
- "optional invalidAuthenticationHeaderMessage: string | object | MessageFunction",
- content:
- "### optional invalidAuthenticationHeaderMessage: string | object | MessageFunction\nThe custom message if the authentication header is invalid."
- },
- %Item{
- id: "invalidtokenmessage-string-object-messagefunction",
- level: 3,
- title: "optional invalidTokenMessage: string | object | MessageFunction",
- content:
- "### optional invalidTokenMessage: string | object | MessageFunction\nThe custom message if the token is invalid."
- }
- ]
+ assert Enum.at(items, 0).level == 1
+ assert Enum.at(items, 0).id == "bearer-auth-middleware"
+ assert Enum.at(items, 0).title == "Bearer Auth Middleware"
end
test "litellm-1" do
html = File.read!("test/fixtures/litellm-1.html")
- assert Scraper.run!(html) == [
- %Item{
- id: nil,
- level: 1,
- title: "LiteLLM - Getting Started",
- content:
- "# LiteLLM - Getting Started\n[https://github.com/BerriAI/litellm](https://github.com/BerriAI/litellm)"
- },
- %Item{
- id: "call-100-llms-using-the-openai-inputoutput-format",
- level: 2,
- title: "Call 100+ LLMs using the OpenAI Input/Output Format",
- content:
- "## Call 100+ LLMs using the OpenAI Input/Output Format\n\n- Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints\n- [Consistent output](https://docs.litellm.ai/docs/completion/output), text responses will always be available at `['choices'][0]['message']['content']`\n- Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)\n- Track spend & set budgets per project [LiteLLM Proxy Server](https://docs.litellm.ai/docs/simple_proxy)"
- },
- %Item{
- id: "how-to-use-litellm",
- level: 2,
- title: "How to use LiteLLM",
- content:
- "## How to use LiteLLM\nYou can use litellm through either:\n- [LiteLLM Proxy Server](#litellm-proxy-server-llm-gateway) - Server (LLM Gateway) to call 100+ LLMs, load balance, cost tracking across projects\n- [LiteLLM python SDK](#basic-usage) - Python Client to call 100+ LLMs, load balance, cost tracking"
- },
- %Item{
- id: "when-to-use-litellm-proxy-server-llm-gateway",
- level: 3,
- title: "When to use LiteLLM Proxy Server (LLM Gateway)",
- content:
- "### When to use LiteLLM Proxy Server (LLM Gateway)\ntipUse LiteLLM Proxy Server if you want a central service (LLM Gateway) to access multiple LLMsTypically used by Gen AI Enablement / ML PLatform Teams\n- LiteLLM Proxy gives you a unified interface to access multiple LLMs (100+ LLMs)\n- Track LLM Usage and setup guardrails\n- Customize Logging, Guardrails, Caching per project"
- },
- %Item{
- id: "when-to-use-litellm-python-sdk",
- level: 3,
- title: "When to use LiteLLM Python SDK",
- content:
- "### When to use LiteLLM Python SDK\ntip Use LiteLLM Python SDK if you want to use LiteLLM in your python codeTypically used by developers building llm projects\n- LiteLLM SDK gives you a unified interface to access multiple LLMs (100+ LLMs) \n- Retry/fallback logic across multiple deployments (e.g. Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)"
- },
- %Item{
- id: "litellm-python-sdk",
- level: 2,
- title: "LiteLLM Python SDK",
- content: "## LiteLLM Python SDK"
- },
- %Item{
- id: "basic-usage",
- level: 3,
- title: "Basic usage",
- content:
- "### Basic usage\npip install litellm\n- OpenAI\n- Anthropic\n- VertexAI\n- HuggingFace\n- Azure OpenAI\n- Ollama\n- Openrouterfrom litellm import completion\nimport os\n\n## set ENV variables\nos.environ[\"OPENAI_API_KEY\"]=\"your-api-key\"\n\nresponse = completion(\nmodel=\"gpt-3.5-turbo\",\nmessages=[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}]\n)from litellm import completion\nimport os\n\n## set ENV variables\nos.environ[\"ANTHROPIC_API_KEY\"]=\"your-api-key\"\n\nresponse = completion(\nmodel=\"claude-2\",\nmessages=[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}]\n)from litellm import completion\nimport os\n\n# auth: run 'gcloud auth application-default'\nos.environ[\"VERTEX_PROJECT\"]=\"hardy-device-386718\"\nos.environ[\"VERTEX_LOCATION\"]=\"us-central1\"\n\nresponse = completion(\nmodel=\"chat-bison\",\nmessages=[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}]\n)from litellm import completion\nimport os\n\nos.environ[\"HUGGINGFACE_API_KEY\"]=\"huggingface_api_key\"\n\n# e.g. Call 'WizardLM/WizardCoder-Python-34B-V1.0' hosted on HF Inference endpoints\nresponse = completion(\nmodel=\"huggingface/WizardLM/WizardCoder-Python-34B-V1.0\",\nmessages=[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\napi_base=\"https://my-endpoint.huggingface.cloud\"\n)\n\nprint(response)from litellm import completion\nimport os\n\n## set ENV variables\nos.environ[\"AZURE_API_KEY\"]=\"\"\nos.environ[\"AZURE_API_BASE\"]=\"\"\nos.environ[\"AZURE_API_VERSION\"]=\"\"\n\n# azure call\nresponse = completion(\n\"azure/\",\nmessages =[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}]\n)from litellm import completion\n\nresponse = completion(\nmodel=\"ollama/llama2\",\nmessages =[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\napi_base=\"http://localhost:11434\"\n)from litellm import completion\nimport os\n\n## set ENV variables\nos.environ[\"OPENROUTER_API_KEY\"]=\"openrouter_api_key\"\n\nresponse = completion(\nmodel=\"openrouter/google/palm-2-chat-bison\",\nmessages =[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\n)"
- },
- %Item{
- id: "streaming",
- level: 3,
- title: "Streaming",
- content:
- "### Streaming\nSet `stream=True` in the `completion` args. \n- OpenAI\n- Anthropic\n- VertexAI\n- HuggingFace\n- Azure OpenAI\n- Ollama\n- Openrouterfrom litellm import completion\nimport os\n\n## set ENV variables\nos.environ[\"OPENAI_API_KEY\"]=\"your-api-key\"\n\nresponse = completion(\nmodel=\"gpt-3.5-turbo\",\nmessages=[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\nstream=True,\n)from litellm import completion\nimport os\n\n## set ENV variables\nos.environ[\"ANTHROPIC_API_KEY\"]=\"your-api-key\"\n\nresponse = completion(\nmodel=\"claude-2\",\nmessages=[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\nstream=True,\n)from litellm import completion\nimport os\n\n# auth: run 'gcloud auth application-default'\nos.environ[\"VERTEX_PROJECT\"]=\"hardy-device-386718\"\nos.environ[\"VERTEX_LOCATION\"]=\"us-central1\"\n\nresponse = completion(\nmodel=\"chat-bison\",\nmessages=[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\nstream=True,\n)from litellm import completion\nimport os\n\nos.environ[\"HUGGINGFACE_API_KEY\"]=\"huggingface_api_key\"\n\n# e.g. Call 'WizardLM/WizardCoder-Python-34B-V1.0' hosted on HF Inference endpoints\nresponse = completion(\nmodel=\"huggingface/WizardLM/WizardCoder-Python-34B-V1.0\",\nmessages=[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\napi_base=\"https://my-endpoint.huggingface.cloud\",\nstream=True,\n)\n\nprint(response)from litellm import completion\nimport os\n\n## set ENV variables\nos.environ[\"AZURE_API_KEY\"]=\"\"\nos.environ[\"AZURE_API_BASE\"]=\"\"\nos.environ[\"AZURE_API_VERSION\"]=\"\"\n\n# azure call\nresponse = completion(\n\"azure/\",\nmessages =[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\nstream=True,\n)from litellm import completion\n\nresponse = completion(\nmodel=\"ollama/llama2\",\nmessages =[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\napi_base=\"http://localhost:11434\",\nstream=True,\n)from litellm import completion\nimport os\n\n## set ENV variables\nos.environ[\"OPENROUTER_API_KEY\"]=\"openrouter_api_key\"\n\nresponse = completion(\nmodel=\"openrouter/google/palm-2-chat-bison\",\nmessages =[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\nstream=True,\n)"
- },
- %Item{
- id: "exception-handling",
- level: 3,
- title: "Exception handling",
- content:
- "### Exception handling\nLiteLLM maps exceptions across all supported providers to the OpenAI exceptions. All our exceptions inherit from OpenAI's exception types, so any error-handling you have for that, should work out of the box with LiteLLM.from openai.error import OpenAIError\nfrom litellm import completion\n\nos.environ[\"ANTHROPIC_API_KEY\"]=\"bad-key\"\ntry:\n# some code\ncompletion(model=\"claude-instant-1\", messages=[{\"role\":\"user\",\"content\":\"Hey, how's it going?\"}])\nexcept OpenAIError as e:\nprint(e)"
- },
- %Item{
- id: "logging-observability---log-llm-inputoutput-docs",
- level: 3,
- title: "Logging Observability - Log LLM Input/Output ( Docs )",
- content:
- "### Logging Observability - Log LLM Input/Output ( Docs )\nLiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slackfrom litellm import completion\n\n## set env variables for logging tools\nos.environ[\"HELICONE_API_KEY\"]=\"your-helicone-key\"\nos.environ[\"LANGFUSE_PUBLIC_KEY\"]=\"\"\nos.environ[\"LANGFUSE_SECRET_KEY\"]=\"\"\nos.environ[\"LUNARY_PUBLIC_KEY\"]=\"your-lunary-public-key\"\n\nos.environ[\"OPENAI_API_KEY\"]\n\n# set callbacks\nlitellm.success_callback =[\"lunary\",\"langfuse\",\"helicone\"]# log input/output to lunary, langfuse, supabase, helicone\n\n#openai call\nresponse = completion(model=\"gpt-3.5-turbo\", messages=[{\"role\":\"user\",\"content\":\"Hi - i'm openai\"}])"
- },
- %Item{
- id: "track-costs-usage-latency-for-streaming",
- level: 3,
- title: "Track Costs, Usage, Latency for streaming",
- content:
- "### Track Costs, Usage, Latency for streaming\nUse a callback function for this - more info on custom callbacks: [https://docs.litellm.ai/docs/observability/custom_callback](https://docs.litellm.ai/docs/observability/custom_callback)import litellm\n\n# track_cost_callback\ndeftrack_cost_callback(\nkwargs,# kwargs to completion\ncompletion_response,# response from completion\nstart_time, end_time # start/end time\n):\ntry:\nresponse_cost = kwargs.get(\"response_cost\",0)\nprint(\"streaming response_cost\", response_cost)\nexcept:\npass\n# set callback\nlitellm.success_callback =[track_cost_callback]# set custom callback function\n\n# litellm.completion() call\nresponse = completion(\nmodel=\"gpt-3.5-turbo\",\nmessages=[\n{\n\"role\":\"user\",\n\"content\":\"Hi - i'm openai\"\n}\n],\nstream=True\n)"
- },
- %Item{
- id: "litellm-proxy-server-llm-gateway",
- level: 2,
- title: "LiteLLM Proxy Server (LLM Gateway)",
- content:
- "## LiteLLM Proxy Server (LLM Gateway)\nTrack spend across multiple projects/peopleThe proxy provides:\n- [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth)\n- [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class)\n- [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend)\n- [Rate Limiting](https://docs.litellm.ai/docs/proxy/users#set-rate-limits)"
- },
- %Item{
- id: "-proxy-endpoints---swagger-docs",
- level: 3,
- title: "Proxy Endpoints - Swagger Docs",
- content:
- "### Proxy Endpoints - Swagger Docs\nGo here for a complete tutorial with keys + rate limits - here"
- },
- %Item{
- id: "quick-start-proxy---cli",
- level: 3,
- title: "Quick Start Proxy - CLI",
- content: "### Quick Start Proxy - CLI\npip install'litellm[proxy]'"
- },
- %Item{
- id: "step-1-start-litellm-proxy",
- level: 4,
- title: "Step 1: Start litellm proxy",
- content:
- "#### Step 1: Start litellm proxy\n$ litellm --model huggingface/bigcode/starcoder\n\n#INFO: Proxy running on http://0.0.0.0:4000"
- },
- %Item{
- id: "step-2-make-chatcompletions-request-to-proxy",
- level: 4,
- title: "Step 2: Make ChatCompletions Request to Proxy",
- content:
- "#### Step 2: Make ChatCompletions Request to Proxy\nimport openai # openai v1.0.0+\nclient = openai.OpenAI(api_key=\"anything\",base_url=\"http://0.0.0.0:4000\")# set proxy to base_url\n# request sent to model set on litellm proxy, `litellm --model`\nresponse = client.chat.completions.create(model=\"gpt-3.5-turbo\", messages =[\n{\n\"role\":\"user\",\n\"content\":\"this is a test request, write a short poem\"\n}\n])\n\nprint(response)"
- },
- %Item{
- id: "more-details",
- level: 2,
- title: "More details",
- content:
- "## More details\n\n- [exception mapping](/docs/exception_mapping)\n- [retries + model fallbacks for completion()](/docs/completion/reliable_completions)\n- [proxy virtual keys & spend management](/docs/proxy/virtual_keys)\n- [E2E Tutorial for LiteLLM Proxy Server](/docs/proxy/docker_quick_start)\n- Call 100+ LLMs using the OpenAI Input/Output Format\n- [How to use LiteLLM](#how-to-use-litellm)\n- When to use LiteLLM Proxy Server (LLM Gateway)\n- When to use LiteLLM Python SDK\n- LiteLLM Python SDK\n- [Basic usage](#basic-usage)\n- [Streaming](#streaming)\n- [Exception handling](#exception-handling)\n- [Logging Observability - Log LLM Input/Output (Docs)](#logging-observability---log-llm-inputoutput-docs)\n- [Track Costs, Usage, Latency for streaming](#track-costs-usage-latency-for-streaming)\n- LiteLLM Proxy Server (LLM Gateway)\n- [Proxy Endpoints - Swagger Docs](#-proxy-endpoints---swagger-docs)\n- [Quick Start Proxy - CLI](#quick-start-proxy---cli)\n- [More details](#more-details)"
- }
- ]
+ items = Scraper.run(html)
+ assert length(items) == 17
+
+ assert Enum.at(items, 0).level == 1
+ assert Enum.at(items, 0).id == nil
+ assert Enum.at(items, 0).title == "LiteLLM - Getting Started"
end
end