diff --git a/core/lib/canary/scraper.ex b/core/lib/canary/scraper.ex index 16a7520b..bba46f69 100644 --- a/core/lib/canary/scraper.ex +++ b/core/lib/canary/scraper.ex @@ -3,59 +3,98 @@ defmodule Canary.Scraper.Item do defstruct [:id, :level, :title, :content] end +# Inspired by: https://github.com/agoodway/html2markdown/blob/06e3587/lib/html2markdown.ex#L6-L32 defmodule Canary.Scraper do alias Canary.Scraper.Item + @non_content_tags [ + "aside", + "audio", + "base", + "button", + "datalist", + "embed", + "form", + "iframe", + "input", + "keygen", + "nav", + "noscript", + "object", + "output", + "script", + "select", + "source", + "style", + "svg", + "template", + "textarea", + "track", + "video" + ] + def run(html) do - with {:ok, doc} <- Floki.parse_document(html), - [body] <- doc |> Floki.find("body") do - items = - process(body) - |> Enum.reverse() - |> Enum.reject(&(&1.level == nil || &1.level == 0)) - |> Enum.reject(&(&1.content == nil || &1.content == "")) - |> Enum.map(&%Item{&1 | content: String.trim(&1.content)}) - - {:ok, items} - else - error -> {:error, error} - end + html + |> preprocess() + |> process() + |> postprocess() end - def run!(html) do - {:ok, content} = run(html) + defp postprocess(items) do + items + |> Enum.reverse() + |> Enum.reject(&(&1.level == nil || &1.level == 0)) + |> Enum.reject(&(&1.content == nil || &1.content == "")) + |> Enum.map(&%Item{&1 | content: String.trim(&1.content)}) + end + + defp preprocess(content) do content + |> ensure_html() + |> Floki.parse_document!() + |> Floki.find("body") + |> Floki.filter_out(:comment) + |> remove_non_content_tags() + end + + defp ensure_html(content) do + if is_html_document?(content), do: content, else: wrap_fragment(content) end - def print!(html) do - run!(html) - |> Enum.each(&IO.puts("#{&1.content}\n-----")) + defp is_html_document?(content) do + String.contains?(content, "<html") + end + + defp wrap_fragment(fragment), do: "<html><body>#{fragment}</body></html>" + + defp remove_non_content_tags(document) do + Enum.reduce(@non_content_tags, document, 
&Floki.filter_out(&2, &1)) end defp process(_, acc \\ []) - defp process({"script", _, _}, acc), do: acc - defp process({"style", _, _}, acc), do: acc - defp process({"nav", _, _}, acc), do: acc - defp process({"header", _, _}, acc), do: acc - defp process({"footer", _, _}, acc), do: acc - defp process({:comment, _}, acc), do: acc - - defp process({"h" <> level, _, nodes} = node, acc) do + + defp process(nodes, acc) when is_list(nodes) do + nodes + |> Enum.reduce(acc, &process/2) + end + + defp process({"h" <> level, _, nodes} = node, acc) + when level in ["1", "2", "3", "4", "5", "6"] do + level = String.to_integer(level) + id = node |> Floki.attribute("id") |> Enum.at(0) - level = parse_integer(level) - title = nodes |> Enum.map(&to_text/1) |> Enum.join(" ") - |> trim_leading_hash() - - content = String.duplicate("#", level) <> " #{title}\n" + |> String.trim_leading("#") + |> String.trim() + content = "#{String.duplicate("#", level)} #{title}" <> "\n" [%Item{id: id, level: level, title: title, content: content} | acc] end @@ -66,7 +105,7 @@ defmodule Canary.Scraper do if String.trim(text) in ["Skip to content"] do acc else - acc |> update_first(&%Item{&1 | content: &1.content <> "[#{text}](#{href})"}) + acc |> append_content("[#{text}](#{href})") end end @@ -75,29 +114,10 @@ defmodule Canary.Scraper do classes(node) |> Enum.any?(&String.contains?(&1, "VPLocalNav")) - # code = nodes |> Enum.find(&(elem(&1, 0) == "pre")) - cond do is_nav -> acc - # not is_nil(code) -> - # lang = - # classes(node) - # |> Enum.find("", &String.contains?(&1, "language-")) - # |> String.replace("language-", "") - - # is_diff = - # classes(code) - # |> Enum.any?(fn c -> String.contains?(c, "diff") end) - - # lang = if is_diff, do: "#{lang}-diff", else: lang - - # rendered_code = process(code) |> Enum.at(0) |> Map.get(:content) - # content = "\n```#{lang}\n#{rendered_code}\n```\n" - - # acc |> update_first(&%Item{&1 | content: &1.content <> content}) - true -> nodes |> Enum.reduce(acc, 
&process(&1, &2)) end @@ -115,11 +135,11 @@ defmodule Canary.Scraper do end) |> Enum.join("\n") - acc |> update_first(&%Item{&1 | content: &1.content <> content}) + acc |> append_content(content) end defp process({"code", _, [text]}, acc) when is_binary(text) do - acc |> update_first(&%Item{&1 | content: &1.content <> "`#{text}`"}) + acc |> append_content("`#{text}`") end defp process({"li", _, nodes}, acc) do @@ -129,20 +149,20 @@ defmodule Canary.Scraper do |> Enum.map(& &1.content) |> Enum.join() - acc |> update_first(&%Item{&1 | content: &1.content <> "\n- #{text}"}) + acc |> append_content("\n- #{text}") end defp process({"tr", _, nodes}, acc) do row = nodes |> Enum.map(&to_text/1) |> Enum.join(",") - acc |> update_first(&%Item{&1 | content: &1.content <> "\n#{row}"}) + acc |> append_content("\n#{row}") end defp process({_, _, [text]}, acc) when is_binary(text) do - acc |> update_first(&%Item{&1 | content: &1.content <> text}) + acc |> append_content(text) end defp process(text, acc) when is_binary(text) do - acc |> update_first(&%Item{&1 | content: &1.content <> text}) + acc |> append_content(text) end defp process({_, _, nodes}, acc) do @@ -167,19 +187,10 @@ defmodule Canary.Scraper do |> String.trim() end - defp update_first(list, fun) when length(list) == 0, do: [fun.(%Item{title: "", content: ""})] - defp update_first(list, fun), do: List.update_at(list, 0, fun) - - defp parse_integer(s) do - case Integer.parse(s) do - {n, _} -> n - _ -> 0 - end + defp append_content(list, content) when length(list) > 0 do + list + |> List.update_at(0, &%Item{&1 | content: &1.content <> content}) end - defp trim_leading_hash(s) do - s - |> String.trim_leading("#") - |> String.trim() - end + defp append_content(list, _content), do: list end diff --git a/core/lib/canary/sources/document/create_webpage.ex b/core/lib/canary/sources/document/create_webpage.ex index 854b6649..a2641375 100644 --- a/core/lib/canary/sources/document/create_webpage.ex +++ 
b/core/lib/canary/sources/document/create_webpage.ex @@ -30,7 +30,7 @@ defmodule Canary.Sources.Document.CreateWebpage do |> Ash.Changeset.change_attribute(opts[:meta_attribute], wrap_union(%Webpage.DocumentMeta{})) |> Ash.Changeset.change_attribute(opts[:chunks_attribute], []) |> Ash.Changeset.after_action(fn _, record -> - items = Canary.Scraper.run!(html) + items = Canary.Scraper.run(html) hash = html diff --git a/core/lib/canary_web/live/dev/reader.ex b/core/lib/canary_web/live/dev/reader.ex index 2e36b009..3e856b0d 100644 --- a/core/lib/canary_web/live/dev/reader.ex +++ b/core/lib/canary_web/live/dev/reader.ex @@ -41,7 +41,7 @@ defmodule CanaryWeb.Dev.ReaderLive do params["url"] |> Req.get!() |> Map.get(:body) - |> Scraper.run!() + |> Scraper.run() {:noreply, socket |> assign(url: params["url"], chunks: chunks)} else diff --git a/core/test/canary/document_test.exs b/core/test/canary/document_test.exs index 962b34c9..1e8d9045 100644 --- a/core/test/canary/document_test.exs +++ b/core/test/canary/document_test.exs @@ -24,7 +24,7 @@ defmodule Canary.Test.Document do |> Ash.Changeset.for_create(:create_webpage, %{ source_id: source.id, url: "https://example.com/", - html: "

hello

" + html: "

hello

" }) |> Ash.create!() diff --git a/core/test/canary/scraper_test.exs b/core/test/canary/scraper_test.exs index b14f78e9..039b8af7 100644 --- a/core/test/canary/scraper_test.exs +++ b/core/test/canary/scraper_test.exs @@ -2,7 +2,6 @@ defmodule Canary.Test.Scraper do use ExUnit.Case, async: true alias Canary.Scraper - alias Canary.Scraper.Item describe "it works without crashing" do for {name, params} <- %{ @@ -14,282 +13,39 @@ defmodule Canary.Test.Scraper do @tag params: params test name, %{params: params} do html = Req.get!(params[:url]).body - assert Scraper.run!(html) |> length() > 0 + assert Scraper.run(html) |> length() > 1 end end end test "canary-1" do html = File.read!("test/fixtures/canary-1.html") + items = Scraper.run(html) + assert length(items) == 3 - assert Scraper.run!(html) == [ - %Item{ - content: - """ - # Not everyone needs a hosted service. - You can just use keyword-based search locally, and still benefit from our composable components. - Feature,Local,Cloud - Search,Only Keyword-based Search,AI Powered Hybrid Search - Ask AI,X,OTIPWanna try it out? We made a [playground](/docs/local/playground.html) for you! - """ - |> String.trim(), - id: "not-everyone-needs-a-hosted-service", - level: 1, - title: "Not everyone needs a hosted service." 
- }, - %Item{ - content: - """ - ## Any documentation & Any search index - Our UI components are decoupled from the actual operation layer.We currently support: - - Any `Pagefind` based search using `canary-provider-pagefind` - - `VitePress` with `Minisearch` using `canary-provider-vitepress-minisearch` - """ - |> String.trim(), - id: "any-documentation-any-search-index", - level: 2, - title: "Any documentation & Any search index" - }, - %Item{ - content: - """ - ## Migrate to cloud provider - If you need more features, you can easily migrate.html - - - + - - + - - - - """ - |> String.trim(), - id: "migrate-to-cloud-provider", - level: 2, - title: "Migrate to cloud provider" - } - ] + assert Enum.at(items, 0).level == 1 + assert Enum.at(items, 0).id == "not-everyone-needs-a-hosted-service" + assert Enum.at(items, 0).title == "Not everyone needs a hosted service." end test "hono-1" do html = File.read!("test/fixtures/hono-1.html") + items = Scraper.run(html) + assert length(items) == 13 - assert Scraper.run!(html) == [ - %Item{ - id: "bearer-auth-middleware", - level: 1, - title: "Bearer Auth Middleware", - content: - "# Bearer Auth Middleware\nThe Bearer Auth Middleware provides authentication by verifying an API token in the Request header. 
The HTTP clients accessing the endpoint will add the `Authorization` header with `Bearer {token}` as the header value.Using `curl` from the terminal, it would look like this:shcurl -H 'Authorization: Bearer honoiscool' http://localhost:8787/auth/page" - }, - %Item{ - id: "import", - level: 2, - title: "Import", - content: - "## Import\ntsimport { Hono } from 'hono'\nimport { bearerAuth } from 'hono/bearer-auth'" - }, - %Item{ - id: "usage", - level: 2, - title: "Usage", - content: - "## Usage\ntsconst app = new Hono()\n\nconst token = 'honoiscool'\n\napp.use('/api/*', bearerAuth({ token }))\n\napp.get('/api/page', (c) => {\nreturn c.json({ message: 'You are authorized' })\n})To restrict to a specific route + method:tsconst app = new Hono()\n\nconst token = 'honoiscool'\n\napp.get('/api/page', (c) => {\nreturn c.json({ message: 'Read posts' })\n})\n\napp.post('/api/page', bearerAuth({ token }), (c) => {\nreturn c.json({ message: 'Created post!' }, 201)\n})To implement multiple tokens (E.g., any valid token can read but create/update/delete are restricted to a privileged token):tsconst app = new Hono()\n\nconst readToken = 'read'\nconst privilegedToken = 'read+write'\nconst privilegedMethods = ['POST', 'PUT', 'PATCH', 'DELETE']\n\napp.on('GET', '/api/page/*', async (c, next) => {\n// List of valid tokens\nconst bearer = bearerAuth({ token: [readToken, privilegedToken] })\nreturn bearer(c, next)\n})\napp.on(privilegedMethods, '/api/page/*', async (c, next) => {\n// Single valid privileged token\nconst bearer = bearerAuth({ token: privilegedToken })\nreturn bearer(c, next)\n})\n\n// Define handlers for GET, POST, etc.If you want to verify the value of the token yourself, specify the `verifyToken` option; returning `true` means it is accepted.tsconst app = new Hono()\n\napp.use(\n'/auth-verify-token/*',\nbearerAuth({\nverifyToken: async (token, c) => {\nreturn token === 'dynamic-token'\n},\n})\n)" - }, - %Item{ - id: "options", - level: 2, - title: "Options", - content: 
"## Options" - }, - %Item{ - id: "token-string-string", - level: 3, - title: "required token: string | string[]", - content: - "### required token: string | string[]\nThe string to validate the incoming bearer token against." - }, - %Item{ - id: "realm-string", - level: 3, - title: "optional realm: string", - content: - "### optional realm: string\nThe domain name of the realm, as part of the returned WWW-Authenticate challenge header. The default is `\"\"`. See more: [https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/WWW-Authenticate#directives](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/WWW-Authenticate#directives)" - }, - %Item{ - id: "prefix-string", - level: 3, - title: "optional prefix: string", - content: - "### optional prefix: string\nThe prefix (or known as `schema`) for the Authorization header value. The default is `\"Bearer\"`." - }, - %Item{ - id: "headername-string", - level: 3, - title: "optional headerName: string", - content: - "### optional headerName: string\nThe header name. The default value is `Authorization`." - }, - %Item{ - id: "hashfunction-function", - level: 3, - title: "optional hashFunction: Function", - content: - "### optional hashFunction: Function\nA function to handle hashing for safe comparison of authentication tokens." - }, - %Item{ - id: "verifytoken-token-string-c-context-boolean-promise-boolean", - level: 3, - title: - "optional verifyToken: (token: string, c: Context) => boolean | Promise", - content: - "### optional verifyToken: (token: string, c: Context) => boolean | Promise\nThe function to verify the token." - }, - %Item{ - id: "noauthenticationheadermessage-string-object-messagefunction", - level: 3, - title: "optional noAuthenticationHeaderMessage: string | object | MessageFunction", - content: - "### optional noAuthenticationHeaderMessage: string | object | MessageFunction\n`MessageFunction` is `(c: Context) => string | object | Promise`. 
The custom message if it does not have an authentication header." - }, - %Item{ - id: "invalidauthenticationheadermessage-string-object-messagefunction", - level: 3, - title: - "optional invalidAuthenticationHeaderMessage: string | object | MessageFunction", - content: - "### optional invalidAuthenticationHeaderMessage: string | object | MessageFunction\nThe custom message if the authentication header is invalid." - }, - %Item{ - id: "invalidtokenmessage-string-object-messagefunction", - level: 3, - title: "optional invalidTokenMessage: string | object | MessageFunction", - content: - "### optional invalidTokenMessage: string | object | MessageFunction\nThe custom message if the token is invalid." - } - ] + assert Enum.at(items, 0).level == 1 + assert Enum.at(items, 0).id == "bearer-auth-middleware" + assert Enum.at(items, 0).title == "Bearer Auth Middleware" end test "litellm-1" do html = File.read!("test/fixtures/litellm-1.html") - assert Scraper.run!(html) == [ - %Item{ - id: nil, - level: 1, - title: "LiteLLM - Getting Started", - content: - "# LiteLLM - Getting Started\n[https://github.com/BerriAI/litellm](https://github.com/BerriAI/litellm)" - }, - %Item{ - id: "call-100-llms-using-the-openai-inputoutput-format", - level: 2, - title: "Call 100+ LLMs using the OpenAI Input/Output Format", - content: - "## Call 100+ LLMs using the OpenAI Input/Output Format\n\n- Translate inputs to provider's `completion`, `embedding`, and `image_generation` endpoints\n- [Consistent output](https://docs.litellm.ai/docs/completion/output), text responses will always be available at `['choices'][0]['message']['content']`\n- Retry/fallback logic across multiple deployments (e.g. 
Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)\n- Track spend & set budgets per project [LiteLLM Proxy Server](https://docs.litellm.ai/docs/simple_proxy)" - }, - %Item{ - id: "how-to-use-litellm", - level: 2, - title: "How to use LiteLLM", - content: - "## How to use LiteLLM\nYou can use litellm through either:\n- [LiteLLM Proxy Server](#litellm-proxy-server-llm-gateway) - Server (LLM Gateway) to call 100+ LLMs, load balance, cost tracking across projects\n- [LiteLLM python SDK](#basic-usage) - Python Client to call 100+ LLMs, load balance, cost tracking" - }, - %Item{ - id: "when-to-use-litellm-proxy-server-llm-gateway", - level: 3, - title: "When to use LiteLLM Proxy Server (LLM Gateway)", - content: - "### When to use LiteLLM Proxy Server (LLM Gateway)\ntipUse LiteLLM Proxy Server if you want a central service (LLM Gateway) to access multiple LLMsTypically used by Gen AI Enablement / ML PLatform Teams\n- LiteLLM Proxy gives you a unified interface to access multiple LLMs (100+ LLMs)\n- Track LLM Usage and setup guardrails\n- Customize Logging, Guardrails, Caching per project" - }, - %Item{ - id: "when-to-use-litellm-python-sdk", - level: 3, - title: "When to use LiteLLM Python SDK", - content: - "### When to use LiteLLM Python SDK\ntip Use LiteLLM Python SDK if you want to use LiteLLM in your python codeTypically used by developers building llm projects\n- LiteLLM SDK gives you a unified interface to access multiple LLMs (100+ LLMs) \n- Retry/fallback logic across multiple deployments (e.g. 
Azure/OpenAI) - [Router](https://docs.litellm.ai/docs/routing)" - }, - %Item{ - id: "litellm-python-sdk", - level: 2, - title: "LiteLLM Python SDK", - content: "## LiteLLM Python SDK" - }, - %Item{ - id: "basic-usage", - level: 3, - title: "Basic usage", - content: - "### Basic usage\npip install litellm\n- OpenAI\n- Anthropic\n- VertexAI\n- HuggingFace\n- Azure OpenAI\n- Ollama\n- Openrouterfrom litellm import completion\nimport os\n\n## set ENV variables\nos.environ[\"OPENAI_API_KEY\"]=\"your-api-key\"\n\nresponse = completion(\nmodel=\"gpt-3.5-turbo\",\nmessages=[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}]\n)from litellm import completion\nimport os\n\n## set ENV variables\nos.environ[\"ANTHROPIC_API_KEY\"]=\"your-api-key\"\n\nresponse = completion(\nmodel=\"claude-2\",\nmessages=[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}]\n)from litellm import completion\nimport os\n\n# auth: run 'gcloud auth application-default'\nos.environ[\"VERTEX_PROJECT\"]=\"hardy-device-386718\"\nos.environ[\"VERTEX_LOCATION\"]=\"us-central1\"\n\nresponse = completion(\nmodel=\"chat-bison\",\nmessages=[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}]\n)from litellm import completion\nimport os\n\nos.environ[\"HUGGINGFACE_API_KEY\"]=\"huggingface_api_key\"\n\n# e.g. 
Call 'WizardLM/WizardCoder-Python-34B-V1.0' hosted on HF Inference endpoints\nresponse = completion(\nmodel=\"huggingface/WizardLM/WizardCoder-Python-34B-V1.0\",\nmessages=[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\napi_base=\"https://my-endpoint.huggingface.cloud\"\n)\n\nprint(response)from litellm import completion\nimport os\n\n## set ENV variables\nos.environ[\"AZURE_API_KEY\"]=\"\"\nos.environ[\"AZURE_API_BASE\"]=\"\"\nos.environ[\"AZURE_API_VERSION\"]=\"\"\n\n# azure call\nresponse = completion(\n\"azure/\",\nmessages =[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}]\n)from litellm import completion\n\nresponse = completion(\nmodel=\"ollama/llama2\",\nmessages =[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\napi_base=\"http://localhost:11434\"\n)from litellm import completion\nimport os\n\n## set ENV variables\nos.environ[\"OPENROUTER_API_KEY\"]=\"openrouter_api_key\"\n\nresponse = completion(\nmodel=\"openrouter/google/palm-2-chat-bison\",\nmessages =[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\n)" - }, - %Item{ - id: "streaming", - level: 3, - title: "Streaming", - content: - "### Streaming\nSet `stream=True` in the `completion` args. 
\n- OpenAI\n- Anthropic\n- VertexAI\n- HuggingFace\n- Azure OpenAI\n- Ollama\n- Openrouterfrom litellm import completion\nimport os\n\n## set ENV variables\nos.environ[\"OPENAI_API_KEY\"]=\"your-api-key\"\n\nresponse = completion(\nmodel=\"gpt-3.5-turbo\",\nmessages=[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\nstream=True,\n)from litellm import completion\nimport os\n\n## set ENV variables\nos.environ[\"ANTHROPIC_API_KEY\"]=\"your-api-key\"\n\nresponse = completion(\nmodel=\"claude-2\",\nmessages=[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\nstream=True,\n)from litellm import completion\nimport os\n\n# auth: run 'gcloud auth application-default'\nos.environ[\"VERTEX_PROJECT\"]=\"hardy-device-386718\"\nos.environ[\"VERTEX_LOCATION\"]=\"us-central1\"\n\nresponse = completion(\nmodel=\"chat-bison\",\nmessages=[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\nstream=True,\n)from litellm import completion\nimport os\n\nos.environ[\"HUGGINGFACE_API_KEY\"]=\"huggingface_api_key\"\n\n# e.g. 
Call 'WizardLM/WizardCoder-Python-34B-V1.0' hosted on HF Inference endpoints\nresponse = completion(\nmodel=\"huggingface/WizardLM/WizardCoder-Python-34B-V1.0\",\nmessages=[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\napi_base=\"https://my-endpoint.huggingface.cloud\",\nstream=True,\n)\n\nprint(response)from litellm import completion\nimport os\n\n## set ENV variables\nos.environ[\"AZURE_API_KEY\"]=\"\"\nos.environ[\"AZURE_API_BASE\"]=\"\"\nos.environ[\"AZURE_API_VERSION\"]=\"\"\n\n# azure call\nresponse = completion(\n\"azure/\",\nmessages =[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\nstream=True,\n)from litellm import completion\n\nresponse = completion(\nmodel=\"ollama/llama2\",\nmessages =[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\napi_base=\"http://localhost:11434\",\nstream=True,\n)from litellm import completion\nimport os\n\n## set ENV variables\nos.environ[\"OPENROUTER_API_KEY\"]=\"openrouter_api_key\"\n\nresponse = completion(\nmodel=\"openrouter/google/palm-2-chat-bison\",\nmessages =[{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\nstream=True,\n)" - }, - %Item{ - id: "exception-handling", - level: 3, - title: "Exception handling", - content: - "### Exception handling\nLiteLLM maps exceptions across all supported providers to the OpenAI exceptions. 
All our exceptions inherit from OpenAI's exception types, so any error-handling you have for that, should work out of the box with LiteLLM.from openai.error import OpenAIError\nfrom litellm import completion\n\nos.environ[\"ANTHROPIC_API_KEY\"]=\"bad-key\"\ntry:\n# some code\ncompletion(model=\"claude-instant-1\", messages=[{\"role\":\"user\",\"content\":\"Hey, how's it going?\"}])\nexcept OpenAIError as e:\nprint(e)" - }, - %Item{ - id: "logging-observability---log-llm-inputoutput-docs", - level: 3, - title: "Logging Observability - Log LLM Input/Output ( Docs )", - content: - "### Logging Observability - Log LLM Input/Output ( Docs )\nLiteLLM exposes pre defined callbacks to send data to Lunary, Langfuse, Helicone, Promptlayer, Traceloop, Slackfrom litellm import completion\n\n## set env variables for logging tools\nos.environ[\"HELICONE_API_KEY\"]=\"your-helicone-key\"\nos.environ[\"LANGFUSE_PUBLIC_KEY\"]=\"\"\nos.environ[\"LANGFUSE_SECRET_KEY\"]=\"\"\nos.environ[\"LUNARY_PUBLIC_KEY\"]=\"your-lunary-public-key\"\n\nos.environ[\"OPENAI_API_KEY\"]\n\n# set callbacks\nlitellm.success_callback =[\"lunary\",\"langfuse\",\"helicone\"]# log input/output to lunary, langfuse, supabase, helicone\n\n#openai call\nresponse = completion(model=\"gpt-3.5-turbo\", messages=[{\"role\":\"user\",\"content\":\"Hi - i'm openai\"}])" - }, - %Item{ - id: "track-costs-usage-latency-for-streaming", - level: 3, - title: "Track Costs, Usage, Latency for streaming", - content: - "### Track Costs, Usage, Latency for streaming\nUse a callback function for this - more info on custom callbacks: [https://docs.litellm.ai/docs/observability/custom_callback](https://docs.litellm.ai/docs/observability/custom_callback)import litellm\n\n# track_cost_callback\ndeftrack_cost_callback(\nkwargs,# kwargs to completion\ncompletion_response,# response from completion\nstart_time, end_time # start/end time\n):\ntry:\nresponse_cost = kwargs.get(\"response_cost\",0)\nprint(\"streaming response_cost\", 
response_cost)\nexcept:\npass\n# set callback\nlitellm.success_callback =[track_cost_callback]# set custom callback function\n\n# litellm.completion() call\nresponse = completion(\nmodel=\"gpt-3.5-turbo\",\nmessages=[\n{\n\"role\":\"user\",\n\"content\":\"Hi - i'm openai\"\n}\n],\nstream=True\n)" - }, - %Item{ - id: "litellm-proxy-server-llm-gateway", - level: 2, - title: "LiteLLM Proxy Server (LLM Gateway)", - content: - "## LiteLLM Proxy Server (LLM Gateway)\nTrack spend across multiple projects/peopleThe proxy provides:\n- [Hooks for auth](https://docs.litellm.ai/docs/proxy/virtual_keys#custom-auth)\n- [Hooks for logging](https://docs.litellm.ai/docs/proxy/logging#step-1---create-your-custom-litellm-callback-class)\n- [Cost tracking](https://docs.litellm.ai/docs/proxy/virtual_keys#tracking-spend)\n- [Rate Limiting](https://docs.litellm.ai/docs/proxy/users#set-rate-limits)" - }, - %Item{ - id: "-proxy-endpoints---swagger-docs", - level: 3, - title: "Proxy Endpoints - Swagger Docs", - content: - "### Proxy Endpoints - Swagger Docs\nGo here for a complete tutorial with keys + rate limits - here" - }, - %Item{ - id: "quick-start-proxy---cli", - level: 3, - title: "Quick Start Proxy - CLI", - content: "### Quick Start Proxy - CLI\npip install'litellm[proxy]'" - }, - %Item{ - id: "step-1-start-litellm-proxy", - level: 4, - title: "Step 1: Start litellm proxy", - content: - "#### Step 1: Start litellm proxy\n$ litellm --model huggingface/bigcode/starcoder\n\n#INFO: Proxy running on http://0.0.0.0:4000" - }, - %Item{ - id: "step-2-make-chatcompletions-request-to-proxy", - level: 4, - title: "Step 2: Make ChatCompletions Request to Proxy", - content: - "#### Step 2: Make ChatCompletions Request to Proxy\nimport openai # openai v1.0.0+\nclient = openai.OpenAI(api_key=\"anything\",base_url=\"http://0.0.0.0:4000\")# set proxy to base_url\n# request sent to model set on litellm proxy, `litellm --model`\nresponse = client.chat.completions.create(model=\"gpt-3.5-turbo\", 
messages =[\n{\n\"role\":\"user\",\n\"content\":\"this is a test request, write a short poem\"\n}\n])\n\nprint(response)" - }, - %Item{ - id: "more-details", - level: 2, - title: "More details", - content: - "## More details\n\n- [exception mapping](/docs/exception_mapping)\n- [retries + model fallbacks for completion()](/docs/completion/reliable_completions)\n- [proxy virtual keys & spend management](/docs/proxy/virtual_keys)\n- [E2E Tutorial for LiteLLM Proxy Server](/docs/proxy/docker_quick_start)\n- Call 100+ LLMs using the OpenAI Input/Output Format\n- [How to use LiteLLM](#how-to-use-litellm)\n- When to use LiteLLM Proxy Server (LLM Gateway)\n- When to use LiteLLM Python SDK\n- LiteLLM Python SDK\n- [Basic usage](#basic-usage)\n- [Streaming](#streaming)\n- [Exception handling](#exception-handling)\n- [Logging Observability - Log LLM Input/Output (Docs)](#logging-observability---log-llm-inputoutput-docs)\n- [Track Costs, Usage, Latency for streaming](#track-costs-usage-latency-for-streaming)\n- LiteLLM Proxy Server (LLM Gateway)\n- [Proxy Endpoints - Swagger Docs](#-proxy-endpoints---swagger-docs)\n- [Quick Start Proxy - CLI](#quick-start-proxy---cli)\n- [More details](#more-details)" - } - ] + items = Scraper.run(html) + assert length(items) == 17 + + assert Enum.at(items, 0).level == 1 + assert Enum.at(items, 0).id == nil + assert Enum.at(items, 0).title == "LiteLLM - Getting Started" end end