diff --git a/README.md b/README.md index 64b575d..006e45d 100644 --- a/README.md +++ b/README.md @@ -49,4 +49,6 @@ jupyter-lab * .NET Interactive | Samples - * .NET Interactive - * Elasticsearch Labs -* \ No newline at end of file +* +* +* \ No newline at end of file diff --git a/src/_infra/azure-infra.ipynb b/src/_infra/azure-infra.ipynb new file mode 100644 index 0000000..2a9f139 --- /dev/null +++ b/src/_infra/azure-infra.ipynb @@ -0,0 +1,173 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Deploy Azure Resources\n", + "\n", + "https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/create-resource?pivots=cli" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "pwsh" + }, + "polyglot_notebook": { + "kernelName": "pwsh" + } + }, + "outputs": [], + "source": [ + "az account show\n", + "\n", + "$location = \"eastus\"\n", + "$resourceGroup = \"rg-elasticsearch-playground\"\n", + "$aiResourceName = \"ai-elasticsearch-playground\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "pwsh" + }, + "polyglot_notebook": { + "kernelName": "pwsh" + } + }, + "outputs": [], + "source": [ + "az group create `\n", + " --name rg-elasticsearch-playground `\n", + " --location $location" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "pwsh" + }, + "polyglot_notebook": { + "kernelName": "pwsh" + } + }, + "outputs": [], + "source": [ + "az group show --name rg-elasticsearch-playground" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "pwsh" + }, + "polyglot_notebook": { + "kernelName": "pwsh" + } + }, + "outputs": [], + "source": [ + "az cognitiveservices account create `\n", + " --name $aiResourceName `\n", + " --resource-group $resourceGroup `\n", + " --location $location `\n", + " --kind OpenAI --sku s0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "pwsh" + }, + "polyglot_notebook": { + "kernelName": "pwsh" + } + }, + "outputs": [], + "source": [ + "az cognitiveservices account show `\n", + " --name $aiResourceName `\n", + " --resource-group $resourceGroup | jq -r '.properties.endpoint'\n", + "\n", + "az cognitiveservices account keys list `\n", + " --name $aiResourceName `\n", + " --resource-group $resourceGroup | jq -r .key1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "pwsh" + }, + "polyglot_notebook": { + "kernelName": "pwsh" + } + }, + "outputs": [], + "source": [ + "az cognitiveservices account `\n", + " deployment create `\n", + " --name $aiResourceName `\n", + " --resource-group $resourceGroup `\n", + " --deployment-name \"text-embedding-3-small\" `\n", + " --model-name \"text-embedding-3-small\" `\n", + " --model-version \"1\" `\n", + " --model-format OpenAI `\n", + " --sku-capacity \"1\" `\n", + " --sku-name \"Standard\" `\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "pwsh" + }, + "polyglot_notebook": { + "kernelName": "pwsh" + } + }, + "outputs": [], + "source": [ + "az cognitiveservices account deployment list `\n", + " --name $aiResourceName `\n", + " --resource-group $resourceGroup" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".NET (C#)", + "language": "C#", + "name": ".net-csharp" + }, + "polyglot_notebook": { + "kernelInfo": { + "defaultKernelName": "csharp", + "items": [ + { + "aliases": [], + "languageName": "csharp", + "name": "csharp" + } + ] + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/_infra/get-connection-string.ipynb b/src/_infra/get-connection-string.ipynb index 8db6f44..7f24492 100644 --- a/src/_infra/get-connection-string.ipynb +++ b/src/_infra/get-connection-string.ipynb @@ -31,9 +31,9 @@ "source": [ "using dotenv.net;\n", "\n", - "var variables = DotEnv.Read(new DotEnvOptions(envFilePaths: new[] {\"../../.env\"}));\n", + "var envs = DotEnv.Read(new DotEnvOptions(envFilePaths: new[] {\"../../.env\"}));\n", "\n", - "if (!variables.TryGetValue(\"PLAYGROUND_CONNECTION_STRING\", out var connectionStringInput)\n", + "if (!envs.TryGetValue(\"PLAYGROUND_CONNECTION_STRING\", out var connectionStringInput)\n", " || string.IsNullOrEmpty(connectionStringInput))\n", "{\n", " connectionStringInput = await Microsoft.DotNet.Interactive.Kernel.GetInputAsync(\"Please provide a connection string.\");\n", diff --git a/src/_infra/setup-elastic-infrastructure.ipynb b/src/_infra/setup-elastic-infrastructure.ipynb index 6890fc5..72a2cd5 100644 --- a/src/_infra/setup-elastic-infrastructure.ipynb +++ b/src/_infra/setup-elastic-infrastructure.ipynb @@ -1,21 +1,5 @@ { "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "dotnet_interactive": { - "language": "csharp" - }, - "polyglot_notebook": { - "kernelName": "csharp" - } - }, - "outputs": [], - "source": [ - "#r \"nuget: Testcontainers.Elasticsearch\"" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -39,6 +23,8 @@ }, "outputs": [], "source": [ + "#r \"nuget: Testcontainers.Elasticsearch\"\n", + "\n", "using Testcontainers.Elasticsearch;\n", "\n", "var elasticsearchContainer = new ElasticsearchBuilder()\n", diff --git a/src/elasticsearch-getting-started/00-quickstart.ipynb b/src/elasticsearch-getting-started/00-quick-start.ipynb similarity index 54% rename from src/elasticsearch-getting-started/00-quickstart.ipynb rename to src/elasticsearch-getting-started/00-quick-start.ipynb index 819aafb..783b7b6 100644 --- a/src/elasticsearch-getting-started/00-quickstart.ipynb +++ b/src/elasticsearch-getting-started/00-quick-start.ipynb @@ -13,11 +13,36 @@ }, "outputs": [], "source": [ - "#r \"nuget: Elastic.Clients.Elasticsearch\"\n", - "#r \"nuget: System.Net.Http.Json\"\n", + "#r \"nuget: Elastic.Clients.Elasticsearch, 8.15.10\"\n", + "#r \"nuget: System.Net.Http.Json, 8.0.1\"\n", "#!import ./Utils.cs\n", - "#!import ../_infra/get-connection-string.ipynb\n", + "#!import ../_infra/get-connection-string.ipynb" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialize the Elasticsearch client\n", "\n", + "We can instantiate the [Elastic.Clients.Elasticsearch](https://github.com/elastic/elasticsearch-net) client.\n", + "\n", + "Make sure you have an instance of Elasticsearch running, see [setup-elastic-infrastructure.ipynb](../_infra/setup-elastic-infrastructure.ipynb) for more details.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + }, + "polyglot_notebook": { + "kernelName": "csharp" + } + }, + "outputs": [], + "source": [ "using Elastic.Transport;\n", "using Elastic.Clients.Elasticsearch;\n", "using Elastic.Transport.Products.Elasticsearch;\n", @@ -56,6 +81,47 @@ "display(info);" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup the Embedding Model\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + }, + "polyglot_notebook": { + "kernelName": "csharp" + } + }, + "outputs": [], + "source": [ + "#r \"nuget: Microsoft.Extensions.AI.OpenAI, 9.0.0-preview.*\"\n", + "#r \"nuget: Azure.AI.OpenAI, 2.0.0\"\n", + "#r \"nuget: dotenv.net, 3.2.1\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "using Azure.AI.OpenAI;\n", + "using Microsoft.Extensions.AI;\n", + "\n", + "IEmbeddingGenerator> generator =\n", + " new AzureOpenAIClient(\n", + " new Uri(envs[\"AZURE_OPENAI_ENDPOINT\"]),\n", + " new System.ClientModel.ApiKeyCredential(envs[\"AZURE_OPENAI_APIKEY\"]))\n", + " .AsEmbeddingGenerator(modelId: \"text-embedding-3-small\");" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -88,7 +154,7 @@ " public string Summary { get; set; }\n", "\n", " [JsonPropertyName(\"publish_date\")]\n", - " public DateTimeOffset PublishDate { get; set; }\n", + " public DateTime PublishDate { get; set; }\n", "\n", " [JsonPropertyName(\"num_reviews\")]\n", " public int NumReviews { get; set; }\n", @@ -97,7 +163,7 @@ " public string Publisher { get; set; }\n", "\n", "\n", - " public object? TitleVector { get; set; }\n", + " public float[] TitleVector { get; set; }\n", "}" ] }, @@ -118,6 +184,8 @@ "using Elastic.Clients.Elasticsearch.IndexManagement;\n", "using Elastic.Clients.Elasticsearch.Mapping;\n", "\n", + "var textEmeddingDimension = 384;\n", + "\n", "const string BookIndex = \"book_index\";\n", "var indexDescriptor = new CreateIndexRequestDescriptor(BookIndex)\n", " .Mappings(m => m\n", @@ -125,7 +193,7 @@ " .Text(p => p.Title)\n", " .DenseVector(\n", " Infer.Property(p => p.TitleVector),\n", - " d => d.Dims(384).Index(true).Similarity(DenseVectorSimilarity.Cosine))\n", + " d => d.Dims(textEmeddingDimension).Index(true).Similarity(DenseVectorSimilarity.Cosine))\n", " .Text(p => p.Summary)\n", " .Date(p => p.PublishDate)\n", " .IntegerNumber(p => p.NumReviews)\n", @@ -147,10 +215,11 @@ }, "outputs": [], "source": [ - "var indexResponse = await client.Indices.CreateAsync(indexDescriptor);\n", + "var indexResponse = await client.Indices.CreateAsync(indexDescriptor);\n", "\n", - "display(indexResponse);\n", - "ToJson(DumpGetRequest(indexResponse.DebugInformation)).Display();" + "// display(indexResponse);\n", + "ToJson(DumpGetRequest(indexResponse.DebugInformation))\n", + " .DisplayAs(\"application/json\");" ] }, { @@ -189,10 +258,19 @@ }, "outputs": [], "source": [ - "var bulkResponse = await client.BulkAsync(BookIndex, d => d\n", - " .IndexMany(books, (bd, b) => bd.Index(BookIndex).));\n", + "async Task ToEmbedding(string text) {\n", + " GeneratedEmbeddings> embeddings = await generator\n", + " .GenerateAsync(text, new EmbeddingGenerationOptions{\n", + " AdditionalProperties = new AdditionalPropertiesDictionary{\n", + " {\"dimensions\", textEmeddingDimension}\n", + " }\n", + " });\n", "\n", - "display(DumpGetRequest(bulkResponse));" + " return embeddings.First().Vector.ToArray();\n", + "}\n", + "\n", + "var embedding = await ToEmbedding(\"The quick brown fox jumps over the lazy dog\");\n", + "embedding.Length\n" ] }, { @@ -208,15 +286,24 @@ }, "outputs": [], "source": [ - "var searchResponse = await client.SearchAsync(s => \n", - " s \n", - " .Index(BookIndex)\n", - " .Query(q => q.Match(d => d.Field(f => f.Title).Query(\"JavaScript\")))\n", - ");\n", - "\n", - "DumpGetRequest(searchResponse).Display();\n", + "foreach(var book in books)\n", + "{\n", + " book.TitleVector = await ToEmbedding(book.Title);\n", + " display(book.Title);\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "var bulkResponse = await client.BulkAsync(BookIndex, d => d\n", + " .IndexMany(books, (bd, b) => bd.Index(BookIndex)));\n", "\n", - "searchResponse.Display();" + "bulkResponse.Display();\n", + "// display(DumpGetRequest(bulkResponse));" ] }, { @@ -231,7 +318,49 @@ } }, "outputs": [], - "source": [] + "source": [ + "var searchResponse = await client.SearchAsync(s => s\n", + " .Index(BookIndex)\n", + " .Query(q => q.Match(m => m.Field(f => f.Title).Query(\"JavaScript\")))\n", + ");\n", + "\n", + "ToJson(DumpGetRequest(searchResponse)).Display();\n", + "\n", + "searchResponse.Display();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "var searchQuery = \"JavaScript\";\n", + "var queryEmbedding = await ToEmbedding(searchQuery);\n", + "queryEmbedding.Length.Display();\n", + "var searchResponse = await client.SearchAsync(s => s\n", + " .Index(BookIndex)\n", + " .Knn(d => d\n", + " .Field(f => f.TitleVector)\n", + " .QueryVector(queryEmbedding)\n", + " .k(10)\n", + " .NumCandidates(100))\n", + ");\n", + "\n", + "ToJson(DumpGetRequest(searchResponse)).Display();\n", + "\n", + "searchResponse.Display();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "await client.Indices.DeleteAsync(BookIndex);" + ] } ], "metadata": { diff --git a/src/elasticsearch-getting-started/Utils.cs b/src/elasticsearch-getting-started/Utils.cs index 697eb78..bdd8749 100644 --- a/src/elasticsearch-getting-started/Utils.cs +++ b/src/elasticsearch-getting-started/Utils.cs @@ -2,7 +2,7 @@ using System.IO; using Elastic.Clients.Elasticsearch; using Elastic.Transport.Products.Elasticsearch; - +using System.Text.Json.Nodes; static object DumpGetRequest(ElasticsearchResponse response) { @@ -28,7 +28,7 @@ static object TryParsePayload(string payload) static string ToJson(object paylod) { - return prettyJson(System.Text.Json.JsonSerializer.Serialize(paylod)); + return Indent(prettyJson(System.Text.Json.JsonSerializer.Serialize(paylod))); static string prettyJson(string json) { @@ -47,6 +47,16 @@ static string prettyJson(string json) } } +static string Indent(string json) +{ + json = JsonNode.Parse(json).ToJsonString(new JsonSerializerOptions + { + WriteIndented = true, + // Encoder = JavaScriptEncoder.UnsafeRelaxedJsonEscaping + }); + return json; +} + static string GetRequestFromDebugInformation(string paylod) { var request = paylod