From cf39901e639c445e6006c85a1fe69961841b444d Mon Sep 17 00:00:00 2001 From: cmendible <266546+cmendible@users.noreply.github.com> Date: Mon, 6 Nov 2023 21:45:37 +0100 Subject: [PATCH] added eventhub logger and openai api to the apim --- infra/main.tf | 28 +- infra/modules/apim/azure_openai.json | 811 +++++++++++++++++++++++++++ infra/modules/apim/main.tf | 130 +++++ infra/modules/apim/outputs.tf | 6 +- infra/modules/apim/variables.tf | 6 + infra/modules/ca-back/main.tf | 2 +- infra/modules/evh/main.tf | 32 ++ infra/modules/evh/outputs.tf | 12 + infra/modules/evh/variables.tf | 5 + infra/modules/nsg/nsg_apim.tf | 12 + infra/modules/openai/outputs.tf | 2 +- infra/variables.tf | 6 +- 12 files changed, 1041 insertions(+), 11 deletions(-) create mode 100644 infra/modules/apim/azure_openai.json create mode 100644 infra/modules/evh/main.tf create mode 100644 infra/modules/evh/outputs.tf create mode 100644 infra/modules/evh/variables.tf diff --git a/infra/main.tf b/infra/main.tf index 0e2215b..10e2384 100644 --- a/infra/main.tf +++ b/infra/main.tf @@ -33,15 +33,29 @@ module "nsg" { pe_subnet_id = module.vnet.pe_subnet_id } -module "apim" { - source = "./modules/apim" +module "evh" { + source = "./modules/evh" location = azurerm_resource_group.rg.location resource_group_name = azurerm_resource_group.rg.name - apim_name = var.apim_name - apim_subnet_id = module.vnet.apim_subnet_id - publisher_name = var.publisher_name - publisher_email = var.publisher_email + eventhub_name = var.eventhub_name enable_apim = var.enable_apim +} + +module "apim" { + source = "./modules/apim" + location = azurerm_resource_group.rg.location + resource_group_name = azurerm_resource_group.rg.name + apim_name = var.apim_name + apim_subnet_id = module.vnet.apim_subnet_id + publisher_name = var.publisher_name + publisher_email = var.publisher_email + enable_apim = var.enable_apim + eventhub_id = module.evh.eventhub_id + eventhub_name = module.evh.eventhub_name + eventhub_connection_string = 
module.evh.eventhub_connection_string + openai_service_name = module.openai.openai_service_name + openai_service_endpoint = module.openai.openai_endpoint + tenant_id = data.azurerm_subscription.current.tenant_id depends_on = [module.nsg] } @@ -132,7 +146,7 @@ module "ca_back" { storage_container_name = module.st.storage_container_name search_service_name = module.search.search_service_name search_index_name = module.search.search_index_name - openai_service_name = module.openai.openai_service_name + openai_service_name = var.enable_apim ? module.apim.gateway_url : module.openai.openai_endpoint tenant_id = data.azurerm_subscription.current.tenant_id managed_identity_client_id = module.mi.client_id enable_entra_id_authentication = var.enable_entra_id_authentication diff --git a/infra/modules/apim/azure_openai.json b/infra/modules/apim/azure_openai.json new file mode 100644 index 0000000..aff3a63 --- /dev/null +++ b/infra/modules/apim/azure_openai.json @@ -0,0 +1,811 @@ +{ + "openapi": "3.0.0", + "info": { + "title": "Azure OpenAI Service API", + "description": "Azure OpenAI APIs for completions and search", + "version": "2023-03-15-preview" + }, + "servers": [ + { + "url": "{endpoint}openai", + "variables": { + "endpoint": { + "default": "{servicename}.openai.azure.com" + } + } + } + ], + "security": [ + { + "bearer": [ + "api.read" + ] + }, + { + "apiKey": [] + } + ], + "paths": { + "/deployments/{deployment-id}/completions": { + "post": { + "summary": "Creates a completion for the provided prompt, parameters and chosen model.", + "operationId": "Completions_Create", + "parameters": [ + { + "in": "path", + "name": "deployment-id", + "required": true, + "schema": { + "type": "string", + "example": "davinci", + "description": "Deployment id of the model which was deployed." 
+ } + }, + { + "in": "query", + "name": "api-version", + "required": true, + "schema": { + "type": "string", + "example": "2023-03-15-preview", + "description": "api version" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "prompt": { + "description": "The prompt(s) to generate completions for, encoded as a string or array of strings.\nNote that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document. Maximum allowed size of string list is 2048.", + "oneOf": [ + { + "type": "string", + "default": "", + "example": "This is a test.", + "nullable": true + }, + { + "type": "array", + "items": { + "type": "string", + "default": "", + "example": "This is a test.", + "nullable": false + }, + "description": "Array size minimum of 1 and maximum of 2048" + } + ] + }, + "max_tokens": { + "description": "The token count of your prompt plus max_tokens cannot exceed the model's context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096). Has minimum of 0.", + "type": "integer", + "default": 16, + "example": 16, + "nullable": true + }, + "temperature": { + "description": "What sampling temperature to use. Higher values mean the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer.\nWe generally recommend altering this or top_p but not both.", + "type": "number", + "default": 1, + "example": 1, + "nullable": true + }, + "top_p": { + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. 
So 0.1 means only the tokens comprising the top 10% probability mass are considered.\nWe generally recommend altering this or temperature but not both.", + "type": "number", + "default": 1, + "example": 1, + "nullable": true + }, + "logit_bias": { + "description": "Defaults to null. Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this tokenizer tool (which works for both GPT-2 and GPT-3) to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass {\"50256\" : -100} to prevent the <|endoftext|> token from being generated.", + "type": "object", + "nullable": false + }, + "user": { + "description": "A unique identifier representing your end-user, which can help monitor and detect abuse", + "type": "string", + "nullable": false + }, + "n": { + "description": "How many completions to generate for each prompt. Minimum of 1 and maximum of 128 allowed.\nNote: Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for max_tokens and stop.", + "type": "integer", + "default": 1, + "example": 1, + "nullable": true + }, + "stream": { + "description": "Whether to stream back partial progress. 
If set, tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a data: [DONE] message.", + "type": "boolean", + "nullable": true, + "default": false + }, + "logprobs": { + "description": "Include the log probabilities on the logprobs most likely tokens, as well as the chosen tokens. For example, if logprobs is 5, the API will return a list of the 5 most likely tokens. The API will always return the logprob of the sampled token, so there may be up to logprobs+1 elements in the response.\nMinimum of 0 and maximum of 5 allowed.", + "type": "integer", + "default": null, + "nullable": true + }, + "model": { + "type": "string", + "example": "davinci", + "nullable": true, + "description": "ID of the model to use. You can use the Models_List operation to see all of your available models, or see our Models_Get overview for descriptions of them." + }, + "suffix": { + "type": "string", + "nullable": true, + "description": "The suffix that comes after a completion of inserted text." + }, + "echo": { + "description": "Echo back the prompt in addition to the completion", + "type": "boolean", + "default": false, + "nullable": true + }, + "stop": { + "description": "Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.", + "oneOf": [ + { + "type": "string", + "default": "<|endoftext|>", + "example": "\n", + "nullable": true + }, + { + "type": "array", + "items": { + "type": "string", + "example": [ + "\n" + ], + "nullable": false + }, + "description": "Array minimum size of 1 and maximum of 4" + } + ] + }, + "completion_config": { + "type": "string", + "nullable": true + }, + "presence_penalty": { + "description": "Number between -2.0 and 2.0. 
Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "type": "number", + "default": 0 + }, + "frequency_penalty": { + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "type": "number", + "default": 0 + }, + "best_of": { + "description": "Generates best_of completions server-side and returns the \"best\" (the one with the highest log probability per token). Results cannot be streamed.\nWhen used with n, best_of controls the number of candidate completions and n specifies how many to return – best_of must be greater than n.\nNote: Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for max_tokens and stop. Has maximum value of 128.", + "type": "integer" + } + } + }, + "example": { + "prompt": "Negate the following sentence.The price for bubblegum increased on thursday.\n\n Negated Sentence:", + "max_tokens": 50 + } + } + } + }, + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "object": { + "type": "string" + }, + "created": { + "type": "integer" + }, + "model": { + "type": "string" + }, + "choices": { + "type": "array", + "items": { + "type": "object", + "properties": { + "text": { + "type": "string" + }, + "index": { + "type": "integer" + }, + "logprobs": { + "type": "object", + "properties": { + "tokens": { + "type": "array", + "items": { + "type": "string" + } + }, + "token_logprobs": { + "type": "array", + "items": { + "type": "number" + } + }, + "top_logprobs": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": { + "type": "number" + } + } + }, + "text_offset": 
{ + "type": "array", + "items": { + "type": "integer" + } + } + } + }, + "finish_reason": { + "type": "string" + } + } + } + }, + "usage": { + "type": "object", + "properties": { + "completion_tokens": { + "type": "number", + "format": "int32" + }, + "prompt_tokens": { + "type": "number", + "format": "int32" + }, + "total_tokens": { + "type": "number", + "format": "int32" + } + }, + "required": [ + "prompt_tokens", + "total_tokens", + "completion_tokens" + ] + } + }, + "required": [ + "id", + "object", + "created", + "model", + "choices" + ] + }, + "example": { + "model": "davinci", + "object": "text_completion", + "id": "cmpl-4509KAos68kxOqpE2uYGw81j6m7uo", + "created": 1637097562, + "choices": [ + { + "index": 0, + "text": "The price for bubblegum decreased on thursday.", + "logprobs": null, + "finish_reason": "stop" + } + ] + } + } + }, + "headers": { + "apim-request-id": { + "description": "Request ID for troubleshooting purposes", + "schema": { + "type": "string" + } + } + } + }, + "default": { + "description": "Service unavailable", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/errorResponse" + } + } + }, + "headers": { + "apim-request-id": { + "description": "Request ID for troubleshooting purposes", + "schema": { + "type": "string" + } + } + } + } + } + } + }, + "/deployments/{deployment-id}/embeddings": { + "post": { + "summary": "Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms.", + "operationId": "embeddings_create", + "parameters": [ + { + "in": "path", + "name": "deployment-id", + "required": true, + "schema": { + "type": "string", + "example": "ada-search-index-v1" + }, + "description": "The deployment id of the model which was deployed." 
+ }, + { + "in": "query", + "name": "api-version", + "required": true, + "schema": { + "type": "string", + "example": "2023-03-15-preview", + "description": "api version" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "additionalProperties": true, + "properties": { + "input": { + "description": "Input text to get embeddings for, encoded as a string. To get embeddings for multiple inputs in a single request, pass an array of strings. Each input must not exceed 2048 tokens in length.\nUnless you are embedding code, we suggest replacing newlines (\\n) in your input with a single space, as we have observed inferior results when newlines are present.", + "oneOf": [ + { + "type": "string", + "default": "", + "example": "This is a test.", + "nullable": true + }, + { + "type": "array", + "minItems": 1, + "maxItems": 2048, + "items": { + "type": "string", + "minLength": 1, + "example": "This is a test.", + "nullable": false + } + } + ] + }, + "user": { + "description": "A unique identifier representing your end-user, which can help monitor and detect abuse.", + "type": "string", + "nullable": false + }, + "input_type": { + "description": "input type of embedding search to use", + "type": "string", + "example": "query" + }, + "model": { + "type": "string", + "description": "ID of the model to use. 
You can use the Models_List operation to see all of your available models, or see our Models_Get overview for descriptions of them.", + "nullable": false + } + }, + "required": [ + "input" + ] + } + } + } + }, + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "object": { + "type": "string" + }, + "model": { + "type": "string" + }, + "data": { + "type": "array", + "items": { + "type": "object", + "properties": { + "index": { + "type": "integer" + }, + "object": { + "type": "string" + }, + "embedding": { + "type": "array", + "items": { + "type": "number" + } + } + }, + "required": [ + "index", + "object", + "embedding" + ] + } + }, + "usage": { + "type": "object", + "properties": { + "prompt_tokens": { + "type": "integer" + }, + "total_tokens": { + "type": "integer" + } + }, + "required": [ + "prompt_tokens", + "total_tokens" + ] + } + }, + "required": [ + "object", + "model", + "data", + "usage" + ] + } + } + } + } + } + } + }, + "/deployments/{deployment-id}/chat/completions": { + "post": { + "summary": "Creates a completion for the chat message", + "operationId": "ChatCompletions_Create", + "parameters": [ + { + "in": "path", + "name": "deployment-id", + "required": true, + "schema": { + "type": "string", + "description": "Deployment id of the model which was deployed." 
+ } + }, + { + "in": "query", + "name": "api-version", + "required": true, + "schema": { + "type": "string", + "example": "2023-03-15-preview", + "description": "api version" + } + } + ], + "requestBody": { + "required": true, + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "messages": { + "description": "The messages to generate chat completions for, in the chat format.", + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "properties": { + "role": { + "type": "string", + "enum": [ + "system", + "user", + "assistant" + ], + "description": "The role of the author of this message." + }, + "content": { + "type": "string", + "description": "The contents of the message" + }, + "name": { + "type": "string", + "description": "The name of the user in a multi-user chat" + } + }, + "required": [ + "role", + "content" + ] + } + }, + "temperature": { + "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.\nWe generally recommend altering this or `top_p` but not both.", + "type": "number", + "minimum": 0, + "maximum": 2, + "default": 1, + "example": 1, + "nullable": true + }, + "top_p": { + "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. 
So 0.1 means only the tokens comprising the top 10% probability mass are considered.\nWe generally recommend altering this or `temperature` but not both.", + "type": "number", + "minimum": 0, + "maximum": 1, + "default": 1, + "example": 1, + "nullable": true + }, + "n": { + "description": "How many chat completion choices to generate for each input message.", + "type": "integer", + "minimum": 1, + "maximum": 128, + "default": 1, + "example": 1, + "nullable": true + }, + "stream": { + "description": "If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as data-only server-sent events as they become available, with the stream terminated by a `data: [DONE]` message.", + "type": "boolean", + "nullable": true, + "default": false + }, + "stop": { + "description": "Up to 4 sequences where the API will stop generating further tokens.", + "oneOf": [ + { + "type": "string", + "nullable": true + }, + { + "type": "array", + "items": { + "type": "string", + "nullable": false + }, + "minItems": 1, + "maxItems": 4, + "description": "Array minimum size of 1 and maximum of 4" + } + ], + "default": null + }, + "max_tokens": { + "description": "The maximum number of tokens allowed for the generated answer. By default, the number of tokens the model can return will be (4096 - prompt tokens).", + "type": "integer", + "default": "inf" + }, + "presence_penalty": { + "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.", + "type": "number", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "frequency_penalty": { + "description": "Number between -2.0 and 2.0. 
Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.", + "type": "number", + "default": 0, + "minimum": -2, + "maximum": 2 + }, + "logit_bias": { + "description": "Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.", + "type": "object", + "nullable": true + }, + "user": { + "description": "A unique identifier representing your end-user, which can help Azure OpenAI to monitor and detect abuse.", + "type": "string", + "example": "user-1234", + "nullable": false + } + }, + "required": [ + "messages" + ] + }, + "example": { + "model": "gpt-35-turbo", + "messages": [ + { + "role": "user", + "content": "Hello!" + } + ] + } + } + } + }, + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "type": "object", + "properties": { + "id": { + "type": "string" + }, + "object": { + "type": "string" + }, + "created": { + "type": "integer", + "format": "unixtime" + }, + "model": { + "type": "string" + }, + "choices": { + "type": "array", + "items": { + "type": "object", + "properties": { + "index": { + "type": "integer" + }, + "message": { + "type": "object", + "properties": { + "role": { + "type": "string", + "enum": [ + "system", + "user", + "assistant" + ], + "description": "The role of the author of this message." 
+ }, + "content": { + "type": "string", + "description": "The contents of the message" + } + }, + "required": [ + "role", + "content" + ] + }, + "finish_reason": { + "type": "string" + } + } + } + }, + "usage": { + "type": "object", + "properties": { + "prompt_tokens": { + "type": "integer" + }, + "completion_tokens": { + "type": "integer" + }, + "total_tokens": { + "type": "integer" + } + }, + "required": [ + "prompt_tokens", + "completion_tokens", + "total_tokens" + ] + } + }, + "required": [ + "id", + "object", + "created", + "model", + "choices" + ] + }, + "example": { + "id": "chatcmpl-123", + "object": "chat.completion", + "created": 1677652288, + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "\n\nHello there, how may I assist you today?" + }, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 9, + "completion_tokens": 12, + "total_tokens": 21 + } + } + } + } + } + } + } + } + }, + "components": { + "schemas": { + "errorResponse": { + "type": "object", + "properties": { + "error": { + "type": "object", + "properties": { + "code": { + "type": "string" + }, + "message": { + "type": "string" + }, + "param": { + "type": "string" + }, + "type": { + "type": "string" + } + } + } + } + } + }, + "securitySchemes": { + "bearer": { + "type": "oauth2", + "flows": { + "implicit": { + "authorizationUrl": "https://login.microsoftonline.com/common/oauth2/v2.0/authorize", + "scopes": {} + } + }, + "x-tokenInfoFunc": "api.middleware.auth.bearer_auth", + "x-scopeValidateFunc": "api.middleware.auth.validate_scopes" + }, + "apiKey": { + "type": "apiKey", + "name": "api-key", + "in": "header" + } + } + } +} \ No newline at end of file diff --git a/infra/modules/apim/main.tf b/infra/modules/apim/main.tf index 8af695c..1f42471 100644 --- a/infra/modules/apim/main.tf +++ b/infra/modules/apim/main.tf @@ -1,3 +1,7 @@ +locals { + logger_name = "openai-logger" +} + resource "azurerm_api_management" "apim" { count = var.enable_apim ? 
1 : 0 name = var.apim_name @@ -12,3 +16,129 @@ resource "azurerm_api_management" "apim" { subnet_id = var.apim_subnet_id } } + +// TODO: https://learn.microsoft.com/en-us/azure/api-management/api-management-howto-log-event-hubs?tabs=bicep#logger-with-system-assigned-managed-identity-credentialss +resource "azurerm_api_management_logger" "logger" { + count = var.enable_apim ? 1 : 0 + name = local.logger_name + api_management_name = azurerm_api_management.apim[0].name + resource_group_name = var.resource_group_name + resource_id = var.eventhub_id + + eventhub { + name = var.eventhub_name + connection_string = var.eventhub_connection_string + } +} + +// https://learn.microsoft.com/en-us/semantic-kernel/deploy/use-ai-apis-with-api-management#setup-azure-api-management-instance-with-azure-openai-api +resource "azurerm_api_management_api" "openai" { + count = var.enable_apim ? 1 : 0 + name = "openai-api" + resource_group_name = var.resource_group_name + api_management_name = azurerm_api_management.apim[0].name + revision = "1" + display_name = "Azure Open AI API" + path = "openai" + protocols = ["https"] + subscription_required = false + + import { + content_format = "openapi" + content_value = replace(replace(file("${path.module}/azure_openai.json"), "{endpoint}", var.openai_service_endpoint), "{servicename}", var.openai_service_name) + } +} + +resource "azurerm_api_management_named_value" "tenant_id" { + count = var.enable_apim ? 1 : 0 + name = "tenant-id" + resource_group_name = var.resource_group_name + api_management_name = azurerm_api_management.apim[0].name + display_name = "TENANT_ID" + value = var.tenant_id +} + +resource "azurerm_api_management_named_value" "logger_compliance" { + count = var.enable_apim ? 
1 : 0 + name = "logger-compliance" + resource_group_name = var.resource_group_name + api_management_name = azurerm_api_management.apim[0].name + display_name = "YOUR_LOGGER_COMPLIANCE" + value = local.logger_name +} + +resource "azurerm_api_management_named_value" "logger_chargeback" { + count = var.enable_apim ? 1 : 0 + name = "logger-chargeback" + resource_group_name = var.resource_group_name + api_management_name = azurerm_api_management.apim[0].name + display_name = "YOUR_LOGGER_CHARGEBACK" + value = local.logger_name +} + +// https://github.com/mattfeltonma/azure-openai-apim/blob/main/apim-policies/apim-policy-event-hub-logging.xml +resource "azurerm_api_management_api_policy" "policy" { + count = var.enable_apim ? 1 : 0 + api_name = azurerm_api_management_api.openai[0].name + api_management_name = azurerm_api_management.apim[0].name + resource_group_name = var.resource_group_name + + xml_content = < + + + + + + + https://sts.windows.net/{{TENANT_ID}}/ + + + + https://cognitiveservices.azure.com + + + + + + + @{ + var requestBody = context.Request.Body?.As(true); + + string prompt = requestBody["prompt"]?.ToString(); + string messages = requestBody["messages"]?.ToString(); + + return new JObject( + new JProperty("event-time", DateTime.UtcNow.ToString()), + new JProperty("message-id", context.Variables["message-id"]), + new JProperty("appid", context.Request.Headers.GetValueOrDefault("Authorization",string.Empty).Split(' ').Last().AsJwt().Claims.GetValueOrDefault("appid", string.Empty)), + new JProperty("operationname", context.Operation.Id), + new JProperty("prompt", prompt), + new JProperty("messages", messages) + ).ToString(); + } + + + + + + + + + + + + XML +} diff --git a/infra/modules/apim/outputs.tf b/infra/modules/apim/outputs.tf index f0144bd..df06350 100644 --- a/infra/modules/apim/outputs.tf +++ b/infra/modules/apim/outputs.tf @@ -1,3 +1,7 @@ output "apim_name" { - value = var.enable_apim ? 
azurerm_api_management.apim[0].name : "" + value = var.enable_apim ? azurerm_api_management.apim[0].name : "" +} + +output "gateway_url" { + value = var.enable_apim ? azurerm_api_management.apim[0].gateway_url : "" } diff --git a/infra/modules/apim/variables.tf b/infra/modules/apim/variables.tf index cf78fb1..1f6cf29 100644 --- a/infra/modules/apim/variables.tf +++ b/infra/modules/apim/variables.tf @@ -4,5 +4,11 @@ variable "apim_name" {} variable "publisher_name" {} variable "publisher_email" {} variable "apim_subnet_id" {} +variable "eventhub_name" {} +variable "eventhub_connection_string" {} +variable "eventhub_id" {} +variable "openai_service_name" {} +variable "openai_service_endpoint" {} +variable "tenant_id" {} variable "enable_apim" {} diff --git a/infra/modules/ca-back/main.tf b/infra/modules/ca-back/main.tf index 7e42e41..65f491e 100644 --- a/infra/modules/ca-back/main.tf +++ b/infra/modules/ca-back/main.tf @@ -40,7 +40,7 @@ resource "azapi_resource" "ca_back" { containers = [ { name = "azseachopenai" - image = "cmendibl3/azseachopenai" + image = "cmendibl3/azseachopenai:0.1.0" resources = { cpu = 0.5 memory = "1Gi" diff --git a/infra/modules/evh/main.tf b/infra/modules/evh/main.tf new file mode 100644 index 0000000..d6e21bb --- /dev/null +++ b/infra/modules/evh/main.tf @@ -0,0 +1,32 @@ +resource "azurerm_eventhub_namespace" "evh" { + count = var.enable_apim ? 1 : 0 + name = var.eventhub_name + location = var.location + resource_group_name = var.resource_group_name + sku = "Standard" + capacity = 1 + + # network_rulesets { + # default_action = "Deny" + # trusted_service_access_enabled = true + + # ip_rule { + # ip_mask = "0.0.0.0" + # } + # } + + lifecycle { + ignore_changes = [ + network_rulesets + ] + } +} + +resource "azurerm_eventhub" "hub" { + count = var.enable_apim ? 
1 : 0 + name = var.eventhub_name + namespace_name = azurerm_eventhub_namespace.evh[0].name + resource_group_name = var.resource_group_name + partition_count = 2 + message_retention = 1 +} diff --git a/infra/modules/evh/outputs.tf b/infra/modules/evh/outputs.tf new file mode 100644 index 0000000..3b36004 --- /dev/null +++ b/infra/modules/evh/outputs.tf @@ -0,0 +1,12 @@ +output "eventhub_name" { + value = var.enable_apim ? azurerm_eventhub_namespace.evh[0].name : "" +} + +output "eventhub_id" { + value = var.enable_apim ? azurerm_eventhub_namespace.evh[0].id : "" +} + +output "eventhub_connection_string" { + value = var.enable_apim ? azurerm_eventhub_namespace.evh[0].default_primary_connection_string : "" +} + diff --git a/infra/modules/evh/variables.tf b/infra/modules/evh/variables.tf new file mode 100644 index 0000000..88313c0 --- /dev/null +++ b/infra/modules/evh/variables.tf @@ -0,0 +1,5 @@ +variable "resource_group_name" {} +variable "location" {} +variable "eventhub_name" {} + +variable "enable_apim" {} diff --git a/infra/modules/nsg/nsg_apim.tf b/infra/modules/nsg/nsg_apim.tf index 5810037..cc3481d 100644 --- a/infra/modules/nsg/nsg_apim.tf +++ b/infra/modules/nsg/nsg_apim.tf @@ -27,6 +27,18 @@ resource "azurerm_network_security_group" "nsg_apim" { destination_address_prefix = "VirtualNetwork" } + security_rule { + name = "allowanyhttpsinbound" + priority = 310 + direction = "Inbound" + access = "Allow" + protocol = "Tcp" + source_port_range = "*" + destination_port_range = "443" + source_address_prefix = "*" + destination_address_prefix = "VirtualNetwork" + } + security_rule { name = "dependency-on-storage" priority = 100 diff --git a/infra/modules/openai/outputs.tf b/infra/modules/openai/outputs.tf index cd69a6b..3ffbc36 100644 --- a/infra/modules/openai/outputs.tf +++ b/infra/modules/openai/outputs.tf @@ -13,7 +13,7 @@ output "gpt_deployment_name" { output "embedding_deployment_name" { value = azurerm_cognitive_deployment.embedding.name } - + # output 
"secondary_openai_endpoint" { # value = azurerm_cognitive_account.secondary_openai.endpoint # } diff --git a/infra/variables.tf b/infra/variables.tf index 0831656..fb61ea2 100644 --- a/infra/variables.tf +++ b/infra/variables.tf @@ -30,6 +30,10 @@ variable "storage_account_name" { default = "stgenai" } +variable "eventhub_name" { + default = "evh-activate-genai" +} + variable "apim_name" { default = "apim-activate-genai" } @@ -75,4 +79,4 @@ variable "enable_entra_id_authentication" { variable "enable_apim" { default = false -} \ No newline at end of file +}