From 1ff4251e1be996fb8f889d763d1e1d58b68073d2 Mon Sep 17 00:00:00 2001 From: Charles Marion Date: Wed, 4 Sep 2024 14:28:47 +0000 Subject: [PATCH 1/3] feat: Add usage monitoring to Bedrock Claude --- cli/magic-config.ts | 9 + lib/aws-genai-llm-chatbot-stack.ts | 1 + .../request-handler/adapters/base/base.py | 156 +++++++++++++++++- .../request-handler/adapters/bedrock/base.py | 70 ++++++++ .../adapters/bedrock/claude.py | 82 +-------- .../functions/request-handler/index.py | 24 ++- lib/monitoring/index.ts | 56 ++++++- lib/shared/layers/common/requirements.txt | 3 +- .../langchain/chat_message_history.py | 12 +- .../langchain/workspace_retriever.py | 12 +- lib/shared/types.ts | 1 + 11 files changed, 326 insertions(+), 100 deletions(-) diff --git a/cli/magic-config.ts b/cli/magic-config.ts index 008040fdb..e2c55ec1b 100644 --- a/cli/magic-config.ts +++ b/cli/magic-config.ts @@ -201,6 +201,7 @@ const embeddingModels = [ // Advanced settings + options.advancedMonitoring = config.advancedMonitoring; options.createVpcEndpoints = config.vpc?.createVpcEndpoints; options.logRetention = config.logRetention; options.privateWebsite = config.privateWebsite; @@ -827,6 +828,13 @@ async function processCreateOptions(options: any): Promise { } }, }, + { + type: "confirm", + name: "advancedMonitoring", + message: + "Do you want to enable custom metrics and advanced monitoring?", + initial: options.advancedMonitoring || false, + }, { type: "confirm", name: "createVpcEndpoints", @@ -1106,6 +1114,7 @@ async function processCreateOptions(options: any): Promise { } : undefined, privateWebsite: advancedSettings.privateWebsite, + advancedMonitoring: advancedSettings.advancedMonitoring, logRetention: advancedSettings.logRetention ? Number(advancedSettings.logRetention) : undefined, diff --git a/lib/aws-genai-llm-chatbot-stack.ts b/lib/aws-genai-llm-chatbot-stack.ts index f979c3fd9..5fd0ed056 100644 --- a/lib/aws-genai-llm-chatbot-stack.ts +++ b/lib/aws-genai-llm-chatbot-stack.ts @@ -223,6 +223,7 @@ export class AwsGenAILLMChatbotStack extends cdk.Stack { const monitoringStack = new cdk.NestedStack(this, "MonitoringStack"); new Monitoring(monitoringStack, "Monitoring", { prefix: props.config.prefix, + advancedMonitoring: props.config.advancedMonitoring === true, appsycnApi: chatBotApi.graphqlApi, appsyncResolversLogGroups: chatBotApi.resolvers.map((r) => { return LogGroup.fromLogGroupName( diff --git a/lib/model-interfaces/langchain/functions/request-handler/adapters/base/base.py b/lib/model-interfaces/langchain/functions/request-handler/adapters/base/base.py index 66a28440a..65eaec1a2 100644 --- a/lib/model-interfaces/langchain/functions/request-handler/adapters/base/base.py +++ b/lib/model-interfaces/langchain/functions/request-handler/adapters/base/base.py @@ -3,7 +3,11 @@ from enum import Enum from aws_lambda_powertools import Logger from langchain.callbacks.base import BaseCallbackHandler -from langchain.chains import ConversationalRetrievalChain, ConversationChain +from langchain.chains.conversation.base import ConversationChain +from langchain.chains import ConversationalRetrievalChain +from langchain.chains.retrieval import create_retrieval_chain +from langchain.chains.history_aware_retriever import create_history_aware_retriever +from langchain.chains.combine_documents import create_stuff_documents_chain from langchain.memory import ConversationBufferMemory from langchain.prompts.prompt import PromptTemplate from langchain.chains.conversational_retrieval.prompts import ( @@ -15,6 +19,13 @@ from 
genai_core.langchain import WorkspaceRetriever, DynamoDBChatMessageHistory from genai_core.types import ChatbotMode +from langchain_core.runnables.history import RunnableWithMessageHistory +from langchain_core.outputs import LLMResult, ChatGeneration +from langchain_core.messages.ai import AIMessage, AIMessageChunk +from langchain_core.messages.human import HumanMessage +from langchain_core.language_models.chat_models import BaseChatModel +from langchain import hub + logger = Logger() @@ -24,13 +35,40 @@ class Mode(Enum): class LLMStartHandler(BaseCallbackHandler): prompts = [] + usage = None + # Langchain callbacks + # https://python.langchain.com/v0.2/docs/concepts/#callbacks def on_llm_start( self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any ) -> Any: - logger.info(prompts) self.prompts.append(prompts) + def on_llm_end( + self, response: LLMResult, *, run_id, parent_run_id, **kwargs: Any + ) -> Any: + generation = response.generations[0][0] # only one llm request + if ( + generation is not None + and isinstance(generation, ChatGeneration) + and isinstance(generation.message, AIMessage) + ): + ## In case of rag there could be 2 llm calls. + if self.usage is None: + self.usage = { + "input_tokens": 0, + "output_tokens": 0, + "total_tokens": 0, + } + self.usage = { + "input_tokens": self.usage.get("input_tokens") + + generation.message.usage_metadata.get("input_tokens"), + "output_tokens": self.usage.get("output_tokens") + + generation.message.usage_metadata.get("output_tokens"), + "total_tokens": self.usage.get("total_tokens") + + generation.message.usage_metadata.get("total_tokens"), + } + class ModelAdapter: def __init__( @@ -101,6 +139,113 @@ def get_condense_question_prompt(self): def get_qa_prompt(self): return QA_PROMPT + def run_with_chain_v2(self, user_prompt, workspace_id=None): + if not self.llm: + raise ValueError("llm must be set") + + self.callback_handler.prompts = [] + documents = [] + retriever = None + + if workspace_id: + retriever = WorkspaceRetriever(workspace_id=workspace_id) + ## Only stream the last llm call (otherwise the internal llm response will be visible) + llm_without_streaming = self.get_llm({"streaming": False}) + history_aware_retriever = create_history_aware_retriever( + llm_without_streaming, + retriever, + self.get_condense_question_prompt(), + ) + question_answer_chain = create_stuff_documents_chain( + self.llm, self.get_qa_prompt(), + ) + chain = create_retrieval_chain( + history_aware_retriever, question_answer_chain + ) + else: + chain = self.get_prompt() | self.llm + + + conversation = RunnableWithMessageHistory( + chain, + lambda session_id: self.chat_history, + history_messages_key="chat_history", + input_messages_key="input", + output_messages_key="output" + ) + + config = {"configurable": {"session_id": self.session_id}} + try: + if self.model_kwargs.get("streaming", False): + answer = "" + for chunk in conversation.stream( + input={"input": user_prompt}, config=config + ): + logger.info("chunk", chunk=chunk) + if "answer" in chunk: + answer = answer + chunk["answer"] + elif isinstance(chunk, AIMessageChunk): + for c in chunk.content: + if "text" in c: + answer = answer + c.get("text") + else: + response = conversation.invoke( + input={"input": user_prompt}, config=config + ) + if "answer" in response: + answer = response.get("answer") # Rag flow + else: + answer = response.content + except Exception as e: + logger.exception(e) + raise e + + if workspace_id: + # In the RAG flow, the history is not updated automatically + 
self.chat_history.add_message(HumanMessage(user_prompt)) + self.chat_history.add_message(AIMessage(answer)) + if retriever is not None: + documents = [ + { + "page_content": doc.page_content, + "metadata": doc.metadata, + } + for doc in retriever.get_last_search_documents() + ] + + metadata = { + "modelId": self.model_id, + "modelKwargs": self.model_kwargs, + "mode": self._mode, + "sessionId": self.session_id, + "userId": self.user_id, + "documents": documents, + "prompts": self.callback_handler.prompts, + "usage": self.callback_handler.usage, + } + + self.chat_history.add_metadata(metadata) + + if ( + self.callback_handler.usage is not None + and "total_tokens" in self.callback_handler.usage + ): + # Used by Cloudwatch filters to generate a metric of token usage. + logger.info( + "Usage Metric", + # Each unique value of model id will create a new cloudwatch metric (each one has a cost) + model=self.model_id, + metric_type="token_usage", + value=self.callback_handler.usage.get("total_tokens"), + ) + + return { + "sessionId": self.session_id, + "type": "text", + "content": answer, + "metadata": metadata, + } + def run_with_chain(self, user_prompt, workspace_id=None): if not self.llm: raise ValueError("llm must be set") @@ -120,7 +265,7 @@ def run_with_chain(self, user_prompt, workspace_id=None): callbacks=[self.callback_handler], ) result = conversation({"question": user_prompt}) - logger.info(result["source_documents"]) + logger.debug(result["source_documents"]) documents = [ { "page_content": doc.page_content, @@ -184,6 +329,9 @@ def run(self, prompt, workspace_id=None, *args, **kwargs): logger.debug(f"mode: {self._mode}") if self._mode == ChatbotMode.CHAIN.value: - return self.run_with_chain(prompt, workspace_id) + if isinstance(self.llm, BaseChatModel): + return self.run_with_chain_v2(prompt, workspace_id) + else: + return self.run_with_chain(prompt, workspace_id) raise ValueError(f"unknown mode {self._mode}") diff --git a/lib/model-interfaces/langchain/functions/request-handler/adapters/bedrock/base.py b/lib/model-interfaces/langchain/functions/request-handler/adapters/bedrock/base.py index 9b41f0468..5a68a7f80 100644 --- a/lib/model-interfaces/langchain/functions/request-handler/adapters/bedrock/base.py +++ b/lib/model-interfaces/langchain/functions/request-handler/adapters/bedrock/base.py @@ -14,6 +14,11 @@ ) +from ..base import ModelAdapter +import genai_core.clients +from langchain_aws import ChatBedrockConverse +from langchain.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder + def get_guardrails() -> dict: if "BEDROCK_GUARDRAILS_ID" in os.environ: return { @@ -23,6 +28,71 @@ def get_guardrails() -> dict: return {} +class BedrockChatAdapter(ModelAdapter): + def __init__(self, model_id, *args, **kwargs): + self.model_id = model_id + + super().__init__(*args, **kwargs) + + def get_qa_prompt(self): + system_prompt = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. \n\n{context}" + return ChatPromptTemplate.from_messages( + [ + ("system", system_prompt), + MessagesPlaceholder("chat_history"), + ("human", "{input}"), + ] + ) + + def get_prompt(self): + prompt_template = ChatPromptTemplate( + [ + ( + "system", + "The following is a friendly conversation between a human and an AI. 
If the AI does not know the answer to a question, it truthfully says it does not know.", + ), + MessagesPlaceholder(variable_name="chat_history"), + ("human", "{input}"), + ] + ) + + return prompt_template + + def get_condense_question_prompt(self): + contextualize_q_system_prompt = ( + "Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question." + ) + return ChatPromptTemplate.from_messages( + [ + ("system", contextualize_q_system_prompt), + MessagesPlaceholder("chat_history"), + ("human", "{input}"), + ] + ) + + def get_llm(self, model_kwargs={}, extra={}): + bedrock = genai_core.clients.get_bedrock_client() + params = {} + if "temperature" in model_kwargs: + params["temperature"] = model_kwargs["temperature"] + if "topP" in model_kwargs: + params["top_p"] = model_kwargs["topP"] + if "maxTokens" in model_kwargs: + params["max_tokens"] = model_kwargs["maxTokens"] + + guardrails = get_guardrails() + if len(guardrails.keys()) > 0: + params["guardrails"] = guardrails + + return ChatBedrockConverse( + client=bedrock, + model=self.model_id, + disable_streaming=model_kwargs.get("streaming", False) == False, + callbacks=[self.callback_handler], + **params, + **extra + ) + class LLMInputOutputAdapter: """Adapter class to prepare the inputs from Langchain to a format that LLM model expects. diff --git a/lib/model-interfaces/langchain/functions/request-handler/adapters/bedrock/claude.py b/lib/model-interfaces/langchain/functions/request-handler/adapters/bedrock/claude.py index d3b556aa3..f592df5f2 100644 --- a/lib/model-interfaces/langchain/functions/request-handler/adapters/bedrock/claude.py +++ b/lib/model-interfaces/langchain/functions/request-handler/adapters/bedrock/claude.py @@ -1,84 +1,6 @@ -import genai_core.clients - -# from langchain_community.llms import Bedrock -from langchain.prompts.prompt import PromptTemplate -from .base import Bedrock -from ..base import ModelAdapter +from .base import BedrockChatAdapter from genai_core.registry import registry -class BedrockClaudeAdapter(ModelAdapter): - def __init__(self, model_id, *args, **kwargs): - self.model_id = model_id - - super().__init__(*args, **kwargs) - - def get_llm(self, model_kwargs={}): - bedrock = genai_core.clients.get_bedrock_client() - params = {} - if "temperature" in model_kwargs: - params["temperature"] = model_kwargs["temperature"] - if "topP" in model_kwargs: - params["top_p"] = model_kwargs["topP"] - if "maxTokens" in model_kwargs: - params["max_tokens"] = model_kwargs["maxTokens"] - - params["anthropic_version"] = "bedrock-2023-05-31" - return Bedrock( - client=bedrock, - model_id=self.model_id, - model_kwargs=params, - streaming=model_kwargs.get("streaming", False), - callbacks=[self.callback_handler], - ) - - def get_qa_prompt(self): - template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. - -{context} - -Question: {question}""" # noqa: E501 - - return PromptTemplate( - template=template, input_variables=["context", "question"] - ) - - def get_prompt(self): - template = """The following is a friendly conversation between a human and an AI. If the AI does not know the answer to a question, it truthfully says it does not know. 
- -Current conversation: -{chat_history} - -Question: {input}""" # noqa: E501 - - input_variables = ["input", "chat_history"] - prompt_template_args = { - "chat_history": "{chat_history}", - "input_variables": input_variables, - "template": template, - } - prompt_template = PromptTemplate(**prompt_template_args) - - return prompt_template - - def get_condense_question_prompt(self): - template = """ -{chat_history} - - - -{question} - - -Given the conversation inside the tags , rephrase the follow up question you find inside to be a standalone question, in the same language as the follow up question. -""" # noqa: E501 - - return PromptTemplate( - input_variables=["chat_history", "question"], - chat_history="{chat_history}", - template=template, - ) - - # Register the adapter -registry.register(r"^bedrock.anthropic.claude*", BedrockClaudeAdapter) +registry.register(r"^bedrock.anthropic.claude*", BedrockChatAdapter) diff --git a/lib/model-interfaces/langchain/functions/request-handler/index.py b/lib/model-interfaces/langchain/functions/request-handler/index.py index a75af8f87..3079d6d4d 100644 --- a/lib/model-interfaces/langchain/functions/request-handler/index.py +++ b/lib/model-interfaces/langchain/functions/request-handler/index.py @@ -23,9 +23,17 @@ sequence_number = 0 - -def on_llm_new_token(user_id, session_id, self, token, run_id, *args, **kwargs): - if token is None or len(token) == 0: +def on_llm_new_token(user_id, session_id, self, token, run_id, chunk, parent_run_id, *args, **kwargs): + if isinstance(token, list): + # When using the newer Chat objects from Langchain. + # Token is not a string + text = "" + for t in token: + if "text" in t: + text = text + t.get("text") + else: + text = token + if text is None or len(text) == 0: return global sequence_number sequence_number += 1 @@ -42,7 +50,7 @@ def on_llm_new_token(user_id, session_id, self, token, run_id, *args, **kwargs): "token": { "runId": run_id, "sequenceNumber": sequence_number, - "value": token, + "value": text, }, }, } @@ -98,7 +106,7 @@ def handle_run(record): workspace_id=workspace_id, ) - logger.info(response) + logger.debug(response) send_to_client( { @@ -116,7 +124,7 @@ def record_handler(record: SQSRecord): payload: str = record.body message: dict = json.loads(payload) detail: dict = json.loads(message["Message"]) - logger.info(detail) + logger.debug(detail) if detail["action"] == ChatbotAction.RUN.value: handle_run(detail) @@ -130,7 +138,7 @@ def handle_failed_records(records): payload: str = record.body message: dict = json.loads(payload) detail: dict = json.loads(message["Message"]) - logger.info(detail) + logger.debug(detail) user_id = detail["userId"] data = detail.get("data", {}) session_id = data.get("sessionId", "") @@ -153,7 +161,7 @@ def handle_failed_records(records): ) -@logger.inject_lambda_context(log_event=True) +@logger.inject_lambda_context(log_event=False) @tracer.capture_lambda_handler def handler(event, context: LambdaContext): batch = event["Records"] diff --git a/lib/monitoring/index.ts b/lib/monitoring/index.ts index 9e77af6c0..8d65b0f8c 100644 --- a/lib/monitoring/index.ts +++ b/lib/monitoring/index.ts @@ -1,6 +1,6 @@ import { Stack } from "aws-cdk-lib"; import { IGraphqlApi } from "aws-cdk-lib/aws-appsync"; -import { LogQueryWidget, Metric } from "aws-cdk-lib/aws-cloudwatch"; +import { LogQueryWidget, MathExpression, Metric } from "aws-cdk-lib/aws-cloudwatch"; import { ITable } from "aws-cdk-lib/aws-dynamodb"; import { IFunction as ILambdaFunction } from "aws-cdk-lib/aws-lambda"; import { 
CfnCollection } from "aws-cdk-lib/aws-opensearchserverless"; @@ -15,10 +15,11 @@ import { Construct } from "constructs"; import { CfnIndex } from "aws-cdk-lib/aws-kendra"; import { IDatabaseCluster } from "aws-cdk-lib/aws-rds"; import { Queue } from "aws-cdk-lib/aws-sqs"; -import { ILogGroup } from "aws-cdk-lib/aws-logs"; +import { FilterPattern, ILogGroup, MetricFilter } from "aws-cdk-lib/aws-logs"; export interface MonitoringProps { prefix: string; + advancedMonitoring: boolean; appsycnApi: IGraphqlApi; appsyncResolversLogGroups: ILogGroup[]; llmRequestHandlersLogGroups: ILogGroup[]; @@ -72,6 +73,10 @@ export class Monitoring extends Construct { ) ); + if (props.advancedMonitoring) { + this.addMetricFilter(props.prefix + "GenAI", monitoring, props.llmRequestHandlersLogGroups); + } + const link = `https://${region}.console.aws.amazon.com/cognito/v2/idp/user-pools/${props.cognito.userPoolId}/users?region=${region}`; const title = `Cognito [**UserPool**](${link})`; this.addCognitoMetrics( @@ -147,6 +152,51 @@ export class Monitoring extends Construct { } } + private addMetricFilter(namespace: string, monitoring: MonitoringFacade, logGroups: ILogGroup[]) { + for (const logGroupKey in logGroups) { + new MetricFilter(this, 'UsageFilter' + logGroupKey, { + logGroup: logGroups[logGroupKey], + metricNamespace: namespace, + metricName: 'TokenUsage', + filterPattern: FilterPattern.stringValue('$.metric_type', "=", "token_usage"), + metricValue: '$.value', + dimensions: { + "model": "$.model" + } + }); + } + + monitoring.monitorCustom({ + alarmFriendlyName: "TokenUsage", + humanReadableName: "Token Usage", + metricGroups: [ + { + title: "LLM Usage", + metrics: [ + { + alarmFriendlyName: "TokenUsage", + metric: new MathExpression({ + label: "Tokens", + expression: `SEARCH('{${namespace},model} MetricName=TokenUsage', 'Sum', 60)`, + }), + addAlarm: {}, + }, + { + alarmFriendlyName: "LLMChains Calls", + metric: new MathExpression({ + label: "Calls", + expression: `SEARCH('{${namespace},model} MetricName=TokenUsage', 'SampleCount', 60)`, + }), + addAlarm: {}, + position: AxisPosition.RIGHT, + }, + ], + }, + ], + }); + + } + private addCognitoMetrics( monitoring: MonitoringFacade, userpoolId: string, @@ -319,7 +369,7 @@ export class Monitoring extends Construct { */ queryLines: [ "fields @timestamp, message, level, location" + - (extraFields.length > 0 ? "," + extraFields.join(",") : ""), + (extraFields.length > 0 ? 
"," + extraFields.join(",") : ""), `filter ispresent(level)`, // only includes messages using the logger "sort @timestamp desc", `limit 200`, diff --git a/lib/shared/layers/common/requirements.txt b/lib/shared/layers/common/requirements.txt index 079f0a611..6ef6cb403 100644 --- a/lib/shared/layers/common/requirements.txt +++ b/lib/shared/layers/common/requirements.txt @@ -1,12 +1,11 @@ aws_xray_sdk==2.12.1 -boto3==1.34.98 numpy==1.26.0 cfnresponse==1.1.2 aws_requests_auth==0.4.3 requests-aws4auth==1.2.3 langchain==0.2.14 langchain-community==0.2.12 -langchain-aws==0.1.6 +langchain-aws==0.1.17 opensearch-py==2.4.2 psycopg2-binary==2.9.7 pgvector==0.2.2 diff --git a/lib/shared/layers/python-sdk/python/genai_core/langchain/chat_message_history.py b/lib/shared/layers/python-sdk/python/genai_core/langchain/chat_message_history.py index 731fdc724..21815d390 100644 --- a/lib/shared/layers/python-sdk/python/genai_core/langchain/chat_message_history.py +++ b/lib/shared/layers/python-sdk/python/genai_core/langchain/chat_message_history.py @@ -13,6 +13,7 @@ messages_from_dict, messages_to_dict, ) +from langchain_core.messages.ai import AIMessage, AIMessageChunk client = boto3.resource("dynamodb") logger = Logger() @@ -54,7 +55,16 @@ def messages(self) -> List[BaseMessage]: def add_message(self, message: BaseMessage) -> None: """Append the message to the record in DynamoDB""" messages = messages_to_dict(self.messages) - _message = _message_to_dict(message) + if isinstance(message, AIMessageChunk): + # When streaming with RunnableWithMessageHistory, + # it would add a chunk to the history but it expects a text as content. + ai_message = "" + for c in message.content: + if "text" in c: + ai_message = ai_message + c.get("text") + _message = _message_to_dict(AIMessage(ai_message)) + else: + _message = _message_to_dict(message) messages.append(_message) try: diff --git a/lib/shared/layers/python-sdk/python/genai_core/langchain/workspace_retriever.py b/lib/shared/layers/python-sdk/python/genai_core/langchain/workspace_retriever.py index 28413ab08..29c4da2bf 100644 --- a/lib/shared/layers/python-sdk/python/genai_core/langchain/workspace_retriever.py +++ b/lib/shared/layers/python-sdk/python/genai_core/langchain/workspace_retriever.py @@ -1,21 +1,29 @@ +from aws_lambda_powertools import Logger import genai_core.semantic_search from typing import List from langchain.callbacks.manager import CallbackManagerForRetrieverRun from langchain.schema import BaseRetriever, Document +logger = Logger() class WorkspaceRetriever(BaseRetriever): workspace_id: str + documents_found: List[Document] = [] + def get_last_search_documents(self) -> List[Document]: + return self.documents_found + def _get_relevant_documents( self, query: str, *, run_manager: CallbackManagerForRetrieverRun ) -> List[Document]: + logger.debug("SearchRequest", query=query) result = genai_core.semantic_search.semantic_search( self.workspace_id, query, limit=3, full_response=False ) - return [self._get_document(item) for item in result.get("items", [])] - + self.documents_found = [self._get_document(item) for item in result.get("items", [])] + return self.documents_found + def _get_document(self, item): content = item["content"] content_complement = item.get("content_complement") diff --git a/lib/shared/types.ts b/lib/shared/types.ts index f6b8ea77a..baa399421 100644 --- a/lib/shared/types.ts +++ b/lib/shared/types.ts @@ -77,6 +77,7 @@ export interface SystemConfig { createVpcEndpoints?: boolean; vpcDefaultSecurityGroup?: string; }; + 
advancedMonitoring?: boolean; logRetention?: number; certificate?: string; domain?: string; From f7b01b4035dd9dc1fae06a2372be7e85fe1727df Mon Sep 17 00:00:00 2001 From: Charles Marion Date: Wed, 4 Sep 2024 14:58:48 +0000 Subject: [PATCH 2/3] test: Format + add test --- cli/magic-config.ts | 3 +- integtests/chatbot-api/session_test.py | 6 ++ .../request-handler/adapters/base/base.py | 21 ++--- .../request-handler/adapters/bedrock/base.py | 24 +++-- .../functions/request-handler/index.py | 7 +- lib/monitoring/index.ts | 37 +++++--- .../langchain/chat_message_history.py | 2 +- .../langchain/workspace_retriever.py | 11 ++- package.json | 5 +- .../monitoring-contruct.test.ts.snap | 90 ++++++++++++------- tests/monitoring/monitoring-contruct.test.ts | 1 + 11 files changed, 137 insertions(+), 70 deletions(-) diff --git a/cli/magic-config.ts b/cli/magic-config.ts index e2c55ec1b..b3a30dbd3 100644 --- a/cli/magic-config.ts +++ b/cli/magic-config.ts @@ -831,8 +831,7 @@ async function processCreateOptions(options: any): Promise { { type: "confirm", name: "advancedMonitoring", - message: - "Do you want to enable custom metrics and advanced monitoring?", + message: "Do you want to enable custom metrics and advanced monitoring?", initial: options.advancedMonitoring || false, }, { diff --git a/integtests/chatbot-api/session_test.py b/integtests/chatbot-api/session_test.py index 636cbaea2..446558587 100644 --- a/integtests/chatbot-api/session_test.py +++ b/integtests/chatbot-api/session_test.py @@ -38,9 +38,11 @@ def test_create_session(client, default_model, default_provider, session_id): break assert found == True + assert sessionFound.get("title") == request.get("data").get("text") + def test_get_session(client, session_id, default_model): session = client.get_session(session_id) assert session.get("id") == session_id @@ -48,6 +50,10 @@ def test_get_session(client, session_id, default_model): assert len(session.get("history")) == 2 assert session.get("history")[0].get("type") == "human" assert session.get("history")[1].get("type") == "ai" + assert session.get("history")[1].get("metadata") is not None + metadata = json.loads(session.get("history")[1].get("metadata")) + assert metadata.get("usage") is not None + assert metadata.get("usage").get("total_tokens") > 0 def test_delete_session(client, session_id): diff --git a/lib/model-interfaces/langchain/functions/request-handler/adapters/base/base.py b/lib/model-interfaces/langchain/functions/request-handler/adapters/base/base.py index 65eaec1a2..9fa05b6f1 100644 --- a/lib/model-interfaces/langchain/functions/request-handler/adapters/base/base.py +++ b/lib/model-interfaces/langchain/functions/request-handler/adapters/base/base.py @@ -23,8 +23,7 @@ from langchain_core.outputs import LLMResult, ChatGeneration from langchain_core.messages.ai import AIMessage, AIMessageChunk from langchain_core.messages.human import HumanMessage -from langchain_core.language_models.chat_models import BaseChatModel -from langchain import hub +from langchain_aws import ChatBedrockConverse logger = Logger() @@ -53,7 +52,7 @@ def on_llm_end( and isinstance(generation, ChatGeneration) and isinstance(generation.message, AIMessage) ): - ## In case of rag there could be 2 llm calls. + # In case of rag there could be 2 llm calls. 
if self.usage is None: self.usage = { "input_tokens": 0, @@ -149,7 +148,8 @@ def run_with_chain_v2(self, user_prompt, workspace_id=None): if workspace_id: retriever = WorkspaceRetriever(workspace_id=workspace_id) - ## Only stream the last llm call (otherwise the internal llm response will be visible) + # Only stream the last llm call (otherwise the internal + # llm response will be visible) llm_without_streaming = self.get_llm({"streaming": False}) history_aware_retriever = create_history_aware_retriever( llm_without_streaming, @@ -157,21 +157,21 @@ def run_with_chain_v2(self, user_prompt, workspace_id=None): self.get_condense_question_prompt(), ) question_answer_chain = create_stuff_documents_chain( - self.llm, self.get_qa_prompt(), + self.llm, + self.get_qa_prompt(), ) chain = create_retrieval_chain( history_aware_retriever, question_answer_chain ) else: chain = self.get_prompt() | self.llm - conversation = RunnableWithMessageHistory( chain, lambda session_id: self.chat_history, history_messages_key="chat_history", input_messages_key="input", - output_messages_key="output" + output_messages_key="output", ) config = {"configurable": {"session_id": self.session_id}} @@ -212,7 +212,7 @@ def run_with_chain_v2(self, user_prompt, workspace_id=None): } for doc in retriever.get_last_search_documents() ] - + metadata = { "modelId": self.model_id, "modelKwargs": self.model_kwargs, @@ -233,7 +233,8 @@ def run_with_chain_v2(self, user_prompt, workspace_id=None): # Used by Cloudwatch filters to generate a metric of token usage. logger.info( "Usage Metric", - # Each unique value of model id will create a new cloudwatch metric (each one has a cost) + # Each unique value of model id will create a + # new cloudwatch metric (each one has a cost) model=self.model_id, metric_type="token_usage", value=self.callback_handler.usage.get("total_tokens"), @@ -329,7 +330,7 @@ def run(self, prompt, workspace_id=None, *args, **kwargs): logger.debug(f"mode: {self._mode}") if self._mode == ChatbotMode.CHAIN.value: - if isinstance(self.llm, BaseChatModel): + if isinstance(self.llm, ChatBedrockConverse): return self.run_with_chain_v2(prompt, workspace_id) else: return self.run_with_chain(prompt, workspace_id) diff --git a/lib/model-interfaces/langchain/functions/request-handler/adapters/bedrock/base.py b/lib/model-interfaces/langchain/functions/request-handler/adapters/bedrock/base.py index 5a68a7f80..801261671 100644 --- a/lib/model-interfaces/langchain/functions/request-handler/adapters/bedrock/base.py +++ b/lib/model-interfaces/langchain/functions/request-handler/adapters/bedrock/base.py @@ -17,7 +17,8 @@ from ..base import ModelAdapter import genai_core.clients from langchain_aws import ChatBedrockConverse -from langchain.prompts import PromptTemplate, ChatPromptTemplate, MessagesPlaceholder +from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder + def get_guardrails() -> dict: if "BEDROCK_GUARDRAILS_ID" in os.environ: @@ -33,9 +34,13 @@ def __init__(self, model_id, *args, **kwargs): self.model_id = model_id super().__init__(*args, **kwargs) - + def get_qa_prompt(self): - system_prompt = "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. \n\n{context}" + system_prompt = ( + "Use the following pieces of context to answer the question at the end." + " If you don't know the answer, just say that you don't know, " + "don't try to make up an answer. 
\n\n{context}" + ) return ChatPromptTemplate.from_messages( [ ("system", system_prompt), @@ -49,7 +54,12 @@ def get_prompt(self): [ ( "system", - "The following is a friendly conversation between a human and an AI. If the AI does not know the answer to a question, it truthfully says it does not know.", + ( + "The following is a friendly conversation between " + "a human and an AI." + "If the AI does not know the answer to a question, it " + "truthfully says it does not know." + ), ), MessagesPlaceholder(variable_name="chat_history"), ("human", "{input}"), @@ -60,7 +70,8 @@ def get_prompt(self): def get_condense_question_prompt(self): contextualize_q_system_prompt = ( - "Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question." + "Given the following conversation and a follow up" + " question, rephrase the follow up question to be a standalone question." ) return ChatPromptTemplate.from_messages( [ @@ -90,9 +101,10 @@ def get_llm(self, model_kwargs={}, extra={}): disable_streaming=model_kwargs.get("streaming", False) == False, callbacks=[self.callback_handler], **params, - **extra + **extra, ) + class LLMInputOutputAdapter: """Adapter class to prepare the inputs from Langchain to a format that LLM model expects. diff --git a/lib/model-interfaces/langchain/functions/request-handler/index.py b/lib/model-interfaces/langchain/functions/request-handler/index.py index 3079d6d4d..ab38aa0f5 100644 --- a/lib/model-interfaces/langchain/functions/request-handler/index.py +++ b/lib/model-interfaces/langchain/functions/request-handler/index.py @@ -23,9 +23,12 @@ sequence_number = 0 -def on_llm_new_token(user_id, session_id, self, token, run_id, chunk, parent_run_id, *args, **kwargs): + +def on_llm_new_token( + user_id, session_id, self, token, run_id, chunk, parent_run_id, *args, **kwargs +): if isinstance(token, list): - # When using the newer Chat objects from Langchain. + # When using the newer Chat objects from Langchain. 
# Token is not a string text = "" for t in token: diff --git a/lib/monitoring/index.ts b/lib/monitoring/index.ts index 8d65b0f8c..69d3d2a65 100644 --- a/lib/monitoring/index.ts +++ b/lib/monitoring/index.ts @@ -1,6 +1,10 @@ import { Stack } from "aws-cdk-lib"; import { IGraphqlApi } from "aws-cdk-lib/aws-appsync"; -import { LogQueryWidget, MathExpression, Metric } from "aws-cdk-lib/aws-cloudwatch"; +import { + LogQueryWidget, + MathExpression, + Metric, +} from "aws-cdk-lib/aws-cloudwatch"; import { ITable } from "aws-cdk-lib/aws-dynamodb"; import { IFunction as ILambdaFunction } from "aws-cdk-lib/aws-lambda"; import { CfnCollection } from "aws-cdk-lib/aws-opensearchserverless"; @@ -74,7 +78,11 @@ export class Monitoring extends Construct { ); if (props.advancedMonitoring) { - this.addMetricFilter(props.prefix + "GenAI", monitoring, props.llmRequestHandlersLogGroups); + this.addMetricFilter( + props.prefix + "GenAI", + monitoring, + props.llmRequestHandlersLogGroups + ); } const link = `https://${region}.console.aws.amazon.com/cognito/v2/idp/user-pools/${props.cognito.userPoolId}/users?region=${region}`; @@ -152,17 +160,25 @@ export class Monitoring extends Construct { } } - private addMetricFilter(namespace: string, monitoring: MonitoringFacade, logGroups: ILogGroup[]) { + private addMetricFilter( + namespace: string, + monitoring: MonitoringFacade, + logGroups: ILogGroup[] + ) { for (const logGroupKey in logGroups) { - new MetricFilter(this, 'UsageFilter' + logGroupKey, { + new MetricFilter(this, "UsageFilter" + logGroupKey, { logGroup: logGroups[logGroupKey], metricNamespace: namespace, - metricName: 'TokenUsage', - filterPattern: FilterPattern.stringValue('$.metric_type', "=", "token_usage"), - metricValue: '$.value', + metricName: "TokenUsage", + filterPattern: FilterPattern.stringValue( + "$.metric_type", + "=", + "token_usage" + ), + metricValue: "$.value", dimensions: { - "model": "$.model" - } + model: "$.model", + }, }); } @@ -194,7 +210,6 @@ export class Monitoring extends Construct { }, ], }); - } private addCognitoMetrics( @@ -369,7 +384,7 @@ export class Monitoring extends Construct { */ queryLines: [ "fields @timestamp, message, level, location" + - (extraFields.length > 0 ? "," + extraFields.join(",") : ""), + (extraFields.length > 0 ? "," + extraFields.join(",") : ""), `filter ispresent(level)`, // only includes messages using the logger "sort @timestamp desc", `limit 200`, diff --git a/lib/shared/layers/python-sdk/python/genai_core/langchain/chat_message_history.py b/lib/shared/layers/python-sdk/python/genai_core/langchain/chat_message_history.py index 21815d390..aa686ef56 100644 --- a/lib/shared/layers/python-sdk/python/genai_core/langchain/chat_message_history.py +++ b/lib/shared/layers/python-sdk/python/genai_core/langchain/chat_message_history.py @@ -56,7 +56,7 @@ def add_message(self, message: BaseMessage) -> None: """Append the message to the record in DynamoDB""" messages = messages_to_dict(self.messages) if isinstance(message, AIMessageChunk): - # When streaming with RunnableWithMessageHistory, + # When streaming with RunnableWithMessageHistory, # it would add a chunk to the history but it expects a text as content. 
ai_message = "" for c in message.content: diff --git a/lib/shared/layers/python-sdk/python/genai_core/langchain/workspace_retriever.py b/lib/shared/layers/python-sdk/python/genai_core/langchain/workspace_retriever.py index 29c4da2bf..6f6a67d36 100644 --- a/lib/shared/layers/python-sdk/python/genai_core/langchain/workspace_retriever.py +++ b/lib/shared/layers/python-sdk/python/genai_core/langchain/workspace_retriever.py @@ -6,13 +6,14 @@ logger = Logger() + class WorkspaceRetriever(BaseRetriever): workspace_id: str documents_found: List[Document] = [] - def get_last_search_documents(self) -> List[Document]: + def get_last_search_documents(self) -> List[Document]: return self.documents_found - + def _get_relevant_documents( self, query: str, *, run_manager: CallbackManagerForRetrieverRun ) -> List[Document]: @@ -21,9 +22,11 @@ def _get_relevant_documents( self.workspace_id, query, limit=3, full_response=False ) - self.documents_found = [self._get_document(item) for item in result.get("items", [])] + self.documents_found = [ + self._get_document(item) for item in result.get("items", []) + ] return self.documents_found - + def _get_document(self, item): content = item["content"] content_complement = item.get("content_complement") diff --git a/package.json b/package.json index abd7913c4..11f77e796 100644 --- a/package.json +++ b/package.json @@ -8,9 +8,11 @@ "build": "npx @aws-amplify/cli codegen && npx tsc", "watch": "npx tsc -w", "cdk": "cdk", + "deploy": "npx cdk deploy", "hotswap": "cdk deploy --hotswap", "test": "jest", "pytest": "pytest tests/", + "test-all": "npm run test && npm run pytest", "integtest": "pytest integtests/", "gen": "npx @aws-amplify/cli codegen", "create": "node ./dist/cli/magic.js config", @@ -19,7 +21,8 @@ "pylint": "flake8 .", "format": "npx prettier --ignore-path .gitignore --write \"**/*.+(js|ts|jsx|tsx|json|css)\"", "pyformat": "black .", - "deploy": "npm run format && npx cdk deploy", + "format-lint-all": "npm run format && npm run pyformat && npm run lint && npm run pylint", + "vet-all": "npm run format-lint-all && npm run test-all", "docs:dev": "vitepress dev docs", "docs:build": "vitepress build docs", "docs:preview": "vitepress preview docs" diff --git a/tests/monitoring/__snapshots__/monitoring-contruct.test.ts.snap b/tests/monitoring/__snapshots__/monitoring-contruct.test.ts.snap index b8d2f849b..5d92cb33b 100644 --- a/tests/monitoring/__snapshots__/monitoring-contruct.test.ts.snap +++ b/tests/monitoring/__snapshots__/monitoring-contruct.test.ts.snap @@ -72,7 +72,11 @@ exports[`snapshot test 1`] = ` { "Ref": "AWS::Region", }, - "","query":"SOURCE 'Test2' | fields @timestamp, message, level, location\\n| filter ispresent(level)\\n| sort @timestamp desc\\n| limit 200"}},{"type":"text","width":24,"height":1,"x":0,"y":17,"properties":{"markdown":"### Cognito [**UserPool**](https://", + "","query":"SOURCE 'Test2' | fields @timestamp, message, level, location\\n| filter ispresent(level)\\n| sort @timestamp desc\\n| limit 200"}},{"type":"text","width":24,"height":1,"x":0,"y":17,"properties":{"markdown":"### Token Usage"}},{"type":"metric","width":24,"height":5,"x":0,"y":18,"properties":{"view":"timeSeries","title":"LLM Usage","region":"", + { + "Ref": "AWS::Region", + }, + "","metrics":[[{"label":"Tokens","expression":"SEARCH('{GenAI,model} MetricName=TokenUsage', 'Sum', 60)"}],[{"label":"Calls","expression":"SEARCH('{GenAI,model} MetricName=TokenUsage', 'SampleCount', 
60)","yAxis":"right"}]],"yAxis":{}}},{"type":"text","width":24,"height":1,"x":0,"y":23,"properties":{"markdown":"### Cognito [**UserPool**](https://", { "Ref": "AWS::Region", }, @@ -80,11 +84,11 @@ exports[`snapshot test 1`] = ` { "Ref": "AWS::Region", }, - ")"}},{"type":"metric","width":24,"height":5,"x":0,"y":18,"properties":{"view":"timeSeries","title":"SignIn","region":"", + ")"}},{"type":"metric","width":24,"height":5,"x":0,"y":24,"properties":{"view":"timeSeries","title":"SignIn","region":"", { "Ref": "AWS::Region", }, - "","metrics":[["AWS/Cognito","SignInSuccesses","UserPool","userPoolId","UserPoolClient","clientId",{"stat":"Sum"}],["AWS/Cognito","FederationSuccesses","UserPool","userPoolId","UserPoolClient","clientId",{"stat":"Sum","yAxis":"right"}]],"yAxis":{}}},{"type":"text","width":24,"height":1,"x":0,"y":23,"properties":{"markdown":"# Storage"}},{"type":"text","width":24,"height":1,"x":0,"y":24,"properties":{"markdown":"### Dynamo Table **[Table](https://", + "","metrics":[["AWS/Cognito","SignInSuccesses","UserPool","userPoolId","UserPoolClient","clientId",{"stat":"Sum"}],["AWS/Cognito","FederationSuccesses","UserPool","userPoolId","UserPoolClient","clientId",{"stat":"Sum","yAxis":"right"}]],"yAxis":{}}},{"type":"text","width":24,"height":1,"x":0,"y":29,"properties":{"markdown":"# Storage"}},{"type":"text","width":24,"height":1,"x":0,"y":30,"properties":{"markdown":"### Dynamo Table **[Table](https://", { "Ref": "AWS::Region", }, @@ -92,27 +96,27 @@ exports[`snapshot test 1`] = ` { "Ref": "AWS::Region", }, - "#tables:selected=Name)**"}},{"type":"metric","width":6,"height":3,"x":0,"y":25,"properties":{"view":"timeSeries","title":"Read Usage","region":"", + "#tables:selected=Name)**"}},{"type":"metric","width":6,"height":3,"x":0,"y":31,"properties":{"view":"timeSeries","title":"Read Usage","region":"", { "Ref": "AWS::Region", }, - "","metrics":[[{"label":"Consumed","expression":"consumed_rcu_sum/PERIOD(consumed_rcu_sum)","id":"consumed_read_cap"}],["AWS/DynamoDB","ConsumedReadCapacityUnits","TableName","Name",{"stat":"Sum","visible":false,"id":"consumed_rcu_sum"}],["AWS/DynamoDB","ProvisionedReadCapacityUnits","TableName","Name",{"label":"Provisioned","id":"provisioned_read_cap"}],[{"label":"Utilization","expression":"100*(consumed_read_cap/provisioned_read_cap)","yAxis":"right"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false},"right":{"min":0,"max":100,"label":"%","showUnits":false}},"legend":{"position":"right"}}},{"type":"metric","width":6,"height":3,"x":0,"y":28,"properties":{"view":"timeSeries","title":"Write Usage","region":"", + "","metrics":[[{"label":"Consumed","expression":"consumed_rcu_sum/PERIOD(consumed_rcu_sum)","id":"consumed_read_cap"}],["AWS/DynamoDB","ConsumedReadCapacityUnits","TableName","Name",{"stat":"Sum","visible":false,"id":"consumed_rcu_sum"}],["AWS/DynamoDB","ProvisionedReadCapacityUnits","TableName","Name",{"label":"Provisioned","id":"provisioned_read_cap"}],[{"label":"Utilization","expression":"100*(consumed_read_cap/provisioned_read_cap)","yAxis":"right"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false},"right":{"min":0,"max":100,"label":"%","showUnits":false}},"legend":{"position":"right"}}},{"type":"metric","width":6,"height":3,"x":0,"y":34,"properties":{"view":"timeSeries","title":"Write Usage","region":"", { "Ref": "AWS::Region", }, - 
"","metrics":[[{"label":"Consumed","expression":"consumed_wcu_sum/PERIOD(consumed_wcu_sum)","id":"consumed_write_cap"}],["AWS/DynamoDB","ConsumedWriteCapacityUnits","TableName","Name",{"stat":"Sum","visible":false,"id":"consumed_wcu_sum"}],["AWS/DynamoDB","ProvisionedWriteCapacityUnits","TableName","Name",{"label":"Provisioned","id":"provisioned_write_cap"}],[{"label":"Utilization","expression":"100*(consumed_write_cap/provisioned_write_cap)","yAxis":"right"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false},"right":{"min":0,"max":100,"label":"%","showUnits":false}},"legend":{"position":"right"}}},{"type":"metric","width":9,"height":6,"x":6,"y":25,"properties":{"view":"timeSeries","title":"Latency (Average)","region":"", + "","metrics":[[{"label":"Consumed","expression":"consumed_wcu_sum/PERIOD(consumed_wcu_sum)","id":"consumed_write_cap"}],["AWS/DynamoDB","ConsumedWriteCapacityUnits","TableName","Name",{"stat":"Sum","visible":false,"id":"consumed_wcu_sum"}],["AWS/DynamoDB","ProvisionedWriteCapacityUnits","TableName","Name",{"label":"Provisioned","id":"provisioned_write_cap"}],[{"label":"Utilization","expression":"100*(consumed_write_cap/provisioned_write_cap)","yAxis":"right"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false},"right":{"min":0,"max":100,"label":"%","showUnits":false}},"legend":{"position":"right"}}},{"type":"metric","width":9,"height":6,"x":6,"y":31,"properties":{"view":"timeSeries","title":"Latency (Average)","region":"", { "Ref": "AWS::Region", }, - "","metrics":[[{"label":" ","expression":"SEARCH('{AWS/DynamoDB,TableName,Operation} TableName=\\"Name\\" MetricName=\\"SuccessfulRequestLatency\\"', 'Average', 300)"}]],"yAxis":{"left":{"min":0,"label":"ms","showUnits":false}},"legend":{"position":"right"}}},{"type":"metric","width":3,"height":6,"x":15,"y":25,"properties":{"view":"timeSeries","title":"Throttles","region":"", + "","metrics":[[{"label":" ","expression":"SEARCH('{AWS/DynamoDB,TableName,Operation} TableName=\\"Name\\" MetricName=\\"SuccessfulRequestLatency\\"', 'Average', 300)"}]],"yAxis":{"left":{"min":0,"label":"ms","showUnits":false}},"legend":{"position":"right"}}},{"type":"metric","width":3,"height":6,"x":15,"y":31,"properties":{"view":"timeSeries","title":"Throttles","region":"", { "Ref": "AWS::Region", }, - "","metrics":[[{"label":"Read","expression":"FILL(readThrottles,0)"}],["AWS/DynamoDB","ReadThrottleEvents","TableName","Name",{"label":"Read","stat":"Sum","visible":false,"id":"readThrottles"}],[{"label":"Write","expression":"FILL(writeThrottles,0)"}],["AWS/DynamoDB","WriteThrottleEvents","TableName","Name",{"label":"Write","stat":"Sum","visible":false,"id":"writeThrottles"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false}}}},{"type":"metric","width":6,"height":6,"x":18,"y":25,"properties":{"view":"timeSeries","title":"Errors","region":"", + "","metrics":[[{"label":"Read","expression":"FILL(readThrottles,0)"}],["AWS/DynamoDB","ReadThrottleEvents","TableName","Name",{"label":"Read","stat":"Sum","visible":false,"id":"readThrottles"}],[{"label":"Write","expression":"FILL(writeThrottles,0)"}],["AWS/DynamoDB","WriteThrottleEvents","TableName","Name",{"label":"Write","stat":"Sum","visible":false,"id":"writeThrottles"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false}}}},{"type":"metric","width":6,"height":6,"x":18,"y":31,"properties":{"view":"timeSeries","title":"Errors","region":"", { "Ref": "AWS::Region", }, - "","metrics":[[{"label":"System 
Errors","expression":"systemErrorGetItem+systemErrorBatchGetItem+systemErrorScan+systemErrorQuery+systemErrorGetRecords+systemErrorPutItem+systemErrorDeleteItem+systemErrorUpdateItem+systemErrorBatchWriteItem"}],["AWS/DynamoDB","SystemErrors","Operation","GetItem","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorGetItem"}],["AWS/DynamoDB","SystemErrors","Operation","BatchGetItem","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorBatchGetItem"}],["AWS/DynamoDB","SystemErrors","Operation","Scan","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorScan"}],["AWS/DynamoDB","SystemErrors","Operation","Query","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorQuery"}],["AWS/DynamoDB","SystemErrors","Operation","GetRecords","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorGetRecords"}],["AWS/DynamoDB","SystemErrors","Operation","PutItem","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorPutItem"}],["AWS/DynamoDB","SystemErrors","Operation","DeleteItem","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorDeleteItem"}],["AWS/DynamoDB","SystemErrors","Operation","UpdateItem","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorUpdateItem"}],["AWS/DynamoDB","SystemErrors","Operation","BatchWriteItem","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorBatchWriteItem"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false}}}},{"type":"text","width":24,"height":1,"x":0,"y":31,"properties":{"markdown":"### S3 Bucket **[Bucket](https://s3.console.aws.amazon.com/s3/buckets/", + "","metrics":[[{"label":"System Errors","expression":"systemErrorGetItem+systemErrorBatchGetItem+systemErrorScan+systemErrorQuery+systemErrorGetRecords+systemErrorPutItem+systemErrorDeleteItem+systemErrorUpdateItem+systemErrorBatchWriteItem"}],["AWS/DynamoDB","SystemErrors","Operation","GetItem","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorGetItem"}],["AWS/DynamoDB","SystemErrors","Operation","BatchGetItem","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorBatchGetItem"}],["AWS/DynamoDB","SystemErrors","Operation","Scan","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorScan"}],["AWS/DynamoDB","SystemErrors","Operation","Query","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorQuery"}],["AWS/DynamoDB","SystemErrors","Operation","GetRecords","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorGetRecords"}],["AWS/DynamoDB","SystemErrors","Operation","PutItem","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorPutItem"}],["AWS/DynamoDB","SystemErrors","Operation","DeleteItem","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorDeleteItem"}],["AWS/DynamoDB","SystemErrors","Operation","UpdateItem","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorUpdateItem"}],["AWS/DynamoDB","SystemErrors","Operation","BatchWriteItem","TableName","Name",{"stat":"Sum","visible":false,"id":"systemErrorBatchWriteItem"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false}}}},{"type":"text","width":24,"height":1,"x":0,"y":37,"properties":{"markdown":"### S3 Bucket **[Bucket](https://s3.console.aws.amazon.com/s3/buckets/", { "Ref": "Bucket83908E77", }, @@ -120,7 +124,7 @@ exports[`snapshot test 1`] = ` { "Ref": "AWS::Region", }, - "&tab=metrics)**"}},{"type":"metric","width":12,"height":5,"x":0,"y":32,"properties":{"view":"timeSeries","title":"Bucket Size","region":"", + 
"&tab=metrics)**"}},{"type":"metric","width":12,"height":5,"x":0,"y":38,"properties":{"view":"timeSeries","title":"Bucket Size","region":"", { "Ref": "AWS::Region", }, @@ -128,7 +132,7 @@ exports[`snapshot test 1`] = ` { "Ref": "Bucket83908E77", }, - "","StorageType","StandardStorage",{"label":"BucketSizeBytes"}]],"yAxis":{"left":{"min":0,"label":"bytes","showUnits":false}}}},{"type":"metric","width":12,"height":5,"x":12,"y":32,"properties":{"view":"timeSeries","title":"Object Count","region":"", + "","StorageType","StandardStorage",{"label":"BucketSizeBytes"}]],"yAxis":{"left":{"min":0,"label":"bytes","showUnits":false}}}},{"type":"metric","width":12,"height":5,"x":12,"y":38,"properties":{"view":"timeSeries","title":"Object Count","region":"", { "Ref": "AWS::Region", }, @@ -136,7 +140,7 @@ exports[`snapshot test 1`] = ` { "Ref": "Bucket83908E77", }, - "","StorageType","AllStorageTypes",{"label":"NumberOfObjects"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false}}}},{"type":"text","width":24,"height":1,"x":0,"y":37,"properties":{"markdown":"### Aurora Cluster **[Cluster](https://", + "","StorageType","AllStorageTypes",{"label":"NumberOfObjects"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false}}}},{"type":"text","width":24,"height":1,"x":0,"y":43,"properties":{"markdown":"### Aurora Cluster **[Cluster](https://", { "Ref": "AWS::Region", }, @@ -144,23 +148,23 @@ exports[`snapshot test 1`] = ` { "Ref": "AWS::Region", }, - "#database:id=clusterIdentifier;is-cluster=true;tab=monitoring)**"}},{"type":"metric","width":6,"height":5,"x":0,"y":38,"properties":{"view":"timeSeries","title":"CPU Usage","region":"", + "#database:id=clusterIdentifier;is-cluster=true;tab=monitoring)**"}},{"type":"metric","width":6,"height":5,"x":0,"y":44,"properties":{"view":"timeSeries","title":"CPU Usage","region":"", { "Ref": "AWS::Region", }, - "","metrics":[["AWS/RDS","CPUUtilization","DBClusterIdentifier","clusterIdentifier",{"label":"CPU Usage"}]],"yAxis":{"left":{"min":0,"max":100,"label":"%","showUnits":false}}}},{"type":"metric","width":6,"height":5,"x":6,"y":38,"properties":{"view":"timeSeries","title":"Connections","region":"", + "","metrics":[["AWS/RDS","CPUUtilization","DBClusterIdentifier","clusterIdentifier",{"label":"CPU Usage"}]],"yAxis":{"left":{"min":0,"max":100,"label":"%","showUnits":false}}}},{"type":"metric","width":6,"height":5,"x":6,"y":44,"properties":{"view":"timeSeries","title":"Connections","region":"", { "Ref": "AWS::Region", }, - "","metrics":[["AWS/RDS","DatabaseConnections","DBClusterIdentifier","clusterIdentifier",{"label":"Connections"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false}}}},{"type":"metric","width":12,"height":5,"x":12,"y":38,"properties":{"view":"timeSeries","title":"Query Duration","region":"", + "","metrics":[["AWS/RDS","DatabaseConnections","DBClusterIdentifier","clusterIdentifier",{"label":"Connections"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false}}}},{"type":"metric","width":12,"height":5,"x":12,"y":44,"properties":{"view":"timeSeries","title":"Query Duration","region":"", { "Ref": "AWS::Region", }, - 
"","metrics":[["AWS/RDS","SelectLatency","DBClusterIdentifier","clusterIdentifier",{"label":"Select","stat":"p90"}],["AWS/RDS","InsertLatency","DBClusterIdentifier","clusterIdentifier",{"label":"Insert","stat":"p90"}],["AWS/RDS","UpdateLatency","DBClusterIdentifier","clusterIdentifier",{"label":"Update","stat":"p90"}],["AWS/RDS","DeleteLatency","DBClusterIdentifier","clusterIdentifier",{"label":"Delete","stat":"p90"}],["AWS/RDS","CommitLatency","DBClusterIdentifier","clusterIdentifier",{"label":"Commit","stat":"p90"}]],"yAxis":{"left":{"min":0,"label":"ms","showUnits":false}}}},{"type":"metric","width":12,"height":5,"x":0,"y":43,"properties":{"view":"timeSeries","title":"Serverless Database Capacity","region":"", + "","metrics":[["AWS/RDS","SelectLatency","DBClusterIdentifier","clusterIdentifier",{"label":"Select","stat":"p90"}],["AWS/RDS","InsertLatency","DBClusterIdentifier","clusterIdentifier",{"label":"Insert","stat":"p90"}],["AWS/RDS","UpdateLatency","DBClusterIdentifier","clusterIdentifier",{"label":"Update","stat":"p90"}],["AWS/RDS","DeleteLatency","DBClusterIdentifier","clusterIdentifier",{"label":"Delete","stat":"p90"}],["AWS/RDS","CommitLatency","DBClusterIdentifier","clusterIdentifier",{"label":"Commit","stat":"p90"}]],"yAxis":{"left":{"min":0,"label":"ms","showUnits":false}}}},{"type":"metric","width":12,"height":5,"x":0,"y":49,"properties":{"view":"timeSeries","title":"Serverless Database Capacity","region":"", { "Ref": "AWS::Region", }, - "","metrics":[["AWS/RDS","ServerlessDatabaseCapacity","DBClusterIdentifier","clusterIdentifier",{"label":"Serverless Database Capacity"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false}}}},{"type":"text","width":24,"height":1,"x":0,"y":48,"properties":{"markdown":"### OpenSearch [**Collection**](https://", + "","metrics":[["AWS/RDS","ServerlessDatabaseCapacity","DBClusterIdentifier","clusterIdentifier",{"label":"Serverless Database Capacity"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false}}}},{"type":"text","width":24,"height":1,"x":0,"y":54,"properties":{"markdown":"### OpenSearch [**Collection**](https://", { "Ref": "AWS::Region", }, @@ -168,7 +172,7 @@ exports[`snapshot test 1`] = ` { "Ref": "AWS::Region", }, - "#opensearch/collections/name)"}},{"type":"metric","width":12,"height":5,"x":0,"y":49,"properties":{"view":"timeSeries","title":"Search","region":"", + "#opensearch/collections/name)"}},{"type":"metric","width":12,"height":5,"x":0,"y":55,"properties":{"view":"timeSeries","title":"Search","region":"", { "Ref": "AWS::Region", }, @@ -188,7 +192,7 @@ exports[`snapshot test 1`] = ` { "Ref": "Collection", }, - "","CollectionName","name",{"yAxis":"right"}]],"yAxis":{}}},{"type":"metric","width":12,"height":5,"x":12,"y":49,"properties":{"view":"timeSeries","title":"Errors","region":"", + "","CollectionName","name",{"yAxis":"right"}]],"yAxis":{}}},{"type":"metric","width":12,"height":5,"x":12,"y":55,"properties":{"view":"timeSeries","title":"Errors","region":"", { "Ref": "AWS::Region", }, @@ -208,7 +212,7 @@ exports[`snapshot test 1`] = ` { "Ref": "Collection", }, - "","CollectionName","name",{"stat":"Sum"}]],"yAxis":{}}},{"type":"text","width":24,"height":1,"x":0,"y":54,"properties":{"markdown":"### Kendra [**Index**](https://", + "","CollectionName","name",{"stat":"Sum"}]],"yAxis":{}}},{"type":"text","width":24,"height":1,"x":0,"y":60,"properties":{"markdown":"### Kendra [**Index**](https://", { "Ref": "AWS::Region", }, @@ -220,7 +224,7 @@ exports[`snapshot test 1`] = ` { "Ref": "Index", }, - 
")"}},{"type":"metric","width":24,"height":5,"x":0,"y":55,"properties":{"view":"timeSeries","title":"Search","region":"", + ")"}},{"type":"metric","width":24,"height":5,"x":0,"y":61,"properties":{"view":"timeSeries","title":"Search","region":"", { "Ref": "AWS::Region", }, @@ -232,7 +236,7 @@ exports[`snapshot test 1`] = ` { "Ref": "Index", }, - "",{"stat":"Sum","yAxis":"right"}]],"yAxis":{}}},{"type":"text","width":24,"height":1,"x":0,"y":60,"properties":{"markdown":"# Requests Processing"}},{"type":"text","width":24,"height":1,"x":0,"y":61,"properties":{"markdown":"# RAG Processing"}},{"type":"text","width":24,"height":1,"x":0,"y":62,"properties":{"markdown":"### State Machine **[StateMachine](https://", + "",{"stat":"Sum","yAxis":"right"}]],"yAxis":{}}},{"type":"text","width":24,"height":1,"x":0,"y":66,"properties":{"markdown":"# Requests Processing"}},{"type":"text","width":24,"height":1,"x":0,"y":67,"properties":{"markdown":"# RAG Processing"}},{"type":"text","width":24,"height":1,"x":0,"y":68,"properties":{"markdown":"### State Machine **[StateMachine](https://", { "Ref": "AWS::Region", }, @@ -252,7 +256,7 @@ exports[`snapshot test 1`] = ` { "Ref": "AWS::AccountId", }, - ":stateMachine:Name)**"}},{"type":"metric","width":6,"height":5,"x":0,"y":63,"properties":{"view":"timeSeries","title":"Duration","region":"", + ":stateMachine:Name)**"}},{"type":"metric","width":6,"height":5,"x":0,"y":69,"properties":{"view":"timeSeries","title":"Duration","region":"", { "Ref": "AWS::Region", }, @@ -292,7 +296,7 @@ exports[`snapshot test 1`] = ` { "Ref": "AWS::AccountId", }, - ":stateMachine:Name",{"label":"P99","stat":"p99"}]],"yAxis":{"left":{"min":0,"label":"ms","showUnits":false}}}},{"type":"metric","width":12,"height":5,"x":6,"y":63,"properties":{"view":"timeSeries","title":"Executions","region":"", + ":stateMachine:Name",{"label":"P99","stat":"p99"}]],"yAxis":{"left":{"min":0,"label":"ms","showUnits":false}}}},{"type":"metric","width":12,"height":5,"x":6,"y":69,"properties":{"view":"timeSeries","title":"Executions","region":"", { "Ref": "AWS::Region", }, @@ -368,7 +372,7 @@ exports[`snapshot test 1`] = ` { "Ref": "AWS::AccountId", }, - ":stateMachine:Name",{"label":"Timeout","stat":"Sum"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false}}}},{"type":"metric","width":6,"height":5,"x":18,"y":63,"properties":{"view":"timeSeries","title":"Errors (rate)","region":"", + ":stateMachine:Name",{"label":"Timeout","stat":"Sum"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false}}}},{"type":"metric","width":6,"height":5,"x":18,"y":69,"properties":{"view":"timeSeries","title":"Errors (rate)","region":"", { "Ref": "AWS::Region", }, @@ -384,7 +388,7 @@ exports[`snapshot test 1`] = ` { "Ref": "AWS::AccountId", }, - ":stateMachine:Name",{"label":"Failed (avg)"}]],"yAxis":{}}},{"type":"text","width":24,"height":1,"x":0,"y":68,"properties":{"markdown":"### Lambda Function **[Function](https://", + ":stateMachine:Name",{"label":"Failed (avg)"}]],"yAxis":{}}},{"type":"text","width":24,"height":1,"x":0,"y":74,"properties":{"markdown":"### Lambda Function **[Function](https://", { "Ref": "AWS::Region", }, @@ -423,7 +427,7 @@ exports[`snapshot test 1`] = ` }, ], }, - ")**"}},{"type":"metric","width":6,"height":5,"x":0,"y":69,"properties":{"view":"timeSeries","title":"TPS","region":"", + ")**"}},{"type":"metric","width":6,"height":5,"x":0,"y":75,"properties":{"view":"timeSeries","title":"TPS","region":"", { "Ref": "AWS::Region", }, @@ -458,7 +462,7 @@ exports[`snapshot test 1`] = ` }, ], }, - 
"",{"label":"Invocations","stat":"Sum","visible":false,"id":"requests"}]],"yAxis":{"left":{"min":0,"label":"Rate","showUnits":false}}}},{"type":"metric","width":6,"height":5,"x":6,"y":69,"properties":{"view":"timeSeries","title":"Latency","region":"", + "",{"label":"Invocations","stat":"Sum","visible":false,"id":"requests"}]],"yAxis":{"left":{"min":0,"label":"Rate","showUnits":false}}}},{"type":"metric","width":6,"height":5,"x":6,"y":75,"properties":{"view":"timeSeries","title":"Latency","region":"", { "Ref": "AWS::Region", }, @@ -555,7 +559,7 @@ exports[`snapshot test 1`] = ` }, ], }, - "",{"label":"P99 (avg: \${AVG})","stat":"p99"}]],"yAxis":{"left":{"min":0,"label":"ms","showUnits":false}}}},{"type":"metric","width":6,"height":5,"x":12,"y":69,"properties":{"view":"timeSeries","title":"Errors (rate)","region":"", + "",{"label":"P99 (avg: \${AVG})","stat":"p99"}]],"yAxis":{"left":{"min":0,"label":"ms","showUnits":false}}}},{"type":"metric","width":6,"height":5,"x":12,"y":75,"properties":{"view":"timeSeries","title":"Errors (rate)","region":"", { "Ref": "AWS::Region", }, @@ -590,7 +594,7 @@ exports[`snapshot test 1`] = ` }, ], }, - "",{"label":"Faults (avg)"}]],"yAxis":{"left":{"min":0,"label":"Rate","showUnits":false}}}},{"type":"metric","width":6,"height":5,"x":18,"y":69,"properties":{"view":"timeSeries","title":"Rates","region":"", + "",{"label":"Faults (avg)"}]],"yAxis":{"left":{"min":0,"label":"Rate","showUnits":false}}}},{"type":"metric","width":6,"height":5,"x":18,"y":75,"properties":{"view":"timeSeries","title":"Rates","region":"", { "Ref": "AWS::Region", }, @@ -656,7 +660,7 @@ exports[`snapshot test 1`] = ` }, ], }, - "",{"label":"Provisioned Concurrency Spillovers (avg)"}]],"yAxis":{"left":{"min":0,"label":"Rate","showUnits":false}}}},{"type":"metric","width":8,"height":5,"x":0,"y":74,"properties":{"view":"timeSeries","title":"Invocations","region":"", + "",{"label":"Provisioned Concurrency Spillovers (avg)"}]],"yAxis":{"left":{"min":0,"label":"Rate","showUnits":false}}}},{"type":"metric","width":8,"height":5,"x":0,"y":80,"properties":{"view":"timeSeries","title":"Invocations","region":"", { "Ref": "AWS::Region", }, @@ -784,7 +788,7 @@ exports[`snapshot test 1`] = ` }, ], }, - "",{"label":"Provisioned Concurrency Spillovers","stat":"Sum"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false}}}},{"type":"metric","width":8,"height":5,"x":8,"y":74,"properties":{"view":"timeSeries","title":"Iterator","region":"", + "",{"label":"Provisioned Concurrency Spillovers","stat":"Sum"}]],"yAxis":{"left":{"min":0,"label":"Count","showUnits":false}}}},{"type":"metric","width":8,"height":5,"x":8,"y":80,"properties":{"view":"timeSeries","title":"Iterator","region":"", { "Ref": "AWS::Region", }, @@ -819,7 +823,7 @@ exports[`snapshot test 1`] = ` }, ], }, - "",{"label":"Iterator Age","stat":"Maximum"}]],"yAxis":{"left":{"min":0,"label":"ms","showUnits":false}}}},{"type":"metric","width":8,"height":5,"x":16,"y":74,"properties":{"view":"timeSeries","title":"Errors","region":"", + "",{"label":"Iterator Age","stat":"Maximum"}]],"yAxis":{"left":{"min":0,"label":"ms","showUnits":false}}}},{"type":"metric","width":8,"height":5,"x":16,"y":80,"properties":{"view":"timeSeries","title":"Errors","region":"", { "Ref": "AWS::Region", }, @@ -862,6 +866,26 @@ exports[`snapshot test 1`] = ` }, "Type": "AWS::CloudWatch::Dashboard", }, + "MonitoringUsageFilter0A1660EBB": { + "Properties": { + "FilterPattern": "{ $.metric_type = "token_usage" }", + "LogGroupName": "Test2", + "MetricTransformations": [ + { + 
"Dimensions": [ + { + "Key": "model", + "Value": "$.model", + }, + ], + "MetricName": "TokenUsage", + "MetricNamespace": "GenAI", + "MetricValue": "$.value", + }, + ], + }, + "Type": "AWS::Logs::MetricFilter", + }, "Queue4A7E3555": { "DeletionPolicy": "Delete", "Properties": { diff --git a/tests/monitoring/monitoring-contruct.test.ts b/tests/monitoring/monitoring-contruct.test.ts index b883328ee..279c888f0 100644 --- a/tests/monitoring/monitoring-contruct.test.ts +++ b/tests/monitoring/monitoring-contruct.test.ts @@ -32,6 +32,7 @@ new Queue(stack, "Queue", { new Monitoring(stack, "Monitoring", { prefix: "", + advancedMonitoring: true, appsycnApi: GraphqlApi.fromGraphqlApiAttributes(stack, "GraphQL", { graphqlApiId: "graphqlApiId", }), From 9de3e559a4d744aab2091290e008cd620d9cb5a2 Mon Sep 17 00:00:00 2001 From: Charles Marion Date: Wed, 4 Sep 2024 15:25:19 +0000 Subject: [PATCH 3/3] Format --- integtests/chatbot-api/session_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/integtests/chatbot-api/session_test.py b/integtests/chatbot-api/session_test.py index 446558587..d6536171e 100644 --- a/integtests/chatbot-api/session_test.py +++ b/integtests/chatbot-api/session_test.py @@ -42,7 +42,6 @@ def test_create_session(client, default_model, default_provider, session_id): assert sessionFound.get("title") == request.get("data").get("text") - def test_get_session(client, session_id, default_model): session = client.get_session(session_id) assert session.get("id") == session_id