-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Changes in this commit:
* feat: added integration tests for `max_tokens` and stop sequences
* feat: allowed empty messages in each language model
* feat: supported history truncation via the `max_prompt_tokens` / `discarded_messages` parameters
* feat: supported streaming for Titan and Claude
* fix: used the number of bytes as the token-count estimator for AI21 and AWS Titan
* fix: removed the "Assistant" prefix occasionally generated by Titan
* fix: fixed the AI21 temperature setting
* chore: bumped the version of aidial-sdk to 0.1.2
- Loading branch information
Showing 46 changed files with 1,954 additions and 1,006 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
import asyncio
from typing import Optional, Set

from aidial_sdk.chat_completion import ChatCompletion, Request, Response

from aidial_adapter_bedrock.llm.consumer import ChoiceConsumer
from aidial_adapter_bedrock.llm.model.adapter import get_bedrock_adapter
from aidial_adapter_bedrock.server.exceptions import dial_exception_decorator
from aidial_adapter_bedrock.universal_api.request import ModelParameters
from aidial_adapter_bedrock.universal_api.token_usage import TokenUsage
from aidial_adapter_bedrock.utils.log_config import app_logger as log


class BedrockChatCompletion(ChatCompletion):
    """DIAL chat-completion implementation backed by AWS Bedrock models.

    The adapter for the concrete Bedrock model is resolved per request from
    the deployment id; chat-emulation configuration was dropped from the
    constructor in this commit (it is now handled inside the adapter).
    """

    # AWS region the Bedrock client is created in.
    region: str

    def __init__(self, region: str):
        self.region = region

    @dial_exception_decorator
    async def chat_completion(self, request: Request, response: Response):
        """Handle one chat-completion request, producing `request.n` choices.

        All choices are generated concurrently; token usage is accumulated
        across them, and the discarded-messages count (history truncation via
        max_prompt_tokens) is reported when the model provides one.
        """
        model_params = ModelParameters.create(request)
        model = await get_bedrock_adapter(
            region=self.region,
            model_id=request.deployment_id,
        )

        async def generate_response(
            usage: TokenUsage,
            discarded_messages_set: Set[Optional[int]],
            choice_idx: int,
        ) -> None:
            # Each coroutine owns one streamed choice; usage and the
            # discarded-message count are folded into the shared accumulators.
            with response.create_choice() as choice:
                consumer = ChoiceConsumer(choice)
                await model.achat(consumer, model_params, request.messages)
                usage.accumulate(consumer.usage)
                discarded_messages_set.add(consumer.discarded_messages)

        usage = TokenUsage()
        discarded_messages_set: Set[Optional[int]] = set()

        await asyncio.gather(
            *(
                generate_response(usage, discarded_messages_set, idx)
                for idx in range(request.n or 1)
            )
        )

        log.debug(f"usage: {usage}")
        response.set_usage(usage.prompt_tokens, usage.completion_tokens)

        # Truncation is deterministic given identical inputs, so every choice
        # must have discarded the same number of messages; a set with more
        # than one element indicates an adapter bug.
        # NOTE(review): `assert` is stripped under `python -O`; consider a
        # explicit raise if this invariant must hold in production.
        assert (
            len(discarded_messages_set) == 1
        ), "Discarded messages count must be the same for each choice."

        discarded_messages = next(iter(discarded_messages_set))
        if discarded_messages is not None:
            response.set_discarded_messages(discarded_messages)
Oops, something went wrong.