From 61a9e91cd8bb2a103d13caf982c9e2192ccb1829 Mon Sep 17 00:00:00 2001 From: kunyuan Date: Tue, 15 Aug 2023 01:04:55 +0800 Subject: [PATCH 1/6] slowapi rate limit support --- .env | 29 +++++++++++++++++++++------ .env.example | 15 ++++++++++++++ openai_forward/app.py | 20 +++++++++++++++--- openai_forward/content/chat.py | 3 ++- openai_forward/forwarding/base.py | 8 ++++++-- openai_forward/forwarding/extra.py | 1 + openai_forward/forwarding/openai.py | 1 + openai_forward/forwarding/settings.py | 24 ++++++++++++++++++++-- openai_forward/helper.py | 25 +++++++++++++++++++++++ pyproject.toml | 11 ++++++++-- 10 files changed, 121 insertions(+), 16 deletions(-) diff --git a/.env b/.env index 3b7daa4..a8183be 100644 --- a/.env +++ b/.env @@ -1,22 +1,39 @@ # 示例见 .env.example -# LOG_CHAT: 是否开启日志 +# `LOG_CHAT`: 是否记录日志 LOG_CHAT=false -# OPENAI_BASE_URL: 转发openai风格的任何服务地址,允许指定多个, 以逗号隔开。 +# `OPENAI_BASE_URL`: 转发openai风格的任何服务地址,允许指定多个, 以逗号隔开。 # 如果指定超过一个,则任何OPENAI_ROUTE_PREFIX/EXTRA_ROUTE_PREFIX都不能为根路由/ OPENAI_BASE_URL=https://api.openai.com -# OPENAI_ROUTE_PREFIX: 可指定所有openai风格(为记录日志)服务的转发路由前缀 -OPENAI_ROUTE_PREFIX=/ +# `OPENAI_ROUTE_PREFIX`: 可指定所有openai风格(为记录日志)服务的转发路由前缀 +OPENAI_ROUTE_PREFIX=/openai OPENAI_API_KEY= FORWARD_KEY= -# EXTRA_BASE_URL: 可指定任意服务转发 +# `EXTRA_BASE_URL`: 可指定任意服务转发 EXTRA_BASE_URL= -# EXTRA_ROUTE_PREFIX: 与 EXTRA_BASE_URL 匹配的路由前缀 +# `EXTRA_ROUTE_PREFIX`: 与 EXTRA_BASE_URL 匹配的路由前缀 EXTRA_ROUTE_PREFIX= + +# REATE `LIMIT`: 指定路由的请求速率限制 +# format: {route: ratelimit-string} +# ratelimit-string format [count] [per|/] [n (optional)] [second|minute|hour|day|month|year] :ref:`ratelimit-string`: https://limits.readthedocs.io/en/stable/quickstart.html#rate-limit-string-notation +RATE_LIMIT='{ +"/healthz": "50/3minutes", +"/openai/v1/chat/completions": "1/10seconds", +"/localai/v1/chat/completions": "2/second" +}' + +#`RATE_LIMIT_STRATEGY` Options: (fixed-window, fixed-window-elastic-expiry, moving-window) ref: https://limits.readthedocs.io/en/latest/strategies.html +# `fixed-window`: most memory efficient strategy; `moving-window`:most effective for preventing bursts but higher memory cost. +RATE_LIMIT_STRATEGY=fixed-window + +# `GLOBAL_RATE_LIMIT`: 所有`RATE_LIMIT`没有指定的路由. 不填默认无限制 +GLOBAL_RATE_LIMIT=1/5seconds + # 设定时区 TZ=Asia/Shanghai \ No newline at end of file diff --git a/.env.example b/.env.example index 06c0a40..e537fc8 100644 --- a/.env.example +++ b/.env.example @@ -22,6 +22,21 @@ EXTRA_BASE_URL=http://localhost:8882, http://localhost:8881 # EXTRA_ROUTE_PREFIX: 与 EXTRA_BASE_URL 匹配的路由前缀 EXTRA_ROUTE_PREFIX=/tts,/translate +# REATE LIMIT: 指定路由的请求速率限制 +# format: {route: ratelimit-string} +# ratelimit-string format [count] [per|/] [n (optional)] [second|minute|hour|day|month|year] :ref:`ratelimit-string`: https://limits.readthedocs.io/en/stable/quickstart.html#rate-limit-string-notation +RATE_LIMIT='{ +"/healthz": "50/3minutes", +"/openai/v1/chat/completions": "1/10seconds", +"/localai/v1/chat/completions": "2/second"}' + +# GLOBAL_RATE_LIMIT: 所有`RATE_LIMIT`没有指定的路由. 不填默认无限制 +GLOBAL_RATE_LIMIT=2/5seconds + +#`RATE_LIMIT_STRATEGY` Options: (fixed-window, fixed-window-elastic-expiry, moving-window) ref: https://limits.readthedocs.io/en/latest/strategies.html +# `fixed-window`: most memory efficient strategy; `moving-window`:most effective for preventing bursts but higher memory cost. +RATE_LIMIT_STRATEGY=fixed-window + # PROXY 配置代理 PROXY=http://localhost:7890 diff --git a/openai_forward/app.py b/openai_forward/app.py index e800e67..b751e4e 100644 --- a/openai_forward/app.py +++ b/openai_forward/app.py @@ -1,9 +1,21 @@ -from fastapi import FastAPI, status +from fastapi import FastAPI, Request, status +from slowapi import Limiter, _rate_limit_exceeded_handler +from slowapi.errors import RateLimitExceeded from .forwarding import get_fwd_anything_objs, get_fwd_openai_style_objs +from .forwarding.settings import ( + RATE_LIMIT_STRATEGY, + dynamic_rate_limit, + get_limiter_key, +) + +limiter = Limiter(key_func=get_limiter_key, strategy=RATE_LIMIT_STRATEGY) app = FastAPI(title="openai_forward", version="0.4") +app.state.limiter = limiter +app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) + @app.get( "/healthz", @@ -11,13 +23,15 @@ response_description="Return HTTP Status Code 200 (OK)", status_code=status.HTTP_200_OK, ) -def healthz(): +@limiter.limit(dynamic_rate_limit) +def healthz(request: Request): + print(request.scope.get("client")) return "OK" add_route = lambda obj: app.add_route( obj.ROUTE_PREFIX + "{api_path:path}", - obj.reverse_proxy, + limiter.limit(dynamic_rate_limit)(obj.reverse_proxy), methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "PATCH", "TRACE"], ) diff --git a/openai_forward/content/chat.py b/openai_forward/content/chat.py index 3e7a9cd..1532081 100644 --- a/openai_forward/content/chat.py +++ b/openai_forward/content/chat.py @@ -7,6 +7,7 @@ from loguru import logger from orjson import JSONDecodeError +from ..helper import get_client_ip from .decode import parse_to_lines @@ -23,7 +24,7 @@ async def parse_payload(request: Request): content = { "messages": [{msg["role"]: msg["content"]} for msg in msgs], "model": model, - "forwarded-for": request.headers.get("x-forwarded-for") or "", + "forwarded-for": get_client_ip(request) or "", "uid": uid, "datetime": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), } diff --git a/openai_forward/forwarding/base.py b/openai_forward/forwarding/base.py index 97c822d..3aa46e7 100644 --- a/openai_forward/forwarding/base.py +++ b/openai_forward/forwarding/base.py @@ -16,6 +16,7 @@ class ForwardingBase: BASE_URL = None ROUTE_PREFIX = None client: httpx.AsyncClient = None + # token_counts: int = None if IP_BLACKLIST or IP_WHITELIST: validate_host = True else: @@ -104,6 +105,9 @@ def prepare_client(self, request: Request): async def reverse_proxy(self, request: Request): assert self.client is not None + # if token_limit: + # assert self.token_counts is not None + client_config = self.prepare_client(request) r = await self.try_send(client_config, request) @@ -164,7 +168,7 @@ async def _add_payload_log(self, request: Request, url_path: str): ) return uid - async def aiter_bytes( + async def aiter_append( self, r: httpx.Response, request: Request, route_path: str, uid: str ): byte_list = [] @@ -200,7 +204,7 @@ def set_apikey_from_preset(): r = await self.try_send(client_config, request) return StreamingResponse( - self.aiter_bytes(r, request, url_path, uid), + self.aiter_append(r, request, url_path, uid), status_code=r.status_code, media_type=r.headers.get("content-type"), ) diff --git a/openai_forward/forwarding/extra.py b/openai_forward/forwarding/extra.py index c64cf66..5cc0d3c 100644 --- a/openai_forward/forwarding/extra.py +++ b/openai_forward/forwarding/extra.py @@ -11,6 +11,7 @@ def __init__(self, base_url: str, route_prefix: str, proxy=None): self.client = httpx.AsyncClient( base_url=self.BASE_URL, proxies=proxy, http1=True, http2=False ) + self.token_counts = 0 print_startup_info(self.BASE_URL, self.ROUTE_PREFIX, [], "\\", LOG_CHAT) diff --git a/openai_forward/forwarding/openai.py b/openai_forward/forwarding/openai.py index 13aed15..a34a904 100644 --- a/openai_forward/forwarding/openai.py +++ b/openai_forward/forwarding/openai.py @@ -12,6 +12,7 @@ def __init__(self, base_url: str, route_prefix: str, proxy=None): self.client = httpx.AsyncClient( base_url=self.BASE_URL, proxies=proxy, http1=True, http2=False ) + self.token_counts = 0 print_startup_info( self.BASE_URL, self.ROUTE_PREFIX, diff --git a/openai_forward/forwarding/settings.py b/openai_forward/forwarding/settings.py index 43bf9b2..0673388 100644 --- a/openai_forward/forwarding/settings.py +++ b/openai_forward/forwarding/settings.py @@ -1,7 +1,9 @@ import os +from fastapi import Request + from ..config import setting_log -from ..helper import env2list, format_route_prefix +from ..helper import env2dict, env2list, format_route_prefix, get_client_ip ENV_VAR_SEP = "," OPENAI_BASE_URL = env2list("OPENAI_BASE_URL", sep=ENV_VAR_SEP) or [ @@ -19,7 +21,7 @@ LOG_CHAT = os.environ.get("LOG_CHAT", "False").strip().lower() == "true" if LOG_CHAT: - setting_log(save_file=False) + setting_log() IP_WHITELIST = env2list("IP_WHITELIST", sep=ENV_VAR_SEP) IP_BLACKLIST = env2list("IP_BLACKLIST", sep=ENV_VAR_SEP) @@ -29,3 +31,21 @@ PROXY = os.environ.get("PROXY", "").strip() PROXY = PROXY if PROXY else None + +GLOBAL_RATE_LIMIT = os.environ.get("GLOBAL_RATE_LIMIT").strip() or None +RATE_LIMIT_STRATEGY = os.environ.get("RATE_LIMIT_STRATEGY").strip() or None +rate_limit_conf = env2dict('RATE_LIMIT') +print(f"{rate_limit_conf=}") + + +def get_limiter_key(request: Request): + limiter_prefix = f"{request.scope.get('root_path')}{request.scope.get('path')}" + key = f"{limiter_prefix}" # -{get_client_ip(request)}" + return key + + +def dynamic_rate_limit(key: str): + for route in rate_limit_conf: + if key.startswith(route): + return rate_limit_conf[route] + return GLOBAL_RATE_LIMIT diff --git a/openai_forward/helper.py b/openai_forward/helper.py index 9c16326..184954b 100644 --- a/openai_forward/helper.py +++ b/openai_forward/helper.py @@ -5,9 +5,18 @@ from typing import Dict, List, Union import orjson +from fastapi import Request from rich import print +def get_client_ip(request: Request): + if "x-forwarded-for" in request.headers: + return request.headers["x-forwarded-for"] + elif not request.client or not request.client.host: + return "127.0.0.1" + return request.client.host + + def relp(rel_path: Union[str, Path], parents=0, return_str=True, strict=False): currentframe = inspect.currentframe() f = currentframe.f_back @@ -51,6 +60,13 @@ def json_dump( f.write(orjson.dumps(data, option=orjson_option)) +def toml_load(filepath: str, rel=False): + import toml + + abs_path = relp(filepath, parents=1) if rel else filepath + return toml.load(abs_path) + + def str2list(s: str, sep): if s: return [i.strip() for i in s.split(sep) if i.strip()] @@ -62,6 +78,15 @@ def env2list(env_name: str, sep=","): return str2list(os.environ.get(env_name, "").strip(), sep=sep) +def env2dict(env_name: str) -> Dict: + if not env_name: + return {} + import json + + env_str = os.environ.get(env_name, "").strip() + return json.loads(env_str) + + def format_route_prefix(route_prefix: str): if route_prefix: if route_prefix.endswith("/"): diff --git a/pyproject.toml b/pyproject.toml index 747eec0..b2fe1cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ dependencies = [ "rich", "fire", "pytz", + "slowapi==0.1.8", ] dynamic = ["version"] @@ -40,9 +41,15 @@ Source = "https://github.com/beidongjiedeguang/openai-forward" [project.optional-dependencies] test = [ - "openai>=0.27.8", "pytest", - "sparrow-python" + "openai>=0.27.8", + "sparrow-python", +] + +dev = [ + "openai>=0.27.8", + "sparrow-python", + "schedule", ] [project.scripts] From 763ab5b4ed70836ce04a8e74e9598403c847dbcd Mon Sep 17 00:00:00 2001 From: kunyuan Date: Tue, 15 Aug 2023 17:33:41 +0800 Subject: [PATCH 2/6] Add tpm && fix multi-target log --- .env | 19 ++++---- .env.example | 2 +- openai_forward/app.py | 3 +- openai_forward/config.py | 68 ++++++++++++++++++--------- openai_forward/content/chat.py | 6 ++- openai_forward/content/extra.py | 5 +- openai_forward/content/whisper.py | 5 +- openai_forward/forwarding/base.py | 44 ++++++++++------- openai_forward/forwarding/extra.py | 5 +- openai_forward/forwarding/openai.py | 8 +++- openai_forward/forwarding/settings.py | 19 +++++++- 11 files changed, 124 insertions(+), 60 deletions(-) diff --git a/.env b/.env index a8183be..7029100 100644 --- a/.env +++ b/.env @@ -1,14 +1,14 @@ # 示例见 .env.example # `LOG_CHAT`: 是否记录日志 -LOG_CHAT=false +LOG_CHAT=true # `OPENAI_BASE_URL`: 转发openai风格的任何服务地址,允许指定多个, 以逗号隔开。 # 如果指定超过一个,则任何OPENAI_ROUTE_PREFIX/EXTRA_ROUTE_PREFIX都不能为根路由/ -OPENAI_BASE_URL=https://api.openai.com +OPENAI_BASE_URL=https://api.openai-forward.com, https://api.openai.com # `OPENAI_ROUTE_PREFIX`: 可指定所有openai风格(为记录日志)服务的转发路由前缀 -OPENAI_ROUTE_PREFIX=/openai +OPENAI_ROUTE_PREFIX=/openai, /localai OPENAI_API_KEY= FORWARD_KEY= @@ -19,21 +19,24 @@ EXTRA_BASE_URL= EXTRA_ROUTE_PREFIX= -# REATE `LIMIT`: 指定路由的请求速率限制 +# RATE `LIMIT`: 指定路由的请求速率限制 # format: {route: ratelimit-string} # ratelimit-string format [count] [per|/] [n (optional)] [second|minute|hour|day|month|year] :ref:`ratelimit-string`: https://limits.readthedocs.io/en/stable/quickstart.html#rate-limit-string-notation RATE_LIMIT='{ "/healthz": "50/3minutes", -"/openai/v1/chat/completions": "1/10seconds", -"/localai/v1/chat/completions": "2/second" +"/openai/v1/chat/completions": "3/2seconds", +"/localai/v1/chat/completions": "5/second" }' #`RATE_LIMIT_STRATEGY` Options: (fixed-window, fixed-window-elastic-expiry, moving-window) ref: https://limits.readthedocs.io/en/latest/strategies.html # `fixed-window`: most memory efficient strategy; `moving-window`:most effective for preventing bursts but higher memory cost. -RATE_LIMIT_STRATEGY=fixed-window +RATE_LIMIT_STRATEGY=moving-window # `GLOBAL_RATE_LIMIT`: 所有`RATE_LIMIT`没有指定的路由. 不填默认无限制 -GLOBAL_RATE_LIMIT=1/5seconds +GLOBAL_RATE_LIMIT=10/5seconds + +# same as TPM +TOKEN_RATE_LIMIT=10/1seconds # 设定时区 TZ=Asia/Shanghai \ No newline at end of file diff --git a/.env.example b/.env.example index e537fc8..411e155 100644 --- a/.env.example +++ b/.env.example @@ -22,7 +22,7 @@ EXTRA_BASE_URL=http://localhost:8882, http://localhost:8881 # EXTRA_ROUTE_PREFIX: 与 EXTRA_BASE_URL 匹配的路由前缀 EXTRA_ROUTE_PREFIX=/tts,/translate -# REATE LIMIT: 指定路由的请求速率限制 +# RATE LIMIT: 指定路由的请求速率限制 # format: {route: ratelimit-string} # ratelimit-string format [count] [per|/] [n (optional)] [second|minute|hour|day|month|year] :ref:`ratelimit-string`: https://limits.readthedocs.io/en/stable/quickstart.html#rate-limit-string-notation RATE_LIMIT='{ diff --git a/openai_forward/app.py b/openai_forward/app.py index b751e4e..d536181 100644 --- a/openai_forward/app.py +++ b/openai_forward/app.py @@ -31,7 +31,8 @@ def healthz(request: Request): add_route = lambda obj: app.add_route( obj.ROUTE_PREFIX + "{api_path:path}", - limiter.limit(dynamic_rate_limit)(obj.reverse_proxy), + # limiter.limit(dynamic_rate_limit)(obj.reverse_proxy), + obj.reverse_proxy, methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "PATCH", "TRACE"], ) diff --git a/openai_forward/config.py b/openai_forward/config.py index ef7e12a..fb6d134 100644 --- a/openai_forward/config.py +++ b/openai_forward/config.py @@ -1,3 +1,4 @@ +import functools import logging import os import sys @@ -54,7 +55,13 @@ def emit(self, record): ) -def setting_log(save_file=False, log_name="openai_forward", multi_process=True): +def setting_log( + save_file=False, + openai_route_prefix=None, + extra_route_prefix=None, + log_name="openai_forward", + multi_process=True, +): # TODO 修复时区配置 if os.environ.get("TZ") == "Asia/Shanghai": os.environ["TZ"] = "UTC-8" @@ -68,28 +75,45 @@ def setting_log(save_file=False, log_name="openai_forward", multi_process=True): config_handlers = [ {"sink": sys.stdout, "level": "DEBUG"}, - { - "sink": f"./Log/chat/chat.log", - "enqueue": multi_process, - "rotation": "50 MB", - "filter": lambda record: "chat" in record["extra"], - "format": "{message}", - }, - { - "sink": f"./Log/whisper/whisper.log", - "enqueue": multi_process, - "rotation": "30 MB", - "filter": lambda record: "whisper" in record["extra"], - "format": "{message}", - }, - { - "sink": f"./Log/extra/extra.log", - "enqueue": multi_process, - "rotation": "50 MB", - "filter": lambda record: "extra" in record["extra"], - "format": "{message}", - }, ] + + def filter_func(_prefix, _postfix, record): + chat_key = f"{_prefix}{_postfix}" + return chat_key in record["extra"] + + for prefix in openai_route_prefix or []: + _prefix = prefix.replace('/', '_') + + config_handlers.extend( + [ + { + "sink": f"./Log/chat/{_prefix}/chat.log", + "enqueue": multi_process, + "rotation": "50 MB", + "filter": functools.partial(filter_func, _prefix, "_chat"), + "format": "{message}", + }, + { + "sink": f"./Log/whisper/{_prefix}/whisper.log", + "enqueue": multi_process, + "rotation": "30 MB", + "filter": functools.partial(filter_func, _prefix, "_whisper"), + "format": "{message}", + }, + ] + ) + + for prefix in extra_route_prefix or []: + _prefix = prefix.replace('/', '_') + config_handlers.append( + { + "sink": f"./Log/extra/{_prefix}/extra.log", + "enqueue": multi_process, + "rotation": "50 MB", + "filter": functools.partial(filter_func, _prefix, "_extra"), + "format": "{message}", + } + ) if save_file: config_handlers += [ { diff --git a/openai_forward/content/chat.py b/openai_forward/content/chat.py index 1532081..cf119f1 100644 --- a/openai_forward/content/chat.py +++ b/openai_forward/content/chat.py @@ -12,8 +12,10 @@ class ChatSaver: - def __init__(self): - self.logger = logger.bind(chat=True) + def __init__(self, route_prefix: str): + _prefix = route_prefix.replace('/', '_') + kwargs = {_prefix + "_chat": True} + self.logger = logger.bind(**kwargs) @staticmethod async def parse_payload(request: Request): diff --git a/openai_forward/content/extra.py b/openai_forward/content/extra.py index 980e57f..2af8268 100644 --- a/openai_forward/content/extra.py +++ b/openai_forward/content/extra.py @@ -2,8 +2,9 @@ class ExtraForwardingSaver: - def __init__(self): - self.logger = logger.bind(extra=True) + def __init__(self, route_prefix: str): + _prefix = route_prefix.replace('/', '_') + self.logger = logger.bind(**{f"{_prefix}_extra": True}) def add_log(self, bytes_: bytes): text_content = bytes_.decode("utf-8") diff --git a/openai_forward/content/whisper.py b/openai_forward/content/whisper.py index cda6cbf..b210d89 100644 --- a/openai_forward/content/whisper.py +++ b/openai_forward/content/whisper.py @@ -2,8 +2,9 @@ class WhisperSaver: - def __init__(self): - self.logger = logger.bind(whisper=True) + def __init__(self, route_prefix: str): + _prefix = route_prefix.replace('/', '_') + self.logger = logger.bind(**{f"{_prefix}_whisper": True}) def add_log(self, bytes_: bytes): text_content = bytes_.decode("utf-8") diff --git a/openai_forward/forwarding/base.py b/openai_forward/forwarding/base.py index 3aa46e7..a00538f 100644 --- a/openai_forward/forwarding/base.py +++ b/openai_forward/forwarding/base.py @@ -1,3 +1,4 @@ +import time import traceback import uuid from itertools import cycle @@ -16,7 +17,9 @@ class ForwardingBase: BASE_URL = None ROUTE_PREFIX = None client: httpx.AsyncClient = None - # token_counts: int = None + + if LOG_CHAT: + extrasaver: ExtraForwardingSaver = None if IP_BLACKLIST or IP_WHITELIST: validate_host = True else: @@ -24,9 +27,6 @@ class ForwardingBase: timeout = 600 - if LOG_CHAT: - extrasaver = ExtraForwardingSaver() - @staticmethod def validate_request_host(ip): if IP_WHITELIST and ip not in IP_WHITELIST: @@ -40,16 +40,15 @@ def validate_request_host(ip): detail=f"Forbidden, ip={ip} in blacklist!", ) - @classmethod async def aiter_bytes( - cls, r: httpx.Response, **kwargs + self, r: httpx.Response, **kwargs ) -> AsyncGenerator[bytes, Any]: bytes_ = b"" async for chunk in r.aiter_bytes(): bytes_ += chunk yield chunk await r.aclose() - cls.extrasaver.add_log(bytes_) + self.extrasaver.add_log(bytes_) async def try_send(self, client_config: dict, request: Request): try: @@ -85,11 +84,11 @@ def prepare_client(self, request: Request): ip = request.headers.get("x-forwarded-for") or "" self.validate_request_host(ip) - url_path = request.url.path + _url_path = request.url.path prefix_index = 0 if self.ROUTE_PREFIX == '/' else len(self.ROUTE_PREFIX) - route_path = url_path[prefix_index:] - url = httpx.URL(path=route_path, query=request.url.query.encode("utf-8")) + url_path = _url_path[prefix_index:] + url = httpx.URL(path=url_path, query=request.url.query.encode("utf-8")) headers = dict(request.headers) auth = headers.pop("authorization", "") content_type = headers.pop("content-type", "application/json") @@ -98,15 +97,13 @@ def prepare_client(self, request: Request): 'auth': auth, 'headers': auth_headers_dict, 'url': url, - 'url_path': route_path, + 'url_path': url_path, } return client_config async def reverse_proxy(self, request: Request): assert self.client is not None - # if token_limit: - # assert self.token_counts is not None client_config = self.prepare_client(request) @@ -123,9 +120,8 @@ class OpenaiBase(ForwardingBase): _cycle_api_key = cycle(OPENAI_API_KEY) _no_auth_mode = OPENAI_API_KEY != [] and FWD_KEY == set() - if LOG_CHAT: - chatsaver = ChatSaver() - whispersaver = WhisperSaver() + chatsaver: ChatSaver = None + whispersaver: WhisperSaver = None def _add_result_log( self, byte_list: List[bytes], uid: str, route_path: str, request_method: str @@ -168,12 +164,24 @@ async def _add_payload_log(self, request: Request, url_path: str): ) return uid - async def aiter_append( + async def aiter_bytes( self, r: httpx.Response, request: Request, route_path: str, uid: str ): byte_list = [] + start_time = time.perf_counter() + idx = 0 async for chunk in r.aiter_bytes(): + idx += 1 byte_list.append(chunk) + # print(f"{chunk=}") + if TOKEN_INTERVAL > 0: + current_time = time.perf_counter() + delta = current_time - start_time + sleep_time = TOKEN_INTERVAL - delta + print(f"{delta=} {sleep_time=}") + if sleep_time > 0: + time.sleep(sleep_time) + start_time = time.perf_counter() yield chunk await r.aclose() @@ -204,7 +212,7 @@ def set_apikey_from_preset(): r = await self.try_send(client_config, request) return StreamingResponse( - self.aiter_append(r, request, url_path, uid), + self.aiter_bytes(r, request, url_path, uid), status_code=r.status_code, media_type=r.headers.get("content-type"), ) diff --git a/openai_forward/forwarding/extra.py b/openai_forward/forwarding/extra.py index 5cc0d3c..90a2fa6 100644 --- a/openai_forward/forwarding/extra.py +++ b/openai_forward/forwarding/extra.py @@ -1,17 +1,18 @@ from ..config import print_startup_info -from .base import LOG_CHAT, ForwardingBase +from .base import LOG_CHAT, ExtraForwardingSaver, ForwardingBase class AnyForwarding(ForwardingBase): def __init__(self, base_url: str, route_prefix: str, proxy=None): import httpx + if LOG_CHAT: + self.extrasaver = ExtraForwardingSaver(route_prefix) self.BASE_URL = base_url self.ROUTE_PREFIX = route_prefix self.client = httpx.AsyncClient( base_url=self.BASE_URL, proxies=proxy, http1=True, http2=False ) - self.token_counts = 0 print_startup_info(self.BASE_URL, self.ROUTE_PREFIX, [], "\\", LOG_CHAT) diff --git a/openai_forward/forwarding/openai.py b/openai_forward/forwarding/openai.py index a34a904..fd53cfa 100644 --- a/openai_forward/forwarding/openai.py +++ b/openai_forward/forwarding/openai.py @@ -1,5 +1,7 @@ +import time + from ..config import print_startup_info -from .base import OpenaiBase +from .base import ChatSaver, OpenaiBase, WhisperSaver from .settings import LOG_CHAT, OPENAI_API_KEY @@ -9,10 +11,14 @@ def __init__(self, base_url: str, route_prefix: str, proxy=None): self.BASE_URL = base_url self.ROUTE_PREFIX = route_prefix + if LOG_CHAT: + self.chatsaver = ChatSaver(route_prefix) + self.whispersaver = WhisperSaver(route_prefix) self.client = httpx.AsyncClient( base_url=self.BASE_URL, proxies=proxy, http1=True, http2=False ) self.token_counts = 0 + self.token_limit_dict = {'time': time.time(), 'count': 0} print_startup_info( self.BASE_URL, self.ROUTE_PREFIX, diff --git a/openai_forward/forwarding/settings.py b/openai_forward/forwarding/settings.py index 0673388..f329c25 100644 --- a/openai_forward/forwarding/settings.py +++ b/openai_forward/forwarding/settings.py @@ -1,5 +1,6 @@ import os +import limits from fastapi import Request from ..config import setting_log @@ -21,7 +22,9 @@ LOG_CHAT = os.environ.get("LOG_CHAT", "False").strip().lower() == "true" if LOG_CHAT: - setting_log() + setting_log( + openai_route_prefix=OPENAI_ROUTE_PREFIX, extra_route_prefix=EXTRA_ROUTE_PREFIX + ) IP_WHITELIST = env2list("IP_WHITELIST", sep=ENV_VAR_SEP) IP_BLACKLIST = env2list("IP_BLACKLIST", sep=ENV_VAR_SEP) @@ -49,3 +52,17 @@ def dynamic_rate_limit(key: str): if key.startswith(route): return rate_limit_conf[route] return GLOBAL_RATE_LIMIT + + +TOKEN_RATE_LIMIT = os.environ.get("TOKEN_RATE_LIMIT").strip() +if TOKEN_RATE_LIMIT: + rate_limit_item = limits.parse(TOKEN_RATE_LIMIT) + print( + f"{rate_limit_item.amount=} {rate_limit_item=} {rate_limit_item.GRANULARITY} {rate_limit_item.multiples=}" + ) + TOKEN_INTERVAL = ( + rate_limit_item.multiples * rate_limit_item.GRANULARITY.seconds + ) / rate_limit_item.amount + print(f"{TOKEN_INTERVAL=}") +else: + TOKEN_INTERVAL = 0 From 0d109638d7f1b8e6a331cf07b688347c677ebf70 Mon Sep 17 00:00:00 2001 From: kunyuan Date: Wed, 16 Aug 2023 00:36:27 +0800 Subject: [PATCH 3/6] :fire: Cancel extra log --- openai_forward/config.py | 12 ------------ openai_forward/content/__init__.py | 1 - openai_forward/content/extra.py | 11 ----------- openai_forward/forwarding/base.py | 7 +------ openai_forward/forwarding/extra.py | 4 +--- openai_forward/forwarding/settings.py | 4 +--- 6 files changed, 3 insertions(+), 36 deletions(-) delete mode 100644 openai_forward/content/extra.py diff --git a/openai_forward/config.py b/openai_forward/config.py index fb6d134..82385b7 100644 --- a/openai_forward/config.py +++ b/openai_forward/config.py @@ -58,7 +58,6 @@ def emit(self, record): def setting_log( save_file=False, openai_route_prefix=None, - extra_route_prefix=None, log_name="openai_forward", multi_process=True, ): @@ -103,17 +102,6 @@ def filter_func(_prefix, _postfix, record): ] ) - for prefix in extra_route_prefix or []: - _prefix = prefix.replace('/', '_') - config_handlers.append( - { - "sink": f"./Log/extra/{_prefix}/extra.log", - "enqueue": multi_process, - "rotation": "50 MB", - "filter": functools.partial(filter_func, _prefix, "_extra"), - "format": "{message}", - } - ) if save_file: config_handlers += [ { diff --git a/openai_forward/content/__init__.py b/openai_forward/content/__init__.py index d80933f..d4039d2 100644 --- a/openai_forward/content/__init__.py +++ b/openai_forward/content/__init__.py @@ -1,3 +1,2 @@ from .chat import ChatSaver -from .extra import ExtraForwardingSaver from .whisper import WhisperSaver diff --git a/openai_forward/content/extra.py b/openai_forward/content/extra.py deleted file mode 100644 index 2af8268..0000000 --- a/openai_forward/content/extra.py +++ /dev/null @@ -1,11 +0,0 @@ -from loguru import logger - - -class ExtraForwardingSaver: - def __init__(self, route_prefix: str): - _prefix = route_prefix.replace('/', '_') - self.logger = logger.bind(**{f"{_prefix}_extra": True}) - - def add_log(self, bytes_: bytes): - text_content = bytes_.decode("utf-8") - self.logger.debug(text_content) diff --git a/openai_forward/forwarding/base.py b/openai_forward/forwarding/base.py index a00538f..83c95da 100644 --- a/openai_forward/forwarding/base.py +++ b/openai_forward/forwarding/base.py @@ -9,7 +9,7 @@ from fastapi.responses import StreamingResponse from loguru import logger -from ..content import ChatSaver, ExtraForwardingSaver, WhisperSaver +from ..content import ChatSaver, WhisperSaver from .settings import * @@ -18,8 +18,6 @@ class ForwardingBase: ROUTE_PREFIX = None client: httpx.AsyncClient = None - if LOG_CHAT: - extrasaver: ExtraForwardingSaver = None if IP_BLACKLIST or IP_WHITELIST: validate_host = True else: @@ -43,12 +41,9 @@ def validate_request_host(ip): async def aiter_bytes( self, r: httpx.Response, **kwargs ) -> AsyncGenerator[bytes, Any]: - bytes_ = b"" async for chunk in r.aiter_bytes(): - bytes_ += chunk yield chunk await r.aclose() - self.extrasaver.add_log(bytes_) async def try_send(self, client_config: dict, request: Request): try: diff --git a/openai_forward/forwarding/extra.py b/openai_forward/forwarding/extra.py index 90a2fa6..c64cf66 100644 --- a/openai_forward/forwarding/extra.py +++ b/openai_forward/forwarding/extra.py @@ -1,13 +1,11 @@ from ..config import print_startup_info -from .base import LOG_CHAT, ExtraForwardingSaver, ForwardingBase +from .base import LOG_CHAT, ForwardingBase class AnyForwarding(ForwardingBase): def __init__(self, base_url: str, route_prefix: str, proxy=None): import httpx - if LOG_CHAT: - self.extrasaver = ExtraForwardingSaver(route_prefix) self.BASE_URL = base_url self.ROUTE_PREFIX = route_prefix self.client = httpx.AsyncClient( diff --git a/openai_forward/forwarding/settings.py b/openai_forward/forwarding/settings.py index f329c25..f0d2ee3 100644 --- a/openai_forward/forwarding/settings.py +++ b/openai_forward/forwarding/settings.py @@ -22,9 +22,7 @@ LOG_CHAT = os.environ.get("LOG_CHAT", "False").strip().lower() == "true" if LOG_CHAT: - setting_log( - openai_route_prefix=OPENAI_ROUTE_PREFIX, extra_route_prefix=EXTRA_ROUTE_PREFIX - ) + setting_log(openai_route_prefix=OPENAI_ROUTE_PREFIX) IP_WHITELIST = env2list("IP_WHITELIST", sep=ENV_VAR_SEP) IP_BLACKLIST = env2list("IP_BLACKLIST", sep=ENV_VAR_SEP) From c9976648805e0ba4a237f9f8ee2505dce6700377 Mon Sep 17 00:00:00 2001 From: kunyuan Date: Wed, 16 Aug 2023 01:44:59 +0800 Subject: [PATCH 4/6] :dizzy: Adaptation --- .env | 9 ++-- .env.example | 21 ++++---- openai_forward/__init__.py | 2 +- openai_forward/__main__.py | 34 +++++++++++-- openai_forward/forwarding/base.py | 2 + openai_forward/forwarding/settings.py | 6 +-- openai_forward/helper.py | 71 ++++++++++++++++++++++++++- render.yaml | 4 +- scripts/keep_render_alive.py | 24 +++++++++ 9 files changed, 148 insertions(+), 25 deletions(-) create mode 100644 scripts/keep_render_alive.py diff --git a/.env b/.env index 7029100..ed0850e 100644 --- a/.env +++ b/.env @@ -1,14 +1,14 @@ -# 示例见 .env.example +# 示例与解释见 .env.example # `LOG_CHAT`: 是否记录日志 LOG_CHAT=true # `OPENAI_BASE_URL`: 转发openai风格的任何服务地址,允许指定多个, 以逗号隔开。 # 如果指定超过一个,则任何OPENAI_ROUTE_PREFIX/EXTRA_ROUTE_PREFIX都不能为根路由/ -OPENAI_BASE_URL=https://api.openai-forward.com, https://api.openai.com +OPENAI_BASE_URL='https://api.openai-forward.com, https://api.openai.com' # `OPENAI_ROUTE_PREFIX`: 可指定所有openai风格(为记录日志)服务的转发路由前缀 -OPENAI_ROUTE_PREFIX=/openai, /localai +OPENAI_ROUTE_PREFIX='/openai, /localai' OPENAI_API_KEY= FORWARD_KEY= @@ -18,7 +18,6 @@ EXTRA_BASE_URL= # `EXTRA_ROUTE_PREFIX`: 与 EXTRA_BASE_URL 匹配的路由前缀 EXTRA_ROUTE_PREFIX= - # RATE `LIMIT`: 指定路由的请求速率限制 # format: {route: ratelimit-string} # ratelimit-string format [count] [per|/] [n (optional)] [second|minute|hour|day|month|year] :ref:`ratelimit-string`: https://limits.readthedocs.io/en/stable/quickstart.html#rate-limit-string-notation @@ -36,7 +35,7 @@ RATE_LIMIT_STRATEGY=moving-window GLOBAL_RATE_LIMIT=10/5seconds # same as TPM -TOKEN_RATE_LIMIT=10/1seconds +TOKEN_RATE_LIMIT=16/second # 设定时区 TZ=Asia/Shanghai \ No newline at end of file diff --git a/.env.example b/.env.example index 411e155..1f3b6ba 100644 --- a/.env.example +++ b/.env.example @@ -1,28 +1,26 @@ # LOG_CHAT: 是否开启日志 LOG_CHAT=true - # OPENAI_BASE_URL: 转发openai风格的任何服务地址,允许指定多个, 以逗号隔开。 # 如果指定超过一个,则任何OPENAI_ROUTE_PREFIX/EXTRA_ROUTE_PREFIX都不能为根路由/ -OPENAI_BASE_URL=https://api.openai.com, http:localhost:8080 +OPENAI_BASE_URL='https://api.openai.com, http:localhost:8080' # OPENAI_ROUTE_PREFIX: 可指定所有openai风格(为记录日志)服务的转发路由前缀 -OPENAI_ROUTE_PREFIX=/openai, /localai +OPENAI_ROUTE_PREFIX='/openai, /localai' # OPENAI_API_KEY:允许输入多个api key, 以逗号隔开, 形成轮询池 -OPENAI_API_KEY=sk-xxx1,sk-xxx2,sk-xxx3 +OPENAI_API_KEY='sk-xxx1, sk-xxx2, sk-xxx3' # FORWARD_KEY: 当前面的OPENAI_API_KEY被设置,就可以设置这里的FORWARD_KEY,客户端调用时可以使用FORWARD_KEY作为api key FORWARD_KEY=fk-xxx1 - # EXTRA_BASE_URL: 可指定任意服务转发 -EXTRA_BASE_URL=http://localhost:8882, http://localhost:8881 +EXTRA_BASE_URL='http://localhost:8882, http://localhost:8881' # EXTRA_ROUTE_PREFIX: 与 EXTRA_BASE_URL 匹配的路由前缀 -EXTRA_ROUTE_PREFIX=/tts,/translate +EXTRA_ROUTE_PREFIX='/tts, /translate' -# RATE LIMIT: 指定路由的请求速率限制 +# RATE LIMIT: 指定路由的请求速率限制(不区分客户) # format: {route: ratelimit-string} # ratelimit-string format [count] [per|/] [n (optional)] [second|minute|hour|day|month|year] :ref:`ratelimit-string`: https://limits.readthedocs.io/en/stable/quickstart.html#rate-limit-string-notation RATE_LIMIT='{ @@ -30,15 +28,18 @@ RATE_LIMIT='{ "/openai/v1/chat/completions": "1/10seconds", "/localai/v1/chat/completions": "2/second"}' -# GLOBAL_RATE_LIMIT: 所有`RATE_LIMIT`没有指定的路由. 不填默认无限制 +# `GLOBAL_RATE_LIMIT`: 所有`RATE_LIMIT`没有指定的路由. 不填默认无限制 GLOBAL_RATE_LIMIT=2/5seconds #`RATE_LIMIT_STRATEGY` Options: (fixed-window, fixed-window-elastic-expiry, moving-window) ref: https://limits.readthedocs.io/en/latest/strategies.html # `fixed-window`: most memory efficient strategy; `moving-window`:most effective for preventing bursts but higher memory cost. RATE_LIMIT_STRATEGY=fixed-window -# PROXY 配置代理 +# `PROXY` http代理 PROXY=http://localhost:7890 +# `TOKEN_RATE_LIMIT` 对每一份流式请求的token速率限制 (注:这里的token并不是gpt中定义的token,而是SSE的chunk) +TOKEN_RATE_LIMIT=16/second + # 设定时区 TZ=Asia/Shanghai \ No newline at end of file diff --git a/openai_forward/__init__.py b/openai_forward/__init__.py index 78d7ef6..4b8f5de 100644 --- a/openai_forward/__init__.py +++ b/openai_forward/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.4.1" +__version__ = "0.5.0-alpha" from dotenv import load_dotenv diff --git a/openai_forward/__main__.py b/openai_forward/__main__.py index 531b336..40864ad 100644 --- a/openai_forward/__main__.py +++ b/openai_forward/__main__.py @@ -16,6 +16,10 @@ def run( openai_route_prefix=None, extra_base_url=None, extra_route_prefix=None, + rate_limit=None, + global_rate_limit=None, + rate_limit_strategy=None, + token_rate_limit=None, ip_whitelist=None, ip_blacklist=None, proxy=None, @@ -37,6 +41,7 @@ def run( ip_whitelist: str, None ip_blacklist: str, None """ + if log_chat: os.environ["LOG_CHAT"] = log_chat if openai_base_url: @@ -51,6 +56,17 @@ def run( os.environ["EXTRA_ROUTE_PREFIX"] = extra_route_prefix if forward_key: os.environ["FORWARD_KEY"] = forward_key + if rate_limit: + import json + + assert isinstance(rate_limit, dict) + os.environ["RATE_LIMIT"] = json.dumps(rate_limit) + if global_rate_limit: + os.environ["GLOBAL_RATE_LIMIT"] = global_rate_limit + if rate_limit_strategy: + os.environ["RATE_LIMIT_STRATEGY"] = rate_limit_strategy + if token_rate_limit: + os.environ["TOKEN_RATE_LIMIT"] = token_rate_limit if ip_whitelist: os.environ["IP_WHITELIST"] = ip_whitelist if ip_blacklist: @@ -71,12 +87,24 @@ def run( ) @staticmethod - def convert(log_folder: str = "./Log/chat", target_path: str = "./Log/chat.json"): + def convert(log_folder: str = None, target_path: str = None): """Convert log folder to jsonl file""" + from openai_forward.forwarding.settings import OPENAI_ROUTE_PREFIX from openai_forward.helper import convert_folder_to_jsonl - print(f"Convert {log_folder}/*.log to {target_path}") - convert_folder_to_jsonl(log_folder, target_path) + print(60 * '-') + if log_folder is None: + _prefix_list = [i.replace("/", "_") for i in OPENAI_ROUTE_PREFIX] + for _prefix in _prefix_list: + log_folder = f"./Log/chat/{_prefix}" + target_path = f"./Log/chat{_prefix}.json" + print(f"Convert {log_folder}/*.log to {target_path}") + convert_folder_to_jsonl(log_folder, target_path) + print(60 * '-') + else: + print(f"Convert {log_folder}/*.log to {target_path}") + convert_folder_to_jsonl(log_folder, target_path) + print(60 * '-') def main(): diff --git a/openai_forward/forwarding/base.py b/openai_forward/forwarding/base.py index 83c95da..2c85e5f 100644 --- a/openai_forward/forwarding/base.py +++ b/openai_forward/forwarding/base.py @@ -10,6 +10,7 @@ from loguru import logger from ..content import ChatSaver, WhisperSaver +from ..helper import async_retry, retry from .settings import * @@ -45,6 +46,7 @@ async def aiter_bytes( yield chunk await r.aclose() + @async_retry(max_retries=3, delay=0.5, backoff=2, exceptions=(HTTPException,)) async def try_send(self, client_config: dict, request: Request): try: req = self.client.build_request( diff --git a/openai_forward/forwarding/settings.py b/openai_forward/forwarding/settings.py index f0d2ee3..bfecf99 100644 --- a/openai_forward/forwarding/settings.py +++ b/openai_forward/forwarding/settings.py @@ -33,8 +33,8 @@ PROXY = os.environ.get("PROXY", "").strip() PROXY = PROXY if PROXY else None -GLOBAL_RATE_LIMIT = os.environ.get("GLOBAL_RATE_LIMIT").strip() or None -RATE_LIMIT_STRATEGY = os.environ.get("RATE_LIMIT_STRATEGY").strip() or None +GLOBAL_RATE_LIMIT = os.environ.get("GLOBAL_RATE_LIMIT", "").strip() or None +RATE_LIMIT_STRATEGY = os.environ.get("RATE_LIMIT_STRATEGY", "").strip() or None rate_limit_conf = env2dict('RATE_LIMIT') print(f"{rate_limit_conf=}") @@ -52,7 +52,7 @@ def dynamic_rate_limit(key: str): return GLOBAL_RATE_LIMIT -TOKEN_RATE_LIMIT = os.environ.get("TOKEN_RATE_LIMIT").strip() +TOKEN_RATE_LIMIT = os.environ.get("TOKEN_RATE_LIMIT", "").strip() if TOKEN_RATE_LIMIT: rate_limit_item = limits.parse(TOKEN_RATE_LIMIT) print( diff --git a/openai_forward/helper.py b/openai_forward/helper.py index 184954b..b8e54eb 100644 --- a/openai_forward/helper.py +++ b/openai_forward/helper.py @@ -1,6 +1,9 @@ import ast +import asyncio import inspect import os +import time +from functools import wraps from pathlib import Path from typing import Dict, List, Union @@ -43,6 +46,70 @@ def ls(_dir, *patterns, concat='extend', recursive=False): return path_list +def retry(max_retries=3, delay=1, backoff=2, exceptions=(Exception,)): + """ + Retry decorator. + + Parameters: + - max_retries: Maximum number of retries. + - delay: Initial delay in seconds. + - backoff: Multiplier for delay after each retry. + - exceptions: Exceptions to catch and retry on, as a tuple. + + """ + + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + retries = 0 + current_delay = delay + while retries < max_retries: + try: + return func(*args, **kwargs) + except exceptions as e: + retries += 1 + if retries >= max_retries: + raise + time.sleep(current_delay) + current_delay *= backoff + + return wrapper + + return decorator + + +def async_retry(max_retries=3, delay=1, backoff=2, exceptions=(Exception,)): + """ + Retry decorator for asynchronous functions. + + Parameters: + - max_retries: Maximum number of retries. + - delay: Initial delay in seconds. + - backoff: Multiplier for delay after each retry. + - exceptions: Exceptions to catch and retry on, as a tuple. + + """ + + def decorator(func): + @wraps(func) + async def wrapper(*args, **kwargs): + retries = 0 + current_delay = delay + while retries < max_retries: + try: + return await func(*args, **kwargs) + except exceptions as e: + retries += 1 + if retries >= max_retries: + raise + await asyncio.sleep(current_delay) + current_delay *= backoff + + return wrapper + + return decorator + + def json_load(filepath: str, rel=False, mode="rb"): abs_path = relp(filepath, parents=1) if rel else filepath with open(abs_path, mode=mode) as f: @@ -79,11 +146,11 @@ def env2list(env_name: str, sep=","): def env2dict(env_name: str) -> Dict: - if not env_name: - return {} import json env_str = os.environ.get(env_name, "").strip() + if not env_str: + return {} return json.loads(env_str) diff --git a/render.yaml b/render.yaml index 3aa3b3e..8c326de 100644 --- a/render.yaml +++ b/render.yaml @@ -1,4 +1,6 @@ services: - name: openai-forward type: web - env: docker \ No newline at end of file + env: docker + region: singapore + plan: free \ No newline at end of file diff --git a/scripts/keep_render_alive.py b/scripts/keep_render_alive.py new file mode 100644 index 0000000..96bbc16 --- /dev/null +++ b/scripts/keep_render_alive.py @@ -0,0 +1,24 @@ +import time +from urllib.parse import urljoin + +import httpx +import schedule + + +def job(url: str = "https://render.openai-forward.com"): + health_url = urljoin(url, "/healthz") + try: + r = httpx.get(health_url, timeout=5) + result = r.json() + print(result) + assert result == "OK" + except Exception as e: + print(e) + + +if __name__ == "__main__": + job() + schedule.every(10).minutes.do(job) + while True: + schedule.run_pending() + time.sleep(60) From 54de3b39a3eed1d8e043f9cee1e32b1bb21f3a7a Mon Sep 17 00:00:00 2001 From: kunyuan Date: Thu, 17 Aug 2023 00:33:32 +0800 Subject: [PATCH 5/6] chore: update docs and print info --- .env | 15 ++-- .github/images/separators/aqua.png | Bin 0 -> 1892 bytes .github/images/separators/aqua.webp | Bin 0 -> 1460 bytes README.md | 10 +-- deploy.md | 51 ++++++------- openai_forward/app.py | 5 +- openai_forward/config.py | 69 +++++++++++++----- openai_forward/forwarding/base.py | 100 +++++++++++++++++++++++++- openai_forward/forwarding/extra.py | 4 +- openai_forward/forwarding/openai.py | 10 +-- openai_forward/forwarding/settings.py | 22 ++++-- 11 files changed, 211 insertions(+), 75 deletions(-) create mode 100644 .github/images/separators/aqua.png create mode 100644 .github/images/separators/aqua.webp diff --git a/.env b/.env index ed0850e..6882e84 100644 --- a/.env +++ b/.env @@ -1,14 +1,14 @@ # 示例与解释见 .env.example # `LOG_CHAT`: 是否记录日志 -LOG_CHAT=true +LOG_CHAT=false # `OPENAI_BASE_URL`: 转发openai风格的任何服务地址,允许指定多个, 以逗号隔开。 # 如果指定超过一个,则任何OPENAI_ROUTE_PREFIX/EXTRA_ROUTE_PREFIX都不能为根路由/ -OPENAI_BASE_URL='https://api.openai-forward.com, https://api.openai.com' +OPENAI_BASE_URL=https://api.openai-forward.com # `OPENAI_ROUTE_PREFIX`: 可指定所有openai风格(为记录日志)服务的转发路由前缀 -OPENAI_ROUTE_PREFIX='/openai, /localai' +OPENAI_ROUTE_PREFIX= OPENAI_API_KEY= FORWARD_KEY= @@ -22,9 +22,8 @@ EXTRA_ROUTE_PREFIX= # format: {route: ratelimit-string} # ratelimit-string format [count] [per|/] [n (optional)] [second|minute|hour|day|month|year] :ref:`ratelimit-string`: https://limits.readthedocs.io/en/stable/quickstart.html#rate-limit-string-notation RATE_LIMIT='{ -"/healthz": "50/3minutes", -"/openai/v1/chat/completions": "3/2seconds", -"/localai/v1/chat/completions": "5/second" +"/healthz": "1000/2minutes", +"/v1/chat/completions": "5/second", }' #`RATE_LIMIT_STRATEGY` Options: (fixed-window, fixed-window-elastic-expiry, moving-window) ref: https://limits.readthedocs.io/en/latest/strategies.html @@ -32,10 +31,10 @@ RATE_LIMIT='{ RATE_LIMIT_STRATEGY=moving-window # `GLOBAL_RATE_LIMIT`: 所有`RATE_LIMIT`没有指定的路由. 不填默认无限制 -GLOBAL_RATE_LIMIT=10/5seconds +GLOBAL_RATE_LIMIT= # same as TPM -TOKEN_RATE_LIMIT=16/second +TOKEN_RATE_LIMIT=60/second # 设定时区 TZ=Asia/Shanghai \ No newline at end of file diff --git a/.github/images/separators/aqua.png b/.github/images/separators/aqua.png new file mode 100644 index 0000000000000000000000000000000000000000..3dc6b665f619ec7e5339649f84efcd0f95b51255 GIT binary patch literal 1892 zcmbuAdpwkh8pmI$h+Img;}T1SVvWdUTn4#}m_Z3kohgbY+l(DVVP36rx6_K_lIsdt z>}=9eEU(!p0l>YJ0FWYk-jPWu zs{jy(0)X|a0Dvw60L|#qHh%}%VQ-kXryC&sPgQo*J(dZHUT!V{2@@M3-pz~F`wg4> zC2D!8O9xBcCk?u2)8sQx0uFq)c*!@k*?8B3XV#&nN3wguo(KQmkdkLPAhJ1JL5dK< zdP2Cm__at<&-jbn#!%NQDsE@dR9<sMsu~+& zy0jm!sQQAjqKzmv*X4zugS+0-BHJ$8a~(zO?dyRWwiJf(NfSvr*M!-RRFeoH(vE*v zN;SaaITq9;O)F1R_1V=#>gDk;EI)Bb+D#aGm=6)L^XTIdx?uYsi~`6Jw4-w1!^nqi zKf$}q)@M8M!uPwA=IOlj*EuJl^T!CRgNHnX%GWCM7KU=EdnK%Y3|Bmo74B<5WHZe=w-;*Km)wQlU{%( zHCN{~IRkNde4($n@B=wYpnWNQMmWiBtdZlsLrbGMU>)cPHb|S_g8sV1b>9XE*Aza; zXFxiVC|)ZsXmdLgdbA_YBbXygM|<-N&q++yLIoUt)ndtN;ZC8rgC){l!0-!UqX=p} z6%xM7gpgTU5r#Qi4to!=Saal1=_6Q57IMa+5g$EWEW%Jg^anYqV4nkN-DU;1L~|6< zXVr8SmQUw1v%Sk1yeY7`XPvV zaY!fs>bJXkGH-?$KPEQ+OTQ#bV>#>uhov(UAZ**u&!|$gOG~cjfONv{tX%f*M_zh< z&L9r789q?FiZOr0XDuEw#ck3{yjr2~8l=p?)%EK(vRo-e`S1oVZSH$1G>3im> zA*k>ZR@bc>lj`wt0e7mq`-N7@a`cm|?>PxAE>#o1_W0&7gBMN>qX(@UBIy5I$$)fT zcp`yi19lke(5CtqtBo%71C0<3j*AksmsH0icx289mJC7pZ_hckw8ngdm$n_)&7YyK z69IU8WNRneBWR-vxm!OtSZ<@`PqTTNJC%^|Ios%VzXiV!YMEJ`)=#X%(c!WQb}C4m zLNOmERi0~?<|4@q*6^#$YOL9+#tH*NorAIw3hW`JYwn-I3hTi+W!$$=BepGC0}!QfwB};UXk34p`GMe5-T1xdvoLlptAOj` zXfhNi7uBXDO77X$cJk;2gZpZFr_3X%`e?ve(T6B!XGxne3X5I#a#gn&mB;HWM?Yx1 z6CK-;ZpwrYVtlUb>COtyAW_Ox9ZFQ4&bLkwCAN#sF{-I!?75;hYc*Z$LT!_lTt+G6 zAugho`7D=06@NSr>mGU$l=?=zI}2af;~XdLna;ISOM{Klv~As9pY}BQDF$qK_qWTi z^36K`6=P=N4N27x_fE@mkBm=E=}5fMeS5G%wh3rAY$=Vw^%iV?0q5zVx;7%tSHi(N z^Ypc0B|q^(6=8sd1nHu*g_EllQTV4Uxj?Bidsw7wFw0f*?B)oNk0 zR_otx$8Gu6iv6Z1H9*{up1aHaegYW=7ogAb2|QK=2?9#=`z=G-D$HfUbRyP0IQIIr z*c<3@$_*KSGpI9`W+)poON#*1Sv1N5ZDDsBg+ilHCfk}h|7EyMzD|s|`@aprztUw< P2moI0c(>nNum1RN&%wzd literal 0 HcmV?d00001 diff --git a/.github/images/separators/aqua.webp b/.github/images/separators/aqua.webp new file mode 100644 index 0000000000000000000000000000000000000000..1039d5d467c74ebedc11bab3818eef8da3d50aa1 GIT binary patch literal 1460 zcmV;l1xxx;Nk&Gj1pok7MM6+kP&iDV1pojqg98E(3CE2jDUxE9TJ+yy-h+Qc|0lp7 zP9;g>C=Ah~bUYX7OKY-q;(`JK{~ey?z_b^`_nd{PNz#k2L%XU=zQvq;Yk;Gs?i# z!2V^qvVR#~u4oJz(Zn)L)3KssxjIo9cpsHzenD=Cp?5X2{4$kzMq@e<9c-AXp&`KE zhmL7F_RE!&fomx>$}dydPv-L%;fGj6KVc@&7vTB68p^=4Jd=Bm4h{UaI{N>cSeSd4 zA2bjL-KOyz(Y_C$p`mksmHT$1Xbc)#4-^gfJE!}0L##**5l!?Hm4S{Goz;f2hMomq zlp6&!E>DkXT-Q=+T|i%B!*ujREZ-ZLNyjd~OeJtNF2hWw`(J;=z#yWzmf?E?9cXwy z7b*kKa(!`cIs|#G=t|1Nx_22g^kBGCOG8Ba`+&;8rf|7~4y~(G75hFe{KDntq62@# z7a+0(8awJ-bhNoS_a5R=UR-63{n9C12CfyIg#nGtO$Vk@0I5<=uZs>`hU>%8F6G3q z6dEE66CG^|qKyU`B2~B&eKxoZ0&w9{Mh}9HWmO8m6wmSsn$M0-CY}iz!sX^-Md$q7 zlr=yA?=EUzbhNp3?mfh##^%Q5ee2BFh3Exypb@CN4v?aJfU3!o}AMmj{ZzmS0K%n9{%gAcF=p%*5pm4dL=q zXhrAz+=Zb-0DpeY9d!ImRqT5Y@#D>n%S|kl5%=J6J(S zn;HAwL%48xae;^&xNR@ds z6PG)*LhJHSd^kEME?jOdfGiwZS0^+2I;c_rrf_*F8azEvS;M8AI2uChY8PLChAEyO z_){zBXj9jj9s=;pCg@<-btZ-Zh@58uY{Z$t1`H(7u5UPl7G{zHurWG72Rb|oBXWZ2 z9-sqkVjk=U4&?qIbO0Ahx&e$x#fN$FGIgTd zfmZCsu!u0aabnQfkP{_8P9AkZZcH5tI=fUBL-9 + + + 由本项目搭建的长期代理地址: > https://api.openai-forward.com -> https://cloudflare.worker.openai-forward.com +> https://render.openai-forward.com > https://cloudflare.page.openai-forward.com > https://vercel.openai-forward.com -> https://render.openai-forward.com -> https://railway.openai-forward.com ## 功能 diff --git a/deploy.md b/deploy.md index 6163427..a540973 100644 --- a/deploy.md +++ b/deploy.md @@ -8,10 +8,10 @@ [pip部署](#pip部署) | [docker部署](#docker部署) | -[railway一键部署](#railway-一键部署) | [render一键部署](#render-一键部署) | -[Vercel一键部署](#Vercel-一键部署) | +[railway一键部署](#railway-一键部署) | [cloudflare部署](#cloudflare-部署) | +[Vercel一键部署](#Vercel-一键部署) @@ -33,8 +33,8 @@ openai-forward run # 或者使用别名 aifd run ### 服务调用 -使用方式只需将`https://api.openai.com` 替换为服务所在端口`http://{ip}:{port}` 就可以了。 -比如 +使用方式只需将`https://api.openai.com` 替换为服务所在端口`http://{ip}:{port}` +如 ```bash # 默认 https://api.openai.com/v1/chat/completions @@ -63,7 +63,7 @@ docker run -d -p 9999:8000 beidongjiedeguang/openai-forward:latest ``` 将映射宿主机的9999端口,通过`http://{ip}:9999`访问服务。 -容器内日志路径为`/home/openai-forward/Log/`, 可以启动时将其映射出来。 +容器内日志路径为`/home/openai-forward/Log/`, 可在启动时将其映射出来。 注:同样可以在启动命令中通过-e传入环境变量OPENAI_API_KEY=sk-xxx作为默认api key 启用SSL同上. @@ -84,39 +84,42 @@ openai-forward run # 或使用别名 aifd run --- -## Railway 一键部署 -[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/tejCum?referralCode=U0-kXv) - -1. 点击上面部署按钮进行一键部署 - 也可先fork本仓库,再手动在操作界面导入自己的fork项目 -2. 填写环境变量,必填项`PORT` :`8000`, 可选项 如默认的OPENAI_API_KEY 等 -3. 绑定自定义域名 - -注: Railway 每月提供 $5.0和500小时执行时间的免费计划。这意味着单个免费用户每个月只能使用大约21天 - -> https://railway.openai-forward.com - ---- ## Render 一键部署 [![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/beidongjiedeguang/openai-forward) -体验下来,Render应该算是所有部署中最简易的一种, 并且它生成的域名国内可以直接访问! +Render应该算是所有部署中最简易的一种, 并且它生成的域名国内可以直接访问! 1. 点击一键部署按钮 - 如果提示需要绑定卡,则可先fork本仓库 -->到Render的Dashboard上 New Web Services --> Connect 到刚刚fork到仓库 + 如果提示需要绑定卡,则可先fork本仓库 -->到Render的Dashboard上 New Web Services --> Connect 到刚刚fork到仓库 后面步骤均默认即可 2. 填写环境变量,如默认的OPENAI_API_KEY 等,也可以不填 然后等待部署完成即可。 Render的免费计划: 每月750小时免费实例时间(意味着单个实例可以不间断运行)、100G带宽流量、500分钟构建时长. 注:默认render在15分钟内没有服务请求时会自动休眠(好处是休眠后不会占用750h的免费实例时间),休眠后下一次请求会被阻塞 5~10s。 -若不希望服务15分钟自动休眠,可以使用定时脚本(如每14分钟)去请求服务进行保活。 -如果希望零停机部署可以在设置中设置`Health Check Path`为`/healthz` +如果希望服务15分钟不自动休眠,可以使用定时脚本(如每14分钟)去请求服务进行保活。保活脚本参考`scripts/keep_render_alive.py`. +如果希望零停机部署可以在设置中设置`Health Check Path`为`/healthz` + > https://render.openai-forward.com > https://openai-forward.onrender.com +--- + +## Railway 一键部署 +[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/tejCum?referralCode=U0-kXv) + +1. 点击上面部署按钮进行一键部署 + 也可先fork本仓库,再手动在操作界面导入自己的fork项目 +2. 填写环境变量,必填项`PORT` :`8000`, 可选项 如默认的OPENAI_API_KEY 等 +3. 绑定自定义域名 + +注: Railway 每月提供 $5.0和500小时执行时间的免费计划。这意味着单个免费用户每个月只能使用大约21天 + +> https://railway.openai-forward.com + + --- ⚠️下面两种部署方式仅提供简单的转发服务,没有任何额外功能。 @@ -142,7 +145,7 @@ Render的免费计划: 每月750小时免费实例时间(意味着单个实例 * Pages部署: fork本仓库,在[cloudflare](https://dash.cloudflare.com/)上创建应用程序时选择Pages, 然后选择连接到Git, 选择刚刚fork的仓库即可完成部署。 * Workers部署: 在[cloudflare](https://dash.cloudflare.com/)上创建应用程序时选择Workers, 部署好示例代码后,点击快速修改(quick edit)复制[_worker.js](_worker.js) 至代码编辑器即可完成服务部署。 -绑定自定义域名: cloudflare自动分配的域名国内也无法访问,所以也需要绑定自定义域名. (目前Pages部署时自动分配的域名国内还可以访问) +绑定自定义域名: cloudflare自动分配的域名国内也无法访问,所以也需要绑定自定义域名. (**目前Pages部署时自动分配的域名国内可以直接访问**) 绑定自定义域名需要将域名默认nameserver(域名服务器)绑定到cloudflare提供的nameserver,大体上过程是: ```mermaid @@ -155,7 +158,7 @@ stateDiagram-v2 去注册域名机构更改默认nameserver为cloudflare提供的nameserver --> 在cloudflare的worker/page中添加域名: 域名服务器更改验证成功 在cloudflare的worker/page中添加域名 --> 成功 ``` -这种部署方式轻便简洁,支持流式转发. 对于没有vps的用户还是十分推荐的。不过目前[_worker.js](_worker.js)这个简单脚本仅提供转发服务, 不提供额外功能。 +这种部署方式轻便简洁,支持流式转发. 对于没有vps的用户还是十分推荐的。不过目前[_worker.js](_worker.js)这个简单脚本仅提供转发服务, 不支持额外功能。 > https://cloudflare.worker.openai-forward.com > https://cloudflare.page.openai-forward.com diff --git a/openai_forward/app.py b/openai_forward/app.py index d536181..6970a5c 100644 --- a/openai_forward/app.py +++ b/openai_forward/app.py @@ -11,7 +11,7 @@ limiter = Limiter(key_func=get_limiter_key, strategy=RATE_LIMIT_STRATEGY) -app = FastAPI(title="openai_forward", version="0.4") +app = FastAPI(title="openai_forward", version="0.5") app.state.limiter = limiter app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) @@ -31,8 +31,7 @@ def healthz(request: Request): add_route = lambda obj: app.add_route( obj.ROUTE_PREFIX + "{api_path:path}", - # limiter.limit(dynamic_rate_limit)(obj.reverse_proxy), - obj.reverse_proxy, + limiter.limit(dynamic_rate_limit)(obj.reverse_proxy), methods=["GET", "POST", "PUT", "DELETE", "OPTIONS", "HEAD", "PATCH", "TRACE"], ) diff --git a/openai_forward/config.py b/openai_forward/config.py index 82385b7..cbfb040 100644 --- a/openai_forward/config.py +++ b/openai_forward/config.py @@ -10,7 +10,10 @@ from rich.table import Table -def print_startup_info(base_url, route_prefix, api_key, no_auth_mode, log_chat): +def print_startup_info(base_url, route_prefix, api_key, fwd_key, log_chat): + """ + Prints the startup information of the application. + """ try: from dotenv import load_dotenv @@ -18,25 +21,54 @@ def print_startup_info(base_url, route_prefix, api_key, no_auth_mode, log_chat): except Exception: ... route_prefix = route_prefix or "/" - api_key_info = True if len(api_key) else False - table = Table(title="", box=None, width=100) - table.add_column("base-url", justify="left", style="#df412f") - table.add_column("route-prefix", justify="center", style="green") - table.add_column("api-key-polling-pool", justify="center", style="green") - table.add_column( - "no-auth-mode", justify="center", style="red" if no_auth_mode else "green" - ) - table.add_column("Log-chat", justify="center", style="green") - table.add_row( - base_url, - route_prefix, - str(api_key_info), - str(no_auth_mode), - str(log_chat), - ) + if isinstance(api_key, str): + api_key = api_key + else: + api_key = str(True if len(api_key) else False) + if isinstance(fwd_key, str): + fwd_key = fwd_key + else: + fwd_key = True if len(fwd_key) else False + table = Table(title="", box=None, width=50) + + matrcs = { + "base url": {'value': base_url, 'style': "#df412f"}, + "route prefix": {'value': route_prefix, 'style': "green"}, + "api keys": {'value': api_key, 'style': "green"}, + "forward keys": {'value': str(fwd_key), 'style': "green" if fwd_key else "red"}, + "Log chat": {'value': str(log_chat), 'style': "green"}, + } + table.add_column("matrcs", justify='left', width=10) + table.add_column("value", justify='left') + for key, value in matrcs.items(): + table.add_row(key, value['value'], style=value['style']) + print(Panel(table, title="🤗 openai-forward is ready to serve! ", expand=False)) +def show_rate_limit_info(rate_limit: dict, strategy: str, **kwargs): + """ + Print rate limit information. + + Parameters: + rate_limit (dict): A dictionary containing rate limit information. + strategy (str): The strategy used for rate limiting. + **kwargs: Additional keyword arguments. + + Returns: + None + """ + table = Table(title="", box=None, width=50) + table.add_column("matrics") + table.add_column("value") + table.add_row("strategy", strategy, style='blue') + for key, value in rate_limit.items(): + table.add_row(key, str(value), style='green') + for key, value in kwargs.items(): + table.add_row(key, str(value), style='green') + print(Panel(table, title="⏱️ Rate Limit configuration", expand=False)) + + class InterceptHandler(logging.Handler): def emit(self, record): # Get corresponding Loguru level if it exists @@ -61,6 +93,9 @@ def setting_log( log_name="openai_forward", multi_process=True, ): + """ + Configures the logging settings for the application. + """ # TODO 修复时区配置 if os.environ.get("TZ") == "Asia/Shanghai": os.environ["TZ"] = "UTC-8" diff --git a/openai_forward/forwarding/base.py b/openai_forward/forwarding/base.py index 2c85e5f..e6f0c75 100644 --- a/openai_forward/forwarding/base.py +++ b/openai_forward/forwarding/base.py @@ -28,6 +28,15 @@ class ForwardingBase: @staticmethod def validate_request_host(ip): + """ + Validates the request host IP address against the IP whitelist and blacklist. + + Parameters: + ip (str): The IP address to be validated. + + Raises: + HTTPException: If the IP address is not in the whitelist or if it is in the blacklist. + """ if IP_WHITELIST and ip not in IP_WHITELIST: raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, @@ -48,6 +57,19 @@ async def aiter_bytes( @async_retry(max_retries=3, delay=0.5, backoff=2, exceptions=(HTTPException,)) async def try_send(self, client_config: dict, request: Request): + """ + Try to send the request. + + Args: + client_config (dict): The configuration for the client. + request (Request): The request to be sent. + + Returns: + Response: The response from the client. + + Raises: + HTTPException: If there is a connection error or any other exception occurs. + """ try: req = self.client.build_request( method=request.method, @@ -75,6 +97,23 @@ async def try_send(self, client_config: dict, request: Request): ) def prepare_client(self, request: Request): + """ + Prepares the client configuration based on the given request. + + Args: + request (Request): The request object containing the necessary information. + + Returns: + dict: The client configuration dictionary with the necessary parameters set. + The dictionary has the following keys: + - 'auth': The authorization header value. + - 'headers': The dictionary of headers. + - 'url': The URL object. + - 'url_path': The URL path. + + Raises: + AssertionError: If the `BASE_URL` or `ROUTE_PREFIX` is not set. + """ assert self.BASE_URL is not None assert self.ROUTE_PREFIX is not None if self.validate_host: @@ -100,6 +139,15 @@ def prepare_client(self, request: Request): return client_config async def reverse_proxy(self, request: Request): + """ + Reverse proxies the given request. + + Args: + request (Request): The request to be reverse proxied. + + Returns: + StreamingResponse: The response from the reverse proxied server, as a streaming response. + """ assert self.client is not None client_config = self.prepare_client(request) @@ -123,6 +171,18 @@ class OpenaiBase(ForwardingBase): def _add_result_log( self, byte_list: List[bytes], uid: str, route_path: str, request_method: str ): + """ + Adds a result log for the given byte list, uid, route path, and request method. + + Args: + byte_list (List[bytes]): The list of bytes to be processed. + uid (str): The unique identifier. + route_path (str): The route path. + request_method (str): The request method. + + Returns: + None + """ try: if LOG_CHAT and request_method == "POST": if route_path == "/v1/chat/completions": @@ -140,6 +200,24 @@ def _add_result_log( logger.warning(f"log chat (not) error:\n{traceback.format_exc()}") async def _add_payload_log(self, request: Request, url_path: str): + """ + Adds a payload log for the given request. + + Args: + request (Request): The request object. + url_path (str): The URL path of the request. + + Returns: + str: The unique identifier (UID) of the payload log, which is used to match the chat result log. + + Raises: + Suppress all errors. + + Notes: + - If `LOG_CHAT` is True and the request method is "POST", the chat payload will be logged. + - If the `url_path` is "/v1/chat/completions", the chat payload will be parsed and logged. + - If the `url_path` starts with "/v1/audio/", a new UID will be generated. + """ uid = None if LOG_CHAT and request.method == "POST": try: @@ -164,13 +242,24 @@ async def _add_payload_log(self, request: Request, url_path: str): async def aiter_bytes( self, r: httpx.Response, request: Request, route_path: str, uid: str ): + """ + Asynchronously iterates over the bytes of the response and yields each chunk. + + Args: + r (httpx.Response): The HTTP response object. + request (Request): The original request object. + route_path (str): The route path. + uid (str): The unique identifier. + + Returns: + A generator that yields each chunk of bytes from the response. + """ byte_list = [] start_time = time.perf_counter() idx = 0 async for chunk in r.aiter_bytes(): idx += 1 byte_list.append(chunk) - # print(f"{chunk=}") if TOKEN_INTERVAL > 0: current_time = time.perf_counter() delta = current_time - start_time @@ -191,6 +280,15 @@ async def aiter_bytes( logger.warning(f'uid: {uid}\n' f'{response_info}') async def reverse_proxy(self, request: Request): + """ + Reverse proxies the given requests. + + Args: + request (Request): The incoming request object. + + Returns: + StreamingResponse: The response from the reverse proxied server, as a streaming response. + """ client_config = self.prepare_client(request) url_path = client_config["url_path"] diff --git a/openai_forward/forwarding/extra.py b/openai_forward/forwarding/extra.py index c64cf66..69e5dc1 100644 --- a/openai_forward/forwarding/extra.py +++ b/openai_forward/forwarding/extra.py @@ -1,5 +1,4 @@ -from ..config import print_startup_info -from .base import LOG_CHAT, ForwardingBase +from .base import ForwardingBase class AnyForwarding(ForwardingBase): @@ -11,7 +10,6 @@ def __init__(self, base_url: str, route_prefix: str, proxy=None): self.client = httpx.AsyncClient( base_url=self.BASE_URL, proxies=proxy, http1=True, http2=False ) - print_startup_info(self.BASE_URL, self.ROUTE_PREFIX, [], "\\", LOG_CHAT) def get_fwd_anything_objs(): diff --git a/openai_forward/forwarding/openai.py b/openai_forward/forwarding/openai.py index fd53cfa..3e792e2 100644 --- a/openai_forward/forwarding/openai.py +++ b/openai_forward/forwarding/openai.py @@ -1,8 +1,7 @@ import time -from ..config import print_startup_info from .base import ChatSaver, OpenaiBase, WhisperSaver -from .settings import LOG_CHAT, OPENAI_API_KEY +from .settings import LOG_CHAT class OpenaiForwarding(OpenaiBase): @@ -19,13 +18,6 @@ def __init__(self, base_url: str, route_prefix: str, proxy=None): ) self.token_counts = 0 self.token_limit_dict = {'time': time.time(), 'count': 0} - print_startup_info( - self.BASE_URL, - self.ROUTE_PREFIX, - OPENAI_API_KEY, - self._no_auth_mode, - LOG_CHAT, - ) def get_fwd_openai_style_objs(): diff --git a/openai_forward/forwarding/settings.py b/openai_forward/forwarding/settings.py index bfecf99..8031c7e 100644 --- a/openai_forward/forwarding/settings.py +++ b/openai_forward/forwarding/settings.py @@ -33,10 +33,9 @@ PROXY = os.environ.get("PROXY", "").strip() PROXY = PROXY if PROXY else None -GLOBAL_RATE_LIMIT = os.environ.get("GLOBAL_RATE_LIMIT", "").strip() or None +GLOBAL_RATE_LIMIT = os.environ.get("GLOBAL_RATE_LIMIT", "fixed-window").strip() or None RATE_LIMIT_STRATEGY = os.environ.get("RATE_LIMIT_STRATEGY", "").strip() or None rate_limit_conf = env2dict('RATE_LIMIT') -print(f"{rate_limit_conf=}") def get_limiter_key(request: Request): @@ -55,12 +54,23 @@ def dynamic_rate_limit(key: str): TOKEN_RATE_LIMIT = os.environ.get("TOKEN_RATE_LIMIT", "").strip() if TOKEN_RATE_LIMIT: rate_limit_item = limits.parse(TOKEN_RATE_LIMIT) - print( - f"{rate_limit_item.amount=} {rate_limit_item=} {rate_limit_item.GRANULARITY} {rate_limit_item.multiples=}" - ) TOKEN_INTERVAL = ( rate_limit_item.multiples * rate_limit_item.GRANULARITY.seconds ) / rate_limit_item.amount - print(f"{TOKEN_INTERVAL=}") else: TOKEN_INTERVAL = 0 + +from ..config import print_startup_info, show_rate_limit_info + +for base_url, route_prefix in zip(OPENAI_BASE_URL, OPENAI_ROUTE_PREFIX): + print_startup_info(base_url, route_prefix, OPENAI_API_KEY, FWD_KEY, LOG_CHAT) +for base_url, route_prefix in zip(EXTRA_BASE_URL, EXTRA_ROUTE_PREFIX): + print_startup_info(base_url, route_prefix, "\\", "\\", LOG_CHAT) + +show_rate_limit_info( + rate_limit_conf, + strategy=RATE_LIMIT_STRATEGY, + global_rate_limit=GLOBAL_RATE_LIMIT if GLOBAL_RATE_LIMIT else 'inf', + token_rate_limit=TOKEN_RATE_LIMIT if TOKEN_RATE_LIMIT else 'inf', + token_interval_time=f"{TOKEN_INTERVAL:.4f}s", +) From e828341c6fe0d2eb30136a58a76e1d97fb1be6d6 Mon Sep 17 00:00:00 2001 From: kunyuan Date: Thu, 17 Aug 2023 00:54:53 +0800 Subject: [PATCH 6/6] clean up --- .env | 10 +++++----- .env.example | 2 +- openai_forward/forwarding/base.py | 1 - openai_forward/forwarding/settings.py | 3 +-- pytest.ini | 2 +- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.env b/.env index 6882e84..c1f627c 100644 --- a/.env +++ b/.env @@ -5,7 +5,7 @@ LOG_CHAT=false # `OPENAI_BASE_URL`: 转发openai风格的任何服务地址,允许指定多个, 以逗号隔开。 # 如果指定超过一个,则任何OPENAI_ROUTE_PREFIX/EXTRA_ROUTE_PREFIX都不能为根路由/ -OPENAI_BASE_URL=https://api.openai-forward.com +OPENAI_BASE_URL=https://api.openai.com # `OPENAI_ROUTE_PREFIX`: 可指定所有openai风格(为记录日志)服务的转发路由前缀 OPENAI_ROUTE_PREFIX= @@ -18,12 +18,12 @@ EXTRA_BASE_URL= # `EXTRA_ROUTE_PREFIX`: 与 EXTRA_BASE_URL 匹配的路由前缀 EXTRA_ROUTE_PREFIX= -# RATE `LIMIT`: 指定路由的请求速率限制 +# `RATE_LIMIT`: i.e. RPM 对指定路由的请求速率限制 # format: {route: ratelimit-string} # ratelimit-string format [count] [per|/] [n (optional)] [second|minute|hour|day|month|year] :ref:`ratelimit-string`: https://limits.readthedocs.io/en/stable/quickstart.html#rate-limit-string-notation RATE_LIMIT='{ "/healthz": "1000/2minutes", -"/v1/chat/completions": "5/second", +"/v1/chat/completions": "300/minute" }' #`RATE_LIMIT_STRATEGY` Options: (fixed-window, fixed-window-elastic-expiry, moving-window) ref: https://limits.readthedocs.io/en/latest/strategies.html @@ -33,8 +33,8 @@ RATE_LIMIT_STRATEGY=moving-window # `GLOBAL_RATE_LIMIT`: 所有`RATE_LIMIT`没有指定的路由. 不填默认无限制 GLOBAL_RATE_LIMIT= -# same as TPM -TOKEN_RATE_LIMIT=60/second +# TPM: 返回的token速率限制 +TOKEN_RATE_LIMIT=50/second # 设定时区 TZ=Asia/Shanghai \ No newline at end of file diff --git a/.env.example b/.env.example index 1f3b6ba..b86908d 100644 --- a/.env.example +++ b/.env.example @@ -38,7 +38,7 @@ RATE_LIMIT_STRATEGY=fixed-window # `PROXY` http代理 PROXY=http://localhost:7890 -# `TOKEN_RATE_LIMIT` 对每一份流式请求的token速率限制 (注:这里的token并不是gpt中定义的token,而是SSE的chunk) +# `TOKEN_RATE_LIMIT` 对每一份流式返回的token速率限制 (注:这里的token并不严格等于gpt中定义的token,而是SSE的chunk) TOKEN_RATE_LIMIT=16/second # 设定时区 diff --git a/openai_forward/forwarding/base.py b/openai_forward/forwarding/base.py index e6f0c75..c5863ef 100644 --- a/openai_forward/forwarding/base.py +++ b/openai_forward/forwarding/base.py @@ -264,7 +264,6 @@ async def aiter_bytes( current_time = time.perf_counter() delta = current_time - start_time sleep_time = TOKEN_INTERVAL - delta - print(f"{delta=} {sleep_time=}") if sleep_time > 0: time.sleep(sleep_time) start_time = time.perf_counter() diff --git a/openai_forward/forwarding/settings.py b/openai_forward/forwarding/settings.py index 8031c7e..3903324 100644 --- a/openai_forward/forwarding/settings.py +++ b/openai_forward/forwarding/settings.py @@ -3,7 +3,7 @@ import limits from fastapi import Request -from ..config import setting_log +from ..config import print_startup_info, setting_log, show_rate_limit_info from ..helper import env2dict, env2list, format_route_prefix, get_client_ip ENV_VAR_SEP = "," @@ -60,7 +60,6 @@ def dynamic_rate_limit(key: str): else: TOKEN_INTERVAL = 0 -from ..config import print_startup_info, show_rate_limit_info for base_url, route_prefix in zip(OPENAI_BASE_URL, OPENAI_ROUTE_PREFIX): print_startup_info(base_url, route_prefix, OPENAI_API_KEY, FWD_KEY, LOG_CHAT) diff --git a/pytest.ini b/pytest.ini index 0313166..fc6533c 100644 --- a/pytest.ini +++ b/pytest.ini @@ -5,4 +5,4 @@ markers = timeout: marks test timeout duration repeat: marks that test run n times addopts = --doctest-modules --doctest-glob=README.md --doctest-glob=*.py --ignore=setup.py -norecursedirs = Examples ssl +norecursedirs = Examples ssl scripts