Patch summary (text before the first "diff --git" header is commentary):
  * starbridge.web now also re-exports Resource, Context and LinkTarget.
  * web/types.py: Resource.url and Context.url become AnyHttpUrl, LinkTarget.url
    becomes AnyUrl; Resource.type gets min_length=4, LinkTarget.occurences gets
    ge=0, LinkTarget.anchor_texts gets min_length=1; Resource gains a model
    validator that rejects having neither or both of text and blob.
  * Adds a test for that validator and compares str(url) in the llms.txt test.
NOTE(review): line breaks and indentation were reconstructed from the hunk line
counts and Python syntax; verify context whitespace against the target files
before applying (or apply with whitespace-insensitive matching).

diff --git a/src/starbridge/web/__init__.py b/src/starbridge/web/__init__.py
index a947de5..5ddfdb5 100644
--- a/src/starbridge/web/__init__.py
+++ b/src/starbridge/web/__init__.py
@@ -2,6 +2,15 @@
 from .cli import cli
 from .service import Service
 from .settings import Settings
-from .types import GetResult, RobotForbiddenException
+from .types import Context, GetResult, LinkTarget, Resource, RobotForbiddenException
 
-__all__ = ["Service", "cli", "Settings", "RobotForbiddenException", "GetResult"]
+__all__ = [
+    "Service",
+    "cli",
+    "Settings",
+    "RobotForbiddenException",
+    "GetResult",
+    "Resource",
+    "Context",
+    "LinkTarget",
+]
diff --git a/src/starbridge/web/types.py b/src/starbridge/web/types.py
index af168fa..288b030 100644
--- a/src/starbridge/web/types.py
+++ b/src/starbridge/web/types.py
@@ -1,6 +1,6 @@
 from typing import Annotated
 
-from pydantic import BaseModel, Field
+from pydantic import AnyHttpUrl, AnyUrl, BaseModel, Field, model_validator
 
 
 class RobotForbiddenException(Exception):
@@ -25,8 +25,8 @@ class MimeType:
 
 
 class Resource(BaseModel):
-    url: Annotated[str, Field(description="Final URL of the resource")]
-    type: Annotated[str, Field(description="MIME type of the resource")]
+    url: Annotated[AnyHttpUrl, Field(description="Final URL of the resource")]
+    type: Annotated[str, Field(description="MIME type of the resource", min_length=4)]
     text: Annotated[
         str | None,
         Field(
@@ -40,24 +40,34 @@ class Resource(BaseModel):
         ),
     ] = None
 
+    @model_validator(mode="after")
+    def check_content_exists(self) -> "Resource":
+        if self.text is None and self.blob is None:
+            raise ValueError("Either text or blob must be provided")
+        if self.text is not None and self.blob is not None:
+            raise ValueError("Only one of text or blob must be provided")
+        return self
+
 
 class LinkTarget(BaseModel):
-    url: Annotated[str, Field(description="URL of the link target")]
+    url: Annotated[AnyUrl, Field(description="URL of the link target")]
     occurences: Annotated[
         int,
         Field(
-            description="Number of occurences of the url as a link target in the resource"
+            description="Number of occurences of the url as a link target in the resource",
+            ge=0,
         ),
     ]
+
     anchor_texts: Annotated[
         list[str],
-        Field(description="Anchor texts of the link target"),
+        Field(description="Anchor texts of the link target", min_length=1),
     ]
 
 
 class Context(BaseModel):
     type: Annotated[str, Field(description="Type of context")]
-    url: Annotated[str, Field(description="URL of the context")]
+    url: Annotated[AnyHttpUrl, Field(description="URL of the context")]
     text: Annotated[str, Field(description="Content of context in markdown format")]
 
 
diff --git a/tests/web/starbridge_web_types_test.py b/tests/web/starbridge_web_types_test.py
new file mode 100644
index 0000000..effa91a
--- /dev/null
+++ b/tests/web/starbridge_web_types_test.py
@@ -0,0 +1,15 @@
+import pytest
+from pydantic import ValidationError
+
+from starbridge.web import Resource
+
+
+def test_web_types_resource_exactly_one():
+    with pytest.raises(ValidationError):
+        Resource(
+            url="https://example.com", type="invalid", text="Hello World", blob=b"\0"
+        )
+    with pytest.raises(ValidationError):
+        Resource(url="https://example.com", type="invalid", text=None, blob=None)
+    Resource(url="https://example.com", type="invalid", text="Hello World", blob=None)
+    Resource(url="https://example.com", type="invalid", text=None, blob=b"\0")
diff --git a/tests/web/starbridge_web_utils_test.py b/tests/web/starbridge_web_utils_test.py
index ecfb1ed..a8aaddb 100644
--- a/tests/web/starbridge_web_utils_test.py
+++ b/tests/web/starbridge_web_utils_test.py
@@ -115,5 +115,5 @@ def mock_get_side_effect(url, **kwargs):
         (ctx for ctx in context if ctx.type == "llms_txt"), None
     )
     assert llms_txt_context is not None
-    assert llms_txt_context.url == "https://docs.anthropic.com/llms.txt"
+    assert str(llms_txt_context.url) == "https://docs.anthropic.com/llms.txt"
     assert llms_txt_context.text == LLMS_DUMY_CONTENT