Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(prompts): validate jsonschema using third-party library #5988

Merged
merged 6 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ dependencies = [
"pydantic>=1.0,!=2.0.*,<3", # exclude 2.0.* since it does not support the `json_encoders` configuration setting
"authlib",
"websockets",
"jsonschema>=4.0.0,<=4.23.0", # the upper bound is to keep us off the bleeding edge in case there's a regression since this controls what gets written to the database
]
dynamic = ["version"]

Expand Down Expand Up @@ -93,7 +94,7 @@ dev = [
"arize[AutoEmbeddings, LLM_Evaluation]",
"llama-index>=0.10.3",
"langchain>=0.0.334",
"litellm>=1.0.3",
"litellm>=1.0.3,<1.57.5", # windows compatibility broken on 1.57.5 (https://github.com/BerriAI/litellm/issues/7677)
"google-cloud-aiplatform>=1.3",
"anthropic",
"prometheus_client",
Expand All @@ -102,6 +103,7 @@ dev = [
"portpicker",
"uvloop; platform_system != 'Windows'",
"grpc-interceptor[testing]",
"types-jsonschema",
]
embeddings = [
"fast-hdbscan>=0.2.0",
Expand Down
1 change: 1 addition & 0 deletions requirements/type-check.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ requests # this is needed to type-check third-party packages
strawberry-graphql[opentelemetry]==0.253.1 # need to pin version because we're monkey-patching
tenacity
types-cachetools
types-jsonschema
types-protobuf
types-psutil
types-requests
Expand Down
2 changes: 1 addition & 1 deletion requirements/unit-tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ asyncpg
grpc-interceptor[testing]
httpx<0.28
httpx-ws
litellm>=1.0.3
litellm>=1.0.3,<1.57.5
nest-asyncio # for executor testing
numpy
openai>=1.0.0
Expand Down
120 changes: 120 additions & 0 deletions src/phoenix/server/api/helpers/jsonschema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
from typing import Annotated, Any

from jsonschema import Draft7Validator, ValidationError
from pydantic import AfterValidator
from typing_extensions import TypeAlias

# This meta-schema describes valid JSON schemas according to the JSON Schema Draft 7 specification.
# It is copied from https://json-schema.org/draft-07/schema#
# JSON booleans are written here as the Python literals True/False.
JSON_SCHEMA_DRAFT_7_META_SCHEMA = {
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "http://json-schema.org/draft-07/schema#",
"title": "Core schema meta-schema",
"definitions": {
"schemaArray": {"type": "array", "minItems": 1, "items": {"$ref": "#"}},
"nonNegativeInteger": {"type": "integer", "minimum": 0},
"nonNegativeIntegerDefault0": {
"allOf": [{"$ref": "#/definitions/nonNegativeInteger"}, {"default": 0}]
},
"simpleTypes": {
"enum": ["array", "boolean", "integer", "null", "number", "object", "string"]
},
"stringArray": {
"type": "array",
"items": {"type": "string"},
"uniqueItems": True,
"default": [],
},
},
"type": ["object", "boolean"],
"properties": {
"$id": {"type": "string", "format": "uri-reference"},
"$schema": {"type": "string", "format": "uri"},
"$ref": {"type": "string", "format": "uri-reference"},
"$comment": {"type": "string"},
"title": {"type": "string"},
"description": {"type": "string"},
"default": True,
"readOnly": {"type": "boolean", "default": False},
"writeOnly": {"type": "boolean", "default": False},
"examples": {"type": "array", "items": True},
"multipleOf": {"type": "number", "exclusiveMinimum": 0},
"maximum": {"type": "number"},
"exclusiveMaximum": {"type": "number"},
"minimum": {"type": "number"},
"exclusiveMinimum": {"type": "number"},
"maxLength": {"$ref": "#/definitions/nonNegativeInteger"},
"minLength": {"$ref": "#/definitions/nonNegativeIntegerDefault0"},
"pattern": {"type": "string", "format": "regex"},
"additionalItems": {"$ref": "#"},
"items": {"anyOf": [{"$ref": "#"}, {"$ref": "#/definitions/schemaArray"}], "default": True},
"maxItems": {"$ref": "#/definitions/nonNegativeInteger"},
"minItems": {"$ref": "#/definitions/nonNegativeIntegerDefault0"},
"uniqueItems": {"type": "boolean", "default": False},
"contains": {"$ref": "#"},
"maxProperties": {"$ref": "#/definitions/nonNegativeInteger"},
"minProperties": {"$ref": "#/definitions/nonNegativeIntegerDefault0"},
"required": {"$ref": "#/definitions/stringArray"},
"additionalProperties": {"$ref": "#"},
"definitions": {"type": "object", "additionalProperties": {"$ref": "#"}, "default": {}},
"properties": {"type": "object", "additionalProperties": {"$ref": "#"}, "default": {}},
"patternProperties": {
"type": "object",
"additionalProperties": {"$ref": "#"},
"propertyNames": {"format": "regex"},
"default": {},
},
"dependencies": {
"type": "object",
"additionalProperties": {
"anyOf": [{"$ref": "#"}, {"$ref": "#/definitions/stringArray"}]
},
},
"propertyNames": {"$ref": "#"},
"const": True,
"enum": {"type": "array", "items": True, "minItems": 1, "uniqueItems": True},
"type": {
"anyOf": [
{"$ref": "#/definitions/simpleTypes"},
{
"type": "array",
"items": {"$ref": "#/definitions/simpleTypes"},
"minItems": 1,
"uniqueItems": True,
},
]
},
"format": {"type": "string"},
"contentMediaType": {"type": "string"},
"contentEncoding": {"type": "string"},
"if": {"$ref": "#"},
"then": {"$ref": "#"},
"else": {"$ref": "#"},
"allOf": {"$ref": "#/definitions/schemaArray"},
"anyOf": {"$ref": "#/definitions/schemaArray"},
"oneOf": {"$ref": "#/definitions/schemaArray"},
"not": {"$ref": "#"},
},
"default": True,
}
# Runs at import time: raises immediately if the copied meta-schema is itself malformed.
Draft7Validator.check_schema(JSON_SCHEMA_DRAFT_7_META_SCHEMA) # ensure the schema is valid
# Single module-level validator instance, reused by validate_json_schema_object_definition.
# NOTE(review): no format_checker is passed, so the "format" keywords above (uri, regex, ...)
# are presumably not enforced during validation -- confirm against the jsonschema docs.
JSON_SCHEMA_DRAFT_7_VALIDATOR = Draft7Validator(JSON_SCHEMA_DRAFT_7_META_SCHEMA)


def validate_json_schema_object_definition(schema: dict[str, Any]) -> dict[str, Any]:
    """
    Validates that a dictionary is a JSON schema whose top-level type is "object".

    The dictionary is first validated against the JSON Schema Draft 7 meta-schema,
    then its top-level "type" key is required to equal the string "object".

    Args:
        schema: The candidate JSON schema.

    Returns:
        The same dictionary that was passed in, so the function can be chained
        as a validator.

    Raises:
        ValueError: If the dictionary does not conform to the meta-schema, or if
            its top-level "type" is missing or is anything other than "object".
    """
    try:
        JSON_SCHEMA_DRAFT_7_VALIDATOR.validate(schema)
    except ValidationError as error:
        # Surface the failure as a ValueError, keeping the jsonschema error
        # attached as the explicit cause so the original details stay in the traceback.
        raise ValueError(str(error)) from error
    if schema.get("type") != "object":
        raise ValueError("The 'type' property must be 'object'")
    return schema


# Pydantic type with built-in validation for JSON schemas.
# The annotated value is a dict[str, Any]; the AfterValidator metadata attaches
# validate_json_schema_object_definition, which rejects anything that is not a
# Draft 7 schema with top-level type "object".
JSONSchemaObjectDefinition: TypeAlias = Annotated[
dict[str, Any], AfterValidator(validate_json_schema_object_definition)
]
114 changes: 6 additions & 108 deletions src/phoenix/server/api/helpers/prompts/models.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from enum import Enum
from typing import Any, Literal, Optional, Union

from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_validator, model_validator
from pydantic import BaseModel, ConfigDict, ValidationError, model_validator
from typing_extensions import TypeAlias

from phoenix.server.api.helpers.jsonschema import JSONSchemaObjectDefinition

JSONSerializable = Union[None, bool, int, float, str, dict[str, Any], list[Any]]


Expand Down Expand Up @@ -122,110 +124,6 @@ def _get_tool_definition_model(
return None


# JSON schema
JSONSchemaPrimitiveProperty: TypeAlias = Union[
"JSONSchemaIntegerProperty",
"JSONSchemaNumberProperty",
"JSONSchemaBooleanProperty",
"JSONSchemaNullProperty",
"JSONSchemaStringProperty",
]
JSONSchemaContainerProperty: TypeAlias = Union[
"JSONSchemaArrayProperty",
"JSONSchemaObjectProperty",
]
JSONSchemaProperty: TypeAlias = Union[
"JSONSchemaPrimitiveProperty",
"JSONSchemaContainerProperty",
]


class JSONSchemaIntegerProperty(PromptModel):
type: Literal["integer"]
description: str = UNDEFINED
minimum: int = UNDEFINED
maximum: int = UNDEFINED

@model_validator(mode="after")
def ensure_minimum_lte_maximum(self) -> "JSONSchemaIntegerProperty":
if (
self.minimum is not UNDEFINED
and self.maximum is not UNDEFINED
and self.minimum > self.maximum
):
raise ValueError("minimum must be less than or equal to maximum")
return self


class JSONSchemaNumberProperty(PromptModel):
type: Literal["number"]
description: str = UNDEFINED
minimum: float = UNDEFINED
maximum: float = UNDEFINED

@model_validator(mode="after")
def ensure_minimum_lte_maximum(self) -> "JSONSchemaNumberProperty":
if (
self.minimum is not UNDEFINED
and self.maximum is not UNDEFINED
and self.minimum > self.maximum
):
raise ValueError("minimum must be less than or equal to maximum")
return self


class JSONSchemaBooleanProperty(PromptModel):
type: Literal["boolean"]
description: str = UNDEFINED


class JSONSchemaNullProperty(PromptModel):
type: Literal["null"]
description: str = UNDEFINED


class JSONSchemaStringProperty(PromptModel):
type: Literal["string"]
description: str = UNDEFINED
enum: list[str] = UNDEFINED

@field_validator("enum")
def ensure_unique_enum_values(cls, enum_values: list[str]) -> list[str]:
if enum_values is UNDEFINED:
return enum_values
if len(enum_values) != len(set(enum_values)):
raise ValueError("Enum values must be unique")
return enum_values


class JSONSchemaArrayProperty(PromptModel):
type: Literal["array"]
description: str = UNDEFINED
items: Union[JSONSchemaProperty, "JSONSchemaAnyOf"]


class JSONSchemaObjectProperty(PromptModel):
type: Literal["object"]
description: str = UNDEFINED
properties: dict[str, Union[JSONSchemaProperty, "JSONSchemaAnyOf"]]
required: list[str] = UNDEFINED
additional_properties: bool = Field(UNDEFINED, alias="additionalProperties")

@model_validator(mode="after")
def ensure_required_fields_are_included_in_properties(self) -> "JSONSchemaObjectProperty":
if self.required is UNDEFINED:
return self
invalid_fields = [field for field in self.required if field not in self.properties]
if invalid_fields:
raise ValueError(f"Required fields {invalid_fields} are not defined in properties")
return self


class JSONSchemaAnyOf(PromptModel):
description: str = UNDEFINED
any_of: list[JSONSchemaProperty] = Field(..., alias="anyOf")


# OpenAI tool definitions
class OpenAIFunctionDefinition(PromptModel):
"""
Expand All @@ -234,7 +132,7 @@ class OpenAIFunctionDefinition(PromptModel):

name: str
description: str = UNDEFINED
parameters: JSONSchemaObjectProperty = UNDEFINED
parameters: JSONSchemaObjectDefinition = UNDEFINED
strict: Optional[bool] = UNDEFINED


Expand All @@ -247,6 +145,7 @@ class OpenAIToolDefinition(PromptModel):
type: Literal["function"]


# Anthropic tool definitions
class AnthropicCacheControlEphemeralParam(PromptModel):
"""
Based on https://github.com/anthropics/anthropic-sdk-python/blob/93cbbbde964e244f02bf1bd2b579c5fabce4e267/src/anthropic/types/cache_control_ephemeral_param.py#L10
Expand All @@ -255,13 +154,12 @@ class AnthropicCacheControlEphemeralParam(PromptModel):
type: Literal["ephemeral"]


# Anthropic tool definitions
class AnthropicToolDefinition(PromptModel):
"""
Based on https://github.com/anthropics/anthropic-sdk-python/blob/93cbbbde964e244f02bf1bd2b579c5fabce4e267/src/anthropic/types/tool_param.py#L22
"""

input_schema: JSONSchemaObjectProperty
input_schema: JSONSchemaObjectDefinition
name: str
cache_control: Optional[AnthropicCacheControlEphemeralParam] = UNDEFINED
description: str = UNDEFINED
Loading
Loading