Skip to content

Commit

Permalink
refactor(prompts): validate jsonschema using third-party library (#5988)
Browse files Browse the repository at this point in the history
  • Loading branch information
axiomofjoy authored Jan 10, 2025
1 parent 3376475 commit d82d609
Show file tree
Hide file tree
Showing 7 changed files with 143 additions and 386 deletions.
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ dependencies = [
"pydantic>=1.0,!=2.0.*,<3", # exclude 2.0.* since it does not support the `json_encoders` configuration setting
"authlib",
"websockets",
"jsonschema>=4.0.0,<=4.23.0", # the upper bound keeps us off the bleeding edge in case of a regression, since this library controls what gets written to the database
]
dynamic = ["version"]

Expand Down Expand Up @@ -93,7 +94,7 @@ dev = [
"arize[AutoEmbeddings, LLM_Evaluation]",
"llama-index>=0.10.3",
"langchain>=0.0.334",
"litellm>=1.0.3",
"litellm>=1.0.3,<1.57.5", # Windows compatibility is broken on 1.57.5 (https://github.com/BerriAI/litellm/issues/7677)
"google-cloud-aiplatform>=1.3",
"anthropic",
"prometheus_client",
Expand All @@ -102,6 +103,7 @@ dev = [
"portpicker",
"uvloop; platform_system != 'Windows'",
"grpc-interceptor[testing]",
"types-jsonschema",
]
embeddings = [
"fast-hdbscan>=0.2.0",
Expand Down
1 change: 1 addition & 0 deletions requirements/type-check.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ requests # this is needed to type-check third-party packages
strawberry-graphql[opentelemetry]==0.253.1 # need to pin version because we're monkey-patching
tenacity
types-cachetools
types-jsonschema
types-protobuf
types-psutil
types-requests
Expand Down
2 changes: 1 addition & 1 deletion requirements/unit-tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ asyncpg
grpc-interceptor[testing]
httpx<0.28
httpx-ws
litellm>=1.0.3
litellm>=1.0.3,<1.57.5
nest-asyncio # for executor testing
numpy
openai>=1.0.0
Expand Down
120 changes: 120 additions & 0 deletions src/phoenix/server/api/helpers/jsonschema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
from typing import Annotated, Any

from jsonschema import Draft7Validator, ValidationError
from pydantic import AfterValidator
from typing_extensions import TypeAlias

# This meta-schema describes valid JSON schemas according to the JSON Schema Draft 7 specification.
# It is copied from https://json-schema.org/draft-07/schema#
# The JSON document is written as a Python literal, so JSON true/false appear as True/False.
# Where a schema is expected, a bare True is the boolean schema that accepts any value.
JSON_SCHEMA_DRAFT_7_META_SCHEMA = {
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "http://json-schema.org/draft-07/schema#",
"title": "Core schema meta-schema",
# Reusable sub-schemas, referenced below through "$ref": "#/definitions/<name>".
"definitions": {
"schemaArray": {"type": "array", "minItems": 1, "items": {"$ref": "#"}},
"nonNegativeInteger": {"type": "integer", "minimum": 0},
"nonNegativeIntegerDefault0": {
"allOf": [{"$ref": "#/definitions/nonNegativeInteger"}, {"default": 0}]
},
"simpleTypes": {
"enum": ["array", "boolean", "integer", "null", "number", "object", "string"]
},
"stringArray": {
"type": "array",
"items": {"type": "string"},
"uniqueItems": True,
"default": [],
},
},
# A schema document is itself either a JSON object or a boolean.
"type": ["object", "boolean"],
# One entry per schema keyword; {"$ref": "#"} points back at the root of this meta-schema,
# i.e. the keyword's value must itself be a schema.
"properties": {
"$id": {"type": "string", "format": "uri-reference"},
"$schema": {"type": "string", "format": "uri"},
"$ref": {"type": "string", "format": "uri-reference"},
"$comment": {"type": "string"},
"title": {"type": "string"},
"description": {"type": "string"},
"default": True,
"readOnly": {"type": "boolean", "default": False},
"writeOnly": {"type": "boolean", "default": False},
"examples": {"type": "array", "items": True},
"multipleOf": {"type": "number", "exclusiveMinimum": 0},
"maximum": {"type": "number"},
"exclusiveMaximum": {"type": "number"},
"minimum": {"type": "number"},
"exclusiveMinimum": {"type": "number"},
"maxLength": {"$ref": "#/definitions/nonNegativeInteger"},
"minLength": {"$ref": "#/definitions/nonNegativeIntegerDefault0"},
"pattern": {"type": "string", "format": "regex"},
"additionalItems": {"$ref": "#"},
"items": {"anyOf": [{"$ref": "#"}, {"$ref": "#/definitions/schemaArray"}], "default": True},
"maxItems": {"$ref": "#/definitions/nonNegativeInteger"},
"minItems": {"$ref": "#/definitions/nonNegativeIntegerDefault0"},
"uniqueItems": {"type": "boolean", "default": False},
"contains": {"$ref": "#"},
"maxProperties": {"$ref": "#/definitions/nonNegativeInteger"},
"minProperties": {"$ref": "#/definitions/nonNegativeIntegerDefault0"},
"required": {"$ref": "#/definitions/stringArray"},
"additionalProperties": {"$ref": "#"},
"definitions": {"type": "object", "additionalProperties": {"$ref": "#"}, "default": {}},
"properties": {"type": "object", "additionalProperties": {"$ref": "#"}, "default": {}},
"patternProperties": {
"type": "object",
"additionalProperties": {"$ref": "#"},
"propertyNames": {"format": "regex"},
"default": {},
},
"dependencies": {
"type": "object",
"additionalProperties": {
"anyOf": [{"$ref": "#"}, {"$ref": "#/definitions/stringArray"}]
},
},
"propertyNames": {"$ref": "#"},
"const": True,
"enum": {"type": "array", "items": True, "minItems": 1, "uniqueItems": True},
# "type" may be a single simple type name or a non-empty array of unique simple type names.
"type": {
"anyOf": [
{"$ref": "#/definitions/simpleTypes"},
{
"type": "array",
"items": {"$ref": "#/definitions/simpleTypes"},
"minItems": 1,
"uniqueItems": True,
},
]
},
"format": {"type": "string"},
"contentMediaType": {"type": "string"},
"contentEncoding": {"type": "string"},
"if": {"$ref": "#"},
"then": {"$ref": "#"},
"else": {"$ref": "#"},
"allOf": {"$ref": "#/definitions/schemaArray"},
"anyOf": {"$ref": "#/definitions/schemaArray"},
"oneOf": {"$ref": "#/definitions/schemaArray"},
"not": {"$ref": "#"},
},
"default": True,
}
# Both statements run at import time. `check_schema` verifies that the literal above is
# itself a valid Draft 7 schema, so a bad copy of the meta-schema fails on import.
Draft7Validator.check_schema(JSON_SCHEMA_DRAFT_7_META_SCHEMA) # ensure the schema is valid
# Module-level validator, built once; it checks candidate schemas against the meta-schema.
JSON_SCHEMA_DRAFT_7_VALIDATOR = Draft7Validator(JSON_SCHEMA_DRAFT_7_META_SCHEMA)


def validate_json_schema_object_definition(schema: dict[str, Any]) -> dict[str, Any]:
    """
    Validates that a dictionary is a JSON schema whose top-level type is "object".

    The dictionary is first validated against the JSON Schema Draft 7
    meta-schema; afterwards its "type" keyword must be exactly the string
    "object".

    Args:
        schema: The candidate JSON schema.

    Returns:
        The same dictionary, unmodified, so the function can be used as a
        pydantic ``AfterValidator``.

    Raises:
        ValueError: If the dictionary violates the Draft 7 meta-schema, or if
            its "type" is missing or is anything other than "object".
    """
    try:
        JSON_SCHEMA_DRAFT_7_VALIDATOR.validate(schema)
    except ValidationError as error:
        # Convert the jsonschema error into a ValueError, chaining the original
        # exception so the underlying cause stays visible in tracebacks.
        raise ValueError(str(error)) from error
    if schema.get("type") != "object":
        raise ValueError("The 'type' property must be 'object'")
    return schema


# Pydantic type with built-in validation for JSON schemas
# The value is a plain `dict[str, Any]`; `AfterValidator` makes pydantic call
# `validate_json_schema_object_definition` on it once the dict itself has been validated.
JSONSchemaObjectDefinition: TypeAlias = Annotated[
dict[str, Any], AfterValidator(validate_json_schema_object_definition)
]
114 changes: 6 additions & 108 deletions src/phoenix/server/api/helpers/prompts/models.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from enum import Enum
from typing import Any, Literal, Optional, Union

from pydantic import BaseModel, ConfigDict, Field, ValidationError, field_validator, model_validator
from pydantic import BaseModel, ConfigDict, ValidationError, model_validator
from typing_extensions import TypeAlias

from phoenix.server.api.helpers.jsonschema import JSONSchemaObjectDefinition

JSONSerializable = Union[None, bool, int, float, str, dict[str, Any], list[Any]]


Expand Down Expand Up @@ -122,110 +124,6 @@ def _get_tool_definition_model(
return None


# JSON schema
# Type aliases that group the JSON schema property models. Members are written as
# string forward references because the classes they name are declared further down.
# Scalar property kinds.
JSONSchemaPrimitiveProperty: TypeAlias = Union[
"JSONSchemaIntegerProperty",
"JSONSchemaNumberProperty",
"JSONSchemaBooleanProperty",
"JSONSchemaNullProperty",
"JSONSchemaStringProperty",
]
# Property kinds that contain other properties.
JSONSchemaContainerProperty: TypeAlias = Union[
"JSONSchemaArrayProperty",
"JSONSchemaObjectProperty",
]
# Any property kind: primitive or container.
JSONSchemaProperty: TypeAlias = Union[
"JSONSchemaPrimitiveProperty",
"JSONSchemaContainerProperty",
]


# Model of a JSON schema property whose "type" is "integer", with optional bounds.
# `PromptModel` and `UNDEFINED` are declared outside the portion of the file shown here.
class JSONSchemaIntegerProperty(PromptModel):
    type: Literal["integer"]
    description: str = UNDEFINED
    minimum: int = UNDEFINED
    maximum: int = UNDEFINED

    @model_validator(mode="after")
    def ensure_minimum_lte_maximum(self) -> "JSONSchemaIntegerProperty":
        """Reject the model when both bounds are set and minimum exceeds maximum."""
        lower_bound, upper_bound = self.minimum, self.maximum
        # The bounds can only be compared when both were provided.
        if lower_bound is UNDEFINED or upper_bound is UNDEFINED:
            return self
        if lower_bound > upper_bound:
            raise ValueError("minimum must be less than or equal to maximum")
        return self


# Model of a JSON schema property whose "type" is "number", with optional float bounds.
# `PromptModel` and `UNDEFINED` are declared outside the portion of the file shown here.
class JSONSchemaNumberProperty(PromptModel):
    type: Literal["number"]
    description: str = UNDEFINED
    minimum: float = UNDEFINED
    maximum: float = UNDEFINED

    @model_validator(mode="after")
    def ensure_minimum_lte_maximum(self) -> "JSONSchemaNumberProperty":
        """Reject the model when both bounds are set and minimum exceeds maximum."""
        lower_bound, upper_bound = self.minimum, self.maximum
        # The bounds can only be compared when both were provided.
        if lower_bound is UNDEFINED or upper_bound is UNDEFINED:
            return self
        if lower_bound > upper_bound:
            raise ValueError("minimum must be less than or equal to maximum")
        return self


# Model of a JSON schema property whose "type" is the literal "boolean".
class JSONSchemaBooleanProperty(PromptModel):
    type: Literal["boolean"]  # only the exact string "boolean" is accepted
    description: str = UNDEFINED  # optional; defaults to UNDEFINED (declared outside this view)


# Model of a JSON schema property whose "type" is the literal "null".
class JSONSchemaNullProperty(PromptModel):
    type: Literal["null"]  # only the exact string "null" is accepted
    description: str = UNDEFINED  # optional; defaults to UNDEFINED (declared outside this view)


# Model of a JSON schema property whose "type" is "string", optionally limited to an enum.
# `PromptModel` and `UNDEFINED` are declared outside the portion of the file shown here.
class JSONSchemaStringProperty(PromptModel):
    type: Literal["string"]
    description: str = UNDEFINED
    enum: list[str] = UNDEFINED

    @field_validator("enum")
    def ensure_unique_enum_values(cls, enum_values: list[str]) -> list[str]:
        """Return the enum values unchanged, raising if any value is repeated."""
        if enum_values is not UNDEFINED:
            # A set drops duplicates, so a size mismatch means a value was repeated.
            distinct_values = set(enum_values)
            if len(distinct_values) != len(enum_values):
                raise ValueError("Enum values must be unique")
        return enum_values


# Model of a JSON schema property whose "type" is the literal "array".
class JSONSchemaArrayProperty(PromptModel):
    type: Literal["array"]
    description: str = UNDEFINED
    # Schema of the array elements: any property model or an anyOf union.
    # "JSONSchemaAnyOf" is a string forward reference because that class is declared later.
    items: Union[JSONSchemaProperty, "JSONSchemaAnyOf"]


# Model of a JSON schema property whose "type" is "object".
# `PromptModel` and `UNDEFINED` are declared outside the portion of the file shown here.
class JSONSchemaObjectProperty(PromptModel):
    type: Literal["object"]
    description: str = UNDEFINED
    properties: dict[str, Union[JSONSchemaProperty, "JSONSchemaAnyOf"]]
    required: list[str] = UNDEFINED
    additional_properties: bool = Field(UNDEFINED, alias="additionalProperties")

    @model_validator(mode="after")
    def ensure_required_fields_are_included_in_properties(self) -> "JSONSchemaObjectProperty":
        """Reject the model when `required` names a key that `properties` does not define."""
        if self.required is not UNDEFINED:
            declared = self.properties
            # Collect the offending names in their original order for the error message.
            undeclared = [name for name in self.required if name not in declared]
            if undeclared:
                raise ValueError(f"Required fields {undeclared} are not defined in properties")
        return self


# Model of a JSON schema "anyOf" union of property schemas.
class JSONSchemaAnyOf(PromptModel):
    description: str = UNDEFINED
    # Required (`...` means no default); populated from the "anyOf" key via the alias.
    any_of: list[JSONSchemaProperty] = Field(..., alias="anyOf")


# OpenAI tool definitions
class OpenAIFunctionDefinition(PromptModel):
"""
Expand All @@ -234,7 +132,7 @@ class OpenAIFunctionDefinition(PromptModel):

name: str
description: str = UNDEFINED
parameters: JSONSchemaObjectProperty = UNDEFINED
parameters: JSONSchemaObjectDefinition = UNDEFINED
strict: Optional[bool] = UNDEFINED


Expand All @@ -247,6 +145,7 @@ class OpenAIToolDefinition(PromptModel):
type: Literal["function"]


# Anthropic tool definitions
class AnthropicCacheControlEphemeralParam(PromptModel):
"""
Based on https://github.com/anthropics/anthropic-sdk-python/blob/93cbbbde964e244f02bf1bd2b579c5fabce4e267/src/anthropic/types/cache_control_ephemeral_param.py#L10
Expand All @@ -255,13 +154,12 @@ class AnthropicCacheControlEphemeralParam(PromptModel):
type: Literal["ephemeral"]


# Anthropic tool definitions
class AnthropicToolDefinition(PromptModel):
"""
Based on https://github.com/anthropics/anthropic-sdk-python/blob/93cbbbde964e244f02bf1bd2b579c5fabce4e267/src/anthropic/types/tool_param.py#L22
"""

input_schema: JSONSchemaObjectProperty
input_schema: JSONSchemaObjectDefinition
name: str
cache_control: Optional[AnthropicCacheControlEphemeralParam] = UNDEFINED
description: str = UNDEFINED
Loading

0 comments on commit d82d609

Please sign in to comment.