Skip to content

Commit

Permalink
Use JSON schema for automatic config validation
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinHammarstedt committed Sep 8, 2023
1 parent 9f3df55 commit a5b9c0f
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 29 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ dependencies = [
"appdirs==1.4.4",
"argcomplete==3.0.5",
"docx2python==1.27.1",
"jsonschema==4.19.0",
"nltk==3.8.1",
"pdfplumber==0.9.0",
"protobuf~=3.19.0", # Used by Stanza; see https://github.com/spraakbanken/sparv-pipeline/issues/161
Expand Down
5 changes: 4 additions & 1 deletion sparv/core/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,10 @@ update_autocompletion_cache()
sparv_config.validate_module_config()

# Validate config
sparv_config.validate_config()
# Validate the corpus config against the JSON schema built from the config
# structure. This is skipped when "schema" is among the selected targets.
if "schema" not in selected_targets:
    # Imported here so the schema module is only loaded when validation runs.
    from sparv.core import schema
    json_schema = schema.build_json_schema(sparv_config.config_structure)
    schema.validate(sparv_config.config, json_schema)

# Get reverse_config_usage dict for look-ups
reverse_config_usage = snake_utils.get_reverse_config_usage()
Expand Down
19 changes: 0 additions & 19 deletions sparv/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,25 +223,6 @@ def validate_module_config():
"are" if len(annotators) > 1 else "is", config_key), "sparv", "config")


def validate_config(config_dict=None, structure=None, parent=""):
    """Make sure the corpus config doesn't contain invalid keys.

    Args:
        config_dict: Config (sub)section to check. Defaults to the module-level ``config``.
        structure: The part of the config structure matching ``config_dict``. Defaults to the
            module-level ``config_structure``.
        parent: Dotted path of the section being checked; empty at the root level.

    Raises:
        SparvErrorMessage: If a key in the config is not present in the structure.
    """
    # Fall back to the globals only when nothing was passed in. A plain `or` would also replace an
    # empty section (e.g. {}) during recursion and check the whole config against a sub-structure.
    if config_dict is None:
        config_dict = config
    if structure is None:
        structure = config_structure

    for key in config_dict:
        path = f"{parent}.{key}" if parent else key
        if key not in structure:
            if not parent:
                raise SparvErrorMessage(f"Unknown key in config file: '{path}'. No module with that name found.",
                                        module="sparv", function="config")
            # The first component of the parent path is reported as the module name
            module_name = parent.split(".", 1)[0]
            raise SparvErrorMessage(f"Unknown key in config file: '{path}'. The module '{module_name}' "
                                    f"doesn't have an option with that name.",
                                    module="sparv", function="config")

        value = config_dict[key]
        # Structure entries with a truthy "_source" are not descended into. Only dict values can
        # hold further keys, so None or scalar values have nothing more to check here.
        if not structure[key].get("_source") and isinstance(value, dict):
            validate_config(value, structure[key], path)


def load_presets(lang, lang_variety):
"""Read presets files and return dictionaries with all available presets annotations and preset classes."""
global presets
Expand Down
27 changes: 18 additions & 9 deletions sparv/core/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,21 +351,30 @@ def validate(cfg: dict, schema: dict) -> None:
"""Validate a Sparv config using JSON schema."""
import jsonschema

def build_path_string(path):
    """Join the parts of a config path into a dotted string.

    String parts are joined with ".", and integer parts are rendered as an "[n]" index
    attached to the preceding part, e.g. ["a", 0, "b"] -> "a[0].b".

    Args:
        path: Iterable of path parts (strings and integers).

    Returns:
        The path as a single string; empty if the path has no parts.
    """
    parts = []
    for part in path:
        if isinstance(part, str):
            parts.append(part)
        elif isinstance(part, int):
            index = f"[{part}]"
            if parts:
                parts[-1] += index
            else:
                # A path starting with an index has no preceding part to attach to
                parts.append(index)
    return ".".join(parts)

try:
jsonschema.validate(cfg, schema)
except jsonschema.ValidationError as e:
msg = ["There was a problem trying to parse the corpus config file.\n"]

if e.validator == "unevaluatedProperties":
# This only happens for unexpected keys at the root level
prop = re.search(r"'(.+)' was unexpected", e.message)
if prop:
msg.append(f"Unexpected property at root level: {prop.group(1)!r}")
else:
msg.append(e.message)
# Rephrase messages about unexpected keys
unknown_key = re.search(r"properties are not allowed \('(.+)' was unexpected", e.message)
if unknown_key:
full_path = ".".join(list(e.absolute_path) + [unknown_key.group(1)])
msg.append(f"Unexpected key in config file: {full_path!r}")
else:
if e.absolute_path:
msg.append(f"Offending config path: {'.'.join(e.absolute_path)}")
msg.append(e.message)
if e.absolute_path:
msg.append(f"Offending config path: {build_path_string(e.absolute_path)}")
if "description" in e.schema:
msg.append(f"Description of config key: {e.schema['description']}")

raise SparvErrorMessage("\n".join(msg))

0 comments on commit a5b9c0f

Please sign in to comment.