diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index 85c1d1b..ccb6572 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -13,3 +13,9 @@ updates:
     directory: "/.github/workflows"
     schedule:
       interval: "daily"
+
+  # Maintain dependencies for Python code
+  - package-ecosystem: "pip"
+    directory: "/"
+    schedule:
+      interval: "daily"
diff --git a/pyproject.toml b/pyproject.toml
index 4bc1753..50f97df 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,15 @@ classifiers = [
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
 ]
-dynamic = ["dependencies", "optional-dependencies", "version"]
+dependencies = [
+    "typing_extensions",
+    "jsonschema>=4.22.0",
+    "PyYAML>=6.0.0",
+    # The below library should NOT be imported into any python files
+    # We only use the command via subprocess
+    "yamllint>=1.35.1",
+]
+dynamic = ["version"]
 
 [project.urls]
 homepage = "https://instructlab.ai"
@@ -40,6 +48,7 @@ exclude = ["^src/instructlab/schema/_version\\.py$"]
 target-version = "py310"
 src = ["src", "tests"]
 extend-exclude = ["src/instructlab/schema/_version.py"]
+line-length = 180
 
 [tool.ruff.lint]
 select = [
@@ -52,12 +61,28 @@ select = [
     "SIM", # flake8-simplify
     "TID", # flake8-tidy-imports
 ]
+ignore = [
+    "B019", # cached-instance-method
+]
+
+[tool.ruff.lint.flake8-tidy-imports.banned-api]
+"yamllint".msg = "yamllint is for use as a command via subprocess."
 
 [tool.pylint.main]
 py-version = "3.10"
 source-roots = ["src", "tests"]
 ignore = ["_version.py"]
 
+[tool.pylint.design]
+max-args = 10
+max-branches = 30
+max-locals = 30
+max-statements = 80
+min-public-methods = 1
+
+[tool.pylint.format]
+max-line-length = 180
+
 [tool.pylint."messages control"]
 disable = [
     "missing-class-docstring",
diff --git a/src/instructlab/schema/__init__.py b/src/instructlab/schema/__init__.py
index a1bc1f2..da4e07c 100644
--- a/src/instructlab/schema/__init__.py
+++ b/src/instructlab/schema/__init__.py
@@ -10,7 +10,17 @@
 except ImportError:  # python<3.11
     from importlib.abc import Traversable
 
-__all__ = ["schema_versions"]
+__all__ = ["schema_base", "schema_versions"]
+
+
+def schema_base() -> Traversable:
+    """Return the schema base.
+
+    Returns:
+        Traversable: The package resource directory which holds the
+            versioned schema directories (v1, v2, ...).
+    """
+    return resources.files(__package__)
 
 
 def schema_versions() -> list[Traversable]:
@@ -19,9 +29,8 @@ def schema_versions() -> list[Traversable]:
     Returns:
         list[Traversable]: A sorted list of schema versions.
     """
-    schema_base = resources.files(__package__)
     versions = sorted(
-        (v for v in schema_base.iterdir() if v.name[0] == "v" and v.name[1:].isdigit()),
+        (v for v in schema_base().iterdir() if v.name[0] == "v" and v.name[1:].isdigit()),
         key=lambda k: int(k.name[1:]),
     )
     return versions
diff --git a/src/instructlab/schema/taxonomy.py b/src/instructlab/schema/taxonomy.py
new file mode 100644
index 0000000..6b60025
--- /dev/null
+++ b/src/instructlab/schema/taxonomy.py
@@ -0,0 +1,325 @@
+# SPDX-License-Identifier: Apache-2.0
+
+"""Taxonomy qna.yaml parsing"""
+
+# Standard
+import json
+import logging
+import os
+import re
+import subprocess
+from collections.abc import Mapping
+from enum import Enum
+from functools import lru_cache, partial
+from pathlib import Path
+from typing import Any
+
+# Third Party
+import yaml
+from jsonschema.protocols import Validator
+from jsonschema.validators import validator_for
+from referencing import Registry, Resource
+from referencing.exceptions import NoSuchResource
+from referencing.jsonschema import DRAFT202012
+from typing_extensions import Self
+
+from . import schema_base, schema_versions
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_TAXONOMY_FOLDERS: list[str] = ["compositional_skills", "knowledge"]
+"""Taxonomy folders which are also the schema names"""
+
+DEFAULT_YAMLLINT_CONFIG: str = "{extends: relaxed, rules: {line-length: {max: 120}}}"
+"""Default yamllint configuration"""
+
+
+class TaxonomyReadingException(Exception):
+    """An exception raised when an unexpected error occurs while reading and parsing a taxonomy file."""
+
+
+TaxonomyMessageFormat = Enum("TaxonomyMessageFormat", "auto standard github logging")
+"""An enum for the format choices for taxonomy parsing messages"""
+
+
+class Taxonomy:
+    """A container for a parsed taxonomy qna.yaml file.
+
+    Besides the parsed YAML mapping (``parsed``), the object counts the errors and
+    warnings reported against the file and prints or logs each of them in the
+    configured message format.
+    """
+
+    def __init__(self, *, file: str | Path, message_format: TaxonomyMessageFormat) -> None:
+        """Initialize the container.
+
+        Args:
+            file: Path of the taxonomy file; used as the location prefix of every message.
+            message_format: How messages are emitted. ``auto`` resolves to ``github`` when both
+                GITHUB_ACTIONS and GITHUB_WORKFLOW are set in the environment, else ``standard``.
+        """
+        self._file: Path = file if isinstance(file, Path) else Path(file)
+        if message_format == TaxonomyMessageFormat.auto:
+            message_format = TaxonomyMessageFormat.github if "GITHUB_ACTIONS" in os.environ and "GITHUB_WORKFLOW" in os.environ else TaxonomyMessageFormat.standard
+        self._message_format: TaxonomyMessageFormat = message_format
+        self.errors: int = 0
+        self.warnings: int = 0
+        self.parsed: Mapping[str, Any] = {}
+
+    @property
+    def file(self) -> Path:
+        """The path of the taxonomy file used in messages."""
+        return self._file
+
+    def _emit(self, level: int, message: str, message_args: tuple[Any, ...], line: str | int, col: str | int, yaml_path: str) -> None:
+        """Format a message and emit it at the given logging level in the configured format."""
+        # Interpolate here rather than splicing the message into a larger format string: a
+        # message without arguments (yamllint output, schema errors quoting YAML content) may
+        # contain a literal "%" which must never be interpreted as a format directive.
+        text = message % message_args if message_args else message
+        location = f"{line}:{col} [{yaml_path}]" if yaml_path else f"{line}:{col}"
+        is_error = level >= logging.ERROR
+        match self._message_format:
+            case TaxonomyMessageFormat.github:
+                command = "error" if is_error else "warning"
+                print(f"::{command} file={self._file},line={line},col={col}::{location} {text}")
+            case TaxonomyMessageFormat.logging:
+                logger.log(level, "%s:%s %s", self._file, location, text)
+            case TaxonomyMessageFormat.standard | _:
+                label = "ERROR" if is_error else "WARN"
+                print(f"{label}: {self._file}:{location} {text}")
+
+    def error(
+        self,
+        message: str,
+        *message_args: Any,
+        line: str | int = 1,
+        col: str | int = 1,
+        yaml_path: str = "",
+    ) -> Self:
+        """Count and report an error for the file.
+
+        Args:
+            message: The message text; %-formatted with message_args when any are given.
+            message_args: Optional arguments interpolated into message.
+            line: Line in the file the message refers to.
+            col: Column in the file the message refers to.
+            yaml_path: Optional path of the offending YAML element, shown in brackets.
+
+        Returns:
+            Self: This object, so callers can ``return taxonomy.error(...)``.
+        """
+        self.errors += 1
+        self._emit(logging.ERROR, message, message_args, line, col, yaml_path)
+        return self
+
+    def warning(
+        self,
+        message: str,
+        *message_args: Any,
+        line: str | int = 1,
+        col: str | int = 1,
+        yaml_path: str = "",
+    ) -> Self:
+        """Count and report a warning for the file; the arguments are the same as for error().
+
+        Returns:
+            Self: This object, to allow call chaining.
+        """
+        self.warnings += 1
+        self._emit(logging.WARNING, message, message_args, line, col, yaml_path)
+        return self
+
+
+class TaxonomyParser:
+    """A parser for taxonomy qna.yaml files. The parser will return a Taxonomy object."""
+
+    def __init__(
+        self,
+        *,
+        taxonomy_folders: list[str] | None = None,
+        schema_version: int | None = None,
+        yamllint_config: str = DEFAULT_YAMLLINT_CONFIG,
+        message_format: TaxonomyMessageFormat = TaxonomyMessageFormat.auto,
+    ) -> None:
+        """Initialize the parser.
+
+        Args:
+            taxonomy_folders: Folder names which are also schema names. Defaults to DEFAULT_TAXONOMY_FOLDERS.
+            schema_version: Schema version to validate against. None selects the newest packaged
+                version; a value less than 1 selects the version stated in each YAML document.
+            yamllint_config: Configuration data passed to the yamllint command.
+            message_format: Format of the messages emitted by the returned Taxonomy objects.
+        """
+        if taxonomy_folders is None:
+            taxonomy_folders = DEFAULT_TAXONOMY_FOLDERS
+        self.taxonomy_folders: list[str] = taxonomy_folders
+        if schema_version is None:
+            versions = schema_versions()
+            if versions:
+                schema_version = int(versions[-1].name[1:])
+        self.schema_version = schema_version
+        self.yamllint_config: str = yamllint_config
+        self.message_format: TaxonomyMessageFormat = message_format
+        self.yq_available: bool = True
+
+    # lru_cache on a method keys the cache on (self, path) and keeps each parser instance
+    # referenced by the cache; this is accepted here (ruff B019 is ignored in pyproject.toml).
+    @lru_cache
+    def _load_schema(self, path: str) -> Resource:
+        """Load the schema file at path, relative to the schema base, as a referencing Resource.
+
+        Raises:
+            NoSuchResource: If the schema file cannot be read or parsed.
+        """
+        try:
+            schema_path = schema_base().joinpath(path)
+            contents = json.loads(schema_path.read_text(encoding="utf-8"))
+            resource = Resource.from_contents(contents=contents, default_specification=DRAFT202012)
+            return resource
+        except Exception as e:
+            raise NoSuchResource(path) from e
+
+    def _retrieve(self, version_base: str, schema: str) -> Resource:
+        """Retrieve the named schema from the given version folder (e.g. "v2")."""
+        path = Path(version_base, schema).as_posix()
+        return self._load_schema(path)
+
+    def _yamllint(self, content: str, taxonomy: Taxonomy) -> None:
+        """Run the yamllint command on content and report each finding on taxonomy."""
+        yamllint_cmd = [
+            "yamllint",
+            "-f",
+            "parsable",
+            "-d",
+            self.yamllint_config,
+            "-",  # read from stdin
+        ]
+
+        try:
+            result = subprocess.run(
+                yamllint_cmd,
+                check=False,
+                input=content,
+                text=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+            )
+        except FileNotFoundError as e:
+            taxonomy.warning("could not run yamllint command: %s", e)
+            return
+
+        # parsable output format: <file>:<line>:<col>: [<severity>] <message>
+        pattern = re.compile(r"[^:]+:(?P<line>[^:]+):(?P<col>[^:]+):\s*\[(?P<severity>[^]]+)\]\s*(?P<message>.*)")
+        for output_line in result.stdout.splitlines():
+            match = pattern.match(output_line)
+            if match:
+                line = match.group("line")
+                col = match.group("col")
+                severity = match.group("severity")
+                message = match.group("message")
+                # The text is passed as an argument since yamllint output can contain a literal "%"
+                if severity == "error":
+                    taxonomy.error("%s", message, line=line, col=col)
+                else:
+                    taxonomy.warning("%s", message, line=line, col=col)
+
+    def parse(self, file: str | Path) -> Taxonomy:
+        """Parse, lint and schema-validate a taxonomy qna.yaml file.
+
+        Args:
+            file: Path of the qna.yaml file.
+
+        Returns:
+            Taxonomy: Holds the parsed content and the counts of reported errors and warnings.
+
+        Raises:
+            TaxonomyReadingException: If an unexpected exception occurs while reading the file.
+        """
+        full_path = Path(file).resolve()
+        for i in range(len(full_path.parts) - 1, -1, -1):
+            if full_path.parts[i] in self.taxonomy_folders:
+                taxonomy = Taxonomy(file=Path(*full_path.parts[i:]), message_format=self.message_format)
+                break
+        else:
+            taxonomy = Taxonomy(file=full_path, message_format=self.message_format)
+
+        if not os.path.isfile(full_path):
+            return taxonomy.error('The file "%s" does not exist or is not a file', full_path)
+
+        if full_path.name != "qna.yaml":
+            return taxonomy.error('Taxonomy file must be named "qna.yaml"; "%s" is not a valid name', full_path.name)
+
+        version = self.schema_version
+        if version is None:
+            return taxonomy.error('Schema base "%s" does not contain any schema versions', schema_base())
+
+        try:
+            with open(full_path, encoding="utf-8") as stream:
+                content = stream.read()
+
+            parsed: Mapping[str, Any] = yaml.safe_load(content)
+            if not parsed:
+                return taxonomy.error("The file is empty")
+
+            if not isinstance(parsed, Mapping):
+                return taxonomy.error(
+                    "The file is not valid. The top-level element is not an object with key-value pairs.",
+                )
+
+            if version < 1:  # Use version from YAML document
+                version = parsed.get("version", 1)
+                if not isinstance(version, int):
+                    # schema validation will complain about the type
+                    try:
+                        version = int(version)  # type: ignore[arg-type]
+                    except (TypeError, ValueError):  # int() raises TypeError for null, lists and mappings
+                        version = 1  # fallback to version 1
+
+            if version > 1:  # no linting for version 1 yaml
+                self._yamllint(content=content, taxonomy=taxonomy)
+
+            retrieve = partial(self._retrieve, f"v{version}")
+            schema_name = taxonomy.file.parts[0]
+            if schema_name not in self.taxonomy_folders:
+                schema_name = "knowledge" if "document" in parsed else "compositional_skills"
+
+            try:
+                schema_resource = retrieve(f"{schema_name}.json")
+                schema = schema_resource.contents
+                validator_cls = validator_for(schema)
+                registry: Registry = Registry(retrieve=retrieve)  # type: ignore[call-arg]
+                validator: Validator = validator_cls(schema, registry=registry)
+
+                for validation_error in validator.iter_errors(parsed):
+                    yaml_path = validation_error.json_path[1:]
+                    if not yaml_path:
+                        yaml_path = "."
+                    line: str | int = 1
+                    if self.yq_available:
+                        try:
+                            yq_expression = f"{yaml_path} | line"
+                            line = subprocess.check_output(["yq", yq_expression], input=content, text=True)
+                            line = line.strip() or 1  # fall back to line 1 on empty or whitespace-only output
+                        except (subprocess.SubprocessError, FileNotFoundError) as e:
+                            self.yq_available = not isinstance(e, FileNotFoundError)
+                            taxonomy.warning("could not run yq command: %s", e)
+                    if validation_error.validator == "minItems":
+                        # Special handling for minItems which can have a long message for seed_examples
+                        taxonomy.error(
+                            "Value must have at least %s items",
+                            validation_error.validator_value,
+                            line=line,
+                            yaml_path=yaml_path,
+                        )
+                    else:
+                        # The text is passed as an argument since it can quote YAML content containing "%"
+                        taxonomy.error("%s", validation_error.message[-200:], line=line, yaml_path=yaml_path)
+            except NoSuchResource as e:
+                taxonomy.error("Cannot load schema file %s. %s", e.ref, e)
+
+            taxonomy.parsed = parsed
+        except Exception as e:
+            raise TaxonomyReadingException from e
+
+        return taxonomy
diff --git a/tests/test_parse.py b/tests/test_parse.py
new file mode 100644
index 0000000..bba75ae
--- /dev/null
+++ b/tests/test_parse.py
@@ -0,0 +1,272 @@
+# SPDX-License-Identifier: Apache-2.0
+
+# Standard
+import logging
+import re
+from collections.abc import Callable
+from pathlib import Path
+
+# Third Party
+from assertpy import assert_that
+from pytest import CaptureFixture, LogCaptureFixture
+
+from instructlab.schema.taxonomy import TaxonomyMessageFormat, TaxonomyParser
+
+testdata = Path("tests/testdata")
+
+
+class TestParsing:
+    """Tests for TaxonomyParser.parse using the qna.yaml files under tests/testdata."""
+    def message_filter(self, regex: str) -> Callable[[logging.LogRecord], bool]:
+        """Return a predicate which matches log records whose formatted message contains regex."""
+        return lambda r: bool(re.search(regex, r.message))
+
+    def test_invalid(self, caplog: LogCaptureFixture):
+        caplog.set_level(logging.INFO, logger="instructlab.schema")
+        test_yaml = "compositional_skills/invalid_yaml/qna.yaml"
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.logging)
+        taxonomy = parser.parse(testdata.joinpath(test_yaml))
+
+        assert_that(taxonomy.warnings).is_greater_than_or_equal_to(1)
+        assert_that(taxonomy.errors).is_greater_than_or_equal_to(2)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(f"^{re.escape(test_yaml)}:"),  # type: ignore[call-arg]
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"line too long"),  # type: ignore[call-arg]
+        ).contains_only(logging.WARNING)
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"Unevaluated properties.*createdby"),  # type: ignore[call-arg]
+        ).contains_only(logging.ERROR)
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"created_by.*required property"),  # type: ignore[call-arg]
+        ).contains_only(logging.ERROR)
+
+    def test_invalid_custom_yaml_config(self, caplog: LogCaptureFixture):
+        """A custom yamllint config with a longer line-length limit produces no lint warnings."""
+        caplog.set_level(logging.INFO, logger="instructlab.schema")
+        yamllint_config = "{extends: relaxed, rules: {line-length: {max: 180}}}"
+        test_yaml = "compositional_skills/invalid_yaml/qna.yaml"
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.logging, yamllint_config=yamllint_config)
+        taxonomy = parser.parse(testdata.joinpath(test_yaml))
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_greater_than_or_equal_to(2)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(f"^{re.escape(test_yaml)}:"),  # type: ignore[call-arg]
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"line too long"),  # type: ignore[call-arg]
+        ).is_empty()
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"Unevaluated properties.*createdby"),  # type: ignore[call-arg]
+        ).contains_only(logging.ERROR)
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"created_by.*required property"),  # type: ignore[call-arg]
+        ).contains_only(logging.ERROR)
+
+    def test_incomplete_skill(self, caplog: LogCaptureFixture):
+        caplog.set_level(logging.INFO, logger="instructlab.schema")
+        test_yaml = "compositional_skills/skill_incomplete/qna.yaml"
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.logging)
+        taxonomy = parser.parse(testdata.joinpath(test_yaml))
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_greater_than_or_equal_to(1)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(f"^{re.escape(test_yaml)}:"),  # type: ignore[call-arg]
+        ).is_length(len(caplog.records))
+        # The brackets are escaped so the literal "[.seed_examples]" yaml path is matched, not a character class
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"\[\.seed_examples\].*Value must have at least"),  # type: ignore[call-arg]
+        ).contains_only(logging.ERROR)
+
+    def test_valid_skill(self, caplog: LogCaptureFixture):
+        caplog.set_level(logging.INFO, logger="instructlab.schema")
+        test_yaml = "compositional_skills/skill_valid/qna.yaml"
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.logging)
+        taxonomy = parser.parse(testdata.joinpath(test_yaml))
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_zero()
+        assert_that(caplog.records).is_empty()
+
+        assert_that(taxonomy.parsed).contains_only("version", "created_by", "seed_examples", "task_description")
+        assert_that(taxonomy.parsed.get("seed_examples")).is_length(5)
+
+    def test_valid_knowledge(self, caplog: LogCaptureFixture):
+        caplog.set_level(logging.INFO, logger="instructlab.schema")
+        test_yaml = "knowledge/knowledge_valid/qna.yaml"
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.logging)
+        taxonomy = parser.parse(testdata.joinpath(test_yaml))
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_zero()
+        assert_that(caplog.records).is_empty()
+
+        assert_that(taxonomy.parsed).contains_only(
+            "version",
+            "created_by",
+            "seed_examples",
+            "task_description",
+            "document",
+            "domain",
+        )
+        assert_that(taxonomy.parsed.get("seed_examples")).is_length(5)
+        assert_that(taxonomy.parsed.get("document")).contains_only("repo", "commit", "patterns")
+
+    def test_file_does_not_exist(self, caplog: LogCaptureFixture):
+        caplog.set_level(logging.INFO, logger="instructlab.schema")
+        test_yaml = "knowledge/invalid_name/qna.yaml"
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.logging)
+        taxonomy = parser.parse(testdata.joinpath(test_yaml))
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_equal_to(1)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(re.escape(test_yaml)),  # type: ignore[call-arg]
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"does not exist or is not a file"),  # type: ignore[call-arg]
+        ).contains_only(logging.ERROR)
+
+    def test_file_has_wrong_extension(self, caplog: LogCaptureFixture):
+        caplog.set_level(logging.INFO, logger="instructlab.schema")
+        test_yaml = "knowledge/invalid_name/qna.yml"
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.logging)
+        taxonomy = parser.parse(testdata.joinpath(test_yaml))
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_equal_to(1)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(re.escape(test_yaml)),  # type: ignore[call-arg]
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"""Taxonomy file must be named "qna\.yaml".*qna\.yml"""),  # type: ignore[call-arg]
+        ).contains_only(logging.ERROR)
+
+    def test_file_has_wrong_name(self, caplog: LogCaptureFixture):
+        caplog.set_level(logging.INFO, logger="instructlab.schema")
+        test_yaml = "knowledge/invalid_name/file.yaml"
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.logging)
+        taxonomy = parser.parse(testdata.joinpath(test_yaml))
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_equal_to(1)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(re.escape(test_yaml)),  # type: ignore[call-arg]
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"""Taxonomy file must be named "qna\.yaml".*file\.yaml"""),  # type: ignore[call-arg]
+        ).contains_only(logging.ERROR)
+
+    def test_empty_yaml(self, caplog: LogCaptureFixture):
+        caplog.set_level(logging.INFO, logger="instructlab.schema")
+        test_yaml = "compositional_skills/empty_yaml/qna.yaml"
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.logging)
+        taxonomy = parser.parse(testdata.joinpath(test_yaml))
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_equal_to(1)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(re.escape(test_yaml)),  # type: ignore[call-arg]
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"The file is empty"),  # type: ignore[call-arg]
+        ).contains_only(logging.ERROR)
+
+    def test_array_yaml(self, caplog: LogCaptureFixture):
+        caplog.set_level(logging.INFO, logger="instructlab.schema")
+        test_yaml = "compositional_skills/array_yaml/qna.yaml"
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.logging)
+        taxonomy = parser.parse(testdata.joinpath(test_yaml))
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_equal_to(1)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(re.escape(test_yaml)),  # type: ignore[call-arg]
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"The file is not valid"),  # type: ignore[call-arg]
+        ).contains_only(logging.ERROR)
+
+    def test_version_1(self, caplog: LogCaptureFixture):
+        caplog.set_level(logging.INFO, logger="instructlab.schema")
+        test_yaml = "compositional_skills/version_1/qna.yaml"
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.logging)
+        taxonomy = parser.parse(testdata.joinpath(test_yaml))
+
+        assert_that(taxonomy.warnings).is_zero()
+        assert_that(taxonomy.errors).is_zero()
+        assert_that(caplog.records).is_empty()
+
+    def test_version_1_as_version_2(self, caplog: LogCaptureFixture):
+        caplog.set_level(logging.INFO, logger="instructlab.schema")
+        test_yaml = "compositional_skills/version_1/qna.yaml"
+        parser = TaxonomyParser(schema_version=2, message_format=TaxonomyMessageFormat.logging)
+        taxonomy = parser.parse(testdata.joinpath(test_yaml))
+
+        assert_that(taxonomy.warnings).is_greater_than_or_equal_to(1)
+        assert_that(taxonomy.errors).is_greater_than_or_equal_to(1)
+        assert_that(caplog.records).extracting(
+            "message",
+            filter=self.message_filter(f"^{re.escape(test_yaml)}:"),  # type: ignore[call-arg]
+        ).is_length(len(caplog.records))
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"line too long"),  # type: ignore[call-arg]
+        ).contains_only(logging.WARNING)
+        assert_that(caplog.records).extracting(
+            "levelno",
+            filter=self.message_filter(r"version.*required property"),  # type: ignore[call-arg]
+        ).contains_only(logging.ERROR)
+
+    def test_format_github(self, capsys: CaptureFixture[str]):
+        test_yaml = "compositional_skills/invalid_yaml/qna.yaml"
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.github)
+        taxonomy = parser.parse(testdata.joinpath(test_yaml))
+
+        assert_that(taxonomy.warnings).is_greater_than_or_equal_to(1)
+        assert_that(taxonomy.errors).is_greater_than_or_equal_to(2)
+        captured = capsys.readouterr()
+        assert_that(captured.err).is_empty()
+        assert_that(captured.out).is_not_empty()
+        lines: list[str] = captured.out.splitlines()
+        assert_that(lines).is_not_empty()
+        for line in lines:
+            assert_that(line).matches(f"^::(error|warning) file={re.escape(test_yaml)},")  # type: ignore[arg-type]
+
+    def test_format_standard(self, capsys: CaptureFixture[str]):
+        test_yaml = "compositional_skills/invalid_yaml/qna.yaml"
+        parser = TaxonomyParser(schema_version=0, message_format=TaxonomyMessageFormat.standard)
+        taxonomy = parser.parse(testdata.joinpath(test_yaml))
+
+        assert_that(taxonomy.warnings).is_greater_than_or_equal_to(1)
+        assert_that(taxonomy.errors).is_greater_than_or_equal_to(2)
+        captured = capsys.readouterr()
+        assert_that(captured.err).is_empty()
+        assert_that(captured.out).is_not_empty()
+        lines: list[str] = captured.out.splitlines()
+        assert_that(lines).is_not_empty()
+        for line in lines:
+            assert_that(line).matches(f"^(ERROR|WARN): {re.escape(test_yaml)}:")  # type: ignore[arg-type]
diff --git a/tests/test_versions.py b/tests/test_versions.py
index ed08865..5dc55e1 100644
--- a/tests/test_versions.py
+++ b/tests/test_versions.py
@@ -5,6 +5,7 @@
 from importlib import resources
 
 # Third Party
+from assertpy import assert_that
 from referencing import Resource
 from referencing.jsonschema import DRAFT202012
 
@@ -14,23 +15,17 @@
 class TestVersions:
     def test_versions(self):
         versions = schema_versions()
-        assert versions is not None
-        assert len(versions) > 1
+        assert_that(versions).is_not_none().is_not_empty()
         for i, v in enumerate(versions):
-            assert v.name == f"v{i+1}"
+            assert_that(v).has_name(f"v{i+1}")
 
     def _load_schema(self, path):
         text = path.read_text(encoding="utf-8")
-        assert text
-        assert len(text) > 1
+        assert_that(text).is_not_none().is_not_empty()
         contents = json.loads(text)
-        assert contents
-        assert len(contents) > 1
-        resource = Resource.from_contents(
-            contents=contents, default_specification=DRAFT202012
-        )
-        assert resource
-        assert resource.contents == contents
+        assert_that(contents).is_not_none().is_not_empty()
+        resource = Resource.from_contents(contents=contents, default_specification=DRAFT202012)
+        assert_that(resource).is_not_none().has_contents(contents)
 
     def test_import_schema_base(self):
         schema_base = resources.files("instructlab.schema")
diff --git a/tests/testdata/compositional_skills/array_yaml/qna.yaml b/tests/testdata/compositional_skills/array_yaml/qna.yaml
new file mode 100644
index 0000000..dc790be
--- /dev/null
+++ b/tests/testdata/compositional_skills/array_yaml/qna.yaml
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: Apache-2.0
+["a", "b"]
diff --git a/tests/testdata/compositional_skills/empty_yaml/qna.yaml b/tests/testdata/compositional_skills/empty_yaml/qna.yaml
new file mode 100644
index 0000000..0f8869a
--- /dev/null
+++ b/tests/testdata/compositional_skills/empty_yaml/qna.yaml
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: Apache-2.0
+# empty
diff --git a/tests/testdata/compositional_skills/invalid_yaml/qna.yaml b/tests/testdata/compositional_skills/invalid_yaml/qna.yaml
new file mode 100644
index 0000000..f0083c8
--- /dev/null
+++ b/tests/testdata/compositional_skills/invalid_yaml/qna.yaml
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: Apache-2.0
+
+createdby: invalid_key
+version: 2
+seed_examples:
+- question: What is this skill about?
+  answer: It's a skill that makes the tests more skillful
+- answer: "answer2"
+  question: "question2"
+- answer: "answer6"
+  question: "This is for a unit test and has a line with 124 characters! It is too long for the default rules but not too long for the customer rules!"
+- answer: "answer3"
+  question: "question3"
+- answer: "answer4"
+  question: "question4"
+- answer: "answer5"
+  question: "question5"
+task_description: For invalid yaml tests
diff --git a/tests/testdata/compositional_skills/skill_incomplete/qna.yaml b/tests/testdata/compositional_skills/skill_incomplete/qna.yaml
new file mode 100644
index 0000000..4f56a0a
--- /dev/null
+++ b/tests/testdata/compositional_skills/skill_incomplete/qna.yaml
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: Apache-2.0
+
+created_by: test
+version: 2
+seed_examples:
+- question: "Does this yaml pass schema validation?"
+  answer: "No, it does not! It should have 5 examples."
+task_description: 'This yaml does not conform to the schema'
diff --git a/tests/testdata/compositional_skills/skill_valid/qna.yaml b/tests/testdata/compositional_skills/skill_valid/qna.yaml
new file mode 100644
index 0000000..354af20
--- /dev/null
+++ b/tests/testdata/compositional_skills/skill_valid/qna.yaml
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: Apache-2.0
+
+created_by: test-bot
+version: 2
+seed_examples:
+- answer: Yes, it is.
+  question: Is this for a test?
+- answer: Yes I am very sure.
+  question: Are you sure it's for a test?
+- answer: "answer3"
+  question: "question3"
+- answer: "answer4"
+  question: "question4"
+- answer: "answer5"
+  question: "question5"
+task_description: for testing
diff --git a/tests/testdata/compositional_skills/version_1/qna.yaml b/tests/testdata/compositional_skills/version_1/qna.yaml
new file mode 100644
index 0000000..da1b888
--- /dev/null
+++ b/tests/testdata/compositional_skills/version_1/qna.yaml
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: Apache-2.0
+
+created_by: someone
+seed_examples:
+- question: What is this skill about?
+  answer: It's a skill that makes the tests more skillful
+- answer: "answer2"
+  question: "question2"
+- answer: "answer6"
+  question: "This is for a unit test and has a line with 124 characters! It is too long for the default rules but not too long for the customer rules!"
+- answer: "answer3"
+  question: "question3"
+- answer: "answer4"
+  question: "question4"
+- answer: "answer5"
+  question: "question5"
+task_description: For yaml tests
diff --git a/tests/testdata/knowledge/invalid_name/file.yaml b/tests/testdata/knowledge/invalid_name/file.yaml
new file mode 100644
index 0000000..d09b633
--- /dev/null
+++ b/tests/testdata/knowledge/invalid_name/file.yaml
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: Apache-2.0
+
+created_by: test-bot
+version: 2
+domain: test-domain
+seed_examples:
+- question: What is this knowledge about?
+  answer: It's a knowledge that makes the tests more knowledgeable
+- answer: "answer2"
+  question: "question2"
+- answer: "answer3"
+  question: "question3"
+- answer: "answer4"
+  question: "question4"
+- answer: "answer5"
+  question: "question5"
+task_description: For knowledge tests
+document:
+  repo: https://github.com/example-org/example-repo
+  commit: a0c3c8e
+  patterns:
+  - "*.md"
+  - docs/*.md
diff --git a/tests/testdata/knowledge/invalid_name/qna.yml b/tests/testdata/knowledge/invalid_name/qna.yml
new file mode 100644
index 0000000..d09b633
--- /dev/null
+++ b/tests/testdata/knowledge/invalid_name/qna.yml
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: Apache-2.0
+
+created_by: test-bot
+version: 2
+domain: test-domain
+seed_examples:
+- question: What is this knowledge about?
+  answer: It's a knowledge that makes the tests more knowledgeable
+- answer: "answer2"
+  question: "question2"
+- answer: "answer3"
+  question: "question3"
+- answer: "answer4"
+  question: "question4"
+- answer: "answer5"
+  question: "question5"
+task_description: For knowledge tests
+document:
+  repo: https://github.com/example-org/example-repo
+  commit: a0c3c8e
+  patterns:
+  - "*.md"
+  - docs/*.md
diff --git a/tests/testdata/knowledge/knowledge_valid/qna.yaml b/tests/testdata/knowledge/knowledge_valid/qna.yaml
new file mode 100644
index 0000000..d09b633
--- /dev/null
+++ b/tests/testdata/knowledge/knowledge_valid/qna.yaml
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: Apache-2.0
+
+created_by: test-bot
+version: 2
+domain: test-domain
+seed_examples:
+- question: What is this knowledge about?
+  answer: It's a knowledge that makes the tests more knowledgeable
+- answer: "answer2"
+  question: "question2"
+- answer: "answer3"
+  question: "question3"
+- answer: "answer4"
+  question: "question4"
+- answer: "answer5"
+  question: "question5"
+task_description: For knowledge tests
+document:
+  repo: https://github.com/example-org/example-repo
+  commit: a0c3c8e
+  patterns:
+  - "*.md"
+  - docs/*.md
diff --git a/tox.ini b/tox.ini
index 1c4e721..9016b43 100644
--- a/tox.ini
+++ b/tox.ini
@@ -12,7 +12,7 @@ package = wheel
 wheel_build_env = pkg
 deps =
     pytest
-    jsonschema
+    assertpy
 commands =
     unit: {envpython} -m pytest {posargs:tests}
 
@@ -20,7 +20,8 @@ commands =
 description = Lint with pylint
 deps =
     pylint
-    jsonschema
+    pytest
+    assertpy
 commands =
     {envpython} -m pylint {posargs:src tests}
 
@@ -30,7 +31,8 @@ skip_install = True
 skipsdist = true
 deps =
     ruff
-    jsonschema
+    pytest
+    assertpy
 commands =
     ./scripts/ruff.sh {posargs:fix}
 allowlist_externals = ./scripts/ruff.sh
@@ -41,7 +43,11 @@ namespace_packages = True
 explicit_package_bases = True
 deps =
     mypy
-    jsonschema
+    pytest
+    assertpy
+    types-assertpy
+    types-PyYAML
+    types-jsonschema
 commands =
     {envpython} -m mypy {posargs:src tests}