Skip to content

Commit

Permalink
[sourcegen] Improve type hinting
Browse files: browse the repository at this point in the history
  • Loading branch information
ischoegl committed Jan 4, 2025
1 parent 4687236 commit 739aa5d
Show file tree
Hide file tree
Showing 10 changed files with 191 additions and 160 deletions.
34 changes: 24 additions & 10 deletions interfaces/sourcegen/sourcegen/_HeaderFileParser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,6 +6,8 @@
from pathlib import Path
import logging
import re
from typing import Iterable
from typing_extensions import Self

from ._dataclasses import HeaderFile, Func, Recipe
from ._helpers import read_config
Expand All @@ -26,15 +28,17 @@ class HeaderFileParser:
themselves are used for subsequent code scaffolding.
"""

def __init__(self, path: Path, ignore_funcs: list[str] = None):
def __init__(self, path: Path, ignore_funcs: Iterable[str] = None) -> None:
self._path = path
self._ignore_funcs = ignore_funcs

@classmethod
def headers_from_yaml(cls, ignore_files, ignore_funcs) -> list[HeaderFile]:
def headers_from_yaml(
cls: Self, ignore_files: Iterable[str], ignore_funcs: Iterable[str]
) -> list[HeaderFile]:
"""Parse header file YAML configuration."""
files = [ff for ff in _DATA_PATH.glob("*.yaml") if ff.name not in ignore_files]
files.sort()
files = sorted(
ff for ff in _DATA_PATH.glob("*.yaml") if ff.name not in ignore_files)
return [cls(ff, ignore_funcs.get(ff.name, []))._parse_yaml() for ff in files]

def _parse_yaml(self) -> HeaderFile:
Expand All @@ -44,27 +48,33 @@ def read_docstring():
while True:
line = fid.readline()
if line.startswith("#"):
doc.append(line.lstrip("#").strip())
doc.append(line.removeprefix("#").strip())
else:
break
if doc and doc[0].startswith("This file is part of "):
return []
return doc

msg = f" parsing {self._path.name!r}"
_LOGGER.info(msg)
config = read_config(self._path)
if self._ignore_funcs:
msg = f" ignoring {self._ignore_funcs!r}"
_LOGGER.info(msg)

recipes = []
prefix = config["prefix"]
base = config["base"]
parents = config.get("parents", [])
derived = config.get("derived", [])
for recipe in config["recipes"]:
if recipe['name'] in self._ignore_funcs:
if recipe["name"] in self._ignore_funcs:
continue
uses = recipe.get("uses", [])
if not isinstance(uses, list):
uses = [uses]
recipes.append(
Recipe(recipe['name'],
Recipe(recipe["name"],
recipe.get("implements", ""),
uses,
recipe.get("what", ""),
Expand All @@ -79,7 +89,9 @@ def read_docstring():
read_docstring())

@classmethod
def headers_from_h(cls, ignore_files, ignore_funcs) -> list[HeaderFile]:
def headers_from_h(
cls: Self, ignore_files: Iterable[str], ignore_funcs: Iterable[str]
) -> list[HeaderFile]:
"""Parse existing header file."""
files = [ff for ff in _CLIB_PATH.glob("*.h")
if ff.name not in ignore_files + _CLIB_IGNORE]
Expand All @@ -98,9 +110,11 @@ def _parse_h(self) -> HeaderFile:

parsed = map(Func.from_str, c_functions)

_LOGGER.info(f" parsing {self._path.name!r}")
msg = f" parsing {self._path.name!r}"
_LOGGER.info(msg)
if self._ignore_funcs:
_LOGGER.info(f" ignoring {self._ignore_funcs!r}")
msg = f" ignoring {self._ignore_funcs!r}"
_LOGGER.info(msg)

parsed = [f for f in parsed if f.name not in self._ignore_funcs]

Expand Down
4 changes: 2 additions & 2 deletions interfaces/sourcegen/sourcegen/_SourceGenerator.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,9 +13,9 @@ class SourceGenerator(metaclass=ABCMeta):
"""Specifies the interface of a language-specific SourceGenerator"""

@abstractmethod
def __init__(self, out_dir: Path, config: dict, templates: dict):
def __init__(self, out_dir: Path, config: dict, templates: dict) -> None:
pass

@abstractmethod
def generate_source(self, headers_files: list[HeaderFile]):
def generate_source(self, headers_files: list[HeaderFile]) -> None:
pass
81 changes: 42 additions & 39 deletions interfaces/sourcegen/sourcegen/_TagFileParser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -6,6 +6,8 @@
import sys
from pathlib import Path
import re
from typing import Sequence, Iterable
from typing_extensions import Self
import logging
from dataclasses import dataclass
import xml.etree.ElementTree as ET
Expand Down Expand Up @@ -33,35 +35,35 @@ class TagInfo:
anchor: str = "" #: doxygen anchor

@classmethod
def from_xml(cls, qualified_name, xml):
def from_xml(cls: Self, qualified_name: str, xml: str) -> Self:
"""Create tag information based on XML data."""
base = ""
if "::" in qualified_name:
base = qualified_name.split("::", 1)[0]

xml_tree = ET.fromstring(xml)
return cls(base,
xml_tree.find('type').text,
xml_tree.find('name').text,
xml_tree.find('arglist').text,
xml_tree.find('anchorfile').text.replace(".html", ".xml"),
xml_tree.find('anchor').text)
xml_tree.find("type").text,
xml_tree.find("name").text,
xml_tree.find("arglist").text,
xml_tree.find("anchorfile").text.replace(".html", ".xml"),
xml_tree.find("anchor").text)

def __bool__(self):
def __bool__(self) -> bool:
return all([self.type, self.name, self.arglist, self.anchorfile, self.anchor])

@property
def signature(self):
def signature(self) -> str:
"""Generate function signature based on tag information."""
return f"{self.type} {self.name}{self.arglist}"

@property
def id(self):
def id(self) -> str:
"""Generate doxygen id."""
return f"{self.anchorfile.replace('.xml', '')}_1{self.anchor}"

@property
def qualified_name(self):
def qualified_name(self) -> str:
"""Return qualified name."""
if self.base:
return f"{self.base}::{self.name}"
Expand All @@ -75,7 +77,7 @@ class TagDetails(TagInfo):

location: str = "" #: File containing doxygen description
briefdescription: str = "" #: Brief doxygen description
parameterlist: list[Param] = None #: Annotated doxygen parameter list
parameterlist: list[Param] | None = None #: Annotated doxygen parameter list


class TagFileParser:
Expand All @@ -91,16 +93,14 @@ def __init__(self, bases: dict[str, str]) -> None:
_LOGGER.critical(msg)
sys.exit(1)

with tag_file.open() as fid:
doxygen_tags = fid.read()

logging.info("Parsing doxygen tags...")
doxygen_tags = tag_file.read_text()
self._parse_doxyfile(doxygen_tags, bases)

def _parse_doxyfile(self, doxygen_tags: str, bases: list[str]) -> None:
def _parse_doxyfile(self, doxygen_tags: str, bases: Sequence[str]) -> None:
"""Retrieve class and function information from Cantera namespace."""

def xml_compounds(kind: str, names: list[str]) -> dict[str,str]:
def xml_compounds(kind: str, names: Sequence[str]) -> dict[str, str]:
regex = re.compile(rf'<compound kind="{kind}"[\s\S]*?</compound>')
found = []
compounds = {}
Expand All @@ -110,7 +110,7 @@ def xml_compounds(kind: str, names: list[str]) -> dict[str,str]:
if compound_name in names:
found.append(compound_name)
compounds[compound_name] = compound
if not set(names) - set(found):
if not (set(names) - set(found)):
return compounds
missing = '", "'.join(set(names) - set(found))
msg = f"Missing {kind!r} compound(s):\n {missing!r}\nusing regex "
Expand All @@ -130,15 +130,16 @@ def xml_compounds(kind: str, names: list[str]) -> dict[str,str]:
unknown = set(bases) - set(class_names)
if "', '".join(unknown):
unknown = "', '".join(unknown)
_LOGGER.critical("Class(es) in configuration file are missing "
f"from tag file: {unknown!r}")
msg = ("Class(es) in configuration file are missing "
f"from tag file: {unknown!r}")
_LOGGER.critical(msg)
exit(1)

# Parse content of classes that are specified by the configuration file
class_names = set(bases) & set(class_names)
classes = xml_compounds("class", class_names)

def xml_members(kind: str, text: str, prefix="") -> dict[str, str]:
def xml_members(kind: str, text: str, prefix: str = "") -> dict[str, str]:
regex = re.compile(rf'<member kind="{kind}"[\s\S]*?</member>')
functions = {}
for func in re.findall(regex, text):
Expand All @@ -161,7 +162,7 @@ def exists(self, cxx_func: str) -> bool:
"""Check whether doxygen tag exists."""
return cxx_func in self._known

def detect(self, name, bases, permissive=True):
def detect(self, name: str, bases: Iterable[str], permissive: bool = True) -> str:
"""Detect qualified method name."""
for base in bases:
name_ = f"{base}::{name}"
Expand All @@ -170,33 +171,35 @@ def detect(self, name, bases, permissive=True):
if self.exists(name):
return name
if permissive:
return None
_LOGGER.critical(f"Unable to detect {name!r} in doxygen tags.")
return ""
msg = f"Unable to detect {name!r} in doxygen tags."
_LOGGER.critical(msg)
exit(1)

def tag_info(self, func_string: str) -> TagInfo:
"""Look up tag information based on (partial) function signature."""
cxx_func = func_string.split("(")[0].split(" ")[-1]
if cxx_func not in self._known:
_LOGGER.critical(f"Could not find {cxx_func!r} in doxygen tag file.")
msg = f"Could not find {cxx_func!r} in doxygen tag file."
_LOGGER.critical(msg)
sys.exit(1)
ix = 0
if len(self._known[cxx_func]) > 1:
# Disambiguate functions with same name
# TODO: current approach does not use information on default arguments
known_args = [ET.fromstring(xml).find('arglist').text
known_args = [ET.fromstring(xml).find("arglist").text
for xml in self._known[cxx_func]]
known_args = [ArgList.from_xml(al).short_str() for al in known_args]
args = re.findall(re.compile(r'(?<=\().*(?=\))'), func_string)
args = re.findall(re.compile(r"(?<=\().*(?=\))"), func_string)
if not args and "()" in known_args:
# Candidate function without arguments exists
ix = known_args.index("()")
elif not args:
# Function does not use arguments
known = '\n - '.join([""] + known_args)
_LOGGER.critical(
f"Need argument list to disambiguate {func_string!r}. "
f"possible matches are:{known}")
known = "\n - ".join([""] + known_args)
msg = (f"Need argument list to disambiguate {func_string!r}. "
f"possible matches are:{known}")
_LOGGER.critical(msg)
sys.exit(1)
else:
args = f"({args[0]}"
Expand All @@ -207,8 +210,8 @@ def tag_info(self, func_string: str) -> TagInfo:
ix = i
break
if ix < 0:
_LOGGER.critical(
f"Unable to match {func_string!r} to known functions.")
msg = f"Unable to match {func_string!r} to known functions."
_LOGGER.critical(msg)
sys.exit(1)

return TagInfo.from_xml(cxx_func, self._known[cxx_func][ix])
Expand Down Expand Up @@ -241,28 +244,28 @@ def tag_lookup(tag_info: TagInfo) -> TagDetails:
"""Retrieve tag details from doxygen tree."""
xml_file = _XML_PATH / tag_info.anchorfile
if not xml_file.exists():
msg = (f"XML file does not exist at expected location: {xml_file}")
msg = f"Tag file does not exist at expected location:\n {xml_file}"
_LOGGER.error(msg)
return TagDetails()

with xml_file.open() as fid:
xml_details = fid.read()

xml_details = xml_file.read_text()
id_ = tag_info.id
regex = re.compile(rf'<memberdef kind="function" id="{id_}"[\s\S]*?</memberdef>')
matches = re.findall(regex, xml_details)

if not matches:
_LOGGER.error(f"No XML matches found for {tag_info.qualified_name!r}")
msg = f"No XML matches found for {tag_info.qualified_name!r}"
_LOGGER.error(msg)
return TagDetails()
if len(matches) != 1:
_LOGGER.warning(f"Inconclusive XML matches found for {tag_info.qualified_name!r}")
msg = f"Inconclusive XML matches found for {tag_info.qualified_name!r}"
_LOGGER.warning(msg)
matches = matches[:1]

def no_refs(entry: str) -> str:
# Remove stray XML markup that causes problems with xml.etree
if "<ref" in entry:
regex = re.compile(r'<ref [\s\S]*?>')
regex = re.compile(r"<ref [\s\S]*?>")
for ref in re.findall(regex, entry):
entry = entry.replace(ref, "<ref>")
entry = entry.replace("<ref>", "").replace("</ref>", "")
Expand Down
Loading

0 comments on commit 739aa5d

Please sign in to comment.