From 1d8360b3a5b7f5820931ec59bc7805495ee465bf Mon Sep 17 00:00:00 2001 From: "Austin M. Matherne" Date: Tue, 22 Oct 2024 23:20:38 -0400 Subject: [PATCH 1/9] Add missing taxonomy package validations Taxonomy package validation errors expected by report package conformance suite. --- arelle/PackageManager.py | 3 + arelle/packages/PackageValidation.py | 82 +++++++++++++++++++++++++++- 2 files changed, 84 insertions(+), 1 deletion(-) diff --git a/arelle/PackageManager.py b/arelle/PackageManager.py index 67d76ddc1..798945930 100644 --- a/arelle/PackageManager.py +++ b/arelle/PackageManager.py @@ -44,6 +44,9 @@ PackageValidation.validatePackageNotEncrypted, PackageValidation.validateTopLevelFiles, PackageValidation.validateTopLevelDirectories, + PackageValidation.validateDuplicateEntries, + PackageValidation.validateConflictingEntries, + PackageValidation.validateEntries, ) TAXONOMY_PACKAGE_NON_ABORTING_VALIDATIONS = ( diff --git a/arelle/packages/PackageValidation.py b/arelle/packages/PackageValidation.py index b755e699d..1ea225e32 100644 --- a/arelle/packages/PackageValidation.py +++ b/arelle/packages/PackageValidation.py @@ -5,6 +5,8 @@ from __future__ import annotations import os.path +import zipfile +from collections import Counter from pathlib import PurePosixPath from typing import TYPE_CHECKING @@ -53,7 +55,7 @@ def validateZipFileSeparators( packageType: PackageType, filesource: FileSource, ) -> Validation | None: - if filesource.isZipBackslashed: + if isinstance(filesource.fs, zipfile.ZipFile) and any("\\" in zinfo.orig_filename for zinfo in filesource.fs.infolist()): return Validation.error( codes=f"{packageType.errorPrefix}:invalidArchiveFormat", msg=_("%(packageType)s package directory uses '\\' as a file separator."), @@ -118,3 +120,81 @@ def validateMetadataDirectory( file=os.path.basename(str(filesource.url)), ) return None + +def validateEntries( + packageType: PackageType, + filesource: FileSource, +) -> Validation | None: + packageEntries = filesource.dir or [] + for entry in packageEntries: + if entry.startswith("/"): + return Validation.error( + f"{packageType.errorPrefix}:invalidArchiveFormat", + _("%(packageType)s Package entry must not begin with a forward slash: %(entry)s"), + packageType=packageType.name, + entry=entry, + ) + entryParts = entry.split("/") + if "." in entryParts: + return Validation.error( + f"{packageType.errorPrefix}:invalidDirectoryStructure", + _("%(packageType)s Package entries may not contain '.' in paths: %(entry)s"), + packageType=packageType.name, + entry=entry, + ) + if ".." in entryParts: + return Validation.error( + f"{packageType.errorPrefix}:invalidDirectoryStructure", + _("%(packageType)s Package entries may not contain '..' in paths: %(entry)s"), + packageType=packageType.name, + entry=entry, + ) + if "//" in entry: + return Validation.error( + f"{packageType.errorPrefix}:invalidDirectoryStructure", + _("%(packageType)s Package entries may not contain empty directories in path: %(entry)s"), + packageType=packageType.name, + entry=entry, + ) + return None + +def validateDuplicateEntries( + packageType: PackageType, + filesource: FileSource, +) -> Validation | None: + dirCounter = Counter(filesource.dir or []) + duplicates = [entry for entry, count in dirCounter.items() if count > 1] + if duplicates: + return Validation.error( + f"{packageType.errorPrefix}:invalidDirectoryStructure", + _("%(packageType)s Package contains duplicate entries: %(duplicates)s"), + packageType=packageType.name, + duplicates=", ".join(sorted(duplicates)), + ) + return None + + +def validateConflictingEntries( + packageType: PackageType, + filesource: FileSource, +) -> Validation | None: + files = set() + directories = set() + for entry in filesource.dir or []: + isFile = not entry.endswith("/") + path = PurePosixPath(entry) + if isFile: + files.add(path) + path = path.parent + directories.add(path) + directories.update(path.parents) + if clashes := files.intersection(directories): + clashStrings = [str(clash) for clash in clashes] + return Validation.error( + f"{packageType.errorPrefix}:invalidDirectoryStructure", + _("%(packageType)s Package contains file paths that conflict with directories: %(conflicts)s"), + packageType=packageType.name, + conflicts=", ".join(sorted(clashStrings)), + file=os.path.basename(str(filesource.url)), + ) + return None From 42d1f27e25cf4c1ab9bdcde676f14dc8f9d077ed Mon Sep 17 00:00:00 2001 From: "Austin M. Matherne" Date: Wed, 23 Oct 2024 17:06:53 -0400 Subject: [PATCH 2/9] Cleanup package manager imports --- arelle/PackageManager.py | 53 ++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 32 deletions(-) diff --git a/arelle/PackageManager.py b/arelle/PackageManager.py index 798945930..31a16a028 100644 --- a/arelle/PackageManager.py +++ b/arelle/PackageManager.py @@ -1,41 +1,36 @@ -''' -Separated on Jul 28, 2013 from DialogOpenArchive.py - +""" See COPYRIGHT.md for copyright information. -''' +""" from __future__ import annotations -from typing import TYPE_CHECKING -import os, io, time, json, logging + +import io +import json +import logging +import os +import time from collections import defaultdict from fnmatch import fnmatch -from lxml import etree +from typing import TYPE_CHECKING from urllib.parse import urljoin + +from lxml import etree + +from arelle import Locale from arelle.packages import PackageValidation from arelle.packages.PackageType import PackageType from arelle.typing import TypeGetText -openFileSource = None -from arelle import Locale from arelle.UrlUtil import isAbsolute, isHttpUrl from arelle.XmlValidate import lxmlResolvingParser -ArchiveFileIOError = None -try: - from collections import OrderedDict -except ImportError: - OrderedDict = dict # python 3.0 lacks OrderedDict, json file will be in weird order if TYPE_CHECKING: + from arelle.Cntlr import Cntlr from arelle.FileSource import FileSource TP_XSD = "http://www.xbrl.org/2016/taxonomy-package.xsd" CAT_XSD = "http://www.xbrl.org/2016/taxonomy-package-catalog.xsd" -if TYPE_CHECKING: - from arelle.Cntlr import Cntlr - _: TypeGetText -EMPTYDICT = {} - TAXONOMY_PACKAGE_TYPE = PackageType("Taxonomy", "tpe") TAXONOMY_PACKAGE_ABORTING_VALIDATIONS = ( @@ -98,9 +93,7 @@ def _parseFile(cntlr, parser, filepath, file, schemaUrl): def parsePackage(cntlr, filesource, metadataFile, fileBase, errors=[]): - global ArchiveFileIOError - if ArchiveFileIOError is None: - from arelle.FileSource import ArchiveFileIOError + from arelle.FileSource import ArchiveFileIOError unNamedCounter = 1 @@ -354,8 +347,8 @@ def reset() -> None: # force reloading modules and plugin infos packagesMappings.clear() # dict by class of list of ordered callable function objects def orderedPackagesConfig(): - return OrderedDict( - (('packages', [OrderedDict(sorted(_packageInfo.items(), + return dict( + (('packages', [dict(sorted(_packageInfo.items(), key=lambda k: {'name': '01', 'status': '02', 'version': '03', @@ -372,7 +365,7 @@ def orderedPackagesConfig(): 'remappings': '14', }.get(k[0],k[0]))) for _packageInfo in packagesConfig['packages']]), - ('remappings',OrderedDict(sorted(packagesConfig['remappings'].items()))))) + ('remappings',dict(sorted(packagesConfig['remappings'].items()))))) def save(cntlr: Cntlr) -> None: global packagesConfigChanged @@ -462,13 +455,9 @@ def packageInfo(cntlr, URL, reload=False, packageManifestName=None, errors=[]): #TODO several directories, eg User Application Data packageFilename = _cntlr.webCache.getfilename(URL, reload=reload, normalize=True) if packageFilename: - from arelle.FileSource import TAXONOMY_PACKAGE_FILE_NAMES + from arelle.FileSource import TAXONOMY_PACKAGE_FILE_NAMES, archiveFilenameParts, openFileSource filesource = None try: - global openFileSource - if openFileSource is None: - from arelle.FileSource import openFileSource - from arelle.FileSource import archiveFilenameParts parts = archiveFilenameParts(packageFilename) if parts is not None: sourceFileSource = openFileSource(parts[0], _cntlr) @@ -615,12 +604,12 @@ def rebuildRemappings(cntlr): def isMappedUrl(url): return (packagesConfig is not None and url is not None and any(url.startswith(mapFrom) and not url.startswith(mapTo) # prevent recursion in mapping for url hosted Packages - for mapFrom, mapTo in packagesConfig.get('remappings', EMPTYDICT).items())) + for mapFrom, mapTo in packagesConfig.get('remappings', {}).items())) def mappedUrl(url): if packagesConfig is not None and url is not None: longestPrefix = 0 - for mapFrom, mapTo in packagesConfig.get('remappings', EMPTYDICT).items(): + for mapFrom, mapTo in packagesConfig.get('remappings', {}).items(): if url.startswith(mapFrom): if url.startswith(mapTo): return url # recursive mapping, this is already mapped From 440b7da13c055d8ef6f2573278edf65d070901fd Mon Sep 17 00:00:00 2001 From: "Austin M. Matherne" Date: Wed, 23 Oct 2024 17:24:34 -0400 Subject: [PATCH 3/9] Package manager cleanup --- arelle/PackageManager.py | 65 ++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 36 deletions(-) diff --git a/arelle/PackageManager.py b/arelle/PackageManager.py index 31a16a028..f3201e132 100644 --- a/arelle/PackageManager.py +++ b/arelle/PackageManager.py @@ -3,7 +3,6 @@ """ from __future__ import annotations -import io import json import logging import os @@ -52,12 +51,8 @@ def baseForElement(element): base = "" baseElt = element while baseElt is not None: - baseAttr = baseElt.get("{http://www.w3.org/XML/1998/namespace}base") - if baseAttr: - if baseAttr.startswith("/"): - base = baseAttr - else: - base = baseAttr + base + if baseAttr := baseElt.get("{http://www.w3.org/XML/1998/namespace}base"): + base = baseAttr if baseAttr.startswith("/") else baseAttr + base baseElt = baseElt.getparent() return base @@ -103,7 +98,6 @@ def parsePackage(cntlr, filesource, metadataFile, fileBase, errors=[]): "http://xbrl.org/PR/2015-12-09/taxonomy-package", "http://xbrl.org/2016/taxonomy-package", "http://xbrl.org/WGWD/YYYY-MM-DD/taxonomy-package") - catalogNSes = ("urn:oasis:names:tc:entity:xmlns:xml:catalog",) pkg = {} @@ -123,7 +117,7 @@ def parsePackage(cntlr, filesource, metadataFile, fileBase, errors=[]): root = tree.getroot() ns = root.tag.partition("}")[0][1:] - nsPrefix = "{{{}}}".format(ns) + nsPrefix = f"{{{ns}}}" if ns in txmyPkgNSes: # package file for eltName in ("identifier", "version", "license", "publisher", "publisherURL", "publisherCountry", "publicationDate"): @@ -144,7 +138,7 @@ def parsePackage(cntlr, filesource, metadataFile, fileBase, errors=[]): if l > closestLen: closestLen = l closest = s - elif closestLen <= 0 and eltLang.startswith("en"): + elif closestLen <= 0 and isinstance(eltLang, str) and eltLang.startswith("en"): closest = s # pick english if nothing better if not closest and eltName == "name": # assign default name when none in taxonomy package closest = os.path.splitext(os.path.basename(filesource.baseurl))[0] @@ -154,7 +148,9 @@ def parsePackage(cntlr, filesource, metadataFile, fileBase, errors=[]): for m in root.iterchildren(tag=nsPrefix + "supersededTaxonomyPackages"): pkg['supersededTaxonomyPackages'] = [ r.text.strip() - for r in m.iterchildren(tag=nsPrefix + "taxonomyPackageRef")] + for r in m.iterchildren(tag=nsPrefix + "taxonomyPackageRef") + if isinstance(r.text, str) + ] for m in root.iterchildren(tag=nsPrefix + "versioningReports"): pkg['versioningReports'] = [ r.get("href") @@ -225,12 +221,12 @@ def parsePackage(cntlr, filesource, metadataFile, fileBase, errors=[]): base = baseForElement(m) if base: replaceValue = os.path.join(base, replaceValue) - if replaceValue: # neither None nor '' - if not isAbsolute(replaceValue): - if not os.path.isabs(replaceValue): - replaceValue = fileBase + replaceValue - if not isHttpUrl(replaceValue): - replaceValue = replaceValue.replace("/", os.sep) + if replaceValue and not isAbsolute(replaceValue): + # neither None nor '' + if not os.path.isabs(replaceValue): + replaceValue = fileBase + replaceValue + if not isHttpUrl(replaceValue): + replaceValue = replaceValue.replace("/", os.sep) _normedValue = cntlr.webCache.normalizeUrl(replaceValue) if replaceValue.endswith(os.sep) and not _normedValue.endswith(os.sep): _normedValue += os.sep @@ -261,28 +257,23 @@ def parsePackage(cntlr, filesource, metadataFile, fileBase, errors=[]): if l > closestLen: closestLen = l name = s - elif closestLen <= 0 and nameLang.startswith("en"): + elif closestLen <= 0 and isinstance(nameLang, str) and nameLang.startswith("en"): name = s # pick english if nothing better if not name: name = _("").format(unNamedCounter) unNamedCounter += 1 - epDocCount = 0 for epDoc in entryPointSpec.iterchildren(nsPrefix + "entryPointDocument"): epUrl = epDoc.get('href') base = epDoc.get('{http://www.w3.org/XML/1998/namespace}base') # cope with xml:base - if base: - resolvedUrl = urljoin(base, epUrl) - else: - resolvedUrl = epUrl - - epDocCount += 1 + resolvedUrl = urljoin(base, epUrl) if base else epUrl #perform prefix remappings remappedUrl = resolvedUrl longestPrefix = 0 for mapFrom, mapTo in remappings.items(): + assert isinstance(remappedUrl, str) if remappedUrl.startswith(mapFrom): prefixLength = len(mapFrom) if prefixLength > longestPrefix: @@ -305,7 +296,7 @@ def parsePackage(cntlr, filesource, metadataFile, fileBase, errors=[]): if l > closestLen: closestLen = l closest = s - elif closestLen <= 0 and eltLang.startswith("en"): + elif closestLen <= 0 and isinstance(eltLang, str) and eltLang.startswith("en"): closest = s # pick english if nothing better if not closest and name: # assign default name when none in taxonomy package closest = name @@ -326,7 +317,7 @@ def init(cntlr: Cntlr, loadPackagesConfig: bool = True) -> None: if loadPackagesConfig: try: packagesJsonFile = cntlr.userAppDir + os.sep + "taxonomyPackages.json" - with io.open(packagesJsonFile, 'rt', encoding='utf-8') as f: + with open(packagesJsonFile, encoding='utf-8') as f: packagesConfig = json.load(f) packagesConfigChanged = False except Exception: @@ -370,7 +361,7 @@ def orderedPackagesConfig(): def save(cntlr: Cntlr) -> None: global packagesConfigChanged if packagesConfigChanged and cntlr.hasFileSystem: - with io.open(packagesJsonFile, 'wt', encoding='utf-8') as f: + with open(packagesJsonFile, "w", encoding='utf-8') as f: jsonStr = str(json.dumps(orderedPackagesConfig(), ensure_ascii=False, indent=2)) # might not be unicode in 2.7 f.write(jsonStr) packagesConfigChanged = False @@ -421,7 +412,7 @@ def validateTaxonomyPackage(cntlr, filesource, errors=[]) -> bool: messageCode=validation.codes, messageArgs=validation.args, file=filesource.urlBasename, - level=logging.ERROR, + level=validation.level.name, ) errors.append(validation.codes) return False @@ -432,7 +423,7 @@ def validateTaxonomyPackage(cntlr, filesource, errors=[]) -> bool: messageCode=validation.codes, messageArgs=validation.args, file=filesource.urlBasename, - level=logging.ERROR, + level=validation.level.name, ) errors.append(validation.codes) @@ -476,9 +467,8 @@ def packageInfo(cntlr, URL, reload=False, packageManifestName=None, errors=[]): packageFiles = discoverPackageFiles(filesource) if not packageFiles: # look for pre-PWD packages - _dir = filesource.dir - _metaInf = '{}/META-INF/'.format( - os.path.splitext(os.path.basename(packageFilename))[0]) + _dir = filesource.dir or [] + _metaInf = f'{os.path.splitext(os.path.basename(packageFilename))[0]}/META-INF/' if packageManifestName: # pre-pwd packageFiles = [fileName @@ -491,7 +481,7 @@ def packageInfo(cntlr, URL, reload=False, packageManifestName=None, errors=[]): # root-level META-INF taxonomy packages packageFiles = ['META-INF/taxonomyPackage.xml'] if len(packageFiles) < 1: - raise IOError(_("Taxonomy package contained no metadata file: {0}.") + raise OSError(_("Taxonomy package contained no metadata file: {0}.") .format(', '.join(packageFiles))) # if current package files found, remove any nonconforming package files if any(pf.startswith('_metaInf') for pf in packageFiles) and any(not pf.startswith(_metaInf) for pf in packageFiles): @@ -500,10 +490,12 @@ def packageInfo(cntlr, URL, reload=False, packageManifestName=None, errors=[]): packageFiles = [pf for pf in packageFiles if pf.startswith('META-INF/')] for packageFile in packageFiles: + assert isinstance(filesource.url, str) packageFileUrl = filesource.url + os.sep + packageFile packageFilePrefix = os.sep.join(os.path.split(packageFile)[:-1]) if packageFilePrefix: packageFilePrefix += os.sep + assert isinstance(filesource.baseurl, str) packageFilePrefix = filesource.baseurl + os.sep + packageFilePrefix packages.append([packageFileUrl, packageFilePrefix, packageFile]) else: @@ -512,6 +504,7 @@ def packageInfo(cntlr, URL, reload=False, packageManifestName=None, errors=[]): file=os.path.basename(packageFilename), level=logging.ERROR) errors.append("tpe:invalidArchiveFormat") + assert isinstance(filesource.url, str) if (os.path.basename(filesource.url) in TAXONOMY_PACKAGE_FILE_NAMES or # individual manifest file (os.path.basename(filesource.url) == "taxonomyPackage.xml" and os.path.basename(os.path.dirname(filesource.url)) == "META-INF")): @@ -521,7 +514,7 @@ def packageInfo(cntlr, URL, reload=False, packageManifestName=None, errors=[]): packageFilePrefix += os.sep packages.append([packageFileUrl, packageFilePrefix, ""]) else: - raise IOError(_("File must be a taxonomy package (zip file), catalog file, or manifest (): {0}.") + raise OSError(_("File must be a taxonomy package (zip file), catalog file, or manifest (): {0}.") .format(packageFilename, ', '.join(TAXONOMY_PACKAGE_FILE_NAMES))) remappings = {} packageNames = [] @@ -564,7 +557,7 @@ def packageInfo(cntlr, URL, reload=False, packageManifestName=None, errors=[]): } filesource.close() return package - except (EnvironmentError, etree.XMLSyntaxError): + except (OSError, etree.XMLSyntaxError): pass if filesource: filesource.close() From 148dfcc335f1ec28dd1c136545019be322fe334e Mon Sep 17 00:00:00 2001 From: "Austin M. Matherne" Date: Wed, 23 Oct 2024 17:39:30 -0400 Subject: [PATCH 4/9] Load valid remappings from invalid packages Refactor taxonomy package loading to load package remappings even if package metadata is invalid. --- arelle/PackageManager.py | 145 +++++++++++------- .../xbrl_report_packages_1_0.py | 2 +- 2 files changed, 88 insertions(+), 59 deletions(-) diff --git a/arelle/PackageManager.py b/arelle/PackageManager.py index f3201e132..4fac1cf30 100644 --- a/arelle/PackageManager.py +++ b/arelle/PackageManager.py @@ -87,7 +87,30 @@ def _parseFile(cntlr, parser, filepath, file, schemaUrl): return tree -def parsePackage(cntlr, filesource, metadataFile, fileBase, errors=[]): +def parsePackage( + cntlr: Cntlr, + filesource: FileSource, + metadataFile: str, + fileBase: str, + errors: list[str] | None = None, +) -> dict[str, str | dict[str, str]]: + if errors is None: + errors = [] + parser = lxmlResolvingParser(cntlr) + catalogFile = metadataFile.replace('taxonomyPackage.xml','catalog.xml') + remappings = _parseCatalog(cntlr, filesource, parser, catalogFile, fileBase, errors) + pkg = _parsePackageMetadata(cntlr, filesource, parser, metadataFile, remappings, errors) + return pkg + + +def _parsePackageMetadata( + cntlr: Cntlr, + filesource: FileSource, + parser: etree.XMLParser, + metadataFile: str, + remappings: dict[str, str], + errors: list[str], +) -> dict[str, str | dict[str, str]]: from arelle.FileSource import ArchiveFileIOError unNamedCounter = 1 @@ -99,13 +122,13 @@ def parsePackage(cntlr, filesource, metadataFile, fileBase, errors=[]): "http://xbrl.org/2016/taxonomy-package", "http://xbrl.org/WGWD/YYYY-MM-DD/taxonomy-package") - pkg = {} + pkg = {"remappings": remappings} currentLang = Locale.getLanguageCode() - _file = filesource.file(metadataFile)[0] # URL in zip, plain file in file system or web parser = lxmlResolvingParser(cntlr) try: - tree = _parseFile(cntlr, parser, metadataFile, _file, TP_XSD) + metadataFileContent = filesource.file(metadataFile)[0] # URL in zip, plain file in file system or web + tree = _parseFile(cntlr, parser, metadataFile, metadataFileContent, TP_XSD) except (etree.XMLSyntaxError, etree.DocumentInvalid) as err: cntlr.addToLog(_("Taxonomy package file syntax error %(error)s"), messageArgs={"error": str(err)}, @@ -114,6 +137,8 @@ def parsePackage(cntlr, filesource, metadataFile, fileBase, errors=[]): level=logging.ERROR) errors.append("tpe:invalidMetaDataFile") return pkg + except ArchiveFileIOError: + return pkg root = tree.getroot() ns = root.tag.partition("}")[0][1:] @@ -141,6 +166,7 @@ def parsePackage(cntlr, filesource, metadataFile, fileBase, errors=[]): elif closestLen <= 0 and isinstance(eltLang, str) and eltLang.startswith("en"): closest = s # pick english if nothing better if not closest and eltName == "name": # assign default name when none in taxonomy package + assert isinstance(filesource.baseurl, str) closest = os.path.splitext(os.path.basename(filesource.baseurl))[0] pkg[eltName] = closest for eltName in ("supersededTaxonomyPackages", "versioningReports"): @@ -188,60 +214,6 @@ def parsePackage(cntlr, filesource, metadataFile, fileBase, errors=[]): pkg["description"] = "oasis catalog" pkg["version"] = "(none)" - remappings = {} - rewriteTree = tree - catalogFile = metadataFile - if ns in ("http://xbrl.org/PWD/2015-01-14/taxonomy-package", - "http://xbrl.org/PR/2015-12-09/taxonomy-package", - "http://xbrl.org/WGWD/YYYY-MM-DD/taxonomy-package", - "http://xbrl.org/2016/taxonomy-package", - "http://xbrl.org/REC/2016-04-19/taxonomy-package"): - catalogFile = metadataFile.replace('taxonomyPackage.xml','catalog.xml') - try: - _file = filesource.file(catalogFile)[0] - rewriteTree = _parseFile(cntlr, parser, catalogFile, _file, CAT_XSD) - except (etree.XMLSyntaxError, etree.DocumentInvalid) as err: - cntlr.addToLog(_("Catalog file syntax error %(error)s"), - messageArgs={"error": str(err)}, - messageCode="tpe:invalidCatalogFile", - file=os.path.basename(metadataFile), - level=logging.ERROR) - errors.append("tpe:invalidCatalogFile") - except ArchiveFileIOError: - pass - for tag, prefixAttr, replaceAttr in ( - (nsPrefix + "remapping", "prefix", "replaceWith"), # taxonomy package - ("{urn:oasis:names:tc:entity:xmlns:xml:catalog}rewriteSystem", "systemIdStartString", "rewritePrefix"), - ("{urn:oasis:names:tc:entity:xmlns:xml:catalog}rewriteURI", "uriStartString", "rewritePrefix")): # oasis catalog - for m in rewriteTree.iter(tag=tag): - prefixValue = m.get(prefixAttr) - replaceValue = m.get(replaceAttr) - if prefixValue and replaceValue is not None: - if prefixValue not in remappings: - base = baseForElement(m) - if base: - replaceValue = os.path.join(base, replaceValue) - if replaceValue and not isAbsolute(replaceValue): - # neither None nor '' - if not os.path.isabs(replaceValue): - replaceValue = fileBase + replaceValue - if not isHttpUrl(replaceValue): - replaceValue = replaceValue.replace("/", os.sep) - _normedValue = cntlr.webCache.normalizeUrl(replaceValue) - if replaceValue.endswith(os.sep) and not _normedValue.endswith(os.sep): - _normedValue += os.sep - remappings[prefixValue] = _normedValue - else: - cntlr.addToLog(_("Package catalog duplicate rewrite start string %(rewriteStartString)s"), - messageArgs={"rewriteStartString": prefixValue}, - messageCode="tpe:multipleRewriteURIsForStartString", - file=os.path.basename(catalogFile), - level=logging.ERROR) - errors.append("tpe:multipleRewriteURIsForStartString") - - - pkg["remappings"] = remappings - entryPoints = defaultdict(list) pkg["entryPoints"] = entryPoints @@ -304,6 +276,63 @@ def parsePackage(cntlr, filesource, metadataFile, fileBase, errors=[]): return pkg + +def _parseCatalog( + cntlr: Cntlr, + filesource: FileSource, + parser: etree.XMLParser, + catalogFile: str, + fileBase: str, + errors: list[str], +) -> dict[str, str]: + from arelle.FileSource import ArchiveFileIOError + remappings = {} + rewriteTree = None + try: + _file = filesource.file(catalogFile)[0] + rewriteTree = _parseFile(cntlr, parser, catalogFile, _file, CAT_XSD) + except (etree.XMLSyntaxError, etree.DocumentInvalid) as err: + cntlr.addToLog(_("Catalog file syntax error %(error)s"), + messageArgs={"error": str(err)}, + messageCode="tpe:invalidCatalogFile", + file=os.path.basename(catalogFile), + level=logging.ERROR) + errors.append("tpe:invalidCatalogFile") + except ArchiveFileIOError: + pass + if rewriteTree is not None: + for tag, prefixAttr, replaceAttr in ( + ("{urn:oasis:names:tc:entity:xmlns:xml:catalog}rewriteSystem", "systemIdStartString", "rewritePrefix"), + ("{urn:oasis:names:tc:entity:xmlns:xml:catalog}rewriteURI", "uriStartString", "rewritePrefix")): # oasis catalog + for m in rewriteTree.iter(tag=tag): + prefixValue = m.get(prefixAttr) + replaceValue = m.get(replaceAttr) + if prefixValue and replaceValue is not None: + if prefixValue not in remappings: + base = baseForElement(m) + if base: + replaceValue = os.path.join(base, replaceValue) + if replaceValue and not isAbsolute(replaceValue): + # neither None nor '' + if not os.path.isabs(replaceValue): + replaceValue = fileBase + replaceValue + if not isHttpUrl(replaceValue): + replaceValue = replaceValue.replace("/", os.sep) + _normedValue = cntlr.webCache.normalizeUrl(replaceValue) + if replaceValue.endswith(os.sep) and not _normedValue.endswith(os.sep): + _normedValue += os.sep + remappings[prefixValue] = _normedValue + else: + cntlr.addToLog(_("Package catalog duplicate rewrite start string %(rewriteStartString)s"), + messageArgs={"rewriteStartString": prefixValue}, + messageCode="tpe:multipleRewriteURIsForStartString", + file=os.path.basename(catalogFile), + level=logging.ERROR) + errors.append("tpe:multipleRewriteURIsForStartString") + + return remappings + + # taxonomy package manager # plugin control is static to correspond to statically loaded modules packagesJsonFile = None diff --git a/tests/integration_tests/validation/conformance_suite_configurations/xbrl_report_packages_1_0.py b/tests/integration_tests/validation/conformance_suite_configurations/xbrl_report_packages_1_0.py index d33f7a684..e4e702b7d 100644 --- a/tests/integration_tests/validation/conformance_suite_configurations/xbrl_report_packages_1_0.py +++ b/tests/integration_tests/validation/conformance_suite_configurations/xbrl_report_packages_1_0.py @@ -29,7 +29,7 @@ # 0xx - basic zip structure and package identification tests "V-000-invalid-zip", # rpe:invalidArchiveFormat tpe:invalidArchiveFormat,0,A report package MUST conform to the .ZIP File Format Specification # "V-001-valid-taxonomy-package", # ,0,"Minimal valid taxonomy package (not a report package). If the package has a file extension of .zip and neither [META-INF/reportPackage.json nor reports] exists, the file is treated as a taxonomy package, and further constraints and processing defined by this specification are not applied." - "V-002-invalid-taxonomy-package-metadata", # tpe:invalidMetaDataFile,0,If a report package contains the path META-INF/taxonomyPackage.xml within the STLD then it MUST be a valid taxonomy package. + # "V-002-invalid-taxonomy-package-metadata", # tpe:invalidMetaDataFile,0,If a report package contains the path META-INF/taxonomyPackage.xml within the STLD then it MUST be a valid taxonomy package. "V-003-multiple-top-level-directories", # rpe:invalidDirectoryStructure tpe:invalidDirectoryStructure,0,A report package conforming to this specification MUST contain a single top-level directory "V-004-empty-zip", # rpe:invalidDirectoryStructure tpe:invalidDirectoryStructure,0,A report package conforming to this specification MUST contain a single top-level directory "V-005-leading-slash-in-zip-entry", # rpe:invalidArchiveFormat tpe:invalidArchiveFormat,0,Leading slash is illegal according to the ZIP specficiation From 08a077ce1f4364fbfb715198ea36278baa12bb4f Mon Sep 17 00:00:00 2001 From: "Austin M. Matherne" Date: Sun, 27 Oct 2024 04:59:51 -0400 Subject: [PATCH 5/9] Detect zip filesources with non-zip file extensions --- arelle/FileSource.py | 88 +++++++++++++++---------- arelle/Validate.py | 35 ++++++---- arelle/plugin/validate/ESEF/__init__.py | 6 +- 3 files changed, 83 insertions(+), 46 deletions(-) diff --git a/arelle/FileSource.py b/arelle/FileSource.py index 77016a988..5ea9031c4 100644 --- a/arelle/FileSource.py +++ b/arelle/FileSource.py @@ -31,10 +31,10 @@ from arelle.Cntlr import Cntlr -archivePathSeparators = (".zip" + os.sep, ".tar.gz" + os.sep, ".eis" + os.sep, ".xml" + os.sep, ".xfd" + os.sep, ".frm" + os.sep, '.taxonomyPackage.xml' + os.sep) + \ +archivePathSeparators = (".zip" + os.sep, ".xbr" + os.sep, ".xbri" + os.sep, ".tar.gz" + os.sep, ".eis" + os.sep, ".xml" + os.sep, ".xfd" + os.sep, ".frm" + os.sep, '.taxonomyPackage.xml' + os.sep) + \ ((".zip/", ".tar.gz/", ".eis/", ".xml/", ".xfd/", ".frm/", '.taxonomyPackage.xml/') if os.sep != "/" else ()) #acomodate windows and http styles -archiveFilenameSuffixes = {".zip", ".tar.gz", ".eis", ".xml", ".xfd", ".frm"} +archiveFilenameSuffixes = {".zip", ".xbr", ".xbri", ".tar.gz", ".eis", ".xml", ".xfd", ".frm"} POST_UPLOADED_ZIP = os.sep + "POSTupload.zip" SERVER_WEB_CACHE = os.sep + "_HTTP_CACHE" @@ -67,13 +67,23 @@ def openFileSource( selection: str | None = archivepathSelection[1] assert selection is not None + selectionIsEmbeddedZip = False if ( sourceFileSource is not None and sourceFileSource.dir is not None and sourceFileSource.isArchive and selection in sourceFileSource.dir - and isReportPackageExtension(selection) ): + if isReportPackageExtension(selection): + selectionIsEmbeddedZip = True + else: + try: + assert isinstance(sourceFileSource.fs, zipfile.ZipFile) + with sourceFileSource.fs.open(selection) as f: + selectionIsEmbeddedZip = zipfile.is_zipfile(f) + except Exception: + pass + if selectionIsEmbeddedZip: assert cntlr is not None filesource = FileSource(filename, cntlr) selection = None @@ -101,19 +111,19 @@ def archiveFilenameParts(filename: str | None, checkIfXmlIsEis: bool = False) -> class FileNamedStringIO(io.StringIO): # provide string IO in memory but behave as a fileName string def __init__(self, fileName: str, *args: Any, **kwargs: Any) -> None: - super(FileNamedStringIO, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.fileName = fileName def close(self) -> None: del self.fileName - super(FileNamedStringIO, self).close() + super().close() def __str__(self) -> str: return self.fileName class FileNamedTextIOWrapper(io.TextIOWrapper): # provide string IO in memory but behave as a fileName string def __init__(self, fileName: str, *args: Any, **kwargs: Any): - super(FileNamedTextIOWrapper, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.fileName = fileName def __str__(self) -> str: @@ -121,19 +131,19 @@ def __str__(self) -> str: class FileNamedBytesIO(io.BytesIO): # provide Bytes IO in memory but behave as a fileName string def __init__(self, fileName: str, *args: Any, **kwargs: Any) -> None: - super(FileNamedBytesIO, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.fileName = fileName def close(self) -> None: del self.fileName - super(FileNamedBytesIO, self).close() + super().close() def __str__(self) -> str: return self.fileName -class ArchiveFileIOError(IOError): +class ArchiveFileIOError(OSError): def __init__(self, fileSource: FileSource, errno: int, fileName: str) -> None: - super(ArchiveFileIOError, self).__init__(errno, + super().__init__(errno, _("Archive {}").format(fileSource.url), fileName) self.fileName = fileName @@ -150,6 +160,8 @@ class FileSource: url: str | list[str] | None basefile: str | list[str] | None xfdDocument: etree._ElementTree | None + taxonomyPackage: dict[str, str | dict[str, str]] | None + mappedPaths: dict[str, str] | None def __init__(self, url: str, cntlr: Cntlr | None = None, checkIfXmlIsEis: bool = False) -> None: self.url = str(url) # allow either string or FileNamedStringIO @@ -173,6 +185,16 @@ def __init__(self, url: str, cntlr: Cntlr | None = None, checkIfXmlIsEis: bool = self.taxonomyPackage = None # taxonomy package self.mappedPaths = None # remappings of path segments may be loaded by taxonomyPackage manifest self.isValid = True # filesource is assumed to be valid until a call to open fails. + if not self.isZip: + # Try to detect zip files with unrecognized file extensions. + try: + basefile = self.cntlr.webCache.getfilename(self.url) if self.cntlr is not None else self.url + if basefile: + with openFileStream(self.cntlr, basefile, 'rb') as fileStream: + self.isZip = zipfile.is_zipfile(fileStream) + except Exception: + pass + # for SEC xml files, check if it's an EIS anyway if (not (self.isZip or self.isEis or self.isXfd or self.isRss) and @@ -184,12 +206,12 @@ def __init__(self, url: str, cntlr: Cntlr | None = None, checkIfXmlIsEis: bool = assert self.cntlr is not None _filename = self.cntlr.webCache.getfilename(self.url) assert _filename is not None - file = open(_filename, 'r', errors='replace') + file = open(_filename, errors='replace') l = file.read(256) # may have comments before first element file.close() if re.match(r"\s*(<[?]xml[^?]+[?]>)?\s*(\s*)*<(cor[a-z]*:|sdf:|\w+:)?edgarSubmission", l): self.isEis = True - except EnvironmentError as err: + except OSError as err: self.isValid = False if self.cntlr: self.cntlr.addToLog(_("[{0}] {1}").format(type(err).__name__, err)) @@ -215,7 +237,7 @@ def open(self, reloadCache: bool = False) -> None: fileStream = openFileStream(self.cntlr, self.basefile, 'rb') self.fs = zipfile.ZipFile(fileStream, mode="r") self.isOpen = True - except (EnvironmentError, zipfile.BadZipFile) as err: + except (OSError, zipfile.BadZipFile) as err: self.isValid = False self.logError(err) pass @@ -224,7 +246,7 @@ def open(self, reloadCache: bool = False) -> None: assert isinstance(self.basefile, str) self.fs = tarfile.open(self.basefile, "r:gz") self.isOpen = True - except EnvironmentError as err: + except OSError as err: self.isValid = False self.logError(err) pass @@ -248,7 +270,7 @@ def open(self, reloadCache: bool = False) -> None: break buf += zlib.decompress(compressedBytes) file.close() - except EnvironmentError as err: + except OSError as err: self.isValid = False self.logError(err) pass @@ -268,7 +290,7 @@ def open(self, reloadCache: bool = False) -> None: self.eisDocument = etree.parse(_file, parser=parser) _file.close() self.isOpen = True - except (EnvironmentError, etree.LxmlError) as err: + except (OSError, etree.LxmlError) as err: self.isValid = False self.logError(err) return # provide error message later @@ -304,7 +326,7 @@ def open(self, reloadCache: bool = False) -> None: ungzippedBytes += readBytes if len(readBytes) == 0 or (lenUncomp - lenRead) <= 0: break - except IOError as err: + except OSError: self.isValid = False pass # provide error message later @@ -322,7 +344,7 @@ def open(self, reloadCache: bool = False) -> None: self.xfdDocument = etree.parse(file) file.close() self.isOpen = True - except (EnvironmentError, etree.LxmlError) as err: + except (OSError, etree.LxmlError) as err: self.isValid = False self.logError(err) return # provide error message later @@ -332,7 +354,7 @@ def open(self, reloadCache: bool = False) -> None: assert isinstance(self.basefile, str) self.rssDocument = etree.parse(self.basefile) self.isOpen = True - except (EnvironmentError, etree.LxmlError) as err: + except (OSError, etree.LxmlError) as err: self.isValid = False self.logError(err) return # provide error message later @@ -343,16 +365,16 @@ def open(self, reloadCache: bool = False) -> None: self.loadTaxonomyPackageMappings() def loadTaxonomyPackageMappings(self, errors: list[str] = [], expectTaxonomyPackage: bool = False) -> None: - if not self.mappedPaths and (self.taxonomyPackageMetadataFiles or expectTaxonomyPackage): + if not self.mappedPaths and (self.taxonomyPackageMetadataFiles or expectTaxonomyPackage) and self.cntlr: if PackageManager.validateTaxonomyPackage(self.cntlr, self, errors=errors): assert isinstance(self.baseurl, str) metadata = self.baseurl + os.sep + self.taxonomyPackageMetadataFiles[0] - self.taxonomyPackage = PackageManager.parsePackage(self.cntlr, self, metadata, # type: ignore[no-untyped-call] + self.taxonomyPackage = PackageManager.parsePackage(self.cntlr, self, metadata, os.sep.join(os.path.split(metadata)[:-1]) + os.sep, errors=errors) assert self.taxonomyPackage is not None - self.mappedPaths = self.taxonomyPackage.get("remappings") + self.mappedPaths = cast('dict[str, str]', self.taxonomyPackage.get("remappings")) def openZipStream(self, sourceZipStream: str) -> None: if not self.isOpen: @@ -418,7 +440,7 @@ def reportPackage(self) -> ReportPackage | None: self._reportPackage: ReportPackage | None return self._reportPackage except AttributeError: - self._reportPackage = ReportPackage.fromFileSource(self) if self.isZip else None + self._reportPackage = ReportPackage.fromFileSource(self) return self._reportPackage @property @@ -526,8 +548,8 @@ def file( b = stripDeclarationBytes(b) return (FileNamedTextIOWrapper(filepath, io.BytesIO(b), encoding=encoding), encoding) - except KeyError: - raise ArchiveFileIOError(self, errno.ENOENT, archiveFileName) + except KeyError as err: + raise ArchiveFileIOError(self, errno.ENOENT, archiveFileName) from err elif archiveFileSource.isTarGz: try: assert isinstance(archiveFileSource.fs, tarfile.TarFile) @@ -543,13 +565,13 @@ def file( b = stripDeclarationBytes(b) return (FileNamedTextIOWrapper(filepath, io.BytesIO(b), encoding=encoding), encoding) - except KeyError: + except KeyError as err: # Note 2022-09-06 # The following error is raised by mypy, indicating there's a bug here: # Missing positional argument "fileName" # Not fixing this bug as a part of this PR # Also expecting second argument to be int but is str here - raise ArchiveFileIOError(self, archiveFileName) # type: ignore[call-arg, arg-type] + raise ArchiveFileIOError(self, archiveFileName) from err # type: ignore[call-arg, arg-type] elif archiveFileSource.isEis: assert self.eisDocument is not None for docElt in self.eisDocument.iter(tag="{http://www.sec.gov/edgar/common}document"): @@ -723,7 +745,7 @@ def dir(self) -> list[str] | None: XmlUtil.text(descendantPubDate), instDoc)) self.filesDir = files - except (EnvironmentError, + except (OSError, etree.LxmlError) as err: pass elif self.isInstalledTaxonomyPackage: @@ -787,10 +809,10 @@ def openFileStream( if isHttpUrl(filepath) and cntlr: _cacheFilepath = cntlr.webCache.getfilename(filepath, normalize=True) # normalize is separate step in ModelDocument retrieval, combined here if _cacheFilepath is None: - raise IOError(_("Unable to open file: {0}.").format(filepath)) + raise OSError(_("Unable to open file: {0}.").format(filepath)) filepath = _cacheFilepath if not filepath and cntlr: - raise IOError(_("Unable to open file: \"{0}\".").format(filepath)) + raise OSError(_("Unable to open file: \"{0}\".").format(filepath)) # file path may be server (or memcache) or local file system if filepath.startswith(SERVER_WEB_CACHE) and cntlr: filestream = None @@ -843,7 +865,7 @@ def openXmlFileStream( # allow filepath to close # this may not be needed for Mac or Linux, needs confirmation!!! if text is None: # ok to read as utf-8 - return io.open(filepath, 'rt', encoding=encoding or 'utf-8'), encoding + return open(filepath, encoding=encoding or 'utf-8'), encoding else: if stripDeclaration: text = stripDeclarationText(text) @@ -873,7 +895,7 @@ def saveFile(cntlr: Cntlr, filepath: str, contents: str, encoding: str | None = if isHttpUrl(filepath): _cacheFilepath = cntlr.webCache.getfilename(filepath) if _cacheFilepath is None: - raise IOError(_("Unable to open file: {0}.").format(filepath)) + raise OSError(_("Unable to open file: {0}.").format(filepath)) filepath = _cacheFilepath # file path may be server (or memcache) or local file system if filepath.startswith(SERVER_WEB_CACHE): @@ -884,7 +906,7 @@ def saveFile(cntlr: Cntlr, filepath: str, contents: str, encoding: str | None = _dirpath = os.path.dirname(filepath) if not os.path.exists(_dirpath): # directory must exist before io.open os.makedirs(_dirpath) - with io.open(filepath, mode, encoding=(encoding or 'utf-8')) as f: + with open(filepath, mode, encoding=(encoding or 'utf-8')) as f: f.write(contents) # GAE Blobcache diff --git a/arelle/Validate.py b/arelle/Validate.py index 9bee26012..07fcb1268 100644 --- a/arelle/Validate.py +++ b/arelle/Validate.py @@ -5,6 +5,7 @@ import os, sys, traceback, logging import time from urllib.parse import unquote +import zipfile import regex as re from collections import defaultdict, OrderedDict @@ -20,7 +21,6 @@ from arelle.ModelTestcaseObject import testcaseVariationsByTarget, ModelTestcaseVariation from arelle.ModelValue import (qname, QName) from arelle.PluginManager import pluginClassMethods -from arelle.packages.report import ReportPackageConst from arelle.packages.report.DetectReportPackage import isReportPackageExtension from arelle.rendering import RenderingEvaluator from arelle.XmlUtil import collapseWhitespace, xmlstring @@ -271,6 +271,7 @@ def _validateTestcaseVariation(self, testcase, modelTestcaseVariation): modelTestcaseVariation.duration = time.perf_counter() - startTime def _testcaseLoadReadMeFirstUri(self, testcase, modelTestcaseVariation, index, readMeFirstUri, resultIsVersioningReport, resultIsTaxonomyPackage, inputDTSes, errorCaptureLevel, baseForElement, parameters): + preLoadingErrors = [] # accumulate pre-loading errors, such as during taxonomy package loading loadedModels = [] readMeFirstElements = modelTestcaseVariation.readMeFirstElements expectTaxonomyPackage = (index < len(readMeFirstElements) and @@ -307,7 +308,17 @@ def _testcaseLoadReadMeFirstUri(self, testcase, modelTestcaseVariation, index, r loadedModels.append(modelXbrl) PackageManager.packageInfo(self.modelXbrl.modelManager.cntlr, readMeFirstUri, reload=True, errors=modelXbrl.errors) else: # not a multi-schemaRef versioning report - if self.useFileSource.isArchive and (os.path.isabs(readMeFirstUri) or not isReportPackageExtension(readMeFirstUri)): + readMeFirstUriIsEmbeddedZipFile = False + if self.useFileSource.isArchive and not os.path.isabs(readMeFirstUri): + if isReportPackageExtension(readMeFirstUri): + readMeFirstUriIsEmbeddedZipFile = True + else: + normalizedReadMeFirstUri = self.modelXbrl.modelManager.cntlr.webCache.normalizeUrl(readMeFirstUri, baseForElement) + archivePath = FileSource.archiveFilenameParts(normalizedReadMeFirstUri) + if archivePath: + with self.useFileSource.fs.open(archivePath[1]) as embeddedFile: + readMeFirstUriIsEmbeddedZipFile = zipfile.is_zipfile(embeddedFile) + if not readMeFirstUriIsEmbeddedZipFile: modelXbrl = ModelXbrl.load(self.modelXbrl.modelManager, readMeFirstUri, _("validating"), @@ -322,7 +333,7 @@ def _testcaseLoadReadMeFirstUri(self, testcase, modelTestcaseVariation, index, r if ( self.useFileSource and not os.path.isabs(readMeFirstUri) - and isReportPackageExtension(readMeFirstUri) + and (readMeFirstUriIsEmbeddedZipFile or isReportPackageExtension(readMeFirstUri)) ): if self.useFileSource.isArchive: sourceFileSource = self.useFileSource @@ -339,11 +350,10 @@ def _testcaseLoadReadMeFirstUri(self, testcase, modelTestcaseVariation, index, r if newSourceFileSource: sourceFileSource.close() - _errors = [] # accumulate pre-loading errors, such as during taxonomy package loading if filesource and not filesource.selection and filesource.isArchive: try: if filesource.isTaxonomyPackage or expectTaxonomyPackage: - filesource.loadTaxonomyPackageMappings(errors=_errors, expectTaxonomyPackage=expectTaxonomyPackage) + filesource.loadTaxonomyPackageMappings(errors=preLoadingErrors, expectTaxonomyPackage=expectTaxonomyPackage) filesource.select(None) # must select loadable reports (not the taxonomy package itself) elif not filesource.isReportPackage: from arelle.CntlrCmdLine import filesourceEntrypointFiles @@ -362,7 +372,7 @@ def _testcaseLoadReadMeFirstUri(self, testcase, modelTestcaseVariation, index, r for pluginXbrlMethod in pluginClassMethods("ModelTestcaseVariation.ReportPackageIxdsOptions"): pluginXbrlMethod(self, _rptPkgIxdsOptions) if filesource and filesource.isReportPackage and not _rptPkgIxdsOptions: - for report in filesource.reportPackage.reports: + for report in filesource.reportPackage.reports or []: assert isinstance(filesource.basefile, str) modelXbrl = ModelXbrl.load(self.modelXbrl.modelManager, report.primary, @@ -371,7 +381,7 @@ def _testcaseLoadReadMeFirstUri(self, testcase, modelTestcaseVariation, index, r base=filesource.basefile + "/", errorCaptureLevel=errorCaptureLevel, ixdsTarget=modelTestcaseVariation.ixdsTarget, - errors=_errors) + errors=preLoadingErrors) loadedModels.append(modelXbrl) else: if _rptPkgIxdsOptions and filesource.isTaxonomyPackage: @@ -385,7 +395,7 @@ def _testcaseLoadReadMeFirstUri(self, testcase, modelTestcaseVariation, index, r errorCaptureLevel=errorCaptureLevel, ixdsTarget=modelTestcaseVariation.ixdsTarget, isLoadable=modelTestcaseVariation.variationDiscoversDTS or filesource.url, - errors=_errors) + errors=preLoadingErrors) loadedModels.append(modelXbrl) for model in loadedModels: @@ -428,6 +438,10 @@ def _testcaseLoadReadMeFirstUri(self, testcase, modelTestcaseVariation, index, r modelXbrl=model, instance=model.modelDocument.basename, error=err, exc_info=(type(err) is not AssertionError)) model.hasFormulae = _hasFormulae errors = [error for model in loadedModels for error in model.errors] + for err in preLoadingErrors: + if err not in errors: + # include errors from models which failed to load. + errors.append(err) reportModelCount = len([ model for model in loadedModels if model.modelDocument is not None and (model.fileSource.isReportPackage or not model.fileSource.isTaxonomyPackage) @@ -660,9 +674,8 @@ def determineTestStatus(self, modelTestcaseVariation, errors, validateModelCount if expected is None: expected = [] expected.extend(userExpectedErrors) - if expectedCount is None: - expectedCount = 0 - expectedCount += len(userExpectedErrors) + if expectedCount is not None: + expectedCount += len(userExpectedErrors) if matchAllExpected: if isinstance(expected, list): if not expectedCount: diff --git a/arelle/plugin/validate/ESEF/__init__.py b/arelle/plugin/validate/ESEF/__init__.py index 1fa55e4a7..6b5a5c510 100644 --- a/arelle/plugin/validate/ESEF/__init__.py +++ b/arelle/plugin/validate/ESEF/__init__.py @@ -131,14 +131,16 @@ def modelDocumentPullLoader( if documentType in {ModelDocument.Type.TESTCASESINDEX, ModelDocument.Type.TESTCASE}: return None # allow zipped test case to load normally - if disclosureSystemYear >= 2023 and not modelXbrl.fileSource.isZip: + isZipFormat = modelXbrl.fileSource.isZip + hasZipFileExtension = modelXbrl.fileSource.type.lower() == ".zip" + if disclosureSystemYear >= 2023 and not (isZipFormat and hasZipFileExtension): modelXbrl.error("ESEF.2.6.1.disallowedReportPackageFileExtension", _("A report package MUST conform to the .ZIP File Format Specification and MUST have a .zip extension."), fileSourceType=modelXbrl.fileSource.type, modelObject=modelXbrl) return LoadingException("ESEF Report Package must be .ZIP File Format") if modelXbrl.fileSource.isArchive: - if not validateTaxonomyPackage(modelXbrl.modelManager.cntlr, modelXbrl.fileSource): + if not hasZipFileExtension or not validateTaxonomyPackage(modelXbrl.modelManager.cntlr, modelXbrl.fileSource): modelXbrl.error("ESEF.RTS.Annex.III.3.missingOrInvalidTaxonomyPackage", _("Single reporting package with issuer's XBRL extension taxonomy files and Inline XBRL instance document must be compliant with the latest recommended version of the Taxonomy Packages specification (1.0)"), modelObject=modelXbrl) From 366c59c59253880e536a13bff671a2c2e25f9a5b Mon Sep 17 00:00:00 2001 From: "Austin M. Matherne" Date: Thu, 24 Oct 2024 20:50:35 -0400 Subject: [PATCH 6/9] Validate report packages --- arelle/Validate.py | 40 ++-- arelle/packages/report/ReportPackage.py | 172 +++++++++++---- arelle/packages/report/ReportPackageConst.py | 21 +- .../packages/report/ReportPackageValidator.py | 206 ++++++++++++++++++ .../xbrl_report_packages_1_0.py | 94 -------- 5 files changed, 371 insertions(+), 162 deletions(-) create mode 100644 arelle/packages/report/ReportPackageValidator.py diff --git a/arelle/Validate.py b/arelle/Validate.py index 07fcb1268..2d191bb6a 100644 --- a/arelle/Validate.py +++ b/arelle/Validate.py @@ -22,6 +22,7 @@ from arelle.ModelValue import (qname, QName) from arelle.PluginManager import pluginClassMethods from arelle.packages.report.DetectReportPackage import isReportPackageExtension +from arelle.packages.report.ReportPackageValidator import ReportPackageValidator from arelle.rendering import RenderingEvaluator from arelle.XmlUtil import collapseWhitespace, xmlstring @@ -122,6 +123,10 @@ def validate(self): elif self.modelXbrl.modelDocument.type == Type.RSSFEED: self.validateRssFeed() else: + if self.modelXbrl.fileSource.isReportPackage or self.modelXbrl.modelManager.validateAllFilesAsReportPackages: + rpValidator = ReportPackageValidator(self.modelXbrl.fileSource) + for val in rpValidator.validate(): + self.modelXbrl.log(level=val.level.name, codes=val.codes, msg=val.msg, modelXbrl=self.modelXbrl, **val.args) try: self.instValidator.validate(self.modelXbrl, self.modelXbrl.modelManager.formulaOptions.typedParameters(self.modelXbrl.prefixedNamespaces)) self.instValidator.close() @@ -350,6 +355,15 @@ def _testcaseLoadReadMeFirstUri(self, testcase, modelTestcaseVariation, index, r if newSourceFileSource: sourceFileSource.close() + _rptPkgIxdsOptions = {} + for pluginXbrlMethod in pluginClassMethods("ModelTestcaseVariation.ReportPackageIxdsOptions"): + pluginXbrlMethod(self, _rptPkgIxdsOptions) + reportPackageErrors = False + if (filesource.isReportPackage or self.modelXbrl.modelManager.validateAllFilesAsReportPackages) and not _rptPkgIxdsOptions: + rpValidator = ReportPackageValidator(filesource) + for val in rpValidator.validate(): + reportPackageErrors = True + preLoadingErrors.append(val.codes) if filesource and not filesource.selection and filesource.isArchive: try: if filesource.isTaxonomyPackage or expectTaxonomyPackage: @@ -368,21 +382,19 @@ def _testcaseLoadReadMeFirstUri(self, testcase, modelTestcaseVariation, index, r _("Testcase variation validation exception: %(error)s, entry URL: %(instance)s"), modelXbrl=self.modelXbrl, instance=readMeFirstUri, error=err) return [] # don't try to load this entry URL - _rptPkgIxdsOptions = {} - for pluginXbrlMethod in pluginClassMethods("ModelTestcaseVariation.ReportPackageIxdsOptions"): - pluginXbrlMethod(self, _rptPkgIxdsOptions) if filesource and filesource.isReportPackage and not _rptPkgIxdsOptions: - for report in filesource.reportPackage.reports or []: - assert isinstance(filesource.basefile, str) - modelXbrl = ModelXbrl.load(self.modelXbrl.modelManager, - report.primary, - _("validating"), - useFileSource=filesource, - base=filesource.basefile + "/", - errorCaptureLevel=errorCaptureLevel, - ixdsTarget=modelTestcaseVariation.ixdsTarget, - errors=preLoadingErrors) - loadedModels.append(modelXbrl) + if not reportPackageErrors: + for report in filesource.reportPackage.reports or []: + assert isinstance(filesource.basefile, str) + modelXbrl = ModelXbrl.load(self.modelXbrl.modelManager, + report.primary, + _("validating"), + useFileSource=filesource, + base=filesource.basefile + "/", + errorCaptureLevel=errorCaptureLevel, + ixdsTarget=modelTestcaseVariation.ixdsTarget, + errors=preLoadingErrors) + loadedModels.append(modelXbrl) else: if _rptPkgIxdsOptions and filesource.isTaxonomyPackage: # Legacy ESEF conformance suite logic. diff --git a/arelle/packages/report/ReportPackage.py b/arelle/packages/report/ReportPackage.py index 796339cb4..bed6ad4f8 100644 --- a/arelle/packages/report/ReportPackage.py +++ b/arelle/packages/report/ReportPackage.py @@ -5,11 +5,12 @@ from __future__ import annotations import json +import os import zipfile from collections import defaultdict from dataclasses import dataclass from pathlib import Path, PurePosixPath -from typing import TYPE_CHECKING, Any, cast +from typing import TYPE_CHECKING, Any, Counter, cast from arelle.packages import PackageUtils from arelle.packages.report import ReportPackageConst as Const @@ -18,7 +19,7 @@ from arelle.FileSource import FileSource -def _getReportPackageTopLevelDirectory(filesource: FileSource) -> str | None: +def getReportPackageTopLevelDirectory(filesource: FileSource) -> str | None: packageEntries = set(filesource.dir or []) potentialTopLevelReportDirs = { topLevelDir @@ -30,28 +31,75 @@ def _getReportPackageTopLevelDirectory(filesource: FileSource) -> str | None: return None -def _getReportPackageJson(filesource: FileSource, stld: str | None) -> dict[str, Any] | None: - packageJsonPath = f"{stld}/{Const.REPORT_PACKAGE_FILE}" - if stld is None or packageJsonPath not in (filesource.dir or []): +def getReportPackageJsonFile(filesource: FileSource, stld: str | None) -> str | None: + if not isinstance(filesource.fs, zipfile.ZipFile): + return None + # Match against original unsanitized file names and all their backslash glory. + packageOriginalFileNames = [f.orig_filename for f in filesource.fs.infolist()] + expectedFutureReportPackageJsonPath = Const.REPORT_PACKAGE_FILE + expectedReportPackageJsonPath = f"{stld}/{Const.REPORT_PACKAGE_FILE}" + futureReportPackageJsonPathPartialMatches = [] + reportPackageJsonPathPartialMatches = [] + for path in packageOriginalFileNames: + if path.startswith(expectedFutureReportPackageJsonPath): + futureReportPackageJsonPathPartialMatches.append(path) + elif path.startswith(expectedReportPackageJsonPath): + reportPackageJsonPathPartialMatches.append(path) + + jsonPath = None + # Don't return content if there are duplicate entries or a directory with the same name as the json file. + if len(futureReportPackageJsonPathPartialMatches) == 1: + futureReportPackageJsonPath = futureReportPackageJsonPathPartialMatches[0] + if futureReportPackageJsonPath == expectedFutureReportPackageJsonPath: + jsonPath = expectedFutureReportPackageJsonPath + elif len(futureReportPackageJsonPathPartialMatches) == 0: + if len(reportPackageJsonPathPartialMatches) == 1: + reportPackageJsonPath = reportPackageJsonPathPartialMatches[0] + if reportPackageJsonPath == expectedReportPackageJsonPath: + jsonPath = expectedReportPackageJsonPath + return jsonPath + + +def getPackageJson(filesource: FileSource, selection: str | None) -> dict[str, Any] | None: + if selection is None: return None initialSelection = filesource.selection initialUrl = filesource.url - try: - filesource.select(packageJsonPath) - fullPackageJsonPath = cast(str, filesource.url) - with filesource.file(fullPackageJsonPath, binary=True)[0] as rpj: - return cast(dict[str, Any], json.load(rpj)) - except (OSError, zipfile.BadZipFile, json.JSONDecodeError): - return None - finally: - filesource.selection = initialSelection - filesource.url = initialUrl + filesource.select(selection) + fullJsonFilePath = cast(str, filesource.url) + content = None + encodings = ["utf-8", "utf-8-sig"] + for encoding in encodings: + try: + with filesource.file(fullJsonFilePath, encoding=encoding)[0] as f: + content = cast(dict[str, Any], json.load(f, object_pairs_hook=forbidDuplicateKeys)) + break + except (OSError, ValueError, zipfile.BadZipFile, json.JSONDecodeError): + continue + # Restore selection and url including initial taxonomy packages with url, but no selection. + filesource.selection = initialSelection + filesource.url = initialUrl + return content -def _getAllReportEntries(filesource: FileSource, stld: str | None) -> list[ReportEntry] | None: +def forbidDuplicateKeys(pairs: list[tuple[Any, Any]]) -> Any: + seen = {} + for key, value in pairs: + if key in seen: + raise ValueError(f"duplicate key: {key}") + else: + seen[key] = value + return seen + + +def getAllReportEntries(filesource: FileSource, stld: str | None) -> list[ReportEntry] | None: if stld is None: return None - entries = filesource.dir or [] + if not isinstance(filesource.fs, zipfile.ZipFile): + return None + entries = [f.orig_filename for f in filesource.fs.infolist()] + if not any(entry.startswith(f"{stld}/{Const.REPORTS_DIRECTORY}") for entry in entries): + return None topReportEntries = [] entriesBySubDir = defaultdict(list) for entry in entries: @@ -68,16 +116,13 @@ def _getAllReportEntries(filesource: FileSource, stld: str | None) -> list[Repor topReportEntries.append(entry) else: entriesBySubDir[path.parts[2]].append(entry) - reportEntries = [] assert isinstance(filesource.basefile, str) - if topReportEntries: - reportEntries.extend([ReportEntry(filesource.basefile, [entry]) for entry in topReportEntries]) - else: - for entries in entriesBySubDir.values(): - if len(entries) == 1 or all(PurePosixPath(entry).suffix in Const.INLINE_REPORT_FILE_EXTENSIONS for entry in entries): - reportEntries.append(ReportEntry(filesource.basefile, entries)) - else: - reportEntries.extend(sorted(ReportEntry(filesource.basefile, [entry]) for entry in entries)) + reportEntries = [ReportEntry(filesource.basefile, [entry]) for entry in topReportEntries] + for entries in entriesBySubDir.values(): + if len(entries) == 1 or all(PurePosixPath(entry).suffix in Const.INLINE_REPORT_FILE_EXTENSIONS for entry in entries): + reportEntries.append(ReportEntry(filesource.basefile, entries)) + else: + reportEntries.extend(sorted(ReportEntry(filesource.basefile, [entry]) for entry in entries)) return sorted(reportEntries) @@ -89,8 +134,13 @@ class ReportEntry: def __post_init__(self) -> None: if len(self.files) == 0: raise ValueError("Report entry must have at least one file") - elif len(self.files) > 1 and any(PurePosixPath(f).suffix not in Const.INLINE_REPORT_FILE_EXTENSIONS for f in self.files): + elif len(self.files) > 1 and any( + PurePosixPath(f).suffix not in Const.INLINE_REPORT_FILE_EXTENSIONS for f in self.files + ): raise ValueError("Non-inline report entries must be a single file") + primaryDir = os.path.dirname(self.primary) + if any(os.path.dirname(f) != primaryDir for f in self.files[1:]): + raise ValueError("Report entry files must all be in the same directory") @property def primary(self) -> str: @@ -108,24 +158,34 @@ def isInline(self) -> bool: def fullPathFiles(self) -> list[str]: return [f"{self.baseDir}/{f}" for f in self.files] + @property + def isTopLevel(self) -> bool: + return len(PurePosixPath(self.primary).parts) == 3 + + class ReportPackage: def __init__( self, reportPackageZip: zipfile.ZipFile, - stld: str, - reportType: Const.ReportType, - reportPackageJson: dict[str, Any], - reports: list[ReportEntry], + stld: str | None, + reportType: Const.ReportType | None, + reportPackageJson: dict[str, Any] | None, + reports: list[ReportEntry] | None, ) -> None: self._reportPackageZip = reportPackageZip self._stld = stld self._reportType = reportType self._reportPackageJson = reportPackageJson self._allReports = reports - self._reports = [ - report for report in self._allReports - if all(PurePosixPath(f).suffix in reportType.reportFileExtensions for f in report.files) - ] + if self._allReports is None: + self._reports = None + else: + reportTypeFileExtensions = reportType.reportFileExtensions if reportType is not None else frozenset() + self._reports = [ + report + for report in self._allReports + if all(PurePosixPath(f).suffix in reportTypeFileExtensions for f in report.files) + ] @staticmethod def fromFileSource(filesource: FileSource) -> ReportPackage | None: @@ -136,39 +196,55 @@ def fromFileSource(filesource: FileSource) -> ReportPackage | None: if not isinstance(filesource.basefile, str): raise ValueError(f"Report Package base file must be a string: {filesource.basefile}") reportType = Const.ReportType.fromExtension(Path(filesource.basefile).suffix) - if reportType is None: + stld = getReportPackageTopLevelDirectory(filesource) + reportPackageJsonFile = getReportPackageJsonFile(filesource, stld) + reportPackageJson = None + if reportPackageJsonFile: + reportPackageJson = getPackageJson(filesource, reportPackageJsonFile) + reports = getAllReportEntries(filesource, stld) + if reportPackageJsonFile is None and reports is None: return None - stld = _getReportPackageTopLevelDirectory(filesource) - if stld is None: + reportEntriesBySubDir = Counter(dir for report in reports or [] if not report.isTopLevel) + if reports is not None and any(report.isTopLevel for report in reports): + reports = [report for report in reports if report.isTopLevel] + if any(subdirCount > 1 for subdirCount in reportEntriesBySubDir.values()): return None - reportPackageJson = _getReportPackageJson(filesource, stld) - reports = _getAllReportEntries(filesource, stld) - if reportPackageJson is None and reports is None: + if reportType and reportType.isConstrained and len(reports or []) > 1: return None return ReportPackage( reportPackageZip=filesource.fs, stld=stld, reportType=reportType, - reportPackageJson=reportPackageJson or {}, - reports=reports or [], + reportPackageJson=reportPackageJson, + reports=reports, ) @property - def documentType(self) -> Any: + def documentInfo(self) -> Any: if self._reportPackageJson is None: return None - return self._reportPackageJson.get("documentInfo", {}).get("documentType") + return self._reportPackageJson.get("documentInfo") + + @property + def documentType(self) -> Any: + if isinstance(self.documentInfo, dict): + return self.documentInfo.get("documentType") + return None + + @property + def stld(self) -> str | None: + return self._stld @property def reportType(self) -> Const.ReportType | None: return self._reportType @property - def allReports(self) -> list[ReportEntry]: + def allReports(self) -> list[ReportEntry] | None: return self._allReports @property - def reports(self) -> list[ReportEntry]: + def reports(self) -> list[ReportEntry] | None: return self._reports @property @@ -176,5 +252,5 @@ def reportPackageJson(self) -> dict[str, Any] | None: return self._reportPackageJson @property - def reportPackageZip(self) -> zipfile.ZipFile | None: + def reportPackageZip(self) -> zipfile.ZipFile: return self._reportPackageZip diff --git a/arelle/packages/report/ReportPackageConst.py b/arelle/packages/report/ReportPackageConst.py index dd4176e57..919cd3748 100644 --- a/arelle/packages/report/ReportPackageConst.py +++ b/arelle/packages/report/ReportPackageConst.py @@ -8,17 +8,22 @@ from arelle.packages.PackageConst import META_INF_DIRECTORY -INLINE_XBRL_REPORT_PACKAGE_EXTENSION = ".xbri" -NON_INLINE_XBRL_REPORT_PACKAGE_EXTENSION = ".xbr" -UNCONSTRAINED_REPORT_PACKAGE_EXTENSION = ".zip" +INLINE_XBRL_REPORT_PACKAGE_DOCUMENT_TYPE = "https://xbrl.org/report-package/2023/xbri" +NON_INLINE_XBRL_REPORT_PACKAGE_DOCUMENT_TYPE = "https://xbrl.org/report-package/2023/xbr" +UNCONSTRAINED_REPORT_PACKAGE_DOCUMENT_TYPE = "https://xbrl.org/report-package/2023" -CONSTRAINED_REPORT_PACKAGE_EXTENSIONS = frozenset( +SUPPORTED_DOCUMENT_TYPES = frozenset( [ - INLINE_XBRL_REPORT_PACKAGE_EXTENSION, - NON_INLINE_XBRL_REPORT_PACKAGE_EXTENSION, + INLINE_XBRL_REPORT_PACKAGE_DOCUMENT_TYPE, + NON_INLINE_XBRL_REPORT_PACKAGE_DOCUMENT_TYPE, + UNCONSTRAINED_REPORT_PACKAGE_DOCUMENT_TYPE, ] ) +INLINE_XBRL_REPORT_PACKAGE_EXTENSION = ".xbri" +NON_INLINE_XBRL_REPORT_PACKAGE_EXTENSION = ".xbr" +UNCONSTRAINED_REPORT_PACKAGE_EXTENSION = ".zip" + REPORT_PACKAGE_EXTENSIONS = frozenset( [ @@ -69,3 +74,7 @@ def reportFileExtensions(self) -> frozenset[str]: if self == ReportType.UNCONSTRAINED_REPORT_PACKAGE: return REPORT_FILE_EXTENSIONS raise ValueError(f"Report type without defined report file extensions: {self}") + + @property + def isConstrained(self) -> bool: + return self != ReportType.UNCONSTRAINED_REPORT_PACKAGE diff --git a/arelle/packages/report/ReportPackageValidator.py b/arelle/packages/report/ReportPackageValidator.py new file mode 100644 index 000000000..be1157d1b --- /dev/null +++ b/arelle/packages/report/ReportPackageValidator.py @@ -0,0 +1,206 @@ +""" +See COPYRIGHT.md for copyright information. +""" + +from __future__ import annotations + +from collections.abc import Generator +from pathlib import Path, PurePosixPath +from typing import TYPE_CHECKING, Counter + +from arelle.packages import PackageValidation +from arelle.packages.PackageType import PackageType +from arelle.packages.report import ReportPackageConst as Const +from arelle.packages.report.ReportPackage import ( + getAllReportEntries, + getPackageJson, + getReportPackageJsonFile, + getReportPackageTopLevelDirectory, +) +from arelle.typing import TypeGetText +from arelle.utils.validate.Validation import Validation + +if TYPE_CHECKING: + from arelle.FileSource import FileSource + +_: TypeGetText + + +REPORT_PACKAGE_TYPE = PackageType("Report", "rpe") + +REPORT_PACKAGE_ABORTING_VALIDATIONS = ( + PackageValidation.validatePackageZipFormat, + PackageValidation.validateZipFileSeparators, + PackageValidation.validatePackageNotEncrypted, + PackageValidation.validateTopLevelDirectories, + PackageValidation.validateDuplicateEntries, + PackageValidation.validateConflictingEntries, + PackageValidation.validateEntries, +) + +REPORT_PACKAGE_NON_ABORTING_VALIDATIONS = (PackageValidation.validateMetadataDirectory,) + + +class ReportPackageValidator: + def __init__(self, filesource: FileSource) -> None: + self._filesource = filesource + assert isinstance(self._filesource.basefile, str) + self._reportType = Const.ReportType.fromExtension(Path(self._filesource.basefile).suffix) + self._stld = getReportPackageTopLevelDirectory(self._filesource) + self._reportPackageJsonFile = getReportPackageJsonFile(self._filesource, self._stld) + + def validate(self) -> Generator[Validation, None, None]: + if self._filesource.reportPackage is not None and self._filesource.reportPackage.reportType is None: + yield Validation.error( + "rpe:unsupportedFileExtension", + _("Report package has unsupported file extension."), + ) + return + for validation in REPORT_PACKAGE_ABORTING_VALIDATIONS: + if error := validation(REPORT_PACKAGE_TYPE, self._filesource): + yield error + return + if (error := self._validatePackageJson()) or (error := self._validateReports()): + yield error + return + + def _validatePackageJson(self) -> Validation | None: + reportPackageJson = getPackageJson(self._filesource, self._reportPackageJsonFile) + if reportPackageJson is None: + if self._reportPackageJsonFile in (self._filesource.dir or []): + return Validation.error( + "rpe:invalidJSON", + _("Report package JSON file must be a valid JSON file, per RFC 8259."), + ) + elif self._reportType in { + Const.ReportType.NON_INLINE_XBRL_REPORT_PACKAGE, + Const.ReportType.INLINE_XBRL_REPORT_PACKAGE, + }: + return Validation.error( + "rpe:documentTypeFileExtensionMismatch", + _("%(reportType)s report package requires a report package JSON file"), + reportType="Inline" + if self._reportType == Const.ReportType.INLINE_XBRL_REPORT_PACKAGE + else "Non-Inline", + ) + return None + documentInfo = reportPackageJson.get("documentInfo") + if not isinstance(documentInfo, dict): + return Validation.error( + "rpe:invalidJSONStructure", + _("Report package 'documentInfo' must resolve to a JSON object: %(documentInfo)s"), + documentInfo=documentInfo, + ) + documentType = documentInfo.get("documentType") + if not isinstance(documentType, str): + return Validation.error( + "rpe:invalidJSONStructure", + _("Report package type 'documentInfo.documentType' must resolve to a JSON string: %(documentType)s"), + documentType=documentType, + ) + if documentType not in Const.SUPPORTED_DOCUMENT_TYPES or self._stld is None: + return Validation.error( + "rpe:unsupportedReportPackageVersion", + _("Report package document type '%(documentType)s' is not supported."), + documentType=documentType, + ) + validDocumentTypeForFileExtension = True + if documentType == Const.INLINE_XBRL_REPORT_PACKAGE_DOCUMENT_TYPE: + validDocumentTypeForFileExtension = self._reportType == Const.ReportType.INLINE_XBRL_REPORT_PACKAGE + elif documentType == Const.NON_INLINE_XBRL_REPORT_PACKAGE_DOCUMENT_TYPE: + validDocumentTypeForFileExtension = self._reportType == Const.ReportType.NON_INLINE_XBRL_REPORT_PACKAGE + elif documentType == Const.UNCONSTRAINED_REPORT_PACKAGE_DOCUMENT_TYPE: + validDocumentTypeForFileExtension = self._reportType == Const.ReportType.UNCONSTRAINED_REPORT_PACKAGE + else: + return Validation.error( + "rpe:unsupportedReportPackageVersion", + _("Report package document type '%(documentType)s' is not supported."), + documentType=documentType, + ) + if not validDocumentTypeForFileExtension: + return Validation.error( + "rpe:documentTypeFileExtensionMismatch", + _("Report package document type '%(documentType)s' does not match the file extension: %(reportType)s"), + documentType=documentType, + reportType=self._reportType.value if self._reportType is not None else None, + ) + return None + + def _validateReports(self) -> Validation | None: + reportEntries = getAllReportEntries(self._filesource, self._stld) + filesourceFiles = self._filesource.dir or [] + topLevelDir = f"{self._stld}/" if self._stld else "" + reportsDirExist = any(entry.startswith(f"{topLevelDir}/reports") for entry in filesourceFiles) + reportPackageJsonFileExist = f"{self._stld}/{Const.REPORT_PACKAGE_FILE}" in filesourceFiles + if not reportsDirExist and not reportPackageJsonFileExist: + return None + if self._reportType is not None: + if not any(entry.startswith(f"{self._stld}/reports") for entry in self._filesource.dir or []): + return Validation.error( + "rpe:missingReportsDirectory", + _("Report package must contain a reports directory"), + ) + if not reportEntries: + return Validation.error( + "rpe:missingReport", + _("Report package must contain at least one report"), + ) + if len(reportEntries) > 1 and not any(report.isTopLevel for report in reportEntries): + byBaseDir = Counter(report.baseDir for report in reportEntries) + if byBaseDir: + return Validation.error( + "rpe:multipleReportsInSubdirectory", + _("Report package must contain only one report"), + ) + if self._reportType == Const.ReportType.NON_INLINE_XBRL_REPORT_PACKAGE: + if len(reportEntries) > 1: + return Validation.error( + "rpe:multipleReports", + _("Non-inline XBRL report package must contain only one report"), + ) + if any( + PurePosixPath(entry.primary).suffix not in Const.NON_INLINE_REPORT_FILE_EXTENSIONS + for entry in reportEntries + ): + return Validation.error( + "rpe:incorrectReportType", + _("Non-inline XBRL report package must contain only non-inline XBRL reports"), + ) + for report in reportEntries or []: + if PurePosixPath(report.primary).suffix == Const.JSON_REPORT_FILE_EXTENSION: + reportContent = getPackageJson(self._filesource, report.primary) + if reportContent is None: + return Validation.error( + "rpe:invalidJSON", + _("Non-inline XBRL report package must contain only valid JSON reports"), + ) + reportDocumentInfo = reportContent.get("documentInfo") + if not isinstance(reportDocumentInfo, dict): + return Validation.error( + "rpe:invalidJSONStructure", + _("Report package 'documentInfo' must resolve to a JSON object: %(documentInfo)s"), + documentInfo=reportDocumentInfo, + ) + reportDocumentType = reportDocumentInfo.get("documentType") + if not isinstance(reportDocumentType, str): + return Validation.error( + "rpe:invalidJSONStructure", + _("Report package type 'documentInfo.documentType' must resolve to a JSON string: %(documentType)s"), + documentType=reportDocumentType, + ) + + elif self._reportType == Const.ReportType.INLINE_XBRL_REPORT_PACKAGE: + if len(reportEntries) > 1: + return Validation.error( + "rpe:multipleReports", + _("Inline XBRL report package must contain only one report"), + ) + if any( + PurePosixPath(entry.primary).suffix not in Const.INLINE_REPORT_FILE_EXTENSIONS + for entry in reportEntries + ): + return Validation.error( + "rpe:incorrectReportType", + _("Inline XBRL report package must contain only inline XBRL reports"), + ) + return None diff --git a/tests/integration_tests/validation/conformance_suite_configurations/xbrl_report_packages_1_0.py b/tests/integration_tests/validation/conformance_suite_configurations/xbrl_report_packages_1_0.py index e4e702b7d..fa3f59396 100644 --- a/tests/integration_tests/validation/conformance_suite_configurations/xbrl_report_packages_1_0.py +++ b/tests/integration_tests/validation/conformance_suite_configurations/xbrl_report_packages_1_0.py @@ -25,100 +25,6 @@ "V-509-xbr-with-json-in-dot-xhtml-directory": frozenset({"IOerror", "oime:invalidTaxonomy"}), "V-701-zip-with-no-taxonomy": frozenset({"IOerror", "oime:invalidTaxonomy"}), }.items()}, - expected_failure_ids=frozenset(f"report-package-conformance/index.csv:{s}" for s in [ - # 0xx - basic zip structure and package identification tests - "V-000-invalid-zip", # rpe:invalidArchiveFormat tpe:invalidArchiveFormat,0,A report package MUST conform to the .ZIP File Format Specification - # "V-001-valid-taxonomy-package", # ,0,"Minimal valid taxonomy package (not a report package). If the package has a file extension of .zip and neither [META-INF/reportPackage.json nor reports] exists, the file is treated as a taxonomy package, and further constraints and processing defined by this specification are not applied." - # "V-002-invalid-taxonomy-package-metadata", # tpe:invalidMetaDataFile,0,If a report package contains the path META-INF/taxonomyPackage.xml within the STLD then it MUST be a valid taxonomy package. - "V-003-multiple-top-level-directories", # rpe:invalidDirectoryStructure tpe:invalidDirectoryStructure,0,A report package conforming to this specification MUST contain a single top-level directory - "V-004-empty-zip", # rpe:invalidDirectoryStructure tpe:invalidDirectoryStructure,0,A report package conforming to this specification MUST contain a single top-level directory - "V-005-leading-slash-in-zip-entry", # rpe:invalidArchiveFormat tpe:invalidArchiveFormat,0,Leading slash is illegal according to the ZIP specficiation - "V-006-dot-slash-in-zip-entry", # rpe:invalidDirectoryStructure tpe:invalidDirectoryStructure,0,Forbidden dot segment - "V-007-dot-dot-slash-in-zip-entry", # rpe:invalidDirectoryStructure tpe:invalidDirectoryStructure,0,Forbidden dot dot segment - "V-008-double-slash-in-zip-entry", # rpe:invalidDirectoryStructure tpe:invalidDirectoryStructure,0,Forbidden empty segment - "V-009-backslash-in-zip-entry", # rpe:invalidArchiveFormat tpe:invalidArchiveFormat,0,Backslash is illegal according to the zip specification - "V-010-duplicate-paths-in-zip-entry", # rpe:invalidDirectoryStructure tpe:invalidDirectoryStructure,0,Two entries with the same path - "V-011-duplicate-paths-in-zip-entry-dir-under-file", # rpe:invalidDirectoryStructure tpe:invalidDirectoryStructure,0,reportPackage.json as a directory as well as a file - "V-012-encrypted-zip", # rpe:invalidArchiveFormat tpe:invalidArchiveFormat,0,A report package MUST NOT make use of the encryption features of the .ZIP File Format - - # 1xx - structural JSON constraints for reportPackage.json - "V-100-invalid-documentType", # rpe:invalidJSONStructure,0,The JSON Pointer /documentInfo/documentType MUST resolve to a string (rpe:invalidJSONStructure). - "V-101-missing-documentType", # rpe:invalidJSONStructure,0,The JSON Pointer /documentInfo/documentType MUST resolve to a string (rpe:invalidJSONStructure). - "V-102-invalid-documentInfo", # rpe:invalidJSONStructure,0,The JSON Pointer /documentInfo/documentType MUST resolve to a string (rpe:invalidJSONStructure). - "V-103-missing-documentInfo", # rpe:invalidJSONStructure,0,The JSON Pointer /documentInfo/documentType MUST resolve to a string (rpe:invalidJSONStructure). - "V-104-invalid-reportPackage-json", # rpe:invalidJSON,0,"JSON files defined by this specification MUST be valid JSON, per RFC 8259" - "V-105-invalid-reportPackage-json-duplicate-keys", # rpe:invalidJSON,0,JSON documents defined by this specification MUST have unique keys - "V-106-utf16-reportPackage-json", # rpe:invalidJSON,0,JSON documents MUST use the UTF-8 character encoding - "V-107-utf7-reportPackage-json", # rpe:invalidJSON,0,JSON documents MUST use the UTF-8 character encoding - "V-108-utf32-reportPackage-json", # rpe:invalidJSON,0,JSON documents MUST use the UTF-8 character encoding - # "V-109-utf8-reportPackage-json", # ,1,"MAY include a Unicode Byte Order Mark, although this is neither required nor recommended" - - # 2xx - co-constraints on documentType and package file extension - "V-200-unsupportedReportPackageVersion", # rpe:unsupportedReportPackageVersion,0,There will never be a version of the spec with this documentType - "V-201-missing-report-package-json", # rpe:documentTypeFileExtensionMismatch,0,"rpe:documentTypeFileExtensionMismatch is ... raised if ... The .xbr ... file extension is used, and reportPackage.json is absent" - "V-202-missing-report-package-json", # rpe:documentTypeFileExtensionMismatch,0,"rpe:documentTypeFileExtensionMismatch is ... raised if ... The .xbri ... file extension is used, and reportPackage.json is absent" - "V-203-xbri-documentType", # rpe:documentTypeFileExtensionMismatch,0,rpe:documentTypeFileExtensionMismatch is ... raised if ... One of the three document type URIs specified in Section 3.4 is used with the incorrect file extension - "V-204-xbr-documentType", # rpe:documentTypeFileExtensionMismatch,0,rpe:documentTypeFileExtensionMismatch is ... raised if ... One of the three document type URIs specified in Section 3.4 is used with the incorrect file extension - "V-205-unconstrained-documentType", # rpe:documentTypeFileExtensionMismatch,0,rpe:documentTypeFileExtensionMismatch is ... raised if ... One of the three document type URIs specified in Section 3.4 is used with the incorrect file extension - "V-206-xbri-documentType", # rpe:documentTypeFileExtensionMismatch,0,rpe:documentTypeFileExtensionMismatch is ... raised if ... One of the three document type URIs specified in Section 3.4 is used with the incorrect file extension - "V-207-xbri-without-reportPackage-json", # rpe:documentTypeFileExtensionMismatch,0,rpe:documentTypeFileExtensionMismatch is ... raised if ... One of the three document type URIs specified in Section 3.4 is used with the incorrect file extension - "V-208-xbri-without-reportPackage-json-and-reports", # rpe:documentTypeFileExtensionMismatch,0,rpe:documentTypeFileExtensionMismatch is ... raised if ... One of the three document type URIs specified in Section 3.4 is used with the incorrect file extension - "V-209-xbr-without-reportPackage-json", # rpe:documentTypeFileExtensionMismatch,0,rpe:documentTypeFileExtensionMismatch is ... raised if ... One of the three document type URIs specified in Section 3.4 is used with the incorrect file extension - "V-210-xbr-without-reportPackage-json-and-reports", # rpe:documentTypeFileExtensionMismatch,0,rpe:documentTypeFileExtensionMismatch is ... raised if ... One of the three document type URIs specified in Section 3.4 is used with the incorrect file extension - "V-211-unsupported-file-extension", # rpe:unsupportedFileExtension,0,Current report package with unsupported file extension (.xbrx) - - # 4xx - invalid.xbri packages - "V-400-xbri-without-reports-directory", # rpe:missingReportsDirectory,0,A report package MUST contain a directory called reports as a child of the STLD - "V-401-xbri-with-only-txt-in-reports-directory", # rpe:missingReport,0,.xbri file without recognised files in the reports directory - "V-402-xbri-with-xhtml-too-deep", # rpe:missingReport,0,.xbri file with .xhtml buried too deep to be recognised - "V-403-xbri-with-multiple-reports", # rpe:multipleReports,0,If the report package is an Inline XBRL report package ... then there MUST NOT be more than one report in the report package - "V-404-xbri-with-json-report", # rpe:incorrectReportType,0,If the report package is an Inline XBRL report package then the contained report MUST be an Inline XBRL Document Set - "V-405-xbri-with-xbrl-report", # rpe:incorrectReportType,0,If the report package is an Inline XBRL report package then the contained report MUST be an Inline XBRL Document Set - "V-406-xbri-with-multiple-reports-in-a-subdirectory", # rpe:multipleReportsInSubdirectory,0,.xbri file with multiple reports in a subdirectory - - # 6xx - invalid.xbr packages - "V-600-xbr-without-reports-directory", # rpe:missingReportsDirectory,0,A report package MUST contain a directory called reports as a child of the STLD - "V-601-xbr-with-only-txt-in-reports-directory", # rpe:missingReport,0,.xbr file without recognised files in the reports directory - "V-603-xbr-with-invalid-jrr", # rpe:invalidJSON,0,.xbr file with a single invalid JSON-rooted report - "V-604-xbr-with-invalid-jrr-duplicate-keys", # rpe:invalidJSON,0,.xbr file with a single invalid JSON-rooted report (duplicate keys) - "V-605-xbr-with-invalid-jrr-utf32", # rpe:invalidJSON,0,JSON documents MUST use the UTF-8 character encoding - "V-606-xbr-with-invalid-jrr-utf16", # rpe:invalidJSON,0,JSON documents MUST use the UTF-8 character encoding - "V-607-xbr-with-invalid-jrr-utf7", # rpe:invalidJSON,0,JSON documents MUST use the UTF-8 character encoding - "V-608-xbr-with-invalid-jrr-missing-documentInfo", # rpe:invalidJSONStructure,0,The JSON Pointer /documentInfo/documentType MUST resolve to a string (rpe:invalidJSONStructure). - "V-609-xbr-with-invalid-jrr-missing-documentType", # rpe:invalidJSONStructure,0,The JSON Pointer /documentInfo/documentType MUST resolve to a string (rpe:invalidJSONStructure). - "V-610-xbr-with-invalid-jrr-non-string-documentType", # rpe:invalidJSONStructure,0,The JSON Pointer /documentInfo/documentType MUST resolve to a string (rpe:invalidJSONStructure). - "V-611-xbr-with-invalid-jrr-non-object-documentInfo", # rpe:invalidJSONStructure,0,The JSON Pointer /documentInfo/documentType MUST resolve to a string (rpe:invalidJSONStructure). - "V-612-xbr-with-multiple-reports", # rpe:multipleReports,0,.xbr file with multiple reports - "V-613-xbr-with-json-and-xbrl-too-deep", # rpe:missingReport,0,.xbr file with .json and .xbrl buried too deep to be recognised - "V-614-xbr-with-xhtml-report", # rpe:incorrectReportType,0,If the report package is a non-Inline XBRL report package then the contained report MUST be either an XBRL v2.1 report or an JSON-rooted report - "V-615-xbr-with-html-report", # rpe:incorrectReportType,0,If the report package is a non-Inline XBRL report package then the contained report MUST be either an XBRL v2.1 report or an JSON-rooted report - "V-616-xbr-with-htm-report", # rpe:incorrectReportType,0,If the report package is a non-Inline XBRL report package then the contained report MUST be either an XBRL v2.1 report or an JSON-rooted report - "V-617-xbr-with-multiple-reports-in-a-subdirectory", # rpe:multipleReportsInSubdirectory,0,.xbr file with multiple reports in a subdirectory - - # 8xx - invalid.zip packages - "V-800-zip-without-reports-directory", # rpe:missingReportsDirectory,0,A report package MUST contain a directory called reports as a child of the STLD - "V-801-zip-with-only-txt-in-reports-directory", # rpe:missingReport,0,.zip file without recognised files in the reports directory - "V-802-zip-with-reports-too-deep", # rpe:missingReport,0,".zip file with .json, .xbrl and .xhtml buried too deep to be recognised" - "V-803-zip-with-multiple-reports-in-a-subdirectory", # rpe:multipleReportsInSubdirectory,0,.zip file with multiple reports in a subdirectory - "V-804-zip-with-multiple-reports-in-a-subdirectory-uppercase", # rpe:multipleReportsInSubdirectory,0,.ZIP file (uppercase) with multiple reports in a subdirectory - - # 9xx - future report packages - "V-900-future-zip", # rpe:unsupportedReportPackageVersion,0,A future report package with a .zip extension - "V-901-future-xbri", # rpe:unsupportedReportPackageVersion,0,A future report package with a .xbri extension - "V-902-future-xbr", # rpe:unsupportedReportPackageVersion,0,A future report package with a .xbr extension - "V-903-future-xbrx", # rpe:unsupportedFileExtension,0,A future report package with an as-yet-undefined extension (.xbrx) - "V-904-future-package-with-invalid-reportPackage-json", # rpe:invalidJSON,0,Future report package with invalid JSON in META-INF/reportPackage.json - "V-905-future-package-with-invalid-reportPackage-json-duplicate-keys", # rpe:invalidJSON,0,Future report package with invalid JSON in META-INF/reportPackage.json - "V-906-future-package-with-invalid-reportPackage-json-utf32", # rpe:invalidJSON,0,Future report package with invalid JSON in META-INF/reportPackage.json - "V-907-future-package-with-invalid-reportPackage-json-utf16", # rpe:invalidJSON,0,Future report package with invalid JSON in META-INF/reportPackage.json - "V-908-future-package-with-invalid-reportPackage-json-utf7", # rpe:invalidJSON,0,Future report package with invalid JSON in META-INF/reportPackage.json - "V-909-future-package-with-invalid-reportPackage-json-missing-documentInfo", # rpe:invalidJSONStructure,0,Future report package with invalid JSON in META-INF/reportPackage.json - "V-910-future-package-with-invalid-reportPackage-json-missing-documentType", # rpe:invalidJSONStructure,0,Future report package with invalid JSON in META-INF/reportPackage.json - "V-911-future-package-with-invalid-reportPackage-json-non-string-documentType", # rpe:invalidJSONStructure,0,Future report package with invalid JSON in META-INF/reportPackage.json - "V-912-future-package-with-invalid-reportPackage-json-non-object-documentInfo", # rpe:invalidJSONStructure,0,Future report package with invalid JSON in META-INF/reportPackage.json - "V-913-future-package-with-bom-in-reportPackage-json", # rpe:unsupportedReportPackageVersion,0,Future report package with Byte Order Mark in META-INF/reportPackage.json - "V-914-current-and-future-package", # rpe:unsupportedReportPackageVersion,0,META-INF as STLD means this gets interpreted as a future report package - ]), info_url="https://specifications.xbrl.org/work-product-index-taxonomy-packages-report-packages-1.0.html", membership_url="https://www.xbrl.org/join", name=PurePath(__file__).stem, From 66ee4ec4247a02002b61fd6d7861fdc2f7c9d129 Mon Sep 17 00:00:00 2001 From: "Austin M. Matherne" Date: Mon, 28 Oct 2024 10:53:11 -0600 Subject: [PATCH 7/9] Support both logging level names and int values in addToLog --- arelle/CntlrWinMain.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arelle/CntlrWinMain.py b/arelle/CntlrWinMain.py index 51da4ad09..38e6ed6fb 100644 --- a/arelle/CntlrWinMain.py +++ b/arelle/CntlrWinMain.py @@ -860,7 +860,7 @@ def backgroundLoadXbrl(self, filesource, importToDTS, selectTopView): action = _("loaded") profileStat = "load" if (reportPackage := filesource.reportPackage) and "_IXDS#?#" not in filesource.url: - for report in reportPackage.reports: + for report in reportPackage.reports or []: if len(report.fullPathFiles) > 1: self.addToLog(_("Loading error. Inline document set encountered. Enable 'Inline XBRL Document Set' plug-in and use the Open Inline Doc Set dialog from the file menu to open this filing: {0}").format(filesource.url)) continue @@ -1472,6 +1472,8 @@ def helpAbout(self, event=None): # worker threads addToLog def addToLog(self, message, messageCode="", messageArgs=None, file="", refs=[], level=logging.INFO): + if isinstance(level, str): + level = logging.getLevelNamesMapping().get(level, logging.INFO) if level < logging.INFO and not self.showDebugMessages.get(): return # skip DEBUG and INFO-RESULT messages if messageCode and messageCode not in message: # prepend message code From 6521c33801a6c094a9664d5ee8d76d600777f74b Mon Sep 17 00:00:00 2001 From: "Austin M. Matherne" Date: Mon, 28 Oct 2024 11:04:33 -0600 Subject: [PATCH 8/9] Log error messages on exceptions during zip detection --- arelle/FileSource.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arelle/FileSource.py b/arelle/FileSource.py index 5ea9031c4..d8466f4a6 100644 --- a/arelle/FileSource.py +++ b/arelle/FileSource.py @@ -192,7 +192,10 @@ def __init__(self, url: str, cntlr: Cntlr | None = None, checkIfXmlIsEis: bool = if basefile: with openFileStream(self.cntlr, basefile, 'rb') as fileStream: self.isZip = zipfile.is_zipfile(fileStream) - except Exception: + except Exception as err: + # Log the error, but don't record a validation error. + # Validation is deferred to the validation classes. Filesource is unaware of the specific errors that should be raised. + self.logError(err) pass From 6e017034516fb924c6ee79f31fdd901a900baad8 Mon Sep 17 00:00:00 2001 From: "Austin M. Matherne" Date: Mon, 28 Oct 2024 11:15:56 -0600 Subject: [PATCH 9/9] Add required directories to nuget dependabot config --- .github/dependabot.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 193a62c72..24230993d 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -27,7 +27,9 @@ updates: ignore: - dependency-name: "cx_Freeze" - package-ecosystem: nuget - directory: "/" + directories: + - /tests/integration_tests/ui_tests/ArelleGUITest + - /tests/integration_tests/ui_tests/ArelleGUITest/ArelleGUITest schedule: interval: weekly groups: