Skip to content

Commit

Permalink
Merge pull request Arelle#1391 from austinmatherne-wk/XT-4012
Browse files Browse the repository at this point in the history
Validate Report Packages
  • Loading branch information
derekgengenbacher-wf authored Oct 28, 2024
2 parents 6ff95f6 + 6e01703 commit f242885
Show file tree
Hide file tree
Showing 11 changed files with 675 additions and 328 deletions.
4 changes: 3 additions & 1 deletion .github/dependabot.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ updates:
ignore:
- dependency-name: "cx_Freeze"
- package-ecosystem: nuget
directory: "/"
directories:
- /tests/integration_tests/ui_tests/ArelleGUITest
- /tests/integration_tests/ui_tests/ArelleGUITest/ArelleGUITest
schedule:
interval: weekly
groups:
Expand Down
4 changes: 3 additions & 1 deletion arelle/CntlrWinMain.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,7 +860,7 @@ def backgroundLoadXbrl(self, filesource, importToDTS, selectTopView):
action = _("loaded")
profileStat = "load"
if (reportPackage := filesource.reportPackage) and "_IXDS#?#" not in filesource.url:
for report in reportPackage.reports:
for report in reportPackage.reports or []:
if len(report.fullPathFiles) > 1:
self.addToLog(_("Loading error. Inline document set encountered. Enable 'Inline XBRL Document Set' plug-in and use the Open Inline Doc Set dialog from the file menu to open this filing: {0}").format(filesource.url))
continue
Expand Down Expand Up @@ -1472,6 +1472,8 @@ def helpAbout(self, event=None):

# worker threads addToLog
def addToLog(self, message, messageCode="", messageArgs=None, file="", refs=[], level=logging.INFO):
if isinstance(level, str):
level = logging.getLevelNamesMapping().get(level, logging.INFO)
if level < logging.INFO and not self.showDebugMessages.get():
return # skip DEBUG and INFO-RESULT messages
if messageCode and messageCode not in message: # prepend message code
Expand Down
91 changes: 58 additions & 33 deletions arelle/FileSource.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,10 @@
from arelle.Cntlr import Cntlr


archivePathSeparators = (".zip" + os.sep, ".tar.gz" + os.sep, ".eis" + os.sep, ".xml" + os.sep, ".xfd" + os.sep, ".frm" + os.sep, '.taxonomyPackage.xml' + os.sep) + \
archivePathSeparators = (".zip" + os.sep, ".xbr" + os.sep, ".xbri" + os.sep, ".tar.gz" + os.sep, ".eis" + os.sep, ".xml" + os.sep, ".xfd" + os.sep, ".frm" + os.sep, '.taxonomyPackage.xml' + os.sep) + \
((".zip/", ".tar.gz/", ".eis/", ".xml/", ".xfd/", ".frm/", '.taxonomyPackage.xml/') if os.sep != "/" else ()) #acomodate windows and http styles

archiveFilenameSuffixes = {".zip", ".tar.gz", ".eis", ".xml", ".xfd", ".frm"}
archiveFilenameSuffixes = {".zip", ".xbr", ".xbri", ".tar.gz", ".eis", ".xml", ".xfd", ".frm"}

POST_UPLOADED_ZIP = os.sep + "POSTupload.zip"
SERVER_WEB_CACHE = os.sep + "_HTTP_CACHE"
Expand Down Expand Up @@ -67,13 +67,23 @@ def openFileSource(
selection: str | None = archivepathSelection[1]

assert selection is not None
selectionIsEmbeddedZip = False
if (
sourceFileSource is not None
and sourceFileSource.dir is not None
and sourceFileSource.isArchive
and selection in sourceFileSource.dir
and isReportPackageExtension(selection)
):
if isReportPackageExtension(selection):
selectionIsEmbeddedZip = True
else:
try:
assert isinstance(sourceFileSource.fs, zipfile.ZipFile)
with sourceFileSource.fs.open(selection) as f:
selectionIsEmbeddedZip = zipfile.is_zipfile(f)
except Exception:
pass
if selectionIsEmbeddedZip:
assert cntlr is not None
filesource = FileSource(filename, cntlr)
selection = None
Expand Down Expand Up @@ -101,39 +111,39 @@ def archiveFilenameParts(filename: str | None, checkIfXmlIsEis: bool = False) ->

class FileNamedStringIO(io.StringIO): # provide string IO in memory but behave as a fileName string
def __init__(self, fileName: str, *args: Any, **kwargs: Any) -> None:
super(FileNamedStringIO, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)
self.fileName = fileName

def close(self) -> None:
del self.fileName
super(FileNamedStringIO, self).close()
super().close()

def __str__(self) -> str:
return self.fileName

class FileNamedTextIOWrapper(io.TextIOWrapper): # provide string IO in memory but behave as a fileName string
def __init__(self, fileName: str, *args: Any, **kwargs: Any):
super(FileNamedTextIOWrapper, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)
self.fileName = fileName

def __str__(self) -> str:
return self.fileName

class FileNamedBytesIO(io.BytesIO): # provide Bytes IO in memory but behave as a fileName string
def __init__(self, fileName: str, *args: Any, **kwargs: Any) -> None:
super(FileNamedBytesIO, self).__init__(*args, **kwargs)
super().__init__(*args, **kwargs)
self.fileName = fileName

def close(self) -> None:
del self.fileName
super(FileNamedBytesIO, self).close()
super().close()

def __str__(self) -> str:
return self.fileName

class ArchiveFileIOError(IOError):
class ArchiveFileIOError(OSError):
def __init__(self, fileSource: FileSource, errno: int, fileName: str) -> None:
super(ArchiveFileIOError, self).__init__(errno,
super().__init__(errno,
_("Archive {}").format(fileSource.url),
fileName)
self.fileName = fileName
Expand All @@ -150,6 +160,8 @@ class FileSource:
url: str | list[str] | None
basefile: str | list[str] | None
xfdDocument: etree._ElementTree | None
taxonomyPackage: dict[str, str | dict[str, str]] | None
mappedPaths: dict[str, str] | None

def __init__(self, url: str, cntlr: Cntlr | None = None, checkIfXmlIsEis: bool = False) -> None:
self.url = str(url) # allow either string or FileNamedStringIO
Expand All @@ -173,6 +185,19 @@ def __init__(self, url: str, cntlr: Cntlr | None = None, checkIfXmlIsEis: bool =
self.taxonomyPackage = None # taxonomy package
self.mappedPaths = None # remappings of path segments may be loaded by taxonomyPackage manifest
self.isValid = True # filesource is assumed to be valid until a call to open fails.
if not self.isZip:
# Try to detect zip files with unrecognized file extensions.
try:
basefile = self.cntlr.webCache.getfilename(self.url) if self.cntlr is not None else self.url
if basefile:
with openFileStream(self.cntlr, basefile, 'rb') as fileStream:
self.isZip = zipfile.is_zipfile(fileStream)
except Exception as err:
# Log the error, but don't record a validation error.
# Validation is deferred to the validation classes. Filesource is unaware of the specific errors that should be raised.
self.logError(err)
pass


# for SEC xml files, check if it's an EIS anyway
if (not (self.isZip or self.isEis or self.isXfd or self.isRss) and
Expand All @@ -184,12 +209,12 @@ def __init__(self, url: str, cntlr: Cntlr | None = None, checkIfXmlIsEis: bool =
assert self.cntlr is not None
_filename = self.cntlr.webCache.getfilename(self.url)
assert _filename is not None
file = open(_filename, 'r', errors='replace')
file = open(_filename, errors='replace')
l = file.read(256) # may have comments before first element
file.close()
if re.match(r"\s*(<[?]xml[^?]+[?]>)?\s*(<!--.*-->\s*)*<(cor[a-z]*:|sdf:|\w+:)?edgarSubmission", l):
self.isEis = True
except EnvironmentError as err:
except OSError as err:
self.isValid = False
if self.cntlr:
self.cntlr.addToLog(_("[{0}] {1}").format(type(err).__name__, err))
Expand All @@ -215,7 +240,7 @@ def open(self, reloadCache: bool = False) -> None:
fileStream = openFileStream(self.cntlr, self.basefile, 'rb')
self.fs = zipfile.ZipFile(fileStream, mode="r")
self.isOpen = True
except (EnvironmentError, zipfile.BadZipFile) as err:
except (OSError, zipfile.BadZipFile) as err:
self.isValid = False
self.logError(err)
pass
Expand All @@ -224,7 +249,7 @@ def open(self, reloadCache: bool = False) -> None:
assert isinstance(self.basefile, str)
self.fs = tarfile.open(self.basefile, "r:gz")
self.isOpen = True
except EnvironmentError as err:
except OSError as err:
self.isValid = False
self.logError(err)
pass
Expand All @@ -248,7 +273,7 @@ def open(self, reloadCache: bool = False) -> None:
break
buf += zlib.decompress(compressedBytes)
file.close()
except EnvironmentError as err:
except OSError as err:
self.isValid = False
self.logError(err)
pass
Expand All @@ -268,7 +293,7 @@ def open(self, reloadCache: bool = False) -> None:
self.eisDocument = etree.parse(_file, parser=parser)
_file.close()
self.isOpen = True
except (EnvironmentError, etree.LxmlError) as err:
except (OSError, etree.LxmlError) as err:
self.isValid = False
self.logError(err)
return # provide error message later
Expand Down Expand Up @@ -304,7 +329,7 @@ def open(self, reloadCache: bool = False) -> None:
ungzippedBytes += readBytes
if len(readBytes) == 0 or (lenUncomp - lenRead) <= 0:
break
except IOError as err:
except OSError:
self.isValid = False
pass # provide error message later

Expand All @@ -322,7 +347,7 @@ def open(self, reloadCache: bool = False) -> None:
self.xfdDocument = etree.parse(file)
file.close()
self.isOpen = True
except (EnvironmentError, etree.LxmlError) as err:
except (OSError, etree.LxmlError) as err:
self.isValid = False
self.logError(err)
return # provide error message later
Expand All @@ -332,7 +357,7 @@ def open(self, reloadCache: bool = False) -> None:
assert isinstance(self.basefile, str)
self.rssDocument = etree.parse(self.basefile)
self.isOpen = True
except (EnvironmentError, etree.LxmlError) as err:
except (OSError, etree.LxmlError) as err:
self.isValid = False
self.logError(err)
return # provide error message later
Expand All @@ -343,16 +368,16 @@ def open(self, reloadCache: bool = False) -> None:
self.loadTaxonomyPackageMappings()

def loadTaxonomyPackageMappings(self, errors: list[str] = [], expectTaxonomyPackage: bool = False) -> None:
if not self.mappedPaths and (self.taxonomyPackageMetadataFiles or expectTaxonomyPackage):
if not self.mappedPaths and (self.taxonomyPackageMetadataFiles or expectTaxonomyPackage) and self.cntlr:
if PackageManager.validateTaxonomyPackage(self.cntlr, self, errors=errors):
assert isinstance(self.baseurl, str)
metadata = self.baseurl + os.sep + self.taxonomyPackageMetadataFiles[0]
self.taxonomyPackage = PackageManager.parsePackage(self.cntlr, self, metadata, # type: ignore[no-untyped-call]
self.taxonomyPackage = PackageManager.parsePackage(self.cntlr, self, metadata,
os.sep.join(os.path.split(metadata)[:-1]) + os.sep,
errors=errors)

assert self.taxonomyPackage is not None
self.mappedPaths = self.taxonomyPackage.get("remappings")
self.mappedPaths = cast('dict[str, str]', self.taxonomyPackage.get("remappings"))

def openZipStream(self, sourceZipStream: str) -> None:
if not self.isOpen:
Expand Down Expand Up @@ -418,7 +443,7 @@ def reportPackage(self) -> ReportPackage | None:
self._reportPackage: ReportPackage | None
return self._reportPackage
except AttributeError:
self._reportPackage = ReportPackage.fromFileSource(self) if self.isZip else None
self._reportPackage = ReportPackage.fromFileSource(self)
return self._reportPackage

@property
Expand Down Expand Up @@ -526,8 +551,8 @@ def file(
b = stripDeclarationBytes(b)
return (FileNamedTextIOWrapper(filepath, io.BytesIO(b), encoding=encoding),
encoding)
except KeyError:
raise ArchiveFileIOError(self, errno.ENOENT, archiveFileName)
except KeyError as err:
raise ArchiveFileIOError(self, errno.ENOENT, archiveFileName) from err
elif archiveFileSource.isTarGz:
try:
assert isinstance(archiveFileSource.fs, tarfile.TarFile)
Expand All @@ -543,13 +568,13 @@ def file(
b = stripDeclarationBytes(b)
return (FileNamedTextIOWrapper(filepath, io.BytesIO(b), encoding=encoding),
encoding)
except KeyError:
except KeyError as err:
# Note 2022-09-06
# The following error is raised by mypy, indicating there's a bug here:
# Missing positional argument "fileName"
# Not fixing this bug as a part of this PR
# Also expecting second argument to be int but is str here
raise ArchiveFileIOError(self, archiveFileName) # type: ignore[call-arg, arg-type]
raise ArchiveFileIOError(self, archiveFileName) from err # type: ignore[call-arg, arg-type]
elif archiveFileSource.isEis:
assert self.eisDocument is not None
for docElt in self.eisDocument.iter(tag="{http://www.sec.gov/edgar/common}document"):
Expand Down Expand Up @@ -723,7 +748,7 @@ def dir(self) -> list[str] | None:
XmlUtil.text(descendantPubDate),
instDoc))
self.filesDir = files
except (EnvironmentError,
except (OSError,
etree.LxmlError) as err:
pass
elif self.isInstalledTaxonomyPackage:
Expand Down Expand Up @@ -787,10 +812,10 @@ def openFileStream(
if isHttpUrl(filepath) and cntlr:
_cacheFilepath = cntlr.webCache.getfilename(filepath, normalize=True) # normalize is separate step in ModelDocument retrieval, combined here
if _cacheFilepath is None:
raise IOError(_("Unable to open file: {0}.").format(filepath))
raise OSError(_("Unable to open file: {0}.").format(filepath))
filepath = _cacheFilepath
if not filepath and cntlr:
raise IOError(_("Unable to open file: \"{0}\".").format(filepath))
raise OSError(_("Unable to open file: \"{0}\".").format(filepath))
# file path may be server (or memcache) or local file system
if filepath.startswith(SERVER_WEB_CACHE) and cntlr:
filestream = None
Expand Down Expand Up @@ -843,7 +868,7 @@ def openXmlFileStream(
# allow filepath to close
# this may not be needed for Mac or Linux, needs confirmation!!!
if text is None: # ok to read as utf-8
return io.open(filepath, 'rt', encoding=encoding or 'utf-8'), encoding
return open(filepath, encoding=encoding or 'utf-8'), encoding
else:
if stripDeclaration:
text = stripDeclarationText(text)
Expand Down Expand Up @@ -873,7 +898,7 @@ def saveFile(cntlr: Cntlr, filepath: str, contents: str, encoding: str | None =
if isHttpUrl(filepath):
_cacheFilepath = cntlr.webCache.getfilename(filepath)
if _cacheFilepath is None:
raise IOError(_("Unable to open file: {0}.").format(filepath))
raise OSError(_("Unable to open file: {0}.").format(filepath))
filepath = _cacheFilepath
# file path may be server (or memcache) or local file system
if filepath.startswith(SERVER_WEB_CACHE):
Expand All @@ -884,7 +909,7 @@ def saveFile(cntlr: Cntlr, filepath: str, contents: str, encoding: str | None =
_dirpath = os.path.dirname(filepath)
if not os.path.exists(_dirpath): # directory must exist before io.open
os.makedirs(_dirpath)
with io.open(filepath, mode, encoding=(encoding or 'utf-8')) as f:
with open(filepath, mode, encoding=(encoding or 'utf-8')) as f:
f.write(contents)

# GAE Blobcache
Expand Down
Loading

0 comments on commit f242885

Please sign in to comment.