Skip to content

Commit

Permalink
Merge pull request Arelle#1293 from aaroncameron-wk/api-instance-extraction
Browse files Browse the repository at this point in the history

Support for source/response streams in instance extraction
  • Loading branch information
derekgengenbacher-wf authored Jul 29, 2024
2 parents f61ce4d + cd5dc7a commit e88ddf4
Show file tree
Hide file tree
Showing 3 changed files with 144 additions and 7 deletions.
2 changes: 1 addition & 1 deletion arelle/CntlrCmdLine.py
Original file line number Diff line number Diff line change
Expand Up @@ -1146,7 +1146,7 @@ def monitorParentProcess():
ViewFileRoleTypes.viewRoleTypes(modelXbrl, options.arcroleTypesFile, "Arcrole Types", isArcrole=True, lang=options.labelLang)

for pluginXbrlMethod in pluginClassMethods("CntlrCmdLine.Xbrl.Run"):
pluginXbrlMethod(self, options, modelXbrl, _entrypoint, responseZipStream=responseZipStream)
pluginXbrlMethod(self, options, modelXbrl, _entrypoint, sourceZipStream=sourceZipStream, responseZipStream=responseZipStream)

except (IOError, EnvironmentError) as err:
self.addToLog(_("[IOError] Failed to save output:\n {0}").format(err),
Expand Down
31 changes: 25 additions & 6 deletions arelle/plugin/inlineXbrlDocumentSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@
"""
from __future__ import annotations

from typing import BinaryIO

from arelle import FileSource, ModelXbrl, ValidateXbrlDimensions, XbrlConst, ValidateDuplicateFacts
from arelle.RuntimeOptions import RuntimeOptions
from arelle.ValidateDuplicateFacts import DeduplicationType
Expand Down Expand Up @@ -547,6 +549,8 @@ def runSaveTargetDocumentMenuCommand(
saveTargetFiling=False,
encodeSavedXmlChars=False,
xbrliNamespacePrefix=None,
sourceZipStream: BinaryIO | None = None,
responseZipStream: BinaryIO | None = None,
deduplicationType: DeduplicationType | None = None):
# skip if another class handles saving (e.g., EdgarRenderer)
if saveTargetInstanceOverriden(deduplicationType):
Expand Down Expand Up @@ -579,9 +583,13 @@ def runSaveTargetDocumentMenuCommand(
thread.daemon = True
thread.start()
else:
filingZip = None
filingFiles = set()
if responseZipStream is not None:
filingZip = zipfile.ZipFile(responseZipStream, 'w', zipfile.ZIP_DEFLATED, True)
if saveTargetFiling:
filingZip = zipfile.ZipFile(os.path.splitext(targetFilename)[0] + ".zip", 'w', zipfile.ZIP_DEFLATED, True)
filingFiles = set()
if filingZip is None:
filingZip = zipfile.ZipFile(os.path.splitext(targetFilename)[0] + ".zip", 'w', zipfile.ZIP_DEFLATED, True)
# copy referencedDocs to two levels
def addRefDocs(doc):
for refDoc in doc.referencesDocument.keys():
Expand All @@ -590,16 +598,25 @@ def addRefDocs(doc):
addRefDocs(refDoc)
addRefDocs(modelDocument)
else:
filingZip = None
filingFiles = None
saveTargetDocument(modelDocument.modelXbrl, targetFilename, targetSchemaRefs, filingZip, filingFiles,
encodeSavedXmlChars=encodeSavedXmlChars, xbrliNamespacePrefix=xbrliNamespacePrefix,
deduplicationType=deduplicationType)
if saveTargetFiling:
instDir = os.path.dirname(modelDocument.uri.split(IXDS_DOC_SEPARATOR)[0])
for refFile in filingFiles:
if refFile.startswith(instDir):
filingZip.write(refFile, modelDocument.relativeUri(refFile))
copyFilingPaths = [
(f, modelDocument.relativeUri(f))
for f in filingFiles if f.startswith(instDir)
]
if sourceZipStream is not None:
with zipfile.ZipFile(sourceZipStream, 'r') as sourceZip:
for filingFile, arcname in copyFilingPaths:
filingFile = filingFile.replace(instDir + os.sep, "")
sourceFile = sourceZip.read(filingFile)
filingZip.writestr(arcname, sourceFile)
else:
for filingFile, arcname in copyFilingPaths:
filingZip.write(filingFile, arcname)


def commandLineOptionExtender(parser, *args, **kwargs):
Expand Down Expand Up @@ -730,6 +747,8 @@ def commandLineXbrlRun(cntlr, options: RuntimeOptions, modelXbrl, *args, **kwarg
saveTargetFiling=getattr(options, "saveTargetFiling", False),
encodeSavedXmlChars=getattr(options, "encodeSavedXmlChars", False),
xbrliNamespacePrefix=getattr(options, "xbrliNamespacePrefix"),
sourceZipStream=kwargs.get("sourceZipStream", None),
responseZipStream=kwargs.get("responseZipStream", None),
deduplicationType=deduplicationType)

def testcaseVariationReadMeFirstUris(modelTestcaseVariation):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
from __future__ import annotations

import io
import os
import urllib.request
import zipfile
from pathlib import Path
from shutil import rmtree

import regex

from arelle.RuntimeOptions import RuntimeOptions
from arelle.api.Session import Session
from arelle.logging.handlers.StructuredMessageLogHandler import StructuredMessageLogHandler
from tests.integration_tests.integration_test_util import get_s3_uri
from tests.integration_tests.scripts.script_util import parse_args, assert_result, prepare_logfile, validate_log_xml, validate_log_file

# Integration script: download a report zip, run Arelle through the Python API
# (Session) with the inlineXbrlDocumentSet plugin to save the target instance
# via in-memory source/response zip streams, append the extracted instance to
# the report zip, validate it in a second run, and finally check both runs'
# logs for the expected messages.

# Collects whatever validate_log_xml returns for each run; handed to
# assert_result once both logs have been checked.
errors = []
this_file = Path(__file__)
# NOTE(review): the meaning of arelle/cache/cache_version_id is defined by
# script_util.parse_args, which is not visible here -- confirm there.
args = parse_args(
this_file.stem,
"Extract and validate IXDS instance using Arelle's Python API.",
arelle=False,
cache='japan_ixds.zip',
cache_version_id='PiPwS2lDqbtid8K3dbUlF0m.KIa5Jm8E',
)
arelle_offline = args.offline
working_directory = Path(args.working_directory)
test_directory = Path(args.test_directory)
# One log file per Arelle run: "save" for the extraction, "validate" for the
# validation of the extracted instance.
arelle_log_file1 = prepare_logfile(test_directory, this_file, name="save")
arelle_log_file2 = prepare_logfile(test_directory, this_file, name="validate")
report_zip_path = test_directory / 'report.zip'
# manifest_path and extracted_final_path are built *under* report_zip_path,
# i.e. they name entries inside the zip rather than files on disk.
# Presumably Arelle resolves them against the archive supplied as
# sourceZipStream -- TODO confirm.
manifest_path = report_zip_path / "manifest.xml"
extracted_zip_path = test_directory / "extracted.zip"
extracted_instance_path = test_directory / "tse-acedjpfr-19990-2023-06-30-01-2023-08-18_extracted.xbrl"
extracted_final_path = report_zip_path / "tse-acedjpfr-19990-2023-06-30-01-2023-08-18_extracted.xbrl"
report_zip_url = get_s3_uri(
'ci/packages/JapaneseXBRLReport.zip',
version_id='M7vTPhHhir1rOm7nSMPiCGcbCA0ksObh'
)

# Fetch the report package to report_zip_path on disk.
print(f"Downloading report: {report_zip_url}")
urllib.request.urlretrieve(report_zip_url, report_zip_path)

# First run: load the manifest with the inlineXbrlDocumentSet plugin and the
# saveTargetInstance/saveTargetFiling plugin options. The report zip is passed
# as sourceZipStream and an in-memory buffer as responseZipStream.
print(f"Extracting instance: {manifest_path}")
with io.BytesIO() as extracted_stream:
with open(report_zip_path, 'rb') as stream:
options = RuntimeOptions(
entrypointFile=str(manifest_path),
internetConnectivity='offline' if arelle_offline else 'online',
keepOpen=True,
logFile=str(arelle_log_file1),
logFormat="[%(messageCode)s] %(message)s - %(file)s",
pluginOptions={
'deduplicateIxbrlFacts': 'consistent-pairs',
'saveTargetFiling': True,
'saveTargetInstance': True,
},
plugins='inlineXbrlDocumentSet',
strictOptions=False,
)
with Session() as session:
session.run(
options,
sourceZipStream=stream,
responseZipStream=extracted_stream,
)
# XML-format logs of the first run; checked against expected_infos below.
log_xml1 = session.get_logs('xml')
# Persist the bytes accumulated in the response buffer as extracted.zip.
print(f"Writing extracted stream to zip: {extracted_zip_path}")
with open(extracted_zip_path, 'wb') as extracted_file:
extracted_file.write(extracted_stream.getvalue())
# Unpack extracted.zip into test_directory so the instance exists on disk.
print(f"Extracting instance document: {extracted_instance_path}")
with zipfile.ZipFile(extracted_zip_path, "r") as zip_ref:
zip_ref.extractall(test_directory)
# Append (mode "a") the extracted instance to the report zip under its bare
# file name, so it sits at the archive root as extracted_final_path expects.
print(f"Copying instance document to report zip: {extracted_instance_path}")
with zipfile.ZipFile(report_zip_path, "a") as zip_ref:
zip_ref.write(
extracted_instance_path,
arcname=extracted_instance_path.name
)
# Second run: validate the extracted instance, again reading the (now
# updated) report zip through sourceZipStream.
with open(report_zip_path, 'rb') as stream:
# Verify no schemaImportMissing errors in extracted doc
print(f"Validating instance: {extracted_final_path}")
options = RuntimeOptions(
entrypointFile=str(extracted_final_path),
internetConnectivity='offline' if arelle_offline else 'online',
keepOpen=True,
logFile=str(arelle_log_file2),
logFormat="[%(messageCode)s] %(message)s - %(file)s",
strictOptions=False,
validate=True,
validateDuplicateFacts='consistent',
)
with Session() as session:
session.run(options, sourceZipStream=stream)
# XML-format logs of the second run; checked against expected_warnings below.
log_xml2 = session.get_logs('xml')

# Each dict maps a compiled message pattern to a number -- presumably the
# expected count of matching log entries (see validate_log_xml to confirm).
print(f"Checking for log errors: {arelle_log_file1}")
expected_infos = {
regex.compile(r'^\[info:deduplicatedFact] Duplicate fact was excluded'): 33,
}
errors += validate_log_xml(log_xml1, expected_results={"info": expected_infos})

# The extracted instance was saved with deduplication, so duplicate-fact
# warnings are listed with 0 for the validation run.
print(f"Checking for log errors: {arelle_log_file2}")
expected_warnings = {
regex.compile(r'^\[arelle:duplicateFacts] Duplicate fact set '): 0,
}
errors += validate_log_xml(log_xml2, expected_results={"warning": expected_warnings})

assert_result(errors)

# Remove the three files this script created. Only PermissionError is caught
# and reported; any other exception raised by os.unlink propagates.
# NOTE(review): these paths are spelled out under working_directory instead of
# reusing extracted_zip_path / report_zip_path / extracted_instance_path --
# confirm that test_directory is working_directory / 'python_api_instance_extraction'.
print("Cleaning up")
try:
os.unlink(working_directory / 'python_api_instance_extraction' / 'extracted.zip')
os.unlink(working_directory / 'python_api_instance_extraction' / 'report.zip')
os.unlink(working_directory / 'python_api_instance_extraction' / 'tse-acedjpfr-19990-2023-06-30-01-2023-08-18_extracted.xbrl')
except PermissionError as exc:
print(f"Failed to cleanup test files: {exc}")

0 comments on commit e88ddf4

Please sign in to comment.