Merge pull request #2042 from openoereb/default_toc_length_config

Default toc length config
openoereb · Sep 4, 2024 · a6c972f · a6c972f
2 parents 86fe195 + ea54626
commit a6c972f
Show file tree

Hide file tree

Showing 5 changed files with 69 additions and 33 deletions.
diff --git a/dev/config/pyramid_oereb.yml.mako b/dev/config/pyramid_oereb.yml.mako
@@ -85,9 +85,18 @@ pyramid_oereb:
     # Will make an estimation of the total length of the Table of Content (TOC) and control that the page
     # numbering in the output pdf is consistent with TOC numbering. If it is known that the TOC is very long and
     # could run over more than one page, it is preferred to set this to true. The drawback is that it might need
-    # more time to generate the PDF. If set to false, it will assume that only one TOC page exists, and this can
-    # lead to wrong numbering in the TOC.
-    compute_toc_pages: true
+    # more time to generate the PDF. If set to false, the expected_toc_length setting below will be used. If that
+    # is not set either, a single TOC page is assumed, which can lead to wrong numbering in the TOC; this is then
+    # fixed by a second PDF extract call, which has an impact on performance.
+    compute_toc_pages: false
+    # In order to skip the computation of the estimated number of TOC pages, which might return an erroneous result
+    # for your setting, you can specify a default for the number of TOC pages. For most of the cantons the number of
+    # TOC pages is fairly constant unless a real estate is affected by no restrictions or by a huge number of them.
+    # In both cases (computing an estimate or setting a default for the number of TOC pages) the exact number of TOC
+    # pages is extracted from the created PDF, and if it differs from the expected value the PDF is created a second
+    # time with the correct page numbers.
+    # Note that if "compute_toc_pages" is set to true, "expected_toc_length" is not taken into account.
+    expected_toc_length: 2
     # Specify any additional URL parameters that the print shall use for WMS calls
     wms_url_params:
       TRANSPARENT: 'true'

diff --git a/doc/source/changes.rst b/doc/source/changes.rst
@@ -6,6 +6,13 @@ Changes/Hints for migration
 This chapter will give you hints on how to handle version migration, in particular regarding what you may need
 to adapt in your project configuration, database etc. when upgrading to a new version.
 
+Version 2.6.0
+-------------
+* New parameter 'expected_toc_length' allows defining a default number of table of contents (TOC) pages,
+  avoiding a second call for the PDF extract in most cases. This value may be set if most of the PDF
+  extracts have the same number of TOC pages. It complements the 'compute_toc_pages' parameter. If the
+  latter is set to true, 'expected_toc_length' is ignored.
+
 Version 2.5.3
 -------------
 Feature and maintenance release:

diff --git a/pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py b/pyramid_oereb/contrib/print_proxy/mapfish_print/mapfish_print.py
@@ -73,17 +73,20 @@ def __call__(self, value, system):
         extract_as_dict = self._render(extract_record, value[1])
         feature_geometry = mapping(extract_record.real_estate.limit)
 
-        if Config.get('print', {}).get('compute_toc_pages', False):
+        print_config = Config.get('print', {})
+
+        if print_config.get('compute_toc_pages', False):
             extract_as_dict['nbTocPages'] = TocPages(extract_as_dict).getNbPages()
         else:
-            extract_as_dict['nbTocPages'] = 1
+            if print_config.get('expected_toc_length') and int(print_config.get('expected_toc_length')) > 0:
+                extract_as_dict['nbTocPages'] = print_config.get('expected_toc_length')
+            else:
+                extract_as_dict['nbTocPages'] = 1
 
         # set the global_datetime variable so that it can be used later for the archive
         self.set_global_datetime(extract_as_dict['CreationDate'])
         self.convert_to_printable_extract(extract_as_dict, feature_geometry)
 
-        print_config = Config.get('print', {})
-
         extract_as_dict['Display_RealEstate_SubunitOfLandRegister'] = print_config.get(
             'display_real_estate_subunit_of_land_register', True
         )
@@ -97,7 +100,7 @@ def __call__(self, value, system):
         )
 
         spec = {
-            'layout': Config.get('print', {})['template_name'],
+            'layout': print_config['template_name'],
             'outputFormat': 'pdf',
             'lang': self._language,
             'attributes': extract_as_dict,
@@ -108,37 +111,41 @@ def __call__(self, value, system):
         if self._request.GET.get('getspec', 'no') != 'no':
             response.headers['Content-Type'] = 'application/json; charset=UTF-8'
             return json.dumps(spec, sort_keys=True, indent=4)
-        pdf_url = urlparse.urljoin(Config.get('print', {})['base_url'] + '/', 'buildreport.pdf')
-        pdf_headers = Config.get('print', {})['headers']
+        pdf_url = urlparse.urljoin(print_config['base_url'] + '/', 'buildreport.pdf')
+        pdf_headers = print_config['headers']
         print_result = requests.post(
             pdf_url,
             headers=pdf_headers,
             data=json.dumps(spec)
         )
         try:
-            if Config.get('print', {}).get('compute_toc_pages', False):
-                with io.BytesIO() as pdf:
-                    pdf.write(print_result.content)
-                    pdf_reader = PdfReader(pdf)
-                    x = []
-                    for i in range(len(pdf_reader.outline)):
-                        if isinstance(pdf_reader.outline[i], list):
-                            x.append(pdf_reader.outline[i][0]['/Page']['/StructParents'])
-                        else:
-                            x.append(pdf_reader.outline[i]['/Page']['/StructParents'])
-                    try:
-                        true_nb_of_toc = min(x)-1
-                    except ValueError:
-                        true_nb_of_toc = 1
-
-                    if true_nb_of_toc != extract_as_dict['nbTocPages']:
-                        log.warning('nbTocPages in result pdf: {} are not equal to the one predicted : {}, request new pdf'.format(true_nb_of_toc,extract_as_dict['nbTocPages'])) # noqa
-                        extract_as_dict['nbTocPages'] = true_nb_of_toc
-                        print_result = requests.post(
-                            pdf_url,
-                            headers=pdf_headers,
-                            data=json.dumps(spec)
-                        )
+            log.debug('Validation of the TOC length with compute_toc_pages set to {} and expected_toc_length set to {}'.format(print_config.get('compute_toc_pages'), print_config.get('expected_toc_length'))) # noqa
+            with io.BytesIO() as pdf:
+                pdf.write(print_result.content)
+                pdf_reader = PdfReader(pdf)
+                x = []
+                for i in range(len(pdf_reader.outline)):
+                    if isinstance(pdf_reader.outline[i], list):
+                        x.append(pdf_reader.outline[i][0]['/Page']['/StructParents'])
+                    else:
+                        x.append(pdf_reader.outline[i]['/Page']['/StructParents'])
+                try:
+                    true_nb_of_toc = min(x)-1
+                except ValueError:
+                    true_nb_of_toc = 1
+
+                log.debug('True number of TOC pages is {}, expected number was {}'.format(true_nb_of_toc, extract_as_dict['nbTocPages'])) # noqa
+                if true_nb_of_toc != extract_as_dict['nbTocPages']:
+                    log.warning('nbTocPages in result pdf: {} are not equal to the one predicted : {}, request new pdf'.format(true_nb_of_toc,extract_as_dict['nbTocPages'])) # noqa
+                    log.debug('Secondary PDF extract call STARTED')
+                    extract_as_dict['nbTocPages'] = true_nb_of_toc
+                    print_result = requests.post(
+                        pdf_url,
+                        headers=pdf_headers,
+                        data=json.dumps(spec)
+                    )
+                    log.debug('Secondary PDF extract call to fix TOC pages number DONE')
+
         except PdfReadError as e:
             err_msg = 'a problem occurred while generating the pdf file'
             log.error(err_msg + ': ' + str(e))

diff --git a/tests/contrib.print_proxy.mapfish_print/resources/test_config.yml b/tests/contrib.print_proxy.mapfish_print/resources/test_config.yml
@@ -4,6 +4,8 @@ pyramid_oereb:
     wms_url_params:
       TRANSPARENT: 'true'
       OTHERCUSTOM: 'myvalue'
+    compute_toc_pages: false
+    expected_toc_length: 2
 
   theme:
     source:

diff --git a/tests/contrib.print_proxy.mapfish_print/test_mapfish_print_configuration.py b/tests/contrib.print_proxy.mapfish_print/test_mapfish_print_configuration.py
@@ -100,3 +100,14 @@ def test_default_wms_url_param_config(DummyRenderInfo):
     config = renderer.get_wms_url_params()
     # Do the check for this test. Value should be the default setting.
     assert config == {'TRANSPARENT': 'true'}
+
+
+def test_toc_pages_default_config():
+    Config._config = None
+    Config.init('./tests/contrib.print_proxy.mapfish_print/resources/test_config.yml', 'pyramid_oereb')
+    compute_toc_pages = Config.get('print', {}).get('compute_toc_pages')
+    expected_toc_length = Config.get('print', {}).get('expected_toc_length')
+
+    assert isinstance(compute_toc_pages, bool)
+    assert bool(compute_toc_pages) is False
+    assert expected_toc_length == 2