From 411611b51f013c48970096b82abfca9304dc2fa6 Mon Sep 17 00:00:00 2001 From: anton-climate Date: Mon, 15 Apr 2024 11:24:26 +1000 Subject: [PATCH] Test failures --- LICENSE | 396 +++++++++++++ nmltab.py | 745 ------------------------- run_summary.py | 1074 ------------------------------------ sync_data.sh | 166 ------ tidy_restarts.py | 71 --- resub.sh => tools/resub.sh | 0 6 files changed, 396 insertions(+), 2056 deletions(-) create mode 100644 LICENSE delete mode 100755 nmltab.py delete mode 100755 run_summary.py delete mode 100755 sync_data.sh delete mode 100755 tidy_restarts.py rename resub.sh => tools/resub.sh (100%) diff --git a/LICENSE b/LICENSE new file mode 100644 index 00000000..da6ab6cc --- /dev/null +++ b/LICENSE @@ -0,0 +1,396 @@ +Attribution 4.0 International + +======================================================================= + +Creative Commons Corporation ("Creative Commons") is not a law firm and +does not provide legal services or legal advice. Distribution of +Creative Commons public licenses does not create a lawyer-client or +other relationship. Creative Commons makes its licenses and related +information available on an "as-is" basis. Creative Commons gives no +warranties regarding its licenses, any material licensed under their +terms and conditions, or any related information. Creative Commons +disclaims all liability for damages resulting from their use to the +fullest extent possible. + +Using Creative Commons Public Licenses + +Creative Commons public licenses provide a standard set of terms and +conditions that creators and other rights holders may use to share +original works of authorship and other material subject to copyright +and certain other rights specified in the public license below. The +following considerations are for informational purposes only, are not +exhaustive, and do not form part of our licenses. + + Considerations for licensors: Our public licenses are + intended for use by those authorized to give the public + permission to use material in ways otherwise restricted by + copyright and certain other rights. Our licenses are + irrevocable. Licensors should read and understand the terms + and conditions of the license they choose before applying it. + Licensors should also secure all rights necessary before + applying our licenses so that the public can reuse the + material as expected. Licensors should clearly mark any + material not subject to the license. This includes other CC- + licensed material, or material used under an exception or + limitation to copyright. More considerations for licensors: + wiki.creativecommons.org/Considerations_for_licensors + + Considerations for the public: By using one of our public + licenses, a licensor grants the public permission to use the + licensed material under specified terms and conditions. If + the licensor's permission is not necessary for any reason--for + example, because of any applicable exception or limitation to + copyright--then that use is not regulated by the license. Our + licenses grant only permissions under copyright and certain + other rights that a licensor has authority to grant. Use of + the licensed material may still be restricted for other + reasons, including because others have copyright or other + rights in the material. A licensor may make special requests, + such as asking that all changes be marked or described. + Although not required by our licenses, you are encouraged to + respect those requests where reasonable. More considerations + for the public: + wiki.creativecommons.org/Considerations_for_licensees + +======================================================================= + +Creative Commons Attribution 4.0 International Public License + +By exercising the Licensed Rights (defined below), You accept and agree +to be bound by the terms and conditions of this Creative Commons +Attribution 4.0 International Public License ("Public License"). To the +extent this Public License may be interpreted as a contract, You are +granted the Licensed Rights in consideration of Your acceptance of +these terms and conditions, and the Licensor grants You such rights in +consideration of benefits the Licensor receives from making the +Licensed Material available under these terms and conditions. + + +Section 1 -- Definitions. + + a. Adapted Material means material subject to Copyright and Similar + Rights that is derived from or based upon the Licensed Material + and in which the Licensed Material is translated, altered, + arranged, transformed, or otherwise modified in a manner requiring + permission under the Copyright and Similar Rights held by the + Licensor. For purposes of this Public License, where the Licensed + Material is a musical work, performance, or sound recording, + Adapted Material is always produced where the Licensed Material is + synched in timed relation with a moving image. + + b. Adapter's License means the license You apply to Your Copyright + and Similar Rights in Your contributions to Adapted Material in + accordance with the terms and conditions of this Public License. + + c. Copyright and Similar Rights means copyright and/or similar rights + closely related to copyright including, without limitation, + performance, broadcast, sound recording, and Sui Generis Database + Rights, without regard to how the rights are labeled or + categorized. For purposes of this Public License, the rights + specified in Section 2(b)(1)-(2) are not Copyright and Similar + Rights. + + d. Effective Technological Measures means those measures that, in the + absence of proper authority, may not be circumvented under laws + fulfilling obligations under Article 11 of the WIPO Copyright + Treaty adopted on December 20, 1996, and/or similar international + agreements. + + e. Exceptions and Limitations means fair use, fair dealing, and/or + any other exception or limitation to Copyright and Similar Rights + that applies to Your use of the Licensed Material. + + f. Licensed Material means the artistic or literary work, database, + or other material to which the Licensor applied this Public + License. + + g. Licensed Rights means the rights granted to You subject to the + terms and conditions of this Public License, which are limited to + all Copyright and Similar Rights that apply to Your use of the + Licensed Material and that the Licensor has authority to license. + + h. Licensor means the individual(s) or entity(ies) granting rights + under this Public License. + + i. Share means to provide material to the public by any means or + process that requires permission under the Licensed Rights, such + as reproduction, public display, public performance, distribution, + dissemination, communication, or importation, and to make material + available to the public including in ways that members of the + public may access the material from a place and at a time + individually chosen by them. + + j. Sui Generis Database Rights means rights other than copyright + resulting from Directive 96/9/EC of the European Parliament and of + the Council of 11 March 1996 on the legal protection of databases, + as amended and/or succeeded, as well as other essentially + equivalent rights anywhere in the world. + + k. You means the individual or entity exercising the Licensed Rights + under this Public License. Your has a corresponding meaning. + + +Section 2 -- Scope. + + a. License grant. + + 1. Subject to the terms and conditions of this Public License, + the Licensor hereby grants You a worldwide, royalty-free, + non-sublicensable, non-exclusive, irrevocable license to + exercise the Licensed Rights in the Licensed Material to: + + a. reproduce and Share the Licensed Material, in whole or + in part; and + + b. produce, reproduce, and Share Adapted Material. + + 2. Exceptions and Limitations. For the avoidance of doubt, where + Exceptions and Limitations apply to Your use, this Public + License does not apply, and You do not need to comply with + its terms and conditions. + + 3. Term. The term of this Public License is specified in Section + 6(a). + + 4. Media and formats; technical modifications allowed. The + Licensor authorizes You to exercise the Licensed Rights in + all media and formats whether now known or hereafter created, + and to make technical modifications necessary to do so. The + Licensor waives and/or agrees not to assert any right or + authority to forbid You from making technical modifications + necessary to exercise the Licensed Rights, including + technical modifications necessary to circumvent Effective + Technological Measures. For purposes of this Public License, + simply making modifications authorized by this Section 2(a) + (4) never produces Adapted Material. + + 5. Downstream recipients. + + a. Offer from the Licensor -- Licensed Material. Every + recipient of the Licensed Material automatically + receives an offer from the Licensor to exercise the + Licensed Rights under the terms and conditions of this + Public License. + + b. No downstream restrictions. You may not offer or impose + any additional or different terms or conditions on, or + apply any Effective Technological Measures to, the + Licensed Material if doing so restricts exercise of the + Licensed Rights by any recipient of the Licensed + Material. + + 6. No endorsement. Nothing in this Public License constitutes or + may be construed as permission to assert or imply that You + are, or that Your use of the Licensed Material is, connected + with, or sponsored, endorsed, or granted official status by, + the Licensor or others designated to receive attribution as + provided in Section 3(a)(1)(A)(i). + + b. Other rights. + + 1. Moral rights, such as the right of integrity, are not + licensed under this Public License, nor are publicity, + privacy, and/or other similar personality rights; however, to + the extent possible, the Licensor waives and/or agrees not to + assert any such rights held by the Licensor to the limited + extent necessary to allow You to exercise the Licensed + Rights, but not otherwise. + + 2. Patent and trademark rights are not licensed under this + Public License. + + 3. To the extent possible, the Licensor waives any right to + collect royalties from You for the exercise of the Licensed + Rights, whether directly or through a collecting society + under any voluntary or waivable statutory or compulsory + licensing scheme. In all other cases the Licensor expressly + reserves any right to collect such royalties. + + +Section 3 -- License Conditions. + +Your exercise of the Licensed Rights is expressly made subject to the +following conditions. + + a. Attribution. + + 1. If You Share the Licensed Material (including in modified + form), You must: + + a. retain the following if it is supplied by the Licensor + with the Licensed Material: + + i. identification of the creator(s) of the Licensed + Material and any others designated to receive + attribution, in any reasonable manner requested by + the Licensor (including by pseudonym if + designated); + + ii. a copyright notice; + + iii. a notice that refers to this Public License; + + iv. a notice that refers to the disclaimer of + warranties; + + v. a URI or hyperlink to the Licensed Material to the + extent reasonably practicable; + + b. indicate if You modified the Licensed Material and + retain an indication of any previous modifications; and + + c. indicate the Licensed Material is licensed under this + Public License, and include the text of, or the URI or + hyperlink to, this Public License. + + 2. You may satisfy the conditions in Section 3(a)(1) in any + reasonable manner based on the medium, means, and context in + which You Share the Licensed Material. For example, it may be + reasonable to satisfy the conditions by providing a URI or + hyperlink to a resource that includes the required + information. + + 3. If requested by the Licensor, You must remove any of the + information required by Section 3(a)(1)(A) to the extent + reasonably practicable. + + 4. If You Share Adapted Material You produce, the Adapter's + License You apply must not prevent recipients of the Adapted + Material from complying with this Public License. + + +Section 4 -- Sui Generis Database Rights. + +Where the Licensed Rights include Sui Generis Database Rights that +apply to Your use of the Licensed Material: + + a. for the avoidance of doubt, Section 2(a)(1) grants You the right + to extract, reuse, reproduce, and Share all or a substantial + portion of the contents of the database; + + b. if You include all or a substantial portion of the database + contents in a database in which You have Sui Generis Database + Rights, then the database in which You have Sui Generis Database + Rights (but not its individual contents) is Adapted Material; and + + c. You must comply with the conditions in Section 3(a) if You Share + all or a substantial portion of the contents of the database. + +For the avoidance of doubt, this Section 4 supplements and does not +replace Your obligations under this Public License where the Licensed +Rights include other Copyright and Similar Rights. + + +Section 5 -- Disclaimer of Warranties and Limitation of Liability. + + a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE + EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS + AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF + ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, + IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, + WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR + PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, + ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT + KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT + ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. + + b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE + TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, + NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, + INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, + COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR + USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN + ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR + DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR + IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. + + c. The disclaimer of warranties and limitation of liability provided + above shall be interpreted in a manner that, to the extent + possible, most closely approximates an absolute disclaimer and + waiver of all liability. + + +Section 6 -- Term and Termination. + + a. This Public License applies for the term of the Copyright and + Similar Rights licensed here. However, if You fail to comply with + this Public License, then Your rights under this Public License + terminate automatically. + + b. Where Your right to use the Licensed Material has terminated under + Section 6(a), it reinstates: + + 1. automatically as of the date the violation is cured, provided + it is cured within 30 days of Your discovery of the + violation; or + + 2. upon express reinstatement by the Licensor. + + For the avoidance of doubt, this Section 6(b) does not affect any + right the Licensor may have to seek remedies for Your violations + of this Public License. + + c. For the avoidance of doubt, the Licensor may also offer the + Licensed Material under separate terms or conditions or stop + distributing the Licensed Material at any time; however, doing so + will not terminate this Public License. + + d. Sections 1, 5, 6, 7, and 8 survive termination of this Public + License. + + +Section 7 -- Other Terms and Conditions. + + a. The Licensor shall not be bound by any additional or different + terms or conditions communicated by You unless expressly agreed. + + b. Any arrangements, understandings, or agreements regarding the + Licensed Material not stated herein are separate from and + independent of the terms and conditions of this Public License. + + +Section 8 -- Interpretation. + + a. For the avoidance of doubt, this Public License does not, and + shall not be interpreted to, reduce, limit, restrict, or impose + conditions on any use of the Licensed Material that could lawfully + be made without permission under this Public License. + + b. To the extent possible, if any provision of this Public License is + deemed unenforceable, it shall be automatically reformed to the + minimum extent necessary to make it enforceable. If the provision + cannot be reformed, it shall be severed from this Public License + without affecting the enforceability of the remaining terms and + conditions. + + c. No term or condition of this Public License will be waived and no + failure to comply consented to unless expressly agreed to by the + Licensor. + + d. Nothing in this Public License constitutes or may be interpreted + as a limitation upon, or waiver of, any privileges and immunities + that apply to the Licensor or You, including from the legal + processes of any jurisdiction or authority. + + +======================================================================= + +Creative Commons is not a party to its public +licenses. Notwithstanding, Creative Commons may elect to apply one of +its public licenses to material it publishes and in those instances +will be considered the “Licensor.” The text of the Creative Commons +public licenses is dedicated to the public domain under the CC0 Public +Domain Dedication. Except for the limited purpose of indicating that +material is shared under a Creative Commons public license or as +otherwise permitted by the Creative Commons policies published at +creativecommons.org/policies, Creative Commons does not authorize the +use of the trademark "Creative Commons" or any other trademark or logo +of Creative Commons without its prior written consent including, +without limitation, in connection with any unauthorized modifications +to any of its public licenses or any other arrangements, +understandings, or agreements concerning use of licensed material. For +the avoidance of doubt, this paragraph does not form part of the +public licenses. + +Creative Commons may be contacted at creativecommons.org. + diff --git a/nmltab.py b/nmltab.py deleted file mode 100755 index 1b844658..00000000 --- a/nmltab.py +++ /dev/null @@ -1,745 +0,0 @@ -#!/usr/bin/env python3 -""" - -General-purpose tools to semantically tabulate, diff and superset Fortran namelist files. -Also includes a command-line interface. - -Latest version: https://github.com/aekiss/nmltab -Author: Andrew Kiss https://github.com/aekiss -Apache 2.0 License http://www.apache.org/licenses/LICENSE-2.0.txt -""" - - -# TODO: handle multiple groups with the same name. tidy should consolidate names, with definitions in later groups taking priority if the same name is defined in two groups of the same name. What happens if a name is repeated in one group? Ask Marshall. - -from __future__ import print_function - -# for testing my modified f90nml -# import sys -# sys.path.insert(0, '/Users/andy/Documents/COSIMA/github/aekiss/f90nml') # BUG: doesn't work with /Users/andy/anaconda/bin/python3 /Users/andy/bin/nmltab.py --fmt latex new/control/025deg_jra55_ryf/ice/input_ice_gfdl.nml - -import f90nml # from http://f90nml.readthedocs.io -import filecmp -import textwrap -import copy -import warnings -import collections -import os -import itertools - -# from IPython.display import display, Markdown - - -def nmldict(nmlfnames): - """ - Return OrderedDict of the groups and variables in Fortran namelist files. - - Parameters - ---------- - nmlfnames : str, tuple or list - string, or tuple or list of any number of namelist file path strings. - Repeated files are silently ignored. - If a file contains repeated groups, only the first instance is used. - - Returns - ------- - OrderedDict - OrderedDict with `key`:`value` pairs where - `key` is filename path string (in supplied order) - `value` is complete Namelist from filename as returned by f90nml.read - - """ - if isinstance(nmlfnames, str): - nmlfnames = [nmlfnames] - - nmlall = collections.OrderedDict() # dict keys are nml paths, values are Namelist dicts - for nml in nmlfnames: - nmlall[nml] = f90nml.read(nml) - if len(nmlall[nml]) == 0: - warnings.warn('{} does not contain any namelist data'.format(nml)) - for nml in nmlall: - for group in nmlall[nml]: - if isinstance(nmlall[nml][group], list): - # A list indicates group is defined more than once in nml file. - # The list contains the groups in order of occurrence. - # For the nth group's values to have any effect in f90, - # the namelist needs to be read n times from the input file, - # without closing the file in between. - # If the same variable name occurs in multiple instances of - # the same group, the last read instance is used. - # Since we don't know how many times the group is read in f90, - # ignoring all but the first seems the safest option. - # TODO: provide an option to consolidate all groups in list? - warnings.warn('&{} occurs {} times in {}. Using only the first instance of this group.'.format(group, str(len(nmlall[nml][group])), nml)) - nmlall[nml][group] = nmlall[nml][group][0] - return nmlall - - -def superset(nmlall): - """ - Return dict of groups and variables present in any of the input Namelists. - - Parameters - ---------- - nmlall : dict or OrderedDict - dict (e.g. returned by nmldict) with `key`:`value` pairs where - `key` is arbitrary (typically a filename string) - `value` is Namelist (typically from filename via f90nml.read) - - Returns - ------- - dict - dict with `key`:`value` pairs where - `key` is group name (including all groups present in any input Namelist) - `value` is Namelist for group (including every variable present in this - group in any input Namelist) - - """ - # if len(nmlall) == 1: # just do a deep copy of the only value - # nmlsuperset = copy.deepcopy(nmlall[list(nmlall.keys())[0]]) - # else: - nmlsuperset = {} - for nml in nmlall: - nmlsuperset.update(nmlall[nml]) - # nmlsuperset now contains all groups that were in any nml - for group in nmlsuperset: - # to avoid the next bit changing the original groups - nmlsuperset[group] = nmlsuperset[group].copy() - # if isinstance(nmlallsuperset[group], list): - # for gr in nmlall[nml][group]: - # nmlsuperset[group].update(gr) - for nml in nmlall: - if group in nmlall[nml]: - nmlsuperset[group].update(nmlall[nml][group]) - # nmlsuperset groups now contain all keys that were in any nml - return nmlsuperset - - -def nmldiff(nmlall, keep=''): - """ - In-place remove every group/variable that's the same in all file Namelists. - - Parameters - ---------- - nmlall : dict or OrderedDict - dict (e.g. returned by nmldict) with `key`:`value` pairs where - `key` is arbitrary (typically a filename path string) - `value` is Namelist (typically from filename via f90nml.read) - keep : variable name - variable name to always keep in diff, unless the group has no differences - - Returns - ------- - dict or OrderedDict - In-place modified input dict with `key`:`value` pairs where - `key` is arbitrary (typically a filename path string) - `value` is Namelist from nmlall, with any variable - common to all other keys (i.e. files) in input removed. - Groups whose contents are identical are also removed. - - """ -# Create diff by removing common groups/variables from nmlall. -# This is complicated by the fact group names / variable names may differ -# or be absent across different nml files. -# -# First make a superset that has all group names and variables that -# appear in any nml file - nmlsuperset = superset(nmlall) - - # now go through nmlall and remove any groups / variables from nmlall that - # are identical to superset in all nmls - # first delete any variables that are common to all nmls, then delete - # any empty groups common to all nmls - for group in nmlsuperset: - # init: whether group is present and identical in all namelist files - deletegroup = True - varkept = False # whether var is kept when it would otherwise be deleted - onlyvarkept = False # whether var is kept and is the only var in this group across all nmls - for nml in nmlall: - deletegroup = deletegroup and (group in nmlall[nml]) - if deletegroup: # group present in all namelist files - for var in nmlsuperset[group]: - # init: whether variable is present and identical - # in all namelist files - deletevar = True - for nml in nmlall: - deletevar = deletevar and (var in nmlall[nml][group]) - if deletevar: # variable is present in all namelist files - for nml in nmlall: - # ... now check if values match in all namelist files - # print("nml={}, group={}, var={}".format(nml, group, var)) - deletevar = deletevar and \ - (nmlall[nml][group][var] == - nmlsuperset[group][var]) - if deletevar: - if var == keep: - varkept = True - else: - for nml in nmlall: - # delete var from this group in all nmls - del nmlall[nml][group][var] - if varkept: - onlyvarkept = True - for nml in nmlall: - onlyvarkept = onlyvarkept and len(nmlall[nml][group]) < 2 - if onlyvarkept and len(nmlall[nml][group]) == 1: - onlyvarkept = list(nmlall[nml][group].keys())[0] == keep - if onlyvarkept: - deletegroup = True - else: - deletegroup = max([len(nmlall[nml][group]) for nml in nmlall]) == 0 - if deletegroup: - # group is common to all nmls and now empty (or only holding keep) so delete - for nml in nmlall: - del nmlall[nml][group] - return nmlall - - -def prunefilelist(fnames): - """ - Remove names of files with identical content to the previous file in list. - - Parameters - ---------- - fnames : List - List of any number of file path strings. - - Returns - ------- - List - New list in same order as fnames but including only names of files with - content that is not identical to that of the previous file in the list. - Non-existent files are ignored and not included in output list. - - Examples - -------- - >>> nmlprune(nmldict(prunefilelist(glob.glob(*.nml)))) - - """ - fntmp = [fn for fn in fnames if os.path.isfile(fn)] - if len(fntmp) <= 1: - outfnames = fntmp - else: - outfnames = [] - outfnames.append(fntmp[0]) - for fn in fntmp[1:]: - if not(filecmp.cmp(outfnames[-1], fn, shallow=False)): - outfnames.append(fn) - return outfnames - - -def nmlprune(nmlall, ignore={}): - """ - In-place remove all Namelists that are the same as the previous one in nmlall. - - Does nothing if nml is not an OrderedDict. - - Parameters - ---------- - nmlall : OrderedDict - OrderedDict (e.g. returned by nmldict) with `key`:`value` pairs where - `key` is arbitrary (typically a filename path string) - `value` is Namelist (typically from filename via f90nml.read) - For efficiency use prunefilelist on file list before passing to nmldict. - - ignore : dict, optional, default={} - dict specifying namelist variables whose differences should be ignored. - key is namelist group - value is a list of variable names within that group - - Returns - ------- - OrderedDict - In-place modified input OrderedDict with `key`:`value` pairs where - `key` is arbitrary (typically a filename path string) - `value` is Namelist from nmlall, with any variable - common to all other keys (i.e. files) in input removed. - Groups whose contents are identical are also removed. - - Examples - -------- - >>> nmlprune(nmldict(prunefilelist(glob.glob(*.nml)))) - """ - if len(nmlall) > 1: - idx = 0 - while True: - # need deepcopy to avoid in-place modification by nmldiff - pair = copy.deepcopy(collections.OrderedDict( - itertools.islice(nmlall.items(), idx, idx+2))) - for group in ignore: - for var in ignore[group]: - for fn in pair: - if group in pair[fn]: - if var in pair[fn][group]: - del pair[fn][group][var] - nmldiff(pair) - if max([len(x) for x in pair.values()]) == 0: - del nmlall[list(pair.keys())[1]] # remove 2nd of pair - else: - idx += 1 # 2nd of pair is different from first, so retain it - if idx > len(nmlall)-2: - break - return nmlall - - -def tidy_overwrite(nmlall): - """ - Overwrite namelist files with parsed namelist data from those files, - sorted alphabetically by group then variable name. - Files with no namelist data are left untouched. - - Parameters - ---------- - nmlall : dict or OrderedDict - dict (e.g. returned by nmldict) with `key`:`value` pairs where - `key` is filename path string to be overwritten - `value` is Namelist (typically from filename via f90nml.read) - - Returns - ------- - None - - """ - for nml in nmlall: - if len(nmlall[nml]) > 0: - nmlout = nml + '-tmp' - try: - f90nml.write(nmlall[nml], nmlout, sort=True) - os.replace(nmlout, nml) - except: # TODO: don't use bare except - warnings.warn("Error {} tidying '{}'; file left untouched. \ -Delete part-converted file '{}' before trying again." - .format(sys.exc_info()[0], nml, nmlout)) - return None - - -def strnmldict(nmlall, fmt='', masterswitch='', hide={}, heading='', url=''): - """ - Return string representation of dict of Namelists. - - Parameters - ---------- - nmlall : dict or OrderedDict - dict (e.g. returned by nmldict) with `key`:`value` pairs where - `key` is arbitrary (typically a filename path string) - `value` is Namelist (typically from filename via f90nml.read) - - fmt : str, optional, case insensitive, default='' - 'md' or 'markdown': markdown string output - 'latex': latex string output (table only, suitable as an input file) - 'latex-complete': latex string, suitable for a complete .tex file - 'text': text output ([*] &group variable [value] file) - 'text-tight': as for 'text', but without aligned columns - anything else: standard string output (different from 'text') - - masterswitch : str, optional, case insensitive, default='' - key with boolean value that disables other variables in group - if present and false, e.g. 'use_this_module' in MOM. - NB: this key might be absent in namelist differences. - Only used for fmt='latex' or 'latex-complete'. - - hide : dict, optional, default={} - dict specifying namelist variables that will not be shown in output. - key is namelist group - value is a list of variable names within that group - Ignored for fmt='md' or 'markdown'. - TODO: implement for all formats - - heading : string, optional, default='' - string to be written above table if fmt='latex-complete' - - url : string, optional, default='' - url prefix for hyperlinked variables and groups if fmt='latex-complete' - url='' (the default) has no hyperlinks - - Returns - ------- - string - String representaion of nmlall. - Default lists alphabetically by group, variable, then dict key, - with undefined namelist variables shown as blank. - - """ - def latexstr(item): - return item.replace('_', '\\_').replace('/', '\\slash ').replace('%', '\%') - - def latexrepr(item): - if isinstance(item, str): - return "'" + latexstr(item) + "'" - elif isinstance(item, float): - return '\\num*{' + repr(item).replace('e+0', 'e+').replace('e-0', 'e-') + '}{}' - elif isinstance(item, list): - s = '' - for i in item: - s += latexrepr(i) + ', ' - return s[:-2] - else: - return repr(item) - - # TODO: fail on unknown fmt - # TODO: put data format in Fortran syntax eg for booleans and arrays - does nf90nml do this? - # - see f90repr in namelist.py: https://github.com/marshallward/f90nml/blob/master/f90nml/namelist.py#L405 - fmt = fmt.lower() - nmlss = superset(nmlall) - nmldss = superset(nmldiff(copy.deepcopy(nmlall))) # avoid in-place modification - fnames = list(nmlall.keys()) - colwidth = max((len(f) for f in fnames), default=0) # default keyword requires Python 3.4 or later - # TODO: test if the following works in python pre- and post-3.4 - # colwidth = max([len(f) for f in fnames] or [0]) # defaults to 0 if fnames is empty list, since empty list evaluates to False - - # TODO: would be faster & more efficient to .append a list of strings - # and then join them: - # http://docs.python-guide.org/en/latest/writing/structure/#mutable-and-immutable-types - st = '' - if fmt in ('md', 'markdown'): - if len(nmlss) > 0: - st += '| ' + 'File'.ljust(colwidth) + ' | ' - nvar = 0 - for group in sorted(nmlss): - for var in sorted(nmlss[group]): - st += '&' + group + '
' + var + ' | ' - nvar += 1 - st += '\n|-' + '-' * colwidth + ':|' + '--:|' * nvar - for fn in fnames: - st += '\n| ' + fn + ' | ' - for group in sorted(nmlss): - for var in sorted(nmlss[group]): - if group in nmlall[fn]: - if var in nmlall[fn][group]: - st += repr(nmlall[fn][group][var]) # TODO: use f90repr - st += ' | ' - st += '\n' - elif fmt.startswith('latex'): - if len(nmlss) > 0: - if fmt == 'latex': - st += textwrap.dedent(r""" - % Latex tabulation of Fortran namelist, auto-generated by nmltab.py - % - % Include this file in a latex document using \import{path/to/this/file}. - % The importing document requires - % \usepackage{ltablex, array, sistyle} - % and possibly (depending on definitions below) - % \usepackage{hyperref, color} - % and also needs to define 'nmldiffer', 'nmllink' and 'ignored' commands, e.g. - % \newcommand{\nmldiffer}[1]{#1} % no special display of differing variables - % \newcommand{\nmldiffer}[1]{\textbf{#1}} % bold display of differing variables - % \definecolor{hilite}{cmyk}{0, 0, 0.9, 0}\newcommand{\nmldiffer}[1]{\colorbox{hilite}{#1}}\setlength{\fboxsep}{0pt} % colour highlight of differing variables (requires color package) - % \newcommand{\nmllink}[2]{#1} % don't link variables - % \newcommand{\nmllink}[2]{\href{https://github.com/mom-ocean/MOM5/search?q=#2}{#1}} % link variables to documentation (requires hyperref package) - % \newcommand{\ignored}[1]{#1} % no special display of ignored variables - % \definecolor{ignore}{gray}{0.7}\newcommand{\ignored}[1]{\textcolor{ignore}{#1}} % gray display of ignored variables (but only in groups where masterswitch key is present and false, so may not work well for differences; requires color package) - % and also define the length 'nmllen' that sets the column width, e.g. - % \newlength{\nmllen}\setlength{\nmllen}{12ex} - - """) - elif fmt == 'latex-complete': - st = textwrap.dedent(r""" % generated by https://github.com/aekiss/nmltab - \documentclass[10pt]{article} - \usepackage[a4paper, truedimen, top=2cm,bottom=2cm,left=2cm,right=2cm]{geometry} - - \usepackage{PTSansNarrow} % narrow sans serif font for urls - \usepackage[scaled=.9]{inconsolata} % for texttt - \renewcommand{\familydefault}{\sfdefault} - - \usepackage[table,dvipsnames]{xcolor} % loads also colortbl - \definecolor{lightblue}{rgb}{0.93,0.95,1.0} % for table rows - \rowcolors{1}{lightblue}{white} - \definecolor{link}{rgb}{0,0,1} - \usepackage[colorlinks, linkcolor={link},citecolor={link},urlcolor={link}, - breaklinks, bookmarks, bookmarksnumbered]{hyperref} - \usepackage{url} - \usepackage{breakurl} - \urlstyle{sf} - - \usepackage{ltablex}\keepXColumns - \usepackage{array, sistyle} - - \usepackage[strings]{underscore} % allows hyphenation at underscores - \usepackage{datetime2}\DTMsetdatestyle{iso} - - \usepackage{makeidx} - \makeindex - - \usepackage{fancyhdr} - \pagestyle{fancy} - \renewcommand{\headrulewidth}{0pt} - \lfoot{{\footnotesize \textsl{Fortran namelist table generated by \url{https://github.com/aekiss/nmltab}}}} - \rfoot{\textsl{\today\ \DTMcurrenttime\ \DTMcurrentzone}} - - \begin{document} - - \definecolor{ignore}{gray}{0.7}\newcommand{\ignored}[1]{\textcolor{ignore}{#1}} % gray display of ignored variables (but only in groups where masterswitch key is present and false, so may not work well for differences; requires color package) - \newlength{\nmllen}\setlength{\nmllen}{12ex} - - """) - st += heading - if url == '': - st += r'\newcommand{\nmllink}[2]{#1\index{#1}}' - else: - st += 'Variables are weblinks to source code searches.\n' - st += r'\newcommand{\nmllink}[2]{\href{' + url + r'#2}{#1}\index{#1}}' - st += '\n' - # TODO: get this use case working: - # % \definecolor{hilite}{cmyk}{0, 0, 0.9, 0}\newcommand{\nmldiffer}[1]{\rowcolor{hilite}#1} % colour highlight of rows with differing variables (requires xcolor package) BUG: DOESN'T WORK! Misplaced \noalign due to leading \hfill (and namelist group name if at start of group) - st += '\\newcolumntype{R}{>{\\raggedleft\\arraybackslash}b{\\nmllen}}\n' - st += '\\begin{tabularx}{\\linewidth}{X' + 'R' * len(fnames) + '}\n' - st += '\\hline\n\\hiderowcolors\n' - st += '\\textbf{Group\\quad\\hfill Variable}' - # for fn in rmcommonprefix(rmcommonsuffix(fnames)): - for fn in fnames: - st += '\t & \t\\textbf{' + latexstr(fn) + '}' - st += ' \\\\\n\\showrowcolors\n\\hline\\endfirsthead\n' - st += '\\hline\n\\hiderowcolors\n' - st += '\\textbf{Group (continued)\\quad\\hfill Variable}' - # for fn in rmcommonprefix(rmcommonsuffix(fnames)): - for fn in fnames: - st += '\t & \t\\textbf{' + latexstr(fn) + '}' - st += ' \\\\\n\\showrowcolors\n\\hline\\endhead\n' - for group in sorted(nmlss): - firstvar = True - for var in sorted(nmlss[group]): - if not ((group in hide) and (var in hide[group])): - if firstvar: # only show group once - gr = '\\&\\nmllink{{{}}}{{{}}}'.format( - latexstr(group), group) - firstvar = False - else: - gr = '' - st1 = '{} \\hfill \\nmllink{{{}}}{{{}}}'.format( - gr, latexstr(var), var) # replaced below if differences - if group in nmldss: - if var in nmldss[group]: # new st1 if differences - st1 = '{} \\hfill \\nmldiffer{{\\nmllink{{{}}}{{{}}}}}'.format( - gr, latexstr(var), var) - st += st1 - for fn in fnames: - st += '\t & \t' - if group in nmlall[fn]: - if var in nmlall[fn][group]: - st1 = latexrepr(nmlall[fn][group][var]) # TODO: use f90repr - if masterswitch in nmlall[fn][group]: - if not nmlall[fn][group][masterswitch] \ - and var != masterswitch: - st1 = '\\ignored{' + st1 + '}' - st += st1 - st += ' \\\\\n' - if not firstvar: - st += '\\hline\n' - st += '\\end{tabularx}\n' - if fmt == 'latex-complete': - st += textwrap.dedent(r""" - \clearpage - \phantomsection % fix hyperrefs to index - \addcontentsline{toc}{part}{\indexname} - \printindex - \end{document} - """) - elif fmt.startswith('text'): - if fmt == 'text': - gwidth = max([len(g) for g in list(nmlss.keys())], default=0) - vwidth = max([max([len(v) for v in list(g.keys())], default=0) - for g in list(nmlss.values())], default=0) - dwidth = max([ - max([max([len(repr(v)) for v in list(g.values())], default=0) - for g in list(nmlall[fn].values())], default=0) - for fn in nmlall.keys()], default=0) - else: # assumes text-tight - TODO: be more stringent - gwidth = 0 - vwidth = 0 - dwidth = 0 - for group in sorted(nmlss): - for var in sorted(nmlss[group]): - if not ((group in hide) and (var in hide[group])): - st1 = ' ' - if group in nmldss: - if var in nmldss[group]: # star if differences - st1 = '* ' - for fn in fnames: - st += st1 + '&' + group.ljust(gwidth) + ' ' + var.ljust(vwidth) + ' ' - dstr = '' - if group in nmlall[fn]: - if var in nmlall[fn][group]: - dstr = repr(nmlall[fn][group][var]) # TODO: use f90repr - st += dstr.ljust(dwidth) + ' ' + fn + '\n' - else: - for group in sorted(nmlss): - for var in sorted(nmlss[group]): - if not ((group in hide) and (var in hide[group])): - st += ' ' * (colwidth + 2) + '&{}\n'.format(group) - st += ' ' * (colwidth + 2) + ' {}\n'.format(var) - for fn in fnames: - st += '{} : '.format(fn.ljust(colwidth)) - if group in nmlall[fn]: - if var in nmlall[fn][group]: - st += repr(nmlall[fn][group][var]) # TODO: use f90repr - st += '\n' - return st - - -def nml_md(nmlfnames, diff=False, prune=False, - ignore={'setup_nml': ['istep0'], - 'coupling': ['inidate', 'truntime0']}): - """ - Display table in a Jupter notebook of groups and variables in Fortran - namelist files. - - Parameters - ---------- - nmlfnames : str, tuple or list - string, or tuple or list of any number of namelist file path strings. - Repeated files are silently ignored. - - diff : boolean, optional, default=False - just display semantic differences - - prune : boolean, optional, default=False - just display the first file in which each variable change occurs - - ignore : dict, optional, - default={'setup_nml': ['istep0'], 'coupling': ['inidate', 'truntime0']} - variable names to ignore differences in if prune=True - - Returns - ------- - None - - """ - from IPython.display import display, Markdown # slow to load so do it here - if prune: - nmld = nmldict(prunefilelist(nmlfnames)) - else: - nmld = nmldict(nmlfnames) - if diff: - nmldiff(nmld) - if prune: - nmlprune(nmld, ignore=ignore) - display(Markdown(strnmldict(nmld, fmt='md'))) - return None - - -# def nmldiff_md(nmlfnames): -# """ -# Display table in a Jupter notebook of semantic differences in groups and -# variables in Fortran namelist files. -# -# Parameters -# ---------- -# nmlfnames : str, tuple or list -# string, or tuple or list of any number of namelist file path strings. -# Repeated files are silently ignored. -# -# Returns -# ------- -# None -# -# """ -# from IPython.display import display, Markdown # slow to load so do it here -# display(Markdown(strnmldict(nmldiff(nmldict(nmlfnames)), fmt='md'))) -# return None - - -if __name__ == '__main__': - import argparse - import sys - parser = argparse.ArgumentParser(description= - 'Semantically tabulate (and optionally diff) multiple Fortran namelist files.\ - Undefined namelist variables are shown as blank.\ - Repeated files are silently ignored.\ - Latest version: https://github.com/aekiss/nmltab') - parser.add_argument('-d', '--diff', - action='store_true', default=False, - help='only show semantic differences (default: show all); \ - exit code 0: no differences; 1: differences') - parser.add_argument('-p', '--prune', - action='store_true', default=False, - help='ignore all but the first in any sequence of files \ - with semantically indentical content') - parser.add_argument('-i', '--ignore_counters', - action='store_true', default=False, - help='when doing --prune, ignore differences in timestep\ - counters etc in CICE and MATM namelists, and also hide\ - them from output (ignored for markdown output)') - parser.add_argument('-k', '--keep', type=str, - metavar='str', default='', - help="variable to always keep in diff, unless it's the\ - only one in a group, e.g. 'use_this_module'") - parser.add_argument('-F', '--format', type=str, - metavar='fmt', default='str', - choices=['markdown', 'latex', 'latex-complete', - 'text', 'text-tight'], - help="optional alternative output format: \ - 'markdown' or 'latex' (table only, suitable as an \ - input file) or 'latex-complete' (a complete .tex file) \ - or 'text' (plain text; with each row row showing \ - [*] &group variable [value] file) \ - or 'text-tight' (like 'text', but without aligned columns)") - parser.add_argument('-u', '--url', type=str, - metavar='url', default='', - help="link all variable and group names to this \ - URL followed by the variable/group name, e.g. \ - https://github.com/COSIMA/libaccessom2/search?q=") - parser.add_argument('--tidy_overwrite', - action='store_true', default=False, - help='OVERWRITE files with only their parsed contents \ - (all comments and non-namelist content are removed), \ - with consistent formatting and sorted alphabetically \ - by group then variable name. \ - This makes standard diff much more useful. \ - Files with no namelist data are left untouched. \ - All other options are ignored. \ - USE WITH CARE!') - parser.add_argument('file', metavar='file', type=str, nargs='+', - help='Fortran namelist file') - args = parser.parse_args() - fmt = vars(args)['format'] - url = vars(args)['url'] - keep = vars(args)['keep'] - diff = vars(args)['diff'] - prune = vars(args)['prune'] - ignore = vars(args)['ignore_counters'] - tidy = vars(args)['tidy_overwrite'] - files = vars(args)['file'] - if prune and ignore: - ignored = {'setup_nml': ['istep0'], #, 'npt', 'restart', 'runtype'], - 'coupling': ['inidate', 'runtime', 'truntime0']} - else: - ignored = {} - if prune and not tidy: - nmld = nmldict(prunefilelist(files)) - else: - nmld = nmldict(files) - if tidy: - tidy_overwrite(nmld) - else: - if diff: - nmldiff(nmld, keep=keep) - if prune: - nmlprune(nmld, ignore=ignored) - nmldss = superset(nmld) - if len(nmldss) == 0: - sys.exit(0) - else: - if fmt == 'latex-complete': - if diff: - heading = textwrap.dedent(r""" - \newcommand{\nmldiffer}[1]{#1} % no special display of differing variables - \noindent Only differences are shown. - \ignored{Greyed values} are ignored. - """) - else: - heading = textwrap.dedent(r""" - \definecolor{hilite}{cmyk}{0, 0, 0.9, 0}\newcommand{\nmldiffer}[1]{\colorbox{hilite}{#1}}\setlength{\fboxsep}{0pt} - \noindent Variables that differ between the namelists are \nmldiffer{\textcolor{link}{highlighted}}. - \ignored{Greyed values} are ignored. - """) - print(strnmldict(nmld, fmt=fmt, masterswitch='use_this_module', - hide=ignored, heading=heading, url=url), - end='', flush=True) - else: - print(strnmldict(nmld, fmt=fmt, masterswitch='use_this_module', - hide=ignored), - end='', flush=True) - if diff: - sys.exit(1) - else: - sys.exit(0) diff --git a/run_summary.py b/run_summary.py deleted file mode 100755 index 77a12800..00000000 --- a/run_summary.py +++ /dev/null @@ -1,1074 +0,0 @@ -#!/usr/bin/env python3 -""" - -Tools to summarise access-om2 runs. - -Latest version: https://github.com/aekiss/run_summary -Author: Andrew Kiss https://github.com/aekiss -Apache 2.0 License http://www.apache.org/licenses/LICENSE-2.0.txt -""" - -# TODO: use PAYU_N_RUNS - does this tell you whether the run is part of a sequence? if so we can determine queue wait for runs in a sequence - but sometimes it is None - -from __future__ import print_function -import sys -try: - assert sys.version_info >= (3, 3) # need python >= 3.3 for print flush keyword -except AssertionError: - print('\nFatal error: Python version too old.') - print('On NCI, do the following and try again:') - print(' module use /g/data/hh5/public/modules; module load conda/analysis3\n') - raise - -import os -import glob # BUG: fails if payu module loaded - some sort of module clash with re -import subprocess -import datetime -import dateutil.parser -from collections import OrderedDict -import csv -import copy - -try: - import numpy as np - import yaml - import f90nml # from https://f90nml.readthedocs.io/en/latest/ -except ImportError: # BUG: don't get this exception if payu module loaded, even if on python 2.6.6 - print('\nFatal error: modules not available.') - print('On NCI, do the following and try again:') - print(' module use /g/data/hh5/public/modules; module load conda/analysis3\n') - raise -import nmltab # from https://github.com/aekiss/nmltab -import warnings -warnings.simplefilter('ignore', np.RankWarning) - - -def num(s): - """ - Return input string as int or float if possible, otherwise return string. - """ - try: - return int(s) - except ValueError: - try: - return float(s) - except ValueError: - return s - - -def get_sync_path(fname): - """ - Return GDATADIR or SYNCDIR path from sync_output_to_gdata.sh or sync_data.sh - - fname: file path to sync_output_to_gdata.sh or sync_data.sh etc - - output: directory string - - """ - dir = None # default return value - searchstrings = ['GDATADIR=', 'SYNCDIR='] - with open(fname, 'r') as infile: - for line in infile: - # NB: subsequent matches will replace earlier ones - for s in searchstrings: - splitline = line.split(s) - if len(splitline) == 2 and splitline[0].strip() == '': - dir_maybe = splitline[1].strip().rstrip('/') - if not(dir_maybe.startswith('/ERROR')): - dir = dir_maybe - return dir - - -def parse_pbs_log(fname): - """ - Return dict of items from parsed PBS log file. - - fname: PBS log file path - - output: dict - - example of PBS log file content to parse: - qsub -q normal -P g40 -l walltime=12600 -l ncpus=2064 -l mem=8256GB -N minimal_01deg_j -l wd -j n -v PAYU_MODULENAME=payu/dev,PYTHONPATH=/projects/access/apps/mnctools/0.1/lib:/projects/access/apps/mnctools/0.1/lib:/projects/access/apps/mnctools/0.1/lib:/projects/v45/apps/payu/dev/lib:/projects/access/apps/mnctools/0.1/lib:/projects/v45/python,PAYU_CURRENT_RUN=137,PAYU_MODULEPATH=/projects/v45/modules,PAYU_N_RUNS=10 -lother=hyperthread -W umask=027 /projects/v45/apps/payu/dev/bin/payu-run - ... - git commit -am "2018-10-08 22:32:26: Run 137" - TODO: Check if commit is unchanged - ====================================================================================== - Resource Usage on 2018-10-08 22:32:36: - Job Id: 949753.r-man2 - Project: x77 - Exit Status: 0 - Service Units: 20440.40 - NCPUs Requested: 5968 NCPUs Used: 5968 - CPU Time Used: 20196:31:07 - Memory Requested: 11.66TB Memory Used: 2.61TB - Walltime requested: 05:00:00 Walltime Used: 03:25:30 - JobFS requested: 36.43GB JobFS used: 1.0KB - ====================================================================================== - - """ - def getproject(l): - return l[1] - - def getpayuversion(l): - return os.path.dirname(os.path.dirname(l[-1])) - # return os.path.dirname([s for s in l[0].split(',')[0].split(':') - # if s.find('payu') > -1][0]) - - def getpayu(l): - return l[0].split(',')[0] - - def getpayuint(l): - return int(l[0].split(',')[0]) - - def getrun(l): - return int(l[4].rstrip('"')) - - def getjob(l): - return int(l[1].split('.')[0]) - - def getint(l): - return int(l[1]) - - def getfloat(l): - return float(l[1]) - - def getsec(l): # convert hh:mm:ss to sec - return sum(x * int(t) for x, t in zip([3600, 60, 1], l[1].split(':'))) - - def getdatetime(l): # BUG: doesn't include time zone (can't tell if we're on daylight savings time) - return l[0]+'T'+l[1].rstrip(':') - - def getbytes(l): # assumes PBS log info uses binary prefixes - TODO: check - s = l[1] - ns = s.strip('BKMGT') # numerical part - units = {'B': 1, - 'KB': 2**10, - 'MB': 2**20, - 'GB': 2**30, - 'TB': 2**40} - return int(round(float(ns)*units[s[len(ns):]])) - - search_items = { # keys are strings to search for; items are functions to apply to whitespace-delimited list of strings following key - 'PAYU_CURRENT_RUN': getpayuversion, # gets path to payu; PAYU_CURRENT_RUN is redundant as this is obtained below from git commit message - # 'PAYU_CURRENT_RUN=': getpayuint, # BUG: misses some runs - 'PAYU_MODULENAME=': getpayu, - 'PAYU_MODULEPATH=': getpayu, - 'PAYU_PATH=': getpayu, - 'LD_LIBRARY_PATH=': getpayu, - 'PAYU_N_RUNS=': getpayuint, - 'PYTHONPATH=': getpayu, -# BUG: git commit will be missing if runlog: False in config.yaml - so we won't get run number! - 'git commit': getrun, # instead of using PAYU_CURRENT_RUN; NB: run with this number might have failed - check Exit Status - 'Resource Usage on': getdatetime, - 'Job Id': getjob, - 'Project': getproject, - 'Exit Status': getint, - 'Service Units': getfloat, - 'NCPUs Requested': getint, - 'NCPUs Used': getint, - 'CPU Time Used': getsec, - 'Memory Requested': getbytes, - 'Memory Used': getbytes, - 'Walltime requested': getsec, - 'Walltime Used': getsec, - 'JobFS requested': getbytes, - 'JobFS used': getbytes} - parsed_items = search_items.fromkeys(search_items, None) # set defaults to None - - with open(fname, 'r') as infile: - for line in infile: - # NB: subsequent matches will replace earlier ones - # NB: processes only the first match of each line - for key, op in search_items.items(): - try: - parsed_items[key] = op(line.split(key)[1].split()) - except IndexError: # key not present in this line - continue - - # change to more self-explanatory keys - rename_keys = {'PAYU_CURRENT_RUN': 'payu version', - # 'PAYU_CURRENT_RUN=': 'Run number', - 'git commit': 'Run number', - 'Memory Requested': 'Memory Requested (bytes)', - 'Memory Used': 'Memory Used (bytes)', - 'Walltime requested': 'Walltime Requested (s)', - 'Walltime Used': 'Walltime Used (s)', - 'Resource Usage on': 'Run completion date'} - for oldkey, newkey in rename_keys.items(): - parsed_items[newkey] = parsed_items.pop(oldkey) - - if parsed_items['Memory Requested (bytes)'] is None: - parsed_items['Memory Requested (Gb)'] = None - else: - parsed_items['Memory Requested (Gb)'] = parsed_items['Memory Requested (bytes)']/2**30 - - if parsed_items['Memory Used (bytes)'] is None: - parsed_items['Memory Used (Gb)'] = None - else: - parsed_items['Memory Used (Gb)'] = parsed_items['Memory Used (bytes)']/2**30 - - if parsed_items['Walltime Requested (s)'] is None: - parsed_items['Walltime Requested (hr)'] = None - else: - parsed_items['Walltime Requested (hr)'] = parsed_items['Walltime Requested (s)']/3600 - - if parsed_items['Walltime Used (s)'] is None: - parsed_items['Walltime Used (hr)'] = None - else: - parsed_items['Walltime Used (hr)'] = parsed_items['Walltime Used (s)']/3600 - - try: - parsed_items['Timeout'] = parsed_items['Walltime Used (s)'] > parsed_items['Walltime Requested (s)'] - except: - parsed_items['Timeout'] = None - - return parsed_items - - -def parse_git_log(basepath, datestr): - """ - Return dict of items from git log from most recent commit before a given date. - - basepath: base directory path string - - datestr: date string - - output: dict - """ - # possible BUG: what time zone flag should be use? local is problematic if run from overseas....? - # use Popen for backwards-compatiblity with Python <2.7 - # pretty format is tab-delimited (%x09) - try: - p = subprocess.Popen('cd ' + basepath - + ' && git log -1 ' - + '--pretty="format:%H%x09%an%x09%ai%x09%B" ' - + '`git rev-list -1 --date=local --before="' - + datestr + '" HEAD`', # TODO: add 1 sec to datestr so we don't rely on the delay between git commit and PBS log? - stdout=subprocess.PIPE, shell=True) - log = p.communicate()[0].decode('ascii').split('\t') - # log = p.communicate()[0].decode('ascii').encode('ascii').split('\t') # for python 2.6 - log[3] = log[3].strip() # strip whitespace from message - except: - log = [None]*4 # default values in case there's no .git, e.g. if runlog: False in config.yaml - parsed_items = dict() - parsed_items['Commit'] = log[0] - parsed_items['Author'] = log[1] - parsed_items['Date'] = log[2] - parsed_items['Message'] = log[3] - return parsed_items - - -def parse_mom_time_stamp(paths): - """ - Return dict of items from parsed MOM time_stamp.out. - - paths: list of base paths - - output: dict parsed from first matching time_stamp.out in paths - - example of MOM time_stamp.out content to parse: - 2001 9 1 0 0 0 Sep - 2001 11 1 0 0 0 Nov - - """ - parsed_items = dict() - keys = ['Model start time', 'Model end time'] - for path in paths: - fname = os.path.join(path, 'ocean/time_stamp.out') - if os.path.isfile(fname): - parsed_items['Time stamp file'] = fname - with open(fname, 'r') as infile: - for key in keys: - line = infile.readline() - parsed_items[key] = datetime.datetime( - *list(map(int, line.split()[0:-1]))).isoformat() - break - try: - d1 = dateutil.parser.parse(parsed_items[keys[0]]) - d2 = dateutil.parser.parse(parsed_items[keys[1]]) - duration = d2-d1 # BUG: presumably assumes Gregorian calendar with leap years and time in UTC - parsed_items['Model run length (s)'] = duration.total_seconds() - parsed_items['Model run length (days)'] = duration.total_seconds()/3600/24 - except KeyError: - pass - return parsed_items - - -def parse_yaml(paths, filename): - """ - Return dict of items from parsed yaml file. - - paths: list of base paths - filename: yaml filename to attempt to read from base paths - - output: dict parsed from first matching filename in paths - """ - parsed_items = dict() - for path in paths: - fpath = os.path.join(path, filename) - if os.path.isfile(fpath): - with open(fpath, 'r') as infile: - # Need to use load_all to handle manifests. Only return final part. - parsed_items = list(yaml.load_all(infile, Loader=yaml.FullLoader))[-1] - break - return parsed_items - - -def parse_accessom2_out(paths): - """ - Return dict of items from parsed access-om2.out. - - paths: list of base paths - - output: dict of timing names, with dict of statistics - - NB: output may also contain bad data from intermingled CICE output. - """ - parsed_items = dict() - for path in paths: - fname = os.path.join(path, 'access-om2.out') - if os.path.isfile(fname): - with open(fname, 'r') as infile: - for l in infile: - if l.startswith('Tabulating mpp_clock statistics'): - break - for l in infile: - if l.startswith(' tmin'): - break - keys = l.split() - for l in infile: - if l.startswith(' MPP_STACK high water mark='): - break - name = l[0:32].strip() # relies on name being cropped at 32 chars - vals = [num(n) for n in l[32:].split()] - parsed_items[name] = dict(zip(keys, vals)) - break - return parsed_items - - -def parse_ice_diag_d(paths): - """ - Return dict of cice info from ice/ice_diag.d. - - paths: list of base paths - - output: dict - """ - # this is pretty rough-and-ready, e.g. repeated entries end up containing the final value - parsed_items = dict() - for path in paths: - fname = os.path.join(path, 'ice/ice_diag.d') - if os.path.isfile(fname): - with open(fname, 'r') as infile: - for l in infile: - if l.startswith('Timing information:'): - break # handle timing data with parse_cice_timing - try: - key = l.split('=')[0].strip() - val = num(l.split('=')[1].strip()) - parsed_items[key] = val - except: - try: - key = l.split(':')[0].strip() - val = num(l.split(':')[1].strip()) - parsed_items[key] = val - except: - pass - break - - if 'Block size: nx_block' in parsed_items: - parsed_items['nx_block'] = parsed_items['Block size: nx_block'] -# NB: in ice_blocks.F90 -# nx_block = block_size_x + 2*nghost, &! x,y dir including ghost -# ny_block = block_size_y + 2*nghost ! cells - if 'Number of ghost cells' in parsed_items: - if 'nx_block' in parsed_items: - parsed_items['block_size_x'] = parsed_items['nx_block'] - 2*parsed_items['Number of ghost cells'] - if 'ny_block' in parsed_items: - parsed_items['block_size_y'] = parsed_items['ny_block'] - 2*parsed_items['Number of ghost cells'] - parsed_items['timing'] = parse_cice_timing(paths) - return parsed_items - - -def parse_cice_timing(paths): - """ - Return dict of cice timing info from ice/ice_diag.d. - - paths: list of base paths - - output: dict of timing names, with dict of statistics - """ -# sample to parse: -# Timing information: -# -# Timer 1: Total 10894.88 seconds -# Timer stats (node): min = 10894.69 seconds -# max = 10894.88 seconds -# mean= 10894.70 seconds -# Timer stats(block): min = 0.00 seconds -# max = 0.00 seconds -# mean= 0.00 seconds -# Timer 2: TimeLoop 10802.50 seconds -# Timer stats (node): min = 10802.33 seconds -# max = 10802.50 seconds -# mean= 10802.33 seconds -# Timer stats(block): min = 0.00 seconds -# max = 0.00 seconds -# mean= 0.00 seconds - - parsed_items = dict() - for path in paths: - fname = os.path.join(path, 'ice/ice_diag.d') - if os.path.isfile(fname): - with open(fname, 'r') as infile: - for l in infile: - if l.startswith('Timing information:'): - break - for l in infile: - if l.startswith('Timer'): # ignore time is it it node max - timerkey = ' '.join(l[0:21].split()[2:]) - parsed_items[timerkey] = dict() - else: - if l.startswith(' Timer'): - typekey = l.split('(')[-1].split(')')[0] - parsed_items[timerkey][typekey] = dict() - try: - key = l.split('=')[0].split()[-1] - val = num(l.split()[-2]) - parsed_items[timerkey][typekey][key] = val - except: - pass - break - return parsed_items - - -def parse_nml(paths): - """ - Return dict of items from parsed namelists. - - paths: list of base paths to parse for namelists - - output: dict - """ - parsed_items = dict() - parsed_items['accessom2.nml'] = None # default value for non-YATM run - for path in paths: - fnames = [os.path.join(path, 'accessom2.nml')]\ - + glob.glob(os.path.join(path, '*/*.nml')) - for fname in fnames: - if os.path.isfile(fname): # no accessom2.nml for non-YATM run - parsed_items[fname.split(path)[1].strip('/')] \ - = f90nml.read(fname) - return parsed_items - - -def git_diff(basepath, sha1, sha2): - """ - Return dict of git-tracked differences between two commits. - - basepath: base directory path string - - sha1, sha2: strings; sha1 should be earlier than or same as sha2 - """ - try: - p = subprocess.Popen('cd ' + basepath - + ' && git diff --name-only ' + sha1 + ' ' + sha2, - stdout=subprocess.PIPE, shell=True) - c = ', '.join( - p.communicate()[0].decode('ascii').split()) - p = subprocess.Popen('cd ' + basepath - + ' && git log --ancestry-path --pretty="%B\%x09" ' - + sha1 + '..' + sha2, - stdout=subprocess.PIPE, shell=True) - m = [s.strip('\n\\') - for s in p.communicate()[0].decode('ascii').split('\t')][0:-1] - m.reverse() # put in chronological order - if len(m) == 0: - m = None - except: - c = None - m = None - parsed_items = dict() - parsed_items['Changed files'] = c - parsed_items['Messages'] = m # NB: will be None if there's no direct ancestry path from sha1 to sha2) - return parsed_items - - -def dictget(d, l): - """ - Lookup item in nested dict using a list of keys, or None if non-existent - - d: nested dict - l: list of keys, or None - """ - try: - dl0 = d[l[0]] - except (KeyError, TypeError): - return None - if len(l) == 1: - return dl0 - return dictget(dl0, l[1:]) - - -def keylists(d): - """ - Return list of key lists to every leaf node in a nested dict. - Each key list can be used as an argument to dictget. - - d: nested dict - """ - l = [] - for k, v in d.items(): - if isinstance(v, dict): - sublists = keylists(v) - for sli in sublists: - l.append([k]+sli) - else: - l.append([k]) - return l - - -def keylistssuperset(d): - """ - Apply keylists to every value at the top level of input dict d, - and return a list containing one instance of every key list found. - Each key list can be used as an argument to dictget. - - d: nested dict whose values are to be supplied to keylists - """ - all = set() - for v in d.values(): - all.update(['\b'.join(l) for l in keylists(v)]) - return [s.split('\b') for s in all] - - -def tryfunc(func, arg): - """ - Return func(arg) or None if there's an exception. - """ - try: - return func(arg) - except: - return None - - -def run_summary(basepath=os.getcwd(), outfile=None, list_available=False, - dump_all=False, show_fails=False, outfile_syncdir=False, - no_header=False, no_stats=False): - """ - Generate run summary - """ - basepath = os.path.abspath(basepath) - archive_path = os.path.realpath(os.path.join(basepath, 'archive')) - print('Generating run summary of ' + basepath, end='') - - # get jobname from config.yaml -- NB: we assume this is the same for all jobs - with open(os.path.join(basepath, 'config.yaml'), 'r') as infile: - configyaml = yaml.load(infile, Loader=yaml.FullLoader) - jobname = configyaml.get('jobname') - sync_script = configyaml.get('postscript') - - if sync_script: - sync_path = get_sync_path(os.path.join(basepath, sync_script)) - else: - sync_path = None - - if outfile is None: - if outfile_syncdir and sync_path: - outfile = 'run_summary_' + sync_path.strip(os.sep).replace(os.sep, '_') + '.csv' - else: - outfile = 'run_summary_' + basepath.strip(os.sep).replace(os.sep, '_') + '.csv' - # if show_fails: - # outfile = os.path.splitext(outfile)[0]+'_fails.csv' - - - try: - p = subprocess.Popen('cd ' + basepath - + ' && git rev-parse --abbrev-ref HEAD', - stdout=subprocess.PIPE, shell=True) - git_branch = p.communicate()[0].decode('ascii').strip() - except: - git_branch = None - - # get data from all PBS job logs - run_data = dict() - # NB: match jobname[:15] because in some cases the pbs log files use a shortened version of the jobname in config.yaml - # e.g. see /home/157/amh157/payu/025deg_jra55_ryf8485 - # NB: logs in archive may be duplicated in sync_path, in which case the latter is used - logfiles = glob.glob(os.path.join(archive_path, 'pbs_logs', jobname[:15] + '*.o*'))\ - + glob.glob(os.path.join(basepath, jobname[:15] + '*.o*')) - if sync_path: - logfiles += glob.glob(os.path.join(sync_path, 'pbs_logs', jobname[:15] + '*.o*')) - logfiles = [f for f in logfiles if '_c.o' not in f] # exclude collation files *_c.o* - for f in logfiles: - print('.', end='', flush=True) - jobid = int(f.split('.o')[1]) - run_data[jobid] = dict() - run_data[jobid]['PBS log'] = parse_pbs_log(f) - run_data[jobid]['PBS log']['PBS log file'] = f - # fudge: these paths might actually apply only to the latest job - run_data[jobid]['paths'] = dict() - run_data[jobid]['paths']['Control path'] = basepath - run_data[jobid]['paths']['Sync path'] = sync_path - run_data[jobid]['paths']['Archive path'] = archive_path - run_data[jobid]['storage'] = dict() - - # get run data for all jobs - for jobid in run_data: - print('.', end='', flush=True) - pbs = run_data[jobid]['PBS log'] - date = pbs['Run completion date'] # BUG: would be better to have time when run began, including time zone - if date is not None: - run_data[jobid]['git log'] = parse_git_log(basepath, date) - # BUG: assumes no commits between run start and end - # BUG: assumes the time zones match - no timezone specified in date - what does git assume? UTC? - if pbs['Exit Status'] == 0: # output dir belongs to this job only if Exit Status = 0 - outdir = 'output' + str(pbs['Run number']).zfill(3) - restartdir = 'restart' + str(pbs['Run number']).zfill(3) - paths = [] - - if sync_path: - sync_output_path = os.path.join(sync_path, outdir) - if os.path.isdir(sync_output_path): - paths += [sync_output_path] - run_data[jobid]['paths']['Sync output path'] =\ - sync_output_path - sync_restart_path = os.path.join(sync_path, restartdir) - if os.path.isdir(sync_restart_path): - run_data[jobid]['paths']['Sync restart path'] =\ - sync_restart_path - - archive_output_path = os.path.join(archive_path, outdir) - if os.path.isdir(archive_output_path): - paths += [archive_output_path] - run_data[jobid]['paths']['Archive output path'] =\ - archive_output_path - archive_restart_path = os.path.join(archive_path, restartdir) - if os.path.isdir(archive_restart_path): - run_data[jobid]['paths']['Archive restart path'] =\ - archive_restart_path - - # 'Sync output path' if it exists, otherwise 'Archive output path' - run_data[jobid]['paths']['Output path'] =\ - run_data[jobid]['paths'].get('Sync output path') or\ - run_data[jobid]['paths'].get('Archive output path') - # 'Sync restart path' if it exists, otherwise 'Archive restart path' - run_data[jobid]['paths']['Restart path'] =\ - run_data[jobid]['paths'].get('Sync restart path') or\ - run_data[jobid]['paths'].get('Archive restart path') - - # find GiB for output and restart - for k in ['Output path', 'Restart path']: - path = run_data[jobid]['paths'][k] - if path: - p = subprocess.Popen('du -bs ' + path, - stdout=subprocess.PIPE, shell=True) - ret = p.communicate()[0].decode('ascii') - bytes = int(ret.split()[0]) - run_data[jobid]['storage'][k + ' GiB'] = \ - round(bytes/1073741824, 3) - - run_data[jobid]['MOM_time_stamp.out'] = parse_mom_time_stamp(paths) - run_data[jobid]['namelists'] = parse_nml(paths) - run_data[jobid]['access-om2.out'] = parse_accessom2_out(paths) - run_data[jobid]['ice_diag.d'] = parse_ice_diag_d(paths) - run_data[jobid]['metadata.yaml'] = parse_yaml([basepath, sync_path], 'metadata.yaml') - for fn in ['config.yaml', 'env.yaml', 'job.yaml', - 'manifests/exe.yaml', 'manifests/input.yaml', 'manifests/restart.yaml']: - run_data[jobid][fn] = parse_yaml(paths, fn) - - all_run_data = copy.deepcopy(run_data) # all_run_data includes failed jobs - - - if show_fails: - # remove all jobs that have no PBS info in log file - for jobid in all_run_data: - if all_run_data[jobid]['PBS log']['Run completion date'] is None: - del run_data[jobid] - # (jobid, run completion date) tuples sorted by run completion date - jobid_run_tuples = sorted([(k, v['PBS log']['Run completion date']) - for (k, v) in run_data.items()], - key=lambda t: t[1]) - if len(jobid_run_tuples) == 0: - print('\nAborting: no jobs?') - return - # jobid keys into run_data sorted by run completion date - sortedjobids = [k[0] for k in jobid_run_tuples] - else: - # remove failed jobs from run_data - for jobid in all_run_data: - print('.', end='', flush=True) - pbs = all_run_data[jobid]['PBS log'] - date = pbs['Run completion date'] - if date is None: # no PBS info in log file - del run_data[jobid] - elif pbs['Run number'] is None: # not a model run log file - del run_data[jobid] - elif pbs['Exit Status'] != 0: # output dir belongs to this job only if Exit Status = 0 - del run_data[jobid] - elif len(run_data[jobid]['config.yaml']) == 0: # output dir missing - del run_data[jobid] - - # (jobid, run number) tuples sorted by run number - re-done below - jobid_run_tuples = sorted([(k, v['PBS log']['Run number']) - for (k, v) in run_data.items()], - key=lambda t: t[1]) - if len(jobid_run_tuples) == 0: - print('\nAborting: no successful jobs?') - return - - # Remove the older jobid if run number is duplicated - assume run was re-done - # (check by date rather than jobid, since jobid sometimes rolls over) - prev_jobid_run = jobid_run_tuples[0] - for jobid_run in jobid_run_tuples[1:]: - if jobid_run[1] == prev_jobid_run[1]: # duplicated run number - if run_data[jobid_run[0]]['PBS log']['Run completion date']\ - > run_data[prev_jobid_run[0]]['PBS log']['Run completion date']: - del run_data[prev_jobid_run[0]] - prev_jobid_run = jobid_run - else: - del run_data[jobid_run[0]] - else: - prev_jobid_run = jobid_run - - # re-do (jobid, run number) tuples sorted by run number - jobid_run_tuples = sorted([(k, v['PBS log']['Run number']) - for (k, v) in run_data.items()], - key=lambda t: t[1]) - if len(jobid_run_tuples) == 0: - print('\nAborting: no successful jobs?') - return - - # jobid keys into run_data sorted by run number - sortedjobids = [k[0] for k in jobid_run_tuples] - - # allow referencing by submodel name as well as list index - for jobid in run_data: - run_data[jobid]['config.yaml']['submodels-by-name'] = dict() - for sm in run_data[jobid]['config.yaml']['submodels']: - run_data[jobid]['config.yaml']['submodels-by-name'][sm['name']] = sm - - # make a 'timing' entry to contain model timestep and run length for both MATM and YATM runs - # run length is [years, months, days, seconds] to accommodate both MATM and YATM - prevjobid = -1 - for jobid in sortedjobids: - r = run_data[jobid] - timing = dict() - if r['namelists']['accessom2.nml'] is None: # non-YATM run - timing['Timestep'] = r['config.yaml']['submodels'][1]['timestep'] # MOM timestep - rt = r['config.yaml']['calendar']['runtime'] - timing['Run length'] = [rt['years'], rt['months'], rt['days'], 0] # insert 0 seconds - else: - timing['Timestep'] = r['namelists']['accessom2.nml']['accessom2_nml']['ice_ocean_timestep'] - rp = r['namelists']['accessom2.nml']['date_manager_nml']['restart_period'] - timing['Run length'] = rp[0:2] + [0] + [rp[2]] # insert 0 days - yrs = r['MOM_time_stamp.out']['Model run length (days)']/365.25 # FUDGE: assumes 365.25-day year - timing['SU per model year'] = r['PBS log']['Service Units']/yrs - timing['Walltime (hr) per model year'] = r['PBS log']['Walltime Used (hr)']/yrs - storagekeys = list(r['storage'].keys()) - for k in storagekeys: - timing[k + ' per model year'] = round(r['storage'][k]/yrs, 3) - - if prevjobid >= 0: # also record time including wait between runs - d1 = dateutil.parser.parse(run_data[prevjobid]['PBS log']['Run completion date']) - d2 = dateutil.parser.parse(r['PBS log']['Run completion date']) - tot_walltime = (d2-d1).total_seconds()/3600 - timing['Walltime (hr) between this completion and previous completion'] = tot_walltime - timing['Wait (hr) between this run and previous'] = tot_walltime - r['PBS log']['Walltime Used (hr)'] - timing['SU per calendar day'] = r['PBS log']['Service Units']/tot_walltime*24 - timing['Model years per calendar day'] = yrs/tot_walltime*24 - for k in storagekeys: - timing[k + ' per calendar day'] = round(r['storage'][k]/tot_walltime*24, 3) - - r['timing'] = timing - prevjobid = jobid - - # include changes in all git commits since previous run - for i, jobid in enumerate(sortedjobids): - print('.', end='', flush=True) - run_data[jobid]['git diff'] = \ - git_diff(basepath, - run_data[sortedjobids[max(i-1, 0)]]['git log']['Commit'], - run_data[jobid]['git log']['Commit']) - - # count failed jobs prior to each successful run - # BUG: always have zero count between two successful runs straddling a jobid rollover - # BUG: first run also counts all fails after a rollover - prevjobid = -1 - for jobid in sortedjobids: - c = [e for e in all_run_data.keys() if e > prevjobid and e < jobid - and e not in run_data] - c.sort() - run_data[jobid]['PBS log']['Failed previous jobids'] = c - run_data[jobid]['PBS log']['Failed previous jobs'] = len(c) - prevjobid = jobid - - if list_available: - print('\nAvailable data which can be tabulated if added to output_format') - print('(but you may need to edit some keys to ensure uniqueness):') - keylist = [] - for k in keylistssuperset(run_data): - keylist.append((k[-1], "['" + "', '".join(k) + "']")) - keylist.sort(key = lambda x: x[1]) - maxkeywidth = max([len(k[0]) for k in keylist]) - for k in keylist: - print(" ('" + k[0] + "', " + " "*(maxkeywidth-len(k[0])) + k[1] + "),") - - if dump_all: - dumpoutfile = os.path.splitext(outfile)[0]+'.yaml' - print('\nWriting', dumpoutfile) - with open(dumpoutfile, 'w') as outf: - yaml.dump(run_data, outf, default_flow_style=False) - - ########################################################################### - # Specify the output format here. - ########################################################################### - # output_format is a OrderedDict of (key, value) tuples, one for each column. - # keys are column headers (arbitrary but must be unique) - # values are lists of keys into run_data (omitting job id) - # "run_summary.py --list" will list all available data you can add here - # (but you may need to edit some keys to ensure uniqueness) - output_format = OrderedDict([ - ('Run', ['PBS log', 'Run number']), - ('Run start', ['MOM_time_stamp.out', 'Model start time']), - ('Run end', ['MOM_time_stamp.out', 'Model end time']), - ('Run length (y, m, d, s)', ['timing', 'Run length']), - ('Run length (days)', ['MOM_time_stamp.out', 'Model run length (days)']), - ('Control directory', ['paths', 'Control path']), - # ('Archive directory', ['paths', 'Archive path']), - # ('Sync directory', ['paths', 'Sync path']), - ('Output directory', ['paths', 'Output path']), - ('Output GiB', ['storage', 'Output path GiB']), - ('Restart directory', ['paths', 'Restart path']), - ('Restart GiB', ['storage', 'Restart path GiB']), - ('Run by', ['git log', 'Author']), - ('Run completion date', ['PBS log', 'Run completion date']), - ('Job Id', ['PBS log', 'Job Id']), - ('Failed jobs', ['PBS log', 'Failed previous jobs']), - ('Failed jobids', ['PBS log', 'Failed previous jobids']), - ('Queue', ['config.yaml', 'queue']), - ('Service Units', ['PBS log', 'Service Units']), - ('Walltime Used (hr)', ['PBS log', 'Walltime Used (hr)']), - ('SU per model year', ['timing', 'SU per model year']), - ('Walltime (hr) per model year', ['timing', 'Walltime (hr) per model year']), - ('Wait (hr) between runs', ['timing', 'Wait (hr) between this run and previous']), - ('SU per calendar day', ['timing', 'SU per calendar day']), - ('Model years per calendar day', ['timing', 'Model years per calendar day']), - ('Memory Used (Gb)', ['PBS log', 'Memory Used (Gb)']), - ('NCPUs Used', ['PBS log', 'NCPUs Used']), - ('MOM NCPUs', ['config.yaml', 'submodels-by-name', 'ocean', 'ncpus']), - ('CICE NCPUs', ['config.yaml', 'submodels-by-name', 'ice', 'ncpus']), - # ('Max Ocean diagnostics (s)', ['access-om2.out', '(Ocean diagnostics)', 'tmax']), - # ('Max Ocean diagnostics: tracer (s)', ['access-om2.out', '(Ocean diagnostics: tracer)', 'tmax']), - ('Fraction of MOM runtime in oasis_recv', ['access-om2.out', 'oasis_recv', 'tfrac']), - ('Max MOM wait for oasis_recv (s)', ['access-om2.out', 'oasis_recv', 'tmax']), - ('Max CICE wait for coupler (s)', ['ice_diag.d', 'timing', 'waiting_o', 'node', 'max']), - ('Max CICE I/O time (s)', ['ice_diag.d', 'timing', 'ReadWrite', 'node', 'max']), - ('MOM tile layout', ['namelists', 'ocean/input.nml', 'ocean_model_nml', 'layout']), - ('CICE tile distribution', ['namelists', 'ice/cice_in.nml', 'domain_nml', 'distribution_type']), - ('CICE block_size_x', ['ice_diag.d', 'block_size_x']), - ('CICE block_size_y', ['ice_diag.d', 'block_size_y']), - ('Timestep (s)', ['timing', 'Timestep']), - ('MOM barotropic split', ['namelists', 'ocean/input.nml', 'ocean_model_nml', 'barotropic_split']), - ('CICE dynamic split (ndtd)', ['namelists', 'ice/cice_in.nml', 'setup_nml', 'ndtd']), - # ('ktherm', ['namelists', 'ice/cice_in.nml', 'thermo_nml', 'ktherm']), - # ('Common inputs', ['config.yaml', 'input']), - # ('Atmosphere executable', ['config.yaml', 'submodels-by-name', 'atmosphere', 'exe']), - # ('Atmosphere inputs', ['config.yaml', 'submodels-by-name', 'atmosphere', 'input']), - # ('MOM executable', ['config.yaml', 'submodels-by-name', 'ocean', 'exe']), - # ('MOM inputs', ['config.yaml', 'submodels-by-name', 'ocean', 'input']), - # ('CICE executable', ['config.yaml', 'submodels-by-name', 'ice', 'exe']), - # ('CICE inputs', ['config.yaml', 'submodels-by-name', 'ice', 'input']), - # ('Payu version', ['PBS log', 'payu version']), - ('Git hash of run', ['git log', 'Commit']), - ('Commit date', ['git log', 'Date']), - ('Git-tracked file changes', ['git diff', 'Changed files']), - ('Git log messages', ['git diff', 'Messages']), - ]) - SUdata = [dictget(run_data, [jobid] + ['PBS log', 'Service Units']) - for jobid in sortedjobids] - stats = OrderedDict([ # tuples: (label, function) - ('Total', sum), - ('Mean', np.mean), - ('Median', np.median), - ('Min', min), - ('Max', max), - ('Std dev', np.std), - ('SU correlation', lambda x: np.corrcoef(x, SUdata)[0, 1]), - ('SU slope', lambda x: np.polyfit(x, SUdata, 1)[0]), - ('Dimensionless SU slope', lambda x: np.polyfit(x, SUdata, 1)[0]*np.mean(x)/np.mean(SUdata)) - ]) - ########################################################################### - if no_stats: - stats = OrderedDict([]) - if show_fails: - # output crash-related info (redefines order of any keys already in output_format) - output_format_prefix = OrderedDict([ - ('Job Id', ['PBS log', 'Job Id']), - ('Run completion date', ['PBS log', 'Run completion date']), - ('Exit Status', ['PBS log', 'Exit Status']), - ('Timeout', ['PBS log', 'Timeout']), - ('Walltime Requested (hr)', ['PBS log', 'Walltime Requested (hr)']), - ('Walltime Used (hr)', ['PBS log', 'Walltime Used (hr)']), - ('qsub_flags', ['config.yaml', 'qsub_flags']), - ]) - output_format_prefix.update(output_format) - output_format = output_format_prefix - else: - # output all namelist changes - output_format_nmls = OrderedDict() - nmls_any_runs = set(run_data[list(run_data.keys())[0]]['namelists'].keys()) - nmls_all_runs = nmls_any_runs - # avoid dict comprehension here to avoid python<2.7 syntax error - nmls_no_runs = dict([(k, True) for k in nmls_any_runs]) # True for namelists that are None for all runs - # nmls_no_runs = {k: True for k in nmls_any_runs} # True for namelists that are None for all runs - for jobid in run_data: - run_nmls = run_data[jobid]['namelists'] - nmls_any_runs = set(run_nmls.keys()) | nmls_any_runs - nmls_all_runs = set(run_nmls.keys()) & nmls_all_runs - for nml in set(nmls_all_runs): - if run_nmls[nml] is None: - nmls_all_runs.remove(nml) - for nml in run_nmls: - newnone = (nml is None) - if nml in nmls_no_runs: - nmls_no_runs[nml] = nmls_no_runs[nml] and newnone - else: - nmls_no_runs.update({nml: newnone}) - for nml in set(nmls_any_runs): - if nmls_no_runs[nml]: - nmls_any_runs.remove(nml) - - # add every changed group/variable in nml files that exist in all runs - for nml in nmls_all_runs: - # avoid dict comprehension here to avoid python<2.7 syntax error - nmllistall = dict([(jobid, - copy.deepcopy(run_data[jobid]['namelists'][nml])) - for jobid in run_data]) - # nmllistall = {jobid: copy.deepcopy(run_data[jobid]['namelists'][nml]) - # for jobid in run_data} - groups = nmltab.superset(nmltab.nmldiff(nmllistall)) - for group in groups: - for var in groups[group]: - ngv = [nml, group, var] - output_format_nmls.update(OrderedDict([ - (' -> '.join(ngv), ['namelists'] + ngv)])) - - # add all group/variables in nml files that exist in only some runs - for nml in nmls_any_runs - nmls_all_runs: - nmllistall = dict() - for jobid in run_data: - if nml in run_data[jobid]['namelists']: - if run_data[jobid]['namelists'][nml] is not None: - nmllistall.update({jobid: - copy.deepcopy(run_data[jobid]['namelists'][nml])}) - groups = nmltab.superset(nmllistall) - for group in groups: - for var in groups[group]: - ngv = [nml, group, var] - output_format_nmls.update(OrderedDict([ - (' -> '.join(ngv), ['namelists'] + ngv)])) - - # alphabetize - output_format_nmls = OrderedDict([(k, output_format_nmls[k]) - for k in sorted(output_format_nmls.keys())]) - - # add output_format entries for every namelist variable that has changed in any run - output_format.update(output_format_nmls) - - # output csv file according to output_format above - print('\nWriting', outfile) - - if len(stats) > 0: - lhcol = [None] - else: - lhcol = [] # don't allow space for stats headings if we don't have any - - with open(outfile, 'w', newline='') as csvfile: - csvw = csv.writer(csvfile, dialect='excel', lineterminator='\n') - - if not no_header: - csvw.writerow(['Summary report generated by run_summary.py, https://github.com/aekiss/run_summary']) - csvw.writerow(['report generated:', datetime.datetime.now().replace(microsecond=0).astimezone().isoformat()]) - csvw.writerow(['control directory path:', basepath, 'git branch:', git_branch]) - csvw.writerow(['output path:', sync_path]) - - csvw.writerow(lhcol + list(output_format.keys())) # header - for jobid in sortedjobids: # output a row for each jobid - csvw.writerow(lhcol + [dictget(run_data, [jobid] + keylist) for keylist in output_format.values()]) - - if len(stats) > 0: - # calculate and save summary stats - statsdata = copy.deepcopy(output_format) - for k, keylist in output_format.items(): # calculate summary stats - coldata = [dictget(run_data, [jobid] + keylist) for jobid in sortedjobids] - coldata = [c for c in coldata if c is not None] - statsdata[k] = {label:tryfunc(func, coldata) for (label, func) in stats.items()} - # write summary stats - csvw.writerow(lhcol + [None]*len(list(output_format.keys()))) # blank row - csvw.writerow(lhcol + list(output_format.keys())) # header - for s in stats: - csvw.writerow([s] + [statsdata[k][s] for k in statsdata.keys()]) - - print('Done.') - return - - -if __name__ == '__main__': - import argparse - parser = argparse.ArgumentParser(description= - 'Summarise ACCESS-OM2 runs.\ - Latest version and help: https://github.com/aekiss/run_summary') - parser.add_argument('-f', '--show_fails', - action='store_true', default=False, - help='include failed runs (disables some output columns)') - parser.add_argument('-l', '--list', - action='store_true', default=False, - help='list all data that could be tabulated by adding it to output_format') - parser.add_argument('-d', '--dump_all', - action='store_true', default=False, - help='also dump all data to .yaml') - parser.add_argument('-o', '--outfile', type=str, - metavar='file', - default=None, - help="output file path; default is\ - 'run_summary_.csv';\ - overrides --outfile_syncdir if set.\ - WARNING: output file will be overwritten") - parser.add_argument('--outfile_syncdir', - action='store_true', default=False, - help="set output file path to\ - 'run_summary_.csv'\ - or 'run_summary_.csv' if sync dir\ - path is invalid;\ - ignored if '-o', '--outfile' is set.\ - WARNING: output file will be overwritten") - parser.add_argument('--no_header', - action='store_true', default=False, - help="don't write header rows in output .csv") - parser.add_argument('--no_stats', - action='store_true', default=False, - help="don't output summary statistics") - parser.add_argument('path', metavar='path', type=str, nargs='*', - help='zero or more ACCESS-OM2 control directory paths; default is current working directory') - args = parser.parse_args() - show_fails = vars(args)['show_fails'] - lst = vars(args)['list'] - dump_all = vars(args)['dump_all'] - outfile = vars(args)['outfile'] - outfile_syncdir = vars(args)['outfile_syncdir'] - no_header = vars(args)['no_header'] - no_stats = vars(args)['no_stats'] - basepaths = vars(args)['path'] # a list of length >=0 since nargs='*' - if not basepaths: - basepaths = [os.getcwd()] - for bp in basepaths: - try: - run_summary(show_fails=show_fails, basepath=bp, - outfile=outfile, list_available=lst, - dump_all=dump_all, - outfile_syncdir=outfile_syncdir, - no_header=no_header, - no_stats=no_stats) - except: - print('\nFailed. Error:', sys.exc_info()) diff --git a/sync_data.sh b/sync_data.sh deleted file mode 100755 index 48b22b1e..00000000 --- a/sync_data.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/bin/bash -#PBS -q copyq -#PBS -l ncpus=1 -#PBS -l wd -#PBS -l walltime=10:00:00,mem=12GB -#PBS -l storage=gdata/hh5+gdata/ik11+gdata/cj50+scratch/v45+scratch/x77+scratch/g40 -#PBS -N sync - -# Set SYNCDIR to the path you want your data copied to. -# This must be a unique absolute path for your set of runs. -# To share your data, sync to a subdirectory in /g/data/ik11/outputs/ -# but first add an experiment description - see /g/data/ik11/outputs/README -# and make sure metadata.yaml is correct. -# DOUBLE-CHECK SYNCDIR PATH IS UNIQUE SO YOU DON'T OVERWRITE EXISTING OUTPUT! -SYNCDIR=/ERROR/SET/SYNCDIR/IN/sync_data.sh - -exitcode=0 -help=false -dirtype=output -exclude="--exclude *.nc.* --exclude iceh.????-??-??.nc --exclude *-DEPRECATED --exclude *-DELETE --exclude *-IN-PROGRESS" -rsyncflags="-vrltoD --safe-links" -rmlocal=false -backward=false - -# parse argument list -while [ $# -ge 1 ]; do - case $1 in - -h) - help=true - ;; - -r) - echo "syncing restarts instead of output directories" - dirtype=restart - ;; - -u) - echo "ignoring exclusions - syncing collated and uncollated .nc files" - exclude="" - ;; - -b) - echo "backward sync (from SYNCDIR to local)" - backward=true - ;; - -D) - # --remove-source-files tells rsync to remove from the sending side the files (meaning non-directories) - # that are a part of the transfer and have been successfully duplicated on the receiving side. - # This option should only be used on source files that are quiescent. - # Require interaction here to avoid syncing and removing partially-written files. - echo "DELETING SOURCE COPIES OF SYNCED FILES!" - echo "WARNING: to avoid losing data, do not proceed if there are any running jobs or collations underway." - read -p "Proceed? (y/n) " yesno - case $yesno in - [Yy] ) rmlocal=true;; - * ) echo "Cancelled. Wait until all jobs are finished before trying again."; exit 0;; - esac - ;; - -*) - echo $1": invalid option" - exitcode=1 - ;; - *) - echo $1": invalid argument" - exitcode=1 - ;; - esac - shift -done - -if [ $exitcode != "0" -o $help == true ]; then - echo "$0: rsync model run outputs (and optionally restarts) to another location." - echo " Must be invoked from a control directory." - echo " $0 should be edited to set SYNCDIR." - echo " Default will rsync all output directories, leaving local copies intact." - echo " Uncollated .nc files are not rsynced unless the -u option is used." - echo " Also rsyncs error_logs and pbs_logs." - echo " Also updates git-runlog, a git clone of the control directory (whose git history documents all changes in the run)." - echo " Also updates, rsyncs and commits run summary" - echo "usage: $0 [-h] [-r] [-u] [-D]" - echo " -h: show this help message and exit" - echo " -r: sync all restart directories instead of output directories" - echo " -u: ignore exclusions - sync collated and uncollated .nc files (default is collated only)" - echo " -b: backward sync, i.e. from SYNCDIR to local dir (default is from local to SYNCDIR)." - echo " -D: delete all source copies (i.e. local copies, or copies on SYNCDIR if -b is used) of synced output or restart files (depending on -r), retaining only the empty directories. Must be done interactively. If -b is not used, the most recent synced local files are not deleted, so model run can continue. Does not delete non-output/restart files. (Default leaves all source copies intact.)" - exit $exitcode -fi - - -# concatenate ice daily files -module load nco -for d in archive/output*/ice/OUTPUT; do - for f in $d/iceh.????-??-01.nc; do - if [[ ! -f ${f/-01.nc/-IN-PROGRESS} ]] && [[ ! -f ${f/-01.nc/-daily.nc} ]]; - then - touch ${f/-01.nc/-IN-PROGRESS} - echo "doing ncrcat -O -L 5 -7 ${f/-01.nc/-??.nc} ${f/-01.nc/-daily.nc}" - ncrcat -O -L 5 -7 ${f/-01.nc/-??.nc} ${f/-01.nc/-daily.nc} && chmod g+r ${f/-01.nc/-daily.nc} && rm ${f/-01.nc/-IN-PROGRESS} - if [[ ! -f ${f/-01.nc/-IN-PROGRESS} ]] && [[ -f ${f/-01.nc/-daily.nc} ]]; - then - for daily in ${f/-01.nc/-??.nc} - do - # mv $daily $daily-DELETE # rename individual daily files - user to delete - rm $daily - done - else - rm ${f/-01.nc/-IN-PROGRESS} - fi - fi - done -done - - -sourcepath="$PWD" -mkdir -p $SYNCDIR || { echo "Error: cannot create $SYNCDIR - edit $0 to set SYNCDIR"; exit 1; } - -cd archive || exit 1 - -# copy all outputs/restarts -if [ $backward == true ]; then - rsync $exclude $rsyncflags $SYNCDIR/${dirtype}[0-9][0-9]*[0-9] . - if [ $rmlocal == true ]; then - rsync --remove-source-files $exclude $rsyncflags $SYNCDIR/${dirtype}[0-9][0-9]*[0-9] . - fi - # Also sync error and PBS logs and metadata.yaml and run summary - rsync $rsyncflags $SYNCDIR/error_logs . - rsync $rsyncflags $SYNCDIR/pbs_logs . - cd $sourcepath - rsync $rsyncflags $SYNCDIR/metadata.yaml . - rsync $rsyncflags $SYNCDIR/run_summary*.csv . -else - # normal case: forward sync from current dir to SYNCDIR - # first delete any cice log files that only have a 105-character header and nothing else - find output* -size 105c -iname "ice.log.task_*" -delete - - rsync $exclude $rsyncflags ${dirtype}[0-9][0-9]*[0-9] $SYNCDIR - if [ $rmlocal == true ]; then - # Now do removals. Don't remove final local copy, so we can continue run. - rsync --remove-source-files --exclude `\ls -1d ${dirtype}[0-9][0-9]*[0-9] | tail -1` $exclude $rsyncflags ${dirtype}[0-9][0-9]*[0-9] $SYNCDIR - for d in ${dirtype}[0-9][0-9]*[0-9]/ice/OUTPUT; do - rm $d/iceh.????-??-??.nc-DELETE - done - fi - # Also sync error and PBS logs and metadata.yaml and run summary - rsync $rsyncflags error_logs $SYNCDIR - rsync $rsyncflags pbs_logs $SYNCDIR - cd $sourcepath - rsync $rsyncflags metadata.yaml $SYNCDIR - rsync $rsyncflags run_summary*.csv $SYNCDIR - - # create/update a clone of the run history in $SYNCDIR/git-runlog - cd $SYNCDIR || exit 1 - ls git-runlog || git clone $sourcepath git-runlog - cd git-runlog - git pull --no-rebase - - # update and sync run summary - do this last in case it doesn't work - cd $sourcepath - module use /g/data/hh5/public/modules - module load conda/analysis3 - module load python3-as-python - ./run_summary.py --no_header - rsync $rsyncflags run_summary*.csv $SYNCDIR - git add run_summary*.csv - git commit -m "update run summary" - cd $SYNCDIR/git-runlog && git pull --no-rebase -fi - -echo "$0 completed successfully" diff --git a/tidy_restarts.py b/tidy_restarts.py deleted file mode 100755 index 1341a6db..00000000 --- a/tidy_restarts.py +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/env python3 -""" - -Tidy up restart directories - see argparse description below. - -This is best used with restart_freq: 1 in config.yaml. - -Author: Andrew Kiss, ANU - -""" - -from __future__ import print_function -import os -import sys -from glob import glob - -def tidy(yearskip=1, keeplast=1): - yearskip = abs(yearskip) - keeplast = max(1, abs(keeplast)) # always keep the last restart - restarts = glob('archive/restart[0-9][0-9]*[0-9]') - restarts.sort(key=lambda f: int(f.split('restart')[1])) - - keptyear = None - for r in restarts[:-keeplast]: # don't touch the most recent |keeplast| restarts - try: - fn = os.path.join(r, 'ocean/ocean_solo.res') - with open(fn, 'r') as f: - lineList = f.readlines() - year = int(lineList[-1].split()[0]) # the year of the final time in the run - if (keptyear is not None) and (year < keptyear+yearskip): - print('Moving ' + r + ' to ' + r.replace('archive', 'archive/CHUCKABLE')) - os.renames(r, r.replace('archive', 'archive/CHUCKABLE')) - else: # always keep the earliest restart, so tidy has a consistent reference point - print('Keeping ' + r + ': ending time is' + lineList[-1], end='') - keptyear = year - except: - print('Error in', fn+':', sys.exc_info()) - - for r in restarts[-keeplast:]: # this just reports dates of the most recent |keeplast| restarts - try: - fn = os.path.join(r, 'ocean/ocean_solo.res') - with open(fn, 'r') as f: - lineList = f.readlines() - print('Keeping ' + r + ': ending time is' + lineList[-1], end='') - except: - print('Error in', fn+':', sys.exc_info()) - - print("Note: it's up to you to delete anything moved to archive/CHUCKABLE") - - -if __name__ == '__main__': - import argparse - parser = argparse.ArgumentParser(description= - 'Tidy up restart directories, keeping only the most recent \ - --keep-last restarts and the first \ - restart each --year-skip years (counting from the earliest forward, \ - keeping the last one written from the previous year, \ - dated 1 Jan 00:00:00, if available), \ - and moving the rest to archive/CHUCKABLE/ \ - from which the user can delete them manually if needed. \ - This is best used with restart_freq: 1 in config.yaml.') - parser.add_argument('-y', '--year-skip', type=int, - metavar='n', default=1, - help="keep one restart every n years (default=1)") - parser.add_argument('-k', '--keep-last', type=int, - metavar='m', default=1, - help="keep last m >= 1 restarts (default=1)") - args = parser.parse_args() - yearskip = vars(args)['year_skip'] - keeplast = vars(args)['keep_last'] - tidy(yearskip=yearskip, keeplast=keeplast) diff --git a/resub.sh b/tools/resub.sh similarity index 100% rename from resub.sh rename to tools/resub.sh