Skip to content

Commit

Permalink
Ready for 1.5.1 release; rolled back ESGF dataset-id query
Browse files Browse the repository at this point in the history
  • Loading branch information
paolap committed Aug 15, 2023
1 parent cc7b632 commit 6aec1e2
Show file tree
Hide file tree
Showing 7 changed files with 16 additions and 21 deletions.
1 change: 1 addition & 0 deletions clef/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,7 @@ def common_esgf_cli(ctx, project, query, latest, replica, distrib,
limit=10000,
**constraints,
)
val = [x for x in s.query(q)][0]

ids=sorted(set(x.dataset_id for x in s.query(q)))
# when stats or csvf are True first extract attributes from dataset_ids
Expand Down
22 changes: 8 additions & 14 deletions clef/esgf.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
from sqlalchemy import String, Float, Integer, or_, func

#from .pgvalues import values
from .model import Path, Checksum
from .model import Path, Checksum, C6Dataset, C5Dataset, CordexDataset
from .exception import ClefException


Expand Down Expand Up @@ -167,11 +167,8 @@ def find_checksum_id(query, **kwargs):
raise ESGFException('No matches found on ESGF, check at %s'%link_to_esgf(query, **constraints))

if response['response']['numFound'] > int(response['responseHeader']['params']['rows']):
print(f"Too many files ({response['response']['numFound']}), try limiting your search.\n")
print("Returning only dataset results, hence a full comparison with local collection is not possible")
response = esgf_query(query, 'id,dataset_id,title,version', otype='Dataset', **constraints)
#raise ESGFException('Too many results (%d), try limiting your search %s'%(response['response']['numFound'],
# link_to_esgf(query, **constraints)))
raise ESGFException('Too many results (%d), try limiting your search %s'%(response['response']['numFound'],
link_to_esgf(query, **constraints)))
# separate records that do not have checksum in response (nosums list) from others (records list)
# we should call local_search for these i.e. a search not based on checksums but is not yet implemented
nosums=[]
Expand All @@ -189,8 +186,7 @@ def find_checksum_id(query, **kwargs):
records.append(doc)
else:
nosums.append(doc)
print(doc)


record_list = [
(doc['checksum'][0],
doc['id'].split('|')[0], # drop the server name
Expand All @@ -202,7 +198,7 @@ def find_checksum_id(query, **kwargs):
nosums_list = [
('NA',
doc['id'].split('|')[0], # drop the server name
doc['dataset_id'].split('|')[0], # Drop the server name
doc['id'].split('|')[0], # Drop the server name
doc['title'],
doc['version'],
doc['score'])
Expand All @@ -221,7 +217,7 @@ def find_checksum_id(query, **kwargs):
table = sqlalvalues(
column('checksum', String),
column('id', String),
#column('dataset_id', String),
column('dataset_id', String),
column('title', String),
column('version', Integer),
column('score', Float),
Expand Down Expand Up @@ -259,13 +255,11 @@ def match_query(session, query, latest=None, **kwargs):
.outerjoin(Path))
else:
# Match on file name
#return values.outerjoin(Path, Path.path.like('%/'+values.c.title))
#return values.outerjoin(Path, func.regexp_replace(Path.path, '^.*/', '') == values.c.title)
matches = checksum_table.join(Path, func.regexp_replace(Path.path, '^.*/', '') == checksum_table.c.title)

if nocksum is True:
if project == 'CMIP6':
matches = (checksum_table.join(C6Dataset, C6.Dataset.dataset_id == checksum_table.c.dataset_id))
raise ESGFException(f'Some datasets have incomplete records try --local option')

return matches


Expand Down
2 changes: 1 addition & 1 deletion clef/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ def expr(self, model):


class Path(Base):
"""Path of a file on Raijin, with links to metadata
"""Path of a file on Gadi, with links to metadata
"""
__tablename__ = 'esgf_paths'

Expand Down
2 changes: 1 addition & 1 deletion conda/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{% set version = "1.5.0" %}
{% set version = "1.5.1" %}
package:
name: clef
version: {{ version }}
Expand Down
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@
#version = _dist.parsed_version.base_version
# The full version, including alpha/beta/rc tags.
#release = _dist.version
version = u'1.5.0'
release = u'1.5.0'
version = u'1.5.1'
release = u'1.5.1'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = clef
version = 1.5.0
version = 1.5.1
author = Scott Wales, Paola Petrelli
author_email = scott.wales@unimelb.edu.au, paola.petrelli@utas.edu.au
summary = 'CleF queries ESGF data at NCI'
Expand Down
4 changes: 2 additions & 2 deletions test/test_esgf.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,14 +131,14 @@ def test_checksum_id_empty(session):
"""
with mock.patch('clef.esgf.esgf_query', side_effect=empty_query):
with pytest.raises(ClefException):
table = find_checksum_id('')
table, nocksum = find_checksum_id('')

def test_checksum_id_missing(session):
"""
Create a values table with the returned result
"""
with mock.patch('clef.esgf.esgf_query', side_effect=missing_query):
table = find_checksum_id('')
table, nocksum = find_checksum_id('')
match = session.query(table).one()
assert match.id == 'abcde'
assert match.score == 1.0
Expand Down

0 comments on commit 6aec1e2

Please sign in to comment.