-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjournalmap2meta.py
120 lines (104 loc) · 4.79 KB
/
journalmap2meta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
__author__ = 'valentin'
import sys
import ulmo.cuahsi.his_central as his
import ulmo.cuahsi.wof as wof
from resource import cvs2meta
reload(sys)
sys.setdefaultencoding('utf-8')
import os
from dateparser.date import DateDataParser
import ssl
import urllib2
import uuid
import json
import time
# {'abstract': "The goal of NLDAS is to construct quality-controlled, and spatially and "
# " 'service_description_url': 'http://hiscentral.cuahsi.org/pub_network.aspx?n=274',
# 'organization_website': 'http://disc.sci.gsfc.nasa.gov/hydrology/data-holdings',
# 'service_status': None,
# 'variable_count': 5,
# 'network_name': 'NLDAS_NOAH',
# 'min_x': -124.9375,
# 'min_y': 25.0625,
# 'title': 'NLDAS Hourly NOAH Data',
# 'value_count': 2147483647,
# 'service_url': 'http://hydro1.sci.gsfc.nasa.gov/daac-bin/his/1.0/NLDAS_NOAH_002.cgi?WSDL',
# 'site_count': 103936,
# 'email': 'gsfc-help-disc@lists.nasa.gov',
# 'max_x': -67.0625, 'max_y': 52.9375,
# 'citation': 'Mitchell, K.E., D. Lohmann, P.R. Houser, E.F. Wood, J.C. Schaake, A. Robock, B.A. Cosgrove, J. Sheffield, Q. Duan, L. Luo, R.W. Higgins, R.T. Pinker, J.D. Tarpley, D.P. Lettenmaier, C.H. Marshall, J.K. Entin, M. Pan, W. Shi, V. Koren, J. Meng, B. H. Ramsay, and A.A. Bailey (2004), The multi-institution North American Land Data Assimilation System (NLDAS): Utilizing multiple GCIP products and partners in a continental distributed hydrological modeling system, J. Geophys. Res., 109, D07S90, doi:10.1029/2003JD003823.',
# 'phone': '301-614-5224',
# 'service_id': 274,
# 'organization': 'NASA Goddard Earth Sciences (GES) Data and Information Services Center'}
def main():
#baseurl = "https://www.journalmap.org/api/articles.json?key=QLT-2Xqqorgxhz3pF_cN&page=%s"
search_url = "https://www.journalmap.org/articles/search?key=QLT-2Xqqorgxhz3pF_cN&page=%s"
article_url = "https://www.journalmap.org/api/articles/%s.json?key=QLT-2Xqqorgxhz3pF_cN"
#wof_vars = wof.get_variable_info( out[0]['service_url'] )
#print wof_vars
template = cvs2meta.load_template('journalmap_metadata19115.xml',base_bath='./templates')
id_field = 'if'
base_path="output/journalmap"
base_name="journalmap"
try:
os.stat(base_path)
except:
os.mkdir(base_path)
context = ssl.create_default_context(ssl.Purpose.CLIENT_AUTH)
context.options &= ~ssl.OP_NO_SSLv3
i = 1
max_pages = 1000000
while i < max_pages:
try:
url_search= search_url % i
resp = urllib2.urlopen(url_search, context=context)
search = json.load(resp)
max_pages = search['meta']['total_pages']
if (i > max_pages):
exit(0)
articles = search['articles']
#out = render_template(template_values, template)
ddp = DateDataParser()
for a in articles:
try:
url_article = article_url % a['id']
resp = urllib2.urlopen(url_article, context=context)
an_article = json.load(resp)
except urllib2.HTTPError as ex:
if (ex.code == 403):
print "403 article id:" +str(a['id']) +" page:"+ str(i)+ ex.msg
time.sleep(30)
elif (ex.code == 500):
print "500 article: " +str(a['id']) +" page:"+ str(i)+ ex.msg
else:
print "http error article:" +str(a['id']) +" page:"+ str(i)+ ex.message
if an_article['authors']:
authors= an_article['authors']
if an_article['locations']:
locations=an_article['locations']
try:
out = template.render(a=a, authors=authors,locations=locations, article=an_article, ddp=ddp)
# d[id_field]) inlcudes colons... so bad filenames
#f_out = open(base_path+"/%s_%s.xml" % (base_name , d[id_field]) , "w")
f_out = open(base_path+"/%s_%s.xml" % (base_name , a["id"]) , "w")
f_out.write(out)
f_out.close()
except Exception as ex:
print "error in template article id: " + str(a["id"])
time.sleep(0.25)
i += 1
time.sleep(1)
except urllib2.HTTPError as ex:
if (ex.code == 403):
print "403 search: "+" page:"+ str(i) + ex.msg
time.sleep(30)
elif (ex.code == 500):
print "500 search: "+" page:"+ str(i) + ex.msg
else:
print " http error search:"+" "+" page:"+ str(i) + ex.message
except Exception as ex:
print "error:" + ex.message
i += 1
print "ended"
if __name__ == "__main__":
main()