-
Notifications
You must be signed in to change notification settings - Fork 0
/
edit_xml_saildrone_newintake_v2.5.py
201 lines (159 loc) · 9.33 KB
/
edit_xml_saildrone_newintake_v2.5.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#!/usr/bin/env python
# coding: utf-8
# Purpose: make it easy to work through a generated XML file for the data served to an ERDDAP.
import os
os.environ['HDF5_USE_FILE_LOCKING']='FALSE'
import xml.etree.ElementTree as ET
# Copy the input into a scratch file wrapped in an extra <toplevel> root so the
# snippet (several sibling <dataset> elements) can be parsed as one well-formed
# XML document.
# Adding <toplevel> is not needed for the full datasets.xml because ERDDAP's
# GDX.sh does it for you; drop the two wrapper writes below if the input already
# has a single root -- edit_xml_saildrone.ipynb should tell you if there are
# problems.
# Example hard-coded paths for non-interactive runs:
#inputFilename = '/home/users/kobrien/erddap/tomcat8/webapps/erddap/WEB-INF/sd1031_hurricane_2024_temp.xml'
#outputFilename = '/home/users/koukel/tomcat/content/erddap/metaedit_test_xmls/datasets_again_check.xml'
# Ask for a filename to read and a filename to save out to.
inputFilename = input('Enter a filename to read in: ')
outputFilename = input('Enter a filename to be saved out: ')
# Scratch file "<name>_del<ext>" next to the requested output.
# os.path.splitext splits only the final extension, so names such as
# './out.xml', 'dir.v2/out.xml' or an extension-less 'out' are handled; the
# previous split('.') indexing produced wrong paths or an IndexError for these.
_scratch_stem, _scratch_ext = os.path.splitext(outputFilename)
outputFilename_del = _scratch_stem + '_del' + _scratch_ext
with open(inputFilename) as inXML, open(outputFilename_del, 'w') as outXML:
    outXML.write('<toplevel>\n')
    snippet = inXML.read()
    outXML.write(snippet)
    # The last step of this script removes the wrapper by dropping the first
    # and last line of the scratch file, so the closing tag must sit on a line
    # of its own even when the input does not end with a newline.
    if snippet and not snippet.endswith('\n'):
        outXML.write('\n')
    outXML.write('</toplevel>\n')
# Parse the wrapped scratch file. The TreeBuilder is told to insert comments
# so that XML comments in the input are kept in the tree and written back out.
#outputFilename = '/home/users/koukel/tomcat/content/erddap/metaedit_test_xmls/no_metaedit_xmls/datasets_xmledit.xml'
comment_keeping_builder = ET.TreeBuilder(insert_comments=True)
parser = ET.XMLParser(target=comment_keeping_builder)
tree = ET.parse(outputFilename_del, parser=parser)
root = tree.getroot()

# Values used further down to build the new datasetIDs and titles.
inst_name, proj_name = 'NOAA AOML PMEL', 'Hurricane Monitoring'
mission_name, year = 'hurricane', '2024'
# To prompt for these instead of hard-coding them:
#mission_name = input('Enter the mission name (such as hurricane_monitoring or tpos): ')
#year = input('Enter the year: ')
# For every dataset whose <fileDir> text contains 'saildrone', set the
# cdm_trajectory_variables and subsetVariables attributes (any <att> found
# under an <addAttributes> of that dataset) to 'trajectory'.
for dataset in root.iter('dataset'):
    for file_dir in dataset.iter('fileDir'):
        if 'saildrone' not in file_dir.text:
            continue
        for add_attrs in dataset.iter('addAttributes'):
            for att in add_attrs.iter('att'):
                if att.get('name') in ('cdm_trajectory_variables', 'subsetVariables'):
                    att.text = 'trajectory'
                    # Debug aid:
                    #print(att.attrib, ', ', att.text)
# If printing, expect pairs of:
#{'name': 'cdm_trajectory_variables'} , trajectory
#{'name': 'subsetVariables'} , trajectory
# Set <reloadEveryNMinutes> to 15 for every dataset whose <fileDir> text
# contains 'saildrone' (the generated value is some high number, e.g. 10080).
for dataset in root.iter('dataset'):
    for file_dir in dataset.iter('fileDir'):
        #print(file_dir.text)
        if 'saildrone' not in file_dir.text:
            continue
        for reload_node in dataset.iter('reloadEveryNMinutes'):
            #print(reload_node.text)   # value before the edit
            reload_node.text = '15'
            #print(reload_node.text)   # value after the edit
# If printing, expect sets of:
#/home/users/koukel/test/metaedit_test/saildrone2/hawaiian_islands_ocean_chemistry/2023/offset/1091/
#10080
#15
# For saildrone datasets only (fileDir text contains 'saildrone'), force the
# <dataType> of the dataVariable whose <sourceName> is 'trajectory' to String.
for dataset in root.iter('dataset'):
    for file_dir in dataset.iter('fileDir'):
        if 'saildrone' not in file_dir.text:
            continue
        for data_var in dataset.iter('dataVariable'):
            #print('kid ', data_var.tag)
            is_trajectory_var = any(
                source_name.text == 'trajectory'
                for source_name in data_var.iter('sourceName')
            )
            if is_trajectory_var:
                for data_type in data_var.iter('dataType'):
                    data_type.text = 'String'
# Set the testOutOfDate attribute to 'now-2hours' in every dataset that has at
# least one <fileDir>. Unlike the edits above, this one is not restricted to
# paths containing 'saildrone'.
for dataset in root.iter('dataset'):
    if next(dataset.iter('fileDir'), None) is None:
        continue
    for add_attrs in dataset.iter('addAttributes'):
        for att in add_attrs.iter('att'):
            #print(att.text)
            if att.get('name') == 'testOutOfDate':
                att.text = 'now-2hours'
#GTS_yes = 'y'
GTS_yes = input('Are these GTS data? Type y for yes: ')

# Path keywords -> title label, applied in order; a later match overrides an
# earlier one. ('wave_spectra' is not listed separately: any path containing it
# also contains 'wave'.)
_TITLE_KEYWORD_RULES = (
    (('ek80', 'EK80', 'echosounder', 'Echosounder'), ' Echosounder Saildrone '),
    (('wave',), ' Wave Data Saildrone '),
    (('sea-trial',), ' Sea Trial Saildrone '),
    (('daily_files', 'real-time'), ' NRT Saildrone '),
    (('delayed', 'offset'), ' Offset Saildrone '),
)


def _dataset_id_for(file_dir_path):
    """Return ``(log_label, datasetID)`` for a saildrone fileDir path.

    Returns ``None`` when the path does not contain 'saildrone', in which case
    the caller leaves the existing datasetID alone. The ID is built from the
    path and the module-level ``mission_name`` and ``year``.
    """
    if 'saildrone' not in file_dir_path:
        return None
    path_parts = file_dir_path.split('/')
    # assumes fileDir ends with '/', so [-2] is the last directory (the drone
    # number in the example paths) -- TODO confirm
    base_id = 'sd' + path_parts[-2] + '_' + mission_name + '_' + year
    if 'high_res' in file_dir_path:
        # NOTE(review): index 8 is tied to one specific directory depth
        # (presumably the resolution directory) -- verify against real paths.
        return 'high res saildrone, ', base_id + '_' + path_parts[8]
    if 'ADCP' in file_dir_path or 'adcp' in file_dir_path:
        return 'adcp saildrone, ', base_id + '_adcp'
    return 'datasetID, ', base_id


def _title_for(file_dir_path, is_gts):
    """Return the dataset title derived from a fileDir path.

    Rules are applied in sequence and the last matching rule wins; ``is_gts``
    overrides everything. Uses the module-level ``inst_name``, ``proj_name``
    and ``year``.
    """
    # Drone id: the 3 or 4 characters before the trailing character of the
    # path (3 when they are preceded by a '-').
    if file_dir_path[-5] == '-':
        drone_id = file_dir_path[-4:-1]
    else:
        drone_id = file_dir_path[-5:-1]
    prefix = inst_name + ' ' + proj_name + ' ' + year
    title = prefix + ' Saildrone ' + drone_id

    # Fix: the original condition was `A or B and C`, which Python parses as
    # `A or (B and C)`, so every upper-case 'ADCP' path was titled as
    # high resolution even without 'hz' in it.
    is_adcp = ('ADCP' in file_dir_path) or ('adcp' in file_dir_path)
    if is_adcp and 'hz' in file_dir_path:
        title = (prefix + ' High Resolution ' + file_dir_path.split('/')[-2]
                 + ' ADCP Saildrone ' + drone_id)
    elif is_adcp:
        title = prefix + ' ADCP Saildrone ' + drone_id

    for keywords, label in _TITLE_KEYWORD_RULES:
        if any(keyword in file_dir_path for keyword in keywords):
            title = prefix + label + drone_id

    # NOTE(review): this also replaces the ADCP high-resolution title chosen
    # above (order kept from the original) -- confirm that is intended.
    if 'hz' in file_dir_path:
        title = (prefix + ' High Resolution ' + file_dir_path.split('/')[-2]
                 + ' Saildrone ' + drone_id)
    # Capitalise to the proper unit form, Hz.
    title = title.replace('hz', 'Hz')

    if is_gts:
        title = prefix + ' GTS Saildrone ' + drone_id
    return title


def _set_global_title(dataset, title):
    """Set the title att in the first <addAttributes> found under ``dataset``.

    An existing ``<att name="title">`` is reused; one is created only when
    none exists (the original always appended a new one, producing duplicates).
    Later <addAttributes> elements of the dataset are not touched. The
    original's ET.indent call is dropped: it returns immediately for an
    element without children, and raised AttributeError where it is missing.
    """
    global_attrs = next(dataset.iter('addAttributes'), None)
    if global_attrs is None:
        return
    title_atts = [att for att in global_attrs.iter('att') if att.get('name') == 'title']
    if not title_atts:
        siblings = list(global_attrs)
        new_att = ET.SubElement(global_attrs, 'att', name='title')
        if siblings:
            # Keep the layout: the new element takes over the whitespace that
            # preceded the closing tag, and the previous last child gets the
            # same whitespace that precedes the first child.
            new_att.tail = siblings[-1].tail
            siblings[-1].tail = global_attrs.text
        title_atts = [new_att]
    for att in title_atts:
        att.text = title


# Edit datasetID (saildrone paths only) and title for every dataset, based on
# its <fileDir>. The title is written even when no title att existed before.
for elemental in root.iter('dataset'):
    for elemenkid in elemental.iter('fileDir'):
        file_dir_path = elemenkid.text
        print('fileDir ,', file_dir_path)
        # Make the datasetID, independent of any preexisting title.
        id_info = _dataset_id_for(file_dir_path)
        if id_info is not None:
            log_label, new_dataset_id = id_info
            elemental.set('datasetID', new_dataset_id)
            print(log_label, elemental.attrib['datasetID'])
        # Make and store the new title.
        newtitle = _title_for(file_dir_path, GTS_yes == 'y')
        print('newtitle',newtitle)
        _set_global_title(elemental, newtitle)
# Serialise the edited tree back into the scratch file.
tree.write(outputFilename_del)
# Copy the scratch file to the requested output without its first and last
# line, i.e. without the added <toplevel> wrapper, so the result has the same
# form as the original input.
with open(outputFilename_del) as wrapped_xml, open(outputFilename, 'w') as final_xml:
    wrapped_lines = wrapped_xml.readlines()
    final_xml.writelines(wrapped_lines[1:-1])
# The scratch file is no longer needed.
os.remove(outputFilename_del)