forked from PhasesResearchLab/MHDB
-
Notifications
You must be signed in to change notification settings - Fork 0
252 lines (216 loc) · 9.88 KB
/
tdb2mhdb.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# Workflow definition: a single job that reacts to issue activity.
name: tdb2mhdb

# Run when an issue is opened or when an existing issue is edited.
on:
  issues:
    types:
      - opened
      - edited

jobs:
  respond:
    runs-on: ubuntu-latest
    steps:
# Check out the repository contents into the runner workspace.
- uses: actions/checkout@v4
- name: Verify whether issue is labeled with 'newTDB'
  uses: actions/github-script@v7
  with:
    script: |
      // Fetch the triggering issue and collect the names of its labels.
      const { owner, repo } = context.repo;
      const { data: issueData } = await github.rest.issues.get({
        owner,
        repo,
        issue_number: context.issue.number,
      });
      const labelNames = issueData.labels.map(({ name }) => name);

      // Without the 'newTDB' label, ask GitHub to cancel this workflow run.
      const isNewTdb = labelNames.includes('newTDB');
      if (!isNewTdb) {
        console.log("Issue doesn't contain 'newTDB' label, cancelling workflow");
        await github.rest.actions.cancelWorkflowRun({
          owner,
          repo,
          run_id: context.runId,
        });
      }
- name: Check for ZIP file
  uses: actions/github-script@v7
  id: respond
  with:
    script: |
      // Search the issue body for a direct https link to a .zip file, record the
      // outcome in match.txt (the URL, or the literal 'false'), and comment on
      // the issue to tell the author what was found.
      const fs = require('fs');
      const issue = context.issue;
      const regex = /(https:\/\/[^\s]+\.zip)\b/g;
      const issueDetails = await github.rest.issues.get({
        owner: issue.owner,
        repo: issue.repo,
        issue_number: issue.number,
      });
      // The body can be null (issue submitted without text); treat it as empty
      // so the search below reports "no link" instead of throwing.
      const body = issueDetails.data.body ?? '';
      const matches = body.match(regex);
      console.log('Matches:', matches);
      let message;
      if (matches && matches.length > 0) {
        message = '👍 I found the ZIP file linked in your comment. I will now attempt to download it and validate contents!';
        // Only the first link is used by the download step.
        fs.writeFileSync('match.txt', matches[0]);
      } else {
        message = '😢 Your comment does not contain any direct links to ZIP files.\n\n';
        message += `You may edit your original request, include your ZIP file, and update your comment to try again.`;
        fs.writeFileSync('match.txt', 'false');
      }
      // Await the request so a failed comment surfaces as a failure of this step.
      await github.rest.issues.createComment({
        owner: issue.owner,
        repo: issue.repo,
        issue_number: issue.number,
        body: message,
      });
- name: Download file
  id: download
  run: |
    # match.txt holds the URL recorded by the ZIP-check step, or 'false'.
    file=$(cat match.txt)
    # Replace any '.zipLICENSE' in the URL with '.zip'.
    file=$(echo "$file" | sed 's/\.zipLICENSE/\.zip/g')
    echo "$file" | tee log.txt
    sleep 5
    # Later steps test `steps.download.outputs.failed`, which is only populated
    # through GITHUB_OUTPUT. The GITHUB_ENV write is kept so anything reading
    # `env.failed` keeps working.
    if ! wget -O task.zip -L "$file"; then
      echo "failed=true" >> "$GITHUB_OUTPUT"
      echo "failed=true" >> "$GITHUB_ENV"
    fi
- name: Send message if download failed
  uses: actions/github-script@v7
  if: steps.download.outputs.failed == 'true'
  with:
    script: |
      // Tell the issue author that the linked ZIP could not be downloaded.
      const issue = context.issue;
      let message = `😢 I could not download your ZIP file. Please double check if you have attached it to your message. When attached, GitHub would show a link similar to this: [FileName.zip](https://github.com/PhasesResearchLab/MHDB/files/.../FileName.zip)\n\n`;
      message += `You may edit your original request, include your ZIP file, and update your comment to try again.`;
      // Await the request so a failed comment surfaces as a failure of this step.
      await github.rest.issues.createComment({
        owner: issue.owner,
        repo: issue.repo,
        issue_number: issue.number,
        body: message,
      });
- name: Unzip file
  if: steps.download.outputs.failed != 'true'
  run: |
    # Extract the archive, then move every extracted file (at any depth)
    # directly into tdbs/.
    mkdir -p zipfolder tdbs
    unzip task.zip -d zipfolder
    find ./zipfolder -type f -exec mv {} ./tdbs \;
    # Remove hidden files and append the remaining file names to the log.
    find ./tdbs -type f -name ".*" -delete
    ls -A tdbs | tee -a log.txt
- name: Verify file extensions
  if: steps.download.outputs.failed != 'true'
  id: verify
  run: |
    # Extension comparison is case-insensitive (applies to `case` patterns).
    shopt -s nocasematch

    # Print the reason, mark the submission invalid for later steps, and end
    # this step successfully.
    reject() {
      echo "$1"
      echo "valid=false" >> $GITHUB_ENV
      exit 0
    }

    if [ -z "$(ls -A tdbs)" ]; then
      reject "No files found in structures directory"
    fi

    # Every file must end in tdb, dat or xml.
    for entry in tdbs/*; do
      case "${entry##*.}" in
        tdb|dat|xml) ;;
        *) reject "Invalid file extension: $entry" ;;
      esac
    done

    echo "valid=true" >> $GITHUB_ENV
- name: Send message if file extensions are invalid
  uses: actions/github-script@v7
  if: env.valid == 'false' && steps.download.outputs.failed != 'true'
  with:
    script: |
      // Tell the issue author that the ZIP contents failed the extension check.
      const issue = context.issue;
      // Backticks that belong to the Markdown text are escaped; unescaped they
      // terminate the template literal and the script fails to parse.
      let message = `😢 The files supplied in ZIP do not have one of the required extensions: \`TDB\`. If you need another format to work, such as \`DAT\` or \`XML\`, let us know!\n\n`;
      message += `You may edit your original request, modify your TDB files, and update your comment to try again.`;
      // Await the request so a failed comment surfaces as a failure of this step.
      await github.rest.issues.createComment({
        owner: issue.owner,
        repo: issue.repo,
        issue_number: issue.number,
        body: message,
      });
# Install Python 3.12 with pip caching, using requirements.txt as the cache
# dependency file. Only runs for submissions that passed validation.
- name: Set up Python 3.12
  if: env.valid == 'true'
  uses: actions/setup-python@v5
  with:
    python-version: '3.12'
    cache: pip
    cache-dependency-path: requirements.txt
- name: Install dependencies
  if: env.valid == 'true'
  run: |
    # Without pipefail the pipeline's exit status is tee's, so a failed
    # `pip install` would be reported as success.
    set -o pipefail
    python -m pip install -r requirements.txt | tee -a log.txt
    # Clone the upstream MHDB repository into ./MHDB.
    git clone https://github.com/PhasesResearchLab/MHDB.git
- name: Extract issue details
  uses: actions/github-script@v7
  id: extract
  with:
    script: |
      // Save the issue body to issues.txt and return it as this step's result.
      const fs = require('fs');
      const issue = context.issue;
      const issueDetails = await github.rest.issues.get({
        owner: issue.owner,
        repo: issue.repo,
        issue_number: issue.number,
      });
      // The body can be null (issue submitted without text); writeFileSync
      // rejects null data, so fall back to an empty string.
      const body = issueDetails.data.body ?? '';
      fs.writeFileSync('issues.txt', body);
      return body;
- name: Run MHDB parsing scripts
  if: env.valid == 'true'
  env:
    GITHUB_USER: ${{ github.event.issue.user.login }}
    # Pass the secret through the environment, not by interpolating it into
    # the script text, where shell metacharacters in the value (e.g. '&' in a
    # connection string) would be interpreted by bash.
    CLIENT_STRING: ${{ secrets.CLIENT_STRING }}
  run: |
    export ISSUE_BODY="$(cat issues.txt)"
    # For every .tdb file (case-insensitive) run the inline Python below, with
    # `{}` replaced by the file path, and append its output to MHDB.log.
    # NOTE(review): the inline Python imports `parseTDB` and `mongo` but calls
    # `database.tdb2one` / `database.one2many`; as written `database` is never
    # imported, which raises NameError — TODO confirm the intended module.
    find tdbs -type f \( -iname \*.tdb -o -iname \*.TDB \) -exec sh -c "python -c \"import os; from mhdb.core import parseTDB, mongo; data = database.tdb2one('{}'); data_collection = database.one2many(data); print(f'> {len(data_collection)} entries uploaded from {data['references'][0].split(' | ')[0]}'); [mongo.TDBEntryGenerator(data=d, client_string=os.environ['CLIENT_STRING'], db='MHDB', col='community') for d in data_collection]\" >> MHDB.log" \;
- name: Run another step if Python step fails
  if: failure() && env.valid == 'true'
  uses: actions/github-script@v7
  with:
    script: |
      // Runs only after an earlier step failed on a validated submission:
      // point the issue author at the workflow logs.
      const issue = context.issue;
      const message = '😢 Something went wrong while processing your TDB files...\nYou can see logs under Actions tab.';
      // Await the request so a failed comment surfaces as a failure of this step.
      await github.rest.issues.createComment({
        owner: issue.owner,
        repo: issue.repo,
        issue_number: issue.number,
        body: message,
      });
- name: TAR GZ all files with 7zip
  if: env.valid == 'true'
  run: |
    echo "Packaging into .tar.gz file now..."
    # Archive name suffix: <issue number>_<day><month abbrev><2-digit year>_<HHMM>.
    stamp=$(date "+%d%b%y_%H%M")
    echo $stamp
    tag="${{ github.event.issue.number }}_${stamp}"
    echo "${tag}"
    # Stream a tar of tdbs/ into 7z, which gzip-compresses it at level 9.
    tar -c tdbs/* | 7z a -si -tgzip -mx=9 "contributions/contrib_${tag}.tar.gz"
# Stage the packaged archives under contributions/ and commit them.
- name: Commit changes with Add & Commit
  id: commit
  uses: EndBug/add-and-commit@v9
  with:
    add: 'contributions/*.tar.gz'
    message: "(automatic) Contribution Persisted"
- name: Get commit hash
  id: gethash
  # Expose the SHA of HEAD as the `hash` step output. The value is written to
  # the GITHUB_OUTPUT file because the `::set-output` workflow command is
  # deprecated.
  run: echo "hash=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
- name: Update MHDB hash
  if: env.valid == 'true'
  env:
    GITHUB_USER: ${{ github.event.issue.user.login }}
    COMMIT_HASH: ${{ steps.gethash.outputs.hash }}
    # Pass the secret through the environment, not by interpolating it into
    # the script text, and never echo it to the log.
    CLIENT_STRING: ${{ secrets.CLIENT_STRING }}
  run: |
    echo "COMMIT_HASH: $COMMIT_HASH"
    # Replace the placeholder value 'hash' in metadata.parentDatabaseURL with
    # the real commit hash for all matching documents in MHDB.community.
    # The heredoc delimiter is quoted so bash leaves `$set` alone; inside a
    # double-quoted `python -c "..."` it was expanded to an empty string.
    python - <<'PY'
    import os
    from pymongo import MongoClient

    client = MongoClient(os.environ['CLIENT_STRING'])
    collection = client['MHDB']['community']
    collection.update_many(
        {'metadata.parentDatabaseURL': 'hash'},
        {'$set': {'metadata.parentDatabaseURL': os.environ['COMMIT_HASH']}},
    )
    PY
- name: Report results
  if: env.valid == 'true'
  uses: actions/github-script@v7
  with:
    script: |
      // Post the final summary comment: the short commit hash of the persisted
      // contribution followed by the per-file upload log.
      const fs = require('fs');
      const issue = context.issue;
      const fullCommitHash = "${{ steps.gethash.outputs.hash }}";
      const commitHash = fullCommitHash.substring(0, 8);
      // Template literal, so ${commitHash} is actually interpolated.
      let message = `🚀 All Done! Your contribution hash is [${commitHash}](https://mhdb.mat-x.org//${commitHash}).\n\n`;
      message += 'Your submission is now completed and all entries found in your TDB file(s) were added to MHDB-community. Please refer to `NewData.ipynb` for further instructions on how to access the database.\n';
      // Double-quoted because the text contains an apostrophe.
      message += "Here's a summary of all files and entries processed:\n";
      // MHDB.log is only created when the parsing step processed a .tdb file.
      if (fs.existsSync('MHDB.log')) {
        message += fs.readFileSync('MHDB.log', 'utf8');
      }
      // Await the request so a failed comment surfaces as a failure of this step.
      await github.rest.issues.createComment({
        owner: issue.owner,
        repo: issue.repo,
        issue_number: issue.number,
        body: message,
      });