From e54d3ab536fb7ed65ae16090ff0a91cabfe625de Mon Sep 17 00:00:00 2001
From: rdamaral
Date: Fri, 19 Apr 2024 15:01:21 -0400
Subject: [PATCH] Add hash feature

---
 .github/workflows/tdb2mhdb.yaml |  39 ++++++++-
 mhdb/core/mongo.py              | 156 ++++++++++++++++++++++++++++++--
 2 files changed, 184 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/tdb2mhdb.yaml b/.github/workflows/tdb2mhdb.yaml
index 1a120c7..b40418d 100644
--- a/.github/workflows/tdb2mhdb.yaml
+++ b/.github/workflows/tdb2mhdb.yaml
@@ -192,6 +192,38 @@ jobs:
               issue_number: issue.number,
               body: message
             });
+
+      - name: TAR GZ all files with 7zip
+        if: env.valid == 'true'
+        run: |
+          echo "Packaging into .tar.gz file now..."
+          issue_number=${{ github.event.issue.number }}
+          date_string=$(date "+%d%b%y_%H%M")
+          echo "$date_string"
+          issue_number="${issue_number}_${date_string}"
+          echo "${issue_number}"
+          tar -c tdbs/* | 7z a -si -tgzip -mx=9 "contributions/contrib_${issue_number}.tar.gz"
+
+      - name: Commit changes with Add & Commit
+        id: commit
+        uses: EndBug/add-and-commit@v9
+        with:
+          message: '(automatic) Contribution Persisted'
+          add: "contributions/*.tar.gz"
+
+      - name: Get commit hash
+        id: gethash
+        run: echo "hash=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
+
+      - name: Update MHDB hash
+        if: env.valid == 'true'
+        env:
+          GITHUB_USER: ${{ github.event.issue.user.login }}
+          COMMIT_HASH: ${{ steps.gethash.outputs.hash }}
+          CLIENT_STRING: ${{ secrets.CLIENT_STRING }}
+        run: |
+          # "\$set" is escaped so bash passes a literal $set to Python instead of expanding it.
+          python -c "import os; from pymongo import MongoClient; client = MongoClient(os.environ['CLIENT_STRING']); db = client['MHDB']; collection = db['community']; collection.update_many({'metadata.parentDatabaseURL': 'hash'}, {'\$set': {'metadata.parentDatabaseURL': os.environ['COMMIT_HASH']}})"

       - name: Report results
         if: env.valid == 'true'
@@ -200,9 +232,12 @@ jobs:
           script: |
             const fs = require('fs');
             const issue = context.issue;
+            const full_commit_hash = "${{ steps.gethash.outputs.hash }}";
+            const commit_hash = full_commit_hash.substring(0, 8);

-            let message = `🚀 All Done! Your submission is now completed and all entries found in your TDB file(s) were added to MHDB-community.\n\n`;
-            message += `Here's a summary of all files and entries processed:\n`;
+            let message = `🚀 All Done! Your contribution hash is [${commit_hash}](https://mhdb.mat-x.org//${commit_hash}).\n\n`;
+            message += 'Your submission is now completed and all entries found in your TDB file(s) were added to MHDB-community. Please refer to `NewData.ipynb` for further instructions on how to access the database.\n';
+            message += `Here's a summary of all files and entries processed:\n`;
             message += fs.readFileSync('MHDB.log', 'utf8');


diff --git a/mhdb/core/mongo.py b/mhdb/core/mongo.py
index 23bd3d4..7b4ab7c 100644
--- a/mhdb/core/mongo.py
+++ b/mhdb/core/mongo.py
@@ -1,4 +1,8 @@
+from mhdb.core import parseTDB
+from pycalphad import Database, calculate
+from pymatgen.core import Composition
 import datetime, re
+from pprint import pprint

 def updateEntry(entry:dict, client_string:str, db:str, col:str):
     import dns.resolver
@@ -10,7 +14,7 @@ def updateEntry(entry:dict, client_string:str, db:str, col:str):
     database = client[db]
     collection = database[col]

-    if collection.find_one({'material.phaseModel': entry['material']['phaseModel'], 'material.phaseLabel': entry['material']['phaseLabel'], 'material.endmembers': entry['material']['endmembers']}) is None:
+    if collection.find_one({'metadata.parentDatabaseURL': entry['metadata']['parentDatabaseURL'], 'material.phaseModel': entry['material']['phaseModel']}) is None:
         entry['metadata']['created'] = datetime.datetime.now()
         collection.insert_one(entry)

@@ -22,17 +26,24 @@ def updateEntry(entry:dict, client_string:str, db:str, col:str):

     return entry

-
-from pymatgen.core import Composition
-
 def TDBEntryGenerator(data:dict, client_string:str, db:str, col:str):
+    """Build an MHDB entry from a parsed TDB dict and store it via updateEntry.
+
+    Reads ``data["phases"][0]``, ``data["references"]`` and ``data["elements"]``,
+    evaluates the phase with pycalphad at T=298.15 and P=101325 to fill
+    ``material['SER']`` (``None`` if that calculation fails), and returns
+    whatever :func:`updateEntry` returns for the assembled entry.
+    """

+    parentDatabaseID = data["phases"][0].split()[1]
+
     metadata = {
         'name': 'TDBGenerated',
         'comment': f'Automated generated based on the {data["references"]} database.',
         'affiliation': 'MHDB',
         'parentDatabase': data["references"][0],
-        'parentDatabaseID': data["phases"][0].split()[1]
+        'parentDatabaseID': parentDatabaseID,
+        'parentDatabaseURL': None if "github" not in client_string else "hash"  # "hash" is a placeholder the workflow later replaces with the commit hash
     }

     elements = [element.split()[1] for element in data["elements"]]
@@ -42,17 +53,144 @@ def TDBEntryGenerator(data:dict, client_string:str, db:str, col:str):
     try:
         formula = Composition(re.sub(r'\(\)\d+(\.\d+)?', '', re.sub(r'[+-]\d+', '', phaseModel).replace('VA',''))).reduced_formula #Accounts for vacancies and charged species
     except:
-        formula = data["phases"][0].split()[1].split('_')[0]
+        formula = parentDatabaseID.split('_')[0]

     material = {
         'system': '-'.join(elements),
         'endmembers': '-'.join([formula]), #Still need to separate endmembers in case of solid solutions
-        'phaseLabel': data["phases"][0].split()[1].split('_')[-1].split(':')[-1],
+        'phaseLabel': parentDatabaseID.split('_')[-1].split(':')[-1],
         'phaseModel': phaseModel,
-        'SER': phaseModel
+        'SER': None  # placeholder: the value is computed below and written with material.update()
     }
+
+    dbf = Database(parseTDB.one2tdb(data))
+
+    try:
+        SER = round(calculate(dbf, elements + ['VA'], parentDatabaseID.split(':')[0], P=101325, T=298.15).GM.values[0][0][0][0], 4)
+    except Exception:
+        SER = None
+    material.update({'SER': SER})
+

     entry = {"metadata": metadata, "material": material, "tdb": data}

-    # Check if an entry already exists and update collection:
     return updateEntry(entry, client_string, db, col)
+
+def DFTEntryGenerator(data:dict, client_string:str, db:str, col:str):
+    """Build an MHDB entry from DFT formation data and store it via updateEntry.
+
+    For every product on the right-hand side of ``data['formationReaction']``
+    the lowest-``material.SER`` document with matching ``material.endmembers``
+    is fetched from the ``MHDB.MSUB`` collection. Its TDB parameters become
+    ``FSER...`` functions that are summed with the DFT formation term into a
+    single ``G`` parameter for the new phase, which is then evaluated with
+    pycalphad to fill ``material['SER']`` (``None`` if that calculation fails).
+
+    Raises:
+        IndexError: if no ``MHDB.MSUB`` document exists for a reaction product.
+    """
+
+    from pymongo import MongoClient, ASCENDING
+    client = MongoClient(client_string)
+
+    metadata = {
+        'name': 'DFTGenerated',
+        'comment': f"Automated generated based on the {data['parentDatabase']} database.",
+        'affiliation': 'MHDB',
+        'parentDatabase': data['parentDatabase'],
+        'parentDatabaseID': data['parentDatabaseID'],
+        'parentDatabaseURL': data['parentDatabaseURL']
+    }
+
+    elements = data['elements']
+    endmembers = [data['reducedFormula']]
+    formationReaction = data['formationReaction']
+
+    material = {
+        'system': '-'.join(elements),
+        'endmembers': '-'.join(endmembers), #Still need to separate endmembers in case of solid solutions
+        'phaseLabel': data['structureLabel'],
+        'phaseModel': f'({endmembers[0]})1.0'
+    }
+
+    dft = {
+        'decomposesTo': formationReaction,
+        'formationEnthalpy': data['formationEnthalpy'],
+        'formationEntropy': data['formationEntropy']*data['totalAtoms'] if 'formationEntropy' in data.keys() else 0,
+        'mixingEnthalpy': data['mixingEnthalpy']*data['totalAtoms'] if 'mixingEnthalpy' in data.keys() else 0
+    }
+
+    decomposesTo = {}
+    for constituent in formationReaction.split('->')[1].split('+'):
+        # Use a regular expression to separate the coefficient from the compound name
+        match = re.match(r'(\d*\.?\d*)\s*(\w+)', Composition(constituent).formula.replace(" ",""))
+        if match:
+            # If no coefficient is found, assume it to be 1
+            coefficient = float(match.group(1)) if match.group(1) else 1.0
+            compound_name = match.group(2)
+            decomposesTo[compound_name] = coefficient
+
+    tdb_elements = []
+    tdb_parameters = []
+    tdb_symbols = []
+    tdb_references = []
+    for constituent in decomposesTo.keys():
+        best = client['MHDB']['MSUB'].find_one({"material.endmembers": constituent}, sort=[("material.SER", ASCENDING)])
+        if best is None:
+            raise IndexError(f"No MHDB.MSUB entry found for constituent '{constituent}'")
+        for key, value in best['tdb'].items():
+            if key == 'elements':
+                tdb_elements += [item for item in value if item not in tdb_elements]
+            elif key == 'symbols':
+                tdb_symbols += [item for item in value if item not in tdb_symbols]
+            elif key == 'references':
+                tdb_references += [item for item in value if item not in tdb_references]
+            elif key == 'parameters':
+                for contribution in value:
+                    contr_name = 'FSER' + contribution.split(' ')[1].split('(')[0] + constituent.upper()
+                    contr_func = contribution.split(' N ')[0].split(' ',2)[2] + ' N !'
+                    tdb_parameters.append('+' + contr_name)
+                    tdb_symbols.append(f"FUNCTION {contr_name} {contr_func}")
+
+    phase_name = f"{material['endmembers'].upper()}_{material['phaseLabel'].upper()}"
+
+    phase_model = {}
+    matches = re.findall(r'\((.*?)\)(\d*\.?\d*)', material['phaseModel'])
+    for match in matches:
+        phase_model[match[0]] = float(match[1])
+
+    for species in map(lambda x: x.upper(), phase_model.keys()):
+        tdb_species = [f"SPECIES {specie} {specie}!" for specie in species.split(',')] # Update for multiple sublattices
+        tdb_species = list(set(tdb_species))
+
+    # NOTE(review): dft['formationEntropy'] was already multiplied by totalAtoms above
+    # and is multiplied by it again in "parameters" below -- confirm this is intended.
+    tdb = {
+        "elements": tdb_elements,
+        "species": tdb_species,
+        "phases": [f"PHASE {phase_name} % {len(phase_model)} {' '.join(map(str, phase_model.values()))} ! CONSTITUENT {phase_name} :{':'.join(map(lambda x: x.upper(), phase_model.keys()))}: !"],
+        "parameters": [f"PARAMETER G({phase_name},{':'.join(map(lambda x: x.upper(), phase_model.keys()))};0) 298.15 {''.join(tdb_parameters)} {dft['formationEnthalpy']*data['totalAtoms']*96.48792534459*1000}-T*{dft['formationEntropy']*data['totalAtoms']*96.48792534459}; 6000 N !"], # will need to separate in case of solid solutions
+        "symbols": tdb_symbols,
+        "references": tdb_references
+    }
+
+    pprint(tdb)
+    dbf = Database(parseTDB.one2tdb(tdb))
+
+    try:
+        SER = round(calculate(dbf, elements + ['VA'], phase_name, P=101325, T=298.15).GM.values[0][0][0][0], 4)
+    except Exception:
+        SER = None
+
+    material.update({'SER': SER})
+
+    entry = {"metadata": metadata, "material": material, "dft": dft, "tdb": tdb}
+
+    # return from_string
+    return updateEntry(entry, client_string, db, col)
+
+# Alternative method for decomposeTo:
+# decomp = pd.get_decomposition(comp)
+# Print the decomposition products and their amounts
+# for entry, amount in decomp.items():
+#     print(f"{entry.composition.reduced_formula}: {amount}")
\ No newline at end of file