Skip to content

Commit

Permalink
Add hash feature
Browse files Browse the repository at this point in the history
  • Loading branch information
ricardonpa committed Apr 19, 2024
1 parent cc16cf6 commit e54d3ab
Show file tree
Hide file tree
Showing 2 changed files with 160 additions and 11 deletions.
38 changes: 36 additions & 2 deletions .github/workflows/tdb2mhdb.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,37 @@ jobs:
issue_number: issue.number,
body: message
});

# Package everything under tdbs/ into a single archive stored in contributions/.
# The archive name combines the triggering issue number with a timestamp
# formatted as day, abbreviated month, two-digit year, then hour and minute
# (date "+%d%b%y_%H%M"). tar only concatenates the files; 7z reads that stream
# from stdin (-si) and gzips it (-tgzip) at compression level 9 (-mx=9).
# Runs only when the `valid` environment variable equals 'true'.
- name: TAR GZ all files with 7zip
if: env.valid == 'true'
run: |
echo "Packaging into .tar.gz file now..."
issue_number=${{ github.event.issue.number }}
date_string=$(date "+%d%b%y_%H%M")
echo $date_string
issue_number="${issue_number}_${date_string}"
echo "${issue_number}"
tar -c tdbs/* | 7z a -si -tgzip -mx=9 "contributions/contrib_${issue_number}.tar.gz"
# Stage every contributions/*.tar.gz file and commit it with a fixed,
# automatically generated message using the EndBug/add-and-commit action.
# NOTE(review): unlike the neighbouring steps this one has no
# `if: env.valid == 'true'` guard — confirm that is intentional.
- name: Commit changes with Add & Commit
id: commit
uses: EndBug/add-and-commit@v9
with:
message: '(automatic) Contribution Persisted'
add: "contributions/*.tar.gz"

# Expose the hash of the current HEAD commit as the step output `hash`
# (read by later steps as steps.gethash.outputs.hash).
# The `::set-output` workflow command is deprecated; outputs are written to
# the file named by $GITHUB_OUTPUT instead.
- name: Get commit hash
  id: gethash
  run: echo "hash=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"

# Replace the placeholder value 'hash' stored in metadata.parentDatabaseURL of
# the MHDB.community collection with the hash of the commit created above.
- name: Update MHDB hash
  if: env.valid == 'true'
  env:
    GITHUB_USER: ${{ github.event.issue.user.login }}
    COMMIT_HASH: ${{ steps.gethash.outputs.hash }}
    # Passed through the environment rather than spliced into the script so
    # that special characters in the connection string are never parsed by bash.
    CLIENT_STRING: ${{ secrets.CLIENT_STRING }}
  run: |
    # A quoted heredoc keeps bash from expanding `$set` (which would turn the
    # MongoDB update operator into an empty string) and lets the script
    # import `os`, which the previous one-liner used without importing.
    python - <<'PY'
    import os
    from pymongo import MongoClient

    client = MongoClient(os.environ['CLIENT_STRING'])
    collection = client['MHDB']['community']
    collection.update_many(
        {'metadata.parentDatabaseURL': 'hash'},
        {'$set': {'metadata.parentDatabaseURL': os.environ['COMMIT_HASH']}},
    )
    PY
- name: Report results
if: env.valid == 'true'
Expand All @@ -200,9 +231,12 @@ jobs:
script: |
const fs = require('fs');
const issue = context.issue;
const full_commit_hash = "${{ steps.gethash.outputs.hash }}";
const commit_hash = full_commit_hash.substring(0, 8);
let message = `🚀 All Done! Your submission is now completed and all entries found in your TDB file(s) were added to MHDB-community.\n\n`;
message += `Here's a summary of all files and entries processed:\n`;
// Template literal (backticks) is required here: inside single quotes the
// ${commit_hash} placeholders would be emitted literally instead of interpolated.
let message = `🚀 All Done! Your contribution hash is [${commit_hash}](https://mhdb.mat-x.org//${commit_hash}).\n\n`;
message += 'Your submission is now completed and all entries found in your TDB file(s) were added to MHDB-community. Please refer to `NewData.ipynb` for further instructions on how to access the database.\n';
// Double quotes so the apostrophe in "Here's" does not terminate the string.
message += "Here's a summary of all files and entries processed:\n";
message += fs.readFileSync('MHDB.log', 'utf8');
Expand Down
133 changes: 124 additions & 9 deletions mhdb/core/mongo.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from mhdb.core import parseTDB
from pycalphad import Database, calculate
from pymatgen.core import Composition
import datetime, re
from pprint import pprint

def updateEntry(entry:dict, client_string:str, db:str, col:str):
import dns.resolver
Expand All @@ -10,7 +14,7 @@ def updateEntry(entry:dict, client_string:str, db:str, col:str):
database = client[db]
collection = database[col]

if collection.find_one({'material.phaseModel': entry['material']['phaseModel'], 'material.phaseLabel': entry['material']['phaseLabel'], 'material.endmembers': entry['material']['endmembers']}) is None:
if collection.find_one({'metadata.parentDatabaseURL': entry['metadata']['parentDatabaseURL'], 'material.phaseModel': entry['material']['phaseModel']}) is None:
entry['metadata']['created'] = datetime.datetime.now()
collection.insert_one(entry)

Expand All @@ -22,17 +26,17 @@ def updateEntry(entry:dict, client_string:str, db:str, col:str):

return entry


from pymatgen.core import Composition

def TDBEntryGenerator(data:dict, client_string:str, db:str, col:str):

parentDatabaseID = data["phases"][0].split()[1]

metadata = {
'name': 'TDBGenerated',
'comment': f'Automated generated based on the {data["references"]} database.',
'affiliation': 'MHDB',
'parentDatabase': data["references"][0],
'parentDatabaseID': data["phases"][0].split()[1]
'parentDatabaseID': parentDatabaseID,
'parentDatabaseURL': None if "github" not in client_string else "hash"
}

elements = [element.split()[1] for element in data["elements"]]
Expand All @@ -42,17 +46,128 @@ def TDBEntryGenerator(data:dict, client_string:str, db:str, col:str):
try:
formula = Composition(re.sub(r'\(\)\d+(\.\d+)?', '', re.sub(r'[+-]\d+', '', phaseModel).replace('VA',''))).reduced_formula #Accounts for vacancies and charged species
except:
formula = data["phases"][0].split()[1].split('_')[0]
formula = parentDatabaseID.split('_')[0]

material = {
'system': '-'.join(elements),
'endmembers': '-'.join([formula]), #Still need to separate endmembers in case of solid solutions
'phaseLabel': data["phases"][0].split()[1].split('_')[-1].split(':')[-1],
'phaseLabel': parentDatabaseID.split('_')[-1].split(':')[-1],
'phaseModel': phaseModel,
'SER': phaseModel
'SER': round(SER.GM.values[0][0][0][0], 4)
}

dbf = Database(parseTDB.one2tdb(data))

try:
SER = round(calculate(dbf, elements + ['VA'], parentDatabaseID.split(':')[0], P=101325, T=298.15).GM.values[0][0][0][0], 4)
except:
SER = None

material.update({'SER': SER})

entry = {"metadata": metadata, "material": material, "tdb": data}

# Check if an entry already exists and update collection:
return updateEntry(entry, client_string, db, col)

def _extend_unique(target: list, items) -> None:
    """Append each element of *items* to *target* unless it is already present."""
    for item in items:
        if item not in target:
            target.append(item)


def DFTEntryGenerator(data:dict, client_string:str, db:str, col:str):
    """Build a database entry (metadata, material, dft, tdb) from one DFT record.

    The record's formation reaction is split into its decomposition products.
    For each product the entry with the lowest ``material.SER`` is fetched from
    the ``MHDB.MSUB`` collection and its TDB elements, symbols, references and
    parameters are merged into a new single-phase TDB description. The result
    is printed, parsed with pycalphad to evaluate the Gibbs energy at
    298.15 K / 101325 Pa (stored as ``material['SER']``) and finally handed to
    ``updateEntry``.

    Args:
        data: DFT record. Keys read here: ``parentDatabase``,
            ``parentDatabaseID``, ``parentDatabaseURL``, ``elements``,
            ``reducedFormula``, ``formationReaction``, ``structureLabel``,
            ``formationEnthalpy``, ``totalAtoms`` and, optionally,
            ``formationEntropy`` and ``mixingEnthalpy`` (both default to 0).
        client_string: MongoDB connection string.
        db: Target database name, forwarded to ``updateEntry``.
        col: Target collection name, forwarded to ``updateEntry``.

    Returns:
        Whatever ``updateEntry`` returns for the generated entry.

    Raises:
        IndexError: If no ``MHDB.MSUB`` entry exists for a decomposition product.
    """
    from pymongo import MongoClient, ASCENDING

    # Inner subscripts use the opposite quote style from the f-string itself:
    # reusing the same quote inside an f-string is a SyntaxError before Python 3.12.
    metadata = {
        'name': 'DFTGenerated',
        'comment': f"Automated generated based on the {data['parentDatabase']} database.",
        'affiliation': 'MHDB',
        'parentDatabase': data['parentDatabase'],
        'parentDatabaseID': data['parentDatabaseID'],
        'parentDatabaseURL': data['parentDatabaseURL']
    }

    elements = data['elements']
    endmembers = [data['reducedFormula']]
    formationReaction = data['formationReaction']

    material = {
        'system': '-'.join(elements),
        'endmembers': '-'.join(endmembers), #Still need to separate endmembers in case of solid solutions
        'phaseLabel': data['structureLabel'],
        'phaseModel': f'({endmembers[0]})1.0'
    }

    dft = {
        'decomposesTo': formationReaction,
        'formationEnthalpy': data['formationEnthalpy'],
        'formationEntropy': data['formationEntropy']*data['totalAtoms'] if 'formationEntropy' in data else 0,
        'mixingEnthalpy': data['mixingEnthalpy']*data['totalAtoms'] if 'mixingEnthalpy' in data else 0
    }

    # Right-hand side of the reaction -> {compound name: stoichiometric coefficient}.
    decomposesTo = {}
    for constituent in formationReaction.split('->')[1].split('+'):
        # Use a regular expression to separate the coefficient from the compound name
        match = re.match(r'(\d*\.?\d*)\s*(\w+)', Composition(constituent).formula.replace(" ",""))
        if match:
            # If no coefficient is found, assume it to be 1
            coefficient = float(match.group(1)) if match.group(1) else 1.0
            decomposesTo[match.group(2)] = coefficient

    tdb_elements = []
    tdb_parameters = []
    tdb_symbols = []
    tdb_references = []
    # The client is only needed for the reference lookups; the context manager
    # closes it afterwards instead of leaking the connection.
    with MongoClient(client_string) as client:
        msub = client['MHDB']['MSUB']
        for constituent in decomposesTo:
            reference = msub.find_one(
                {"material.endmembers": constituent},
                sort=[("material.SER", ASCENDING)],
            )
            if reference is None:
                raise IndexError(
                    f"No MHDB.MSUB reference entry found for constituent '{constituent}'."
                )
            for key, value in reference['tdb'].items():
                # Merge item by item: comparing the whole list against the
                # accumulated strings never matched, so duplicates slipped in.
                if key == 'elements':
                    _extend_unique(tdb_elements, value)
                elif key == 'symbols':
                    _extend_unique(tdb_symbols, value)
                elif key == 'references':
                    _extend_unique(tdb_references, value)
                elif key == 'parameters':
                    for contribution in value:
                        # Re-expose each reference parameter as a named FUNCTION
                        # that the new phase's G parameter adds up.
                        contr_name = 'FSER' + contribution.split(' ')[1].split('(')[0] + constituent.upper()
                        contr_func = contribution.split(' N ')[0].split(' ',2)[2] + ' N !'
                        tdb_parameters.append('+' + contr_name)
                        tdb_symbols.append(f"FUNCTION {contr_name} {contr_func}")

    phase_name = f"{material['endmembers'].upper()}_{material['phaseLabel'].upper()}"

    # "(A,B)x(C)y" -> {"A,B": x, "C": y}
    phase_model = {}
    for constituents, ratio in re.findall(r'\((.*?)\)(\d*\.?\d*)', material['phaseModel']):
        phase_model[constituents] = float(ratio)

    sublattices = [constituents.upper() for constituents in phase_model]

    # Collect species from every sublattice (previously only the last
    # sublattice survived the loop), keeping first-seen order.
    tdb_species = []
    for sublattice in sublattices:
        _extend_unique(
            tdb_species,
            [f"SPECIES {specie} {specie}!" for specie in sublattice.split(',')],
        )

    constituent_spec = ':'.join(sublattices)
    site_ratios = ' '.join(map(str, phase_model.values()))
    # presumably an eV -> kJ/mol conversion factor — TODO confirm
    unit_factor = 96.48792534459
    enthalpy_term = dft['formationEnthalpy']*data['totalAtoms']*unit_factor*1000
    # NOTE(review): dft['formationEntropy'] was already multiplied by
    # totalAtoms above, so it is scaled twice here — confirm this is intended.
    entropy_term = dft['formationEntropy']*data['totalAtoms']*unit_factor

    tdb = {
        "elements": tdb_elements,
        "species": tdb_species,
        "phases": [f"PHASE {phase_name} % {len(phase_model)} {site_ratios} ! CONSTITUENT {phase_name} :{constituent_spec}: !"],
        "parameters": [f"PARAMETER G({phase_name},{constituent_spec};0) 298.15 {''.join(tdb_parameters)} {enthalpy_term}-T*{entropy_term}; 6000 N !"], # will need to separate in case of solid solutions
        "symbols": tdb_symbols,
        "references": tdb_references
    }

    pprint(tdb)
    dbf = Database(parseTDB.one2tdb(tdb))

    # Best effort: an entry is still stored (with SER=None) when the energy
    # cannot be evaluated, but interpreter-exit signals are no longer swallowed.
    try:
        SER = round(calculate(dbf, elements + ['VA'], phase_name, P=101325, T=298.15).GM.values[0][0][0][0], 4)
    except Exception:
        SER = None

    material.update({'SER': SER})

    entry = {"metadata": metadata, "material": material, "dft": dft, "tdb": tdb}

    return updateEntry(entry, client_string, db, col)

# Alternative method for decomposeTo:
# decomp = pd.get_decomposition(comp)
# Print the decomposition products and their amounts
# for entry, amount in decomp.items():
# print(f"{entry.composition.reduced_formula}: {amount}")

0 comments on commit e54d3ab

Please sign in to comment.