From e54d3ab536fb7ed65ae16090ff0a91cabfe625de Mon Sep 17 00:00:00 2001
From: rdamaral <rna5137@psu.edu>
Date: Fri, 19 Apr 2024 15:01:21 -0400
Subject: [PATCH] Add hash feature

---
 .github/workflows/tdb2mhdb.yaml |  38 ++++++++-
 mhdb/core/mongo.py              | 133 +++++++++++++++++++++++++++++---
 2 files changed, 160 insertions(+), 11 deletions(-)

diff --git a/.github/workflows/tdb2mhdb.yaml b/.github/workflows/tdb2mhdb.yaml
index 1a120c7..b40418d 100644
--- a/.github/workflows/tdb2mhdb.yaml
+++ b/.github/workflows/tdb2mhdb.yaml
@@ -192,6 +192,37 @@ jobs:
                 issue_number: issue.number,
                 body: message
             });
+
+      - name: TAR GZ all files with 7zip
+        if: env.valid == 'true'
+        run: |
+          echo "Packaging into .tar.gz file now..."
+          issue_number=${{ github.event.issue.number }}
+          date_string=$(date "+%d%b%y_%H%M")
+          echo $date_string
+          issue_number="${issue_number}_${date_string}"
+          echo "${issue_number}"
+          tar -c tdbs/*  | 7z a -si -tgzip -mx=9 "contributions/contrib_${issue_number}.tar.gz"
+
+      - name: Commit changes with Add & Commit
+        id: commit
+        uses: EndBug/add-and-commit@v9
+        with:
+          message: '(automatic) Contribution Persisted'
+          add: "contributions/*.tar.gz"
+
+      - name: Get commit hash
+        id: gethash
+        run: echo "hash=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT"
+
+      - name: Update MHDB hash
+        if: env.valid == 'true'
+        env:
+            GITHUB_USER: ${{ github.event.issue.user.login }}
+            COMMIT_HASH: ${{ steps.gethash.outputs.hash }}
+            CLIENT_STRING: ${{ secrets.CLIENT_STRING }}
+        run: |
+            python -c "import os; from pymongo import MongoClient; client = MongoClient(os.environ['CLIENT_STRING']); db = client['MHDB']; collection = db['community']; collection.update_many({'metadata.parentDatabaseURL': 'hash'}, {'\$set': {'metadata.parentDatabaseURL': os.environ['COMMIT_HASH']}})"
     
       - name: Report results
         if: env.valid == 'true'
@@ -200,9 +231,12 @@ jobs:
           script: |
             const fs = require('fs');
             const issue = context.issue;
+            const full_commit_hash = "${{ steps.gethash.outputs.hash }}";
+            const commit_hash = full_commit_hash.substring(0, 8);
             
-            let message = `🚀 All Done! Your submission is now completed and all entries found in your TDB file(s) were added to MHDB-community.\n\n`;
-            message += `Here's a summary of all files and entries processed:\n`;
+            let message = `🚀 All Done! Your contribution hash is [${commit_hash}](https://mhdb.mat-x.org//${commit_hash}).\n\n`;
+            message += 'Your submission is now completed and all entries found in your TDB file(s) were added to MHDB-community. Please refer to `NewData.ipynb` for further instructions on how to access the database.\n';
+            message += `Here's a summary of all files and entries processed:\n`;
             
             message += fs.readFileSync('MHDB.log', 'utf8');
         
diff --git a/mhdb/core/mongo.py b/mhdb/core/mongo.py
index 23bd3d4..7b4ab7c 100644
--- a/mhdb/core/mongo.py
+++ b/mhdb/core/mongo.py
@@ -1,4 +1,8 @@
+from mhdb.core import parseTDB
+from pycalphad import Database, calculate
+from pymatgen.core import Composition
 import datetime, re
+from pprint import pprint
 
 def updateEntry(entry:dict, client_string:str, db:str, col:str):
     import dns.resolver
@@ -10,7 +14,7 @@ def updateEntry(entry:dict, client_string:str, db:str, col:str):
     database = client[db] 
     collection = database[col]
     
-    if collection.find_one({'material.phaseModel': entry['material']['phaseModel'], 'material.phaseLabel': entry['material']['phaseLabel'], 'material.endmembers': entry['material']['endmembers']}) is None:
+    if collection.find_one({'metadata.parentDatabaseURL': entry['metadata']['parentDatabaseURL'], 'material.phaseModel': entry['material']['phaseModel']}) is None:
         entry['metadata']['created'] = datetime.datetime.now()
         collection.insert_one(entry)
     
@@ -22,17 +26,17 @@ def updateEntry(entry:dict, client_string:str, db:str, col:str):
     
     return entry
 
-
-from pymatgen.core import Composition
-
 def TDBEntryGenerator(data:dict, client_string:str, db:str, col:str):
     
+    parentDatabaseID = data["phases"][0].split()[1]
+    
     metadata = {
         'name': 'TDBGenerated',
         'comment': f'Automated generated based on the {data["references"]} database.',
         'affiliation': 'MHDB',
         'parentDatabase': data["references"][0],
-        'parentDatabaseID': data["phases"][0].split()[1]
+        'parentDatabaseID': parentDatabaseID,
+        'parentDatabaseURL': None if "github" not in client_string else "hash"
     }
     
     elements = [element.split()[1] for element in data["elements"]]
@@ -42,17 +46,128 @@ def TDBEntryGenerator(data:dict, client_string:str, db:str, col:str):
     try:
         formula = Composition(re.sub(r'\(\)\d+(\.\d+)?', '', re.sub(r'[+-]\d+', '', phaseModel).replace('VA',''))).reduced_formula #Accounts for vacancies and charged species
     except:
-        formula = data["phases"][0].split()[1].split('_')[0]
+        formula = parentDatabaseID.split('_')[0]
     
     material = {
         'system': '-'.join(elements),
         'endmembers': '-'.join([formula]), #Still need to separate endmembers in case of solid solutions
-        'phaseLabel': data["phases"][0].split()[1].split('_')[-1].split(':')[-1],
+        'phaseLabel': parentDatabaseID.split('_')[-1].split(':')[-1],
         'phaseModel': phaseModel,
-        'SER': phaseModel
+        'SER': None  # placeholder: SER is not computed yet at this point; the real value is set via material.update below
     }
+
+    dbf = Database(parseTDB.one2tdb(data))
+    # Evaluate GM of this phase at T=298.15, P=101325 with pycalphad; fall back to None if the calculation fails.
+    try:
+        SER = round(calculate(dbf, elements + ['VA'], parentDatabaseID.split(':')[0], P=101325, T=298.15).GM.values[0][0][0][0], 4)
+    except Exception:
+        SER = None
     
+    material.update({'SER': SER})
+       
     entry = {"metadata": metadata, "material": material, "tdb": data}
     
-    # Check if an entry already exists and update collection:
     return updateEntry(entry, client_string, db, col)
+
+def DFTEntryGenerator(data:dict, client_string:str, db:str, col:str):
+    """Build an MHDB entry (metadata, material, dft, tdb) from a DFT record and store it through updateEntry; returns updateEntry's result."""
+    from pymongo import MongoClient, ASCENDING
+    client = MongoClient(client_string)
+
+    metadata = {
+        'name': 'DFTGenerated',
+        'comment': f"Automated generated based on the {data['parentDatabase']} database.",
+        'affiliation': 'MHDB',
+        'parentDatabase': data['parentDatabase'], 
+        'parentDatabaseID': data['parentDatabaseID'],
+        'parentDatabaseURL': data['parentDatabaseURL']
+    }
+
+    elements = data['elements']
+    endmembers = [data['reducedFormula']]
+    formationReaction = data['formationReaction']
+
+    material = {
+        'system': '-'.join(elements),
+        'endmembers': '-'.join(endmembers), #Still need to separate endmembers in case of solid solutions
+        'phaseLabel': data['structureLabel'],
+        'phaseModel': f'({endmembers[0]})1.0'
+    }
+
+    dft = {
+        'decomposesTo': formationReaction,
+        'formationEnthalpy': data['formationEnthalpy'],
+        'formationEntropy': data['formationEntropy']*data['totalAtoms'] if 'formationEntropy' in data else 0,
+        'mixingEnthalpy': data['mixingEnthalpy']*data['totalAtoms'] if 'mixingEnthalpy' in data else 0
+    }
+    # Parse the right-hand side of the formation reaction into {compound: coefficient}.
+    decomposesTo = {}
+    for constituent in formationReaction.split('->')[1].split('+'):
+        # Use a regular expression to separate the coefficient from the compound name
+        match = re.match(r'(\d*\.?\d*)\s*(\w+)', Composition(constituent).formula.replace(" ",""))
+        if match:
+            # If no coefficient is found, assume it to be 1
+            coefficient = float(match.group(1)) if match.group(1) else 1.0
+            compound_name = match.group(2)
+            decomposesTo[compound_name] = coefficient
+    # Gather TDB pieces from the lowest-SER MSUB entry found for each decomposition product.
+    tdb_elements = []
+    tdb_parameters = []
+    tdb_symbols = []
+    tdb_references = []
+    for constituent in decomposesTo:
+        result = client['MHDB']['MSUB'].find({"material.endmembers": constituent}).sort("material.SER", ASCENDING).limit(1)
+        for key, value in result[0]['tdb'].items(): # TODO: handle the no-match case (result[0] fails when the cursor is empty)
+            if key == 'elements':
+                tdb_elements += [item for item in value if item not in tdb_elements]  # skip lines already collected
+            elif key == 'symbols':
+                tdb_symbols += [item for item in value if item not in tdb_symbols]
+            elif key == 'references':
+                tdb_references += [item for item in value if item not in tdb_references]
+            elif key == 'parameters':
+                for contribution in value:
+                    contr_name = 'FSER' + contribution.split(' ')[1].split('(')[0] + constituent.upper()
+                    contr_func = contribution.split(' N ')[0].split(' ',2)[2] + ' N !'
+                    tdb_parameters.append('+' + contr_name)
+                    tdb_symbols.append(f"FUNCTION {contr_name} {contr_func}")
+
+    phase_name = f"{material['endmembers'].upper()}_{material['phaseLabel'].upper()}"
+
+    phase_model = {}
+    matches = re.findall(r'\((.*?)\)(\d*\.?\d*)', material['phaseModel'])
+    for match in matches:
+        phase_model[match[0]] = float(match[1])
+
+    # Declare every distinct species found on any sublattice (constituents are comma-separated within one sublattice).
+    species_names = {specie for sublattice in phase_model for specie in sublattice.upper().split(',')}
+    tdb_species = [f"SPECIES {specie} {specie}!" for specie in species_names]
+
+    tdb = {
+        "elements": tdb_elements,
+        "species": tdb_species,
+        "phases": [f"PHASE {phase_name} % {len(phase_model)} {' '.join(map(str, phase_model.values()))} ! CONSTITUENT {phase_name} :{':'.join(map(lambda x: x.upper(), phase_model.keys()))}: !"],
+        "parameters": [f"PARAMETER G({phase_name},{':'.join(map(lambda x: x.upper(), phase_model.keys()))};0) 298.15 {''.join(tdb_parameters)} {dft['formationEnthalpy']*data['totalAtoms']*96.48792534459*1000}-T*{dft['formationEntropy']*data['totalAtoms']*96.48792534459};  6000 N !"], # will need to separate in case of solid solutions; NOTE(review): dft['formationEntropy'] is already scaled by totalAtoms above - confirm the second factor is intended
+        "symbols": tdb_symbols,
+        "references": tdb_references
+    }
+
+    pprint(tdb)
+    dbf = Database(parseTDB.one2tdb(tdb))
+
+    try:
+        SER = round(calculate(dbf, elements + ['VA'], phase_name, P=101325, T=298.15).GM.values[0][0][0][0], 4)
+    except Exception:
+        SER = None
+
+    material.update({'SER': SER})
+
+    entry = {"metadata": metadata, "material": material, "dft": dft, "tdb": tdb}
+
+    client.close()  # MSUB lookups are finished; updateEntry is handed client_string, not this client
+    return updateEntry(entry, client_string, db, col)
+
+# Alternative method for decomposeTo:
+# decomp = pd.get_decomposition(comp)
+# Print the decomposition products and their amounts
+# for entry, amount in decomp.items():
+#    print(f"{entry.composition.reduced_formula}: {amount}")
\ No newline at end of file