Skip to content

Commit

Permalink
update parsing.py
Browse files Browse the repository at this point in the history
  • Loading branch information
ricardonpa committed May 7, 2024
1 parent 81c44f8 commit 35b445d
Showing 1 changed file with 15 additions and 8 deletions.
23 changes: 15 additions & 8 deletions mhdb/core/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def tdb2one(file_path:str):

for phase_description in data['phase_descriptions']:
for j, phase in enumerate(data['phases']):
if phase_description.split()[4] == phase.split()[1]:
if phase_description.split()[4] == phase.split()[1].split(':')[0]:
data['phases'][j] = phase + f' {phase_description}'

del data['phase_descriptions']
Expand Down Expand Up @@ -125,7 +125,7 @@ def one2many(data:list):
cache = []

for k, parameter in enumerate(data['parameters']):
phase_species = re.search(r'PARAMETER [A-Z]+\(([^;]+);', parameter).group(1).replace(' ','')
phase_species = re.search(r'PARAMETER [A-Z0-9]+\(([^;]+);', parameter).group(1).replace(' ','')
if phase_species in cache:
continue

Expand All @@ -136,26 +136,32 @@ def one2many(data:list):
cache.append(phase_species)

for j, secondary_parameter in enumerate(data['parameters']):
if k != j and phase_species == re.search(r'PARAMETER [A-Z]+\(([^;]+);', secondary_parameter).group(1):
if k != j and phase_species == re.search(r'PARAMETER [A-Z0-9]+\(([^;]+);', secondary_parameter).group(1):
data_collection[i]['parameters'].append(secondary_parameter)

# Get phase > Update constituents
for phase in data['phases']:
if phase_species.split(',')[0] == phase.split()[1].split(':')[0]:
data_collection[i]['phases'].append(f"{phase.split('!')[0]}! CONSTITUENT {phase_species.split(',')[0]} :{phase_species.split(',', 1)[1]}: !{phase.split('!', 2)[2] if phase.split('!')[2] else ''}")
data_collection[i]['phases'].append(f"{phase.split('!')[0]}! CONSTITUENT {phase_species.split(',')[0]} :{re.split(r'[+-]', phase_species.split(',', 1)[1])[0]}: !{phase.split('!', 2)[2] if phase.split('!')[2] else ''}")

# Get species > Get elements
phase_elements = []
for phase_specie in phase_species.split(',', 1)[1].replace(':', ',').split(','):
for specie in data['species']:
if phase_specie == specie.split()[1]:
data_collection[i]['species'].append(specie)
for phase_element in [part for part in re.split(r'\d+', specie.split()[2]) if part]:
if phase_element not in phase_elements:
phase_elements.append(element)
for element in data['elements']:
if phase_element == element.split()[1]:
data_collection[i]['elements'].append(element)

for phase_element in [part for part in re.split(r'\d+', phase_specie) if part]:
if phase_element not in phase_elements:
phase_elements.append(phase_element)
for element in data['elements']:
if phase_element == element.split()[1]:
if re.split(r'[+-]', phase_element)[0] == element.split()[1]:
data_collection[i]['elements'].append(element)

# Get symbols called in parameters
Expand All @@ -169,7 +175,8 @@ def one2many(data:list):
j = 0
while j < len(data_collection[i]['symbols']):
phase_symbol = data_collection[i]['symbols'][j]
for phase_secondary_symbol in re.findall(r'[-+ ](\w+)(?=#)', phase_symbol):
phase_symbol = ' '.join(phase_symbol.split(' ')[3:-3])
for phase_secondary_symbol in re.findall(r'(?!EXP)(?=[0-9]*?[A-Z_]+)(?![0-9]+E)([A-Z0-9_]{3,})', phase_symbol):
for secondary_symbol in data['symbols']:
if phase_secondary_symbol == secondary_symbol.split()[1]:
data_collection[i]['symbols'].append(secondary_symbol) if secondary_symbol not in data_collection[i]['symbols'] else None
Expand Down Expand Up @@ -208,8 +215,8 @@ def many2one(elements:list,data_collection:list):
grouped_data[phase].append(species)

for phase, species in grouped_data.items():
grouped_data[phase] = [[species[sublattice][specie] for sublattice in range(len(species))] for specie in range(len(species[0]))]
grouped_data[phase] = ':'.join([','.join(set(group)) for group in grouped_data[phase]])
grouped_data[phase] = [[species[sublattice][specie] for sublattice in range(len(species)) if species[sublattice][specie] != '*'] for specie in range(len(species[0]))]
grouped_data[phase] = ':'.join([','.join(set(','.join(group).split(','))) for group in grouped_data[phase]])

TDB[key] = [f'PHASE {phase.split('!')[0]}! CONSTITUENT {phase.split()[0].split(':')[0]} :{grouped_data[phase]}: !{phase.split('!', 1)[1] if phase.split('!')[1] else ''}' for phase in grouped_data.keys()]

Expand Down

0 comments on commit 35b445d

Please sign in to comment.