Skip to content

Commit

Permalink
Castep parser: a little refactoring
Browse files Browse the repository at this point in the history
Using some features of Python 3.8+ and newer Numpy

I tried not to go too crazy, there are more places where assignment
expressions (i.e. walrus) could be used to save a line but overall
flow and clarity don't really benefit.
  • Loading branch information
ajjackson committed Sep 4, 2023
1 parent 60ad0aa commit 8325156
Showing 1 changed file with 23 additions and 44 deletions.
67 changes: 23 additions & 44 deletions euphonic/readers/castep.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,11 @@ def read_phonon_dos_data(
weights = np.empty((0,))
freqs = np.empty((0, n_branches))
mode_grads = np.empty((0, n_branches))
while True:
frequency_block = _read_frequency_block(
while (frequency_block := _read_frequency_block(
f,
n_branches,
extra_columns=[0],
terminator=' END GRADIENTS\n')
if frequency_block is None:
# We've reached 'END GRADIENTS' line
break
terminator=' END GRADIENTS\n')) is not None:

qmode_grad = frequency_block.extra

Expand All @@ -73,17 +69,11 @@ def read_phonon_dos_data(
if 'BEGIN DOS' not in line:
raise RuntimeError(
f'Expected "BEGIN DOS" in {filename}, got {line}')
# max_rows arg not available until Numpy 1.16.0
try:
dos_data = np.loadtxt(f, max_rows=n_bins)
except TypeError:
data = f.readlines()
dos_data = np.array([[float(elem) for elem in line.split()]
for line in data[:n_bins]])

dos_data = np.loadtxt(f, max_rows=n_bins)

data_dict: Dict[str, Any] = {}
data_dict['crystal'] = {}
cry_dict = data_dict['crystal']
cry_dict = data_dict['crystal'] = {}
cry_dict['n_atoms'] = n_atoms
cry_dict['cell_vectors'] = (cell_vectors*ureg('angstrom').to(
cell_vectors_unit)).magnitude
Expand Down Expand Up @@ -195,11 +185,8 @@ def read_phonon_data(
repeated_qpt_ids = defaultdict(set)
loto_split_indices = set()

while True:
frequency_block = _read_frequency_block(f, n_branches)
if frequency_block is None:
# Reached empty line, this should be end of file
break
while (frequency_block := _read_frequency_block(f, n_branches)
) is not None:

qpt_id = frequency_block.qpt_id

Expand Down Expand Up @@ -243,9 +230,8 @@ def read_phonon_data(

# Multiple qpts with same CASTEP q-pt index: correct weights
for qpt_id, indices in repeated_qpt_ids.items():
intersection = indices & loto_split_indices
if (prefer_non_loto
and intersection
and (intersection := indices & loto_split_indices)
and len(indices) > len(intersection)):
# Repeated q-point has both split and un-split variations;
# set weights of split points to zero
Expand Down Expand Up @@ -396,11 +382,9 @@ def _read_frequency_block(
qpt = np.array([float(x) for x in floats[:3]])
qweight = float(floats[3])

direction: Optional[np.ndarray]
direction: Optional[np.ndarray] = None
if len(floats) >= 6:
direction = floats[4:7]
else:
direction = None

freq_lines = [f.readline().split()
for i in range(n_branches)]
Expand Down Expand Up @@ -458,19 +442,17 @@ def read_interpolation_data(
with open(filename, 'rb') as f:
int_type = '>i4'
float_type = '>f8'
header = ''
first_cell_read = True
while header.strip() != b'END':
header = _read_entry(f)
if header.strip() == b'BEGIN_UNIT_CELL':
while (header := _read_entry(f).strip()) != b'END':
if header == b'BEGIN_UNIT_CELL':
# CASTEP writes the cell twice: the first is the
# geometry optimised cell, the second is the original
# cell. We only want the geometry optimised cell.
if first_cell_read:
(n_atoms, cell_vectors, atom_r, atom_mass,
atom_type) = _read_cell(f, int_type, float_type)
first_cell_read = False
elif header.strip() == b'FORCE_CON':
elif header == b'FORCE_CON':
sc_matrix = np.transpose(np.reshape(
_read_entry(f, int_type), (3, 3)))
n_cells_in_sc = int(np.rint(np.absolute(
Expand All @@ -483,16 +465,15 @@ def read_interpolation_data(
cell_origins = np.reshape(
_read_entry(f, int_type), (n_cells_in_sc, 3))
_ = _read_entry(f, int_type) # FC row not used
elif header.strip() == b'BORN_CHGS':
elif header == b'BORN_CHGS':
born = np.reshape(
_read_entry(f, float_type), (n_atoms, 3, 3))
elif header.strip() == b'DIELECTRIC':
elif header == b'DIELECTRIC':
dielectric = np.transpose(np.reshape(
_read_entry(f, float_type), (3, 3)))

data_dict: Dict[str, Any] = {}
data_dict['crystal'] = {}
cry_dict = data_dict['crystal']
cry_dict = data_dict['crystal'] = {}
cry_dict['n_atoms'] = n_atoms
cry_dict['cell_vectors'] = cell_vectors*ureg(
'bohr').to(cell_vectors_unit).magnitude
Expand Down Expand Up @@ -563,29 +544,27 @@ def _read_cell(file_obj: BinaryIO, int_type: str, float_type: str
Shape (n_atoms,) string ndarray. The chemical symbols of each
atom in the unit cell
"""
header = ''
while header.strip() != b'END_UNIT_CELL':
header = _read_entry(file_obj)
if header.strip() == b'CELL%NUM_IONS':
while (header := _read_entry(file_obj).strip()) != b'END_UNIT_CELL':
if header == b'CELL%NUM_IONS':
n_atoms = _read_entry(file_obj, int_type)
elif header.strip() == b'CELL%REAL_LATTICE':
elif header == b'CELL%REAL_LATTICE':
cell_vectors = np.transpose(np.reshape(
_read_entry(file_obj, float_type), (3, 3)))
elif header.strip() == b'CELL%NUM_SPECIES':
elif header == b'CELL%NUM_SPECIES':
n_species = _read_entry(file_obj, int_type)
elif header.strip() == b'CELL%NUM_IONS_IN_SPECIES':
elif header == b'CELL%NUM_IONS_IN_SPECIES':
n_atoms_in_species = _read_entry(file_obj, int_type)
if n_species == 1:
n_atoms_in_species = np.array([n_atoms_in_species])
elif header.strip() == b'CELL%IONIC_POSITIONS':
elif header == b'CELL%IONIC_POSITIONS':
max_atoms_in_species = max(n_atoms_in_species)
atom_r_tmp = np.reshape(_read_entry(file_obj, float_type),
(n_species, max_atoms_in_species, 3))
elif header.strip() == b'CELL%SPECIES_MASS':
elif header == b'CELL%SPECIES_MASS':
atom_mass_tmp = _read_entry(file_obj, float_type)
if n_species == 1:
atom_mass_tmp = np.array([atom_mass_tmp])
elif header.strip() == b'CELL%SPECIES_SYMBOL':
elif header == b'CELL%SPECIES_SYMBOL':
# Need to decode binary string for Python 3 compatibility
if n_species == 1:
atom_type_tmp = [_read_entry(file_obj, 'S8')
Expand Down

0 comments on commit 8325156

Please sign in to comment.