Skip to content

Commit

Permalink
Downloader skips samples with very large forces (#41)
Browse files Browse the repository at this point in the history
  • Loading branch information
peastman authored Aug 30, 2022
1 parent d72b175 commit e7ee509
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 1 deletion.
7 changes: 6 additions & 1 deletion downloader/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,9 @@ values:
- 'SCF DIPOLE'
- 'SCF QUADRUPOLE'
- 'WIBERG LOWDIN INDICES'
- 'MAYER INDICES'
- 'MAYER INDICES'

# This specifies a cutoff on forces. A sample will be skipped if the magnitude (absolute value) of any
# component of the force on any atom is larger than this value (in hartree/bohr). Comment out this line
# to include all samples regardless of force magnitude.
max_force: 1.0
9 changes: 9 additions & 0 deletions downloader/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ def compute_reference_energy(smiles):

# Load the downloader settings from config.yaml in the working directory.
with open('config.yaml') as config_file:
    config = yaml.safe_load(config_file.read())

# Optional cutoff used further down to skip samples with very large forces.
# A missing key and an empty/null value (e.g. a bare "max_force:" line) both
# mean "no cutoff"; calling float(None) would otherwise raise TypeError.
max_force = config.get('max_force')
if max_force is not None:
    max_force = float(max_force)

# Client used to fetch the data, and the HDF5 file the results are written to.
# Mode 'w' truncates any existing SPICE.hdf5.
client = FractalClient()
outputfile = h5py.File('SPICE.hdf5', 'w')
for subset in config['subsets']:
Expand Down Expand Up @@ -98,6 +102,11 @@ def compute_reference_energy(smiles):
# Store the identifying metadata for this group: the subset name, the SMILES
# string, and the atomic numbers taken from the first molecule.
group.create_dataset('subset', data=[subset], dtype=h5py.string_dtype())
group.create_dataset('smiles', data=[smiles], dtype=h5py.string_dtype())
group.create_dataset("atomic_numbers", data=molecules[0].atomic_numbers, dtype=np.int16)
# Optional filter: when a cutoff is configured, drop every sample that has
# any 'DFT TOTAL GRADIENT' component whose absolute value exceeds max_force.
if max_force is not None:
# One gradient entry per sample, in the same order as qcvars.
# NOTE(review): the loop variable `vars` shadows the builtin of the same name.
force = np.array([vars['DFT TOTAL GRADIENT'] for vars in qcvars])
# Indices of the samples whose largest absolute component is within the cutoff.
samples = [i for i in range(len(molecules)) if np.max(np.abs(force[i])) <= max_force]
# Apply the same index selection to both lists so they stay aligned.
molecules = [molecules[i] for i in samples]
qcvars = [qcvars[i] for i in samples]
# Geometries of the remaining samples, stored as float32 and tagged with a
# 'units' attribute of 'bohr'.
ds = group.create_dataset('conformations', data=np.array([m.geometry for m in molecules]), dtype=np.float32)
ds.attrs['units'] = 'bohr'
# 'DFT TOTAL ENERGY' of each remaining sample minus ref_energy (ref_energy is
# assigned outside this excerpt).
ds = group.create_dataset('formation_energy', data=np.array([vars['DFT TOTAL ENERGY']-ref_energy for vars in qcvars]), dtype=np.float32)
Expand Down

0 comments on commit e7ee509

Please sign in to comment.