Skip to content

Commit

Permalink
bin/molenc_panascan.py: more efficient removal of duplicate analogs
Browse files Browse the repository at this point in the history
using a string set (rather than a list of strings)
  • Loading branch information
UnixJunkie committed Mar 17, 2021
1 parent 11b4ea8 commit e21175e
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions bin/molenc_panascan.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,19 @@ def positional_analog_scan(mol, smarts_patt = '[cH]',
smi_substs = ['N','CF','CC','CO',
'CCN','CCl','CC(F)(F)(F)','COC']):
res = []
ss = set() # a string set
patt = Chem.MolFromSmarts(smarts_patt)
for smi in smi_substs:
subst = Chem.MolFromSmiles(smi)
analogs = AllChem.ReplaceSubstructs(mol, patt, subst)
for a in analogs:
analog_smi = Chem.MolToSmiles(a) # canonicalization
if not analog_smi in res: # remove duplicates
# remove duplicates
if analog_smi not in ss:
res.append(analog_smi)
ss.add(analog_smi)
return res

# FBR: use a string set rather than a list of strings when avoiding duplicates

if __name__ == '__main__':
before = time.time()
# CLI options
Expand Down

0 comments on commit e21175e

Please sign in to comment.