-
Notifications
You must be signed in to change notification settings - Fork 1
/
leaf_desk.py
74 lines (63 loc) · 2.33 KB
/
leaf_desk.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
""" Local environment-based atomic features
"""
import os
import sys
import numpy as np
import pandas as pd
from ase.data import chemical_symbols
from pymatgen.analysis.local_env import *
from pymatgen.io.cif import CifParser
#import pickle
try:
# Python>=3.7
from matminer.featurizers.site.fingerprint import OPSiteFingerprint, VoronoiFingerprint
except: pass
# # Python 3.6
# from matminer.featurizers.site import OPSiteFingerprint, VoronoiFingerprint
class Leaf:
    """
    Accumulates local environment site features per chemical element.

    For every site of every structure listed in an input file, the feature
    vector returned by ``featurizer.featurize`` is added to a running sum
    kept for each element found on that site, and the number of
    contributions per element is counted.

    Values: summed local environment features (LoStOPs, Voronoi
            tessellation, etc.)
    Keys: atomic elements
    """

    # Length of each per-element accumulator vector.
    # NOTE(review): presumably the number of features produced by a default
    # OPSiteFingerprint -- confirm. A featurizer returning vectors of another
    # length will fail to broadcast in average_features_cifs.
    N_FEATURES = 37

    def __init__(self, featurizer=None):
        """
        Args:
            featurizer: object exposing ``featurize(structure, site_index)``.
                When omitted, a fresh ``OPSiteFingerprint()`` is created per
                instance (rather than once at class-definition time, which
                shared one object and broke the class definition whenever the
                guarded matminer import had failed).

        Raises:
            ImportError: if no featurizer is given and ``OPSiteFingerprint``
                is not available in this module.
        """
        if featurizer is None:
            try:
                featurizer = OPSiteFingerprint()
            except NameError as err:
                raise ImportError(
                    "OPSiteFingerprint is unavailable; install matminer or "
                    "pass a featurizer explicitly"
                ) from err
        self.featurizer = featurizer

    @staticmethod
    def get_species(site):
        """
        Return the element symbols present on ``site``.

        ``str(site.species)`` is split on whitespace; every token is reduced
        to its alphabetic characters and kept only if the result is listed in
        ``chemical_symbols``.
        """
        symbols = []
        for token in str(site.species).split():
            # Keep letters only, dropping digits, signs and punctuation.
            symbol = ''.join(ch for ch in token if ch.isalpha())
            if symbol in chemical_symbols:
                symbols.append(symbol)
        return symbols

    @staticmethod
    def readfile(list_cifs):
        """
        Read a file with a list of cifs, e.g. 1.dat.

        Returns:
            (cifs, order): the whitespace-stripped lines of the file, and the
            part of ``list_cifs`` before its first '.' (used as a tag in the
            output file names).
        """
        # Context manager so the handle is closed deterministically.
        with open(list_cifs, 'r') as handle:
            cifs = [line.strip() for line in handle]
        order = list_cifs.split('.')[0]
        return cifs, order

    def average_features_cifs(self, list_cifs):
        """
        Process a list of cifs (e.g. 1.dat).

        Sums the site features over all occurrences of each element and
        counts those occurrences. The sums are pickled to
        ``OPleaf_{order}.pickle`` and the counts to ``nleaf_{order}.pickle``;
        no division is performed here, so averages must be computed from the
        two files afterwards.

        Entries that cannot be parsed into a structure are skipped.
        """
        cifs, order = self.readfile(list_cifs)
        leaf = {atom: np.zeros(self.N_FEATURES) for atom in chemical_symbols}
        nleaf = {atom: 0 for atom in chemical_symbols}

        for cif in cifs:
            try:
                structure = CifParser(cif).get_structures()[0]
            except Exception:
                # Best effort: skip unreadable/invalid entries, but let
                # KeyboardInterrupt and SystemExit propagate.
                continue

            for i, site in enumerate(structure):
                species = self.get_species(site)
                if not species:
                    continue
                # The feature vector depends only on the site, so compute it
                # once and reuse it for every element sharing the site.
                site_features = self.featurizer.featurize(structure, i)
                for element in species:
                    leaf[element] += site_features
                    nleaf[element] += 1

        pd.DataFrame(leaf).to_pickle(f'OPleaf_{order}.pickle')
        pd.DataFrame(nleaf, index=[order]).to_pickle(f'nleaf_{order}.pickle')
if __name__ == "__main__":
    # Script entry point: the single positional argument is the path of a
    # text file listing one cif per line (e.g. 1.dat).
    if len(sys.argv) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit(f"usage: {sys.argv[0]} LIST_OF_CIFS")
    fname = sys.argv[1]
    leaf = Leaf()
    leaf.average_features_cifs(fname)