
Commit

Add files via upload
Alice0416 authored Jun 20, 2024
1 parent 936176d commit 7ae41de
Showing 3 changed files with 121 additions and 0 deletions.
55 changes: 55 additions & 0 deletions info.json
@@ -0,0 +1,55 @@
{
    "author": "Xinping Song",
    "algorithm": "RF-SCM/Magpie v1.0",
    "algorithm_long": "",
    "bibtex_refs": [
        "@article{Dunn2020,\n doi = {10.1038/s41524-020-00406-3},\n url = {https://doi.org/10.1038/s41524-020-00406-3},\n year = {2020},\n month = sep,\n publisher = {Springer Science and Business Media {LLC}},\n volume = {6},\n number = {1},\n author = {Alexander Dunn and Qi Wang and Alex Ganose and Daniel Dopp and Anubhav Jain},\n title = {Benchmarking materials property prediction methods: the Matbench test set and Automatminer reference algorithm},\n journal = {npj Computational Materials}\n}",
        "@article{Breiman2001,\n doi = {10.1023/a:1010933404324},\n url = {https://doi.org/10.1023/a:1010933404324},\n year = {2001},\n publisher = {Springer Science and Business Media {LLC}},\n volume = {45},\n number = {1},\n pages = {5--32},\n author = {Leo Breiman},\n journal = {Machine Learning}\n}",
        "@article{Ward2016,\n doi = {10.1038/npjcompumats.2016.28},\n url = {https://doi.org/10.1038/npjcompumats.2016.28},\n year = {2016},\n month = aug,\n publisher = {Springer Science and Business Media {LLC}},\n volume = {2},\n number = {1},\n author = {Logan Ward and Ankit Agrawal and Alok Choudhary and Christopher Wolverton},\n title = {A general-purpose machine learning framework for predicting properties of inorganic materials},\n journal = {npj Computational Materials}\n}",
        "@article{QUA:QUA24917,\n author = {Faber, Felix and Lindmaa, Alexander and von Lilienfeld, O. Anatole and Armiento, Rickard},\n title = {Crystal structure representations for machine learning models of formation energies},\n journal = {International Journal of Quantum Chemistry},\n volume = {115},\n number = {16},\n issn = {1097-461X},\n url = {http://dx.doi.org/10.1002/qua.24917},\n doi = {10.1002/qua.24917},\n pages = {1094--1101},\n keywords = {machine learning, formation energies, representations, crystal structure, periodic systems},\n year = {2015}\n}"
    ],
    "notes": "",
    "requirements": {"python": ["scikit-learn==0.23.2", "numpy==1.22.4", "matbench==0.6.0"]}
}
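
The requirements entry above pins exact versions of scikit-learn, numpy, and matbench. A minimal sketch for checking that the active environment matches those pins before reproducing the run, assuming Python 3.8+ for importlib.metadata; the PINS mapping simply restates the pins from info.json:

from importlib.metadata import PackageNotFoundError, version

# Pinned versions copied from the "requirements" block of info.json above.
PINS = {"scikit-learn": "0.23.2", "numpy": "1.22.4", "matbench": "0.6.0"}

for pkg, pinned in PINS.items():
    try:
        installed = version(pkg)
    except PackageNotFoundError:
        installed = None
    status = "OK" if installed == pinned else "MISMATCH"
    print(f"{pkg}: pinned {pinned}, installed {installed} -> {status}")
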
66 changes: 66 additions & 0 deletions my_python_file.py.py
@@ -0,0 +1,66 @@
"""
Code for training and recording the matbench_v0.1 random forest benchmark.
The ML pipeline is placed within the Automatminer pipeline code infrastructure for convenience.
All training and inference was done on a single 128-core HPC node.
Reduce the number of jobs n_jobs for less memory usage on consumer machines.
"""

if __name__ == '__main__':
    from automatminer import MatPipe
    from automatminer.automl.adaptors import SinglePipelineAdaptor, TPOTAdaptor
    from automatminer.featurization import AutoFeaturizer
    from automatminer.preprocessing import DataCleaner, FeatureReducer
    from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

    from matbench.bench import MatbenchBenchmark
    from multiprocessing import set_start_method

    set_start_method("spawn", force=True)

    # The learner is a single 500-estimator Random Forest model
    learner = SinglePipelineAdaptor(
        regressor=RandomForestRegressor(n_estimators=500),
        classifier=RandomForestClassifier(n_estimators=500),
    )
    pipe_config = {
        "learner": learner,
        "reducer": FeatureReducer(reducers=[]),
        "cleaner": DataCleaner(feature_na_method="mean", max_na_frac=0.01, na_method_fit="drop", na_method_transform="mean"),
        "autofeaturizer": AutoFeaturizer(n_jobs=8, preset="debug"),
    }

    pipe = MatPipe(**pipe_config)

    mb = MatbenchBenchmark(autoload=False)

    i = 0

    # for task in mb.tasks:
    task = mb.matbench_jdft2d
    print(task)
    task.load()
    for fold in task.folds:

        df_train = task.get_train_and_val_data(fold, as_type="df")

        # Fit the RF with matpipe
        pipe.fit(df_train, task.metadata.target)

        df_test = task.get_test_data(fold, include_target=False, as_type="df")
        predictions = pipe.predict(df_test)[f"{task.metadata.target} predicted"]

        # A single configuration is used
        params = {'note': 'single config; see benchmark user metadata'}

        task.record(fold, predictions, params=params)

    mb.to_file("results_" + str(i) + ".json.gz")
    i += 1

    # Save your results
    mb.to_file("results.json.gz")


Binary file added results.json.gz
Binary file not shown.
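
results.json.gz is the benchmark record written by mb.to_file in the script above. A minimal sketch for reloading it and inspecting the recorded scores; it assumes the MatbenchBenchmark.from_file loader and per-task scores attribute behave as in the matbench==0.6.0 pin from info.json:

from matbench.bench import MatbenchBenchmark

# Reload the saved benchmark output (assumed matbench API; see the version pin in info.json).
mb = MatbenchBenchmark.from_file("results.json.gz")

for task in mb.tasks:
    # task.scores aggregates the per-fold metrics recorded via task.record(...)
    print(task.dataset_name, dict(task.scores))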
