Skip to content

Commit

Permalink
optimake serve (#58)
Browse files Browse the repository at this point in the history
* wip

* serve command; cli reorg; click

* update tests provider prefix to _optimake

* update opt dep

* add tests for the optimake serve command
  • Loading branch information
eimrek authored Jul 30, 2024
1 parent 4ac4dea commit 69042da
Show file tree
Hide file tree
Showing 9 changed files with 299 additions and 87 deletions.
71 changes: 1 addition & 70 deletions examples/xyz_files_no_compression/.testing/first_entry.json
Original file line number Diff line number Diff line change
@@ -1,70 +1 @@
{
"id": "H_1",
"type": "structures",
"links": null,
"meta": null,
"attributes": {
"immutable_id": "H_1.xyz",
"last_modified": "1970",
"elements": [
"H"
],
"nelements": 1,
"elements_ratios": [
1.0
],
"chemical_formula_descriptive": "H",
"chemical_formula_reduced": "H",
"chemical_formula_hill": null,
"chemical_formula_anonymous": "A",
"dimension_types": [
0,
0,
0
],
"nperiodic_dimensions": 0,
"lattice_vectors": [
[
0.0,
0.0,
0.0
],
[
0.0,
0.0,
0.0
],
[
0.0,
0.0,
0.0
]
],
"cartesian_site_positions": [
[6.354493954, 10.954788065, 2.579653783]
],
"nsites": 1,
"species": [
{
"name": "H",
"chemical_symbols": [
"H"
],
"concentration": [
1.0
],
"mass": null,
"original_name": null,
"attached": null,
"nattached": null
}
],
"species_at_sites": [
"H"
],
"assemblies": null,
"structure_features": [],
"_mcloudarchive_name": "1"
},
"relationships": null
}
{"id": "H_1", "type": "structures", "links": null, "meta": null, "attributes": {"immutable_id": "H_1.xyz", "last_modified": "2024-07-09T17:49:28.302851", "elements": ["H"], "nelements": 1, "elements_ratios": [1.0], "chemical_formula_descriptive": "H", "chemical_formula_reduced": "H", "chemical_formula_hill": null, "chemical_formula_anonymous": "A", "dimension_types": [0, 0, 0], "nperiodic_dimensions": 0, "lattice_vectors": [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], "cartesian_site_positions": [[6.354493954, 10.954788065, 2.579653783]], "nsites": 1, "species": [{"name": "H", "chemical_symbols": ["H"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}], "species_at_sites": ["H"], "assemblies": null, "structure_features": [], "_optimake_name": "1"}, "relationships": null}
2 changes: 1 addition & 1 deletion examples/zip_of_cif/.testing/first_entry.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"id": "55c564f6-ac6a-4122-b8d9-0ad9fe61e961", "type": "structures", "links": null, "meta": null, "attributes": {"immutable_id": "structures.zip/structures/cifs/55c564f6-ac6a-4122-b8d9-0ad9fe61e961.cif", "last_modified": "1970", "elements": ["Ba", "C", "N", "S"], "nelements": 4, "elements_ratios": [0.14285714285714285, 0.2857142857142857, 0.2857142857142857, 0.2857142857142857], "chemical_formula_descriptive": "C4Ba2N4S4", "chemical_formula_reduced": "BaC2N2S2", "chemical_formula_hill": null, "chemical_formula_anonymous": "A2B2C2D", "dimension_types": [1, 1, 1], "nperiodic_dimensions": 3, "lattice_vectors": [[6.3587627540404945, 0.0, 0.0], [-2.672647488887009, 5.769819681958754, 0.0], [0.25844951934994664, -0.16511343006546234, 8.71190314896161]], "cartesian_site_positions": [[3.4987802863851005, 5.049341739457014, 6.533927361693402], [0.4457844982025419, 0.5553645123824795, 2.177975787264444], [0.37416734784252487, 2.642448780492868, 5.291889331690525], [2.6281157156591126, 1.202488098280357, 7.775965391700042], [3.5703974366609055, 2.962257471400425, 3.420013817271085], [1.316449068841148, 4.402218153614962, 0.9359377571616379], [0.3512600296798777, 4.156339511491648, 5.900755701251229], [4.011419364830723, 1.818004876809729, 7.167099022143105], [3.5933047547393455, 1.4483667404554434, 2.8111474477141454], [-0.06685458032729186, 3.786701375083563, 1.5448041268185058], [0.3773832460379156, 1.5350786992068879, 4.8597303327393915], [1.621957615426957, 0.7399681351855376, 8.208124390751108], [3.567181538468686, 4.069627552684378, 3.8521728163221476], [2.3226071690796486, 4.864738116705728, 0.5037787583104332]], "nsites": 14, "species": [{"name": "Ba", "chemical_symbols": ["Ba"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}, {"name": "C", "chemical_symbols": ["C"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}, {"name": "N", "chemical_symbols": ["N"], 
"concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}, {"name": "S", "chemical_symbols": ["S"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}], "species_at_sites": ["Ba", "Ba", "C", "C", "C", "C", "S", "S", "S", "S", "N", "N", "N", "N"], "assemblies": null, "structure_features": [], "_mcloudarchive_property_b": 0.99, "_mcloudarchive_energy": -0.54, "_mcloudarchive_structure_description": "some description"}, "relationships": null}
{"id": "55c564f6-ac6a-4122-b8d9-0ad9fe61e961", "type": "structures", "links": null, "meta": null, "attributes": {"immutable_id": "structures.zip/structures/cifs/55c564f6-ac6a-4122-b8d9-0ad9fe61e961.cif", "last_modified": "2024-07-09T18:33:49.249593", "elements": ["Ba", "C", "N", "S"], "nelements": 4, "elements_ratios": [0.14285714285714285, 0.2857142857142857, 0.2857142857142857, 0.2857142857142857], "chemical_formula_descriptive": "C4Ba2N4S4", "chemical_formula_reduced": "BaC2N2S2", "chemical_formula_hill": null, "chemical_formula_anonymous": "A2B2C2D", "dimension_types": [1, 1, 1], "nperiodic_dimensions": 3, "lattice_vectors": [[6.3587627540404945, 0.0, 0.0], [-2.672647488887009, 5.769819681958754, 0.0], [0.25844951934994664, -0.16511343006546234, 8.71190314896161]], "cartesian_site_positions": [[3.4987802863851005, 5.049341739457014, 6.533927361693402], [0.4457844982025419, 0.5553645123824795, 2.177975787264444], [0.37416734784252487, 2.642448780492868, 5.291889331690525], [2.6281157156591126, 1.202488098280357, 7.775965391700042], [3.5703974366609055, 2.962257471400425, 3.420013817271085], [1.316449068841148, 4.402218153614962, 0.9359377571616379], [0.3512600296798777, 4.156339511491648, 5.900755701251229], [4.011419364830723, 1.818004876809729, 7.167099022143105], [3.5933047547393455, 1.4483667404554434, 2.8111474477141454], [-0.06685458032729186, 3.786701375083563, 1.5448041268185058], [0.3773832460379156, 1.5350786992068879, 4.8597303327393915], [1.621957615426957, 0.7399681351855376, 8.208124390751108], [3.567181538468686, 4.069627552684378, 3.8521728163221476], [2.3226071690796486, 4.864738116705728, 0.5037787583104332]], "nsites": 14, "species": [{"name": "C", "chemical_symbols": ["C"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}, {"name": "Ba", "chemical_symbols": ["Ba"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}, {"name": "S", 
"chemical_symbols": ["S"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}, {"name": "N", "chemical_symbols": ["N"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}], "species_at_sites": ["Ba", "Ba", "C", "C", "C", "C", "S", "S", "S", "S", "N", "N", "N", "N"], "assemblies": null, "structure_features": [], "_optimake_structure_description": "some description", "_optimake_property_b": 0.99, "_optimake_energy": -0.54}, "relationships": null}
2 changes: 1 addition & 1 deletion examples/zip_of_cif_and_xyz/.testing/first_entry.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"id": "set1/55c564f6-ac6a-4122-b8d9-0ad9fe61e961.cif", "type": "structures", "links": null, "meta": null, "attributes": {"immutable_id": "structures.zip/structures/set1/55c564f6-ac6a-4122-b8d9-0ad9fe61e961.cif", "last_modified": "2024-05-30T18:52:22.987047", "elements": ["Ba", "C", "N", "S"], "nelements": 4, "elements_ratios": [0.14285714285714285, 0.2857142857142857, 0.2857142857142857, 0.2857142857142857], "chemical_formula_descriptive": "C4Ba2N4S4", "chemical_formula_reduced": "BaC2N2S2", "chemical_formula_hill": null, "chemical_formula_anonymous": "A2B2C2D", "dimension_types": [1, 1, 1], "nperiodic_dimensions": 3, "lattice_vectors": [[6.3587627540404945, 0.0, 0.0], [-2.672647488887009, 5.769819681958754, 0.0], [0.25844951934994664, -0.16511343006546234, 8.71190314896161]], "cartesian_site_positions": [[3.4987802863851005, 5.049341739457014, 6.533927361693402], [0.4457844982025419, 0.5553645123824795, 2.177975787264444], [0.37416734784252487, 2.642448780492868, 5.291889331690525], [2.6281157156591126, 1.202488098280357, 7.775965391700042], [3.5703974366609055, 2.962257471400425, 3.420013817271085], [1.316449068841148, 4.402218153614962, 0.9359377571616379], [0.3512600296798777, 4.156339511491648, 5.900755701251229], [4.011419364830723, 1.818004876809729, 7.167099022143105], [3.5933047547393455, 1.4483667404554434, 2.8111474477141454], [-0.06685458032729186, 3.786701375083563, 1.5448041268185058], [0.3773832460379156, 1.5350786992068879, 4.8597303327393915], [1.621957615426957, 0.7399681351855376, 8.208124390751108], [3.567181538468686, 4.069627552684378, 3.8521728163221476], [2.3226071690796486, 4.864738116705728, 0.5037787583104332]], "nsites": 14, "species": [{"name": "Ba", "chemical_symbols": ["Ba"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}, {"name": "N", "chemical_symbols": ["N"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}, {"name": "S", 
"chemical_symbols": ["S"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}, {"name": "C", "chemical_symbols": ["C"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}], "species_at_sites": ["Ba", "Ba", "C", "C", "C", "C", "S", "S", "S", "S", "N", "N", "N", "N"], "assemblies": null, "structure_features": []}, "relationships": null}
{"id": "set1/55c564f6-ac6a-4122-b8d9-0ad9fe61e961.cif", "type": "structures", "links": null, "meta": null, "attributes": {"immutable_id": "structures.zip/structures/set1/55c564f6-ac6a-4122-b8d9-0ad9fe61e961.cif", "last_modified": "2024-07-09T20:08:01.155095", "elements": ["Ba", "C", "N", "S"], "nelements": 4, "elements_ratios": [0.14285714285714285, 0.2857142857142857, 0.2857142857142857, 0.2857142857142857], "chemical_formula_descriptive": "C4Ba2N4S4", "chemical_formula_reduced": "BaC2N2S2", "chemical_formula_hill": null, "chemical_formula_anonymous": "A2B2C2D", "dimension_types": [1, 1, 1], "nperiodic_dimensions": 3, "lattice_vectors": [[6.3587627540404945, 0.0, 0.0], [-2.672647488887009, 5.769819681958754, 0.0], [0.25844951934994664, -0.16511343006546234, 8.71190314896161]], "cartesian_site_positions": [[3.4987802863851005, 5.049341739457014, 6.533927361693402], [0.4457844982025419, 0.5553645123824795, 2.177975787264444], [0.37416734784252487, 2.642448780492868, 5.291889331690525], [2.6281157156591126, 1.202488098280357, 7.775965391700042], [3.5703974366609055, 2.962257471400425, 3.420013817271085], [1.316449068841148, 4.402218153614962, 0.9359377571616379], [0.3512600296798777, 4.156339511491648, 5.900755701251229], [4.011419364830723, 1.818004876809729, 7.167099022143105], [3.5933047547393455, 1.4483667404554434, 2.8111474477141454], [-0.06685458032729186, 3.786701375083563, 1.5448041268185058], [0.3773832460379156, 1.5350786992068879, 4.8597303327393915], [1.621957615426957, 0.7399681351855376, 8.208124390751108], [3.567181538468686, 4.069627552684378, 3.8521728163221476], [2.3226071690796486, 4.864738116705728, 0.5037787583104332]], "nsites": 14, "species": [{"name": "C", "chemical_symbols": ["C"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}, {"name": "Ba", "chemical_symbols": ["Ba"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}, {"name": "N", 
"chemical_symbols": ["N"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}, {"name": "S", "chemical_symbols": ["S"], "concentration": [1.0], "mass": null, "original_name": null, "attached": null, "nattached": null}], "species_at_sites": ["Ba", "Ba", "C", "C", "C", "C", "S", "S", "S", "S", "N", "N", "N", "N"], "assemblies": null, "structure_features": []}, "relationships": null}
7 changes: 4 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@ requires-python = ">=3.10"

dependencies = [
"pydantic~=2.2",
"optimade[ase]~=1.0",
"optimade[server,ase]~=1.1",
"pyyaml~=6.0",
"pymatgen>=2023.9",
"pandas~=2.1",
"pybtex~=0.24",
"tqdm~=4.65",
"requests~=2.31",
"numpy~=1.26"
"numpy~=1.26",
"click~=8.1"
]

[project.optional-dependencies]
Expand All @@ -39,4 +40,4 @@ known_first_party = "optimake"
profile = "black"

[project.scripts]
optimake = "optimake.cli:main"
optimake = "optimake.cli:cli"
84 changes: 73 additions & 11 deletions src/optimake/cli.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,83 @@
import argparse
from pathlib import Path

import click

from optimake.convert import convert_archive
from optimake.logger import LOGGER
from optimake.serve import OptimakeServer


# Root command group of the `optimake` CLI. Subcommands attach themselves with
# `@cli.command()`. The docstring below doubles as the text click prints for
# `optimake --help`, so it is kept user-facing; maintainer notes live in comments.
@click.group()
def cli():
    """
    Tools for making OPTIMADE APIs for raw data archives annotated with an
    `optimade.yaml` file.
    """


@cli.command()
@click.option(
"--jsonl_path",
type=click.Path(),
help="The path to write the JSONL file to.",
)
@click.argument(
"path",
type=click.Path(),
)
def convert(jsonl_path, path):
"""
Convert a raw data archive into OPTIMADE JSONL.
PATH needs to contain the full raw data archive, with the `optimade.yaml` config
file at the top level. The data is converted into the OPTIMADE JSON Lines format.
"""

def main():
parser = argparse.ArgumentParser(
prog="optimake",
description="Use an `optimade.yaml` config to describe archived data and create a OPTIMADE JSONL file for ingestion as an OPTIMADE API.",
)
parser.add_argument("archive_path", help="The path to the archive to ingest.")
parser.add_argument("--jsonl-path", help="The path to write the JSONL file to.")
args = parser.parse_args()
jsonl_path = args.jsonl_path
if jsonl_path:
jsonl_path = Path(jsonl_path)
if jsonl_path.exists():
raise FileExistsError(f"File already exists at {jsonl_path}.")

convert_archive(Path(args.archive_path), jsonl_path=jsonl_path)
convert_archive(Path(path), jsonl_path=jsonl_path)


@cli.command()
@click.option(
    "--port",
    type=int,
    default=5000,
    show_default=True,  # surface the default port in `optimake serve --help`
    help="The port to serve the API on.",
)
@click.argument(
    "path",
    # PATH is used as a directory below (`path / jsonl_file`), so reject missing
    # paths and plain files up front with a click usage error instead of failing
    # later inside the conversion or the server start-up.
    type=click.Path(exists=True, file_okay=False),
)
def serve(port, path):
    """
    Serve a raw data archive with an OPTIMADE API.
    PATH needs to contain the full raw data archive, with the `optimade.yaml` config
    file at the top level. If needed, the data is first converted into an OPTIMADE JSONL
    file. However, if the JSONL file already exists, the API is started from it.
    Note that this command starts the API using a simple backend, which is not recommended
    for a production environment.
    """
    # The docstring above is the click help text; implementation notes go here.
    #
    # port: TCP port handed to OptimakeServer.
    # path: archive directory given on the command line (str from click).
    jsonl_file = "optimade.jsonl"
    path = Path(path)

    if (path / jsonl_file).exists():
        # Reuse the existing JSONL file rather than converting again.
        LOGGER.info("%s already exists!", jsonl_file)
    else:
        LOGGER.info("%s doesn't exist. Converting archive.", jsonl_file)
        # NOTE(review): presumably convert_archive() writes `optimade.jsonl`
        # into `path` when no explicit jsonl_path is given -- confirm in convert.py.
        convert_archive(path)

    LOGGER.info("Starting the API")
    optimake_server = OptimakeServer(path, port)
    optimake_server.start_api()


if __name__ == "__main__":
cli()
2 changes: 1 addition & 1 deletion src/optimake/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from .config import Config, EntryConfig, JSONLConfig, ParsedFiles, PropertyDefinition
from .parsers import ENTRY_PARSERS, OPTIMADE_CONVERTERS, PROPERTY_PARSERS, TYPE_MAP

PROVIDER_PREFIX = os.environ.get("optimake_PROVIDER_PREFIX", "mcloudarchive")
PROVIDER_PREFIX = os.environ.get("optimake_PROVIDER_PREFIX", "optimake")


def _construct_entry_type_info(
Expand Down
9 changes: 9 additions & 0 deletions src/optimake/logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import logging

# Package-wide logging setup: importing this module configures logging once and
# exposes the shared `LOGGER` used by the rest of the package.
#
# NOTE(review): `logging.basicConfig` configures the *root* logger, so the DEBUG
# level and the format below also apply to records emitted by other libraries
# in the same process, not only to the "optimake" logger.
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
_LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"

logging.basicConfig(
    format=_LOG_FORMAT,
    datefmt=_LOG_DATE_FORMAT,
    level=logging.DEBUG,
)

# Named logger shared by the package's modules.
LOGGER = logging.getLogger("optimake")
Loading

0 comments on commit 69042da

Please sign in to comment.