diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index ca2b7874e..000000000 Binary files a/.DS_Store and /dev/null differ diff --git a/.github/workflows/install.yaml b/.github/workflows/install.yaml index bc327c087..94a50480f 100644 --- a/.github/workflows/install.yaml +++ b/.github/workflows/install.yaml @@ -1,29 +1,27 @@ ---- # GitHub action that attempts to install the conda env # from conda.yaml # then run black, isort, flake8 - name: Install on: [push, pull_request] jobs: - install: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - uses: conda-incubator/setup-miniconda@v2 - with: - environment-file: conda.yaml - activate-environment: chemnlp - python-version: 3.9 - auto-update-conda: true - auto-activate-base: false - - name: Validate yaml - shell: bash -l {0} - run: | - conda activate chemnlp - python -m src.chemnlp.data_val.validate data - - name: Tests - shell: bash -l {0} - run: | - pip install pytest - pytest tests + install: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: conda-incubator/setup-miniconda@v2 + with: + environment-file: conda.yaml + activate-environment: chemnlp + python-version: 3.9 + auto-update-conda: true + auto-activate-base: false + - name: Validate yaml + shell: bash -l {0} + run: | + conda activate chemnlp + python -m src.chemnlp.data_val.validate data + - name: Tests + shell: bash -l {0} + run: | + pip install pytest + pytest tests diff --git a/.gitignore b/.gitignore index edff3a531..4c2a71c41 100644 --- a/.gitignore +++ b/.gitignore @@ -144,3 +144,5 @@ scratch/ # vim *~ *.swp + +.DS_Store diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 67b00b7f6..000000000 --- a/.gitmodules +++ /dev/null @@ -1,12 +0,0 @@ -[submodule "gpt-neox"] - path = gpt-neox - url = git@github.com:OpenBioML/gpt-neox.git - branch = main -[submodule "lm-eval2"] - path = lm-eval2 - url = git@github.com:OpenBioML/lm-eval2.git - branch = main -[submodule "lm-eval1"] - path = 
lm-evaluation-harness - url = git@github.com:OpenBioML/lm-evaluation-harness.git - branch = master diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c382fca0a..a794fa2f9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,41 +1,90 @@ ---- -ci: - autoupdate_schedule: quarterly - autofix_prs: false - submodules: false - +default_language_version: + python: python3 repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.5.0 - hooks: - - id: check-json - - id: check-yaml - - id: end-of-file-fixer - - id: trailing-whitespace - exclude: miscellaneous/structures/SiO2.xyz - - - repo: https://github.com/jumanjihouse/pre-commit-hook-yamlfmt - rev: 0.2.3 - hooks: - - id: yamlfmt - exclude: ^experiments/configs - - - repo: https://github.com/psf/black - rev: 24.3.0 - hooks: - - id: black - language_version: python3 # Should be a command that runs python3.6+ - - - repo: https://github.com/PyCQA/flake8 - rev: 7.0.0 - hooks: - - id: flake8 - args: [--count, --show-source, --statistics] - additional_dependencies: - - flake8-bugbear==22.7.1 - - - repo: https://github.com/pycqa/isort - rev: 5.13.2 - hooks: - - id: isort - args: [--profile, black, --filter-files] + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: check-added-large-files + - id: check-byte-order-marker + - id: check-case-conflict + - id: check-merge-conflict + - id: check-shebang-scripts-are-executable + - id: check-symlinks + - id: check-toml + - id: check-yaml + exclude: "kubernetes.yaml$" # This line excludes kubernetes.yaml from being checked as it is WIP + - id: debug-statements + - id: detect-private-key + - id: end-of-file-fixer + - id: mixed-line-ending + exclude: .gitignore + - id: trailing-whitespace + exclude: .gitignore + - repo: https://github.com/psf/black-pre-commit-mirror + rev: 24.4.2 + hooks: + - id: black-jupyter + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.5.1 + hooks: + - id: ruff + 
args: [--fix, --exit-non-zero-on-fix] + - repo: https://github.com/rbubley/mirrors-prettier + rev: v3.3.2 + hooks: + - id: prettier + - repo: https://github.com/google/yamlfmt + rev: v0.13.0 + hooks: + - id: yamlfmt + - repo: https://github.com/Yelp/detect-secrets + rev: v1.5.0 + hooks: + - id: detect-secrets + - repo: https://github.com/pappasam/toml-sort + rev: v0.23.1 + hooks: + - id: toml-sort-fix + exclude: poetry.lock + - repo: https://github.com/codespell-project/codespell + rev: v2.3.0 + hooks: + - id: codespell + additional_dependencies: [".[toml]"] + - repo: https://github.com/sqlfluff/sqlfluff + rev: 3.1.0 + hooks: + - id: sqlfluff-fix + - repo: https://github.com/hadolint/hadolint + rev: v2.13.0-beta + hooks: + - id: hadolint-docker + - repo: https://github.com/jsh9/markdown-toc-creator + rev: 0.0.6 + hooks: + - id: markdown-toc-creator + - repo: https://github.com/jumanjihouse/pre-commit-hooks + rev: 3.0.0 + hooks: + - id: check-mailmap + - repo: https://github.com/python-poetry/poetry + rev: 1.8.0 + hooks: + - id: poetry-check + - repo: https://github.com/pre-commit/mirrors-mypy + rev: v1.10.1 + hooks: + - id: mypy + additional_dependencies: + - types-requests + - repo: https://github.com/srstevenson/nb-clean + rev: 3.3.0 + hooks: + - id: nb-clean + args: [--preserve-cell-outputs, --remove-empty-cells] + - repo: https://github.com/abravalheri/validate-pyproject + rev: v0.18 + hooks: + - id: validate-pyproject + additional_dependencies: + - "validate-pyproject-schema-store[all]>=2024.06.24" # Pin for Ruff's FURB154 diff --git a/README.md b/README.md index a69442e27..cba9a42a2 100644 --- a/README.md +++ b/README.md @@ -32,14 +32,12 @@ ChemNLP is an open-source project - your involvement is warmly welcome! If you'r - Looking for ideas? See our [task board](https://github.com/orgs/OpenBioML/projects/5/views/1) to see what we may need help with. - Have an idea? Create an [issue](https://github.com/OpenBioML/chemnlp/issues)! 
- Over the past months ChemNLP has received many contributions and a lot of feedback. We appreciate all contributions from community to make ChemNLP thrive. - # Note on the "ChemNLP" name Our OpenBioML ChemNLP project is not affiliated to the [ChemNLP library from NIST](https://arxiv.org/abs/2209.08203) and we use "ChemNLP" as a general term to highlight our project focus. The datasets and models we create through our project will have a unique and recognizable name when we release them. diff --git a/code_of_conduct.md b/code_of_conduct.md index 45d257b29..8b4fcfd34 100644 --- a/code_of_conduct.md +++ b/code_of_conduct.md @@ -1,4 +1,3 @@ - # Contributor Covenant Code of Conduct ## Our Pledge @@ -18,23 +17,23 @@ diverse, inclusive, and healthy community. Examples of behavior that contributes to a positive environment for our community include: -* Demonstrating empathy and kindness toward other people -* Being respectful of differing opinions, viewpoints, and experiences -* Giving and gracefully accepting constructive feedback -* Accepting responsibility and apologizing to those affected by our mistakes, +- Demonstrating empathy and kindness toward other people +- Being respectful of differing opinions, viewpoints, and experiences +- Giving and gracefully accepting constructive feedback +- Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience -* Focusing on what is best not just for us as individuals, but for the overall +- Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: -* The use of sexualized language or imagery, and sexual attention or advances of +- The use of sexualized language or imagery, and sexual attention or advances of any kind -* Trolling, insulting or derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or email address, +- 
Trolling, insulting or derogatory comments, and personal or political attacks +- Public or private harassment +- Publishing others' private information, such as a physical or email address, without their explicit permission -* Other conduct which could reasonably be considered inappropriate in a +- Other conduct which could reasonably be considered inappropriate in a professional setting ## Enforcement Responsibilities diff --git a/conda.yaml b/conda.yaml index b6badfc78..92628149a 100644 --- a/conda.yaml +++ b/conda.yaml @@ -1,9 +1,8 @@ ---- name: dummy dependencies: - - python==3.9.* - - pip - - pip: - - . - - .[dev] - - .[dataset_creation] + - python==3.9.* + - pip + - pip: + - . + - .[dev] + - .[dataset_creation] diff --git a/data/kg/chebi_chebi/meta.yaml b/data/kg/chebi_chebi/meta.yaml index 49f1d325b..73e0fae00 100644 --- a/data/kg/chebi_chebi/meta.yaml +++ b/data/kg/chebi_chebi/meta.yaml @@ -1,48 +1,43 @@ ---- name: chebi_chebi description: Knowledgegraph data samples. targets: - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id identifiers: - - id: node1_type - description: node1_type - type: Other - - id: node1_name - description: node1_name - type: Other - - id: node1_id - description: node1_id - type: Other - - id: rel1_type - description: rel1_type - type: Other + - id: node1_type + description: node1_type + type: Other + - id: node1_name + description: node1_name + type: Other 
+ - id: node1_id + description: node1_id + type: Other + - id: rel1_type + description: rel1_type + type: Other license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 638182 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_name#} {rel1_type#} {node2_name#}. + - The {node1_name#} {rel1_type#} {node2_name#}. 
diff --git a/data/kg/chembl33_preprocessed_filtered_bioactivity_dataset_w_fullprotnames_smiles/meta.yaml b/data/kg/chembl33_preprocessed_filtered_bioactivity_dataset_w_fullprotnames_smiles/meta.yaml index 50b55545d..450197858 100644 --- a/data/kg/chembl33_preprocessed_filtered_bioactivity_dataset_w_fullprotnames_smiles/meta.yaml +++ b/data/kg/chembl33_preprocessed_filtered_bioactivity_dataset_w_fullprotnames_smiles/meta.yaml @@ -1,107 +1,101 @@ ---- name: chembl33_preprocessed_filtered_bioactivity_dataset_w_fullprotnames_smiles description: Knowledgegraph data samples. targets: - - id: protein_name - description: protein_name - type: Other - units: protein_name - names: - - noun: protein_name - - id: pchembl_value - description: pchembl_value - type: Other - units: pchembl_value - names: - - noun: pchembl_value - - id: standard_type - description: standard_type - type: Other - units: standard_type - names: - - noun: standard_type - - id: standard_value - description: standard_value - type: Other - units: standard_value - names: - - noun: standard_value - - id: standard_units - description: standard_units - type: Other - units: standard_units - names: - - noun: standard_units - - id: description - description: description - type: Other - units: description - names: - - noun: description + - id: protein_name + description: protein_name + type: Other + units: protein_name + names: + - noun: protein_name + - id: pchembl_value + description: pchembl_value + type: Other + units: pchembl_value + names: + - noun: pchembl_value + - id: standard_type + description: standard_type + type: Other + units: standard_type + names: + - noun: standard_type + - id: standard_value + description: standard_value + type: Other + units: standard_value + names: + - noun: standard_value + - id: standard_units + description: standard_units + type: Other + units: standard_units + names: + - noun: standard_units + - id: description + description: description + type: Other + units: description + 
names: + - noun: description identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 1059070 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {#molecule with the |!}{SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} a {#bioaffinity|affinity!} for {#the - protein |!}{protein_name#} with a {standard_type#} {#value |!}of {standard_value#} {standard_units#}. 
- - |- - Task: Please {#derive|estimate|approximate!} {#the bioaffinity|the affinity!} of a {#molecule to a protein|protein to a molecule!}. - Protein{# name|!}: {protein_name#} - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint{#s|!}: The {#resulting|derived|calculated!} {standard_type#} {#value |!}should be in {standard_units#}. Even if you are {#uncertain|not sure!}, you must {#derive|estimate|come up with!} a {standard_type#} {#value |!}without using any {#other|additional!} words. - Result: {standard_value#} {standard_units#} - - |- - Task: Please {#create|generate!} {#a molecule |a !}{SMILES__description} that has a {#bioaffinity|affinity!} to {#the protein |!}{protein_name#} with a {standard_type#} {#value |!}of {standard_value#} {standard_units#}. - Result: {SMILES#} - - |- - User: Can you {#give me|come up with!} {#one|an!} example of a protein that has a {#bioaffinity|affinity!} to the {SMILES__description} {SMILES#}? - Assistant: {#For example, the protein |For example, |!}{protein_name#} has a {#bioaffinity|affinity!} to the {SMILES__description} {SMILES#}. - User: Can you {#derive|estimate|approximate!} the {standard_type#} {#of this molecule|of this molecule for me|for me!}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, the {standard_type#} {#value |!}is {standard_value#} {standard_units#}. - - |- - User: Can you {#give me|come up with!} {#one|an!} example of a protein that has a {#bioaffinity|affinity!} to the {SMILES__description} {SMILES#}? - Assistant: {#The protein |!}{protein_name#} has for example a {#bioaffinity|affinity!} to the {SMILES__description} {SMILES#}. - User: Can you {#derive|estimate|approximate!} the {standard_type#} {#of this molecule|of this molecule for me|for me!}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, the {standard_type#} {#value |!}is {standard_value#} {standard_units#}. 
- User: Can you give {#me |!}{#additional|more!} {#information|details!} {#on|about!} the assay{# used| used for this estimation!}? - Assistant: {#Sure|Yes|Of course!}, here you go: - {description#} - - |- - Task: Please {#derive|estimate|approximate!} {#the bioaffinity|the affinity!} of a {#molecule to a protein|protein to a molecule!}. - Protein{# name|!}: {protein_name#} - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint{#s|!}: The {#resulting|derived|calculated!} {standard_type#} {#value |!}should be in {standard_units#}. Even if you are {#uncertain|not sure!}, you must {#derive|estimate|come up with!} a {standard_type#} {#value |!}without using any {#other|additional!} words. - Result: {standard_value#} {standard_units#} - - |- - Task: Please {#create|generate!} a {#molecule |!}{SMILES__description} that has a {#bioaffinity|affinity!} to {#the protein |!}{protein_name#} with a {standard_type#} {#value |!}of {standard_value#} {standard_units#}. - Result: {SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: What is the {#the bioaffinity|the affinity!} of a {#molecule to a protein|protein to a molecule!}? - Protein{# name|!}: {protein_name#} - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: The {#shown|listed!} {standard_type#} values {#below |!}are in {standard_units#}. Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%3-5%aA1} without using any other words. - Options: - {standard_value%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: What is the {#the bioaffinity|the affinity!} of a {#molecule to a protein|protein to a molecule!}? - Protein{# name|!}: {protein_name#} - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: The {#shown|listed!} {standard_type#} values {#below |!}are in {standard_units#}. 
Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%3-5%aA1} without using any other words. - Options: - {standard_value%} - Answer: {%multiple_choice_result} + - The {#molecule with the |!}{SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} a {#bioaffinity|affinity!} for {#the protein |!}{protein_name#} with a {standard_type#} {#value |!}of {standard_value#} {standard_units#}. + - |- + Task: Please {#derive|estimate|approximate!} {#the bioaffinity|the affinity!} of a {#molecule to a protein|protein to a molecule!}. + Protein{# name|!}: {protein_name#} + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint{#s|!}: The {#resulting|derived|calculated!} {standard_type#} {#value |!}should be in {standard_units#}. Even if you are {#uncertain|not sure!}, you must {#derive|estimate|come up with!} a {standard_type#} {#value |!}without using any {#other|additional!} words. + Result: {standard_value#} {standard_units#} + - |- + Task: Please {#create|generate!} {#a molecule |a !}{SMILES__description} that has a {#bioaffinity|affinity!} to {#the protein |!}{protein_name#} with a {standard_type#} {#value |!}of {standard_value#} {standard_units#}. + Result: {SMILES#} + - |- + User: Can you {#give me|come up with!} {#one|an!} example of a protein that has a {#bioaffinity|affinity!} to the {SMILES__description} {SMILES#}? + Assistant: {#For example, the protein |For example, |!}{protein_name#} has a {#bioaffinity|affinity!} to the {SMILES__description} {SMILES#}. + User: Can you {#derive|estimate|approximate!} the {standard_type#} {#of this molecule|of this molecule for me|for me!}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, the {standard_type#} {#value |!}is {standard_value#} {standard_units#}. + - |- + User: Can you {#give me|come up with!} {#one|an!} example of a protein that has a {#bioaffinity|affinity!} to the {SMILES__description} {SMILES#}? 
+ Assistant: {#The protein |!}{protein_name#} has for example a {#bioaffinity|affinity!} to the {SMILES__description} {SMILES#}. + User: Can you {#derive|estimate|approximate!} the {standard_type#} {#of this molecule|of this molecule for me|for me!}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, the {standard_type#} {#value |!}is {standard_value#} {standard_units#}. + User: Can you give {#me |!}{#additional|more!} {#information|details!} {#on|about!} the assay{# used| used for this estimation!}? + Assistant: {#Sure|Yes|Of course!}, here you go: + {description#} + - |- + Task: Please {#derive|estimate|approximate!} {#the bioaffinity|the affinity!} of a {#molecule to a protein|protein to a molecule!}. + Protein{# name|!}: {protein_name#} + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint{#s|!}: The {#resulting|derived|calculated!} {standard_type#} {#value |!}should be in {standard_units#}. Even if you are {#uncertain|not sure!}, you must {#derive|estimate|come up with!} a {standard_type#} {#value |!}without using any {#other|additional!} words. + Result: {standard_value#} {standard_units#} + - |- + Task: Please {#create|generate!} a {#molecule |!}{SMILES__description} that has a {#bioaffinity|affinity!} to {#the protein |!}{protein_name#} with a {standard_type#} {#value |!}of {standard_value#} {standard_units#}. + Result: {SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: What is the {#the bioaffinity|the affinity!} of a {#molecule to a protein|protein to a molecule!}? + Protein{# name|!}: {protein_name#} + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: The {#shown|listed!} {standard_type#} values {#below |!}are in {standard_units#}. Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%3-5%aA1} without using any other words. + Options: + {standard_value%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. 
+ Question: What is the {#the bioaffinity|the affinity!} of a {#molecule to a protein|protein to a molecule!}? + Protein{# name|!}: {protein_name#} + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: The {#shown|listed!} {standard_type#} values {#below |!}are in {standard_units#}. Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%3-5%aA1} without using any other words. + Options: + {standard_value%} + Answer: {%multiple_choice_result} diff --git a/data/kg/compound_chebi/meta.yaml b/data/kg/compound_chebi/meta.yaml index 0215a4eac..a3784a4f0 100644 --- a/data/kg/compound_chebi/meta.yaml +++ b/data/kg/compound_chebi/meta.yaml @@ -1,69 +1,64 @@ ---- name: compound_chebi description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + 
units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 6754 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} {node2_name#}. 
- - |- - Task: Please {#create|generate!} {#a compound |a !}{SMILES__description} that {rel1_type#} {node2_name#}. - Result: {SMILES#} - - |- - Task: Please {#create|generate!} {#a compound |a !}{SMILES__description} that {rel1_type#} {node2_name#}. - Result: {SMILES#} + - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} {node2_name#}. + - |- + Task: Please {#create|generate!} {#a compound |a !}{SMILES__description} that {rel1_type#} {node2_name#}. + Result: {SMILES#} + - |- + Task: Please {#create|generate!} {#a compound |a !}{SMILES__description} that {rel1_type#} {node2_name#}. + Result: {SMILES#} diff --git a/data/kg/compound_chebi_chebi/meta.yaml b/data/kg/compound_chebi_chebi/meta.yaml index 91e13ec0c..9a36f991a 100644 --- a/data/kg/compound_chebi_chebi/meta.yaml +++ b/data/kg/compound_chebi_chebi/meta.yaml @@ -1,87 +1,82 @@ ---- name: compound_chebi_chebi description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: 
node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 26991 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of 
biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} {node2_name#} and {rel2_type#} {node3_name#}. + - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} {node2_name#} and {rel2_type#} {node3_name#}. diff --git a/data/kg/compound_chebi_chebi_chebi_1/meta.yaml b/data/kg/compound_chebi_chebi_chebi_1/meta.yaml index b1153cc31..40437de34 100644 --- a/data/kg/compound_chebi_chebi_chebi_1/meta.yaml +++ b/data/kg/compound_chebi_chebi_chebi_1/meta.yaml @@ -1,110 +1,105 @@ ---- name: compound_chebi_chebi_chebi_1 description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id - - id: rel3_type - description: rel3_type - type: Other - units: rel3_type - names: - - noun: rel3_type - - id: node4_type - description: node4_type - type: Other - units: node4_type - names: - - noun: node4_type - - id: node4_name - description: node4_name - type: Other - units: node4_name - names: - - noun: node4_name - - id: node4_id - description: node4_id - type: Other - units: node4_id - names: - - noun: node4_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + 
names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id + - id: rel3_type + description: rel3_type + type: Other + units: rel3_type + names: + - noun: rel3_type + - id: node4_type + description: node4_type + type: Other + units: node4_type + names: + - noun: node4_type + - id: node4_name + description: node4_name + type: Other + units: node4_name + names: + - noun: node4_name + - id: node4_id + description: node4_id + type: Other + units: node4_id + names: + - noun: node4_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 9936872 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - 
\ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: [] diff --git a/data/kg/compound_chebi_chebi_chebi_2/meta.yaml b/data/kg/compound_chebi_chebi_chebi_2/meta.yaml index 0a0bd567b..944890527 100644 --- a/data/kg/compound_chebi_chebi_chebi_2/meta.yaml +++ b/data/kg/compound_chebi_chebi_chebi_2/meta.yaml @@ -1,110 +1,105 @@ ---- name: compound_chebi_chebi_chebi_2 description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id - - id: rel3_type - description: rel3_type - type: Other - units: rel3_type - names: - - noun: rel3_type - - id: node4_type - description: node4_type - type: Other - units: node4_type - names: - - noun: node4_type - - id: node4_name - description: node4_name - type: Other - units: node4_name - names: - - noun: node4_name - - id: node4_id - description: node4_id - type: Other - units: node4_id - names: - - noun: node4_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + 
names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id + - id: rel3_type + description: rel3_type + type: Other + units: rel3_type + names: + - noun: rel3_type + - id: node4_type + description: node4_type + type: Other + units: node4_type + names: + - noun: node4_type + - id: node4_name + description: node4_name + type: Other + units: node4_name + names: + - noun: node4_name + - id: node4_id + description: node4_id + type: Other + units: node4_id + names: + - noun: node4_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 1480272 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - 
\ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: [] diff --git a/data/kg/compound_protein/meta.yaml b/data/kg/compound_protein/meta.yaml index c4e2a093f..31905c97b 100644 --- a/data/kg/compound_protein/meta.yaml +++ b/data/kg/compound_protein/meta.yaml @@ -1,73 +1,68 @@ ---- name: compound_protein description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge 
graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 619840 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. - - The {node2_type#} {node2_protein_names#} is targeted by the drug with the {SMILES__description} {SMILES#}. - - |- - User: Can you {#give me|come up with!} {#one|an!} example for a {node1_type#} {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}? - Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. 
+ - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. + - The {node2_type#} {node2_protein_names#} is targeted by the drug with the {SMILES__description} {SMILES#}. + - |- + User: Can you {#give me|come up with!} {#one|an!} example for a {node1_type#} {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}? + Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. diff --git a/data/kg/compound_protein_compound_1/meta.yaml b/data/kg/compound_protein_compound_1/meta.yaml index 6fc86bdc9..c5659cca0 100644 --- a/data/kg/compound_protein_compound_1/meta.yaml +++ b/data/kg/compound_protein_compound_1/meta.yaml @@ -1,106 +1,100 @@ ---- name: compound_protein_compound_1 description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: 
node3_type - - id: node3_smiles - description: node3_smiles - type: Other - units: node3_smiles - names: - - noun: node3_smiles - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_smiles + description: node3_smiles + type: Other + units: node3_smiles + names: + - noun: node3_smiles + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link 
+ - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 9851748 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} the {node3_type#} - {node3_smiles#}. - - The {node2_type#} {node2_protein_names#} is targeted by the compound with the {SMILES__description} {SMILES#} and {node3_smiles#}. - - |- - User: Can you {#give me|come up with!} {#one|an!} example for a {node1_type#} with the {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}? 
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. - User: Can you {#tell me|create|generate!} {#another|a!} {node1_type#} {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}? - Assistant: {#Sure|Yes|Of course|Yes, of course!}, the SMILES{# representation|!} {node3_smiles#} {#also |!}{rel1_type#} the {node2_type#} {node2_protein_names#}. + - The {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} the {node3_type#} {node3_smiles#}. + - The {node2_type#} {node2_protein_names#} is targeted by the compound with the {SMILES__description} {SMILES#} and {node3_smiles#}. + - |- + User: Can you {#give me|come up with!} {#one|an!} example for a {node1_type#} with the {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}? + Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. + User: Can you {#tell me|create|generate!} {#another|a!} {node1_type#} {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}? + Assistant: {#Sure|Yes|Of course|Yes, of course!}, the SMILES{# representation|!} {node3_smiles#} {#also |!}{rel1_type#} the {node2_type#} {node2_protein_names#}. diff --git a/data/kg/compound_protein_compound_2/meta.yaml b/data/kg/compound_protein_compound_2/meta.yaml index 76ede634e..29d92b5af 100644 --- a/data/kg/compound_protein_compound_2/meta.yaml +++ b/data/kg/compound_protein_compound_2/meta.yaml @@ -1,106 +1,100 @@ ---- name: compound_protein_compound_2 description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_smiles - description: node3_smiles - type: Other - units: node3_smiles - names: - - noun: node3_smiles - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + 
description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_smiles + description: node3_smiles + type: Other + units: node3_smiles + names: + - noun: node3_smiles + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 9906551 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and 
Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} the {node3_type#} - {node3_smiles#}. - - The {node2_type#} {node2_protein_names#} is targeted by the compound with the {SMILES__description} {SMILES#} and {node3_smiles#}. - - |- - User: Can you {#give me|come up with!} {#one|an!} example for a {node1_type#} with the {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}? - Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. - User: Can you {#tell me|create|generate!} {#another|a!} {node1_type#} {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}? - Assistant: {#Sure|Yes|Of course|Yes, of course!}, the SMILES{# representation|!} {node3_smiles#} {#also |!}{rel1_type#} the {node2_type#} {node2_protein_names#}. + - The {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} the {node3_type#} {node3_smiles#}. + - The {node2_type#} {node2_protein_names#} is targeted by the compound with the {SMILES__description} {SMILES#} and {node3_smiles#}. + - |- + User: Can you {#give me|come up with!} {#one|an!} example for a {node1_type#} with the {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}? 
+ Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. + User: Can you {#tell me|create|generate!} {#another|a!} {node1_type#} {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}? + Assistant: {#Sure|Yes|Of course|Yes, of course!}, the SMILES{# representation|!} {node3_smiles#} {#also |!}{rel1_type#} the {node2_type#} {node2_protein_names#}. diff --git a/data/kg/compound_protein_compound_3/meta.yaml b/data/kg/compound_protein_compound_3/meta.yaml index efaa033e1..94c455871 100644 --- a/data/kg/compound_protein_compound_3/meta.yaml +++ b/data/kg/compound_protein_compound_3/meta.yaml @@ -1,106 +1,100 @@ ---- name: compound_protein_compound_3 description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_smiles - description: node3_smiles - type: 
Other - units: node3_smiles - names: - - noun: node3_smiles - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_smiles + description: node3_smiles + type: Other + units: node3_smiles + names: + - noun: node3_smiles + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original 
knowledge graph web GUI link num_points: 9764124 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} the {node3_type#} - {node3_smiles#}. - - The {node2_type#} {node2_protein_names#} is targeted by the compound with the {SMILES__description} {SMILES#} and {node3_smiles#}. - - |- - User: Can you {#give me|come up with!} {#one|an!} example for a {node1_type#} with the {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}? 
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. - User: Can you {#tell me|create|generate!} {#another|a!} {node1_type#} {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}? - Assistant: {#Sure|Yes|Of course|Yes, of course!}, the SMILES{# representation|!} {node3_smiles#} {#also |!}{rel1_type#} the {node2_type#} {node2_protein_names#}. + - The {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} the {node3_type#} {node3_smiles#}. + - The {node2_type#} {node2_protein_names#} is targeted by the compound with the {SMILES__description} {SMILES#} and {node3_smiles#}. + - |- + User: Can you {#give me|come up with!} {#one|an!} example for a {node1_type#} with the {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}? + Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. + User: Can you {#tell me|create|generate!} {#another|a!} {node1_type#} {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}? + Assistant: {#Sure|Yes|Of course|Yes, of course!}, the SMILES{# representation|!} {node3_smiles#} {#also |!}{rel1_type#} the {node2_type#} {node2_protein_names#}. diff --git a/data/kg/compound_protein_disease/meta.yaml b/data/kg/compound_protein_disease/meta.yaml index 54de1a39b..e33e843fa 100644 --- a/data/kg/compound_protein_disease/meta.yaml +++ b/data/kg/compound_protein_disease/meta.yaml @@ -1,100 +1,94 @@ ---- name: compound_protein_disease description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names 
+ - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 1424348 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of 
biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_type#} {node3_name#}. - - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_type#} - {node3_name#}. - - |- - User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that is targeted by the {node1_type#} {SMILES#}? - Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_protein_names#} is targeted by the {node1_type#} {SMILES#}. - User: Can you tell me which disease the {node2_type#} {node2_protein_names#} {rel2_type#}? - Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#} {node3_type#}. + - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_type#} {node3_name#}. + - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_type#} {node3_name#}. + - |- + User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that is targeted by the {node1_type#} {SMILES#}? + Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_protein_names#} is targeted by the {node1_type#} {SMILES#}. + User: Can you tell me which disease the {node2_type#} {node2_protein_names#} {rel2_type#}? + Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#} {node3_type#}. 
diff --git a/data/kg/compound_protein_domain/meta.yaml b/data/kg/compound_protein_domain/meta.yaml index 15184bf4c..f1a86b94a 100644 --- a/data/kg/compound_protein_domain/meta.yaml +++ b/data/kg/compound_protein_domain/meta.yaml @@ -1,99 +1,94 @@ ---- name: compound_protein_domain description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: 
rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 1589285 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - 
"@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - '{SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} a {node3_name#}.' - - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} a {node3_name#}. - - |- - User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#}? - Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES#} {rel1_type#} for example the {node2_type#} {node2_protein_names#}. - User: Can you tell me a domain of the {node2_type#} {node2_protein_names#}? - Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} a {node3_name#}. + - "{SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} a {node3_name#}." + - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} a {node3_name#}. + - |- + User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#}? + Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES#} {rel1_type#} for example the {node2_type#} {node2_protein_names#}. + User: Can you tell me a domain of the {node2_type#} {node2_protein_names#}? + Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} a {node3_name#}. 
diff --git a/data/kg/compound_protein_ec_number/meta.yaml b/data/kg/compound_protein_ec_number/meta.yaml index aecaa2ceb..58d8288b8 100644 --- a/data/kg/compound_protein_ec_number/meta.yaml +++ b/data/kg/compound_protein_ec_number/meta.yaml @@ -1,100 +1,94 @@ ---- name: compound_protein_ec_number description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + 
description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 405980 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - 
"@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} the {node3_name#} (EC {node3_id#}). - - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. Furthermore, the {node1_type#} {SMILES#} {rel2_type#} the {node3_name#} - (EC {node3_id#}). - - |- - User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#}? - Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. - User: Can you tell me which enzyme the {node2_type#} {node2_protein_names#} {rel2_type#}? - Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} a {node3_name#} (EC {node3_id#}). + - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} the {node3_name#} (EC {node3_id#}). + - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. Furthermore, the {node1_type#} {SMILES#} {rel2_type#} the {node3_name#} (EC {node3_id#}). + - |- + User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#}? + Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. 
+ User: Can you tell me which enzyme the {node2_type#} {node2_protein_names#} {rel2_type#}? + Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} a {node3_name#} (EC {node3_id#}). diff --git a/data/kg/compound_protein_go_term_1/meta.yaml b/data/kg/compound_protein_go_term_1/meta.yaml index e49613b7a..f95ecac30 100644 --- a/data/kg/compound_protein_go_term_1/meta.yaml +++ b/data/kg/compound_protein_go_term_1/meta.yaml @@ -1,95 +1,89 @@ ---- name: compound_protein_go_term_1 description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: 
node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 9820893 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = 
{49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}. - - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} - {rel2_type#} the {node3_name#}. + - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}. + - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}. diff --git a/data/kg/compound_protein_go_term_2/meta.yaml b/data/kg/compound_protein_go_term_2/meta.yaml index 75b61831e..1577ea82d 100644 --- a/data/kg/compound_protein_go_term_2/meta.yaml +++ b/data/kg/compound_protein_go_term_2/meta.yaml @@ -1,95 +1,89 @@ ---- name: compound_protein_go_term_2 description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_protein_names + 
description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 9781374 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource 
of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}. - - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} - {rel2_type#} the {node3_name#}. + - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}. + - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}. diff --git a/data/kg/compound_protein_go_term_3/meta.yaml b/data/kg/compound_protein_go_term_3/meta.yaml index ea6bda6fb..f1df53242 100644 --- a/data/kg/compound_protein_go_term_3/meta.yaml +++ b/data/kg/compound_protein_go_term_3/meta.yaml @@ -1,95 +1,89 @@ ---- name: compound_protein_go_term_3 description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_protein_names + 
description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 9798619 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource 
of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}. - - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} - {rel2_type#} the {node3_name#}. + - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}. + - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}. diff --git a/data/kg/compound_protein_go_term_4/meta.yaml b/data/kg/compound_protein_go_term_4/meta.yaml index bebb00ce5..f23fb3d1f 100644 --- a/data/kg/compound_protein_go_term_4/meta.yaml +++ b/data/kg/compound_protein_go_term_4/meta.yaml @@ -1,95 +1,89 @@ ---- name: compound_protein_go_term_4 description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_protein_names + 
description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 1767147 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource 
of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}. - - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} - {rel2_type#} the {node3_name#}. + - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}. + - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}. diff --git a/data/kg/compound_protein_hpo/meta.yaml b/data/kg/compound_protein_hpo/meta.yaml index 4aea5ce91..8f68f9802 100644 --- a/data/kg/compound_protein_hpo/meta.yaml +++ b/data/kg/compound_protein_hpo/meta.yaml @@ -1,97 +1,91 @@ ---- name: compound_protein_hpo description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names 
+ - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 2971239 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of 
biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} {node3_name#}. - - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. - - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the human - phenotype represented by {node3_name#}. - - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {rel2_type#} the {node3_name#}. + - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} {node3_name#}. + - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. + - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the human phenotype represented by {node3_name#}. + - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {rel2_type#} the {node3_name#}. diff --git a/data/kg/compound_protein_hpo_disease_1/meta.yaml b/data/kg/compound_protein_hpo_disease_1/meta.yaml index a8d5bdd83..035583ee3 100644 --- a/data/kg/compound_protein_hpo_disease_1/meta.yaml +++ b/data/kg/compound_protein_hpo_disease_1/meta.yaml @@ -1,118 +1,112 @@ ---- name: compound_protein_hpo_disease_1 description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id - - id: rel3_type - description: rel3_type - type: Other - units: rel3_type - names: - - noun: rel3_type - - id: node4_type - description: node4_type - type: Other - units: node4_type - names: - - noun: node4_type - - id: node4_name - description: node4_name - type: Other - units: node4_name - names: - - noun: node4_name - - id: node4_id - description: node4_id - type: Other - units: node4_id - names: - - noun: node4_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: 
node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id + - id: rel3_type + description: rel3_type + type: Other + units: rel3_type + names: + - noun: rel3_type + - id: node4_type + description: node4_type + type: Other + units: node4_type + names: + - noun: node4_type + - id: node4_name + description: node4_name + type: Other + units: node4_name + names: + - noun: node4_name + - id: node4_id + description: node4_id + type: Other + units: node4_id + names: + - noun: node4_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 9815355 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra 
and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. - The {node3_name#} {rel3_type#} the {node4_type#} {node4_name#}. + - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. The {node3_name#} {rel3_type#} the {node4_type#} {node4_name#}. diff --git a/data/kg/compound_protein_hpo_disease_2/meta.yaml b/data/kg/compound_protein_hpo_disease_2/meta.yaml index c5b744322..5ce2d0ef0 100644 --- a/data/kg/compound_protein_hpo_disease_2/meta.yaml +++ b/data/kg/compound_protein_hpo_disease_2/meta.yaml @@ -1,118 +1,112 @@ ---- name: compound_protein_hpo_disease_2 description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id - - id: rel3_type - description: rel3_type - type: Other - units: rel3_type - names: - - noun: rel3_type - - id: node4_type - description: node4_type - type: Other - units: node4_type - names: - - noun: node4_type - - id: node4_name - description: node4_name - type: Other - units: node4_name - names: - - noun: node4_name - - id: node4_id - description: node4_id - type: Other - units: node4_id - names: - - noun: node4_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: 
node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id + - id: rel3_type + description: rel3_type + type: Other + units: rel3_type + names: + - noun: rel3_type + - id: node4_type + description: node4_type + type: Other + units: node4_type + names: + - noun: node4_type + - id: node4_name + description: node4_name + type: Other + units: node4_name + names: + - noun: node4_name + - id: node4_id + description: node4_id + type: Other + units: node4_id + names: + - noun: node4_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 2786883 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra 
and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. - The {node3_name#} {rel3_type#} the {node4_type#} {node4_name#}. + - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. The {node3_name#} {rel3_type#} the {node4_type#} {node4_name#}. diff --git a/data/kg/compound_protein_pathway/meta.yaml b/data/kg/compound_protein_pathway/meta.yaml index 8059ada61..e38ce798f 100644 --- a/data/kg/compound_protein_pathway/meta.yaml +++ b/data/kg/compound_protein_pathway/meta.yaml @@ -1,94 +1,89 @@ ---- name: compound_protein_pathway description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names 
+ - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 5872197 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of 
biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}. - - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}. + - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}. + - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}. diff --git a/data/kg/compound_protein_pathway_disease_1/meta.yaml b/data/kg/compound_protein_pathway_disease_1/meta.yaml index 623bf8483..ddbbdd0b9 100644 --- a/data/kg/compound_protein_pathway_disease_1/meta.yaml +++ b/data/kg/compound_protein_pathway_disease_1/meta.yaml @@ -1,118 +1,112 @@ ---- name: compound_protein_pathway_disease_1 description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id - - id: rel3_type - description: rel3_type - type: Other - units: rel3_type - names: - - noun: rel3_type - - id: node4_type - description: node4_type - type: Other - units: node4_type - names: - - noun: node4_type - - id: node4_name - description: node4_name - type: Other - units: node4_name - names: - - noun: node4_name - - id: node4_id - description: node4_id - type: Other - units: node4_id - names: - - noun: node4_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: 
node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id + - id: rel3_type + description: rel3_type + type: Other + units: rel3_type + names: + - noun: rel3_type + - id: node4_type + description: node4_type + type: Other + units: node4_type + names: + - noun: node4_type + - id: node4_name + description: node4_name + type: Other + units: node4_name + names: + - noun: node4_name + - id: node4_id + description: node4_id + type: Other + units: node4_id + names: + - noun: node4_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 9797638 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra 
and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. - The {node3_name#} is {rel3_type#} the {node4_type#} {node4_name#}. + - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. The {node3_name#} is {rel3_type#} the {node4_type#} {node4_name#}. diff --git a/data/kg/compound_protein_pathway_disease_2/meta.yaml b/data/kg/compound_protein_pathway_disease_2/meta.yaml index 51c8f8492..257a1b22b 100644 --- a/data/kg/compound_protein_pathway_disease_2/meta.yaml +++ b/data/kg/compound_protein_pathway_disease_2/meta.yaml @@ -1,118 +1,112 @@ ---- name: compound_protein_pathway_disease_2 description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id - - id: rel3_type - description: rel3_type - type: Other - units: rel3_type - names: - - noun: rel3_type - - id: node4_type - description: node4_type - type: Other - units: node4_type - names: - - noun: node4_type - - id: node4_name - description: node4_name - type: Other - units: node4_name - names: - - noun: node4_name - - id: node4_id - description: node4_id - type: Other - units: node4_id - names: - - noun: node4_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: 
node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id + - id: rel3_type + description: rel3_type + type: Other + units: rel3_type + names: + - noun: rel3_type + - id: node4_type + description: node4_type + type: Other + units: node4_type + names: + - noun: node4_type + - id: node4_name + description: node4_name + type: Other + units: node4_name + names: + - noun: node4_name + - id: node4_id + description: node4_id + type: Other + units: node4_id + names: + - noun: node4_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 9780116 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra 
and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} - {rel2_type#} {node3_name#}. The {node3_name#} is {rel3_type#} the {node4_type#} {node4_name#}. + - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. The {node3_name#} is {rel3_type#} the {node4_type#} {node4_name#}. 
diff --git a/data/kg/compound_protein_pathway_disease_3/meta.yaml b/data/kg/compound_protein_pathway_disease_3/meta.yaml index a39472f08..be4c6c291 100644 --- a/data/kg/compound_protein_pathway_disease_3/meta.yaml +++ b/data/kg/compound_protein_pathway_disease_3/meta.yaml @@ -1,118 +1,112 @@ ---- name: compound_protein_pathway_disease_3 description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id - - id: rel3_type - description: rel3_type - type: Other - units: rel3_type - names: - - noun: rel3_type - - id: node4_type - description: node4_type - type: Other - units: node4_type - names: - - noun: node4_type - - id: node4_name - description: node4_name - type: Other - units: node4_name - 
names: - - noun: node4_name - - id: node4_id - description: node4_id - type: Other - units: node4_id - names: - - noun: node4_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id + - id: rel3_type + description: rel3_type + type: Other + units: rel3_type + names: + - noun: rel3_type + - id: node4_type + description: node4_type + type: Other + units: node4_type + names: + - noun: node4_type + - id: node4_name + description: node4_name + type: Other + units: node4_name + names: + - noun: node4_name + - id: node4_id + description: node4_id + type: Other + units: node4_id + names: + - noun: node4_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 
4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 8349447 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. - The {node3_name#} is {rel3_type#} the {node4_type#} {node4_name#}. + - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. The {node3_name#} is {rel3_type#} the {node4_type#} {node4_name#}. 
diff --git a/data/kg/compound_protein_protein/meta.yaml b/data/kg/compound_protein_protein/meta.yaml index 53428585a..e16ea7605 100644 --- a/data/kg/compound_protein_protein/meta.yaml +++ b/data/kg/compound_protein_protein/meta.yaml @@ -1,106 +1,100 @@ ---- name: compound_protein_protein description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_protein_names - description: node3_protein_names - type: Other - units: node3_protein_names - names: - - noun: node3_protein_names - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - 
noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_protein_names + description: node3_protein_names + type: Other + units: node3_protein_names + names: + - noun: node3_protein_names + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 10139561 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with 
knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_type#} {node3_protein_names#}. - - The {node2_type#} {node2_protein_names#} is targeted by the {SMILES__description} {SMILES#}. The {node2_type#} {node2_protein_names#} {rel2_type#} - {node3_protein_names#}. - - |- - User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} with the {SMILES__description} {SMILES#}? - Assistant: The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} for example the {node2_type#} {node2_protein_names#}. - User: Can you tell me a {node3_type#} that {rel2_type#} {node2_type#} {node2_protein_names#}? - Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_protein_names#} {rel2_type#} {node3_protein_names#}. + - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_type#} {node3_protein_names#}. 
+ - The {node2_type#} {node2_protein_names#} is targeted by the {SMILES__description} {SMILES#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_protein_names#}. + - |- + User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} with the {SMILES__description} {SMILES#}? + Assistant: The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} for example the {node2_type#} {node2_protein_names#}. + User: Can you tell me a {node3_type#} that {rel2_type#} {node2_type#} {node2_protein_names#}? + Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_protein_names#} {rel2_type#} {node3_protein_names#}. diff --git a/data/kg/drug_chebi/meta.yaml b/data/kg/drug_chebi/meta.yaml index 354090f45..b1dfb8922 100644 --- a/data/kg/drug_chebi/meta.yaml +++ b/data/kg/drug_chebi/meta.yaml @@ -1,63 +1,58 @@ ---- name: drug_chebi description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + 
names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 3033 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = 
{0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#} {rel1_type#} {node2_name#}. + - The {node1_type#} {SMILES#} {rel1_type#} {node2_name#}. diff --git a/data/kg/drug_chebi_chebi/meta.yaml b/data/kg/drug_chebi_chebi/meta.yaml index 9f8a26513..689c985d4 100644 --- a/data/kg/drug_chebi_chebi/meta.yaml +++ b/data/kg/drug_chebi_chebi/meta.yaml @@ -1,87 +1,82 @@ ---- name: drug_chebi_chebi description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: 
node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 5710 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet 
Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#} {rel1_type#} {node2_name#} and {rel2_type#} {node3_name#}. + - The {node1_type#} {SMILES#} {rel1_type#} {node2_name#} and {rel2_type#} {node3_name#}. diff --git a/data/kg/drug_chebi_chebi_chebi/meta.yaml b/data/kg/drug_chebi_chebi_chebi/meta.yaml index 5cdc7ec62..a446f2552 100644 --- a/data/kg/drug_chebi_chebi_chebi/meta.yaml +++ b/data/kg/drug_chebi_chebi_chebi/meta.yaml @@ -1,110 +1,105 @@ ---- name: drug_chebi_chebi_chebi description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id - - id: rel3_type - description: rel3_type - type: Other - units: rel3_type - names: - - noun: rel3_type - - id: node4_type - description: node4_type - type: Other - units: node4_type - names: - - noun: node4_type - - id: node4_name - description: node4_name - type: Other - units: node4_name - names: - - noun: node4_name - - id: node4_id - description: node4_id - type: Other - units: node4_id - names: - - noun: node4_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + 
names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id + - id: rel3_type + description: rel3_type + type: Other + units: rel3_type + names: + - noun: rel3_type + - id: node4_type + description: node4_type + type: Other + units: node4_type + names: + - noun: node4_type + - id: node4_name + description: node4_name + type: Other + units: node4_name + names: + - noun: node4_name + - id: node4_id + description: node4_id + type: Other + units: node4_id + names: + - noun: node4_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 1538960 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - 
\ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: [] diff --git a/data/kg/drug_disease_pathway/meta.yaml b/data/kg/drug_disease_pathway/meta.yaml index f4f863858..c11801a80 100644 --- a/data/kg/drug_disease_pathway/meta.yaml +++ b/data/kg/drug_disease_pathway/meta.yaml @@ -1,87 +1,82 @@ ---- name: drug_disease_pathway description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + 
names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 276 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - 
- The {node1_type#} {SMILES#|node1_name#} is indicated for the {node2_name#} {node2_type#} and {rel2_type#} the {node3_name#} {node3_type#}. + - The {node1_type#} {SMILES#|node1_name#} is indicated for the {node2_name#} {node2_type#} and {rel2_type#} the {node3_name#} {node3_type#}. diff --git a/data/kg/drug_disease_pathway_protein/meta.yaml b/data/kg/drug_disease_pathway_protein/meta.yaml index 14a7ace63..9b81a5e58 100644 --- a/data/kg/drug_disease_pathway_protein/meta.yaml +++ b/data/kg/drug_disease_pathway_protein/meta.yaml @@ -1,118 +1,112 @@ ---- name: drug_disease_pathway_protein description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id - - id: rel3_type - description: rel3_type - type: Other - units: rel3_type - names: - - noun: rel3_type - - id: node4_type - description: node4_type - type: Other - units: node4_type - 
names: - - noun: node4_type - - id: node4_name - description: node4_name - type: Other - units: node4_name - names: - - noun: node4_name - - id: node4_protein_names - description: node4_protein_names - type: Other - units: node4_protein_names - names: - - noun: node4_protein_names - - id: node4_id - description: node4_id - type: Other - units: node4_id - names: - - noun: node4_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id + - id: rel3_type + description: rel3_type + type: Other + units: rel3_type + names: + - noun: rel3_type + - id: node4_type + description: node4_type + type: Other + units: node4_type + names: + - noun: node4_type + - id: node4_name + description: node4_name + type: Other + units: node4_name + names: + - noun: node4_name + - id: node4_protein_names + description: node4_protein_names + type: Other + units: node4_protein_names + names: + - noun: 
node4_protein_names + - id: node4_id + description: node4_id + type: Other + units: node4_id + names: + - noun: node4_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 33215 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#|node1_name#} is indicated for the {node2_name#} {node2_type#} and {rel2_type#} the {node3_name#} {node3_type#}. 
The {node3_type#} - {node3_name#} {rel3_type#} the {node4_type#} {node4_protein_names#}. + - The {node1_type#} {SMILES#|node1_name#} is indicated for the {node2_name#} {node2_type#} and {rel2_type#} the {node3_name#} {node3_type#}. The {node3_type#} {node3_name#} {rel3_type#} the {node4_type#} {node4_protein_names#}. diff --git a/data/kg/drug_protein/meta.yaml b/data/kg/drug_protein/meta.yaml index 9f491fe08..244b659b9 100644 --- a/data/kg/drug_protein/meta.yaml +++ b/data/kg/drug_protein/meta.yaml @@ -1,72 +1,67 @@ ---- name: drug_protein description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + 
- noun: node2_type + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 15303 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = 
{https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. - - |- - User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that is targeted by the {node1_type#} {SMILES#|node1_name#}? - Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_protein_names#} is targeted by {#this|the above!} {node1_type#}. + - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. + - |- + User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that is targeted by the {node1_type#} {SMILES#|node1_name#}? + Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_protein_names#} is targeted by {#this|the above!} {node1_type#}. diff --git a/data/kg/drug_protein_disease/meta.yaml b/data/kg/drug_protein_disease/meta.yaml index 5797f8635..1c5fda780 100644 --- a/data/kg/drug_protein_disease/meta.yaml +++ b/data/kg/drug_protein_disease/meta.yaml @@ -1,100 +1,94 @@ ---- name: drug_protein_disease description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names 
+ - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 28774 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of 
biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_type#} {node3_name#}. - - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} - the {node3_type#} {node3_name#}. - - |- - User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that is targeted by the {node1_type#} {SMILES#|node1_name#}? - Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_protein_names#} is targeted by {#this|the above!} {node1_type#}. - User: Can you tell me which disease the {node2_type#} {node2_protein_names#} {rel2_type#}? - Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#} {node3_type#}. + - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_type#} {node3_name#}. + - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_type#} {node3_name#}. + - |- + User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that is targeted by the {node1_type#} {SMILES#|node1_name#}? + Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_protein_names#} is targeted by {#this|the above!} {node1_type#}. + User: Can you tell me which disease the {node2_type#} {node2_protein_names#} {rel2_type#}? + Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#} {node3_type#}. 
diff --git a/data/kg/drug_protein_domain/meta.yaml b/data/kg/drug_protein_domain/meta.yaml index 8a62099e6..3e7394470 100644 --- a/data/kg/drug_protein_domain/meta.yaml +++ b/data/kg/drug_protein_domain/meta.yaml @@ -1,99 +1,94 @@ ---- name: drug_protein_domain description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: 
Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 33850 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, 
Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - '{SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} a {node3_name#}.' - - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} a {node3_name#}. - - |- - User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}? - Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {rel1_type#} for example the {node2_type#} {node2_protein_names#}. - User: Can you tell me a domain of the {node2_type#} {node2_protein_names#}? - Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} a {node3_name#}. + - "{SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} a {node3_name#}." + - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} a {node3_name#}. + - |- + User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}? + Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {rel1_type#} for example the {node2_type#} {node2_protein_names#}. + User: Can you tell me a domain of the {node2_type#} {node2_protein_names#}? + Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} a {node3_name#}. 
diff --git a/data/kg/drug_protein_drug/meta.yaml b/data/kg/drug_protein_drug/meta.yaml index 9d6ffe828..5e23e2455 100644 --- a/data/kg/drug_protein_drug/meta.yaml +++ b/data/kg/drug_protein_drug/meta.yaml @@ -1,105 +1,100 @@ ---- name: drug_protein_drug description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_smiles - description: node3_smiles - type: Other - units: node3_smiles - names: - - noun: node3_smiles - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + 
type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_smiles + description: node3_smiles + type: Other + units: node3_smiles + names: + - noun: node3_smiles + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 451843 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = 
{49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_type#} {node3_name#|node3_smiles#}. - - The {node2_type#} {node2_protein_names#} is targeted by the drugs {SMILES#|node1_name#} and {node3_name#|node3_smiles#}. - - |- - User: {#Can you give me|Can you come up with!} {#an|one!} example for a {node1_type#} that {rel1_type#} the {node2_type#} {node2_protein_names#}? - Assistant: {#Sure|Yes|Of course|Yes, of course!}, the {node1_type#} {rel1_type#} the {node2_type#} {node2_protein_names#}. - User: Can you tell me another {node1_type#} that {rel1_type#} the {node2_type#} {node2_protein_names#}? - Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node3_type#} {node3_name#|node3_smiles#} also {rel1_type#} the {node2_type#} {node2_protein_names#}. + - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_type#} {node3_name#|node3_smiles#}. + - The {node2_type#} {node2_protein_names#} is targeted by the drugs {SMILES#|node1_name#} and {node3_name#|node3_smiles#}. 
+ - |- + User: {#Can you give me|Can you come up with!} {#an|one!} example for a {node1_type#} that {rel1_type#} the {node2_type#} {node2_protein_names#}? + Assistant: {#Sure|Yes|Of course|Yes, of course!}, the {node1_type#} {rel1_type#} the {node2_type#} {node2_protein_names#}. + User: Can you tell me another {node1_type#} that {rel1_type#} the {node2_type#} {node2_protein_names#}? + Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node3_type#} {node3_name#|node3_smiles#} also {rel1_type#} the {node2_type#} {node2_protein_names#}. diff --git a/data/kg/drug_protein_ec_number/meta.yaml b/data/kg/drug_protein_ec_number/meta.yaml index 2762b5203..02054ee5f 100644 --- a/data/kg/drug_protein_ec_number/meta.yaml +++ b/data/kg/drug_protein_ec_number/meta.yaml @@ -1,100 +1,94 @@ ---- name: drug_protein_ec_number description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - 
description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 7636 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and 
Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#} which {rel2_type#} the {node3_name#} (EC {node3_id#}) reaction. - - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}. Furthermore, the {node2_type#} {node2_name#} {rel2_type#} the - {node3_name#} (EC {node3_id#}) reaction. - - |- - User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}? - Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}. - User: Can you tell me which reaction the {node2_type#} {node2_name#} {rel2_type#}? - Assistant: The {node2_type#} {node2_name#} {rel2_type#} a {node3_name#} (EC {node3_id#}) reaction. 
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#} which {rel2_type#} the {node3_name#} (EC {node3_id#}) reaction. + - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}. Furthermore, the {node2_type#} {node2_name#} {rel2_type#} the {node3_name#} (EC {node3_id#}) reaction. + - |- + User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}? + Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}. + User: Can you tell me which reaction the {node2_type#} {node2_name#} {rel2_type#}? + Assistant: The {node2_type#} {node2_name#} {rel2_type#} a {node3_name#} (EC {node3_id#}) reaction. diff --git a/data/kg/drug_protein_go_term/meta.yaml b/data/kg/drug_protein_go_term/meta.yaml index 81f97935e..5670302d4 100644 --- a/data/kg/drug_protein_go_term/meta.yaml +++ b/data/kg/drug_protein_go_term/meta.yaml @@ -1,99 +1,94 @@ ---- name: drug_protein_go_term description: Knowledgegraph data samples. 
targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names 
+ - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 656202 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of 
biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#} which {rel2_type#} the {node3_name#}. - - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}. The {node2_type#} {node2_name#} {rel2_type#} the {node3_name#}. - - |- - User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}? - Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}. - User: Can you tell me more {#details |!}about {node2_type#} {node2_name#}? - Assistant: {#Sure|Yes|Of course|Yes, of course!}, the {node2_type#} {node2_name#} {rel2_type#} the {node3_name#}. + - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#} which {rel2_type#} the {node3_name#}. + - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}. The {node2_type#} {node2_name#} {rel2_type#} the {node3_name#}. + - |- + User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}? + Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}. + User: Can you tell me more {#details |!}about {node2_type#} {node2_name#}? + Assistant: {#Sure|Yes|Of course|Yes, of course!}, the {node2_type#} {node2_name#} {rel2_type#} the {node3_name#}. 
diff --git a/data/kg/drug_protein_hpo/meta.yaml b/data/kg/drug_protein_hpo/meta.yaml index 6bbb7dc01..6e3975e2b 100644 --- a/data/kg/drug_protein_hpo/meta.yaml +++ b/data/kg/drug_protein_hpo/meta.yaml @@ -1,98 +1,91 @@ ---- name: drug_protein_hpo description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: 
rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 71321 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and 
Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} {node3_name#}. - - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} - {node3_name#}. - - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} - the human phenotype represented by {node3_name#}. - - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. This {node2_type#} {rel2_type#} the {node3_name#}. + - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} {node3_name#}. + - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. + - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the human phenotype represented by {node3_name#}. + - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. This {node2_type#} {rel2_type#} the {node3_name#}. 
diff --git a/data/kg/drug_protein_hpo_disease/meta.yaml b/data/kg/drug_protein_hpo_disease/meta.yaml index 865744a14..9e8d68cfd 100644 --- a/data/kg/drug_protein_hpo_disease/meta.yaml +++ b/data/kg/drug_protein_hpo_disease/meta.yaml @@ -1,118 +1,112 @@ ---- name: drug_protein_hpo_disease description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id - - id: rel3_type - description: rel3_type - type: Other - units: rel3_type - names: - - noun: rel3_type - - id: node4_type - description: node4_type - type: Other - units: node4_type - names: - - noun: node4_type - - id: node4_name - description: node4_name - type: Other - units: node4_name - names: - - noun: node4_name - - id: node4_id - 
description: node4_id - type: Other - units: node4_id - names: - - noun: node4_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id + - id: rel3_type + description: rel3_type + type: Other + units: rel3_type + names: + - noun: rel3_type + - id: node4_type + description: node4_type + type: Other + units: node4_type + names: + - noun: node4_type + - id: node4_name + description: node4_name + type: Other + units: node4_name + names: + - noun: node4_name + - id: node4_id + description: node4_id + type: Other + units: node4_id + names: + - noun: node4_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - 
description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 293872 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} - {rel2_type#} {node3_name#}. The {node3_name#} {rel3_type#} the {node4_type#} {node4_name#}. + - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. The {node3_name#} {rel3_type#} the {node4_type#} {node4_name#}. 
diff --git a/data/kg/drug_protein_pathway/meta.yaml b/data/kg/drug_protein_pathway/meta.yaml index a8e7a8180..9d5d42e8e 100644 --- a/data/kg/drug_protein_pathway/meta.yaml +++ b/data/kg/drug_protein_pathway/meta.yaml @@ -1,100 +1,94 @@ ---- name: drug_protein_pathway description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + 
type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 124609 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = 
{Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}. - - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} - the {node3_name#}. - - |- - User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}? - Assistant: {#Sure|Yes|Of course|Yes, of course!}, the {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}. - User: Can you tell me more {#details |!}about {node2_type#} {node2_name#}? - Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_name#} {rel2_type#} the {node3_name#}. + - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}. + - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}. + - |- + User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}? + Assistant: {#Sure|Yes|Of course|Yes, of course!}, the {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}. 
+ User: Can you tell me more {#details |!}about {node2_type#} {node2_name#}? + Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_name#} {rel2_type#} the {node3_name#}. diff --git a/data/kg/drug_protein_pathway_disease/meta.yaml b/data/kg/drug_protein_pathway_disease/meta.yaml index 1d5a20c11..d0538ca19 100644 --- a/data/kg/drug_protein_pathway_disease/meta.yaml +++ b/data/kg/drug_protein_pathway_disease/meta.yaml @@ -1,118 +1,112 @@ ---- name: drug_protein_pathway_disease description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: node3_id - - id: rel3_type - description: rel3_type - type: Other - units: rel3_type - names: - - noun: rel3_type - - id: node4_type - 
description: node4_type - type: Other - units: node4_type - names: - - noun: node4_type - - id: node4_name - description: node4_name - type: Other - units: node4_name - names: - - noun: node4_name - - id: node4_id - description: node4_id - type: Other - units: node4_id - names: - - noun: node4_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id + - id: rel3_type + description: rel3_type + type: Other + units: rel3_type + names: + - noun: rel3_type + - id: node4_type + description: node4_type + type: Other + units: node4_type + names: + - noun: node4_type + - id: node4_name + description: node4_name + type: Other + units: node4_name + names: + - noun: node4_name + - id: node4_id + description: node4_id + type: Other + units: 
node4_id + names: + - noun: node4_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 617318 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}. - The {node3_name#} is {rel3_type#} the {node4_type#} {node4_name#}. 
+ - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}. The {node3_name#} is {rel3_type#} the {node4_type#} {node4_name#}. diff --git a/data/kg/drug_protein_protein/meta.yaml b/data/kg/drug_protein_protein/meta.yaml index 849021429..22683f7b7 100644 --- a/data/kg/drug_protein_protein/meta.yaml +++ b/data/kg/drug_protein_protein/meta.yaml @@ -1,105 +1,100 @@ ---- name: drug_protein_protein description: Knowledgegraph data samples. targets: - - id: node1_type - description: node1_type - type: Other - units: node1_type - names: - - noun: node1_type - - id: node1_name - description: node1_name - type: Other - units: node1_name - names: - - noun: node1_name - - id: node1_id - description: node1_id - type: Other - units: node1_id - names: - - noun: node1_id - - id: rel1_type - description: rel1_type - type: Other - units: rel1_type - names: - - noun: rel1_type - - id: node2_type - description: node2_type - type: Other - units: node2_type - names: - - noun: node2_type - - id: node2_protein_names - description: node2_protein_names - type: Other - units: node2_protein_names - names: - - noun: node2_protein_names - - id: node2_name - description: node2_name - type: Other - units: node2_name - names: - - noun: node2_name - - id: node2_id - description: node2_id - type: Other - units: node2_id - names: - - noun: node2_id - - id: rel2_type - description: rel2_type - type: Other - units: rel2_type - names: - - noun: rel2_type - - id: node3_type - description: node3_type - type: Other - units: node3_type - names: - - noun: node3_type - - id: node3_protein_names - description: node3_protein_names - type: Other - units: node3_protein_names - names: - - noun: node3_protein_names - - id: node3_name - description: node3_name - type: Other - units: node3_name - names: - - noun: node3_name - - id: node3_id - description: node3_id - type: Other - units: node3_id - names: - - noun: 
node3_id + - id: node1_type + description: node1_type + type: Other + units: node1_type + names: + - noun: node1_type + - id: node1_name + description: node1_name + type: Other + units: node1_name + names: + - noun: node1_name + - id: node1_id + description: node1_id + type: Other + units: node1_id + names: + - noun: node1_id + - id: rel1_type + description: rel1_type + type: Other + units: rel1_type + names: + - noun: rel1_type + - id: node2_type + description: node2_type + type: Other + units: node2_type + names: + - noun: node2_type + - id: node2_protein_names + description: node2_protein_names + type: Other + units: node2_protein_names + names: + - noun: node2_protein_names + - id: node2_name + description: node2_name + type: Other + units: node2_name + names: + - noun: node2_name + - id: node2_id + description: node2_id + type: Other + units: node2_id + names: + - noun: node2_id + - id: rel2_type + description: rel2_type + type: Other + units: rel2_type + names: + - noun: rel2_type + - id: node3_type + description: node3_type + type: Other + units: node3_type + names: + - noun: node3_type + - id: node3_protein_names + description: node3_protein_names + type: Other + units: node3_protein_names + names: + - noun: node3_protein_names + - id: node3_name + description: node3_name + type: Other + units: node3_name + names: + - noun: node3_name + - id: node3_id + description: node3_id + type: Other + units: node3_id + names: + - noun: node3_id identifiers: - - id: SMILES - description: SMILES - type: SMILES + - id: SMILES + description: SMILES + type: SMILES license: CC BY 4.0 links: - - url: https://crossbar.kansil.org - description: original knowledge graph web GUI link + - url: https://crossbar.kansil.org + description: original knowledge graph web GUI link num_points: 245582 bibtex: - - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\ - \ Esra and Nightingale, 
Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\ - \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\ - \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\ - url = {https://doi.org/10.1093/nar/gkab543},\n}" + - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}" templates: - - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_type#} {node3_name#}. - - The {node2_type#} {node2_protein_names#} is targeted by {SMILES#|node1_name#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. - - |- - User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}? - Assistant: The {node1_type#} {SMILES#|node1_name#} {rel1_type#} for example the {node2_type#} {node2_protein_names#}. - User: Can you tell me a {node3_type#} that {rel2_type#} {node2_type#} {node2_protein_names#}? - Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_name#} {rel2_type#} {node3_type#} {node3_name#}. 
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_type#} {node3_name#}. + - The {node2_type#} {node2_protein_names#} is targeted by {SMILES#|node1_name#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. + - |- + User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}? + Assistant: The {node1_type#} {SMILES#|node1_name#} {rel1_type#} for example the {node2_type#} {node2_protein_names#}. + User: Can you tell me a {node3_type#} that {rel2_type#} {node2_type#} {node2_protein_names#}? + Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_name#} {rel2_type#} {node3_type#} {node3_name#}. diff --git a/data/natural/preprocess_msds.py b/data/natural/preprocess_msds.py index c28f2d7d8..9669f0069 100644 --- a/data/natural/preprocess_msds.py +++ b/data/natural/preprocess_msds.py @@ -11,7 +11,7 @@ def get_text(d, text="", level=1, linebreaks=2): for k in d: if k in [ - "SECTION 6: Acidental release measures", # always empty + "SECTION 6: Accidental release measures", # always empty "SECTION 1: Toxicological information", # always empty "SECTION 16: Other information", # always the same information ]: diff --git a/data/tabular/BACE/meta.yaml b/data/tabular/BACE/meta.yaml index b25927a1f..2149a1a61 100644 --- a/data/tabular/BACE/meta.yaml +++ b/data/tabular/BACE/meta.yaml @@ -1,70 +1,69 @@ ---- name: BACE description: |- - The BACE dataset provides quantitative pIC50 and qualitative (binary label) binding results for - a set of inhibitors of human beta-secretase 1 (BACE-1). All data are experimental values reported - in scientific literature over the past decade, some with detailed crystal structures available. + The BACE dataset provides quantitative pIC50 and qualitative (binary label) binding results for + a set of inhibitors of human beta-secretase 1 (BACE-1). 
All data are experimental values reported + in scientific literature over the past decade, some with detailed crystal structures available. targets: - - id: BACE_inhibition - description: binary labels for inhibition of the human beta-secretase 1 (BACE-1) - type: boolean - names: - - noun: inhibition of the human beta-secretase 1 (BACE-1) - - adjective: inhibitory of the human beta-secretase 1 - - adjective: inhibitory of BACE-1 - - id: pIC50 - description: pIC50 values for inhibition of human beta-secretase 1 (BACE-1) - units: M - type: continuous - names: - - noun: pIC50 of the human beta-secretase 1 (BACE-1) - - noun: negative log10 of the 50% inhibitory concentration of BACE-1 + - id: BACE_inhibition + description: binary labels for inhibition of the human beta-secretase 1 (BACE-1) + type: boolean + names: + - noun: inhibition of the human beta-secretase 1 (BACE-1) + - adjective: inhibitory of the human beta-secretase 1 + - adjective: inhibitory of BACE-1 + - id: pIC50 + description: pIC50 values for inhibition of human beta-secretase 1 (BACE-1) + units: M + type: continuous + names: + - noun: pIC50 of the human beta-secretase 1 (BACE-1) + - noun: negative log10 of the 50% inhibitory concentration of BACE-1 identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://pubs.rsc.org/en/content/articlehtml/2018/sc/c7sc02664a - description: corresponding publication - - url: - - https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/bace.csv - description: data source + - url: https://pubs.rsc.org/en/content/articlehtml/2018/sc/c7sc02664a + description: corresponding publication + - url: + - https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/bace.csv + description: data source num_points: 1513 bibtex: - - |- - @article{Wu2018, - doi = {10.1039/c7sc02664a}, - url = {https://doi.org/10.1039/c7sc02664a}, - year = {2018}, - publisher = {Royal Society of Chemistry 
(RSC)}, - volume = {9}, - number = {2}, - pages = {513--530}, - author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg and Joseph Gomes - and Caleb Geniesse and Aneesh S. Pappu and Karl Leswing and Vijay Pande}, - title = {MoleculeNet: a benchmark for molecular machine learning}, - journal = {Chemical Science} + - |- + @article{Wu2018, + doi = {10.1039/c7sc02664a}, + url = {https://doi.org/10.1039/c7sc02664a}, + year = {2018}, + publisher = {Royal Society of Chemistry (RSC)}, + volume = {9}, + number = {2}, + pages = {513--530}, + author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg and Joseph Gomes + and Caleb Geniesse and Aneesh S. Pappu and Karl Leswing and Vijay Pande}, + title = {MoleculeNet: a benchmark for molecular machine learning}, + journal = {Chemical Science}} templates: - - The {#compound|chemical!} with the {SMILES__description} of {SMILES#} {#shows|exhibits|displays!} {BACE_inhibition#no &NULL}{BACE_inhibition__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {BACE_inhibition#not &NULL}{BACE_inhibition__names__adjective}. - - The {SMILES__description} {SMILES#} represents a molecule that is {BACE_inhibition#not&NULL}identified as {BACE_inhibition__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {BACE_inhibition__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|extra!} words. - Result: {BACE_inhibition#False&True} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {BACE_inhibition#not &NULL}{BACE_inhibition__names__adjective}.
- Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {BACE_inhibition__names__adjective}? - Assistant: {BACE_inhibition#No&Yes}, this molecule is {BACE_inhibition#not &NULL}{BACE_inhibition__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {BACE_inhibition__names__adjective}? - Assistant: {BACE_inhibition#No&Yes}, it is {BACE_inhibition#not &NULL}{BACE_inhibition__names__adjective}. - - The compound with the {SMILES__description} {SMILES#} has a {pIC50__names__noun} of {pIC50#} {pIC50__units}. - - Based on the {SMILES__description} {SMILES#}, the molecule has a {pIC50__names__noun} of {pIC50#} {pIC50__units}. - - The {SMILES__description}{SMILES#} represents a molecule that has a {pIC50__names__noun} of {pIC50#} {pIC50__units}. + - The {#compound|chemical!} with the {SMILES__description} of {SMILES#} {#shows|exhibits|displays!} {BACE_inhibition#no &NULL}{BACE_inhibition__names__noun}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {BACE_inhibition#not &NULL}{BACE_inhibition__names__adjective}. + - The {SMILES__description} {SMILES#} represents a molecule that is {BACE_inhibition#not &NULL}identified as {BACE_inhibition__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {BACE_inhibition__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|extra!} words. + Result: {BACE_inhibition#False&True} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {BACE_inhibition#not &NULL}{BACE_inhibition__names__adjective}.
+ Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {BACE_inhibition__names__adjective}? + Assistant: {BACE_inhibition#No&Yes}, this molecule is {BACE_inhibition#not &NULL}{BACE_inhibition__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {BACE_inhibition__names__adjective}? + Assistant: {BACE_inhibition#No&Yes}, it is {BACE_inhibition#not &NULL}{BACE_inhibition__names__adjective}. + - The compound with the {SMILES__description} {SMILES#} has a {pIC50__names__noun} of {pIC50#} {pIC50__units}. + - Based on the {SMILES__description} {SMILES#}, the molecule has a {pIC50__names__noun} of {pIC50#} {pIC50__units}. + - The {SMILES__description} {SMILES#} represents a molecule that has a {pIC50__names__noun} of {pIC50#} {pIC50__units}. diff --git a/data/tabular/BBBP/meta.yaml b/data/tabular/BBBP/meta.yaml index b589d65a9..5237a25ee 100644 --- a/data/tabular/BBBP/meta.yaml +++ b/data/tabular/BBBP/meta.yaml @@ -1,65 +1,64 @@ ---- name: BBBP description: |- - The blood-brain barrier penetration (BBBP) dataset is designed for the - modeling and prediction of barrier permeability. As a membrane separating - circulating blood and brain extracellular fluid, the blood-brain barrier - blocks most drugs, hormones, and neurotransmitters. Thus penetration of the - barrier forms a long-standing issue in the development of drugs targeting - the central nervous system. This dataset includes binary labels for over 2000 - compounds on their permeability properties. + The blood-brain barrier penetration (BBBP) dataset is designed for the + modeling and prediction of barrier permeability. As a membrane separating + circulating blood and brain extracellular fluid, the blood-brain barrier + blocks most drugs, hormones, and neurotransmitters. Thus penetration of the + barrier forms a long-standing issue in the development of drugs targeting + the central nervous system.
This dataset includes binary labels for over 2000 + compounds on their permeability properties. targets: - - id: p_np - description: Binary labels for penetration/non-penetration of the blood-brain barrier - type: boolean - names: - - noun: blood-brain barrier permeability - - noun: permeability through the blood-brain barrier - - noun: permeability through the membrane separating circulating blood and extracellular brain fluid - - adjective: permeable through the blood-brain barrier - - adjective: permeable through the membrane separating circulating blood and extracellular brain fluid + - id: p_np + description: Binary labels for penetration/non-penetration of the blood-brain barrier + type: boolean + names: + - noun: blood-brain barrier permeability + - noun: permeability through the blood-brain barrier + - noun: permeability through the membrane separating circulating blood and extracellular brain fluid + - adjective: permeable through the blood-brain barrier + - adjective: permeable through the membrane separating circulating blood and extracellular brain fluid identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://pubs.acs.org/doi/10.1021/ci300124c - description: corresponding publication - - url: - - https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/BBBP.csv - description: data source + - url: https://pubs.acs.org/doi/10.1021/ci300124c + description: corresponding publication + - url: + - https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/BBBP.csv + description: data source num_points: 2050 bibtex: - - |- - @article{doi:10.1021/ci300124c, - author = {Martins, Ines Filipa and Teixeira, Ana L. 
and Pinheiro, Luis and Falcao, Andre O.}, - title = {A Bayesian Approach to in Silico Blood-Brain Barrier Penetration Modeling}, - journal = {Journal of Chemical Information and Modeling}, - volume = {52}, - number = {6}, - pages = {1686-1697}, - year = {2012}, - doi = {10.1021/ci300124c}, - URL = {https://doi.org/10.1021/ci300124c}, - eprint = {https://doi.org/10.1021/ci300124c}} + - |- + @article{doi:10.1021/ci300124c, + author = {Martins, Ines Filipa and Teixeira, Ana L. and Pinheiro, Luis and Falcao, Andre O.}, + title = {A Bayesian Approach to in Silico Blood-Brain Barrier Penetration Modeling}, + journal = {Journal of Chemical Information and Modeling}, + volume = {52}, + number = {6}, + pages = {1686-1697}, + year = {2012}, + doi = {10.1021/ci300124c}, + URL = {https://doi.org/10.1021/ci300124c}, + eprint = {https://doi.org/10.1021/ci300124c}} templates: - - The {#compound|chemical!} with the {SMILES__description} of {SMILES#} {#shows|exhibits|displays!} {p_np#no &NULL}{p_np__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {p_np#not &NULL}{p_np__names__adjective}. - - The {SMILES__description} {SMILES#} represents a molecule that is {p_np#not&NULL}identified as {p_np__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {p_np__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|extra!} words. - Result: {p_np#False&True} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {p_np#not &NULL}{p_np__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {p_np__names__adjective}? 
- Assistant: {p_np#No&Yes}, this molecule is {p_np#not &NULL}{p_np__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {p_np__names__adjective}? - Assistant: {p_np#No&Yes}, it is {p_np#not &NULL}{p_np__names__adjective}. + - The {#compound|chemical!} with the {SMILES__description} of {SMILES#} {#shows|exhibits|displays!} {p_np#no &NULL}{p_np__names__noun}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {p_np#not &NULL}{p_np__names__adjective}. + - The {SMILES__description} {SMILES#} represents a molecule that is {p_np#not &NULL}identified as {p_np__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {p_np__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|extra!} words. + Result: {p_np#False&True} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {p_np#not &NULL}{p_np__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {p_np__names__adjective}? + Assistant: {p_np#No&Yes}, this molecule is {p_np#not &NULL}{p_np__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {p_np__names__adjective}? + Assistant: {p_np#No&Yes}, it is {p_np#not &NULL}{p_np__names__adjective}.
diff --git a/data/tabular/MUV_466/meta.yaml b/data/tabular/MUV_466/meta.yaml index 60ef927e4..c3d47e042 100644 --- a/data/tabular/MUV_466/meta.yaml +++ b/data/tabular/MUV_466/meta.yaml @@ -1,35 +1,33 @@ ---- name: MUV_466 description: Activity in the MUV_466 assay identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: MUV-466 - type: boolean - description: MUV-466 - names: - - noun: an agonist of the S1P1 receptor + - id: MUV-466 + type: boolean + description: MUV-466 + names: + - noun: an agonist of the S1P1 receptor license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz + description: Data source num_points: 14841 bibtex: - - | - @article{doi:10.1021/ci8002649, - author = {Rohrer, Sebastian G. and Baumann, Knut}, - title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, - journal = {Journal of Chemical Information and Modeling}, - volume = {49}, - number = {2}, - pages = {169-184}, - year = {2009}, - doi = {10.1021/ci8002649}, - URL = {https://doi.org/10.1021/ci8002649}} + - | + @article{doi:10.1021/ci8002649, + author = {Rohrer, Sebastian G. 
and Baumann, Knut}, + title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, + journal = {Journal of Chemical Information and Modeling}, + volume = {49}, + number = {2}, + pages = {169-184}, + year = {2009}, + doi = {10.1021/ci8002649}, + URL = {https://doi.org/10.1021/ci8002649}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-466#not - &NULL}{MUV-466__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-466#not &NULL}{MUV-466__names__noun}. diff --git a/data/tabular/MUV_548/meta.yaml b/data/tabular/MUV_548/meta.yaml index 8a1ad7371..6092be5a3 100644 --- a/data/tabular/MUV_548/meta.yaml +++ b/data/tabular/MUV_548/meta.yaml @@ -1,37 +1,35 @@ ---- name: MUV_548 description: Activity in the MUV_548 assay identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: MUV-548 - type: boolean - description: MUV-548 - names: - - noun: an inhibitor of the protein kinase A (PKA) - - noun: an inhibitor of the protein kinase A - - noun: an inhibitor of PKA + - id: MUV-548 + type: boolean + description: MUV-548 + names: + - noun: an inhibitor of the protein kinase A (PKA) + - noun: an inhibitor of the protein kinase A + - noun: an inhibitor of PKA license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz + description: Data source num_points: 14734 bibtex: - 
- | - @article{doi:10.1021/ci8002649, - author = {Rohrer, Sebastian G. and Baumann, Knut}, - title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, - journal = {Journal of Chemical Information and Modeling}, - volume = {49}, - number = {2}, - pages = {169-184}, - year = {2009}, - doi = {10.1021/ci8002649}, - URL = {https://doi.org/10.1021/ci8002649}} + - | + @article{doi:10.1021/ci8002649, + author = {Rohrer, Sebastian G. and Baumann, Knut}, + title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, + journal = {Journal of Chemical Information and Modeling}, + volume = {49}, + number = {2}, + pages = {169-184}, + year = {2009}, + doi = {10.1021/ci8002649}, + URL = {https://doi.org/10.1021/ci8002649}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-548#not - &NULL}{MUV-548__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-548#not &NULL}{MUV-548__names__noun}. 
diff --git a/data/tabular/MUV_600/meta.yaml b/data/tabular/MUV_600/meta.yaml index 58f2cdbc4..cebb65352 100644 --- a/data/tabular/MUV_600/meta.yaml +++ b/data/tabular/MUV_600/meta.yaml @@ -1,36 +1,34 @@ ---- name: MUV_600 description: Activity in the MUV_600 assay identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: MUV-600 - type: boolean - description: MUV-600 - names: - - noun: an inhibitor of the steroidogenic factor 1 (SF-1) - - noun: an inhibitor of SF-1 + - id: MUV-600 + type: boolean + description: MUV-600 + names: + - noun: an inhibitor of the steroidogenic factor 1 (SF-1) + - noun: an inhibitor of SF-1 license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz + description: Data source num_points: 14728 bibtex: - - | - @article{doi:10.1021/ci8002649, - author = {Rohrer, Sebastian G. and Baumann, Knut}, - title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, - journal = {Journal of Chemical Information and Modeling}, - volume = {49}, - number = {2}, - pages = {169-184}, - year = {2009}, - doi = {10.1021/ci8002649}, - URL = {https://doi.org/10.1021/ci8002649}} + - | + @article{doi:10.1021/ci8002649, + author = {Rohrer, Sebastian G. 
and Baumann, Knut}, + title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, + journal = {Journal of Chemical Information and Modeling}, + volume = {49}, + number = {2}, + pages = {169-184}, + year = {2009}, + doi = {10.1021/ci8002649}, + URL = {https://doi.org/10.1021/ci8002649}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-600#not - &NULL}{MUV-600__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-600#not &NULL}{MUV-600__names__noun}. diff --git a/data/tabular/MUV_644/meta.yaml b/data/tabular/MUV_644/meta.yaml index 3b97372f9..43a60930d 100644 --- a/data/tabular/MUV_644/meta.yaml +++ b/data/tabular/MUV_644/meta.yaml @@ -1,36 +1,34 @@ ---- name: MUV_644 description: Activity in the MUV_644 assay identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: MUV-644 - type: boolean - description: MUV-644 - names: - - noun: an inhibitor of Rho-kinase 2 (ROCK-2) - - noun: an inhibitor of ROCK-2 + - id: MUV-644 + type: boolean + description: MUV-644 + names: + - noun: an inhibitor of Rho-kinase 2 (ROCK-2) + - noun: an inhibitor of ROCK-2 license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz + description: Data source num_points: 14623 bibtex: - - | - @article{doi:10.1021/ci8002649, - author = {Rohrer, Sebastian G. 
and Baumann, Knut}, - title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, - journal = {Journal of Chemical Information and Modeling}, - volume = {49}, - number = {2}, - pages = {169-184}, - year = {2009}, - doi = {10.1021/ci8002649}, - URL = {https://doi.org/10.1021/ci8002649}} + - | + @article{doi:10.1021/ci8002649, + author = {Rohrer, Sebastian G. and Baumann, Knut}, + title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, + journal = {Journal of Chemical Information and Modeling}, + volume = {49}, + number = {2}, + pages = {169-184}, + year = {2009}, + doi = {10.1021/ci8002649}, + URL = {https://doi.org/10.1021/ci8002649}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-644#not - &NULL}{MUV-644__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-644#not &NULL}{MUV-644__names__noun}. 
diff --git a/data/tabular/MUV_652/meta.yaml b/data/tabular/MUV_652/meta.yaml index 8f559d38c..da14f996c 100644 --- a/data/tabular/MUV_652/meta.yaml +++ b/data/tabular/MUV_652/meta.yaml @@ -1,35 +1,33 @@ ---- name: MUV_652 description: Activity in the MUV_652 assay identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: MUV-652 - type: boolean - description: MUV-652 - names: - - noun: an inhibitor of HIV RT-RNase + - id: MUV-652 + type: boolean + description: MUV-652 + names: + - noun: an inhibitor of HIV RT-RNase license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz + description: Data source num_points: 14902 bibtex: - - | - @article{doi:10.1021/ci8002649, - author = {Rohrer, Sebastian G. and Baumann, Knut}, - title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, - journal = {Journal of Chemical Information and Modeling}, - volume = {49}, - number = {2}, - pages = {169-184}, - year = {2009}, - doi = {10.1021/ci8002649}, - URL = {https://doi.org/10.1021/ci8002649}} + - | + @article{doi:10.1021/ci8002649, + author = {Rohrer, Sebastian G. 
and Baumann, Knut}, + title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, + journal = {Journal of Chemical Information and Modeling}, + volume = {49}, + number = {2}, + pages = {169-184}, + year = {2009}, + doi = {10.1021/ci8002649}, + URL = {https://doi.org/10.1021/ci8002649}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-652#not - &NULL}{MUV-652__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-652#not &NULL}{MUV-652__names__noun}. diff --git a/data/tabular/MUV_689/meta.yaml b/data/tabular/MUV_689/meta.yaml index 6e1ad3423..780715e39 100644 --- a/data/tabular/MUV_689/meta.yaml +++ b/data/tabular/MUV_689/meta.yaml @@ -1,35 +1,33 @@ ---- name: MUV_689 description: Activity in the MUV_689 assay identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: MUV-689 - type: boolean - description: MUV-689 - names: - - noun: an inhibitor of the EPH receptor A4 + - id: MUV-689 + type: boolean + description: MUV-689 + names: + - noun: an inhibitor of the EPH receptor A4 license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz + description: Data source num_points: 14601 bibtex: - - | - @article{doi:10.1021/ci8002649, - author = {Rohrer, Sebastian G. 
and Baumann, Knut}, - title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, - journal = {Journal of Chemical Information and Modeling}, - volume = {49}, - number = {2}, - pages = {169-184}, - year = {2009}, - doi = {10.1021/ci8002649}, - URL = {https://doi.org/10.1021/ci8002649}} + - | + @article{doi:10.1021/ci8002649, + author = {Rohrer, Sebastian G. and Baumann, Knut}, + title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, + journal = {Journal of Chemical Information and Modeling}, + volume = {49}, + number = {2}, + pages = {169-184}, + year = {2009}, + doi = {10.1021/ci8002649}, + URL = {https://doi.org/10.1021/ci8002649}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-689#not - &NULL}{MUV-689__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-689#not &NULL}{MUV-689__names__noun}. 
diff --git a/data/tabular/MUV_692/meta.yaml b/data/tabular/MUV_692/meta.yaml index d8988d3d3..62400e402 100644 --- a/data/tabular/MUV_692/meta.yaml +++ b/data/tabular/MUV_692/meta.yaml @@ -1,36 +1,34 @@ ---- name: MUV_692 description: Activity in the MUV_692 assay identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: MUV-692 - type: boolean - description: MUV-692 - names: - - noun: an agonist of the steroidogenic factor 1 (SF-1) - - noun: an agonist of SF-1 + - id: MUV-692 + type: boolean + description: MUV-692 + names: + - noun: an agonist of the steroidogenic factor 1 (SF-1) + - noun: an agonist of SF-1 license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz + description: Data source num_points: 14644 bibtex: - - | - @article{doi:10.1021/ci8002649, - author = {Rohrer, Sebastian G. and Baumann, Knut}, - title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, - journal = {Journal of Chemical Information and Modeling}, - volume = {49}, - number = {2}, - pages = {169-184}, - year = {2009}, - doi = {10.1021/ci8002649}, - URL = {https://doi.org/10.1021/ci8002649}} + - | + @article{doi:10.1021/ci8002649, + author = {Rohrer, Sebastian G. 
and Baumann, Knut}, + title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, + journal = {Journal of Chemical Information and Modeling}, + volume = {49}, + number = {2}, + pages = {169-184}, + year = {2009}, + doi = {10.1021/ci8002649}, + URL = {https://doi.org/10.1021/ci8002649}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-692#not - &NULL}{MUV-692__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-692#not &NULL}{MUV-692__names__noun}. diff --git a/data/tabular/MUV_712/meta.yaml b/data/tabular/MUV_712/meta.yaml index 15ee8d493..977d3feb5 100644 --- a/data/tabular/MUV_712/meta.yaml +++ b/data/tabular/MUV_712/meta.yaml @@ -1,36 +1,34 @@ ---- name: MUV_712 description: Activity in the MUV_712 assay identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: MUV-712 - type: boolean - description: MUV-712 - names: - - noun: an inhibitor of the heat shock protein 90 - - noun: an inhibitor of HSP90 + - id: MUV-712 + type: boolean + description: MUV-712 + names: + - noun: an inhibitor of the heat shock protein 90 + - noun: an inhibitor of HSP90 license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz + description: Data source num_points: 14411 bibtex: - - | - @article{doi:10.1021/ci8002649, - author = {Rohrer, Sebastian G. 
and Baumann, Knut}, - title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, - journal = {Journal of Chemical Information and Modeling}, - volume = {49}, - number = {2}, - pages = {169-184}, - year = {2009}, - doi = {10.1021/ci8002649}, - URL = {https://doi.org/10.1021/ci8002649}} + - | + @article{doi:10.1021/ci8002649, + author = {Rohrer, Sebastian G. and Baumann, Knut}, + title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, + journal = {Journal of Chemical Information and Modeling}, + volume = {49}, + number = {2}, + pages = {169-184}, + year = {2009}, + doi = {10.1021/ci8002649}, + URL = {https://doi.org/10.1021/ci8002649}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-712#not - &NULL}{MUV-712__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-712#not &NULL}{MUV-712__names__noun}. diff --git a/data/tabular/MUV_713/meta.yaml b/data/tabular/MUV_713/meta.yaml index b26d3da21..a5623ea17 100644 --- a/data/tabular/MUV_713/meta.yaml +++ b/data/tabular/MUV_713/meta.yaml @@ -1,36 +1,34 @@ ---- name: MUV_713 description: Activity in the MUV_713 assay identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: MUV-713 - type: boolean - description: MUV-713 - names: - - noun: an inhibitor of the estrogen receptor-alpha-coactivator binding - - noun: an inhibitor of the ER-alpha-coact. binding + - id: MUV-713 + type: boolean + description: MUV-713 + names: + - noun: an inhibitor of the estrogen receptor-alpha-coactivator binding + - noun: an inhibitor of the ER-alpha-coact. 
binding license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz + description: Data source num_points: 14836 bibtex: - - | - @article{doi:10.1021/ci8002649, - author = {Rohrer, Sebastian G. and Baumann, Knut}, - title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, - journal = {Journal of Chemical Information and Modeling}, - volume = {49}, - number = {2}, - pages = {169-184}, - year = {2009}, - doi = {10.1021/ci8002649}, - URL = {https://doi.org/10.1021/ci8002649}} + - | + @article{doi:10.1021/ci8002649, + author = {Rohrer, Sebastian G. and Baumann, Knut}, + title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, + journal = {Journal of Chemical Information and Modeling}, + volume = {49}, + number = {2}, + pages = {169-184}, + year = {2009}, + doi = {10.1021/ci8002649}, + URL = {https://doi.org/10.1021/ci8002649}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-713#not - &NULL}{MUV-713__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-713#not &NULL}{MUV-713__names__noun}. 
diff --git a/data/tabular/MUV_733/meta.yaml b/data/tabular/MUV_733/meta.yaml index c29903e09..7acc2925d 100644 --- a/data/tabular/MUV_733/meta.yaml +++ b/data/tabular/MUV_733/meta.yaml @@ -1,35 +1,33 @@ ---- name: MUV_733 description: Activity in the MUV_733 assay identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: MUV-733 - type: boolean - description: MUV-733 - names: - - noun: an inhibitor of the estrogen receptor-alpha-coactivator binding + - id: MUV-733 + type: boolean + description: MUV-733 + names: + - noun: an inhibitor of the estrogen receptor-alpha-coactivator binding license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz + description: Data source num_points: 14682 bibtex: - - | - @article{doi:10.1021/ci8002649, - author = {Rohrer, Sebastian G. and Baumann, Knut}, - title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, - journal = {Journal of Chemical Information and Modeling}, - volume = {49}, - number = {2}, - pages = {169-184}, - year = {2009}, - doi = {10.1021/ci8002649}, - URL = {https://doi.org/10.1021/ci8002649}} + - | + @article{doi:10.1021/ci8002649, + author = {Rohrer, Sebastian G. 
and Baumann, Knut}, + title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, + journal = {Journal of Chemical Information and Modeling}, + volume = {49}, + number = {2}, + pages = {169-184}, + year = {2009}, + doi = {10.1021/ci8002649}, + URL = {https://doi.org/10.1021/ci8002649}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-733#not - &NULL}{MUV-733__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-733#not &NULL}{MUV-733__names__noun}. diff --git a/data/tabular/MUV_737/meta.yaml b/data/tabular/MUV_737/meta.yaml index cf0b3deb3..69c56e3ed 100644 --- a/data/tabular/MUV_737/meta.yaml +++ b/data/tabular/MUV_737/meta.yaml @@ -1,36 +1,34 @@ ---- name: MUV_737 description: Activity in the MUV_737 assay identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: MUV-737 - type: boolean - description: MUV-737 - names: - - noun: a potentiator of the estrogen receptor-alpha-coactivator binding - - noun: a potentiator of the ER-alpha-coact. binding + - id: MUV-737 + type: boolean + description: MUV-737 + names: + - noun: a potentiator of the estrogen receptor-alpha-coactivator binding + - noun: a potentiator of the ER-alpha-coact. 
binding license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz + description: Data source num_points: 14691 bibtex: - - | - @article{doi:10.1021/ci8002649, - author = {Rohrer, Sebastian G. and Baumann, Knut}, - title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, - journal = {Journal of Chemical Information and Modeling}, - volume = {49}, - number = {2}, - pages = {169-184}, - year = {2009}, - doi = {10.1021/ci8002649}, - URL = {https://doi.org/10.1021/ci8002649}} + - | + @article{doi:10.1021/ci8002649, + author = {Rohrer, Sebastian G. and Baumann, Knut}, + title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, + journal = {Journal of Chemical Information and Modeling}, + volume = {49}, + number = {2}, + pages = {169-184}, + year = {2009}, + doi = {10.1021/ci8002649}, + URL = {https://doi.org/10.1021/ci8002649}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-737#not - &NULL}{MUV-737__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-737#not &NULL}{MUV-737__names__noun}. 
diff --git a/data/tabular/MUV_810/meta.yaml b/data/tabular/MUV_810/meta.yaml index e285dd8ad..74c9f1907 100644 --- a/data/tabular/MUV_810/meta.yaml +++ b/data/tabular/MUV_810/meta.yaml @@ -1,36 +1,34 @@ ---- name: MUV_810 description: Activity in the MUV_810 assay identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: MUV-810 - type: boolean - description: MUV-810 - names: - - noun: an inhibitor of the focal adhesion kinase - - noun: an inhibitor of FAK + - id: MUV-810 + type: boolean + description: MUV-810 + names: + - noun: an inhibitor of the focal adhesion kinase + - noun: an inhibitor of FAK license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz + description: Data source num_points: 14644 bibtex: - - | - @article{doi:10.1021/ci8002649, - author = {Rohrer, Sebastian G. and Baumann, Knut}, - title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, - journal = {Journal of Chemical Information and Modeling}, - volume = {49}, - number = {2}, - pages = {169-184}, - year = {2009}, - doi = {10.1021/ci8002649}, - URL = {https://doi.org/10.1021/ci8002649}} + - | + @article{doi:10.1021/ci8002649, + author = {Rohrer, Sebastian G. 
and Baumann, Knut}, + title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, + journal = {Journal of Chemical Information and Modeling}, + volume = {49}, + number = {2}, + pages = {169-184}, + year = {2009}, + doi = {10.1021/ci8002649}, + URL = {https://doi.org/10.1021/ci8002649}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-810#not - &NULL}{MUV-810__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-810#not &NULL}{MUV-810__names__noun}. diff --git a/data/tabular/MUV_832/meta.yaml b/data/tabular/MUV_832/meta.yaml index 1c1e74835..16ad3978a 100644 --- a/data/tabular/MUV_832/meta.yaml +++ b/data/tabular/MUV_832/meta.yaml @@ -1,35 +1,33 @@ ---- name: MUV_832 description: Activity in the MUV_832 assay identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: MUV-832 - type: boolean - description: MUV-832 - names: - - noun: an inhibitor of the Cathepsin G protease + - id: MUV-832 + type: boolean + description: MUV-832 + names: + - noun: an inhibitor of the Cathepsin G protease license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz + description: Data source num_points: 14667 bibtex: - - | - @article{doi:10.1021/ci8002649, - author = {Rohrer, Sebastian G. 
and Baumann, Knut}, - title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, - journal = {Journal of Chemical Information and Modeling}, - volume = {49}, - number = {2}, - pages = {169-184}, - year = {2009}, - doi = {10.1021/ci8002649}, - URL = {https://doi.org/10.1021/ci8002649}} + - | + @article{doi:10.1021/ci8002649, + author = {Rohrer, Sebastian G. and Baumann, Knut}, + title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, + journal = {Journal of Chemical Information and Modeling}, + volume = {49}, + number = {2}, + pages = {169-184}, + year = {2009}, + doi = {10.1021/ci8002649}, + URL = {https://doi.org/10.1021/ci8002649}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-832#not - &NULL}{MUV-832__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-832#not &NULL}{MUV-832__names__noun}. 
diff --git a/data/tabular/MUV_846/meta.yaml b/data/tabular/MUV_846/meta.yaml index d9fc0362d..fc3621147 100644 --- a/data/tabular/MUV_846/meta.yaml +++ b/data/tabular/MUV_846/meta.yaml @@ -1,35 +1,33 @@ ---- name: MUV_846 description: Activity in the MUV_846 assay identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: MUV-846 - type: boolean - description: MUV-846 - names: - - noun: an inhibitor of factor XIa (FXIa) + - id: MUV-846 + type: boolean + description: MUV-846 + names: + - noun: an inhibitor of factor XIa (FXIa) license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz + description: Data source num_points: 14711 bibtex: - - | - @article{doi:10.1021/ci8002649, - author = {Rohrer, Sebastian G. and Baumann, Knut}, - title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, - journal = {Journal of Chemical Information and Modeling}, - volume = {49}, - number = {2}, - pages = {169-184}, - year = {2009}, - doi = {10.1021/ci8002649}, - URL = {https://doi.org/10.1021/ci8002649}} + - | + @article{doi:10.1021/ci8002649, + author = {Rohrer, Sebastian G. 
and Baumann, Knut}, + title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, + journal = {Journal of Chemical Information and Modeling}, + volume = {49}, + number = {2}, + pages = {169-184}, + year = {2009}, + doi = {10.1021/ci8002649}, + URL = {https://doi.org/10.1021/ci8002649}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-846#not - &NULL}{MUV-846__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-846#not &NULL}{MUV-846__names__noun}. diff --git a/data/tabular/MUV_852/meta.yaml b/data/tabular/MUV_852/meta.yaml index 86d8874fc..a77ec1be0 100644 --- a/data/tabular/MUV_852/meta.yaml +++ b/data/tabular/MUV_852/meta.yaml @@ -1,35 +1,33 @@ ---- name: MUV_852 description: Activity in the MUV_852 assay identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: MUV-852 - type: boolean - description: MUV-852 - names: - - noun: an inhibitor of factor XIIa (FXIIa) + - id: MUV-852 + type: boolean + description: MUV-852 + names: + - noun: an inhibitor of factor XIIa (FXIIa) license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz + description: Data source num_points: 14651 bibtex: - - | - @article{doi:10.1021/ci8002649, - author = {Rohrer, Sebastian G. 
and Baumann, Knut}, - title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, - journal = {Journal of Chemical Information and Modeling}, - volume = {49}, - number = {2}, - pages = {169-184}, - year = {2009}, - doi = {10.1021/ci8002649}, - URL = {https://doi.org/10.1021/ci8002649}} + - | + @article{doi:10.1021/ci8002649, + author = {Rohrer, Sebastian G. and Baumann, Knut}, + title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, + journal = {Journal of Chemical Information and Modeling}, + volume = {49}, + number = {2}, + pages = {169-184}, + year = {2009}, + doi = {10.1021/ci8002649}, + URL = {https://doi.org/10.1021/ci8002649}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-852#not - &NULL}{MUV-852__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-852#not &NULL}{MUV-852__names__noun}. 
diff --git a/data/tabular/MUV_858/meta.yaml b/data/tabular/MUV_858/meta.yaml index bb110dc4e..0690988e8 100644 --- a/data/tabular/MUV_858/meta.yaml +++ b/data/tabular/MUV_858/meta.yaml @@ -1,36 +1,34 @@ ---- name: MUV_858 description: Activity in the MUV_858 assay identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: MUV-858 - type: boolean - description: MUV-858 - names: - - noun: an allosteric modulator of the dopamine receptor D1 - - noun: an allosteric modulator of the D1 receptor + - id: MUV-858 + type: boolean + description: MUV-858 + names: + - noun: an allosteric modulator of the dopamine receptor D1 + - noun: an allosteric modulator of the D1 receptor license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz + description: Data source num_points: 14774 bibtex: - - | - @article{doi:10.1021/ci8002649, - author = {Rohrer, Sebastian G. and Baumann, Knut}, - title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, - journal = {Journal of Chemical Information and Modeling}, - volume = {49}, - number = {2}, - pages = {169-184}, - year = {2009}, - doi = {10.1021/ci8002649}, - URL = {https://doi.org/10.1021/ci8002649}} + - | + @article{doi:10.1021/ci8002649, + author = {Rohrer, Sebastian G. 
and Baumann, Knut}, + title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, + journal = {Journal of Chemical Information and Modeling}, + volume = {49}, + number = {2}, + pages = {169-184}, + year = {2009}, + doi = {10.1021/ci8002649}, + URL = {https://doi.org/10.1021/ci8002649}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-858#not - &NULL}{MUV-858__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-858#not &NULL}{MUV-858__names__noun}. diff --git a/data/tabular/MUV_859/meta.yaml b/data/tabular/MUV_859/meta.yaml index e2f43b5a3..2bbecfc47 100644 --- a/data/tabular/MUV_859/meta.yaml +++ b/data/tabular/MUV_859/meta.yaml @@ -1,36 +1,34 @@ ---- name: MUV_859 description: Activity in the MUV_859 assay identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: MUV-859 - type: boolean - description: MUV-859 - names: - - noun: an allosteric inhibitor of the muscarinic acetylcholine receptor M1 - - noun: an allosteric inhibitor of the M1 receptor + - id: MUV-859 + type: boolean + description: MUV-859 + names: + - noun: an allosteric inhibitor of the muscarinic acetylcholine receptor M1 + - noun: an allosteric inhibitor of the M1 receptor license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz + description: Data source num_points: 14746 bibtex: 
- - | - @article{doi:10.1021/ci8002649, - author = {Rohrer, Sebastian G. and Baumann, Knut}, - title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, - journal = {Journal of Chemical Information and Modeling}, - volume = {49}, - number = {2}, - pages = {169-184}, - year = {2009}, - doi = {10.1021/ci8002649}, - URL = {https://doi.org/10.1021/ci8002649}} + - | + @article{doi:10.1021/ci8002649, + author = {Rohrer, Sebastian G. and Baumann, Knut}, + title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data}, + journal = {Journal of Chemical Information and Modeling}, + volume = {49}, + number = {2}, + pages = {169-184}, + year = {2009}, + doi = {10.1021/ci8002649}, + URL = {https://doi.org/10.1021/ci8002649}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-859#not - &NULL}{MUV-859__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-859#not &NULL}{MUV-859__names__noun}. diff --git a/data/tabular/RedDB/meta.yaml b/data/tabular/RedDB/meta.yaml index 91ba5989e..94df55d18 100644 --- a/data/tabular/RedDB/meta.yaml +++ b/data/tabular/RedDB/meta.yaml @@ -1,168 +1,155 @@ ---- name: RedDB description: |- - RedDB: a computational database that contains 30861 molecules - from two prominent classes of organic electroactive compounds, quinones and aza-aromatics, - has been presented. RedDB incorporates miscellaneous physicochemical property information - of the compounds that can potentially be employed as battery performance descriptors. 
- RedDBs development steps, including: - (i) chemical library generation, - (ii) molecular property prediction based on quantum chemical calculations, - (iii) aqueous solubility prediction using machine learning, - (iv) data processing and database creation, have been described. + RedDB: a computational database that contains 30861 molecules + from two prominent classes of organic electroactive compounds, quinones and aza-aromatics, + has been presented. RedDB incorporates miscellaneous physicochemical property information + of the compounds that can potentially be employed as battery performance descriptors. + RedDBs development steps, including: + (i) chemical library generation, + (ii) molecular property prediction based on quantum chemical calculations, + (iii) aqueous solubility prediction using machine learning, + (iv) data processing and database creation, have been described. targets: - - id: molecularSurface - description: Total surface area of a molecule - units: \AA^2 - type: continuous - names: - - noun: molecular surface area - - id: reactionFieldEnergy - description: Energy associated with the interaction during a chemical reaction - units: kT - type: continuous - significant_digits: 5 - names: - - noun: chemical reaction field energy - - id: solventAccessSurface - description: Surface area of a molecule accessible to a solvent - units: \AA^2 - type: continuous - names: - - noun: solvent-accessible surface area - - id: cavityEnergy - description: Energy associated with the formation of cavities in a molecular structure - units: kT - type: continuous - names: - - noun: cavity formation energy at the PBE level of theory - - id: gasEnergy - description: Total energy of a molecule in the gas phase - units: Hartree - significant_digits: 5 - type: continuous - names: - - noun: gas-phase molecular energy at the PBE level of theory - - id: gasHomo - description: Highest Occupied Molecular Orbital (HOMO) energy of a gas-phase molecule - units: Hartree - type: 
continuous - significant_digits: 5 - names: - - noun: gaseous phase HOMO energy at the PBE level of theory - - noun: gaseous phase highest occupied molecular orbital energy at the PBE level of theory - - noun: gaseous phase highest occupied molecular orbital (HOMO) energy at the PBE level of theory - - id: gasLumo - description: Lowest Unoccupied Molecular Orbital (LUMO) energy of a gas-phase molecule - units: Hartree - type: continuous - significant_digits: 5 - names: - - noun: gaseous phase LUMO energy at the PBE level of theory - - noun: gaseous phase lowest unoccupied molecular orbital energy at the PBE level of theory - - noun: gaseous phase lowest unoccupied molecular orbital energy (LUMO) at the PBE level of theory - - id: solutionEnergy - description: Total energy of a molecule in a solution - units: Hartree - type: continuous - significant_digits: 5 - names: - - noun: aqueous phase molecular energy at the PBE level of theory - - id: solutionHomo - description: Highest Occupied Molecular Orbital (HOMO) energy in a solution - units: Hartree - type: continuous - significant_digits: 5 - names: - - noun: aqueous phase HOMO energy at the PBE level of theory - - noun: aqueous phase energy of the highest occupied molecular orbital at the PBE level of theory - - noun: aqueous phase energy of the highest occupied molecular orbital (HOMO) at the PBE level of theory - - id: solutionLumo - description: Lowest Unoccupied Molecular Orbital (LUMO) energy in a solution - units: Hartree - type: continuous - significant_digits: 5 - names: - - noun: aqueous phase LUMO energy at the PBE level of theory - - noun: aqueous phase energy of the lowest unoccupied molecular orbital at the PBE level of theory - - noun: aqueous phase energy of the lowest unoccupied molecular orbital (LUMO) at the PBE level of theory - - id: nuclearRepulsionEnergy - description: Electrostatic repulsion energy between atomic nuclei in a molecule - units: Hartree - type: continuous - significant_digits: 5 
- names: - - noun: nuclear repulsion energy at the PBE level of theory - - id: optGasEnergy - description: Total energy of an optimized gas-phase molecule - units: Hartree - type: continuous - significant_digits: 5 - names: - - noun: optimized gas-phase molecular energy at the PBE level of theory - - id: optGasHomo - description: Highest Occupied Molecular Orbital (HOMO) energy of an optimized gas-phase molecule - units: Hartree - type: continuous - significant_digits: 5 - names: - - noun: optimized gas-phase HOMO energy at the PBE level of theory - - id: optGasLumo - description: Lowest Unoccupied Molecular Orbital (LUMO) energy of an optimized gas-phase molecule - units: Hartree - significant_digits: 5 - type: continuous - names: - - noun: optimized gas-phase LUMO energy calculated at the PBE level of theory - - noun: optimized gas-phase LUMO energy calculated with DFT at the PBE level of theory + - id: molecularSurface + description: Total surface area of a molecule + units: \AA^2 + type: continuous + names: + - noun: molecular surface area + - id: reactionFieldEnergy + description: Energy associated with the interaction during a chemical reaction + units: kT + type: continuous + significant_digits: 5 + names: + - noun: chemical reaction field energy + - id: solventAccessSurface + description: Surface area of a molecule accessible to a solvent + units: \AA^2 + type: continuous + names: + - noun: solvent-accessible surface area + - id: cavityEnergy + description: Energy associated with the formation of cavities in a molecular structure + units: kT + type: continuous + names: + - noun: cavity formation energy at the PBE level of theory + - id: gasEnergy + description: Total energy of a molecule in the gas phase + units: Hartree + significant_digits: 5 + type: continuous + names: + - noun: gas-phase molecular energy at the PBE level of theory + - id: gasHomo + description: Highest Occupied Molecular Orbital (HOMO) energy of a gas-phase molecule + units: Hartree + 
type: continuous + significant_digits: 5 + names: + - noun: gaseous phase HOMO energy at the PBE level of theory + - noun: gaseous phase highest occupied molecular orbital energy at the PBE level of theory + - noun: gaseous phase highest occupied molecular orbital (HOMO) energy at the PBE level of theory + - id: gasLumo + description: Lowest Unoccupied Molecular Orbital (LUMO) energy of a gas-phase molecule + units: Hartree + type: continuous + significant_digits: 5 + names: + - noun: gaseous phase LUMO energy at the PBE level of theory + - noun: gaseous phase lowest unoccupied molecular orbital energy at the PBE level of theory + - noun: gaseous phase lowest unoccupied molecular orbital energy (LUMO) at the PBE level of theory + - id: solutionEnergy + description: Total energy of a molecule in a solution + units: Hartree + type: continuous + significant_digits: 5 + names: + - noun: aqueous phase molecular energy at the PBE level of theory + - id: solutionHomo + description: Highest Occupied Molecular Orbital (HOMO) energy in a solution + units: Hartree + type: continuous + significant_digits: 5 + names: + - noun: aqueous phase HOMO energy at the PBE level of theory + - noun: aqueous phase energy of the highest occupied molecular orbital at the PBE level of theory + - noun: aqueous phase energy of the highest occupied molecular orbital (HOMO) at the PBE level of theory + - id: solutionLumo + description: Lowest Unoccupied Molecular Orbital (LUMO) energy in a solution + units: Hartree + type: continuous + significant_digits: 5 + names: + - noun: aqueous phase LUMO energy at the PBE level of theory + - noun: aqueous phase energy of the lowest unoccupied molecular orbital at the PBE level of theory + - noun: aqueous phase energy of the lowest unoccupied molecular orbital (LUMO) at the PBE level of theory + - id: nuclearRepulsionEnergy + description: Electrostatic repulsion energy between atomic nuclei in a molecule + units: Hartree + type: continuous + 
significant_digits: 5 + names: + - noun: nuclear repulsion energy at the PBE level of theory + - id: optGasEnergy + description: Total energy of an optimized gas-phase molecule + units: Hartree + type: continuous + significant_digits: 5 + names: + - noun: optimized gas-phase molecular energy at the PBE level of theory + - id: optGasHomo + description: Highest Occupied Molecular Orbital (HOMO) energy of an optimized gas-phase molecule + units: Hartree + type: continuous + significant_digits: 5 + names: + - noun: optimized gas-phase HOMO energy at the PBE level of theory + - id: optGasLumo + description: Lowest Unoccupied Molecular Orbital (LUMO) energy of an optimized gas-phase molecule + units: Hartree + significant_digits: 5 + type: continuous + names: + - noun: optimized gas-phase LUMO energy calculated at the PBE level of theory + - noun: optimized gas-phase LUMO energy calculated with DFT at the PBE level of theory identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://doi.org/10.1038/s41597-022-01832-2 - description: corresponding publication - - url: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/F3QFSQ - description: Data source + - url: https://doi.org/10.1038/s41597-022-01832-2 + description: corresponding publication + - url: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/F3QFSQ + description: Data source num_points: 30861 bibtex: - - |- - @article{Elif2022, - doi = {10.1021/ci300400a}, - url = {https://doi.org/10.1038/s41597-022-01832-2}, - year = {2022}, - volume = {9}, - number = {1}, - author = {Elif Sorkun and Qi Zhang and Abhishek Khetan and Murat Cihan Sorkun and - Suleyman Er}, - journal = {Nature Scientific Data} + - |- + @article{Elif2022, + doi = {10.1038/s41597-022-01832-2}, + url = {https://doi.org/10.1038/s41597-022-01832-2}, + year = {2022}, + volume = {9}, + number = {1}, + author = {Elif
Sorkun and Qi Zhang and Abhishek Khetan and Murat Cihan Sorkun and + Suleyman Er}, + journal = {Nature Scientific Data}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {molecularSurface__names__noun} - of {molecularSurface#} {molecularSurface__units}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {reactionFieldEnergy__names__noun} - of {reactionFieldEnergy#} {reactionFieldEnergy__units}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {solventAccessSurface__names__noun} - of {solventAccessSurface#} {solventAccessSurface__units}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {cavityEnergy__names__noun} - of {cavityEnergy#} {cavityEnergy__units}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {gasEnergy__names__noun} - of {gasEnergy#} {gasEnergy__units}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {gasHomo__names__noun} - of {gasHomo#} {gasHomo__units}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {gasLumo__names__noun} - of {gasLumo#} {gasLumo__units}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has an {solutionEnergy__names__noun} - of {solutionEnergy#} {solutionEnergy__units}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {solutionLumo__names__noun} - of {solutionLumo#} {solutionLumo__units}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {nuclearRepulsionEnergy__names__noun} - of {nuclearRepulsionEnergy#} {nuclearRepulsionEnergy__units}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {optGasEnergy__names__noun} - of {optGasEnergy#} {optGasEnergy__units}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {optGasHomo__names__noun} - of {optGasHomo#} {optGasHomo__units}. - - |- - Task: Please {#give me|create|generate!} a {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} based on the {#text |!}description{# below|!}. - Description: It has an {solutionLumo__names__noun} {solutionLumo#} {solutionLumo__units} and an {solutionHomo__names__noun} of {solutionHomo#} {solutionHomo__units}. - Result: {SMILES#} + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {molecularSurface__names__noun} of {molecularSurface#} {molecularSurface__units}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {reactionFieldEnergy__names__noun} of {reactionFieldEnergy#} {reactionFieldEnergy__units}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {solventAccessSurface__names__noun} of {solventAccessSurface#} {solventAccessSurface__units}. 
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {cavityEnergy__names__noun} of {cavityEnergy#} {cavityEnergy__units}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {gasEnergy__names__noun} of {gasEnergy#} {gasEnergy__units}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {gasHomo__names__noun} of {gasHomo#} {gasHomo__units}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {gasLumo__names__noun} of {gasLumo#} {gasLumo__units}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has an {solutionEnergy__names__noun} of {solutionEnergy#} {solutionEnergy__units}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has an {solutionLumo__names__noun} of {solutionLumo#} {solutionLumo__units}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {nuclearRepulsionEnergy__names__noun} of {nuclearRepulsionEnergy#} {nuclearRepulsionEnergy__units}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has an {optGasEnergy__names__noun} of {optGasEnergy#} {optGasEnergy__units}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has an {optGasHomo__names__noun} of {optGasHomo#} {optGasHomo__units}.
+ - |- + Task: Please {#give me|create|generate!} a {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} based on the {#text |!}description{# below|!}. + Description: It has an {solutionLumo__names__noun} of {solutionLumo#} {solutionLumo__units} and an {solutionHomo__names__noun} of {solutionHomo#} {solutionHomo__units}. + Result: {SMILES#} diff --git a/data/tabular/SIDER/meta.yaml b/data/tabular/SIDER/meta.yaml index 1472f3bfb..0fb68d2c4 100644 --- a/data/tabular/SIDER/meta.yaml +++ b/data/tabular/SIDER/meta.yaml @@ -1,215 +1,191 @@ ---- name: SIDER description: Database of marketed drugs and adverse drug reactions (ADR), grouped into 23 system organ classes. identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES targets: - - id: hepatobiliary_disorders - description: hepatobiliary disorders - type: boolean - names: - - noun: hepatobiliary disorders - - noun: liver and gallbladder disorders - - id: metabolism_and_nutrition_disorders - description: metabolism and nutrition disorders - type: boolean - names: - - noun: metabolism and nutrition disorders - - noun: metabolic and nutritional disorders - - id: eye_disorders - description: eye disorders - type: boolean - names: - - noun: eye disorders - - noun: ophthalmic disorders - - id: musculoskeletal_and_connective_tissue_disorders - description: musculoskeletal and connective tissue disorders - type: boolean - names: - - noun: musculoskeletal and connective tissue disorders - - noun: muscle and joint disorders - - id: gastrointestinal_disorders - description: gastrointestinal disorders - type: boolean - names: - - noun: gastrointestinal disorders - - noun: digestive system disorders - - id: immune_system_disorders - description: immune system disorders - type: boolean - names: - - noun: immune system disorders - - noun: disorders of the immune system - - id: reproductive_system_and_breast_disorders - description:
reproductive system and breast disorders - type: boolean - names: - - noun: reproductive system and breast disorders - - noun: disorders of the breasts and the reproductive system - - id: neoplasms_benign_malignant_and_unspecified_(incl_cysts_and_polyps) - description: neoplasms benign, malignant and unspecified (incl cysts and polyps) - type: boolean - names: - - noun: neoplasms benign, malignant and unspecified (incl cysts and polyps) - - noun: benign and malignant tumors (including cysts and polyps) - - id: general_disorders_and_administration_site_conditions - description: general disorders and administration site conditions - type: boolean - names: - - noun: general disorders and administration site conditions - - noun: general health and administration site conditions - - id: endocrine_disorders - description: endocrine disorders - type: boolean - names: - - noun: endocrine disorders - - noun: endocrine system disorders - - id: surgical_and_medical_procedures - description: surgical and medical procedures - type: boolean - names: - - noun: surgical and medical procedures - - noun: medical and surgical procedures - - id: vascular_disorders - description: vascular disorders - type: boolean - names: - - noun: vascular disorders - - noun: vascular system disorders - - id: blood_and_lymphatic_system_disorders - description: blood and lymphatic system disorders - type: boolean - names: - - noun: blood and lymphatic system disorders - - noun: disorders of the blood and lymphatic system - - id: skin_and_subcutaneous_tissue_disorders - description: skin and subcutaneous tissue disorders - type: boolean - names: - - noun: skin and subcutaneous tissue disorders - - noun: disorders of the skin and subcutaneous tissue - - id: congenital_familial_and_genetic_disorders - description: congenital, familial and genetic disorders - type: boolean - names: - - noun: congenital, familial and genetic disorders - - noun: familial, congenital and genetic disorders - - id: 
infections_and_infestations - description: infections and infestations - type: boolean - names: - - noun: infections and infestations - - noun: infestations and infections - - id: respiratory_thoracic_and_mediastinal_disorders - description: respiratory, thoracic and mediastinal disorders - type: boolean - names: - - noun: respiratory, thoracic and mediastinal disorders - - noun: respiratory and thoracic disorders - - id: psychiatric_disorders - description: psychiatric disorders - type: boolean - names: - - noun: psychiatric disorders - - noun: mental health and psychiatric disorders - - id: renal_and_urinary_disorders - description: renal and urinary disorders - type: boolean - names: - - noun: renal and urinary disorders - - noun: kidney and urinary tract disorders - - id: pregnancy_puerperium_and_perinatal_conditions - description: pregnancy, puerperium and perinatal conditions - type: boolean - names: - - noun: pregnancy, puerperium and perinatal conditions - - noun: pregnancy, childbirth, and newborn conditions - - id: ear_and_labyrinth_disorders - description: ear and labyrinth disorders - type: boolean - names: - - noun: ear and labyrinth disorders - - noun: ear and inner ear disorders - - id: cardiac_disorders - description: cardiac disorders - type: boolean - names: - - noun: cardiac disorders - - noun: cardiovascular disorders - - id: nervous_system_disorders - description: nervous system disorders - type: boolean - names: - - noun: nervous system disorders - - noun: disorders of the nervous system + - id: hepatobiliary_disorders + description: hepatobiliary disorders + type: boolean + names: + - noun: hepatobiliary disorders + - noun: liver and gallbladder disorders + - id: metabolism_and_nutrition_disorders + description: metabolism and nutrition disorders + type: boolean + names: + - noun: metabolism and nutrition disorders + - noun: metabolic and nutritional disorders + - id: eye_disorders + description: eye disorders + type: boolean + names: + - 
noun: eye disorders + - noun: ophthalmic disorders + - id: musculoskeletal_and_connective_tissue_disorders + description: musculoskeletal and connective tissue disorders + type: boolean + names: + - noun: musculoskeletal and connective tissue disorders + - noun: muscle and joint disorders + - id: gastrointestinal_disorders + description: gastrointestinal disorders + type: boolean + names: + - noun: gastrointestinal disorders + - noun: digestive system disorders + - id: immune_system_disorders + description: immune system disorders + type: boolean + names: + - noun: immune system disorders + - noun: disorders of the immune system + - id: reproductive_system_and_breast_disorders + description: reproductive system and breast disorders + type: boolean + names: + - noun: reproductive system and breast disorders + - noun: disorders of the breasts and the reproductive system + - id: neoplasms_benign_malignant_and_unspecified_(incl_cysts_and_polyps) + description: neoplasms benign, malignant and unspecified (incl cysts and polyps) + type: boolean + names: + - noun: neoplasms benign, malignant and unspecified (incl cysts and polyps) + - noun: benign and malignant tumors (including cysts and polyps) + - id: general_disorders_and_administration_site_conditions + description: general disorders and administration site conditions + type: boolean + names: + - noun: general disorders and administration site conditions + - noun: general health and administration site conditions + - id: endocrine_disorders + description: endocrine disorders + type: boolean + names: + - noun: endocrine disorders + - noun: endocrine system disorders + - id: surgical_and_medical_procedures + description: surgical and medical procedures + type: boolean + names: + - noun: surgical and medical procedures + - noun: medical and surgical procedures + - id: vascular_disorders + description: vascular disorders + type: boolean + names: + - noun: vascular disorders + - noun: vascular system disorders + - id: 
blood_and_lymphatic_system_disorders + description: blood and lymphatic system disorders + type: boolean + names: + - noun: blood and lymphatic system disorders + - noun: disorders of the blood and lymphatic system + - id: skin_and_subcutaneous_tissue_disorders + description: skin and subcutaneous tissue disorders + type: boolean + names: + - noun: skin and subcutaneous tissue disorders + - noun: disorders of the skin and subcutaneous tissue + - id: congenital_familial_and_genetic_disorders + description: congenital, familial and genetic disorders + type: boolean + names: + - noun: congenital, familial and genetic disorders + - noun: familial, congenital and genetic disorders + - id: infections_and_infestations + description: infections and infestations + type: boolean + names: + - noun: infections and infestations + - noun: infestations and infections + - id: respiratory_thoracic_and_mediastinal_disorders + description: respiratory, thoracic and mediastinal disorders + type: boolean + names: + - noun: respiratory, thoracic and mediastinal disorders + - noun: respiratory and thoracic disorders + - id: psychiatric_disorders + description: psychiatric disorders + type: boolean + names: + - noun: psychiatric disorders + - noun: mental health and psychiatric disorders + - id: renal_and_urinary_disorders + description: renal and urinary disorders + type: boolean + names: + - noun: renal and urinary disorders + - noun: kidney and urinary tract disorders + - id: pregnancy_puerperium_and_perinatal_conditions + description: pregnancy, puerperium and perinatal conditions + type: boolean + names: + - noun: pregnancy, puerperium and perinatal conditions + - noun: pregnancy, childbirth, and newborn conditions + - id: ear_and_labyrinth_disorders + description: ear and labyrinth disorders + type: boolean + names: + - noun: ear and labyrinth disorders + - noun: ear and inner ear disorders + - id: cardiac_disorders + description: cardiac disorders + type: boolean + names: + - noun: 
cardiac disorders + - noun: cardiovascular disorders + - id: nervous_system_disorders + description: nervous system disorders + type: boolean + names: + - noun: nervous system disorders + - noun: disorders of the nervous system license: CC BY 4.0 links: - - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false - description: corresponding publication - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/sider.csv.gz - description: Data source + - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false + description: corresponding publication + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/sider.csv.gz + description: Data source num_points: 1427 bibtex: - - |- - @article{10.1093/nar/gkv1075, - author = {Kuhn, Michael and Letunic, Ivica and Jensen, Lars Juhl and Bork, Peer}, - title = "{The SIDER database of drugs and side effects}", - journal = {Nucleic Acids Research}, - volume = {44}, - number = {D1}, - pages = {D1075-D1079}, - year = {2015}, - month = {10}, - issn = {0305-1048}, - doi = {10.1093/nar/gkv1075}, - url = {https://doi.org/10.1093/nar/gkv1075}, - } + - |- + @article{10.1093/nar/gkv1075, + author = {Kuhn, Michael and Letunic, Ivica and Jensen, Lars Juhl and Bork, Peer}, + title = "{The SIDER database of drugs and side effects}", + journal = {Nucleic Acids Research}, + volume = {44}, + number = {D1}, + pages = {D1075-D1079}, + year = {2015}, + month = {10}, + issn = {0305-1048}, + doi = {10.1093/nar/gkv1075}, + url = {https://doi.org/10.1093/nar/gkv1075}, + } templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {hepatobiliary_disorders#not - a &a }{#potential cause|potential reason!} for {hepatobiliary_disorders__names__noun}. 
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {metabolism_and_nutrition_disorders#not - a &a }{#potential cause|potential reason!} for {metabolism_and_nutrition_disorders__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {eye_disorders#not - a &a }{#potential cause|potential reason!} for {eye_disorders__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {musculoskeletal_and_connective_tissue_disorders#not - a &a }{#potential cause|potential reason!} for {musculoskeletal_and_connective_tissue_disorders__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {gastrointestinal_disorders#not - a &a }{#potential cause|potential reason!} for {gastrointestinal_disorders__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {immune_system_disorders#not - a &a }{#potential cause|potential reason!} for {immune_system_disorders__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {reproductive_system_and_breast_disorders#not - a &a }{#potential cause|potential reason!} for {reproductive_system_and_breast_disorders__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {neoplasms_benign_malignant_and_unspecified_(incl_cysts_and_polyps)#not - a &a }{#potential cause|potential reason!} for {neoplasms_benign_malignant_and_unspecified_(incl_cysts_and_polyps)__names__noun}. 
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {general_disorders_and_administration_site_conditions#not - a &a }{#potential cause|potential reason!} for {general_disorders_and_administration_site_conditions__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {endocrine_disorders#not - a &a }{#potential cause|potential reason!} for {endocrine_disorders__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {surgical_and_medical_procedures#not - a &a }{#potential cause|potential reason!} for {surgical_and_medical_procedures__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {vascular_disorders#not - a &a }{#potential cause|potential reason!} for {vascular_disorders__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {blood_and_lymphatic_system_disorders#not - a &a }{#potential cause|potential reason!} for {blood_and_lymphatic_system_disorders__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {skin_and_subcutaneous_tissue_disorders#not - a &a }{#potential cause|potential reason!} for {skin_and_subcutaneous_tissue_disorders__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {congenital_familial_and_genetic_disorders#not - a &a }{#potential cause|potential reason!} for {congenital_familial_and_genetic_disorders__names__noun}. 
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {infections_and_infestations#not - a &a }{#potential cause|potential reason!} for {infections_and_infestations__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {respiratory_thoracic_and_mediastinal_disorders#not - a &a }{#potential cause|potential reason!} for {respiratory_thoracic_and_mediastinal_disorders__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {psychiatric_disorders#not - a &a }{#potential cause|potential reason!} for {psychiatric_disorders__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {renal_and_urinary_disorders#not - a &a }{#potential cause|potential reason!} for {renal_and_urinary_disorders__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {pregnancy_puerperium_and_perinatal_conditions#not - a &a }{#potential cause|potential reason!} for {pregnancy_puerperium_and_perinatal_conditions__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {ear_and_labyrinth_disorders#not - a &a }{#potential cause|potential reason!} for {ear_and_labyrinth_disorders__names__noun}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {cardiac_disorders#not - a &a }{#potential cause|potential reason!} for {cardiac_disorders__names__noun}. 
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {nervous_system_disorders#not - a &a }{#potential cause|potential reason!} for {nervous_system_disorders__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {hepatobiliary_disorders#not a &a }{#potential cause|potential reason!} for {hepatobiliary_disorders__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {metabolism_and_nutrition_disorders#not a &a }{#potential cause|potential reason!} for {metabolism_and_nutrition_disorders__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {eye_disorders#not a &a }{#potential cause|potential reason!} for {eye_disorders__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {musculoskeletal_and_connective_tissue_disorders#not a &a }{#potential cause|potential reason!} for {musculoskeletal_and_connective_tissue_disorders__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {gastrointestinal_disorders#not a &a }{#potential cause|potential reason!} for {gastrointestinal_disorders__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {immune_system_disorders#not a &a }{#potential cause|potential reason!} for {immune_system_disorders__names__noun}. 
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {reproductive_system_and_breast_disorders#not a &a }{#potential cause|potential reason!} for {reproductive_system_and_breast_disorders__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {neoplasms_benign_malignant_and_unspecified_(incl_cysts_and_polyps)#not a &a }{#potential cause|potential reason!} for {neoplasms_benign_malignant_and_unspecified_(incl_cysts_and_polyps)__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {general_disorders_and_administration_site_conditions#not a &a }{#potential cause|potential reason!} for {general_disorders_and_administration_site_conditions__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {endocrine_disorders#not a &a }{#potential cause|potential reason!} for {endocrine_disorders__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {surgical_and_medical_procedures#not a &a }{#potential cause|potential reason!} for {surgical_and_medical_procedures__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {vascular_disorders#not a &a }{#potential cause|potential reason!} for {vascular_disorders__names__noun}. 
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {blood_and_lymphatic_system_disorders#not a &a }{#potential cause|potential reason!} for {blood_and_lymphatic_system_disorders__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {skin_and_subcutaneous_tissue_disorders#not a &a }{#potential cause|potential reason!} for {skin_and_subcutaneous_tissue_disorders__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {congenital_familial_and_genetic_disorders#not a &a }{#potential cause|potential reason!} for {congenital_familial_and_genetic_disorders__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {infections_and_infestations#not a &a }{#potential cause|potential reason!} for {infections_and_infestations__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {respiratory_thoracic_and_mediastinal_disorders#not a &a }{#potential cause|potential reason!} for {respiratory_thoracic_and_mediastinal_disorders__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {psychiatric_disorders#not a &a }{#potential cause|potential reason!} for {psychiatric_disorders__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {renal_and_urinary_disorders#not a &a }{#potential cause|potential reason!} for {renal_and_urinary_disorders__names__noun}. 
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {pregnancy_puerperium_and_perinatal_conditions#not a &a }{#potential cause|potential reason!} for {pregnancy_puerperium_and_perinatal_conditions__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {ear_and_labyrinth_disorders#not a &a }{#potential cause|potential reason!} for {ear_and_labyrinth_disorders__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {cardiac_disorders#not a &a }{#potential cause|potential reason!} for {cardiac_disorders__names__noun}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {nervous_system_disorders#not a &a }{#potential cause|potential reason!} for {nervous_system_disorders__names__noun}. diff --git a/data/tabular/ames_mutagenicity/meta.yaml b/data/tabular/ames_mutagenicity/meta.yaml index fd841fc07..654103ef2 100644 --- a/data/tabular/ames_mutagenicity/meta.yaml +++ b/data/tabular/ames_mutagenicity/meta.yaml @@ -1,132 +1,130 @@ ---- name: ames_mutagenicity description: |- - Mutagenicity means the ability of a drug to induce genetic alterations. - Drugs that can cause damage to the DNA can result in cell death or other severe - adverse effects. Nowadays, the most widely used assay for testing the mutagenicity - of compounds is the Ames experiment which was invented by a professor named - Ames. The Ames test is a short term bacterial reverse mutation assay detecting - a large number of compounds which can induce genetic damage and frameshift mutations. - The dataset is aggregated from four papers. + Mutagenicity means the ability of a drug to induce genetic alterations. 
+ Drugs that can cause damage to the DNA can result in cell death or other severe + adverse effects. Nowadays, the most widely used assay for testing the mutagenicity + of compounds is the Ames experiment which was invented by a professor named + Ames. The Ames test is a short term bacterial reverse mutation assay detecting + a large number of compounds which can induce genetic damage and frameshift mutations. + The dataset is aggregated from four papers. targets: - - id: mutagenic - description: whether it is mutagenic (1) or not mutagenic (0) - units: - type: boolean - names: - - noun: mutagenicity - - noun: Ames mutagenicity - - adjective: mutagenic - - adjective: Ames mutagenic - - verb: has the ability to induce genetic alterations - - gerund: having the potential to cause mutations - - gerund: having the potential to induce genetic alterations + - id: mutagenic + description: whether it is mutagenic (1) or not mutagenic (0) + units: + type: boolean + names: + - noun: mutagenicity + - noun: Ames mutagenicity + - adjective: mutagenic + - adjective: Ames mutagenic + - verb: has the ability to induce genetic alterations + - gerund: having the potential to cause mutations + - gerund: having the potential to induce genetic alterations benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://doi.org/10.1021/ci300400a - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#ames-mutagenicity - description: Data source + - url: https://doi.org/10.1021/ci300400a + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#ames-mutagenicity + description: Data source num_points: 7278 bibtex: - - |- - @article{Xu2012, - doi = {10.1021/ci300400a}, - url = 
{https://doi.org/10.1021/ci300400a}, - year = {2012}, - month = oct, - publisher = {American Chemical Society (ACS)}, - volume = {52}, - number = {11}, - pages = {2840--2847}, - author = {Congying Xu and Feixiong Cheng and Lei Chen and - Zheng Du and Weihua Li and Guixia Liu and Philip W. Lee and Yun Tang}, - title = {In silico Prediction of Chemical Ames Mutagenicity}, - journal = {Journal of Chemical Information and Modeling} + - |- + @article{Xu2012, + doi = {10.1021/ci300400a}, + url = {https://doi.org/10.1021/ci300400a}, + year = {2012}, + month = oct, + publisher = {American Chemical Society (ACS)}, + volume = {52}, + number = {11}, + pages = {2840--2847}, + author = {Congying Xu and Feixiong Cheng and Lei Chen and + Zheng Du and Weihua Li and Guixia Liu and Philip W. Lee and Yun Tang}, + title = {In silico Prediction of Chemical Ames Mutagenicity}, + journal = {Journal of Chemical Information and Modeling} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {mutagenic#no &NULL}{mutagenic__names__adjective} - properties. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {mutagenic#no &NULL}{mutagenic__names__adjective} {#properties|characteristics|features!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {mutagenic#not &NULL}identified as {mutagenic__names__adjective}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {mutagenic#not &NULL}{mutagenic__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {mutagenic__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {mutagenic#False&True} - - |- - Task: Please classify a molecule based on the description. 
- Description: A molecule that is {mutagenic__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {mutagenic#not &NULL}{mutagenic__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {mutagenic#not &NULL}{mutagenic__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {mutagenic__names__adjective}? - Assistant: {mutagenic#No&Yes}, this molecule is {mutagenic#not &NULL}{mutagenic__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {mutagenic__names__adjective}? - Assistant: {mutagenic#No&Yes}, it is {mutagenic#not &NULL}{mutagenic__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {mutagenic#not &NULL}{mutagenic__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {mutagenic#not &NULL}{mutagenic__names__adjective}? - Assistant: This is a molecule that is {mutagenic#not &NULL}{mutagenic__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {mutagenic#not &NULL}be {mutagenic__names__adjective}. 
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {mutagenic#not &NULL}{mutagenic__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {mutagenic#not &NULL}be {mutagenic__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {mutagenic#not &NULL}{mutagenic__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {mutagenic__names__adjective}:{mutagenic#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {mutagenic__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{mutagenic#False&True} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {mutagenic__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {mutagenic%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {mutagenic__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {mutagenic%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {mutagenic#not &NULL}{mutagenic__names__adjective}? 
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%mutagenic%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {mutagenic#not &NULL}{mutagenic__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%mutagenic%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {mutagenic#no &NULL}{mutagenic__names__adjective} properties. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {mutagenic#no &NULL}{mutagenic__names__adjective} {#properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {mutagenic#not &NULL}identified as {mutagenic__names__adjective}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {mutagenic#not &NULL}{mutagenic__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {mutagenic__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {mutagenic#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {mutagenic__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {mutagenic#not &NULL}{mutagenic__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. 
+ Description: A molecule that is {mutagenic#not &NULL}{mutagenic__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {mutagenic__names__adjective}? + Assistant: {mutagenic#No&Yes}, this molecule is {mutagenic#not &NULL}{mutagenic__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {mutagenic__names__adjective}? + Assistant: {mutagenic#No&Yes}, it is {mutagenic#not &NULL}{mutagenic__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {mutagenic#not &NULL}{mutagenic__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {mutagenic#not &NULL}{mutagenic__names__adjective}? + Assistant: This is a molecule that is {mutagenic#not &NULL}{mutagenic__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {mutagenic#not &NULL}be {mutagenic__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {mutagenic#not &NULL}{mutagenic__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {mutagenic#not &NULL}be {mutagenic__names__adjective}. 
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {mutagenic#not &NULL}{mutagenic__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {mutagenic__names__adjective}:{mutagenic#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {mutagenic__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{mutagenic#False&True} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {mutagenic__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {mutagenic%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {mutagenic__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {mutagenic%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {mutagenic#not &NULL}{mutagenic__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%mutagenic%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {mutagenic#not &NULL}{mutagenic__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {SMILES%mutagenic%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/aminoacids/meta.yaml b/data/tabular/aminoacids/meta.yaml index a44611c2b..90af3904a 100644 --- a/data/tabular/aminoacids/meta.yaml +++ b/data/tabular/aminoacids/meta.yaml @@ -1,41 +1,38 @@ ---- name: aminoacids description: |- - The list of the 20 essential aminoacids, their SMILES, one letter and three letter codes. + The list of the 20 essential aminoacids, their SMILES, one letter and three letter codes. targets: - - id: three_letter_code - description: three-letter code - type: text - - id: one_letter_code - description: one-letter code - type: text - - id: aminoacid_name - description: name - type: text - - id: type - description: type of aminoacid - type: text + - id: three_letter_code + description: three-letter code + type: text + - id: one_letter_code + description: one-letter code + type: text + - id: aminoacid_name + description: name + type: text + - id: type + description: type of aminoacid + type: text identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://chemistry.stackexchange.com/questions/138614/why-are-tyrosine-and-tryptophan-considered-hydrophobic - description: reference for amino acid type + - url: https://chemistry.stackexchange.com/questions/138614/why-are-tyrosine-and-tryptophan-considered-hydrophobic + description: reference for amino acid type num_points: 20 templates: - - The {#essential amino acid|amino acid|amino acid (AA)|AA!} with the {SMILES__description} {SMILES#} has the one-letter code {one_letter_code#} and - the three-letter code {three_letter_code#}. - - The {#essential amino acid|amino acid|amino acid (AA)|AA!} {aminoacid_name#} has the one-letter code {one_letter_code#} and the three-letter code - {three_letter_code#}. 
- - |- - Question: What is the one-letter code of the {#essential amino acid|amino acid|amino acid (AA)|AA!} with the {SMILES__description} {SMILES#}? - Answer: {one_letter_code#}. - - |- - Question: What is the three-letter code of the {#essential amino acid|amino acid|amino acid (AA)|AA!} with the {SMILES__description} {SMILES#}? - Answer: {three_letter_code#}. - - |- - Question: What is the type of the amino acid with the one-letter code {one_letter_code#} and {SMILES__description} {SMILES#}? - Constraint: The possible types are: polar, non-polar, positively charged, negatively charged. - Answer: From the provided amino acid types (polar, non-polar, positively charged, negatively charged), the amino acid with the one-letter code {one_letter_code#} is {type#}. + - The {#essential amino acid|amino acid|amino acid (AA)|AA!} with the {SMILES__description} {SMILES#} has the one-letter code {one_letter_code#} and the three-letter code {three_letter_code#}. + - The {#essential amino acid|amino acid|amino acid (AA)|AA!} {aminoacid_name#} has the one-letter code {one_letter_code#} and the three-letter code {three_letter_code#}. + - |- + Question: What is the one-letter code of the {#essential amino acid|amino acid|amino acid (AA)|AA!} with the {SMILES__description} {SMILES#}? + Answer: {one_letter_code#}. + - |- + Question: What is the three-letter code of the {#essential amino acid|amino acid|amino acid (AA)|AA!} with the {SMILES__description} {SMILES#}? + Answer: {three_letter_code#}. + - |- + Question: What is the type of the amino acid with the one-letter code {one_letter_code#} and {SMILES__description} {SMILES#}? + Constraint: The possible types are: polar, non-polar, positively charged, negatively charged. + Answer: From the provided amino acid types (polar, non-polar, positively charged, negatively charged), the amino acid with the one-letter code {one_letter_code#} is {type#}. 
diff --git a/data/tabular/bc5chem/meta.yaml b/data/tabular/bc5chem/meta.yaml index db7f684dd..cd7b301ba 100644 --- a/data/tabular/bc5chem/meta.yaml +++ b/data/tabular/bc5chem/meta.yaml @@ -1,55 +1,54 @@ ---- name: bc5chem description: |- - BC5CHEM is a named entity recognition dataset for chemical mentions. + BC5CHEM is a named entity recognition dataset for chemical mentions. targets: - - id: matched_words - description: matched words - type: text - names: - - noun: entity - - noun: matched entity + - id: matched_words + description: matched words + type: text + names: + - noun: entity + - noun: matched entity identifiers: - - id: sentence - description: Sentence - type: text - names: - - noun: sentence - - noun: text + - id: sentence + description: Sentence + type: text + names: + - noun: sentence + - noun: text license: https://huggingface.co/datasets/bigbio/blurb/blob/main/LICENSE links: - - url: https://huggingface.co/datasets/bigbio/blurb - description: original dataset + - url: https://huggingface.co/datasets/bigbio/blurb + description: original dataset benchmarks: - - name: bc5chem - link: hhttps://huggingface.co/datasets/bigbio/blurb - split_column: split + - name: bc5chem + link: https://huggingface.co/datasets/bigbio/blurb + split_column: split num_points: 13755 bibtex: - - |- - @article{gu2021domain, - title = { - Domain-specific language model pretraining for biomedical natural - language processing - }, - author = { - Gu, Yu and Tinn, Robert and Cheng, Hao and Lucas, Michael and - Usuyama, Naoto and Liu, Xiaodong and Naumann, Tristan and Gao, - Jianfeng and Poon, Hoifung - }, - year = 2021, - journal = {ACM Transactions on Computing for Healthcare (HEALTH)}, - publisher = {ACM New York, NY}, - volume = 3, - number = 1, - pages = {1--23} - } + - |- + @article{gu2021domain, + title = { + Domain-specific language model pretraining for biomedical natural + language processing + }, + author = { + Gu, Yu and Tinn, Robert and Cheng, Hao and Lucas, Michael
and + Usuyama, Naoto and Liu, Xiaodong and Naumann, Tristan and Gao, + Jianfeng and Poon, Hoifung + }, + year = 2021, + journal = {ACM Transactions on Computing for Healthcare (HEALTH)}, + publisher = {ACM New York, NY}, + volume = 3, + number = 1, + pages = {1--23} + } templates: - - |- - Task: Find all the mentions of {#chemicals|chemical compounds|chemical substances!} in the {#following|subsequent!} {#text|sentence!}. Return the matching {#words|entities!}. If there is no {#match|mention of a chemical|matching entity!}, return `no match`. - {#Sentence|Description!}: {sentence#} - Answer: {matched_words#} - - |- - User: Does the following text contain mentions of {#chemicals|chemical compounds|chemical substances!}?{# Can you return matches?| Can you output matches?| Please return matches.!} - {#Text: |!}{sentence#} - Assistant: {#I found|There is!} {matched_words#}. + - |- + Task: Find all the mentions of {#chemicals|chemical compounds|chemical substances!} in the {#following|subsequent!} {#text|sentence!}. Return the matching {#words|entities!}. If there is no {#match|mention of a chemical|matching entity!}, return `no match`. + {#Sentence|Description!}: {sentence#} + Answer: {matched_words#} + - |- + User: Does the following text contain mentions of {#chemicals|chemical compounds|chemical substances!}?{# Can you return matches?| Can you output matches?| Please return matches.!} + {#Text: |!}{sentence#} + Assistant: {#I found|There is!} {matched_words#}. diff --git a/data/tabular/bc5disease/meta.yaml b/data/tabular/bc5disease/meta.yaml index 02491b3ab..cbe390570 100644 --- a/data/tabular/bc5disease/meta.yaml +++ b/data/tabular/bc5disease/meta.yaml @@ -1,55 +1,54 @@ ---- name: bc5disease description: |- - BC5Disease is a named entity recognition dataset for disease mentions. + BC5Disease is a named entity recognition dataset for disease mentions. 
targets: - - id: matched_words - description: matched words - type: text - names: - - noun: entity - - noun: matched entity + - id: matched_words + description: matched words + type: text + names: + - noun: entity + - noun: matched entity identifiers: - - id: sentence - description: Sentence - type: text - names: - - noun: sentence - - noun: text + - id: sentence + description: Sentence + type: text + names: + - noun: sentence + - noun: text license: https://huggingface.co/datasets/bigbio/blurb/blob/main/LICENSE links: - - url: https://huggingface.co/datasets/bigbio/blurb - description: original dataset + - url: https://huggingface.co/datasets/bigbio/blurb + description: original dataset benchmarks: - - name: bc5chem - link: hhttps://huggingface.co/datasets/bigbio/blurb - split_column: split + - name: bc5chem + link: https://huggingface.co/datasets/bigbio/blurb + split_column: split num_points: 13755 bibtex: - - |- - @article{gu2021domain, - title = { - Domain-specific language model pretraining for biomedical natural - language processing - }, - author = { - Gu, Yu and Tinn, Robert and Cheng, Hao and Lucas, Michael and - Usuyama, Naoto and Liu, Xiaodong and Naumann, Tristan and Gao, - Jianfeng and Poon, Hoifung - }, - year = 2021, - journal = {ACM Transactions on Computing for Healthcare (HEALTH)}, - publisher = {ACM New York, NY}, - volume = 3, - number = 1, - pages = {1--23} - } + - |- + @article{gu2021domain, + title = { + Domain-specific language model pretraining for biomedical natural + language processing + }, + author = { + Gu, Yu and Tinn, Robert and Cheng, Hao and Lucas, Michael and + Usuyama, Naoto and Liu, Xiaodong and Naumann, Tristan and Gao, + Jianfeng and Poon, Hoifung + }, + year = 2021, + journal = {ACM Transactions on Computing for Healthcare (HEALTH)}, + publisher = {ACM New York, NY}, + volume = 3, + number = 1, + pages = {1--23} + } templates: - - |- - Task: Find all the mentions of diseases in the {#following|subsequent!} {#text|sentence!}.
Return the matching {#words|entities!}. If there is no {#match|mention of a disease|matching entity!}, return `no match`. - {#Sentence|Description!}: {sentence#} - Answer: {matched_words#} - - |- - User: Does the following text contain mentions of diseases?{# Can you return matches?| Can you output matches?|Please return matches!} - {#Text: |!}{sentence#} - Assistant: {#I found|There is!} {matched_words#}. + - |- + Task: Find all the mentions of diseases in the {#following|subsequent!} {#text|sentence!}. Return the matching {#words|entities!}. If there is no {#match|mention of a disease|matching entity!}, return `no match`. + {#Sentence|Description!}: {sentence#} + Answer: {matched_words#} + - |- + User: Does the following text contain mentions of diseases?{# Can you return matches?| Can you output matches?|Please return matches!} + {#Text: |!}{sentence#} + Assistant: {#I found|There is!} {matched_words#}. diff --git a/data/tabular/bicerano_dataset/meta.yaml b/data/tabular/bicerano_dataset/meta.yaml index 382c63d3a..305bb1b93 100644 --- a/data/tabular/bicerano_dataset/meta.yaml +++ b/data/tabular/bicerano_dataset/meta.yaml @@ -1,66 +1,65 @@ ---- name: bicerano_dataset description: |- - This paper outlines a MD simulation workflow based on GPU MD simulation and the - refined optimized potentials for liquid simulation (OPLS) OPLS3e force field to - calculate glass transition temperatures (Tgs) of 315 polymers for which Bicerano - reported experimental values. + This paper outlines a MD simulation workflow based on GPU MD simulation and the + refined optimized potentials for liquid simulation (OPLS) OPLS3e force field to + calculate glass transition temperatures (Tgs) of 315 polymers for which Bicerano + reported experimental values. 
targets: - - id: Tg_exp - description: experimental glass transition temperature - units: K - type: float - names: - - noun: experimental glass transition temperature - uris: - - id: Tg_calc - description: calculated glass transition T - units: K - type: float - names: - - noun: computed glass transition temperature - - id: rho_300K_calc - description: computed density at 300K - units: g/cm^3 - type: float - names: - - noun: computed polymer density at 300K + - id: Tg_exp + description: experimental glass transition temperature + units: K + type: float + names: + - noun: experimental glass transition temperature + uris: + - id: Tg_calc + description: calculated glass transition T + units: K + type: float + names: + - noun: computed glass transition temperature + - id: rho_300K_calc + description: computed density at 300K + units: g/cm^3 + type: float + names: + - noun: computed polymer density at 300K identifiers: - - id: PSMILES - type: PSMILES - description: PSMILES - - id: compound_name - type: Other - names: - - noun: compound name - description: polymer name + - id: PSMILES + type: PSMILES + description: PSMILES + - id: compound_name + type: Other + names: + - noun: compound name + description: polymer name license: CC BY 4.0 links: - - url: https://pubs.acs.org/doi/10.1021/acsapm.0c00524# - description: corresponding publication - - url: - - https://raw.githubusercontent.com/AdrianM0/chemnlp/main/data/tabular/bicerano_dataset/HT_MD_polymer_properties.csv - description: data source + - url: https://pubs.acs.org/doi/10.1021/acsapm.0c00524# + description: corresponding publication + - url: + - https://raw.githubusercontent.com/AdrianM0/chemnlp/main/data/tabular/bicerano_dataset/HT_MD_polymer_properties.csv + description: data source num_points: 315 bibtex: - - |- - @article{afzal2021, - author = {Afzal, Mohammad Atif Faiz and Browning, Andrea R. and Goldberg, Alexander and Halls, Mathew D. and Gavartin, Jacob L. and Morisato, - Tsuguo and Hughes, Thomas F. 
and Giesen, David J. and Goose, Joseph E.}, - title = {High-Throughput Molecular Dynamics Simulations and Validation of Thermophysical Properties of Polymers for Various Applications}, - journal = {ACS Applied Polymer Materials}, - volume = {3}, - number = {2}, - pages = {620-630}, - year = {2021}, - doi = {10.1021/acsapm.0c00524}} + - |- + @article{afzal2021, + author = {Afzal, Mohammad Atif Faiz and Browning, Andrea R. and Goldberg, Alexander and Halls, Mathew D. and Gavartin, Jacob L. and Morisato, + Tsuguo and Hughes, Thomas F. and Giesen, David J. and Goose, Joseph E.}, + title = {High-Throughput Molecular Dynamics Simulations and Validation of Thermophysical Properties of Polymers for Various Applications}, + journal = {ACS Applied Polymer Materials}, + volume = {3}, + number = {2}, + pages = {620-630}, + year = {2021}, + doi = {10.1021/acsapm.0c00524}} templates: - - The polymer with the {PSMILES__description} of {PSMILES#} has an experimental glass transition temperature of {Tg_exp#} K. - - The polymer with the {PSMILES__description} of {PSMILES#} has a computed glass transition temperature of {Tg_calc#} K. - - The polymer with the {PSMILES__description} of {PSMILES#} has a computed density at 300 K of {rho_300K_calc#} g/cc. - - The polymer with the {compound_name__names__noun} of {compound_name#} has an experimental glass transition temperature of {Tg_exp#} K. - - The polymer with the {compound_name__names__noun} of {compound_name#} has a computed glass transition temperature of {Tg_calc#} K. - - The polymer with the {compound_name__names__noun} of {compound_name#} has a computed density at 300 K of {rho_300K_calc#} g/cc. - - |- - Question: What is a polymer with a computed glass transition temperature of {Tg_calc#} K and a computed density at 300 K of {rho_300K_calc#} g/cc. 
- Answer: A polymer with {PSMILES__description} {PSMILES#} + - The polymer with the {PSMILES__description} of {PSMILES#} has an experimental glass transition temperature of {Tg_exp#} K. + - The polymer with the {PSMILES__description} of {PSMILES#} has a computed glass transition temperature of {Tg_calc#} K. + - The polymer with the {PSMILES__description} of {PSMILES#} has a computed density at 300 K of {rho_300K_calc#} g/cc. + - The polymer with the {compound_name__names__noun} of {compound_name#} has an experimental glass transition temperature of {Tg_exp#} K. + - The polymer with the {compound_name__names__noun} of {compound_name#} has a computed glass transition temperature of {Tg_calc#} K. + - The polymer with the {compound_name__names__noun} of {compound_name#} has a computed density at 300 K of {rho_300K_calc#} g/cc. + - |- + Question: What is a polymer with a computed glass transition temperature of {Tg_calc#} K and a computed density at 300 K of {rho_300K_calc#} g/cc. + Answer: A polymer with {PSMILES__description} {PSMILES#} diff --git a/data/tabular/bio_ner/meta.yaml b/data/tabular/bio_ner/meta.yaml index a0cb30f83..8566a1816 100644 --- a/data/tabular/bio_ner/meta.yaml +++ b/data/tabular/bio_ner/meta.yaml @@ -1,37 +1,36 @@ ---- name: bio_ner description: NER task on bio-related text. 
identifiers: - - id: Sentence - description: Sentence - type: Other + - id: Sentence + description: Sentence + type: Other targets: - - id: entity_1 - description: entity_1 - type: Other - units: entity_1 - names: - - noun: entity_1 - - id: json - description: json - type: Other - units: - names: - - noun: JSON output + - id: entity_1 + description: entity_1 + type: Other + units: entity_1 + names: + - noun: entity_1 + - id: json + description: json + type: Other + units: + names: + - noun: JSON output benchmarks: - - name: bio_ner - link: https://github.com/ML4LitS/bio-datasets - split_column: split + - name: bio_ner + link: https://github.com/ML4LitS/bio-datasets + split_column: split license: unknown links: - - url: https://github.com/ML4LitS/bio-datasets - description: ??? + - url: https://github.com/ML4LitS/bio-datasets + description: ??? num_points: 123509 bibtex: - - ??? + - ??? templates: - - |- - Task: Please carry out the {#named entity recognition (NER)|named entity recognition|NER!} task for the the text below. - Text: {Sentence#}. - Constrain: Please, {#only |!}list the entities in the form NER entity, span start, span end, and type {#in separate lines |!}with a high probability of being in the text. - Result: {entity_1#} + - |- + Task: Please carry out the {#named entity recognition (NER)|named entity recognition|NER!} task for the text below. + Text: {Sentence#}. + Constraint: Please, {#only |!}list the entities in the form NER entity, span start, span end, and type {#in separate lines |!}with a high probability of being in the text.
+ Result: {entity_1#} diff --git a/data/tabular/bioavailability_ma_et_al/meta.yaml b/data/tabular/bioavailability_ma_et_al/meta.yaml index ccfb73dae..e5d7f417a 100644 --- a/data/tabular/bioavailability_ma_et_al/meta.yaml +++ b/data/tabular/bioavailability_ma_et_al/meta.yaml @@ -1,132 +1,131 @@ ---- name: bioavailability_ma_et_al description: |- - Oral bioavailability is defined as the rate and extent to which the - active ingredient or active moiety is absorbed from a drug product and becomes - available at the site of action. + Oral bioavailability is defined as the rate and extent to which the + active ingredient or active moiety is absorbed from a drug product and becomes + available at the site of action. targets: - - id: bioavailable - description: whether it is bioavailable (1) or not (0) - units: - type: boolean - names: - - noun: oral bioavailability - - adjective: orally bioavailable - uris: - - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C70913 + - id: bioavailable + description: whether it is bioavailable (1) or not (0) + units: + type: boolean + names: + - noun: oral bioavailability + - adjective: orally bioavailable + uris: + - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C70913 benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: compound_name - type: Other - names: - - noun: compound name - - noun: drug name - - noun: generic drug name - description: drug name + - id: SMILES + type: SMILES + description: SMILES + - id: compound_name + type: Other + names: + - noun: compound name + - noun: drug name + - noun: generic drug name + description: drug name license: CC BY 4.0 links: - - url: https://doi.org/10.1016/j.jpba.2008.03.023 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#bioavailability-ma-et-al - description: data source + - 
url: https://doi.org/10.1016/j.jpba.2008.03.023 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#bioavailability-ma-et-al + description: data source num_points: 640 bibtex: - - |- - @article{Ma2008, - doi = {10.1016/j.jpba.2008.03.023}, - url = {https://doi.org/10.1016/j.jpba.2008.03.023}, - year = {2008}, - month = aug, - publisher = {Elsevier BV}, - volume = {47}, - number = {4-5}, - author = {Chang-Ying Ma and Sheng-Yong Yang and Hui Zhang - and Ming-Li Xiang and Qi Huang and Yu-Quan Wei}, - title = {Prediction models of human plasma protein binding rate and - oral bioavailability derived by using GA-CG-SVM method}, - journal = {Journal of Pharmaceutical and Biomedical Analysis} + - |- + @article{Ma2008, + doi = {10.1016/j.jpba.2008.03.023}, + url = {https://doi.org/10.1016/j.jpba.2008.03.023}, + year = {2008}, + month = aug, + publisher = {Elsevier BV}, + volume = {47}, + number = {4-5}, + author = {Chang-Ying Ma and Sheng-Yong Yang and Hui Zhang + and Ming-Li Xiang and Qi Huang and Yu-Quan Wei}, + title = {Prediction models of human plasma protein binding rate and + oral bioavailability derived by using GA-CG-SVM method}, + journal = {Journal of Pharmaceutical and Biomedical Analysis} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} has a {bioavailable#low&high} {bioavailable__names__noun}. - - Based on the {SMILES__description} {#representation of |!}{SMILES#}, the molecule has a {bioavailable#low&high} {bioavailable__names__noun}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that has a {bioavailable#low&high} {bioavailable__names__noun}. - - The {SMILES__description} {SMILES#} has a {bioavailable#low&high} {bioavailable__names__noun}. - - The molecule with the {SMILES__description} {SMILES#} has a {bioavailable#low&high} {bioavailable__names__noun}. - - |- - Task: Please classify a molecule based on the description. 
- Description: Predict if the molecule has a low or high {bioavailable__names__noun}? - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "low" or "high" without using any {#other|additional!} words. - Result: {bioavailable#low&high} - - |- - Task: Please classify a molecule based on the description. - Description: Predict if the molecule has a low or high {bioavailable__names__noun}? - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule has a {bioavailable#low&high} {bioavailable__names__noun}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that has a {bioavailable#low&high} {bioavailable__names__noun}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} has a low or high {bioavailable__names__noun}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, this molecule has a {bioavailable#low&high} {bioavailable__names__noun}. - - |- - User: Has the molecule with the {SMILES__description} {SMILES#} a low or high {bioavailable__names__noun}? - Assistant: It has a {bioavailable#low&high} {bioavailable__names__noun}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that has a {bioavailable#low&high} {bioavailable__names__noun}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that has a {bioavailable#low&high} {bioavailable__names__noun}? - Assistant: {#Ok, this|This!} is a molecule that has a {bioavailable#low&high} {bioavailable__names__noun}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. 
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should have a {bioavailable#low&high} {bioavailable__names__noun}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} has a {bioavailable#low&high} {bioavailable__names__noun}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should have a {bioavailable#low&high} {bioavailable__names__noun}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} has a {bioavailable#low&high} {bioavailable__names__noun}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {bioavailable__names__adjective}?{bioavailable#yes&no} - - |- - Task: Please classify a molecule based on the description. - Description: Predict if the molecule has a low or high {bioavailable__names__noun}? - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "low" or "high" without using any {#other|additional!} words. - Result:{bioavailable#low&high} - - |- - Task: Please answer the multiple choice question. - Question: Has the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a high {bioavailable__names__noun}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {bioavailable%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Has the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a high {bioavailable__names__noun}? 
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {bioavailable%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules have a high {bioavailable__names__noun}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%bioavailable%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules have a high {bioavailable__names__noun}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%bioavailable%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} has a {bioavailable#low&high} {bioavailable__names__noun}. + - Based on the {SMILES__description} {#representation of |!}{SMILES#}, the molecule has a {bioavailable#low&high} {bioavailable__names__noun}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that has a {bioavailable#low&high} {bioavailable__names__noun}. + - The {SMILES__description} {SMILES#} has a {bioavailable#low&high} {bioavailable__names__noun}. + - The molecule with the {SMILES__description} {SMILES#} has a {bioavailable#low&high} {bioavailable__names__noun}. + - |- + Task: Please classify a molecule based on the description. + Description: Predict if the molecule has a low or high {bioavailable__names__noun}? + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "low" or "high" without using any {#other|additional!} words. + Result: {bioavailable#low&high} + - |- + Task: Please classify a molecule based on the description. 
+ Description: Predict if the molecule has a low or high {bioavailable__names__noun}? + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule has a {bioavailable#low&high} {bioavailable__names__noun}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that has a {bioavailable#low&high} {bioavailable__names__noun}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} has a low or high {bioavailable__names__noun}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, this molecule has a {bioavailable#low&high} {bioavailable__names__noun}. + - |- + User: Has the molecule with the {SMILES__description} {SMILES#} a low or high {bioavailable__names__noun}? + Assistant: It has a {bioavailable#low&high} {bioavailable__names__noun}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that has a {bioavailable#low&high} {bioavailable__names__noun}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that has a {bioavailable#low&high} {bioavailable__names__noun}? + Assistant: {#Ok, this|This!} is a molecule that has a {bioavailable#low&high} {bioavailable__names__noun}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should have a {bioavailable#low&high} {bioavailable__names__noun}. 
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} has a {bioavailable#low&high} {bioavailable__names__noun}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should have a {bioavailable#low&high} {bioavailable__names__noun}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} has a {bioavailable#low&high} {bioavailable__names__noun}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {bioavailable__names__adjective}?{bioavailable#yes&no} + - |- + Task: Please classify a molecule based on the description. + Description: Predict if the molecule has a low or high {bioavailable__names__noun}? + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "low" or "high" without using any {#other|additional!} words. + Result:{bioavailable#low&high} + - |- + Task: Please answer the multiple choice question. + Question: Has the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a high {bioavailable__names__noun}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {bioavailable%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Has the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a high {bioavailable__names__noun}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {bioavailable%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules have a high {bioavailable__names__noun}? 
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%bioavailable%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules have a high {bioavailable__names__noun}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%bioavailable%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/block_polymers_morphology/meta.yaml b/data/tabular/block_polymers_morphology/meta.yaml index 874438bde..4c3644bf2 100644 --- a/data/tabular/block_polymers_morphology/meta.yaml +++ b/data/tabular/block_polymers_morphology/meta.yaml @@ -1,68 +1,66 @@ ---- name: block_polymers_morphology description: |- - Results of experimental phase measurements of di-block copolymers. + Results of experimental phase measurements of di-block copolymers. 
targets: - - id: phase1 - description: experimentally observed phase - type: text - names: - - noun: phase - - noun: experimentally observed phase - - id: T - description: temperature of measurement - type: continuous - significant_digits: 0 - units: K - - id: Mn - description: number-average molar mass - type: continuous - units: g/mol - significant_digits: 0 - names: - - noun: number-average molar mass - - noun: Mn - - noun: number-average molar mass (Mn) - - id: f1 - description: volume fraction of block type 1 - type: continuous - significant_digits: 2 - names: - - noun: volume fraction of block type 1 - - id: Mw - description: mass-average molar mass - type: text - names: - - noun: mass-average molar mass - - noun: mass-average molar mass (Mw) - - id: D - description: dispersity - type: text - names: - - noun: dispersity - - noun: dispersity (D) + - id: phase1 + description: experimentally observed phase + type: text + names: + - noun: phase + - noun: experimentally observed phase + - id: T + description: temperature of measurement + type: continuous + significant_digits: 0 + units: K + - id: Mn + description: number-average molar mass + type: continuous + units: g/mol + significant_digits: 0 + names: + - noun: number-average molar mass + - noun: Mn + - noun: number-average molar mass (Mn) + - id: f1 + description: volume fraction of block type 1 + type: continuous + significant_digits: 2 + names: + - noun: volume fraction of block type 1 + - id: Mw + description: mass-average molar mass + type: text + names: + - noun: mass-average molar mass + - noun: mass-average molar mass (Mw) + - id: D + description: dispersity + type: text + names: + - noun: dispersity + - noun: dispersity (D) identifiers: - - id: BigSMILES - type: string - description: BigSMILES + - id: BigSMILES + type: string + description: BigSMILES license: CC BY 4.0 links: - - url: https://github.com/olsenlabmit/BCDB/tree/main - description: original data source + - url: 
https://github.com/olsenlabmit/BCDB/tree/main + description: original data source num_points: 4438 templates: - - The {#polymer|di-block copolymer|copolymer!} with BigSMILES {BigSMILES#}, {Mn__names__noun} of {Mn#} {Mn__units}, {f1__names__noun} of {f1#}{Mw#}{D#} - was {#measured|analyzed|studied!} at {T#} {T__units} and found to be in the {phase1#} phase. - - |- - Question: If I have a {#polymer|di-block copolymer|copolymer!} with {Mn__names__noun} of {Mn#} {Mn__units}, {f1__names__noun} of {f1#}{Mw#}{D#}, what phase will it be in at {T#} {T__units}? - Answer: The polymer will be in the {phase1#} phase. - - |- - User: I want to design a {#polymer|di-block copolymer|copolymer!} with a particular {Mn__names__noun}, {f1__names__noun}, and {phase1__names__noun}. - Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, !}I would need to know the {Mn__names__noun}, {f1__names__noun}, and {phase1__names__noun} of the polymer you want to design. - User: The {Mn__names__noun} should be {Mn#} {Mn__units}, the {f1__names__noun} should be {f1#}, and the {phase1__names__noun} should be {phase1#}. - Assistant: I {#recommend|suggest|propose|advise!} the {#polymer|di-block copolymer|copolymer!} with BigSMILES {BigSMILES#}{Mw#}{D#}. - - |- - User: I want to design a {#polymer|di-block copolymer|copolymer!} that is in the {phase1#} phase. - Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, !}{#do you have any other constraints?|do you have other requirements?|what else should I take into account?!} - User: The {Mn__names__noun} should be {Mn#} {Mn__units}, the {f1__names__noun} should be {f1#}. - Assistant: I {#recommend|suggest|propose|advise!} the {#polymer|di-block copolymer|copolymer!} with BigSMILES {BigSMILES#}{Mw#}{D#}. 
+ - The {#polymer|di-block copolymer|copolymer!} with BigSMILES {BigSMILES#}, {Mn__names__noun} of {Mn#} {Mn__units}, {f1__names__noun} of {f1#}{Mw#}{D#} was {#measured|analyzed|studied!} at {T#} {T__units} and found to be in the {phase1#} phase. + - |- + Question: If I have a {#polymer|di-block copolymer|copolymer!} with {Mn__names__noun} of {Mn#} {Mn__units}, {f1__names__noun} of {f1#}{Mw#}{D#}, what phase will it be in at {T#} {T__units}? + Answer: The polymer will be in the {phase1#} phase. + - |- + User: I want to design a {#polymer|di-block copolymer|copolymer!} with a particular {Mn__names__noun}, {f1__names__noun}, and {phase1__names__noun}. + Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, !}I would need to know the {Mn__names__noun}, {f1__names__noun}, and {phase1__names__noun} of the polymer you want to design. + User: The {Mn__names__noun} should be {Mn#} {Mn__units}, the {f1__names__noun} should be {f1#}, and the {phase1__names__noun} should be {phase1#}. + Assistant: I {#recommend|suggest|propose|advise!} the {#polymer|di-block copolymer|copolymer!} with BigSMILES {BigSMILES#}{Mw#}{D#}. + - |- + User: I want to design a {#polymer|di-block copolymer|copolymer!} that is in the {phase1#} phase. + Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, !}{#do you have any other constraints?|do you have other requirements?|what else should I take into account?!} + User: The {Mn__names__noun} should be {Mn#} {Mn__units}, the {f1__names__noun} should be {f1#}. + Assistant: I {#recommend|suggest|propose|advise!} the {#polymer|di-block copolymer|copolymer!} with BigSMILES {BigSMILES#}{Mw#}{D#}. 
diff --git a/data/tabular/blood_brain_barrier_martins_et_al/meta.yaml b/data/tabular/blood_brain_barrier_martins_et_al/meta.yaml index d8e213bb9..db2f840c9 100644 --- a/data/tabular/blood_brain_barrier_martins_et_al/meta.yaml +++ b/data/tabular/blood_brain_barrier_martins_et_al/meta.yaml @@ -1,155 +1,154 @@ ---- name: blood_brain_barrier_martins_et_al description: |- - As a membrane separating circulating blood and brain extracellular - fluid, the blood-brain barrier (BBB) is the protection layer that blocks most - foreign drugs. Thus the ability of a drug to penetrate the barrier to deliver - to the site of action forms a crucial challenge in development of drugs for the - central nervous system. + As a membrane separating circulating blood and brain extracellular + fluid, the blood-brain barrier (BBB) is the protection layer that blocks most + foreign drugs. Thus the ability of a drug to penetrate the barrier to deliver + to the site of action forms a crucial challenge in development of drugs for the + central nervous system. 
targets: - - id: penetrate_BBB - description: The ability of a drug to penetrate the blood brain barrier (1) or not (0) - units: - type: boolean - names: - - noun: blood brain barrier penetration - - noun: ADME blood-brain barrier penetration - - verb: penetrates the blood brain barrier to reach the brain - - verb: penetrates the blood brain barrier - - adjective: penetrating the blood brain barrier - - adjective: penetrating the blood brain barrier to reach the brain - uris: + - id: penetrate_BBB + description: The ability of a drug to penetrate the blood brain barrier (1) or not (0) + units: + type: boolean + names: + - noun: blood brain barrier penetration + - noun: ADME blood-brain barrier penetration + - verb: penetrates the blood brain barrier to reach the brain + - verb: penetrates the blood brain barrier + - adjective: penetrating the blood brain barrier + - adjective: penetrating the blood brain barrier to reach the brain + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: compound_name - type: Other - names: - - noun: compound name - - noun: drug name - - noun: generic drug name - description: compound name + - id: SMILES + type: SMILES + description: SMILES + - id: compound_name + type: Other + names: + - noun: compound name + - noun: drug name + - noun: generic drug name + description: compound name license: CC BY 4.0 links: - - url: https://doi.org/10.1021/ci300124c - description: corresponding publication - - url: https://rb.gy/0xx91v - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#bbb-blood-brain-barrier-martins-et-al - description: data source + - url: https://doi.org/10.1021/ci300124c + description: corresponding publication + - url: https://rb.gy/0xx91v + description: corresponding publication + - url: 
https://tdcommons.ai/single_pred_tasks/adme/#bbb-blood-brain-barrier-martins-et-al + description: data source num_points: 2030 bibtex: - - |- - @article{Martins2012, - doi = {10.1021/ci300124c}, - url = {https://doi.org/10.1021/ci300124c}, - year = {2012}, - month = jun, - publisher = {American Chemical Society (ACS)}, - volume = {52}, - number = {6}, - pages = {1686--1697}, - author = {Ines Filipa Martins and Ana L. Teixeira and Luis Pinheiro - and Andre O. Falcao}, - title = {A Bayesian Approach to in Silico Blood-Brain Barrier Penetration Modeling}, - journal = {Journal of Chemical Information and Modeling} - - |- - @article{Wu2018, - doi = {10.1039/c7sc02664a}, - url = {https://doi.org/10.1039/c7sc02664a}, - year = {2018}, - publisher = {Royal Society of Chemistry (RSC)}, - volume = {9}, - number = {2}, - pages = {513--530}, - author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg and Joseph - Gomes and Caleb Geniesse and Aneesh S. Pappu and Karl Leswing and Vijay Pande}, - title = {MoleculeNet: a benchmark for molecular machine learning}, - journal = {Chemical Science} + - |- + @article{Martins2012, + doi = {10.1021/ci300124c}, + url = {https://doi.org/10.1021/ci300124c}, + year = {2012}, + month = jun, + publisher = {American Chemical Society (ACS)}, + volume = {52}, + number = {6}, + pages = {1686--1697}, + author = {Ines Filipa Martins and Ana L. Teixeira and Luis Pinheiro + and Andre O. Falcao}, + title = {A Bayesian Approach to in Silico Blood-Brain Barrier Penetration Modeling}, + journal = {Journal of Chemical Information and Modeling} + - |- + @article{Wu2018, + doi = {10.1039/c7sc02664a}, + url = {https://doi.org/10.1039/c7sc02664a}, + year = {2018}, + publisher = {Royal Society of Chemistry (RSC)}, + volume = {9}, + number = {2}, + pages = {513--530}, + author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg and Joseph + Gomes and Caleb Geniesse and Aneesh S. 
Pappu and Karl Leswing and Vijay Pande}, + title = {MoleculeNet: a benchmark for molecular machine learning}, + journal = {Chemical Science} templates: - - The molecule with the {SMILES__description} {SMILES#} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {penetrate_BBB#not &NULL}identified as {penetrate_BBB__names__adjective}. - - The molecule represented with the {SMILES__description} {SMILES#} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. - - '{SMILES#} represents a molecule that is {penetrate_BBB#not &NULL}identified as {penetrate_BBB__names__adjective}.' - - '{SMILES#} represents a molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.' - - '{SMILES#} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.' - - The {#molecule |!}{SMILES__description} {SMILES#} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {penetrate_BBB__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {penetrate_BBB#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {penetrate_BBB__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. - - |- - Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|chemical compound!} based on the {#text |!}description{# below|!}. 
- Description: A molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {penetrate_BBB__names__adjective}? - Assistant: {penetrate_BBB#No&Yes}, this molecule is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {penetrate_BBB__names__adjective}? - Assistant: {penetrate_BBB#No&Yes}, it is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}? - Assistant: This is a molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} the {SMILES__description} of a {#molecule|chemical!}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {penetrate_BBB#not &NULL}be {penetrate_BBB__names__adjective}. 
- Assistant: Got it, this {SMILES__description} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {penetrate_BBB__names__adjective}:{penetrate_BBB#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {penetrate_BBB__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{penetrate_BBB#False&True} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} of {SMILES#} {penetrate_BBB__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {penetrate_BBB%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%penetrate_BBB%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} of {SMILES#} {penetrate_BBB__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {penetrate_BBB%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
- Options: - {SMILES%penetrate_BBB%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {SMILES#} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {penetrate_BBB#not &NULL}identified as {penetrate_BBB__names__adjective}. + - The molecule represented with the {SMILES__description} {SMILES#} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. + - "{SMILES#} represents a molecule that is {penetrate_BBB#not &NULL}identified as {penetrate_BBB__names__adjective}." + - "{SMILES#} represents a molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}." + - "{SMILES#} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}." + - The {#molecule |!}{SMILES__description} {SMILES#} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {penetrate_BBB__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {penetrate_BBB#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {penetrate_BBB__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. + - |- + Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|chemical compound!} based on the {#text |!}description{# below|!}. 
+ Description: A molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {penetrate_BBB__names__adjective}? + Assistant: {penetrate_BBB#No&Yes}, this molecule is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {penetrate_BBB__names__adjective}? + Assistant: {penetrate_BBB#No&Yes}, it is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}? + Assistant: This is a molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} the {SMILES__description} of a {#molecule|chemical!}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {penetrate_BBB#not &NULL}be {penetrate_BBB__names__adjective}. 
+ Assistant: Got it, this {SMILES__description} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {penetrate_BBB__names__adjective}:{penetrate_BBB#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {penetrate_BBB__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{penetrate_BBB#False&True} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} of {SMILES#} {penetrate_BBB__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {penetrate_BBB%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%penetrate_BBB%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} of {SMILES#} {penetrate_BBB__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {penetrate_BBB%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {SMILES%penetrate_BBB%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/buchwald_hartwig/meta.yaml b/data/tabular/buchwald_hartwig/meta.yaml index d777d975a..4ab71993f 100644 --- a/data/tabular/buchwald_hartwig/meta.yaml +++ b/data/tabular/buchwald_hartwig/meta.yaml @@ -1,103 +1,101 @@ ---- name: buchwald_hartwig_doyle description: |- - High-throughput experimentation palladium-catalyzed Buchwald Hardwig - C-N cross-coupling data set with yields. + High-throughput experimentation palladium-catalyzed Buchwald Hardwig + C-N cross-coupling data set with yields. targets: - - id: yield - description: Reaction yields analyzed by LCMS - units: \% - type: continuous - names: - - noun: reaction yield - - noun: yield - - noun: reaction yield (measured by LCMS) - - id: masked_rxn_smiles - type: text - description: reaction SMILES with one element masked - names: - - noun: reaction SMILES with one element masked as `MASK` - - noun: reaction SMILES with one element hidden as `MASK` - - noun: masked reaction SMILES (one component masked as `MASK`) - - noun: masked reaction SMILES string (one component masked as `MASK`) - - noun: masked RXNSMILES (one component masked as `MASK`) - - id: educt_string - type: text - description: reaction educts - names: - - noun: reaction educts - - noun: educts - - noun: starting materials - - id: product_string - type: text - description: reaction products - names: - - noun: reaction products - - noun: products + - id: yield + description: Reaction yields analyzed by LCMS + units: \% + type: continuous + names: + - noun: reaction yield + - noun: yield + - noun: reaction yield (measured by LCMS) + - id: masked_rxn_smiles + type: text + description: reaction SMILES with one element masked + names: + - noun: reaction SMILES with one element masked as `MASK` + - noun: reaction SMILES with one element hidden as `MASK` + - noun: masked reaction SMILES (one component masked as `MASK`) + - noun: masked reaction SMILES string 
(one component masked as `MASK`) + - noun: masked RXNSMILES (one component masked as `MASK`) + - id: educt_string + type: text + description: reaction educts + names: + - noun: reaction educts + - noun: educts + - noun: starting materials + - id: product_string + type: text + description: reaction products + names: + - noun: reaction products + - noun: products identifiers: - - id: RXNSMILES - type: RXNSMILES - description: RXNSMILES - names: - - noun: reaction SMILES - - noun: reaction SMILES string - - noun: RXNSMILES - - noun: reaction SMILES (RXNSMILES) - - id: missing_component - type: text - description: masked element + - id: RXNSMILES + type: RXNSMILES + description: RXNSMILES + names: + - noun: reaction SMILES + - noun: reaction SMILES string + - noun: RXNSMILES + - noun: reaction SMILES (RXNSMILES) + - id: missing_component + type: text + description: masked element license: MIT links: - - url: https://doi.org/10.1126/science.aar5169 - description: corresponding publication - - url: https://www.sciencedirect.com/science/article/pii/S2451929420300851 - description: publication with data processing - - url: https://github.com/rxn4chemistry/rxn_yields/blob/master/rxn_yields/data.py - description: preprocessing - - url: https://github.com/reymond-group/drfp/tree/main/data - description: dataset + - url: https://doi.org/10.1126/science.aar5169 + description: corresponding publication + - url: https://www.sciencedirect.com/science/article/pii/S2451929420300851 + description: publication with data processing + - url: https://github.com/rxn4chemistry/rxn_yields/blob/master/rxn_yields/data.py + description: preprocessing + - url: https://github.com/reymond-group/drfp/tree/main/data + description: dataset num_points: 3955 url: https://doi.org/10.1126/science.aar5169 bibtex: - - |- - @article{ahneman2018predicting, - title={Predicting reaction performance in C--N cross-coupling using machine learning}, - author={Ahneman, Derek T and Estrada, Jes{'u}s G and Lin, 
Shishi and Dreher, Spencer D and Doyle, Abigail G}, - journal={Science}, - volume={360}, - number={6385}, - pages={186--190}, - year={2018}, - publisher={American Association for the Advancement of Science}, - } + - |- + @article{ahneman2018predicting, + title={Predicting reaction performance in C--N cross-coupling using machine learning}, + author={Ahneman, Derek T and Estrada, Jes{'u}s G and Lin, Shishi and Dreher, Spencer D and Doyle, Abigail G}, + journal={Science}, + volume={360}, + number={6385}, + pages={186--190}, + year={2018}, + publisher={American Association for the Advancement of Science}, + } templates: - - The {RXNSMILES__names__noun} {RXNSMILES#} has the {educt_string__names__noun} {educt_string#} and the {product_string__names__noun} {product_string#}. - - The {RXNSMILES__names__noun} {RXNSMILES#} has the {product_string__names__noun} {product_string#} and the {educt_string__names__noun} {educt_string#}. - - The masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#} is {missing_component#}. - - The {#chemical|compound!} with SMILES {missing_component#} is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}. - - |- - Question: {#What|Which!} {educt_string__names__noun} are {#needed|required!} to {#produce|synthesize!} the {product_string__names__noun} {product_string#}? - Answer: {educt_string#}. - - |- - Question: {#What|Which!} {product_string__names__noun} are produced from the {educt_string__names__noun} {educt_string#}? - Answer: {product_string#}. - - |- - User: I {#want|would like to|must|need to!} {#synthesize|produce!} the {product_string__names__noun} {product_string#}. - Assistant: {#Cool, is|That's interesting, is|Great, is|Is!} there anything else I can do for you? - User: {#Yes, |!}I would like to know the {educt_string__names__noun} I need to produce the {product_string__names__noun} {product_string#}. 
- Assistant: {#I recommend|I suggest|I propose|I advise!} the following {educt_string__names__noun}: {educt_string#}. - - |- - Question: What is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}? - Answer: {missing_component#}. - - |- - Task: Predict the masked component in a {masked_rxn_smiles__names__noun}. - Description: {masked_rxn_smiles#} - {#Answer|Solution!}: {missing_component#} - - The {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#} is {yield#}{yield__units}. - - |- - User: {#I need|I want|I would like!} to run a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}. What is the {yield__names__noun} {#I can expect|I should expect|I should get|I can get!}? - Assistant: {#The|The expected|The predicted|The estimated!} {yield__names__noun} is {yield#}{yield__units}. - - - |- - Question: {#What is|What's|What is the|What's the!} {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}? - Answer: {yield#}{yield__units}. + - The {RXNSMILES__names__noun} {RXNSMILES#} has the {educt_string__names__noun} {educt_string#} and the {product_string__names__noun} {product_string#}. + - The {RXNSMILES__names__noun} {RXNSMILES#} has the {product_string__names__noun} {product_string#} and the {educt_string__names__noun} {educt_string#}. + - The masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#} is {missing_component#}. + - The {#chemical|compound!} with SMILES {missing_component#} is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}. + - |- + Question: {#What|Which!} {educt_string__names__noun} are {#needed|required!} to {#produce|synthesize!} the {product_string__names__noun} {product_string#}? + Answer: {educt_string#}. + - |- + Question: {#What|Which!} {product_string__names__noun} are produced from the {educt_string__names__noun} {educt_string#}? + Answer: {product_string#}. 
+ - |- + User: I {#want|would like to|must|need to!} {#synthesize|produce!} the {product_string__names__noun} {product_string#}. + Assistant: {#Cool, is|That's interesting, is|Great, is|Is!} there anything else I can do for you? + User: {#Yes, |!}I would like to know the {educt_string__names__noun} I need to produce the {product_string__names__noun} {product_string#}. + Assistant: {#I recommend|I suggest|I propose|I advise!} the following {educt_string__names__noun}: {educt_string#}. + - |- + Question: What is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}? + Answer: {missing_component#}. + - |- + Task: Predict the masked component in a {masked_rxn_smiles__names__noun}. + Description: {masked_rxn_smiles#} + {#Answer|Solution!}: {missing_component#} + - The {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#} is {yield#}{yield__units}. + - |- + User: {#I need|I want|I would like!} to run a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}. What is the {yield__names__noun} {#I can expect|I should expect|I should get|I can get!}? + Assistant: {#The|The expected|The predicted|The estimated!} {yield__names__noun} is {yield#}{yield__units}. + - |- + Question: {#What is|What's|What is the|What's the!} {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}? + Answer: {yield#}{yield__units}. diff --git a/data/tabular/caco2_wang/meta.yaml b/data/tabular/caco2_wang/meta.yaml index e336c6c47..5d4fb41da 100644 --- a/data/tabular/caco2_wang/meta.yaml +++ b/data/tabular/caco2_wang/meta.yaml @@ -1,57 +1,56 @@ ---- name: caco2_wang description: |- - The human colon epithelial cancer cell line, Caco-2, - is used as an in vitro model to simulate the human intestinal tissue. - The experimental result on the rate of drug passing through - the Caco-2 cells can approximate the rate at which the drug permeates - through the human intestinal tissue. 
+ The human colon epithelial cancer cell line, Caco-2, + is used as an in vitro model to simulate the human intestinal tissue. + The experimental result on the rate of drug passing through + the Caco-2 cells can approximate the rate at which the drug permeates + through the human intestinal tissue. targets: - - id: permeability - description: Caco-2 cell effective permeability. - units: cm/s - type: continuous - names: - - noun: Caco-2 cell effective permeability - - noun: Caco-2 cell permeability - - noun: Caco-2 permeability - pubchem_aids: - - 678378 - uris: - - http://www.bioassayontology.org/bao#BAO_0010008 - - http://purl.obolibrary.org/obo/MI_2162 + - id: permeability + description: Caco-2 cell effective permeability. + units: cm/s + type: continuous + names: + - noun: Caco-2 cell effective permeability + - noun: Caco-2 cell permeability + - noun: Caco-2 permeability + pubchem_aids: + - 678378 + uris: + - http://www.bioassayontology.org/bao#BAO_0010008 + - http://purl.obolibrary.org/obo/MI_2162 benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: compound_name - type: Other - description: compound name - names: - - noun: compound - - noun: compound name + - id: SMILES + type: SMILES + description: SMILES + - id: compound_name + type: Other + description: compound name + names: + - noun: compound + - noun: compound name license: CC BY 4.0 links: - - url: https://tdcommons.ai/single_pred_tasks/adme/#caco-2-cell-effective-permeability-wang-et-al - description: original data set link - - url: https://pubs.acs.org/doi/10.1021/acs.jcim.5b00642 - description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#caco-2-cell-effective-permeability-wang-et-al + description: original data set link + - url: https://pubs.acs.org/doi/10.1021/acs.jcim.5b00642 + description: 
corresponding publication num_points: 910 bibtex: - - |- - @article{wang2016adme, - title={ADME properties evaluation in drug discovery: prediction of Caco-2 cell permeability - using a combination of NSGA-II and boosting}, - author={Wang, Ning-Ning and Dong, Jie and Deng, Yin-Hua and Zhu, Min-Feng and Wen, Ming and Yao, - Zhi-Jiang and Lu, Ai-Ping and Wang, Jian-Bing and Cao, Dong-Sheng}, - journal={Journal of Chemical Information and Modeling}, - volume={56}, - number={4}, - pages={763--773}, - year={2016}, - publisher={ACS Publications} - } + - |- + @article{wang2016adme, + title={ADME properties evaluation in drug discovery: prediction of Caco-2 cell permeability + using a combination of NSGA-II and boosting}, + author={Wang, Ning-Ning and Dong, Jie and Deng, Yin-Hua and Zhu, Min-Feng and Wen, Ming and Yao, + Zhi-Jiang and Lu, Ai-Ping and Wang, Jian-Bing and Cao, Dong-Sheng}, + journal={Journal of Chemical Information and Modeling}, + volume={56}, + number={4}, + pages={763--773}, + year={2016}, + publisher={ACS Publications} + } diff --git a/data/tabular/carcinogens/meta.yaml b/data/tabular/carcinogens/meta.yaml index 7e4fde301..abcf07603 100644 --- a/data/tabular/carcinogens/meta.yaml +++ b/data/tabular/carcinogens/meta.yaml @@ -1,144 +1,142 @@ ---- name: carcinogens description: |- - A carcinogen is any substance, radionuclide, or radiation that promotes - carcinogenesis, the formation of cancer. This may be due to the ability to damage - the genome or to the disruption of cellular metabolic processes. + A carcinogen is any substance, radionuclide, or radiation that promotes + carcinogenesis, the formation of cancer. This may be due to the ability to damage + the genome or to the disruption of cellular metabolic processes. targets: - - id: carcinogen - description: whether it is carcinogenic (1) or not (0). 
- units: - type: boolean - names: - - noun: carcinogen - - adjective: carcinogenic - - gerund: having the potential to cause cancer - uris: - - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C347 - - http://purl.bioontology.org/ontology/SNOMEDCT/88376000 + - id: carcinogen + description: whether it is carcinogenic (1) or not (0). + units: + type: boolean + names: + - noun: carcinogen + - adjective: carcinogenic + - gerund: having the potential to cause cancer + uris: + - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C347 + - http://purl.bioontology.org/ontology/SNOMEDCT/88376000 benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://doi.org/10.1002/qsar.200860192 - description: corresponding publication - - url: https://doi.org/10.1021/ci300367a - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#carcinogens - description: Data source + - url: https://doi.org/10.1002/qsar.200860192 + description: corresponding publication + - url: https://doi.org/10.1021/ci300367a + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#carcinogens + description: Data source num_points: 280 bibtex: - - |- - @article{Lagunin2009, - doi = {10.1002/qsar.200860192}, - url = {https://doi.org/10.1002/qsar.200860192}, - year = {2009}, - month = jun, - publisher = {Wiley}, - volume = {28}, - number = {8}, - pages = {806--810}, - author = {Alexey Lagunin and Dmitrii Filimonov and Alexey Zakharov and Wei Xie - and Ying Huang and Fucheng Zhu and Tianxiang Shen and Jianhua Yao and Vladimir Poroikov}, - title = {Computer-Aided Prediction of Rodent Carcinogenicity by PASS and CISOC PSCT}, - journal = {QSAR & Combinatorial Science} - - |- - @article{Cheng2012, - doi 
= {10.1021/ci300367a}, - url = {https://doi.org/10.1021/ci300367a}, - year = {2012}, - month = nov, - publisher = {American Chemical Society (ACS)}, - volume = {52}, - number = {11}, - pages = {3099--3105}, - author = {Feixiong Cheng and Weihua Li and Yadi Zhou and Jie Shen and Zengrui Wu - and Guixia Liu and Philip W. Lee and Yun Tang}, - title = {admetSAR: A Comprehensive Source and Free Tool for Assessment of Chemical ADMET Properties}, - journal = {Journal of Chemical Information and Modeling} + - |- + @article{Lagunin2009, + doi = {10.1002/qsar.200860192}, + url = {https://doi.org/10.1002/qsar.200860192}, + year = {2009}, + month = jun, + publisher = {Wiley}, + volume = {28}, + number = {8}, + pages = {806--810}, + author = {Alexey Lagunin and Dmitrii Filimonov and Alexey Zakharov and Wei Xie + and Ying Huang and Fucheng Zhu and Tianxiang Shen and Jianhua Yao and Vladimir Poroikov}, + title = {Computer-Aided Prediction of Rodent Carcinogenicity by PASS and CISOC PSCT}, + journal = {QSAR & Combinatorial Science} + - |- + @article{Cheng2012, + doi = {10.1021/ci300367a}, + url = {https://doi.org/10.1021/ci300367a}, + year = {2012}, + month = nov, + publisher = {American Chemical Society (ACS)}, + volume = {52}, + number = {11}, + pages = {3099--3105}, + author = {Feixiong Cheng and Weihua Li and Yadi Zhou and Jie Shen and Zengrui Wu + and Guixia Liu and Philip W. Lee and Yun Tang}, + title = {admetSAR: A Comprehensive Source and Free Tool for Assessment of Chemical ADMET Properties}, + journal = {Journal of Chemical Information and Modeling} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {carcinogen#no &NULL}{carcinogen__names__adjective} - {#properties|effects!}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {carcinogen#no &NULL}{carcinogen__names__adjective} {#effects|properties|characteristics|features!}. 
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {carcinogen#not &NULL}identified as {carcinogen__names__adjective}. - - The {SMILES__description} {SMILES#} is {carcinogen#not &NULL}{carcinogen__names__adjective}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {carcinogen#not &NULL}{carcinogen__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {carcinogen__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {carcinogen#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {carcinogen__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {carcinogen#not &NULL}{carcinogen__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {carcinogen#not &NULL}{carcinogen__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {carcinogen__names__adjective}? - Assistant: {carcinogen#No&Yes}, this molecule is {carcinogen#not &NULL}{carcinogen__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {carcinogen__names__adjective}? - Assistant: {carcinogen#No&Yes}, it is {carcinogen#not &NULL}{carcinogen__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {carcinogen#not &NULL}{carcinogen__names__adjective}? 
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {carcinogen#not &NULL}{carcinogen__names__adjective}? - Assistant: This is a molecule that is {carcinogen#not &NULL}{carcinogen__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {carcinogen#not &NULL}be {carcinogen__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {carcinogen#not &NULL}{carcinogen__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {carcinogen#not &NULL}be {carcinogen__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {carcinogen#not &NULL}{carcinogen__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {carcinogen__names__adjective}:{carcinogen#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {carcinogen__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{carcinogen#False&True} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {carcinogen__names__adjective}? 
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {carcinogen%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {carcinogen__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {carcinogen%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {carcinogen#not &NULL}{carcinogen__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%carcinogen%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {carcinogen#not &NULL}{carcinogen__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%carcinogen%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {carcinogen#no &NULL}{carcinogen__names__adjective} {#properties|effects!}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {carcinogen#no &NULL}{carcinogen__names__adjective} {#effects|properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {carcinogen#not &NULL}identified as {carcinogen__names__adjective}. + - The {SMILES__description} {SMILES#} is {carcinogen#not &NULL}{carcinogen__names__adjective}. 
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {carcinogen#not &NULL}{carcinogen__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {carcinogen__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {carcinogen#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {carcinogen__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {carcinogen#not &NULL}{carcinogen__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {carcinogen#not &NULL}{carcinogen__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {carcinogen__names__adjective}? + Assistant: {carcinogen#No&Yes}, this molecule is {carcinogen#not &NULL}{carcinogen__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {carcinogen__names__adjective}? + Assistant: {carcinogen#No&Yes}, it is {carcinogen#not &NULL}{carcinogen__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {carcinogen#not &NULL}{carcinogen__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {carcinogen#not &NULL}{carcinogen__names__adjective}? 
+ Assistant: This is a molecule that is {carcinogen#not &NULL}{carcinogen__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {carcinogen#not &NULL}be {carcinogen__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {carcinogen#not &NULL}{carcinogen__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {carcinogen#not &NULL}be {carcinogen__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {carcinogen#not &NULL}{carcinogen__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {carcinogen__names__adjective}:{carcinogen#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {carcinogen__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{carcinogen#False&True} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {carcinogen__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {carcinogen%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. 
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {carcinogen__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {carcinogen%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {carcinogen#not &NULL}{carcinogen__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%carcinogen%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {carcinogen#not &NULL}{carcinogen__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%carcinogen%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/cav3_t-type_calcium_channels_butkiewicz/meta.yaml b/data/tabular/cav3_t-type_calcium_channels_butkiewicz/meta.yaml index 7157d491e..03c4c5ea1 100644 --- a/data/tabular/cav3_t-type_calcium_channels_butkiewicz/meta.yaml +++ b/data/tabular/cav3_t-type_calcium_channels_butkiewicz/meta.yaml @@ -1,169 +1,166 @@ ---- name: cav3_t-type_calcium_channels_butkiewicz description: |- - This dataset was initially curated from HTS data at the PubChem database. - The curation process is documented in Butkiewicz et al. - Primary screening with AID 449739 identified inhibitors of Cav3 T-type calcium channels. - Four follow-up screens were performed to confirm inhibitory effects on smaller sets of compounds - involving AID 493021, AID 493022, AID 493023, and AID 493041. - AID 489005 was performed as counter screen validating active compounds of the primary screen. 
+ This dataset was initially curated from HTS data at the PubChem database. + The curation process is documented in Butkiewicz et al. + Primary screening with AID 449739 identified inhibitors of Cav3 T-type calcium channels. + Four follow-up screens were performed to confirm inhibitory effects on smaller sets of compounds + involving AID 493021, AID 493022, AID 493023, and AID 493041. + AID 489005 was performed as counter screen validating active compounds of the primary screen. targets: - - id: activity_cav3_t_type_calcium_channels - description: whether it active against cav3 t-type calcium channels receptor (1) or not (0) - units: - type: boolean - names: - - noun: inhibition of the cav3 t-type calcium channel activity - - adjective: cav3 t-type calcium channel inhibition - - gerund: inhibiting the activity of cav3 t-type calcium channels - - verb: blocks t-type calcium channels - - verb: inhibits cav3 t-type calcium channels - pubchem_aids: - - 1053190 - - 489005 - - 493021 - - 493022 - - 493023 - - 493041 - uris: - - http://purl.obolibrary.org/obo/CHEBI_194338 + - id: activity_cav3_t_type_calcium_channels + description: whether it active against cav3 t-type calcium channels receptor (1) or not (0) + units: + type: boolean + names: + - noun: inhibition of the cav3 t-type calcium channel activity + - adjective: cav3 t-type calcium channel inhibition + - gerund: inhibiting the activity of cav3 t-type calcium channels + - verb: blocks t-type calcium channels + - verb: inhibits cav3 t-type calcium channels + pubchem_aids: + - 1053190 + - 489005 + - 493021 + - 493022 + - 493023 + - 493041 + uris: + - http://purl.obolibrary.org/obo/CHEBI_194338 identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al - description: original dataset - - url: https://doi.org/10.3390/molecules18010735 - description: corresponding 
publication - - url: https://doi.org/10.1093/nar/gky1033 - description: corresponding publication - - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ - description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al + description: original dataset + - url: https://doi.org/10.3390/molecules18010735 + description: corresponding publication + - url: https://doi.org/10.1093/nar/gky1033 + description: corresponding publication + - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ + description: corresponding publication benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split num_points: 100875 bibtex: - - |- - @article{Butkiewicz2013, - doi = {10.3390/molecules18010735}, - url = {https://doi.org/10.3390/molecules18010735}, - year = {2013}, - month = jan, - publisher = {{MDPI} {AG}}, - volume = {18}, - number = {1}, - pages = {735--756}, - author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and Jeffrey Mendenhall - and Pedro Teixeira and C. 
Weaver and Jens Meiler}, - title = {Benchmarking Ligand-Based Virtual High-Throughput Screening with the {PubChem} Database}, - journal = {Molecules}} - - |- - @article{Kim2018, - doi = {10.1093/nar/gky1033}, - url = {https://doi.org/10.1093/nar/gky1033}, - year = {2018}, - month = oct, - publisher = {Oxford University Press ({OUP})}, - volume = {47}, - number = {D1}, - pages = {D1102--D1109}, - author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta Gindulyte and Jia He and Siqian He - and Qingliang Li and Benjamin A Shoemaker and Paul A Thiessen and Bo Yu and Leonid Zaslavsky - and Jian Zhang and Evan E Bolton}, - title = {{PubChem} 2019 update: improved access to chemical data}, - journal = {Nucleic Acids Research}} - - |- - @article{Butkiewicz2017, - doi = {}, - url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, - year = {2017}, - publisher = {Chem Inform}, - volume = {3}, - number = {1}, - author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe, E. W. and Weaver, D. C. - and Meiler, J.}, - title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from the {P}ub{C}hem {D}atabase}}, - journal = {Chemical Science}} + - |- + @article{Butkiewicz2013, + doi = {10.3390/molecules18010735}, + url = {https://doi.org/10.3390/molecules18010735}, + year = {2013}, + month = jan, + publisher = {{MDPI} {AG}}, + volume = {18}, + number = {1}, + pages = {735--756}, + author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and Jeffrey Mendenhall + and Pedro Teixeira and C. 
Weaver and Jens Meiler}, + title = {Benchmarking Ligand-Based Virtual High-Throughput Screening with the {PubChem} Database}, + journal = {Molecules}} + - |- + @article{Kim2018, + doi = {10.1093/nar/gky1033}, + url = {https://doi.org/10.1093/nar/gky1033}, + year = {2018}, + month = oct, + publisher = {Oxford University Press ({OUP})}, + volume = {47}, + number = {D1}, + pages = {D1102--D1109}, + author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta Gindulyte and Jia He and Siqian He + and Qingliang Li and Benjamin A Shoemaker and Paul A Thiessen and Bo Yu and Leonid Zaslavsky + and Jian Zhang and Evan E Bolton}, + title = {{PubChem} 2019 update: improved access to chemical data}, + journal = {Nucleic Acids Research}} + - |- + @article{Butkiewicz2017, + doi = {}, + url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, + year = {2017}, + publisher = {Chem Inform}, + volume = {3}, + number = {1}, + author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe, E. W. and Weaver, D. C. + and Meiler, J.}, + title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from the {P}ub{C}hem {D}atabase}}, + journal = {Chemical Science}} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_cav3_t_type_calcium_channels#no - &NULL}{activity_cav3_t_type_calcium_channels__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {activity_cav3_t_type_calcium_channels#no - &NULL}{activity_cav3_t_type_calcium_channels__names__noun}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_cav3_t_type_calcium_channels#no &NULL}{activity_cav3_t_type_calcium_channels__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}. 
- - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_cav3_t_type_calcium_channels__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {activity_cav3_t_type_calcium_channels#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_cav3_t_type_calcium_channels__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_cav3_t_type_calcium_channels__names__gerund}? - Assistant: {activity_cav3_t_type_calcium_channels#No&Yes}, this molecule is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {activity_cav3_t_type_calcium_channels__names__gerund}? - Assistant: {activity_cav3_t_type_calcium_channels#No&Yes}, it is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}? 
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}? - Assistant: This is a molecule that is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {activity_cav3_t_type_calcium_channels#not &NULL}be {activity_cav3_t_type_calcium_channels__names__gerund}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {activity_cav3_t_type_calcium_channels#not &NULL}be {activity_cav3_t_type_calcium_channels__names__gerund}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {activity_cav3_t_type_calcium_channels__names__gerund}:{activity_cav3_t_type_calcium_channels#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_cav3_t_type_calcium_channels__names__gerund}. 
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{activity_cav3_t_type_calcium_channels#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_cav3_t_type_calcium_channels__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result:This molecule is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}. - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_cav3_t_type_calcium_channels__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {activity_cav3_t_type_calcium_channels%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%activity_cav3_t_type_calcium_channels%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
- Options: - {SMILES%activity_cav3_t_type_calcium_channels%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_cav3_t_type_calcium_channels#no &NULL}{activity_cav3_t_type_calcium_channels__names__noun}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {activity_cav3_t_type_calcium_channels#no &NULL}{activity_cav3_t_type_calcium_channels__names__noun}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_cav3_t_type_calcium_channels#no &NULL}{activity_cav3_t_type_calcium_channels__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_cav3_t_type_calcium_channels__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {activity_cav3_t_type_calcium_channels#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_cav3_t_type_calcium_channels__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}. 
+ Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_cav3_t_type_calcium_channels__names__gerund}? + Assistant: {activity_cav3_t_type_calcium_channels#No&Yes}, this molecule is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {activity_cav3_t_type_calcium_channels__names__gerund}? + Assistant: {activity_cav3_t_type_calcium_channels#No&Yes}, it is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}? + Assistant: This is a molecule that is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {activity_cav3_t_type_calcium_channels#not &NULL}be {activity_cav3_t_type_calcium_channels__names__gerund}. 
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {activity_cav3_t_type_calcium_channels#not &NULL}be {activity_cav3_t_type_calcium_channels__names__gerund}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {activity_cav3_t_type_calcium_channels__names__gerund}:{activity_cav3_t_type_calcium_channels#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_cav3_t_type_calcium_channels__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{activity_cav3_t_type_calcium_channels#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_cav3_t_type_calcium_channels__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result:This molecule is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}. + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_cav3_t_type_calcium_channels__names__gerund}? 
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {activity_cav3_t_type_calcium_channels%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%activity_cav3_t_type_calcium_channels%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%activity_cav3_t_type_calcium_channels%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/chebi_20/meta.yaml b/data/tabular/chebi_20/meta.yaml index 75a6bd7e4..53a853540 100644 --- a/data/tabular/chebi_20/meta.yaml +++ b/data/tabular/chebi_20/meta.yaml @@ -1,108 +1,107 @@ ---- name: chebi_20 description: A dataset of pairs of natural language descriptions and SMILEs. 
targets: - - id: description - description: a natural language description of the molecule SMILE - units: - type: string - names: - - noun: natural language description - pubchem_aids: [] - uris: [] + - id: description + description: a natural language description of the molecule SMILE + units: + type: string + names: + - noun: natural language description + pubchem_aids: [] + uris: [] identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: CID - type: Other - names: - - noun: compound id - sample: false - description: This is the PubChem CID to identify a given molecule. + - id: SMILES + type: SMILES + description: SMILES + - id: CID + type: Other + names: + - noun: compound id + sample: false + description: This is the PubChem CID to identify a given molecule. license: CC BY 4.0 links: - - name: Research Paper - url: https://aclanthology.org/2021.emnlp-main.47/ - description: Original Text2Mol paper which introduced the chebi_20 dataset. - - name: Dataset - url: https://github.com/cnedwards/text2mol - description: Text2Mol original data repository on GitHub. - - name: Hugging Face dataset upload - url: https://huggingface.co/datasets/OpenBioML/chebi_20 - description: Hugging Face dataset uploaded to the OpenBioML organisation. + - name: Research Paper + url: https://aclanthology.org/2021.emnlp-main.47/ + description: Original Text2Mol paper which introduced the chebi_20 dataset. + - name: Dataset + url: https://github.com/cnedwards/text2mol + description: Text2Mol original data repository on GitHub. + - name: Hugging Face dataset upload + url: https://huggingface.co/datasets/OpenBioML/chebi_20 + description: Hugging Face dataset uploaded to the OpenBioML organisation. 
benchmarks: [] num_points: 33008 bibtex: - - |- - @inproceedings{edwards2021text2mol, - title={Text2Mol: Cross-Modal Molecule Retrieval with Natural Language Queries}, - author={Edwards, Carl and Zhai, ChengXiang and Ji, Heng}, - booktitle={Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing}, - pages={595--607}, - year={2021}, - url = {https://aclanthology.org/2021.emnlp-main.47/} - } - - |- - @inproceedings{edwards-etal-2022-translation, - title = "Translation between Molecules and Natural Language", - author = "Edwards, Carl and - Lai, Tuan and - Ros, Kevin and - Honke, Garrett and - Cho, Kyunghyun and - Ji, Heng", - booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing", - month = dec, - year = "2022", - address = "Abu Dhabi, United Arab Emirates", - publisher = "Association for Computational Linguistics", - url = "https://aclanthology.org/2022.emnlp-main.26", - pages = "375--413", - abstract = "We present MolT5 - a self-supervised learning framework for pretraining models on a vast amount of unlabeled natural language text and molecule strings. MolT5 allows for new, useful, and challenging analogs of traditional vision-language tasks, such as molecule captioning and text-based de novo molecule generation (altogether: translation between molecules and language), which we explore for the first time. Since MolT5 pretrains models on single-modal data, it helps overcome the chemistry domain shortcoming of data scarcity. Furthermore, we consider several metrics, including a new cross-modal embedding-based metric, to evaluate the tasks of molecule captioning and text-based molecule generation. 
Our results show that MolT5-based models are able to generate outputs, both molecules and captions, which in many cases are high quality.", - } + - |- + @inproceedings{edwards2021text2mol, + title={Text2Mol: Cross-Modal Molecule Retrieval with Natural Language Queries}, + author={Edwards, Carl and Zhai, ChengXiang and Ji, Heng}, + booktitle={Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing}, + pages={595--607}, + year={2021}, + url = {https://aclanthology.org/2021.emnlp-main.47/} + } + - |- + @inproceedings{edwards-etal-2022-translation, + title = "Translation between Molecules and Natural Language", + author = "Edwards, Carl and + Lai, Tuan and + Ros, Kevin and + Honke, Garrett and + Cho, Kyunghyun and + Ji, Heng", + booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing", + month = dec, + year = "2022", + address = "Abu Dhabi, United Arab Emirates", + publisher = "Association for Computational Linguistics", + url = "https://aclanthology.org/2022.emnlp-main.26", + pages = "375--413", + abstract = "We present MolT5 - a self-supervised learning framework for pretraining models on a vast amount of unlabeled natural language text and molecule strings. MolT5 allows for new, useful, and challenging analogs of traditional vision-language tasks, such as molecule captioning and text-based de novo molecule generation (altogether: translation between molecules and language), which we explore for the first time. Since MolT5 pretrains models on single-modal data, it helps overcome the chemistry domain shortcoming of data scarcity. Furthermore, we consider several metrics, including a new cross-modal embedding-based metric, to evaluate the tasks of molecule captioning and text-based molecule generation. 
Our results show that MolT5-based models are able to generate outputs, both molecules and captions, which in many cases are high quality.", + } templates: - - |- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} can be described {#by|as!}: - {description#} - - |- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule can be described {#by|as!}: - {description#} - - |- - Task: Please create a {#text |!}description for a molecule{# based on its representation|!}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question with {#full|complete!} sentences. - Result: {description#} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: {description#} - Result: {SMILES#} - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule based in this description: - {description#} - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that can be described {#by|as!}: - {description#} - Assistant: This is a molecule that fits {#your|this!} description: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule can be described {#by|as!}: - {description#} - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} fits {#your|this!} description: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? 
- User: Yes, the molecule can be described {#by|as!}: - {description#} - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} fits {#your|this!} description: {SMILES#} - - |- - Task: Please create a {#text |!}description for a molecule{# based on its representation|!}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question with {#full|complete!} sentences. - Result:{description#} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: {description#} - Result:{SMILES#} + - |- + The molecule with the {SMILES__description} {#representation of |!}{SMILES#} can be described {#by|as!}: + {description#} + - |- + Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule can be described {#by|as!}: + {description#} + - |- + Task: Please create a {#text |!}description for a molecule{# based on its representation|!}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question with {#full|complete!} sentences. + Result: {description#} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: {description#} + Result: {SMILES#} + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule based in this description: + {description#} + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that can be described {#by|as!}: + {description#} + Assistant: This is a molecule that fits {#your|this!} description: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? 
+ User: Yes, please. The molecule can be described {#by|as!}: + {description#} + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} fits {#your|this!} description: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule can be described {#by|as!}: + {description#} + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} fits {#your|this!} description: {SMILES#} + - |- + Task: Please create a {#text |!}description for a molecule{# based on its representation|!}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question with {#full|complete!} sentences. + Result:{description#} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: {description#} + Result:{SMILES#} diff --git a/data/tabular/chem_caption_smarts/meta.yaml b/data/tabular/chem_caption_smarts/meta.yaml index 57f8ecfbe..7e8982ccf 100644 --- a/data/tabular/chem_caption_smarts/meta.yaml +++ b/data/tabular/chem_caption_smarts/meta.yaml @@ -1,41 +1,40 @@ ---- name: chem_caption_smarts description: |- - This dataset contains the count of substructures in molecules + This dataset contains the count of substructures in molecules targets: - - id: smarts - type: text - description: substructure smarts - names: - - noun: SMARTS - - noun: SMiles ARbitrary Target Specification (SMARTS) - - id: completion - type: categorical - description: number of matches - - id: completion_labels - type: text - description: name of the substructure + - id: smarts + type: text + description: substructure smarts + names: + - noun: SMARTS + - noun: SMiles ARbitrary Target Specification (SMARTS) + - id: completion + type: categorical + description: number of matches + - id: completion_labels + 
type: text + description: name of the substructure identifiers: - - id: representation - type: text - description: representation - - id: representation_type - type: text - description: representation type + - id: representation + type: text + description: representation + - id: representation_type + type: text + description: representation type license: CC BY 4.0 links: - - url: https://github.com/lamalab-org/chem-caption - description: Original codebase used to generate this dataset + - url: https://github.com/lamalab-org/chem-caption + description: Original codebase used to generate this dataset templates: - - |- - Question: {#How many times|How often!} does the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contain the substructure with the {smarts__names__noun} {#smarts#}? - Answer: {completion#} - - |- - Question: {#How many times|How often!} does the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contain a {completion#} substructure? - Answer: {smarts__names__noun} {#smarts#} - - |- - User: {#I want to|I have to|I must|I would like to!} know {#how many times|how often!} the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contains the substructure with the {smarts__names__noun} {#smarts#}. - Assistant: The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contains the substructure with the {smarts__names__noun} {#smarts#} {completion#} times. - - |- - User: {#I want to|I have to|I must|I would like to!} know how many times the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contains a {completion#} substructure. - Assistant: The {#molecule|chemical|compound|chemical structure!} contains the substructure with the {smarts__names__noun} {#smarts#} {completion#} times. 
+ - |- + Question: {#How many times|How often!} does the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contain the substructure with the {smarts__names__noun} {#smarts#}? + Answer: {completion#} + - |- + Question: {#How many times|How often!} does the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contain a {completion#} substructure? + Answer: {smarts__names__noun} {#smarts#} + - |- + User: {#I want to|I have to|I must|I would like to!} know {#how many times|how often!} the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contains the substructure with the {smarts__names__noun} {#smarts#}. + Assistant: The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contains the substructure with the {smarts__names__noun} {#smarts#} {completion#} times. + - |- + User: {#I want to|I have to|I must|I would like to!} know how many times the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contains a {completion#} substructure. + Assistant: The {#molecule|chemical|compound|chemical structure!} contains the substructure with the {smarts__names__noun} {#smarts#} {completion#} times. diff --git a/data/tabular/chembl_v29/meta.yaml b/data/tabular/chembl_v29/meta.yaml index d729bd357..c6f09128e 100644 --- a/data/tabular/chembl_v29/meta.yaml +++ b/data/tabular/chembl_v29/meta.yaml @@ -1,48 +1,47 @@ ---- name: chembl_v29 description: |- - ChEMBL is a manually curated database of bioactive molecules with drug-like properties. - It brings together chemical, bioactivity and genomic data - to aid the translation of genomic information into effective new drugs. + ChEMBL is a manually curated database of bioactive molecules with drug-like properties. 
+ It brings together chemical, bioactivity and genomic data + to aid the translation of genomic information into effective new drugs. benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY-SA 3.0 links: - - url: https://academic.oup.com/nar/article/47/D1/D930/5162468 - description: Article about original dataset - - url: https://academic.oup.com/nar/article/43/W1/W612/2467881 - description: Exemplary related article shown in tdc's website + - url: https://academic.oup.com/nar/article/47/D1/D930/5162468 + description: Article about original dataset + - url: https://academic.oup.com/nar/article/43/W1/W612/2467881 + description: Exemplary related article shown in tdc's website num_points: 2084637 bibtex: - - |- - @article{10.1093/nar/gky1075, - author = {Mendez, David and Gaulton, Anna and Bento, A Patricia and Chambers, Jon and De Veij, - Marleen and Felix, Eloy and Magarinos, Maria Paula and Mosquera, - Juan F and Mutowo, Prudence and Nowotka, Michal and Gordillo-Maranon, - Maria and Hunter, Fiona and Junco, Laura and Mugumbate, Grace and Rodriguez-Lopez, Milagros and Atkinson, - Francis and Bosc, Nicolas and Radoux, Chris J and Segura-Cabrera, Aldo and Hersey, Anne and Leach, Andrew R}, - title = {ChEMBL: towards direct deposition of bioassay data}, - journal = {Nucleic Acids Research}, - volume = {47}, - number = {D1}, - pages = {D930-D940}, - year = {2018}, - month = {11}, - abstract = "{ChEMBL is a large, open-access bioactivity database - (https://www.ebi.ac.uk/chembl), previously described in the 2012, - 2014 and 2017 Nucleic Acids Research Database Issues. - In the last two years, several important improvements have been made to the database and are described here. 
- These include more robust capture and representation of assay details; - a new data deposition system, allowing updating of data sets and deposition of supplementary data; - and a completely redesigned web interface, with enhanced search and filtering capabilities.}", - issn = {0305-1048}, - doi = {10.1093/nar/gky1075}, - url = {https://doi.org/10.1093/nar/gky1075}, - eprint = {https://academic.oup.com/nar/article-pdf/47/D1/D930/27437436/gky1075.pdf}, - } + - |- + @article{10.1093/nar/gky1075, + author = {Mendez, David and Gaulton, Anna and Bento, A Patricia and Chambers, Jon and De Veij, + Marleen and Felix, Eloy and Magarinos, Maria Paula and Mosquera, + Juan F and Mutowo, Prudence and Nowotka, Michal and Gordillo-Maranon, + Maria and Hunter, Fiona and Junco, Laura and Mugumbate, Grace and Rodriguez-Lopez, Milagros and Atkinson, + Francis and Bosc, Nicolas and Radoux, Chris J and Segura-Cabrera, Aldo and Hersey, Anne and Leach, Andrew R}, + title = {ChEMBL: towards direct deposition of bioassay data}, + journal = {Nucleic Acids Research}, + volume = {47}, + number = {D1}, + pages = {D930-D940}, + year = {2018}, + month = {11}, + abstract = "{ChEMBL is a large, open-access bioactivity database + (https://www.ebi.ac.uk/chembl), previously described in the 2012, + 2014 and 2017 Nucleic Acids Research Database Issues. + In the last two years, several important improvements have been made to the database and are described here. 
+ These include more robust capture and representation of assay details; + a new data deposition system, allowing updating of data sets and deposition of supplementary data; + and a completely redesigned web interface, with enhanced search and filtering capabilities.}", + issn = {0305-1048}, + doi = {10.1093/nar/gky1075}, + url = {https://doi.org/10.1093/nar/gky1075}, + eprint = {https://academic.oup.com/nar/article-pdf/47/D1/D930/27437436/gky1075.pdf}, + } diff --git a/data/tabular/chemcaption_fragments/meta.yaml b/data/tabular/chemcaption_fragments/meta.yaml index d10c228c1..a6dc95c16 100644 --- a/data/tabular/chemcaption_fragments/meta.yaml +++ b/data/tabular/chemcaption_fragments/meta.yaml @@ -1,51 +1,50 @@ ---- name: chemcaption_fragments description: |- - Checks if a given fragment is present in a molecule. + Checks if a given fragment is present in a molecule. targets: - - id: presence - description: flag indicating whether the fragment is present in the molecule - type: boolean + - id: presence + description: flag indicating whether the fragment is present in the molecule + type: boolean identifiers: - - id: molecule - type: text - description: identifier of the molecule - - id: fragment - type: text - description: identifier of the fragment - - id: smarts - type: text - description: SMARTS of the fragment - - id: representation_type - type: text - description: representation type of the molecule + - id: molecule + type: text + description: identifier of the molecule + - id: fragment + type: text + description: identifier of the fragment + - id: smarts + type: text + description: SMARTS of the fragment + - id: representation_type + type: text + description: representation type of the molecule license: MIT links: - - url: https://github.com/lamalab-org/chem-caption - description: software used to generate the data + - url: https://github.com/lamalab-org/chem-caption + description: software used to generate the data num_points: 812177 templates: - - |- - 
{#Question: |Q: !}Is the fragment with SMARTs {smarts#} present in the molecule with {representation_type#} {molecule#}? - {#Answer: |A: |!}{presence#No&Yes} - - |- - {#Question: |Q: !}Is a {fragment#} fragment present in the molecule with {representation_type#} {molecule#}? - {#Answer: |A: |!}{presence#No&Yes} - - A {fragment#} fragment is {presence#present&absent} in the molecule with {representation_type#} {molecule#}. - - |- - Task: {#Answer a question about substructures|Answer a question about fragments!} - {#Question: |Q: !}Is the fragment with SMARTS {smarts#} {#present in|part of!} the molecule with {representation_type#} {molecule#}? - {#Answer: |A: |!}{presence#No&Yes} - - |- - User: Is the fragment {fragment#} {#present in|part of!} the molecule with {representation_type#} {molecule#}? - Assistant: {presence#No&Yes} - - |- - User: I have a question about the molecule with {representation_type#} {molecule#}. - Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!} - User: Is a {fragment#} fragment {#present in|part of!} the molecule? - Assistant: {presence#No&Yes} - - |- - User: I want to know more about the molecule with {representation_type#} {molecule#}. - Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!} - User: Is a {fragment#} fragment {#present in|part of of|substructure of!} the molecule? - Assistant: {presence#No&Yes} + - |- + {#Question: |Q: !}Is the fragment with SMARTs {smarts#} present in the molecule with {representation_type#} {molecule#}? + {#Answer: |A: |!}{presence#No&Yes} + - |- + {#Question: |Q: !}Is a {fragment#} fragment present in the molecule with {representation_type#} {molecule#}? + {#Answer: |A: |!}{presence#No&Yes} + - A {fragment#} fragment is {presence#present&absent} in the molecule with {representation_type#} {molecule#}. 
+ - |- + Task: {#Answer a question about substructures|Answer a question about fragments!} + {#Question: |Q: !}Is the fragment with SMARTS {smarts#} {#present in|part of!} the molecule with {representation_type#} {molecule#}? + {#Answer: |A: |!}{presence#No&Yes} + - |- + User: Is the fragment {fragment#} {#present in|part of!} the molecule with {representation_type#} {molecule#}? + Assistant: {presence#No&Yes} + - |- + User: I have a question about the molecule with {representation_type#} {molecule#}. + Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!} + User: Is a {fragment#} fragment {#present in|part of!} the molecule? + Assistant: {presence#No&Yes} + - |- + User: I want to know more about the molecule with {representation_type#} {molecule#}. + Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!} + User: Is a {fragment#} fragment {#present in|part of of|substructure of!} the molecule? + Assistant: {presence#No&Yes} diff --git a/data/tabular/chemcaption_rdkit/meta.yaml b/data/tabular/chemcaption_rdkit/meta.yaml index 99424eed9..69057f470 100644 --- a/data/tabular/chemcaption_rdkit/meta.yaml +++ b/data/tabular/chemcaption_rdkit/meta.yaml @@ -1,342 +1,332 @@ ---- name: chemcaption_rdkit description: |- - This dataset contains molecular descriptors, mostly derived using RDKit. + This dataset contains molecular descriptors, mostly derived using RDKit. 
targets: - - id: num_valence_electrons - type: categorical - description: number of valence electrons - names: - - noun: number of valence electrons - - noun: valence electron count - - only_name: valence electrons - - id: rotable_proportion - type: continuous - significant_digits: 3 - description: proportion of rotatable bonds - names: - - noun: proportion of rotatable bonds - - noun: rotatable bond proportion - - id: non_rotable_proportion - type: continuous - significant_digits: 3 - description: proportion of non-rotatable bonds - names: - - noun: proportion of non-rotatable bonds - - noun: non-rotatable bond proportion - - id: num_single_bonds - type: categorical - description: number of single bonds - names: - - noun: number of single bonds - - only_name: single bonds - - id: num_double_bonds - type: categorical - description: number of double bonds - names: - - noun: number of double bonds - - only_name: double bonds - - id: num_triple_bonds - type: categorical - description: number of triple bonds - names: - - noun: number of triple bonds - - only_name: triple bonds - - id: num_aromatic_bonds - type: categorical - description: number of aromatic bonds - names: - - noun: number of aromatic bonds - - only_name: aromatic bonds - - id: num_bonds - type: categorical - description: number of bonds - names: - - noun: number of bonds - - noun: bond count - - only_name: bonds - - id: num_carbon_atoms - type: categorical - description: number of carbon atoms - names: - - noun: number of carbon atoms - - noun: carbon atom count - - only_name: carbon atoms - - id: num_hydrogen_atoms - type: categorical - description: number of hydrogen atoms - names: - - noun: number of hydrogen atoms - - noun: hydrogen atom count - - only_name: hydrogen atoms - - id: num_nitrogen_atoms - type: categorical - description: number of nitrogen atoms - names: - - noun: number of nitrogen atoms - - noun: nitrogen atom count - - only_name: nitrogen atoms - - id: num_oxygen_atoms - type: 
categorical - description: number of oxygen atoms - names: - - noun: number of oxygen atoms - - noun: oxygen atom count - - only_name: oxygen atoms - - id: num_hydrogen_bond_acceptors - type: categorical - description: number of hydrogen bond acceptors - names: - - noun: number of hydrogen bond acceptors - - noun: hydrogen bond acceptor count - - only_name: hydrogen bond acceptors - - id: num_hydrogen_bond_donors - type: categorical - description: number of hydrogen bond donors - names: - - noun: number of hydrogen bond donors - - noun: hydrogen bond donor count - - only_name: hydrogen bond donors - - id: num_lipinski_violations - type: categorical - description: number of Lipinski violations - names: - - noun: number of violations of Lipinski's rule of five - - noun: number of violations of Lipinski's rule of 5 - - only_name: violations of Lipinski's rule of five - - only_name: violations of Lipinski's rule of 5 - - id: monoisotopic_molecular_mass - type: continuous - significant_digits: 3 - description: monoisotopic molecular mass - names: - - noun: monoisotopic molecular mass - - noun: monoisotopic mass - units: Da - - id: carbon_mass - type: continuous - significant_digits: 3 - description: carbon mass - names: - - noun: carbon mass fraction - - noun: carbon mass proportion - - id: hydrogen_mass - type: continuous - significant_digits: 3 - description: hydrogen mass - names: - - noun: hydrogen mass fraction - - noun: hydrogen mass proportion - - id: nitrogen_mass - type: continuous - significant_digits: 3 - description: nitrogen mass - names: - - noun: nitrogen mass fraction - - noun: nitrogen mass proportion - - id: oxygen_mass - type: continuous - significant_digits: 3 - description: oxygen mass - names: - - noun: oxygen mass fraction - - noun: oxygen mass proportion - - id: num_chiral_centers - type: categorical - description: number of chiral centers - names: - - noun: number of chiral centers - - noun: chiral center count - - only_name: chiral centers - - 
id: inertial_shape_factor - type: continuous - significant_digits: 3 - description: inertial shape factor - names: - - noun: inertial shape factor - - id: eccentricity - type: continuous - significant_digits: 3 - description: eccentricity - names: - - noun: eccentricity - - id: asphericity - type: continuous - significant_digits: 3 - description: asphericity - names: - - noun: asphericity - - id: npr1_value - type: continuous - significant_digits: 3 - description: NPR1 value - names: - - noun: NPR1 value - - noun: normalized principal moment of inertia ratio 1 value - - noun: normalized principal moment of inertia ratio 1 (NPR1) value - - id: npr2_value - type: continuous - significant_digits: 3 - description: NPR2 value - names: - - noun: NPR2 value - - noun: normalized principal moment of inertia ratio 2 value - - noun: normalized principal moment of inertia ratio 2 (NPR2) value - - id: pmi1_value - type: continuous - significant_digits: 3 - description: PMI1 value - names: - - noun: PMI1 value - - noun: principal moment of inertia 1 value - - noun: principal moment of inertia 1 (PMI1) value - - id: pmi2_value - type: continuous - significant_digits: 3 - description: PMI2 value - names: - - noun: PMI2 value - - noun: principal moment of inertia 2 value - - noun: principal moment of inertia 2 (PMI2) value - - id: molecular_formula - type: text - description: molecular formula - names: - - noun: molecular formula - - noun: chemical formula + - id: num_valence_electrons + type: categorical + description: number of valence electrons + names: + - noun: number of valence electrons + - noun: valence electron count + - only_name: valence electrons + - id: rotable_proportion + type: continuous + significant_digits: 3 + description: proportion of rotatable bonds + names: + - noun: proportion of rotatable bonds + - noun: rotatable bond proportion + - id: non_rotable_proportion + type: continuous + significant_digits: 3 + description: proportion of non-rotatable bonds + 
names: + - noun: proportion of non-rotatable bonds + - noun: non-rotatable bond proportion + - id: num_single_bonds + type: categorical + description: number of single bonds + names: + - noun: number of single bonds + - only_name: single bonds + - id: num_double_bonds + type: categorical + description: number of double bonds + names: + - noun: number of double bonds + - only_name: double bonds + - id: num_triple_bonds + type: categorical + description: number of triple bonds + names: + - noun: number of triple bonds + - only_name: triple bonds + - id: num_aromatic_bonds + type: categorical + description: number of aromatic bonds + names: + - noun: number of aromatic bonds + - only_name: aromatic bonds + - id: num_bonds + type: categorical + description: number of bonds + names: + - noun: number of bonds + - noun: bond count + - only_name: bonds + - id: num_carbon_atoms + type: categorical + description: number of carbon atoms + names: + - noun: number of carbon atoms + - noun: carbon atom count + - only_name: carbon atoms + - id: num_hydrogen_atoms + type: categorical + description: number of hydrogen atoms + names: + - noun: number of hydrogen atoms + - noun: hydrogen atom count + - only_name: hydrogen atoms + - id: num_nitrogen_atoms + type: categorical + description: number of nitrogen atoms + names: + - noun: number of nitrogen atoms + - noun: nitrogen atom count + - only_name: nitrogen atoms + - id: num_oxygen_atoms + type: categorical + description: number of oxygen atoms + names: + - noun: number of oxygen atoms + - noun: oxygen atom count + - only_name: oxygen atoms + - id: num_hydrogen_bond_acceptors + type: categorical + description: number of hydrogen bond acceptors + names: + - noun: number of hydrogen bond acceptors + - noun: hydrogen bond acceptor count + - only_name: hydrogen bond acceptors + - id: num_hydrogen_bond_donors + type: categorical + description: number of hydrogen bond donors + names: + - noun: number of hydrogen bond donors + - noun: 
hydrogen bond donor count + - only_name: hydrogen bond donors + - id: num_lipinski_violations + type: categorical + description: number of Lipinski violations + names: + - noun: number of violations of Lipinski's rule of five + - noun: number of violations of Lipinski's rule of 5 + - only_name: violations of Lipinski's rule of five + - only_name: violations of Lipinski's rule of 5 + - id: monoisotopic_molecular_mass + type: continuous + significant_digits: 3 + description: monoisotopic molecular mass + names: + - noun: monoisotopic molecular mass + - noun: monoisotopic mass + units: Da + - id: carbon_mass + type: continuous + significant_digits: 3 + description: carbon mass + names: + - noun: carbon mass fraction + - noun: carbon mass proportion + - id: hydrogen_mass + type: continuous + significant_digits: 3 + description: hydrogen mass + names: + - noun: hydrogen mass fraction + - noun: hydrogen mass proportion + - id: nitrogen_mass + type: continuous + significant_digits: 3 + description: nitrogen mass + names: + - noun: nitrogen mass fraction + - noun: nitrogen mass proportion + - id: oxygen_mass + type: continuous + significant_digits: 3 + description: oxygen mass + names: + - noun: oxygen mass fraction + - noun: oxygen mass proportion + - id: num_chiral_centers + type: categorical + description: number of chiral centers + names: + - noun: number of chiral centers + - noun: chiral center count + - only_name: chiral centers + - id: inertial_shape_factor + type: continuous + significant_digits: 3 + description: inertial shape factor + names: + - noun: inertial shape factor + - id: eccentricity + type: continuous + significant_digits: 3 + description: eccentricity + names: + - noun: eccentricity + - id: asphericity + type: continuous + significant_digits: 3 + description: asphericity + names: + - noun: asphericity + - id: npr1_value + type: continuous + significant_digits: 3 + description: NPR1 value + names: + - noun: NPR1 value + - noun: normalized principal 
moment of inertia ratio 1 value + - noun: normalized principal moment of inertia ratio 1 (NPR1) value + - id: npr2_value + type: continuous + significant_digits: 3 + description: NPR2 value + names: + - noun: NPR2 value + - noun: normalized principal moment of inertia ratio 2 value + - noun: normalized principal moment of inertia ratio 2 (NPR2) value + - id: pmi1_value + type: continuous + significant_digits: 3 + description: PMI1 value + names: + - noun: PMI1 value + - noun: principal moment of inertia 1 value + - noun: principal moment of inertia 1 (PMI1) value + - id: pmi2_value + type: continuous + significant_digits: 3 + description: PMI2 value + names: + - noun: PMI2 value + - noun: principal moment of inertia 2 value + - noun: principal moment of inertia 2 (PMI2) value + - id: molecular_formula + type: text + description: molecular formula + names: + - noun: molecular formula + - noun: chemical formula identifiers: - - id: representation - type: text - description: representation - - id: representation_type - type: text - description: representation type + - id: representation + type: text + description: representation + - id: representation_type + type: text + description: representation type license: CC BY 4.0 num_points: 79811 links: - - url: https://github.com/lamalab-org/chem-caption - description: Original codebase used to generate this dataset + - url: https://github.com/lamalab-org/chem-caption + description: Original codebase used to generate this dataset templates: - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_valence_electrons#} {num_valence_electrons__names__only_name}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {rotable_proportion__names__noun} of {rotable_proportion#}. 
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {non_rotable_proportion__names__noun} of - {non_rotable_proportion#}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_single_bonds#} {num_single_bonds__names__only_name}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_double_bonds#} {num_double_bonds__names__only_name}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_triple_bonds#} {num_triple_bonds__names__only_name}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_aromatic_bonds#} {num_aromatic_bonds__names__only_name}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_bonds#} {num_bonds__names__only_name}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_carbon_atoms#} {num_carbon_atoms__names__only_name}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_hydrogen_atoms#} {num_hydrogen_atoms__names__only_name}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_nitrogen_atoms#} {num_nitrogen_atoms__names__only_name}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_oxygen_atoms#} {num_oxygen_atoms__names__only_name}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__only_name}. 
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__only_name}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_lipinski_violations#} {num_lipinski_violations__names__only_name}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {monoisotopic_molecular_mass__names__noun} - of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {carbon_mass__names__noun} of {carbon_mass#}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {hydrogen_mass__names__noun} of {hydrogen_mass#}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {nitrogen_mass__names__noun} of {nitrogen_mass#}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {oxygen_mass__names__noun} of {oxygen_mass#}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_chiral_centers#} {num_chiral_centers__names__only_name}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {inertial_shape_factor__names__noun} of - {inertial_shape_factor#}. - - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {eccentricity__names__noun} - of {eccentricity#}. - - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {asphericity__names__noun} - of {asphericity#}. 
- - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {npr1_value__names__noun} - of {npr1_value#}. - - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {npr2_value__names__noun} - of {npr2_value#}. - - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {pmi1_value__names__noun} - of {pmi1_value#}. - - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {pmi2_value__names__noun} - of {pmi2_value#}. - - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has the {molecular_formula__names__noun} {molecular_formula#}. - - |- - Question: What is the {molecular_formula__names__noun} and {monoisotopic_molecular_mass__names__noun} of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}? - Constraint: Answer by only returning the values separated by a comma. - Answer: {molecular_formula#}, {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units} - - |- - Question: What is the {molecular_formula__names__noun} and {num_valence_electrons__names__noun} of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}? - Constraint: Answer by only returning the values separated by a comma. - Answer: {molecular_formula#}, {num_valence_electrons#} - - |- - Question: What is the {molecular_formula__names__noun}, {rotable_proportion__names__noun}, and {num_chiral_centers__names__noun} of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}? - Constraint: Answer by only returning the values separated by a comma. 
- Answer: {molecular_formula#}, {rotable_proportion#}, {num_chiral_centers#} - - |- - Question: What is the {carbon_mass__names__noun}, {hydrogen_mass__names__noun}, {nitrogen_mass__names__noun}, and {oxygen_mass__names__noun} of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}? - Constraint: Answer by only returning the values separated by a comma. - Answer: {carbon_mass#}, {hydrogen_mass#}, {nitrogen_mass#}, {oxygen_mass#} - - |- - User: I {#want|need|have!} to design a {#molecule|chemical|compound|chemical structure!} with {molecular_formula__names__noun} {molecular_formula#}. - Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? - User: I {#want|would like!} the {#molecule|chemical|compound|chemical structure!} to have {num_valence_electrons#} {num_valence_electrons__names__noun}, {num_chiral_centers#} {num_chiral_centers__names__only_name}, and {num_lipinski_violations#} {num_lipinski_violations__names__only_name}. - Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}. - - |- - User: I {#want|need|have!} to design a {#molecule|chemical|compound|chemical structure!} with {num_lipinski_violations#} {num_lipinski_violations__names__only_name}. - Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? - User: I {#want|would like!} the {#molecule|chemical|compound|chemical structure!} to have {num_bonds#} {num_bonds__names__only_name}, {num_chiral_centers#} {num_chiral_centers__names__only_name}, and a {carbon_mass__names__noun} of {carbon_mass#}. 
- Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}. - - |- - User: I {#want|need|have!} to design a {#molecule|chemical|compound|drug|chemical structure!} with {num_lipinski_violations#} {num_lipinski_violations__names__only_name}. - Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? - User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_bonds#} {num_bonds__names__only_name}. - Assistant: {#OK, that already helps constraining my search. |Thanks, that already helps constraining my search. |Thanks, that already helps. |OK, that already helps. |!}It would {#help|be great|be useful!} if you could tell me more about the {#molecule|chemical|compound|drug|chemical structure!} you are looking for. - User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_chiral_centers#} {num_chiral_centers__names__only_name}. {#In addition,|Additionally,|Moreover,!} I want the {#molecule|chemical|compound|drug|chemical structure!} to have a {carbon_mass__names__noun} of {carbon_mass#}. - Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. - - |- - User: I {#want|need|have!} to design a {#molecule|chemical|compound|drug|chemical structure!} with {num_lipinski_violations#} {num_lipinski_violations__names__only_name}. - Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? - User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_bonds#} {num_bonds__names__only_name}. 
- Assistant: {#OK, that already helps constraining my search. |Thanks, that already helps constraining my search. |Thanks, that already helps. |OK, that already helps. |!}It would {#help|be great|be useful!} if you could tell me more about the {#molecule|chemical|compound|drug|chemical structure!} you {#want to design|are looking for|are interested in!}. - User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_chiral_centers#} {num_chiral_centers__names__only_name}. {#In addition,|Additionally,|Moreover,!} I want the {#molecule|chemical|compound|drug|chemical structure!} to have a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. - Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. - - |- - User: I {#want|need|have!} to design a {#molecule|chemical|compound|drug|chemical structure!} with a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. - Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? - User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_bonds#} {num_bonds__names__only_name}. - Assistant: {#OK, that already helps constraining my search. |Thanks, that already helps constraining my search. |Thanks, that already helps. |OK, that already helps. |!}It would {#help|be great|be useful!} if you could tell me more about the {#molecule|chemical|compound|drug|chemical structure!} you {#want to design|are looking for|are interested in!}. - User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_chiral_centers#} {num_chiral_centers__names__only_name}. 
- Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. - - |- - User: I {#have some questions|want to ask you!} about the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. - Assistant: {#How can I help?|What can I do for you?|How can I be of assistance?!} - User: {#What is|I want to know|I need to know!} the {molecular_formula__names__noun} and {monoisotopic_molecular_mass__names__noun} of this {#molecule|chemical|compound|drug|chemical structure!}. - Assistant: The {#molecule|chemical|compound|drug|chemical structure!} has the {molecular_formula__names__noun} {molecular_formula#} and a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. - - |- - User: I {#have some questions|want to ask you!} about the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. - Assistant: {#How can I help?|What can I do for you?|How can I be of assistance?!} - User: {#What is|I want to know|I need to know!} the {asphericity__names__noun} of this {#molecule|chemical|compound|drug|chemical structure!}. - Assistant: The {#molecule|chemical|compound|drug|chemical structure!} has an {asphericity__names__noun} of {asphericity#}. - - |- - User: I {#want to|must|would like to|need to!} {#design|create|synthesize|make!} a {#molecule|chemical|compound|drug|chemical structure!} with {asphericity__names__noun} of {asphericity#}. - Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? - User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {eccentricity__names__noun} of {eccentricity#}. 
- Assistant: {#Is there anything else I should know?|Is there anything else I should be aware of?|Is there anything else I should take into account?|Is there anything else I should consider?|Is there anything else I should take into consideration?|Is there anything else I should take into account?|Is there anything else I should take into consideration?|Is there anything else I should consider?!} - User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {npr1_value__names__noun} of {npr1_value#} and a {molecular_formula__names__noun} of {molecular_formula#}. - Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. - - |- - User: I {#want to|must|would like to|need to!} {#design|create|synthesize|make!} a {#molecule|chemical|compound|drug|chemical structure!} with {eccentricity__names__noun} of {eccentricity#}. - Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? - User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {asphericity__names__noun} of {asphericity#}. 
- Assistant: {#Is there anything else I should know?|Is there anything else I should be aware of?|Is there anything else I should take into account?|Is there anything else I should consider?|Is there anything else I should take into consideration?|Is there anything else I should take into account?|Is there anything else I should take into consideration?|Is there anything else I should consider?!} - User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {num_lipinski_violations#} {num_lipinski_violations__names__only_name} and a {molecular_formula__names__noun} of {molecular_formula#}. - Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. - - |- - User: I {#want to|must|would like to|need to!} {#design|create|synthesize|make!} a {#molecule|chemical|compound|drug|chemical structure!} with {npr1_value__names__noun} of {npr1_value#}. - Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? - User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {npr2_value__names__noun} of {npr2_value#}. - Assistant: {#Is there anything else I should know?|Is there anything else I should be aware of?|Is there anything else I should take into account?|Is there anything else I should consider?|Is there anything else I should take into consideration?|Is there anything else I should take into account?|Is there anything else I should take into consideration?|Is there anything else I should consider?!} - User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {molecular_formula__names__noun} of {molecular_formula#}. 
- Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. - - |- - User: I {#want to|must|would like to|need to!} {#design|create|synthesize|make!} a {#molecule|chemical|compound|drug|chemical structure!} with {pmi1_value__names__noun} of {pmi1_value#}. - Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? - User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {pmi2_value__names__noun} of {pmi2_value#}. - Assistant: {#Is there anything else I should know?|Is there anything else I should be aware of?|Is there anything else I should take into account?|Is there anything else I should consider?|Is there anything else I should take into consideration?|Is there anything else I should take into account?|Is there anything else I should take into consideration?|Is there anything else I should consider?!} - User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {molecular_formula__names__noun} of {molecular_formula#}. - Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_valence_electrons#} {num_valence_electrons__names__only_name}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {rotable_proportion__names__noun} of {rotable_proportion#}. 
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {non_rotable_proportion__names__noun} of {non_rotable_proportion#}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_single_bonds#} {num_single_bonds__names__only_name}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_double_bonds#} {num_double_bonds__names__only_name}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_triple_bonds#} {num_triple_bonds__names__only_name}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_aromatic_bonds#} {num_aromatic_bonds__names__only_name}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_bonds#} {num_bonds__names__only_name}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_carbon_atoms#} {num_carbon_atoms__names__only_name}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_hydrogen_atoms#} {num_hydrogen_atoms__names__only_name}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_nitrogen_atoms#} {num_nitrogen_atoms__names__only_name}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_oxygen_atoms#} {num_oxygen_atoms__names__only_name}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__only_name}. 
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__only_name}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_lipinski_violations#} {num_lipinski_violations__names__only_name}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {carbon_mass__names__noun} of {carbon_mass#}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {hydrogen_mass__names__noun} of {hydrogen_mass#}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {nitrogen_mass__names__noun} of {nitrogen_mass#}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {oxygen_mass__names__noun} of {oxygen_mass#}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_chiral_centers#} {num_chiral_centers__names__only_name}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {inertial_shape_factor__names__noun} of {inertial_shape_factor#}. + - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {eccentricity__names__noun} of {eccentricity#}. + - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {asphericity__names__noun} of {asphericity#}. 
+ - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {npr1_value__names__noun} of {npr1_value#}. + - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {npr2_value__names__noun} of {npr2_value#}. + - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {pmi1_value__names__noun} of {pmi1_value#}. + - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {pmi2_value__names__noun} of {pmi2_value#}. + - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has the {molecular_formula__names__noun} {molecular_formula#}. + - |- + Question: What is the {molecular_formula__names__noun} and {monoisotopic_molecular_mass__names__noun} of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}? + Constraint: Answer by only returning the values separated by a comma. + Answer: {molecular_formula#}, {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units} + - |- + Question: What is the {molecular_formula__names__noun} and {num_valence_electrons__names__noun} of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}? + Constraint: Answer by only returning the values separated by a comma. + Answer: {molecular_formula#}, {num_valence_electrons#} + - |- + Question: What is the {molecular_formula__names__noun}, {rotable_proportion__names__noun}, and {num_chiral_centers__names__noun} of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}? + Constraint: Answer by only returning the values separated by a comma. 
+ Answer: {molecular_formula#}, {rotable_proportion#}, {num_chiral_centers#} + - |- + Question: What is the {carbon_mass__names__noun}, {hydrogen_mass__names__noun}, {nitrogen_mass__names__noun}, and {oxygen_mass__names__noun} of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}? + Constraint: Answer by only returning the values separated by a comma. + Answer: {carbon_mass#}, {hydrogen_mass#}, {nitrogen_mass#}, {oxygen_mass#} + - |- + User: I {#want|need|have!} to design a {#molecule|chemical|compound|chemical structure!} with {molecular_formula__names__noun} {molecular_formula#}. + Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? + User: I {#want|would like!} the {#molecule|chemical|compound|chemical structure!} to have {num_valence_electrons#} {num_valence_electrons__names__noun}, {num_chiral_centers#} {num_chiral_centers__names__only_name}, and {num_lipinski_violations#} {num_lipinski_violations__names__only_name}. + Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}. + - |- + User: I {#want|need|have!} to design a {#molecule|chemical|compound|chemical structure!} with {num_lipinski_violations#} {num_lipinski_violations__names__only_name}. + Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? + User: I {#want|would like!} the {#molecule|chemical|compound|chemical structure!} to have {num_bonds#} {num_bonds__names__only_name}, {num_chiral_centers#} {num_chiral_centers__names__only_name}, and a {carbon_mass__names__noun} of {carbon_mass#}. 
+ Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}. + - |- + User: I {#want|need|have!} to design a {#molecule|chemical|compound|drug|chemical structure!} with {num_lipinski_violations#} {num_lipinski_violations__names__only_name}. + Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? + User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_bonds#} {num_bonds__names__only_name}. + Assistant: {#OK, that already helps constraining my search. |Thanks, that already helps constraining my search. |Thanks, that already helps. |OK, that already helps. |!}It would {#help|be great|be useful!} if you could tell me more about the {#molecule|chemical|compound|drug|chemical structure!} you are looking for. + User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_chiral_centers#} {num_chiral_centers__names__only_name}. {#In addition,|Additionally,|Moreover,!} I want the {#molecule|chemical|compound|drug|chemical structure!} to have a {carbon_mass__names__noun} of {carbon_mass#}. + Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. + - |- + User: I {#want|need|have!} to design a {#molecule|chemical|compound|drug|chemical structure!} with {num_lipinski_violations#} {num_lipinski_violations__names__only_name}. + Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? + User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_bonds#} {num_bonds__names__only_name}. 
+ Assistant: {#OK, that already helps constraining my search. |Thanks, that already helps constraining my search. |Thanks, that already helps. |OK, that already helps. |!}It would {#help|be great|be useful!} if you could tell me more about the {#molecule|chemical|compound|drug|chemical structure!} you {#want to design|are looking for|are interested in!}. + User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_chiral_centers#} {num_chiral_centers__names__only_name}. {#In addition,|Additionally,|Moreover,!} I want the {#molecule|chemical|compound|drug|chemical structure!} to have a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. + Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. + - |- + User: I {#want|need|have!} to design a {#molecule|chemical|compound|drug|chemical structure!} with a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. + Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? + User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_bonds#} {num_bonds__names__only_name}. + Assistant: {#OK, that already helps constraining my search. |Thanks, that already helps constraining my search. |Thanks, that already helps. |OK, that already helps. |!}It would {#help|be great|be useful!} if you could tell me more about the {#molecule|chemical|compound|drug|chemical structure!} you {#want to design|are looking for|are interested in!}. + User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_chiral_centers#} {num_chiral_centers__names__only_name}. 
+ Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. + - |- + User: I {#have some questions|want to ask you!} about the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. + Assistant: {#How can I help?|What can I do for you?|How can I be of assistance?!} + User: {#What is|I want to know|I need to know!} the {molecular_formula__names__noun} and {monoisotopic_molecular_mass__names__noun} of this {#molecule|chemical|compound|drug|chemical structure!}. + Assistant: The {#molecule|chemical|compound|drug|chemical structure!} has the {molecular_formula__names__noun} {molecular_formula#} and a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. + - |- + User: I {#have some questions|want to ask you!} about the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. + Assistant: {#How can I help?|What can I do for you?|How can I be of assistance?!} + User: {#What is|I want to know|I need to know!} the {asphericity__names__noun} of this {#molecule|chemical|compound|drug|chemical structure!}. + Assistant: The {#molecule|chemical|compound|drug|chemical structure!} has an {asphericity__names__noun} of {asphericity#}. + - |- + User: I {#want to|must|would like to|need to!} {#design|create|synthesize|make!} a {#molecule|chemical|compound|drug|chemical structure!} with {asphericity__names__noun} of {asphericity#}. + Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? + User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {eccentricity__names__noun} of {eccentricity#}. 
+ Assistant: {#Is there anything else I should know?|Is there anything else I should be aware of?|Is there anything else I should take into account?|Is there anything else I should consider?|Is there anything else I should take into consideration?|Is there anything else I should take into account?|Is there anything else I should take into consideration?|Is there anything else I should consider?!} + User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {npr1_value__names__noun} of {npr1_value#} and a {molecular_formula__names__noun} of {molecular_formula#}. + Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. + - |- + User: I {#want to|must|would like to|need to!} {#design|create|synthesize|make!} a {#molecule|chemical|compound|drug|chemical structure!} with {eccentricity__names__noun} of {eccentricity#}. + Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? + User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {asphericity__names__noun} of {asphericity#}. 
+ Assistant: {#Is there anything else I should know?|Is there anything else I should be aware of?|Is there anything else I should take into account?|Is there anything else I should consider?|Is there anything else I should take into consideration?|Is there anything else I should take into account?|Is there anything else I should take into consideration?|Is there anything else I should consider?!} + User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {num_lipinski_violations#} {num_lipinski_violations__names__only_name} and a {molecular_formula__names__noun} of {molecular_formula#}. + Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. + - |- + User: I {#want to|must|would like to|need to!} {#design|create|synthesize|make!} a {#molecule|chemical|compound|drug|chemical structure!} with {npr1_value__names__noun} of {npr1_value#}. + Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? + User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {npr2_value__names__noun} of {npr2_value#}. + Assistant: {#Is there anything else I should know?|Is there anything else I should be aware of?|Is there anything else I should take into account?|Is there anything else I should consider?|Is there anything else I should take into consideration?|Is there anything else I should take into account?|Is there anything else I should take into consideration?|Is there anything else I should consider?!} + User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {molecular_formula__names__noun} of {molecular_formula#}. 
+ Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. + - |- + User: I {#want to|must|would like to|need to!} {#design|create|synthesize|make!} a {#molecule|chemical|compound|drug|chemical structure!} with {pmi1_value__names__noun} of {pmi1_value#}. + Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}? + User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {pmi2_value__names__noun} of {pmi2_value#}. + Assistant: {#Is there anything else I should know?|Is there anything else I should be aware of?|Is there anything else I should take into account?|Is there anything else I should consider?|Is there anything else I should take into consideration?|Is there anything else I should take into account?|Is there anything else I should take into consideration?|Is there anything else I should consider?!} + User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {molecular_formula__names__noun} of {molecular_formula#}. + Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}. diff --git a/data/tabular/chemdner/meta.yaml b/data/tabular/chemdner/meta.yaml index 4a207ba67..dad41aa94 100644 --- a/data/tabular/chemdner/meta.yaml +++ b/data/tabular/chemdner/meta.yaml @@ -1,97 +1,96 @@ ---- name: chemdner description: |- - The CHEMDNER corpus comprises 10,000 PubMed abstracts, which have been meticulously annotated by expert chemistry literature curators according to task-specific guidelines, identifying a total of 84,355 mentions of chemical entities. 
The CHEMDNER corpus is a collection of 10,000 PubMed abstracts that contain a total of 84,355 chemical entity mentions labeled manually by expert chemistry literature curators, following annotation guidelines specifically defined for this task. + The CHEMDNER corpus comprises 10,000 PubMed abstracts, which have been meticulously annotated by expert chemistry literature curators according to task-specific guidelines, identifying a total of 84,355 mentions of chemical entities. The CHEMDNER corpus is a collection of 10,000 PubMed abstracts that contain a total of 84,355 chemical entity mentions labeled manually by expert chemistry literature curators, following annotation guidelines specifically defined for this task. targets: - - id: matched_words - description: matched words - type: text - names: - - noun: entity - - noun: matched entity + - id: matched_words + description: matched words + type: text + names: + - noun: entity + - noun: matched entity identifiers: - - id: sentence - description: Sentence - type: text - names: - - noun: sentence - - noun: text + - id: sentence + description: Sentence + type: text + names: + - noun: sentence + - noun: text license: unknown links: - - url: https://huggingface.co/datasets/bigbio/chemdner - description: original dataset + - url: https://huggingface.co/datasets/bigbio/chemdner + description: original dataset benchmarks: - - name: chemdner - link: hhttps://huggingface.co/datasets/bigbio/blurb - split_column: split + - name: chemdner + link: https://huggingface.co/datasets/bigbio/blurb + split_column: split num_points: 19440 bibtex: - - |- - @article{Krallinger2015, - title = {The CHEMDNER corpus of chemicals and drugs and its annotation principles}, - author = { - Krallinger, Martin and Rabal, Obdulia and Leitner, Florian and Vazquez, - Miguel and Salgado, David and Lu, Zhiyong and Leaman, Robert and Lu, Yanan - and Ji, Donghong and Lowe, Daniel M. and Sayle, Roger A.
and - Batista-Navarro, Riza Theresa and Rak, Rafal and Huber, Torsten and - Rockt{"a}schel, Tim and Matos, S{'e}rgio and Campos, David and Tang, - Buzhou and Xu, Hua and Munkhdalai, Tsendsuren and Ryu, Keun Ho and Ramanan, - S. V. and Nathan, Senthil and {{Z}}itnik, Slavko and Bajec, Marko and - Weber, Lutz and Irmer, Matthias and Akhondi, Saber A. and Kors, Jan A. and - Xu, Shuo and An, Xin and Sikdar, Utpal Kumar and Ekbal, Asif and Yoshioka, - Masaharu and Dieb, Thaer M. and Choi, Miji and Verspoor, Karin and Khabsa, - Madian and Giles, C. Lee and Liu, Hongfang and Ravikumar, Komandur - Elayavilli and Lamurias, Andre and Couto, Francisco M. and Dai, Hong-Jie - and Tsai, Richard Tzong-Han and Ata, Caglar and Can, Tolga and Usi{'e}, - Anabel and Alves, Rui and Segura-Bedmar, Isabel and Mart{'i}nez, Paloma - and Oyarzabal, Julen and Valencia, Alfonso - }, - year = 2015, - month = {Jan}, - day = 19, - journal = {Journal of Cheminformatics}, - volume = 7, - number = 1, - pages = {S2}, - doi = {10.1186/1758-2946-7-S1-S2}, - issn = {1758-2946}, - url = {https://doi.org/10.1186/1758-2946-7-S1-S2}, - abstract = { - The automatic extraction of chemical information from text requires the - recognition of chemical entity mentions as one of its key steps. When - developing supervised named entity recognition (NER) systems, the - availability of a large, manually annotated text corpus is desirable. - Furthermore, large corpora permit the robust evaluation and comparison of - different approaches that detect chemicals in documents. We present the - CHEMDNER corpus, a collection of 10,000 PubMed abstracts that contain a - total of 84,355 chemical entity mentions labeled manually by expert - chemistry literature curators, following annotation guidelines specifically - defined for this task. The abstracts of the CHEMDNER corpus were selected - to be representative for all major chemical disciplines. 
Each of the - chemical entity mentions was manually labeled according to its - structure-associated chemical entity mention (SACEM) class: abbreviation, - family, formula, identifier, multiple, systematic and trivial. The - difficulty and consistency of tagging chemicals in text was measured using - an agreement study between annotators, obtaining a percentage agreement of - 91. For a subset of the CHEMDNER corpus (the test set of 3,000 abstracts) - we provide not only the Gold Standard manual annotations, but also mentions - automatically detected by the 26 teams that participated in the BioCreative - IV CHEMDNER chemical mention recognition task. In addition, we release the - CHEMDNER silver standard corpus of automatically extracted mentions from - 17,000 randomly selected PubMed abstracts. A version of the CHEMDNER corpus - in the BioC format has been generated as well. We propose a standard for - required minimum information about entity annotations for the construction - of domain specific corpora on chemical and drug entities. The CHEMDNER - corpus and annotation guidelines are available at: - ttp://www.biocreative.org/resources/biocreative-iv/chemdner-corpus/ - } - } + - |- + @article{Krallinger2015, + title = {The CHEMDNER corpus of chemicals and drugs and its annotation principles}, + author = { + Krallinger, Martin and Rabal, Obdulia and Leitner, Florian and Vazquez, + Miguel and Salgado, David and Lu, Zhiyong and Leaman, Robert and Lu, Yanan + and Ji, Donghong and Lowe, Daniel M. and Sayle, Roger A. and + Batista-Navarro, Riza Theresa and Rak, Rafal and Huber, Torsten and + Rockt{"a}schel, Tim and Matos, S{'e}rgio and Campos, David and Tang, + Buzhou and Xu, Hua and Munkhdalai, Tsendsuren and Ryu, Keun Ho and Ramanan, + S. V. and Nathan, Senthil and {{Z}}itnik, Slavko and Bajec, Marko and + Weber, Lutz and Irmer, Matthias and Akhondi, Saber A. and Kors, Jan A. 
and + Xu, Shuo and An, Xin and Sikdar, Utpal Kumar and Ekbal, Asif and Yoshioka, + Masaharu and Dieb, Thaer M. and Choi, Miji and Verspoor, Karin and Khabsa, + Madian and Giles, C. Lee and Liu, Hongfang and Ravikumar, Komandur + Elayavilli and Lamurias, Andre and Couto, Francisco M. and Dai, Hong-Jie + and Tsai, Richard Tzong-Han and Ata, Caglar and Can, Tolga and Usi{'e}, + Anabel and Alves, Rui and Segura-Bedmar, Isabel and Mart{'i}nez, Paloma + and Oyarzabal, Julen and Valencia, Alfonso + }, + year = 2015, + month = {Jan}, + day = 19, + journal = {Journal of Cheminformatics}, + volume = 7, + number = 1, + pages = {S2}, + doi = {10.1186/1758-2946-7-S1-S2}, + issn = {1758-2946}, + url = {https://doi.org/10.1186/1758-2946-7-S1-S2}, + abstract = { + The automatic extraction of chemical information from text requires the + recognition of chemical entity mentions as one of its key steps. When + developing supervised named entity recognition (NER) systems, the + availability of a large, manually annotated text corpus is desirable. + Furthermore, large corpora permit the robust evaluation and comparison of + different approaches that detect chemicals in documents. We present the + CHEMDNER corpus, a collection of 10,000 PubMed abstracts that contain a + total of 84,355 chemical entity mentions labeled manually by expert + chemistry literature curators, following annotation guidelines specifically + defined for this task. The abstracts of the CHEMDNER corpus were selected + to be representative for all major chemical disciplines. Each of the + chemical entity mentions was manually labeled according to its + structure-associated chemical entity mention (SACEM) class: abbreviation, + family, formula, identifier, multiple, systematic and trivial. The + difficulty and consistency of tagging chemicals in text was measured using + an agreement study between annotators, obtaining a percentage agreement of + 91. 
For a subset of the CHEMDNER corpus (the test set of 3,000 abstracts) + we provide not only the Gold Standard manual annotations, but also mentions + automatically detected by the 26 teams that participated in the BioCreative + IV CHEMDNER chemical mention recognition task. In addition, we release the + CHEMDNER silver standard corpus of automatically extracted mentions from + 17,000 randomly selected PubMed abstracts. A version of the CHEMDNER corpus + in the BioC format has been generated as well. We propose a standard for + required minimum information about entity annotations for the construction + of domain specific corpora on chemical and drug entities. The CHEMDNER + corpus and annotation guidelines are available at: + http://www.biocreative.org/resources/biocreative-iv/chemdner-corpus/ + } + } templates: - - |- - Task: Find all the mentions of {#chemicals|chemical compounds|chemical substances!} in the {#following|subsequent!} {#text|sentence!}. Return the matching {#words|entities!}. If there is no {#match|mention of a chemical|matching entity!}, return `no match`. - {#Sentence|Description!}: {sentence#} - Answer: {matched_words#} - - |- - User: Does the following text contain mentions of {#chemicals|chemical compounds|chemical substances!}? {#Can you return matches?|Can you output matches?|Please return matches.!} - {#Text: |!}{sentence#} - Assistant: {#I found|There is!} {matched_words#}. + - |- + Task: Find all the mentions of {#chemicals|chemical compounds|chemical substances!} in the {#following|subsequent!} {#text|sentence!}. Return the matching {#words|entities!}. If there is no {#match|mention of a chemical|matching entity!}, return `no match`. + {#Sentence|Description!}: {sentence#} + Answer: {matched_words#} + - |- + User: Does the following text contain mentions of {#chemicals|chemical compounds|chemical substances!}?
{#Can you return matches?|Can you output matches?|Please return matches.!} + {#Text: |!}{sentence#} + Assistant: {#I found|There is!} {matched_words#}. diff --git a/data/tabular/chemistry_stackexchange/meta.yaml b/data/tabular/chemistry_stackexchange/meta.yaml index 62c581c20..33e973401 100644 --- a/data/tabular/chemistry_stackexchange/meta.yaml +++ b/data/tabular/chemistry_stackexchange/meta.yaml @@ -1,31 +1,30 @@ ---- name: chemistry_stackexchange description: |- - Questions and answers mined from chemistry.stackexchange.com. + Questions and answers mined from chemistry.stackexchange.com. targets: - - id: a - description: answer to the question - type: string - - id: title - description: title of the question - type: string + - id: a + description: answer to the question + type: string + - id: title + description: title of the question + type: string identifiers: - - id: q - type: string - description: question asked on chemistry.stackexchange.com + - id: q + type: string + description: question asked on chemistry.stackexchange.com license: CC BY-SA links: - - url: chemistry.stackexchange.com - description: original data source - - url: https://stackoverflow.com/help/licensing - description: information about the license + - url: chemistry.stackexchange.com + description: original data source + - url: https://stackoverflow.com/help/licensing + description: information about the license num_points: 4582 templates: - - |- - {#Task: Please answer the question of the user.|Task: Provide a detailed response to the user's question.|Task: Address the user's query with a well-structured answer.|Task: Your role is to respond to the user's question with clarity.|Task: Offer a concise and informative answer to the user's question.|Task: Provide a clear and concise reply to the user's inquiry.!} - {#User: |Question: |Inquiry: |\n!}{#q} - {#Assistant: |Answer: !}{#a} - - |- - {#Task: Generate a title for this question.|Task: Create a meaningful title for this question.|Task: 
Summarize the question in a title.!} - {#Question: |Inquiry: |\n!}{#q} - {#Assistant: |Title: |Answer: |!}{#title} + - |- + {#Task: Please answer the question of the user.|Task: Provide a detailed response to the user's question.|Task: Address the user's query with a well-structured answer.|Task: Your role is to respond to the user's question with clarity.|Task: Offer a concise and informative answer to the user's question.|Task: Provide a clear and concise reply to the user's inquiry.!} + {#User: |Question: |Inquiry: |\n!}{#q} + {#Assistant: |Answer: !}{#a} + - |- + {#Task: Generate a title for this question.|Task: Create a meaningful title for this question.|Task: Summarize the question in a title.!} + {#Question: |Inquiry: |\n!}{#q} + {#Assistant: |Title: |Answer: |!}{#title} diff --git a/data/tabular/choline_transporter_butkiewicz/meta.yaml b/data/tabular/choline_transporter_butkiewicz/meta.yaml index ba6dcd688..cc7e170e6 100644 --- a/data/tabular/choline_transporter_butkiewicz/meta.yaml +++ b/data/tabular/choline_transporter_butkiewicz/meta.yaml @@ -1,174 +1,173 @@ ---- name: choline_transporter_butkiewicz description: |- - This dataset was originally curated from HTS data at - the PubChem database. The primary screen AID 488975 identified - inhibitors of CHT. The counter screen AID 493221 was used as a - validation screen to confirm the active compounds that inhibit CHT. - AID504840 and AID588401 experiments were used as additional validation - experiments. The screen AID 493222 evaluated remaining active compounds - for non-specific activity in parental HEK293 cells. AID602208 tested a - selected set of compounds for 3H choline uptake. The final set of 254 - active compounds was determined by the overlap of active compounds in - screens AID 493221, AID504840, and AID588401 subtracting any - non-specific hits from AID 49322 and all inactive compounds in the - re-confirmation screen AID602208. 
+ This dataset was originally curated from HTS data at + the PubChem database. The primary screen AID 488975 identified + inhibitors of CHT. The counter screen AID 493221 was used as a + validation screen to confirm the active compounds that inhibit CHT. + AID504840 and AID588401 experiments were used as additional validation + experiments. The screen AID 493222 evaluated remaining active compounds + for non-specific activity in parental HEK293 cells. AID602208 tested a + selected set of compounds for 3H choline uptake. The final set of 254 + active compounds was determined by the overlap of active compounds in + screens AID 493221, AID504840, and AID588401 subtracting any + non-specific hits from AID 493222 and all inactive compounds in the + re-confirmation screen AID602208. targets: - - id: activity_choline_transporter - description: inhibition of choline transporter receptor (1) or not (0).
+ units: + type: boolean + names: + - noun: inhibition of choline transporter activity + - adjective: choline transporter activity inhibition + - gerund: inhibiting the choline transporter activity + - verb: inhibits choline transporter activity + pubchem_aids: + - 488975 + - 493221 + - 504840 + - 588401 + - 493222 + - 602208 benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al - description: original dataset - - url: https://doi.org/10.3390/molecules18010735 - description: corresponding publication - - url: https://doi.org/10.1093/nar/gky1033 - description: corresponding publication - - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ - description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al + description: original dataset + - url: https://doi.org/10.3390/molecules18010735 + description: corresponding publication + - url: https://doi.org/10.1093/nar/gky1033 + description: corresponding publication + - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ + description: corresponding publication num_points: 302306 bibtex: - - |- - @article{Butkiewicz2013, - doi = {10.3390/molecules18010735}, - url = {https://doi.org/10.3390/molecules18010735}, - year = {2013}, - month = jan, - publisher = {{MDPI} {AG}}, - volume = {18}, - number = {1}, - pages = {735--756}, - author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and - Jeffrey Mendenhall and Pedro Teixeira and C. 
Weaver and Jens - Meiler}, - title = {Benchmarking Ligand-Based Virtual High-Throughput - Screening with the {PubChem} Database}, - journal = {Molecules}} - - |- - @article{Kim2018, - doi = {10.1093/nar/gky1033}, - url = {https://doi.org/10.1093/nar/gky1033}, - year = {2018}, - month = oct, - publisher = {Oxford University Press ({OUP})}, - volume = {47}, - number = {D1}, - pages = {D1102--D1109}, - author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and - Asta Gindulyte and Jia He and Siqian He and Qingliang Li and - Benjamin A Shoemaker and Paul A Thiessen and Bo Yu and Leonid - Zaslavsky and Jian Zhang and Evan E Bolton}, - title = {{PubChem} 2019 update: improved access to chemical data}, - journal = {Nucleic Acids Research}} - - |- - @article{Butkiewicz2017, - doi = {}, - url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, - year = {2017}, - publisher = {Chem Inform}, - volume = {3}, - number = {1}, - author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe, - E. W. and Weaver, D. C. and Meiler, J.}, - title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from - the {P}ub{C}hem {D}atabase}}, - journal = {Chemical Science}} + - |- + @article{Butkiewicz2013, + doi = {10.3390/molecules18010735}, + url = {https://doi.org/10.3390/molecules18010735}, + year = {2013}, + month = jan, + publisher = {{MDPI} {AG}}, + volume = {18}, + number = {1}, + pages = {735--756}, + author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and + Jeffrey Mendenhall and Pedro Teixeira and C. 
Weaver and Jens + Meiler}, + title = {Benchmarking Ligand-Based Virtual High-Throughput + Screening with the {PubChem} Database}, + journal = {Molecules}} + - |- + @article{Kim2018, + doi = {10.1093/nar/gky1033}, + url = {https://doi.org/10.1093/nar/gky1033}, + year = {2018}, + month = oct, + publisher = {Oxford University Press ({OUP})}, + volume = {47}, + number = {D1}, + pages = {D1102--D1109}, + author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and + Asta Gindulyte and Jia He and Siqian He and Qingliang Li and + Benjamin A Shoemaker and Paul A Thiessen and Bo Yu and Leonid + Zaslavsky and Jian Zhang and Evan E Bolton}, + title = {{PubChem} 2019 update: improved access to chemical data}, + journal = {Nucleic Acids Research}} + - |- + @article{Butkiewicz2017, + doi = {}, + url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, + year = {2017}, + publisher = {Chem Inform}, + volume = {3}, + number = {1}, + author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe, + E. W. and Weaver, D. C. and Meiler, J.}, + title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from + the {P}ub{C}hem {D}atabase}, + journal = {Chemical Science}} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_choline_transporter#no &NULL}{activity_choline_transporter__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {activity_choline_transporter#no &NULL}{activity_choline_transporter__names__noun}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_choline_transporter#no &NULL}{activity_choline_transporter__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}. - - |- - Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_choline_transporter__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {activity_choline_transporter#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_choline_transporter__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {activity_choline_transporter__names__gerund}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_choline_transporter__names__gerund}? - Assistant: {activity_choline_transporter#No&Yes}, this molecule is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {activity_choline_transporter__names__gerund}? - Assistant: {activity_choline_transporter#No&Yes}, it is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}? 
- Assistant: This is a molecule that is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {activity_choline_transporter#not &NULL}be {activity_choline_transporter__names__gerund}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {activity_choline_transporter#not &NULL}be {activity_choline_transporter__names__gerund}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {activity_choline_transporter__names__gerund}:{activity_choline_transporter#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_choline_transporter__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{activity_choline_transporter#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_choline_transporter__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. 
- Result:This molecule is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}. - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_choline_transporter__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {activity_choline_transporter%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%activity_choline_transporter%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%activity_choline_transporter%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_choline_transporter#no &NULL}{activity_choline_transporter__names__noun}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {activity_choline_transporter#no &NULL}{activity_choline_transporter__names__noun}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_choline_transporter#no &NULL}{activity_choline_transporter__names__noun}. 
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_choline_transporter__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {activity_choline_transporter#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_choline_transporter__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {activity_choline_transporter__names__gerund}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_choline_transporter__names__gerund}? + Assistant: {activity_choline_transporter#No&Yes}, this molecule is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {activity_choline_transporter__names__gerund}? + Assistant: {activity_choline_transporter#No&Yes}, it is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}? 
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}? + Assistant: This is a molecule that is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {activity_choline_transporter#not &NULL}be {activity_choline_transporter__names__gerund}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {activity_choline_transporter#not &NULL}be {activity_choline_transporter__names__gerund}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {activity_choline_transporter__names__gerund}:{activity_choline_transporter#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_choline_transporter__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. 
+ Result:{activity_choline_transporter#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_choline_transporter__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result:This molecule is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}. + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_choline_transporter__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {activity_choline_transporter%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%activity_choline_transporter%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {SMILES%activity_choline_transporter%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/clearance_astrazeneca/meta.yaml b/data/tabular/clearance_astrazeneca/meta.yaml index 5a9ec0ae6..81eb58889 100644 --- a/data/tabular/clearance_astrazeneca/meta.yaml +++ b/data/tabular/clearance_astrazeneca/meta.yaml @@ -1,67 +1,66 @@ ---- name: clearance_astrazeneca description: |- - Drug clearance is defined as the volume of plasma cleared of a drug - over a specified time period and it measures the rate at which the active drug - is removed from the body. This is a dataset curated from ChEMBL database containing - experimental results on intrinsic clearance, deposited from AstraZeneca. It - contains clearance measures from two experiments types, hepatocyte and microsomes. + Drug clearance is defined as the volume of plasma cleared of a drug + over a specified time period and it measures the rate at which the active drug + is removed from the body. This is a dataset curated from ChEMBL database containing + experimental results on intrinsic clearance, deposited from AstraZeneca. It + contains clearance measures from two experiments types, hepatocyte and microsomes. 
targets: - - id: drug_clearance - description: the volume of plasma cleared of a drug over a specified time period - units: mL / (min g) - type: continuous - names: - - noun: drug clearance - - noun: volume of plasma cleared of a drug over a specified time period - uris: - - http://purl.bioontology.org/ontology/MEDDRA/10077254 + - id: drug_clearance + description: the volume of plasma cleared of a drug over a specified time period + units: mL / (min g) + type: continuous + names: + - noun: drug clearance + - noun: volume of plasma cleared of a drug over a specified time period + uris: + - http://purl.bioontology.org/ontology/MEDDRA/10077254 benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: chembl_id - type: Other - names: - - noun: ChEMBL id - - noun: ChEMBL identifier number - description: ChEMBL ids - sample: false + - id: SMILES + type: SMILES + description: SMILES + - id: chembl_id + type: Other + names: + - noun: ChEMBL id + - noun: ChEMBL identifier number + description: ChEMBL ids + sample: false license: CC BY 4.0 links: - - url: http://dx.doi.org/10.6019/CHEMBL3301361 - description: corresponding publication - - url: https://doi.org/10.1016/j.ejmech.2012.06.043 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#clearance-astrazeneca - description: data source + - url: http://dx.doi.org/10.6019/CHEMBL3301361 + description: corresponding publication + - url: https://doi.org/10.1016/j.ejmech.2012.06.043 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#clearance-astrazeneca + description: data source num_points: 1213 bibtex: - - |- - @techreport{Hersey2015, - doi = {10.6019/chembl3301361}, - url = {https://doi.org/10.6019/chembl3301361}, - year = {2015}, - month = feb, - publisher = {{EMBL}-{EBI}}, - author 
= {Anne Hersey}, - title = {{ChEMBL} Deposited Data Set - {AZ dataset}} - - |- - @article{Di2012, - doi = {10.1016/j.ejmech.2012.06.043}, - url = {https://doi.org/10.1016/j.ejmech.2012.06.043}, - year = {2012}, - month = nov, - publisher = {Elsevier BV}, - volume = {57}, - pages = {441--448}, - author = {Li Di and Christopher Keefer and Dennis O. Scott and Timothy J. Strelevitz - and George Chang and Yi-An Bi and Yurong Lai and Jonathon Duckworth and - Katherine Fenner and Matthew D. Troutman and R. Scott Obach}, - title = {Mechanistic insights from comparing intrinsic clearance values between - human liver microsomes and hepatocytes to guide drug design}, - journal = {European Journal of Medicinal Chemistry} + - |- + @techreport{Hersey2015, + doi = {10.6019/chembl3301361}, + url = {https://doi.org/10.6019/chembl3301361}, + year = {2015}, + month = feb, + publisher = {{EMBL}-{EBI}}, + author = {Anne Hersey}, + title = {{ChEMBL} Deposited Data Set - {AZ dataset}} + - |- + @article{Di2012, + doi = {10.1016/j.ejmech.2012.06.043}, + url = {https://doi.org/10.1016/j.ejmech.2012.06.043}, + year = {2012}, + month = nov, + publisher = {Elsevier BV}, + volume = {57}, + pages = {441--448}, + author = {Li Di and Christopher Keefer and Dennis O. Scott and Timothy J. Strelevitz + and George Chang and Yi-An Bi and Yurong Lai and Jonathon Duckworth and + Katherine Fenner and Matthew D. Troutman and R. 
Scott Obach}, + title = {Mechanistic insights from comparing intrinsic clearance values between + human liver microsomes and hepatocytes to guide drug design}, + journal = {European Journal of Medicinal Chemistry} diff --git a/data/tabular/clintox/meta.yaml b/data/tabular/clintox/meta.yaml index fcda1a556..282d3342c 100644 --- a/data/tabular/clintox/meta.yaml +++ b/data/tabular/clintox/meta.yaml @@ -1,129 +1,127 @@ ---- name: clintox description: |- - The ClinTox dataset includes drugs that have failed - clinical trials for toxicity reasons and also drugs that are associated - with successful trials. + The ClinTox dataset includes drugs that have failed + clinical trials for toxicity reasons and also drugs that are associated + with successful trials. targets: - - id: clinical_toxicity - description: whether it can cause clinical toxicity (1) or not (0). - units: - type: boolean - names: - - noun: toxicity - - noun: clinical toxicity - - adjective: toxic - - adjective: clinically toxic - - gerund: displaying clinical toxicity - uris: - - http://purl.bioontology.org/ontology/MESH/Q000633 - - https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&ns=ncit&code=C27990 - - https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&ns=ncit&code=C27955 + - id: clinical_toxicity + description: whether it can cause clinical toxicity (1) or not (0). 
+ units: + type: boolean + names: + - noun: toxicity + - noun: clinical toxicity + - adjective: toxic + - adjective: clinically toxic + - gerund: displaying clinical toxicity + uris: + - http://purl.bioontology.org/ontology/MESH/Q000633 + - https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&ns=ncit&code=C27990 + - https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&ns=ncit&code=C27955 benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://tdcommons.ai/single_pred_tasks/tox/#clintox - description: original dataset - - url: https://doi.org/10.1016/j.chembiol.2016.07.023 - description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#clintox + description: original dataset + - url: https://doi.org/10.1016/j.chembiol.2016.07.023 + description: corresponding publication num_points: 1478 bibtex: - - |- - @article{Gayvert2016, - doi = {10.1016/j.chembiol.2016.07.023}, - url = {https://doi.org/10.1016/j.chembiol.2016.07.023}, - year = {2016}, - month = oct, - publisher = {Elsevier {BV}}, - volume = {23}, - number = {10}, - pages = {1294--1301}, - author = {Kaitlyn~M. Gayvert and Neel~S. Madhukar and Olivier Elemento}, - title = {A Data-Driven Approach to Predicting Successes and Failures of Clinical Trials}, - journal = {Cell Chemical Biology}} + - |- + @article{Gayvert2016, + doi = {10.1016/j.chembiol.2016.07.023}, + url = {https://doi.org/10.1016/j.chembiol.2016.07.023}, + year = {2016}, + month = oct, + publisher = {Elsevier {BV}}, + volume = {23}, + number = {10}, + pages = {1294--1301}, + author = {Kaitlyn~M. Gayvert and Neel~S. 
Madhukar and Olivier Elemento}, + title = {A Data-Driven Approach to Predicting Successes and Failures of Clinical Trials}, + journal = {Cell Chemical Biology}} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {clinical_toxicity#no &NULL}{clinical_toxicity__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {clinical_toxicity#no &NULL}{clinical_toxicity__names__adjective} - {#properties|characteristics|features|traits!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {clinical_toxicity#not &NULL}identified as {clinical_toxicity__names__adjective}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {clinical_toxicity__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {clinical_toxicity#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {clinical_toxicity__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {clinical_toxicity__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {clinical_toxicity__names__adjective}? 
- Assistant: {clinical_toxicity#No&Yes}, this molecule is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {clinical_toxicity__names__adjective}? - Assistant: {clinical_toxicity#No&Yes}, it is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}? - Assistant: This is a molecule that is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {clinical_toxicity#not &NULL}be {clinical_toxicity__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {clinical_toxicity#not &NULL}be {clinical_toxicity__names__adjective}. 
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {clinical_toxicity__names__adjective}:{clinical_toxicity#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {clinical_toxicity__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{clinical_toxicity#False&True} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {clinical_toxicity__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {clinical_toxicity%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {clinical_toxicity__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {clinical_toxicity%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%clinical_toxicity%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}? 
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%clinical_toxicity%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {clinical_toxicity#no &NULL}{clinical_toxicity__names__noun}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {clinical_toxicity#no &NULL}{clinical_toxicity__names__adjective} {#properties|characteristics|features|traits!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {clinical_toxicity#not &NULL}identified as {clinical_toxicity__names__adjective}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {clinical_toxicity__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {clinical_toxicity#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {clinical_toxicity__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {clinical_toxicity__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {clinical_toxicity__names__adjective}? 
+ Assistant: {clinical_toxicity#No&Yes}, this molecule is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {clinical_toxicity__names__adjective}? + Assistant: {clinical_toxicity#No&Yes}, it is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}? + Assistant: This is a molecule that is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {clinical_toxicity#not &NULL}be {clinical_toxicity__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {clinical_toxicity#not &NULL}be {clinical_toxicity__names__adjective}. 
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {clinical_toxicity__names__adjective}:{clinical_toxicity#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {clinical_toxicity__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{clinical_toxicity#False&True} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {clinical_toxicity__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {clinical_toxicity%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {clinical_toxicity__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {clinical_toxicity%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%clinical_toxicity%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}? 
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%clinical_toxicity%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/core_mof_no_topo/meta.yaml b/data/tabular/core_mof_no_topo/meta.yaml index ee15f93c6..460f840aa 100644 --- a/data/tabular/core_mof_no_topo/meta.yaml +++ b/data/tabular/core_mof_no_topo/meta.yaml @@ -1,250 +1,239 @@ ---- name: core_mof_no_topo description: |- - CoRE MOF is a database of "computationally ready" crystal structures of metal-organic frameworks + CoRE MOF is a database of "computationally ready" crystal structures of metal-organic frameworks targets: - - id: cif - description: Crystal structure, in CIF format - type: text - names: - - noun: crystal structure in CIF format - - noun: content of a CIF file with the crystal structure - - noun: data from a CIF file with the crystal structure - - noun: content within a CIF file of the crystal structure - - noun: crystal structure represented in CIF format - - id: outputs.pure_CO2_widomHOA - description: heat of adsorption of CO2, simulated using Widom insertion - units: kJ/mol - type: continuous - significant_digits: 2 - names: - - noun: heat of adsorption of CO2 (computed using the Widom insertion technique) - - noun: simulated heat of adsorption of CO2 (obtained using Widom insertions) - - noun: heat of adsorption of carbon dioxide (computed using the Widom insertion technique) - - noun: simulated heat of adsorption of carbon dioxide (obtained using Widom insertions) - - id: outputs.pure_methane_widomHOA - description: heat of adsorption for methane, simulated using Widom insertion - units: kJ/mol - type: continuous - significant_digits: 2 - names: - - noun: heat of adsorption of methane (computed using the Widom insertion technique) - - noun: simulated heat of adsorption of methane (obtained using Widom insertions) - - noun: heat of adsorption of CH4 (computed using the 
Widom insertion technique) - - noun: simulated heat of adsorption of CH4 (obtained using Widom insertions) - - id: outputs.pure_uptake_CO2_298.00_15000 - description: CO2 uptake at 298 K and 15000 Pa - units: mol/kg - type: continuous - significant_digits: 1 - names: - - noun: CO2 uptake at 298 K and 15000 Pa as obtained from GCMC simulations - - noun: simulated CO2 uptake at 298 K and 15000 Pa - - noun: CO2 uptake at 298 K and 15000 Pa as obtained from grand canonical Monte Carlo simulations - - noun: carbon dioxide uptake at 298 K and 15000 Pa as obtained from GCMC simulations - - noun: simulated carbon dioxide uptake at 298 K and 15000 Pa - - noun: carbon dioxide uptake at 298 K and 15000 Pa as obtained from grand canonical Monte Carlo simulations - - noun: CO2 uptake at 298 K and 0.15 bar as obtained from GCMC simulations - - noun: simulated CO2 uptake at 298 K and 0.15 bar - - id: outputs.pure_uptake_CO2_298.00_1600000 - description: CO2 uptake at 298 K and 1600000 Pa - units: mol/kg - type: continuous - significant_digits: 1 - names: - - noun: CO2 uptake at 298 K and 1600000 Pa as obtained from GCMC simulations - - noun: simulated CO2 uptake at 298 K and 1600000 Pa - - noun: CO2 uptake at 298 K and 1600000 Pa as obtained from grand canonical Monte Carlo simulations - - noun: carbon dioxide uptake at 298 K and 1600000 Pa as obtained from GCMC simulations - - noun: simulated carbon dioxide uptake at 298 K and 1600000 Pa - - noun: carbon dioxide uptake at 298 K and 1600000 Pa as obtained from grand canonical Monte Carlo simulations - - noun: CO2 uptake at 298 K and 16 bar as obtained from GCMC simulations - - noun: simulated CO2 uptake at 298 K and 16 bar - - id: outputs.pure_uptake_methane_298.00_580000 - description: methane uptake at 298 K and 580000 Pa - units: mol/kg - type: continuous - significant_digits: 1 - names: - - noun: methane uptake at 298 K and 580000 Pa as obtained from GCMC simulations - - noun: simulated methane uptake at 298 K and 580000 Pa - 
- noun: methane uptake at 298 K and 580000 Pa as obtained from grand canonical Monte Carlo simulations - - noun: CH4 uptake at 298 K and 580000 Pa as obtained from GCMC simulations - - noun: simulated CH4 uptake at 298 K and 580000 Pa - - noun: CH4 uptake at 298 K and 580000 Pa as obtained from grand canonical Monte Carlo simulations - - noun: methane uptake at 298 K and 5.8 bar as obtained from GCMC simulations - - noun: simulated methane uptake at 298 K and 5.8 bar - - id: outputs.pure_uptake_methane_298.00_6500000 - description: methane uptake at 298 K and 6500000 Pa - units: mol/kg - type: continuous - significant_digits: 1 - names: - - noun: methane uptake at 298 K and 6500000 Pa as obtained from GCMC simulations - - noun: simulated methane uptake at 298 K and 6500000 Pa - - noun: methane uptake at 298 K and 6500000 Pa as obtained from grand canonical Monte Carlo simulations - - noun: CH4 uptake at 298 K and 6500000 Pa as obtained from GCMC simulations - - noun: simulated CH4 uptake at 298 K and 6500000 Pa - - noun: CH4 uptake at 298 K and 6500000 Pa as obtained from grand canonical Monte Carlo simulations - - noun: methane uptake at 298 K and 65 bar as obtained from GCMC simulations - - noun: simulated methane uptake at 298 K and 65 bar - - id: outputs.logKH_CO2 - description: logarithm of Henry's constant for CO2 - units: log(mol/kg/Pa) - type: continuous - significant_digits: 2 - names: - - noun: logarithm of Henry's constant for CO2 obtained from Widom insertion simulations - - noun: logarithm of Henry's constant for carbon dioxide obtained from Widom insertion simulations - - noun: logarithm of Henry's constant for CO2 obtained from Widom insertion simulations - - noun: logarithm of Henry's constant for carbon dioxide obtained from Widom insertion simulations - - id: outputs.logKH_CH4 - description: logarithm of Henry's constant for methane - units: log(mol/kg/Pa) - type: continuous - significant_digits: 2 - names: - - noun: logarithm of Henry's constant 
for methane obtained from Widom insertion simulations - - noun: logarithm of Henry's constant for CH4 obtained from Widom insertion simulations - - noun: logarithm of Henry's constant for methane obtained from Widom insertion simulations - - noun: logarithm of Henry's constant for CH4 obtained from Widom insertion simulations - - id: outputs.CH4DC - description: deliverable capacity of methane - type: continuous - units: vSTP/v - names: - - noun: deliverable capacity (between 5.8 bar and 65 bar at 298 K) of methane obtained from GCMC simulations - - noun: deliverable capacity of CH4 obtained from GCMC simulations between 5.8 bar and 65 bar at 298 K - - noun: deliverable capacity of methane (between 5.8 bar and 65 bar at 298 K) obtained from GCMC simulations - - noun: deliverable capacity of CH4 (between 5.8 bar and 65 bar at 298 K) obtained from GCMC simulations + - id: cif + description: Crystal structure, in CIF format + type: text + names: + - noun: crystal structure in CIF format + - noun: content of a CIF file with the crystal structure + - noun: data from a CIF file with the crystal structure + - noun: content within a CIF file of the crystal structure + - noun: crystal structure represented in CIF format + - id: outputs.pure_CO2_widomHOA + description: heat of adsorption of CO2, simulated using Widom insertion + units: kJ/mol + type: continuous + significant_digits: 2 + names: + - noun: heat of adsorption of CO2 (computed using the Widom insertion technique) + - noun: simulated heat of adsorption of CO2 (obtained using Widom insertions) + - noun: heat of adsorption of carbon dioxide (computed using the Widom insertion technique) + - noun: simulated heat of adsorption of carbon dioxide (obtained using Widom insertions) + - id: outputs.pure_methane_widomHOA + description: heat of adsorption for methane, simulated using Widom insertion + units: kJ/mol + type: continuous + significant_digits: 2 + names: + - noun: heat of adsorption of methane (computed using the 
Widom insertion technique) + - noun: simulated heat of adsorption of methane (obtained using Widom insertions) + - noun: heat of adsorption of CH4 (computed using the Widom insertion technique) + - noun: simulated heat of adsorption of CH4 (obtained using Widom insertions) + - id: outputs.pure_uptake_CO2_298.00_15000 + description: CO2 uptake at 298 K and 15000 Pa + units: mol/kg + type: continuous + significant_digits: 1 + names: + - noun: CO2 uptake at 298 K and 15000 Pa as obtained from GCMC simulations + - noun: simulated CO2 uptake at 298 K and 15000 Pa + - noun: CO2 uptake at 298 K and 15000 Pa as obtained from grand canonical Monte Carlo simulations + - noun: carbon dioxide uptake at 298 K and 15000 Pa as obtained from GCMC simulations + - noun: simulated carbon dioxide uptake at 298 K and 15000 Pa + - noun: carbon dioxide uptake at 298 K and 15000 Pa as obtained from grand canonical Monte Carlo simulations + - noun: CO2 uptake at 298 K and 0.15 bar as obtained from GCMC simulations + - noun: simulated CO2 uptake at 298 K and 0.15 bar + - id: outputs.pure_uptake_CO2_298.00_1600000 + description: CO2 uptake at 298 K and 1600000 Pa + units: mol/kg + type: continuous + significant_digits: 1 + names: + - noun: CO2 uptake at 298 K and 1600000 Pa as obtained from GCMC simulations + - noun: simulated CO2 uptake at 298 K and 1600000 Pa + - noun: CO2 uptake at 298 K and 1600000 Pa as obtained from grand canonical Monte Carlo simulations + - noun: carbon dioxide uptake at 298 K and 1600000 Pa as obtained from GCMC simulations + - noun: simulated carbon dioxide uptake at 298 K and 1600000 Pa + - noun: carbon dioxide uptake at 298 K and 1600000 Pa as obtained from grand canonical Monte Carlo simulations + - noun: CO2 uptake at 298 K and 16 bar as obtained from GCMC simulations + - noun: simulated CO2 uptake at 298 K and 16 bar + - id: outputs.pure_uptake_methane_298.00_580000 + description: methane uptake at 298 K and 580000 Pa + units: mol/kg + type: continuous + 
significant_digits: 1 + names: + - noun: methane uptake at 298 K and 580000 Pa as obtained from GCMC simulations + - noun: simulated methane uptake at 298 K and 580000 Pa + - noun: methane uptake at 298 K and 580000 Pa as obtained from grand canonical Monte Carlo simulations + - noun: CH4 uptake at 298 K and 580000 Pa as obtained from GCMC simulations + - noun: simulated CH4 uptake at 298 K and 580000 Pa + - noun: CH4 uptake at 298 K and 580000 Pa as obtained from grand canonical Monte Carlo simulations + - noun: methane uptake at 298 K and 5.8 bar as obtained from GCMC simulations + - noun: simulated methane uptake at 298 K and 5.8 bar + - id: outputs.pure_uptake_methane_298.00_6500000 + description: methane uptake at 298 K and 6500000 Pa + units: mol/kg + type: continuous + significant_digits: 1 + names: + - noun: methane uptake at 298 K and 6500000 Pa as obtained from GCMC simulations + - noun: simulated methane uptake at 298 K and 6500000 Pa + - noun: methane uptake at 298 K and 6500000 Pa as obtained from grand canonical Monte Carlo simulations + - noun: CH4 uptake at 298 K and 6500000 Pa as obtained from GCMC simulations + - noun: simulated CH4 uptake at 298 K and 6500000 Pa + - noun: CH4 uptake at 298 K and 6500000 Pa as obtained from grand canonical Monte Carlo simulations + - noun: methane uptake at 298 K and 65 bar as obtained from GCMC simulations + - noun: simulated methane uptake at 298 K and 65 bar + - id: outputs.logKH_CO2 + description: logarithm of Henry's constant for CO2 + units: log(mol/kg/Pa) + type: continuous + significant_digits: 2 + names: + - noun: logarithm of Henry's constant for CO2 obtained from Widom insertion simulations + - noun: logarithm of Henry's constant for carbon dioxide obtained from Widom insertion simulations + - noun: logarithm of Henry's constant for CO2 obtained from Widom insertion simulations + - noun: logarithm of Henry's constant for carbon dioxide obtained from Widom insertion simulations + - id: outputs.logKH_CH4 
+ description: logarithm of Henry's constant for methane + units: log(mol/kg/Pa) + type: continuous + significant_digits: 2 + names: + - noun: logarithm of Henry's constant for methane obtained from Widom insertion simulations + - noun: logarithm of Henry's constant for CH4 obtained from Widom insertion simulations + - noun: logarithm of Henry's constant for methane obtained from Widom insertion simulations + - noun: logarithm of Henry's constant for CH4 obtained from Widom insertion simulations + - id: outputs.CH4DC + description: deliverable capacity of methane + type: continuous + units: vSTP/v + names: + - noun: deliverable capacity (between 5.8 bar and 65 bar at 298 K) of methane obtained from GCMC simulations + - noun: deliverable capacity of CH4 obtained from GCMC simulations between 5.8 bar and 65 bar at 298 K + - noun: deliverable capacity of methane (between 5.8 bar and 65 bar at 298 K) obtained from GCMC simulations + - noun: deliverable capacity of CH4 (between 5.8 bar and 65 bar at 298 K) obtained from GCMC simulations identifiers: - - id: smiles_linkers - description: SMILES representation of the linker - type: text - - id: smiles_nodes - description: SMILES representation of the nodes - type: text + - id: smiles_linkers + description: SMILES representation of the linker + type: text + - id: smiles_nodes + description: SMILES representation of the nodes + type: text license: CC BY 4.0 num_points: 142 links: - - url: https://huggingface.co/datasets/kjappelbaum/chemnlp-core-mof/tree/main - description: original data source + - url: https://huggingface.co/datasets/kjappelbaum/chemnlp-core-mof/tree/main + description: original data source bibtex: - - |- - @article{Jablonka_2023, - doi = {10.1021/acscentsci.2c01177}, - url = {https://doi.org/10.1021%2Facscentsci.2c01177}, - year = 2023, - month = {mar}, + - |- + @article{Jablonka_2023, + doi = {10.1021/acscentsci.2c01177}, + url = {https://doi.org/10.1021%2Facscentsci.2c01177}, + year = 2023, + month = 
{mar}, + publisher = {American Chemical Society ({ACS})}, + volume = {9}, + number = {4}, + pages = {563--581}, + author = {Kevin Maik Jablonka and Andrew S. Rosen and Aditi S. Krishnapriyan and Berend Smit}, + title = {An Ecosystem for Digital Reticular Chemistry}, + journal = {ACS Cent. Sci.} + } + - |- + @article{Chung_2014, + doi = {10.1021/cm502594j}, + url = {https://doi.org/10.1021%2Fcm502594j}, + year = 2014, + month = {oct}, publisher = {American Chemical Society ({ACS})}, - volume = {9}, - number = {4}, - pages = {563--581}, - author = {Kevin Maik Jablonka and Andrew S. Rosen and Aditi S. Krishnapriyan and Berend Smit}, - title = {An Ecosystem for Digital Reticular Chemistry}, - journal = {ACS Cent. Sci.} - } - - |- - @article{Chung_2014, - doi = {10.1021/cm502594j}, - url = {https://doi.org/10.1021%2Fcm502594j}, - year = 2014, - month = {oct}, - publisher = {American Chemical Society ({ACS})}, - volume = {26}, - number = {21}, - pages = {6185--6192}, - author = {Yongchul G. Chung and Jeffrey Camp and Maciej Haranczyk and Benjamin J. Sikora and Wojciech Bury and Vaiva Krungleviciute and Taner Yildirim and Omar K. Farha and David S. Sholl and Randall Q. Snurr}, - title = {Computation-Ready, Experimental Metal{\textendash}Organic Frameworks: A Tool To Enable High-Throughput Screening of Nanoporous Crystals}, - journal = {Chem. Mater.} - } - - |- - @article{Chung_2019, - doi = {10.1021/acs.jced.9b00835}, - url = {https://doi.org/10.1021%2Facs.jced.9b00835}, - year = 2019, - month = {nov}, - publisher = {American Chemical Society ({ACS})}, - volume = {64}, - number = {12}, - pages = {5985--5998}, - author = {Yongchul G. Chung and Emmanuel Haldoupis and Benjamin J. Bucior and Maciej Haranczyk and Seulchan Lee and Hongda Zhang and Konstantinos D. Vogiatzis and Marija Milisavljevic and Sanliang Ling and Jeffrey S. Camp and Ben Slater and J. Ilja Siepmann and David S. Sholl and Randall Q. 
Snurr}, - title = {Advances, Updates, and Analytics for the Computation-Ready, Experimental Metal{\textendash}Organic Framework Database: {CoRE} {MOF} 2019}, - journal = {J. Chem. Eng. Data}amp$\mathsemicolon$ Engineering Data} - } + volume = {26}, + number = {21}, + pages = {6185--6192}, + author = {Yongchul G. Chung and Jeffrey Camp and Maciej Haranczyk and Benjamin J. Sikora and Wojciech Bury and Vaiva Krungleviciute and Taner Yildirim and Omar K. Farha and David S. Sholl and Randall Q. Snurr}, + title = {Computation-Ready, Experimental Metal{\textendash}Organic Frameworks: A Tool To Enable High-Throughput Screening of Nanoporous Crystals}, + journal = {Chem. Mater.} + } + - |- + @article{Chung_2019, + doi = {10.1021/acs.jced.9b00835}, + url = {https://doi.org/10.1021%2Facs.jced.9b00835}, + year = 2019, + month = {nov}, + publisher = {American Chemical Society ({ACS})}, + volume = {64}, + number = {12}, + pages = {5985--5998}, + author = {Yongchul G. Chung and Emmanuel Haldoupis and Benjamin J. Bucior and Maciej Haranczyk and Seulchan Lee and Hongda Zhang and Konstantinos D. Vogiatzis and Marija Milisavljevic and Sanliang Ling and Jeffrey S. Camp and Ben Slater and J. Ilja Siepmann and David S. Sholl and Randall Q. Snurr}, + title = {Advances, Updates, and Analytics for the Computation-Ready, Experimental Metal{\textendash}Organic Framework Database: {CoRE} {MOF} 2019}, + journal = {J. Chem. Eng. Data} + } templates: - - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} - {cif#} is build from linker molecules with the SMILES {smiles_linkers#} and nodes with the SMILES {smiles_nodes#}.
- - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} - {cif#} has a {outputs.pure_CO2_widomHOA__names__noun} of {outputs.pure_CO2_widomHOA#} {outputs.pure_CO2_widomHOA__units}. - - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} - {cif#} has a {outputs.pure_methane_widomHOA__names__noun} of {outputs.pure_methane_widomHOA#} {outputs.pure_methane_widomHOA__units}. - - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} - {cif#} has a {outputs.pure_uptake_CO2_298.00_15000__names__noun} of {outputs.pure_uptake_CO2_298.00_15000#} {outputs.pure_uptake_CO2_298.00_15000__units}. - - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} - {cif#} has a {outputs.pure_uptake_CO2_298.00_1600000__names__noun} of {outputs.pure_uptake_CO2_298.00_1600000#} {outputs.pure_uptake_CO2_298.00_1600000__units}. - - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} - {cif#} has a {outputs.pure_uptake_methane_298.00_580000__names__noun} of {outputs.pure_uptake_methane_298.00_580000#} {outputs.pure_uptake_methane_298.00_580000__units}. - - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} - {cif#} has a {outputs.pure_uptake_methane_298.00_6500000__names__noun} of {outputs.pure_uptake_methane_298.00_6500000#} {outputs.pure_uptake_methane_298.00_6500000__units}. 
- - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} - {cif#} has a {outputs.logKH_CO2__names__noun} of {outputs.logKH_CO2#} {outputs.logKH_CO2__units}. - - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} - {cif#} has a {outputs.logKH_CH4__names__noun} of {outputs.logKH_CH4#} {outputs.logKH_CH4__units}. - - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} - {cif#} has a {outputs.CH4DC__names__noun} of {outputs.CH4DC#} {outputs.CH4DC__units}. - - |- - Question: What linker molecules are {#used|present!} in the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? - Answer: The SMILES of the linker molecules are {smiles_linkers#}. - - |- - Question: What nodes are {#used|present!} in the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? - Answer: The SMILES of the nodes are {smiles_nodes#}. - - |- - Question: What is the {outputs.pure_CO2_widomHOA__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? - Answer: The {outputs.pure_CO2_widomHOA__names__noun} is {outputs.pure_CO2_widomHOA#} {outputs.pure_CO2_widomHOA__units}. - - |- - Question: What is the {outputs.pure_methane_widomHOA__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? 
- Answer: The {outputs.pure_methane_widomHOA__names__noun} is {outputs.pure_methane_widomHOA#} {outputs.pure_methane_widomHOA__units}. - - |- - Question: What is the {outputs.pure_uptake_CO2_298.00_15000__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? - Answer: The {outputs.pure_uptake_CO2_298.00_15000__names__noun} is {outputs.pure_uptake_CO2_298.00_15000#} {outputs.pure_uptake_CO2_298.00_15000__units}. - - |- - Question: What is the {outputs.pure_uptake_CO2_298.00_1600000__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? - Answer: The {outputs.pure_uptake_CO2_298.00_1600000__names__noun} is {outputs.pure_uptake_CO2_298.00_1600000#} {outputs.pure_uptake_CO2_298.00_1600000__units}. - - |- - Question: What is the {outputs.pure_uptake_methane_298.00_580000__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? - Answer: The {outputs.pure_uptake_methane_298.00_580000__names__noun} is {outputs.pure_uptake_methane_298.00_580000#} {outputs.pure_uptake_methane_298.00_580000__units}. - - |- - Question: What is the {outputs.pure_uptake_methane_298.00_6500000__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? - Answer: The {outputs.pure_uptake_methane_298.00_6500000__names__noun} is {outputs.pure_uptake_methane_298.00_6500000#} {outputs.pure_uptake_methane_298.00_6500000__units}. 
- - |- - Question: What is the {outputs.logKH_CO2__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? - Answer: The {outputs.logKH_CO2__names__noun} is {outputs.logKH_CO2#} {outputs.logKH_CO2__units}. - - |- - Question: What is the {outputs.logKH_CH4__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? - Answer: The {outputs.logKH_CH4__names__noun} is {outputs.logKH_CH4#} {outputs.logKH_CH4__units}. - - |- - Question: What is the {outputs.CH4DC__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? - Answer: The {outputs.CH4DC__names__noun} is {outputs.CH4DC#} {outputs.CH4DC__units}. - - |- - User: {#I have|I am working with|I'm interested in!} a {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}. What is the {outputs.pure_CO2_widomHOA__names__noun}? - Assistant: The {outputs.pure_CO2_widomHOA__names__noun} is {outputs.pure_CO2_widomHOA#} {outputs.pure_CO2_widomHOA__units}. - - |- - User: I just {#synthesized|made|created!} a {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}. What is the {outputs.pure_methane_widomHOA__names__noun}? - Assistant: The {outputs.pure_methane_widomHOA__names__noun} is {outputs.pure_methane_widomHOA#} {outputs.pure_methane_widomHOA__units}. {#Is there anything else you want to know?|Is there anything else I can do for you?!} - User: {#Yes, |Yeah, |Indeed, |!}I would like to know the {outputs.pure_uptake_CO2_298.00_15000__names__noun}. 
- Assistant: The {outputs.pure_uptake_CO2_298.00_15000__names__noun} is {outputs.pure_uptake_CO2_298.00_15000#} {outputs.pure_uptake_CO2_298.00_15000__units}. - - |- - User: {#I am interested in|I want to make|I want to synthesize!} a {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}. - Assistant: {#That's interesting.|Cool.|!} {#How can I help?|How can I be of assistance?|Is there anything I can help you with?!} - User: What building blocks do I need to combine to make this {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!}? - Assistant: The SMILES of the linker molecules are {smiles_linkers#} and the SMILES of the nodes are {smiles_nodes#}. - - |- - User: {#I want to|I need to|I must|I have to!} {#synthesize|create|make!} a {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with a {outputs.CH4DC__names__noun} of {outputs.CH4DC#} {outputs.CH4DC__units}. What {#building blocks|linkers and nodes!} do I need to combine? - Assistant: {#I'd go for|I'd recommend!} linkers with SMILES {smiles_linkers#} and nodes with SMILES {smiles_nodes#}. {#Is there anything else you want to know?|Is there anything else I can do for you?!} - User: {#Yes, |Yeah, |Indeed, |!}I would like to know the {outputs.pure_uptake_CO2_298.00_15000__names__noun}. - Assistant: The {outputs.pure_uptake_CO2_298.00_15000__names__noun} is {outputs.pure_uptake_CO2_298.00_15000#} {outputs.pure_uptake_CO2_298.00_15000__units}. {#Is there anything else you want to know?|Is there anything else I can do for you?!} - User: {#Yes, |Yeah, |Indeed, |!}I would like to know the {outputs.pure_uptake_methane_298.00_580000__names__noun}. - Assistant: The {outputs.pure_uptake_methane_298.00_580000__names__noun} is {outputs.pure_uptake_methane_298.00_580000#} {outputs.pure_uptake_methane_298.00_580000__units}. 
{#Is there anything else you want to know?|Is there anything else I can do for you?!} - User: {#No, |Nope, |No thank you, |!}that's all I need to know. - Assistant: {#You're welcome.||Anytime.|Happy to help.!} + - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} is built from linker molecules with the SMILES {smiles_linkers#} and nodes with the SMILES {smiles_nodes#}. + - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.pure_CO2_widomHOA__names__noun} of {outputs.pure_CO2_widomHOA#} {outputs.pure_CO2_widomHOA__units}. + - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.pure_methane_widomHOA__names__noun} of {outputs.pure_methane_widomHOA#} {outputs.pure_methane_widomHOA__units}. + - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.pure_uptake_CO2_298.00_15000__names__noun} of {outputs.pure_uptake_CO2_298.00_15000#} {outputs.pure_uptake_CO2_298.00_15000__units}. + - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.pure_uptake_CO2_298.00_1600000__names__noun} of {outputs.pure_uptake_CO2_298.00_1600000#} {outputs.pure_uptake_CO2_298.00_1600000__units}. + - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.pure_uptake_methane_298.00_580000__names__noun} of {outputs.pure_uptake_methane_298.00_580000#} {outputs.pure_uptake_methane_298.00_580000__units}.
+ - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.pure_uptake_methane_298.00_6500000__names__noun} of {outputs.pure_uptake_methane_298.00_6500000#} {outputs.pure_uptake_methane_298.00_6500000__units}. + - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.logKH_CO2__names__noun} of {outputs.logKH_CO2#} {outputs.logKH_CO2__units}. + - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.logKH_CH4__names__noun} of {outputs.logKH_CH4#} {outputs.logKH_CH4__units}. + - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.CH4DC__names__noun} of {outputs.CH4DC#} {outputs.CH4DC__units}. + - |- + Question: What linker molecules are {#used|present!} in the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? + Answer: The SMILES of the linker molecules are {smiles_linkers#}. + - |- + Question: What nodes are {#used|present!} in the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? + Answer: The SMILES of the nodes are {smiles_nodes#}. + - |- + Question: What is the {outputs.pure_CO2_widomHOA__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? + Answer: The {outputs.pure_CO2_widomHOA__names__noun} is {outputs.pure_CO2_widomHOA#} {outputs.pure_CO2_widomHOA__units}. 
+ - |- + Question: What is the {outputs.pure_methane_widomHOA__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? + Answer: The {outputs.pure_methane_widomHOA__names__noun} is {outputs.pure_methane_widomHOA#} {outputs.pure_methane_widomHOA__units}. + - |- + Question: What is the {outputs.pure_uptake_CO2_298.00_15000__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? + Answer: The {outputs.pure_uptake_CO2_298.00_15000__names__noun} is {outputs.pure_uptake_CO2_298.00_15000#} {outputs.pure_uptake_CO2_298.00_15000__units}. + - |- + Question: What is the {outputs.pure_uptake_CO2_298.00_1600000__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? + Answer: The {outputs.pure_uptake_CO2_298.00_1600000__names__noun} is {outputs.pure_uptake_CO2_298.00_1600000#} {outputs.pure_uptake_CO2_298.00_1600000__units}. + - |- + Question: What is the {outputs.pure_uptake_methane_298.00_580000__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? + Answer: The {outputs.pure_uptake_methane_298.00_580000__names__noun} is {outputs.pure_uptake_methane_298.00_580000#} {outputs.pure_uptake_methane_298.00_580000__units}. + - |- + Question: What is the {outputs.pure_uptake_methane_298.00_6500000__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? 
+ Answer: The {outputs.pure_uptake_methane_298.00_6500000__names__noun} is {outputs.pure_uptake_methane_298.00_6500000#} {outputs.pure_uptake_methane_298.00_6500000__units}. + - |- + Question: What is the {outputs.logKH_CO2__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? + Answer: The {outputs.logKH_CO2__names__noun} is {outputs.logKH_CO2#} {outputs.logKH_CO2__units}. + - |- + Question: What is the {outputs.logKH_CH4__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? + Answer: The {outputs.logKH_CH4__names__noun} is {outputs.logKH_CH4#} {outputs.logKH_CH4__units}. + - |- + Question: What is the {outputs.CH4DC__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}? + Answer: The {outputs.CH4DC__names__noun} is {outputs.CH4DC#} {outputs.CH4DC__units}. + - |- + User: {#I have|I am working with|I'm interested in!} a {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}. What is the {outputs.pure_CO2_widomHOA__names__noun}? + Assistant: The {outputs.pure_CO2_widomHOA__names__noun} is {outputs.pure_CO2_widomHOA#} {outputs.pure_CO2_widomHOA__units}. + - |- + User: I just {#synthesized|made|created!} a {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}. What is the {outputs.pure_methane_widomHOA__names__noun}? + Assistant: The {outputs.pure_methane_widomHOA__names__noun} is {outputs.pure_methane_widomHOA#} {outputs.pure_methane_widomHOA__units}. 
{#Is there anything else you want to know?|Is there anything else I can do for you?!} + User: {#Yes, |Yeah, |Indeed, |!}I would like to know the {outputs.pure_uptake_CO2_298.00_15000__names__noun}. + Assistant: The {outputs.pure_uptake_CO2_298.00_15000__names__noun} is {outputs.pure_uptake_CO2_298.00_15000#} {outputs.pure_uptake_CO2_298.00_15000__units}. + - |- + User: {#I am interested in|I want to make|I want to synthesize!} a {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}. + Assistant: {#That's interesting.|Cool.|!} {#How can I help?|How can I be of assistance?|Is there anything I can help you with?!} + User: What building blocks do I need to combine to make this {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!}? + Assistant: The SMILES of the linker molecules are {smiles_linkers#} and the SMILES of the nodes are {smiles_nodes#}. + - |- + User: {#I want to|I need to|I must|I have to!} {#synthesize|create|make!} a {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with a {outputs.CH4DC__names__noun} of {outputs.CH4DC#} {outputs.CH4DC__units}. What {#building blocks|linkers and nodes!} do I need to combine? + Assistant: {#I'd go for|I'd recommend!} linkers with SMILES {smiles_linkers#} and nodes with SMILES {smiles_nodes#}. {#Is there anything else you want to know?|Is there anything else I can do for you?!} + User: {#Yes, |Yeah, |Indeed, |!}I would like to know the {outputs.pure_uptake_CO2_298.00_15000__names__noun}. + Assistant: The {outputs.pure_uptake_CO2_298.00_15000__names__noun} is {outputs.pure_uptake_CO2_298.00_15000#} {outputs.pure_uptake_CO2_298.00_15000__units}. {#Is there anything else you want to know?|Is there anything else I can do for you?!} + User: {#Yes, |Yeah, |Indeed, |!}I would like to know the {outputs.pure_uptake_methane_298.00_580000__names__noun}. 
+ Assistant: The {outputs.pure_uptake_methane_298.00_580000__names__noun} is {outputs.pure_uptake_methane_298.00_580000#} {outputs.pure_uptake_methane_298.00_580000__units}. {#Is there anything else you want to know?|Is there anything else I can do for you?!} + User: {#No, |Nope, |No thank you, |!}that's all I need to know. + Assistant: {#You're welcome.||Anytime.|Happy to help.!} diff --git a/data/tabular/cyp2c9_substrate_carbonmangels/meta.yaml b/data/tabular/cyp2c9_substrate_carbonmangels/meta.yaml index 875fd4ce3..4e950d80f 100644 --- a/data/tabular/cyp2c9_substrate_carbonmangels/meta.yaml +++ b/data/tabular/cyp2c9_substrate_carbonmangels/meta.yaml @@ -1,151 +1,150 @@ ---- name: cyp2c9_substrate_carbonmangels description: |- - CYP P450 2C9 plays a major role in the oxidation of both xenobiotic - and endogenous compounds. Substrates are drugs that are metabolized by the enzyme. - TDC used a dataset from Carbon Mangels et al, which merged information on substrates - and nonsubstrates from six publications. + CYP P450 2C9 plays a major role in the oxidation of both xenobiotic + and endogenous compounds. Substrates are drugs that are metabolized by the enzyme. + TDC used a dataset from Carbon Mangels et al, which merged information on substrates + and nonsubstrates from six publications. 
targets: - - id: CYP2C9_Substrate - description: drugs that are metabolized by CYP P450 2C9 (1) or not (0) - units: - type: boolean - names: - - noun: CYP P450 2C9 substrate - - noun: CYP2C9 substrate - - noun: substrate for CYP2C9 - - noun: substrate for CYP P450 2C9 - - verb: metabolized by CYP2C9 - - verb: metabolized by CYP P450 2C9 - uris: + - id: CYP2C9_Substrate + description: drugs that are metabolized by CYP P450 2C9 (1) or not (0) + units: + type: boolean + names: + - noun: CYP P450 2C9 substrate + - noun: CYP2C9 substrate + - noun: substrate for CYP2C9 + - noun: substrate for CYP P450 2C9 + - verb: metabolized by CYP2C9 + - verb: metabolized by CYP P450 2C9 + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: compound_name - type: Other - names: - - noun: compound name - - noun: drug name - - noun: generic drug name - description: drug name + - id: SMILES + type: SMILES + description: SMILES + - id: compound_name + type: Other + names: + - noun: compound name + - noun: drug name + - noun: generic drug name + description: drug name license: CC BY 4.0 links: - - url: https://doi.org/10.1002/minf.201100069 - description: corresponding publication - - url: https://doi.org/10.1021/ci300367a - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp2c9-substrate-carbon-mangels-et-al - description: data source + - url: https://doi.org/10.1002/minf.201100069 + description: corresponding publication + - url: https://doi.org/10.1021/ci300367a + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp2c9-substrate-carbon-mangels-et-al + description: data source num_points: 669 bibtex: - - |- - @article{CarbonMangels2011, - doi = {10.1002/minf.201100069}, - url = {https://doi.org/10.1002/minf.201100069}, - year = 
{2011}, - month = sep, - publisher = {Wiley}, - volume = {30}, - number = {10}, - pages = {885--895}, - author = {Miriam Carbon-Mangels and Michael C. Hutter}, - title = {Selecting Relevant Descriptors for Classification by Bayesian Estimates: - A Comparison with Decision Trees and Support Vector Machines Approaches for Disparate Data Sets}, - journal = {Molecular Informatics} - - |- - @article{Cheng2012, - doi = {10.1021/ci300367a}, - url = {https://doi.org/10.1021/ci300367a}, - year = {2012}, - month = nov, - publisher = {American Chemical Society (ACS)}, - volume = {52}, - number = {11}, - number = {3099--3105}, - author = {Feixiong Cheng and Weihua Li and Yadi Zhou and Jie Shen and Zengrui Wu - and Guixia Liu and Philip W. Lee and Yun Tang}, - title = {admetSAR: A Comprehensive Source and Free Tool for Assessment of Chemical ADMET Properties}, - journal = {Journal of Chemical Information and Modeling} + - |- + @article{CarbonMangels2011, + doi = {10.1002/minf.201100069}, + url = {https://doi.org/10.1002/minf.201100069}, + year = {2011}, + month = sep, + publisher = {Wiley}, + volume = {30}, + number = {10}, + pages = {885--895}, + author = {Miriam Carbon-Mangels and Michael C. Hutter}, + title = {Selecting Relevant Descriptors for Classification by Bayesian Estimates: + A Comparison with Decision Trees and Support Vector Machines Approaches for Disparate Data Sets}, + journal = {Molecular Informatics} + - |- + @article{Cheng2012, + doi = {10.1021/ci300367a}, + url = {https://doi.org/10.1021/ci300367a}, + year = {2012}, + month = nov, + publisher = {American Chemical Society (ACS)}, + volume = {52}, + number = {11}, + pages = {3099--3105}, + author = {Feixiong Cheng and Weihua Li and Yadi Zhou and Jie Shen and Zengrui Wu + and Guixia Liu and Philip W.
Lee and Yun Tang}, + title = {admetSAR: A Comprehensive Source and Free Tool for Assessment of Chemical ADMET Properties}, + journal = {Journal of Chemical Information and Modeling} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {CYP2C9_Substrate#not &NULL}identified as a {CYP2C9_Substrate__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is a {CYP2C9_Substrate__names__noun}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - esult: {CYP2C9_Substrate#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP2C9_Substrate__names__verb}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is a {CYP2C9_Substrate__names__noun}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {CYP2C9_Substrate__names__noun}? - Assistant: {CYP2C9_Substrate#No&Yes}, this molecule is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}. 
- - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2C9_Substrate__names__verb}? - Assistant: {CYP2C9_Substrate#No&Yes}, it is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is a {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__noun}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}? - Assistant: This is a molecule that is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {CYP2C9_Substrate#not &NULL}be {CYP2C9_Substrate__names__verb}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {CYP2C9_Substrate#not &NULL}be a {CYP2C9_Substrate__names__noun}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}: {SMILES#} - - Is the {SMILES__description} {SMILES#} a {CYP2C9_Substrate__names__noun}:{CYP2C9_Substrate#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is a {CYP2C9_Substrate__names__noun}. 
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{CYP2C9_Substrate#False&True} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C9_Substrate__names__verb}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {CYP2C9_Substrate%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {CYP2C9_Substrate__names__noun}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {CYP2C9_Substrate%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%CYP2C9_Substrate%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%CYP2C9_Substrate%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}. 
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {CYP2C9_Substrate#not &NULL}identified as a {CYP2C9_Substrate__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is a {CYP2C9_Substrate__names__noun}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + esult: {CYP2C9_Substrate#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP2C9_Substrate__names__verb}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is a {CYP2C9_Substrate__names__noun}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {CYP2C9_Substrate__names__noun}? + Assistant: {CYP2C9_Substrate#No&Yes}, this molecule is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2C9_Substrate__names__verb}? + Assistant: {CYP2C9_Substrate#No&Yes}, it is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is a {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__noun}? 
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}? + Assistant: This is a molecule that is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {CYP2C9_Substrate#not &NULL}be {CYP2C9_Substrate__names__verb}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {CYP2C9_Substrate#not &NULL}be a {CYP2C9_Substrate__names__noun}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}: {SMILES#} + - Is the {SMILES__description} {SMILES#} a {CYP2C9_Substrate__names__noun}:{CYP2C9_Substrate#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is a {CYP2C9_Substrate__names__noun}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{CYP2C9_Substrate#False&True} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C9_Substrate__names__verb}? 
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {CYP2C9_Substrate%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {CYP2C9_Substrate__names__noun}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {CYP2C9_Substrate%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%CYP2C9_Substrate%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%CYP2C9_Substrate%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/cyp2d6_substrate_carbonmangels/meta.yaml b/data/tabular/cyp2d6_substrate_carbonmangels/meta.yaml index 383b9de22..f7446332e 100644 --- a/data/tabular/cyp2d6_substrate_carbonmangels/meta.yaml +++ b/data/tabular/cyp2d6_substrate_carbonmangels/meta.yaml @@ -1,152 +1,151 @@ ---- name: cyp2d6_substrate_carbonmangels description: |- - CYP2D6 is primarily expressed in the liver. It is also highly expressed - in areas of the central nervous system, including the substantia nigra. TDC - used a dataset from Carbon Mangels et al, which merged information on substrates - and nonsubstrates from six publications. 
+ CYP2D6 is primarily expressed in the liver. It is also highly expressed + in areas of the central nervous system, including the substantia nigra. TDC + used a dataset from Carbon Mangels et al, which merged information on substrates + and nonsubstrates from six publications. targets: - - id: CYP2D6_Substrate - description: drugs that are metabolized by the CYP P450 2D6 (1) or not (0) - units: - type: boolean - names: - - noun: CYP P450 2D6 substrate - - noun: CYP2D6 substrate - - noun: substrate for CYP2D6 - - noun: substrate for CYP P450 2D6 - - verb: metabolized by CYP2D6 - - verb: metabolized by CYP P450 2D6 - uris: + - id: CYP2D6_Substrate + description: drugs that are metabolized by the CYP P450 2D6 (1) or not (0) + units: + type: boolean + names: + - noun: CYP P450 2D6 substrate + - noun: CYP2D6 substrate + - noun: substrate for CYP2D6 + - noun: substrate for CYP P450 2D6 + - verb: metabolized by CYP2D6 + - verb: metabolized by CYP P450 2D6 + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: compound_name - type: Other - names: - - noun: compound name - - noun: drug name - - noun: generic drug name - description: drug name + - id: SMILES + type: SMILES + description: SMILES + - id: compound_name + type: Other + names: + - noun: compound name + - noun: drug name + - noun: generic drug name + description: drug name license: CC BY 4.0 links: - - url: https://doi.org/10.1002/minf.201100069 - description: corresponding publication - - url: https://doi.org/10.1021/ci300367a - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp2d6-substrate-carbon-mangels-et-al - description: data source + - url: https://doi.org/10.1002/minf.201100069 + description: corresponding publication + - url: https://doi.org/10.1021/ci300367a + description: corresponding 
publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp2d6-substrate-carbon-mangels-et-al + description: data source num_points: 667 bibtex: - - |- - @article{CarbonMangels2011, - doi = {10.1002/minf.201100069}, - url = {https://doi.org/10.1002/minf.201100069}, - year = {2011}, - month = sep, - publisher = {Wiley}, - volume = {30}, - number = {10}, - pages = {885--895}, - author = {Miriam Carbon-Mangels and Michael C. Hutter}, - title = {Selecting Relevant Descriptors for Classification by Bayesian Estimates: - A Comparison with Decision Trees and Support Vector Machines Approaches for Disparate Data Sets}, - journal = {Molecular Informatics} - - |- - @article{Cheng2012, - doi = {10.1021/ci300367a}, - url = {https://doi.org/10.1021/ci300367a}, - year = {2012}, - month = nov, - publisher = {American Chemical Society (ACS)}, - volume = {52}, - number = {11}, - pages = {3099--3105}, - author = {Feixiong Cheng and Weihua Li and Yadi Zhou and Jie Shen - and Zengrui Wu and Guixia Liu and Philip W. Lee and Yun Tang}, - title = {admetSAR: A Comprehensive Source and Free Tool for - Assessment of Chemical ADMET Properties}, - journal = {Journal of Chemical Information and Modeling} + - |- + @article{CarbonMangels2011, + doi = {10.1002/minf.201100069}, + url = {https://doi.org/10.1002/minf.201100069}, + year = {2011}, + month = sep, + publisher = {Wiley}, + volume = {30}, + number = {10}, + pages = {885--895}, + author = {Miriam Carbon-Mangels and Michael C. 
Hutter}, + title = {Selecting Relevant Descriptors for Classification by Bayesian Estimates: + A Comparison with Decision Trees and Support Vector Machines Approaches for Disparate Data Sets}, + journal = {Molecular Informatics} + - |- + @article{Cheng2012, + doi = {10.1021/ci300367a}, + url = {https://doi.org/10.1021/ci300367a}, + year = {2012}, + month = nov, + publisher = {American Chemical Society (ACS)}, + volume = {52}, + number = {11}, + pages = {3099--3105}, + author = {Feixiong Cheng and Weihua Li and Yadi Zhou and Jie Shen + and Zengrui Wu and Guixia Liu and Philip W. Lee and Yun Tang}, + title = {admetSAR: A Comprehensive Source and Free Tool for + Assessment of Chemical ADMET Properties}, + journal = {Journal of Chemical Information and Modeling} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {CYP2D6_Substrate#not &NULL}identified as a {CYP2D6_Substrate__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is a {CYP2D6_Substrate__names__noun}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - esult: {CYP2D6_Substrate#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP2D6_Substrate__names__verb}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. 
- Result: This molecule is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is a {CYP2D6_Substrate__names__noun}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {CYP2D6_Substrate__names__noun}? - Assistant: {CYP2D6_Substrate#No&Yes}, this molecule is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2D6_Substrate__names__verb}? - Assistant: {CYP2D6_Substrate#No&Yes}, it is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is a {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__noun}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}? - Assistant: This is a molecule that is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {CYP2D6_Substrate#not &NULL}be {CYP2D6_Substrate__names__verb}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. 
!}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {CYP2D6_Substrate#not &NULL}be a {CYP2D6_Substrate__names__noun}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}: {SMILES#} - - Is the {SMILES__description} {SMILES#} a {CYP2D6_Substrate__names__noun}:{CYP2D6_Substrate#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is a {CYP2D6_Substrate__names__noun}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{CYP2D6_Substrate#False&True} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2D6_Substrate__names__verb}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {CYP2D6_Substrate%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {CYP2D6_Substrate__names__noun}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {CYP2D6_Substrate%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%CYP2D6_Substrate%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. 
- Question: Which molecules are {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%CYP2D6_Substrate%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {CYP2D6_Substrate#not &NULL}identified as a {CYP2D6_Substrate__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is a {CYP2D6_Substrate__names__noun}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + esult: {CYP2D6_Substrate#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP2D6_Substrate__names__verb}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is a {CYP2D6_Substrate__names__noun}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {CYP2D6_Substrate__names__noun}? 
+ Assistant: {CYP2D6_Substrate#No&Yes}, this molecule is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2D6_Substrate__names__verb}? + Assistant: {CYP2D6_Substrate#No&Yes}, it is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is a {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__noun}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}? + Assistant: This is a molecule that is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {CYP2D6_Substrate#not &NULL}be {CYP2D6_Substrate__names__verb}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {CYP2D6_Substrate#not &NULL}be a {CYP2D6_Substrate__names__noun}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}: {SMILES#} + - Is the {SMILES__description} {SMILES#} a {CYP2D6_Substrate__names__noun}:{CYP2D6_Substrate#no&yes} + - |- + Task: Please classify a molecule based on the description. 
+ Description: A molecule that is a {CYP2D6_Substrate__names__noun}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{CYP2D6_Substrate#False&True} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2D6_Substrate__names__verb}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {CYP2D6_Substrate%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {CYP2D6_Substrate__names__noun}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {CYP2D6_Substrate%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%CYP2D6_Substrate%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {SMILES%CYP2D6_Substrate%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/cyp3a4_substrate_carbonmangels/meta.yaml b/data/tabular/cyp3a4_substrate_carbonmangels/meta.yaml index 42cc5b3b3..020c1e096 100644 --- a/data/tabular/cyp3a4_substrate_carbonmangels/meta.yaml +++ b/data/tabular/cyp3a4_substrate_carbonmangels/meta.yaml @@ -1,153 +1,152 @@ ---- name: cyp3a4_substrate_carbonmangels description: |- - CYP3A4 is an important enzyme in the body, mainly found in the liver - and in the intestine. It oxidizes small foreign organic molecules (xenobiotics), - such as toxins or drugs, so that they can be removed from the body. TDC used - a dataset from Carbon Mangels et al, which merged information on substrates - and nonsubstrates from six publications. + CYP3A4 is an important enzyme in the body, mainly found in the liver + and in the intestine. It oxidizes small foreign organic molecules (xenobiotics), + such as toxins or drugs, so that they can be removed from the body. TDC used + a dataset from Carbon Mangels et al, which merged information on substrates + and nonsubstrates from six publications. 
targets: - - id: CYP3A4_Substrate - description: drugs that are metabolized by the CYP P450 3A4 (1) or not (0) - units: - type: boolean - names: - - noun: CYP P450 3A4 substrate - - noun: CYP3A4 substrate - - noun: substrate for CYP3A4 - - noun: substrate for CYP P450 3A4 - - verb: metabolized by CYP3A4 - - verb: metabolized by CYP P450 3A4 - uris: + - id: CYP3A4_Substrate + description: drugs that are metabolized by the CYP P450 3A4 (1) or not (0) + units: + type: boolean + names: + - noun: CYP P450 3A4 substrate + - noun: CYP3A4 substrate + - noun: substrate for CYP3A4 + - noun: substrate for CYP P450 3A4 + - verb: metabolized by CYP3A4 + - verb: metabolized by CYP P450 3A4 + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: compound_name - type: Other - names: - - noun: compound name - - noun: drug name - - noun: generic drug name - description: drug name + - id: SMILES + type: SMILES + description: SMILES + - id: compound_name + type: Other + names: + - noun: compound name + - noun: drug name + - noun: generic drug name + description: drug name license: CC BY 4.0 links: - - url: https://doi.org/10.1002/minf.201100069 - description: corresponding publication - - url: https://doi.org/10.1021/ci300367a - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp3a4-substrate-carbon-mangels-et-al - description: data source + - url: https://doi.org/10.1002/minf.201100069 + description: corresponding publication + - url: https://doi.org/10.1021/ci300367a + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp3a4-substrate-carbon-mangels-et-al + description: data source num_points: 670 bibtex: - - |- - @article{CarbonMangels2011, - doi = {10.1002/minf.201100069}, - url = {https://doi.org/10.1002/minf.201100069}, - year 
= {2011}, - month = sep, - publisher = {Wiley}, - volume = {30}, - number = {10}, - pages = {885--895}, - author = {Miriam Carbon-Mangels and Michael C. Hutter}, - title = {Selecting Relevant Descriptors for Classification by Bayesian Estimates: - A Comparison with Decision Trees and Support Vector Machines Approaches for Disparate Data Sets}, - journal = {Molecular Informatics} - - |- - @article{Cheng2012, - doi = {10.1021/ci300367a}, - url = {https://doi.org/10.1021/ci300367a}, - year = {2012}, - month = nov, - publisher = {American Chemical Society (ACS)}, - volume = {52}, - number = {11}, - pages = {3099--3105}, - author = {Feixiong Cheng and Weihua Li and Yadi Zhou and Jie Shen - and Zengrui Wu and Guixia Liu and Philip W. Lee and Yun Tang}, - title = {admetSAR: A Comprehensive Source and Free Tool for - Assessment of Chemical ADMET Properties}, - journal = {Journal of Chemical Information and Modeling} + - |- + @article{CarbonMangels2011, + doi = {10.1002/minf.201100069}, + url = {https://doi.org/10.1002/minf.201100069}, + year = {2011}, + month = sep, + publisher = {Wiley}, + volume = {30}, + number = {10}, + pages = {885--895}, + author = {Miriam Carbon-Mangels and Michael C. Hutter}, + title = {Selecting Relevant Descriptors for Classification by Bayesian Estimates: + A Comparison with Decision Trees and Support Vector Machines Approaches for Disparate Data Sets}, + journal = {Molecular Informatics} + - |- + @article{Cheng2012, + doi = {10.1021/ci300367a}, + url = {https://doi.org/10.1021/ci300367a}, + year = {2012}, + month = nov, + publisher = {American Chemical Society (ACS)}, + volume = {52}, + number = {11}, + pages = {3099--3105}, + author = {Feixiong Cheng and Weihua Li and Yadi Zhou and Jie Shen + and Zengrui Wu and Guixia Liu and Philip W. 
Lee and Yun Tang}, + title = {admetSAR: A Comprehensive Source and Free Tool for + Assessment of Chemical ADMET Properties}, + journal = {Journal of Chemical Information and Modeling} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {CYP3A4_Substrate#not &NULL}identified as a {CYP3A4_Substrate__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is a {CYP3A4_Substrate__names__noun}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - esult: {CYP3A4_Substrate#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP3A4_Substrate__names__verb}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is a {CYP3A4_Substrate__names__noun}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {CYP3A4_Substrate__names__noun}? - Assistant: {CYP3A4_Substrate#No&Yes}, this molecule is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}. 
- - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {CYP3A4_Substrate__names__verb}? - Assistant: {CYP3A4_Substrate#No&Yes}, it is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is a {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__noun}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}? - Assistant: This is a molecule that is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {CYP3A4_Substrate#not &NULL}be {CYP3A4_Substrate__names__verb}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {CYP3A4_Substrate#not &NULL}be a {CYP3A4_Substrate__names__noun}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}: {SMILES#} - - Is the {SMILES__description} {SMILES#} a {CYP3A4_Substrate__names__noun}:{CYP3A4_Substrate#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is a {CYP3A4_Substrate__names__noun}. 
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{CYP3A4_Substrate#False&True} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP3A4_Substrate__names__verb}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {CYP3A4_Substrate%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {CYP3A4_Substrate__names__noun}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {CYP3A4_Substrate%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%CYP3A4_Substrate%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%CYP3A4_Substrate%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}. 
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {CYP3A4_Substrate#not &NULL}identified as a {CYP3A4_Substrate__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is a {CYP3A4_Substrate__names__noun}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {CYP3A4_Substrate#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP3A4_Substrate__names__verb}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is a {CYP3A4_Substrate__names__noun}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {CYP3A4_Substrate__names__noun}? + Assistant: {CYP3A4_Substrate#No&Yes}, this molecule is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {CYP3A4_Substrate__names__verb}? + Assistant: {CYP3A4_Substrate#No&Yes}, it is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is a {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__noun}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}? + Assistant: This is a molecule that is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {CYP3A4_Substrate#not &NULL}be {CYP3A4_Substrate__names__verb}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {CYP3A4_Substrate#not &NULL}be a {CYP3A4_Substrate__names__noun}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}: {SMILES#} + - Is the {SMILES__description} {SMILES#} a {CYP3A4_Substrate__names__noun}:{CYP3A4_Substrate#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is a {CYP3A4_Substrate__names__noun}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{CYP3A4_Substrate#False&True} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP3A4_Substrate__names__verb}? 
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {CYP3A4_Substrate%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {CYP3A4_Substrate__names__noun}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {CYP3A4_Substrate%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%CYP3A4_Substrate%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%CYP3A4_Substrate%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/cyp_p450_1a2_inhibition_veith_et_al/meta.yaml b/data/tabular/cyp_p450_1a2_inhibition_veith_et_al/meta.yaml index 1fb61165c..ae5056c6c 100644 --- a/data/tabular/cyp_p450_1a2_inhibition_veith_et_al/meta.yaml +++ b/data/tabular/cyp_p450_1a2_inhibition_veith_et_al/meta.yaml @@ -1,140 +1,139 @@ ---- name: cyp_p450_1a2_inhibition_veith_et_al description: |- - The CYP P450 genes are involved in the formation and breakdown (metabolism) - of various molecules and chemicals within cells. 
Specifically, CYP1A2 localizes - to the endoplasmic reticulum and its expression is induced by some polycyclic - aromatic hydrocarbons (PAHs), some of which are found in cigarette smoke. It - is able to metabolize some PAHs to carcinogenic intermediates. Other xenobiotic - substrates for this enzyme include caffeine, aflatoxin B1, and acetaminophen. + The CYP P450 genes are involved in the formation and breakdown (metabolism) + of various molecules and chemicals within cells. Specifically, CYP1A2 localizes + to the endoplasmic reticulum and its expression is induced by some polycyclic + aromatic hydrocarbons (PAHs), some of which are found in cigarette smoke. It + is able to metabolize some PAHs to carcinogenic intermediates. Other xenobiotic + substrates for this enzyme include caffeine, aflatoxin B1, and acetaminophen. targets: - - id: CYP1A2_inhibition - description: ability of the drug to inhibit CYP P450 1A2 (1) or not (0) - units: - type: boolean - names: - - noun: inhibition of CYP1A2 - - noun: inhibition of CYP P450 1A2 - - adjective: CYP1A2 inhibition - - adjective: CYP P450 1A2 inhibition - - verb: inhibits CYP P450 1A2 - - verb: inhibits CYP1A2 - - gerund: inhibiting CYP P450 1A2 - - gerund: inhibiting CYP1A2 - uris: + - id: CYP1A2_inhibition + description: ability of the drug to inhibit CYP P450 1A2 (1) or not (0) + units: + type: boolean + names: + - noun: inhibition of CYP1A2 + - noun: inhibition of CYP P450 1A2 + - adjective: CYP1A2 inhibition + - adjective: CYP P450 1A2 inhibition + - verb: inhibits CYP P450 1A2 + - verb: inhibits CYP1A2 + - gerund: inhibiting CYP P450 1A2 + - gerund: inhibiting CYP1A2 + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://doi.org/10.1038/nbt.1581 - description: 
corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-1a2-inhibition-veith-et-al - description: data source + - url: https://doi.org/10.1038/nbt.1581 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-1a2-inhibition-veith-et-al + description: data source num_points: 12579 bibtex: - - |- - @article{Veith2009, - doi = {10.1038/nbt.1581}, - url = {https://doi.org/10.1038/nbt.1581}, - year = {2009}, - month = oct, - publisher = {Springer Science and Business Media LLC}, - volume = {27}, - number = {11}, - pages = {1050--1055}, - author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James - and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese - and Christopher P Austin and David G Lloyd and Douglas S Auld}, - title = {Comprehensive characterization of cytochrome P450 isozyme selectivity - across chemical libraries}, - journal = {Nature Biotechnology} + - |- + @article{Veith2009, + doi = {10.1038/nbt.1581}, + url = {https://doi.org/10.1038/nbt.1581}, + year = {2009}, + month = oct, + publisher = {Springer Science and Business Media LLC}, + volume = {27}, + number = {11}, + pages = {1050--1055}, + author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James + and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese + and Christopher P Austin and David G Lloyd and Douglas S Auld}, + title = {Comprehensive characterization of cytochrome P450 isozyme selectivity + across chemical libraries}, + journal = {Nature Biotechnology} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP1A2_inhibition#no &NULL}{CYP1A2_inhibition__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP1A2_inhibition#no &NULL}{CYP1A2_inhibition__names__noun}. 
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP1A2_inhibition#no &NULL}{CYP1A2_inhibition__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP1A2_inhibition__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {CYP1A2_inhibition#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP1A2_inhibition__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {CYP1A2_inhibition__names__gerund}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP1A2_inhibition__names__gerund}? - Assistant: {CYP1A2_inhibition#No&Yes}, this molecule is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {CYP1A2_inhibition__names__gerund}? - Assistant: {CYP1A2_inhibition#No&Yes}, it is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}? 
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}? - Assistant: This is a molecule that is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {CYP1A2_inhibition#not &NULL}be {CYP1A2_inhibition__names__gerund}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {CYP1A2_inhibition#not &NULL}be {CYP1A2_inhibition__names__gerund}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {CYP1A2_inhibition__names__gerund}:{CYP1A2_inhibition#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP1A2_inhibition__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{CYP1A2_inhibition#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP1A2_inhibition__names__gerund}. 
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result:This molecule is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}. - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP1A2_inhibition__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {CYP1A2_inhibition%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP1A2_inhibition__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {CYP1A2_inhibition%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%CYP1A2_inhibition%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%CYP1A2_inhibition%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP1A2_inhibition#no &NULL}{CYP1A2_inhibition__names__noun}. 
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP1A2_inhibition#no &NULL}{CYP1A2_inhibition__names__noun}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP1A2_inhibition#no &NULL}{CYP1A2_inhibition__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP1A2_inhibition__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {CYP1A2_inhibition#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP1A2_inhibition__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {CYP1A2_inhibition__names__gerund}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP1A2_inhibition__names__gerund}? + Assistant: {CYP1A2_inhibition#No&Yes}, this molecule is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {CYP1A2_inhibition__names__gerund}? + Assistant: {CYP1A2_inhibition#No&Yes}, it is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}. 
+ - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}? + Assistant: This is a molecule that is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {CYP1A2_inhibition#not &NULL}be {CYP1A2_inhibition__names__gerund}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {CYP1A2_inhibition#not &NULL}be {CYP1A2_inhibition__names__gerund}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {CYP1A2_inhibition__names__gerund}:{CYP1A2_inhibition#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP1A2_inhibition__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. 
+ Result:{CYP1A2_inhibition#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP1A2_inhibition__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result:This molecule is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}. + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP1A2_inhibition__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {CYP1A2_inhibition%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP1A2_inhibition__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {CYP1A2_inhibition%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%CYP1A2_inhibition%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {SMILES%CYP1A2_inhibition%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/cyp_p450_2c19_inhibition_veith_et_al/meta.yaml b/data/tabular/cyp_p450_2c19_inhibition_veith_et_al/meta.yaml index 787c4de0b..239aa54ed 100644 --- a/data/tabular/cyp_p450_2c19_inhibition_veith_et_al/meta.yaml +++ b/data/tabular/cyp_p450_2c19_inhibition_veith_et_al/meta.yaml @@ -1,140 +1,139 @@ ---- name: cyp_p450_2c19_inhibition_veith_et_al description: |- - The CYP P450 genes are essential in the breakdown (metabolism) of - various molecules and chemicals within cells. A drug that can inhibit these - enzymes would mean poor metabolism to this drug and other drugs, which could lead - to drug-drug interactions and adverse effects. Specifically, the CYP2C19 gene - provides instructions for making an enzyme of the endoplasmic reticulum, which - is involved in protein processing and transport. + The CYP P450 genes are essential in the breakdown (metabolism) of + various molecules and chemicals within cells. A drug that can inhibit these + enzymes would mean poor metabolism to this drug and other drugs, which could lead + to drug-drug interactions and adverse effects. Specifically, the CYP2C19 gene + provides instructions for making an enzyme of the endoplasmic reticulum, which + is involved in protein processing and transport. 
targets: - - id: CYP2C19_inhibition - description: ability of the drug to inhibit CYP 2C19 (1) or not (0) - units: - type: boolean - names: - - noun: inhibition of CYP2C19 - - noun: inhibition of CYP P450 2C19 - - adjective: CYP2C19 inhibition - - adjective: CYP P450 2C19 inhibition - - verb: inhibits CYP P450 2C19 - - verb: inhibits CYP2C19 - - gerund: inhibiting CYP P450 2C19 - - gerund: inhibiting CYP2C19 - uris: + - id: CYP2C19_inhibition + description: ability of the drug to inhibit CYP 2C19 (1) or not (0) + units: + type: boolean + names: + - noun: inhibition of CYP2C19 + - noun: inhibition of CYP P450 2C19 + - adjective: CYP2C19 inhibition + - adjective: CYP P450 2C19 inhibition + - verb: inhibits CYP P450 2C19 + - verb: inhibits CYP2C19 + - gerund: inhibiting CYP P450 2C19 + - gerund: inhibiting CYP2C19 + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://doi.org/10.1038/nbt.1581 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-2c19-inhibition-veith-et-al - description: data source + - url: https://doi.org/10.1038/nbt.1581 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-2c19-inhibition-veith-et-al + description: data source num_points: 12665 bibtex: - - |- - @article{Veith2009, - doi = {10.1038/nbt.1581}, - url = {https://doi.org/10.1038/nbt.1581}, - year = {2009}, - month = oct, - publisher = {Springer Science and Business Media LLC}, - volume = {27}, - number = {11}, - pages = {1050--1055}, - author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James - and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese - and Christopher P Austin and David G Lloyd and Douglas S 
Auld}, - title = {Comprehensive characterization of cytochrome P450 isozyme selectivity - across chemical libraries}, - journal = {Nature Biotechnology} + - |- + @article{Veith2009, + doi = {10.1038/nbt.1581}, + url = {https://doi.org/10.1038/nbt.1581}, + year = {2009}, + month = oct, + publisher = {Springer Science and Business Media LLC}, + volume = {27}, + number = {11}, + pages = {1050--1055}, + author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James + and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese + and Christopher P Austin and David G Lloyd and Douglas S Auld}, + title = {Comprehensive characterization of cytochrome P450 isozyme selectivity + across chemical libraries}, + journal = {Nature Biotechnology} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP2C19_inhibition#no &NULL}{CYP2C19_inhibition__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP2C19_inhibition#no &NULL}{CYP2C19_inhibition__names__noun}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP2C19_inhibition#no &NULL}{CYP2C19_inhibition__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP2C19_inhibition__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {CYP2C19_inhibition#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP2C19_inhibition__names__gerund}. 
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {CYP2C19_inhibition__names__gerund}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP2C19_inhibition__names__gerund}? - Assistant: {CYP2C19_inhibition#No&Yes}, this molecule is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2C19_inhibition__names__gerund}? - Assistant: {CYP2C19_inhibition#No&Yes}, it is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}? - Assistant: This is a molecule that is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {CYP2C19_inhibition#not &NULL}be {CYP2C19_inhibition__names__gerund}. 
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {CYP2C19_inhibition#not &NULL}be {CYP2C19_inhibition__names__gerund}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {CYP2C19_inhibition__names__gerund}:{CYP2C19_inhibition#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP2C19_inhibition__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{CYP2C19_inhibition#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP2C19_inhibition__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result:This molecule is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}. - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C19_inhibition__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {CYP2C19_inhibition%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. 
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C19_inhibition__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {CYP2C19_inhibition%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%CYP2C19_inhibition%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%CYP2C19_inhibition%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP2C19_inhibition#no &NULL}{CYP2C19_inhibition__names__noun}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP2C19_inhibition#no &NULL}{CYP2C19_inhibition__names__noun}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP2C19_inhibition#no &NULL}{CYP2C19_inhibition__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP2C19_inhibition__names__gerund}. 
+ {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {CYP2C19_inhibition#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP2C19_inhibition__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {CYP2C19_inhibition__names__gerund}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP2C19_inhibition__names__gerund}? + Assistant: {CYP2C19_inhibition#No&Yes}, this molecule is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2C19_inhibition__names__gerund}? + Assistant: {CYP2C19_inhibition#No&Yes}, it is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}? + Assistant: This is a molecule that is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. 
!}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {CYP2C19_inhibition#not &NULL}be {CYP2C19_inhibition__names__gerund}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {CYP2C19_inhibition#not &NULL}be {CYP2C19_inhibition__names__gerund}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {CYP2C19_inhibition__names__gerund}:{CYP2C19_inhibition#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP2C19_inhibition__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{CYP2C19_inhibition#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP2C19_inhibition__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result:This molecule is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}. + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C19_inhibition__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. 
+ Options: + {CYP2C19_inhibition%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C19_inhibition__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {CYP2C19_inhibition%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%CYP2C19_inhibition%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%CYP2C19_inhibition%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/cyp_p450_2c9_inhibition_veith_et_al/meta.yaml b/data/tabular/cyp_p450_2c9_inhibition_veith_et_al/meta.yaml index ff8245e88..56889d1e7 100644 --- a/data/tabular/cyp_p450_2c9_inhibition_veith_et_al/meta.yaml +++ b/data/tabular/cyp_p450_2c9_inhibition_veith_et_al/meta.yaml @@ -1,137 +1,136 @@ ---- name: cyp_p450_2c9_inhibition_veith_et_al description: |- - The CYP P450 genes are involved in the formation and breakdown (metabolism) - of various molecules and chemicals within cells. Specifically, the CYP P450 - 2C9 plays a major role in the oxidation of both xenobiotic and endogenous compounds. + The CYP P450 genes are involved in the formation and breakdown (metabolism) + of various molecules and chemicals within cells. 
Specifically, the CYP P450 + 2C9 plays a major role in the oxidation of both xenobiotic and endogenous compounds. targets: - - id: CYP2C9_inhibition - description: ability of the drug to inhibit CYP P450 2C9 (1) or not (0) - units: - type: boolean - names: - - noun: inhibition of CYP2C9 - - noun: inhibition of CYP P450 2C9 - - adjective: CYP2C9 inhibition - - adjective: CYP P450 2C9 inhibition - - verb: inhibits CYP P450 2C9 - - verb: inhibits CYP2C9 - - gerund: inhibiting CYP P450 2C9 - - gerund: inhibiting CYP2C9 - uris: + - id: CYP2C9_inhibition + description: ability of the drug to inhibit CYP P450 2C9 (1) or not (0) + units: + type: boolean + names: + - noun: inhibition of CYP2C9 + - noun: inhibition of CYP P450 2C9 + - adjective: CYP2C9 inhibition + - adjective: CYP P450 2C9 inhibition + - verb: inhibits CYP P450 2C9 + - verb: inhibits CYP2C9 + - gerund: inhibiting CYP P450 2C9 + - gerund: inhibiting CYP2C9 + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://doi.org/10.1038/nbt.1581 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-2c9-inhibition-veith-et-al - description: data source + - url: https://doi.org/10.1038/nbt.1581 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-2c9-inhibition-veith-et-al + description: data source num_points: 12092 bibtex: - - |- - @article{Veith2009, - doi = {10.1038/nbt.1581}, - url = {https://doi.org/10.1038/nbt.1581}, - year = {2009}, - month = oct, - publisher = {Springer Science and Business Media LLC}, - volume = {27}, - number = {11}, - pages = {1050--1055}, - author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James - and Darren Fayne and 
Natalia Artemenko and Min Shen and James Inglese - and Christopher P Austin and David G Lloyd and Douglas S Auld}, - title = {Comprehensive characterization of cytochrome P450 isozyme selectivity - across chemical libraries}, - journal = {Nature Biotechnology} + - |- + @article{Veith2009, + doi = {10.1038/nbt.1581}, + url = {https://doi.org/10.1038/nbt.1581}, + year = {2009}, + month = oct, + publisher = {Springer Science and Business Media LLC}, + volume = {27}, + number = {11}, + pages = {1050--1055}, + author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James + and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese + and Christopher P Austin and David G Lloyd and Douglas S Auld}, + title = {Comprehensive characterization of cytochrome P450 isozyme selectivity + across chemical libraries}, + journal = {Nature Biotechnology} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP2C9_inhibition#no &NULL}{CYP2C9_inhibition__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP2C9_inhibition#no &NULL}{CYP2C9_inhibition__names__noun}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP2C9_inhibition#no &NULL}{CYP2C9_inhibition__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP2C9_inhibition__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {CYP2C9_inhibition#False&True} - - |- - Task: Please classify a molecule based on the description. 
- Description: A molecule that is {CYP2C9_inhibition__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {CYP2C9_inhibition__names__gerund}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP2C9_inhibition__names__gerund}? - Assistant: {CYP2C9_inhibition#No&Yes}, this molecule is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2C9_inhibition__names__gerund}? - Assistant: {CYP2C9_inhibition#No&Yes}, it is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}? - Assistant: This is a molecule that is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {CYP2C9_inhibition#not &NULL}be {CYP2C9_inhibition__names__gerund}. 
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {CYP2C9_inhibition#not &NULL}be {CYP2C9_inhibition__names__gerund}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {CYP2C9_inhibition__names__gerund}:{CYP2C9_inhibition#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP2C9_inhibition__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{CYP2C9_inhibition#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP2C9_inhibition__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result:This molecule is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}. - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C9_inhibition__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {CYP2C9_inhibition%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. 
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C9_inhibition__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {CYP2C9_inhibition%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%CYP2C9_inhibition%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%CYP2C9_inhibition%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP2C9_inhibition#no &NULL}{CYP2C9_inhibition__names__noun}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP2C9_inhibition#no &NULL}{CYP2C9_inhibition__names__noun}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP2C9_inhibition#no &NULL}{CYP2C9_inhibition__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP2C9_inhibition__names__gerund}. 
+ {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {CYP2C9_inhibition#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP2C9_inhibition__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {CYP2C9_inhibition__names__gerund}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP2C9_inhibition__names__gerund}? + Assistant: {CYP2C9_inhibition#No&Yes}, this molecule is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2C9_inhibition__names__gerund}? + Assistant: {CYP2C9_inhibition#No&Yes}, it is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}? + Assistant: This is a molecule that is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. 
!}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {CYP2C9_inhibition#not &NULL}be {CYP2C9_inhibition__names__gerund}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {CYP2C9_inhibition#not &NULL}be {CYP2C9_inhibition__names__gerund}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {CYP2C9_inhibition__names__gerund}:{CYP2C9_inhibition#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP2C9_inhibition__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{CYP2C9_inhibition#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP2C9_inhibition__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result:This molecule is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}. + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C9_inhibition__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. 
+ Options: + {CYP2C9_inhibition%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C9_inhibition__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {CYP2C9_inhibition%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%CYP2C9_inhibition%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%CYP2C9_inhibition%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/cyp_p450_2d6_inhibition_veith_et_al/meta.yaml b/data/tabular/cyp_p450_2d6_inhibition_veith_et_al/meta.yaml index 734bb7587..c1c5cb9a7 100644 --- a/data/tabular/cyp_p450_2d6_inhibition_veith_et_al/meta.yaml +++ b/data/tabular/cyp_p450_2d6_inhibition_veith_et_al/meta.yaml @@ -1,138 +1,137 @@ ---- name: cyp_p450_2d6_inhibition_veith_et_al description: |- - The CYP P450 genes are involved in the formation and breakdown (metabolism) - of various molecules and chemicals within cells. Specifically, CYP2D6 is primarily - expressed in the liver. It is also highly expressed in areas of the central - nervous system, including the substantia nigra. 
+ The CYP P450 genes are involved in the formation and breakdown (metabolism) + of various molecules and chemicals within cells. Specifically, CYP2D6 is primarily + expressed in the liver. It is also highly expressed in areas of the central + nervous system, including the substantia nigra. targets: - - id: CYP2D6_inhibition - description: ability of the drug to inhibit CYP P450 2D6 (1) or not (0) - units: - type: boolean - names: - - noun: inhibition of CYP2D6 - - noun: inhibition of CYP P450 2D6 - - adjective: CYP2D6 inhibition - - adjective: CYP P450 2D6 inhibition - - verb: inhibits CYP P450 2D6 - - verb: inhibits CYP2D6 - - gerund: inhibiting CYP P450 2D6 - - gerund: inhibiting CYP2D6 - uris: + - id: CYP2D6_inhibition + description: ability of the drug to inhibit CYP P450 2D6 (1) or not (0) + units: + type: boolean + names: + - noun: inhibition of CYP2D6 + - noun: inhibition of CYP P450 2D6 + - adjective: CYP2D6 inhibition + - adjective: CYP P450 2D6 inhibition + - verb: inhibits CYP P450 2D6 + - verb: inhibits CYP2D6 + - gerund: inhibiting CYP P450 2D6 + - gerund: inhibiting CYP2D6 + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://doi.org/10.1038/nbt.1581 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-2d6-inhibition-veith-et-al - description: data source + - url: https://doi.org/10.1038/nbt.1581 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-2d6-inhibition-veith-et-al + description: data source num_points: 13130 bibtex: - - |- - @article{Veith2009, - doi = {10.1038/nbt.1581}, - url = {https://doi.org/10.1038/nbt.1581}, - year = {2009}, - month = oct, - publisher = {Springer Science and 
Business Media LLC}, - volume = {27}, - number = {11}, - pages = {1050--1055}, - author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James - and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese - and Christopher P Austin and David G Lloyd and Douglas S Auld}, - title = {Comprehensive characterization of cytochrome P450 isozyme selectivity - across chemical libraries}, - journal = {Nature Biotechnology} + - |- + @article{Veith2009, + doi = {10.1038/nbt.1581}, + url = {https://doi.org/10.1038/nbt.1581}, + year = {2009}, + month = oct, + publisher = {Springer Science and Business Media LLC}, + volume = {27}, + number = {11}, + pages = {1050--1055}, + author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James + and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese + and Christopher P Austin and David G Lloyd and Douglas S Auld}, + title = {Comprehensive characterization of cytochrome P450 isozyme selectivity + across chemical libraries}, + journal = {Nature Biotechnology} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP2D6_inhibition#no &NULL}{CYP2D6_inhibition__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP2D6_inhibition#no &NULL}{CYP2D6_inhibition__names__noun}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP2D6_inhibition#no &NULL}{CYP2D6_inhibition__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP2D6_inhibition__names__gerund}. 
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {CYP2D6_inhibition#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP2D6_inhibition__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {CYP2D6_inhibition__names__gerund}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP2D6_inhibition__names__gerund}? - Assistant: {CYP2D6_inhibition#No&Yes}, this molecule is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2D6_inhibition__names__gerund}? - Assistant: {CYP2D6_inhibition#No&Yes}, it is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}? - Assistant: This is a molecule that is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. 
!}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {CYP2D6_inhibition#not &NULL}be {CYP2D6_inhibition__names__gerund}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {CYP2D6_inhibition#not &NULL}be {CYP2D6_inhibition__names__gerund}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {CYP2D6_inhibition__names__gerund}:{CYP2D6_inhibition#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP2D6_inhibition__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{CYP2D6_inhibition#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP2D6_inhibition__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result:This molecule is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}. - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2D6_inhibition__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. 
- Options: - {CYP2D6_inhibition%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2D6_inhibition__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {CYP2D6_inhibition%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%CYP2D6_inhibition%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%CYP2D6_inhibition%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP2D6_inhibition#no &NULL}{CYP2D6_inhibition__names__noun}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP2D6_inhibition#no &NULL}{CYP2D6_inhibition__names__noun}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP2D6_inhibition#no &NULL}{CYP2D6_inhibition__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP2D6_inhibition__names__gerund}. 
+ {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {CYP2D6_inhibition#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP2D6_inhibition__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {CYP2D6_inhibition__names__gerund}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP2D6_inhibition__names__gerund}? + Assistant: {CYP2D6_inhibition#No&Yes}, this molecule is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2D6_inhibition__names__gerund}? + Assistant: {CYP2D6_inhibition#No&Yes}, it is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}? + Assistant: This is a molecule that is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. 
!}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {CYP2D6_inhibition#not &NULL}be {CYP2D6_inhibition__names__gerund}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {CYP2D6_inhibition#not &NULL}be {CYP2D6_inhibition__names__gerund}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {CYP2D6_inhibition__names__gerund}:{CYP2D6_inhibition#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP2D6_inhibition__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{CYP2D6_inhibition#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP2D6_inhibition__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result:This molecule is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}. + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2D6_inhibition__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. 
+ Options: + {CYP2D6_inhibition%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2D6_inhibition__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {CYP2D6_inhibition%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%CYP2D6_inhibition%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%CYP2D6_inhibition%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/cyp_p450_3a4_inhibition_veith_et_al/meta.yaml b/data/tabular/cyp_p450_3a4_inhibition_veith_et_al/meta.yaml index 3fdb25903..b17d5272d 100644 --- a/data/tabular/cyp_p450_3a4_inhibition_veith_et_al/meta.yaml +++ b/data/tabular/cyp_p450_3a4_inhibition_veith_et_al/meta.yaml @@ -1,139 +1,138 @@ ---- name: cyp_p450_3a4_inhibition_veith_et_al description: |- - The CYP P450 genes are involved in the formation and breakdown (metabolism) - of various molecules and chemicals within cells. Specifically, CYP3A4 is an - important enzyme in the body, mainly found in the liver and in the intestine. - It oxidizes small foreign organic molecules (xenobiotics), such as toxins or - drugs, so that they can be removed from the body. 
+ The CYP P450 genes are involved in the formation and breakdown (metabolism) + of various molecules and chemicals within cells. Specifically, CYP3A4 is an + important enzyme in the body, mainly found in the liver and in the intestine. + It oxidizes small foreign organic molecules (xenobiotics), such as toxins or + drugs, so that they can be removed from the body. targets: - - id: CYP3A4_inhibition - description: ability of the drug to inhibit CYP P450 3A4 (1) or not (0) - units: - type: boolean - names: - - noun: inhibition of CYP3A4 - - noun: inhibition of CYP P450 3A4 - - adjective: CYP3A4 inhibition - - adjective: CYP P450 3A4 inhibition - - verb: inhibits CYP P450 3A4 - - verb: inhibits CYP3A4 - - gerund: inhibiting CYP P450 3A4 - - gerund: inhibiting CYP3A4 - uris: + - id: CYP3A4_inhibition + description: ability of the drug to inhibit CYP P450 3A4 (1) or not (0) + units: + type: boolean + names: + - noun: inhibition of CYP3A4 + - noun: inhibition of CYP P450 3A4 + - adjective: CYP3A4 inhibition + - adjective: CYP P450 3A4 inhibition + - verb: inhibits CYP P450 3A4 + - verb: inhibits CYP3A4 + - gerund: inhibiting CYP P450 3A4 + - gerund: inhibiting CYP3A4 + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://doi.org/10.1038/nbt.1581 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-3a4-inhibition-veith-et-al - description: data source + - url: https://doi.org/10.1038/nbt.1581 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-3a4-inhibition-veith-et-al + description: data source num_points: 12328 bibtex: - - |- - @article{Veith2009, - doi = {10.1038/nbt.1581}, - url = 
{https://doi.org/10.1038/nbt.1581}, - year = {2009}, - month = oct, - publisher = {Springer Science and Business Media LLC}, - volume = {27}, - number = {11}, - pages = {1050--1055}, - author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James - and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese - and Christopher P Austin and David G Lloyd and Douglas S Auld}, - title = {Comprehensive characterization of cytochrome P450 isozyme selectivity - across chemical libraries}, - journal = {Nature Biotechnology} + - |- + @article{Veith2009, + doi = {10.1038/nbt.1581}, + url = {https://doi.org/10.1038/nbt.1581}, + year = {2009}, + month = oct, + publisher = {Springer Science and Business Media LLC}, + volume = {27}, + number = {11}, + pages = {1050--1055}, + author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James + and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese + and Christopher P Austin and David G Lloyd and Douglas S Auld}, + title = {Comprehensive characterization of cytochrome P450 isozyme selectivity + across chemical libraries}, + journal = {Nature Biotechnology} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP3A4_inhibition#no &NULL}{CYP3A4_inhibition__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP3A4_inhibition#no &NULL}{CYP3A4_inhibition__names__noun}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP3A4_inhibition#no &NULL}{CYP3A4_inhibition__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP3A4_inhibition__names__gerund}. 
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {CYP3A4_inhibition#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP3A4_inhibition__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {CYP3A4_inhibition__names__gerund}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP3A4_inhibition__names__gerund}? - Assistant: {CYP3A4_inhibition#No&Yes}, this molecule is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {CYP3A4_inhibition__names__gerund}? - Assistant: {CYP3A4_inhibition#No&Yes}, it is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}? - Assistant: This is a molecule that is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. 
!}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {CYP3A4_inhibition#not &NULL}be {CYP3A4_inhibition__names__gerund}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {CYP3A4_inhibition#not &NULL}be {CYP3A4_inhibition__names__gerund}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {CYP3A4_inhibition__names__gerund}:{CYP3A4_inhibition#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP3A4_inhibition__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{CYP3A4_inhibition#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {CYP3A4_inhibition__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result:This molecule is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}. - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP3A4_inhibition__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. 
- Options: - {CYP3A4_inhibition%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP3A4_inhibition__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {CYP3A4_inhibition%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%CYP3A4_inhibition%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%CYP3A4_inhibition%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP3A4_inhibition#no &NULL}{CYP3A4_inhibition__names__noun}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP3A4_inhibition#no &NULL}{CYP3A4_inhibition__names__noun}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP3A4_inhibition#no &NULL}{CYP3A4_inhibition__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP3A4_inhibition__names__gerund}. 
+ {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {CYP3A4_inhibition#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP3A4_inhibition__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {CYP3A4_inhibition__names__gerund}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP3A4_inhibition__names__gerund}? + Assistant: {CYP3A4_inhibition#No&Yes}, this molecule is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {CYP3A4_inhibition__names__gerund}? + Assistant: {CYP3A4_inhibition#No&Yes}, it is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}? + Assistant: This is a molecule that is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. 
!}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {CYP3A4_inhibition#not &NULL}be {CYP3A4_inhibition__names__gerund}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {CYP3A4_inhibition#not &NULL}be {CYP3A4_inhibition__names__gerund}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {CYP3A4_inhibition__names__gerund}:{CYP3A4_inhibition#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP3A4_inhibition__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{CYP3A4_inhibition#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {CYP3A4_inhibition__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result:This molecule is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}. + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP3A4_inhibition__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. 
+ Options: + {CYP3A4_inhibition%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP3A4_inhibition__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {CYP3A4_inhibition%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%CYP3A4_inhibition%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%CYP3A4_inhibition%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/drug_induced_liver_injury/meta.yaml b/data/tabular/drug_induced_liver_injury/meta.yaml index 7e536091b..9338f7266 100644 --- a/data/tabular/drug_induced_liver_injury/meta.yaml +++ b/data/tabular/drug_induced_liver_injury/meta.yaml @@ -1,129 +1,128 @@ ---- name: drug_induced_liver_injury description: |- - Drug-induced liver injury (DILI) is fatal liver disease caused by drugs - and it has been the single most frequent cause of safety-related drug marketing - withdrawals for the past 50 years (e.g. iproniazid, ticrynafen, benoxaprofen). - This dataset is aggregated from U.S. FDA 2019s National Center for Toxicological - Research. 
+ Drug-induced liver injury (DILI) is fatal liver disease caused by drugs + and it has been the single most frequent cause of safety-related drug marketing + withdrawals for the past 50 years (e.g. iproniazid, ticrynafen, benoxaprofen). + This dataset is aggregated from U.S. FDA 2019s National Center for Toxicological + Research. targets: - - id: liver_injury - description: whether it can cause liver injury (1) or not (0). - units: - type: boolean - names: - - noun: drug-induced liver injury - - noun: drug-induced liver injury (DILI) - - noun: fatal liver disease caused by drugs - - verb: causes drug-induced liver injury - uris: - - http://purl.bioontology.org/ontology/MEDDRA/10072268 - - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C84427 + - id: liver_injury + description: whether it can cause liver injury (1) or not (0). + units: + type: boolean + names: + - noun: drug-induced liver injury + - noun: drug-induced liver injury (DILI) + - noun: fatal liver disease caused by drugs + - verb: causes drug-induced liver injury + uris: + - http://purl.bioontology.org/ontology/MEDDRA/10072268 + - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C84427 benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://doi.org/10.1021/acs.jcim.5b00238 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#dili-drug-induced-liver-injury - description: Data source + - url: https://doi.org/10.1021/acs.jcim.5b00238 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#dili-drug-induced-liver-injury + description: Data source num_points: 475 bibtex: - - |- - @article{Xu2015, - doi = {10.1021/acs.jcim.5b00238}, - url = {https://doi.org/10.1021/acs.jcim.5b00238}, 
- year = {2015}, - month = oct, - publisher = {American Chemical Society (ACS)}, - volume = {55}, - number = {10}, - pages = {2085-2093}, - author = {Youjun Xu and Ziwei Dai and Fangjin Chen - and Shuaishi Gao and Jianfeng Pei and Luhua Lai}, - title = {Deep Learning for Drug-Induced Liver Injury}, - journal = {Journal of Chemical Information and Modeling} + - |- + @article{Xu2015, + doi = {10.1021/acs.jcim.5b00238}, + url = {https://doi.org/10.1021/acs.jcim.5b00238}, + year = {2015}, + month = oct, + publisher = {American Chemical Society (ACS)}, + volume = {55}, + number = {10}, + pages = {2085-2093}, + author = {Youjun Xu and Ziwei Dai and Fangjin Chen + and Shuaishi Gao and Jianfeng Pei and Luhua Lai}, + title = {Deep Learning for Drug-Induced Liver Injury}, + journal = {Journal of Chemical Information and Modeling} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|causes|displays!} {liver_injury#no &NULL}{liver_injury__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule causes {liver_injury#no &NULL}{liver_injury__names__noun}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {liver_injury#not &NULL}identified as causing a {liver_injury__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is causing {liver_injury#no &NULL}{liver_injury__names__noun}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that {#shows|causes!} {liver_injury__names__noun}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {liver_injury#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that {#shows|causes!} {liver_injury__names__noun}. 
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {liver_injury#not &NULL}causing {liver_injury__names__noun}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that {#shows|causes!} {liver_injury__names__noun}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} {#shows|causes!} a {liver_injury__names__noun}? - Assistant: {liver_injury#No&Yes}, this molecule is {liver_injury#not &NULL}causing a {liver_injury__names__noun}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} causing a {liver_injury__names__noun}? - Assistant: {liver_injury#No&Yes}, it is {liver_injury#not &NULL}causing a {liver_injury__names__noun}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {liver_injury#not &NULL}{#showing|causing!} a {liver_injury__names__noun}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {liver_injury#not &NULL}{#showing|causing!} a {liver_injury__names__noun}. - Assistant: This is a molecule that is {liver_injury#not &NULL}causing a {liver_injury__names__noun}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {liver_injury#not &NULL}be causing a {liver_injury__names__noun}. 
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {liver_injury#not &NULL}causing a {liver_injury__names__noun}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {liver_injury#not &NULL}be causing a {liver_injury__names__noun}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {liver_injury#not &NULL}causing a {liver_injury__names__noun}: {SMILES#} - - Is the {SMILES__description} {SMILES#} causing a {liver_injury__names__noun}:{liver_injury#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that {#shows|causes!} a {liver_injury__names__noun}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{liver_injury#False&True} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} causing a {liver_injury__names__noun}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {liver_injury%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} causing a {liver_injury__names__noun}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {liver_injury%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. 
- Question: Which molecules are {liver_injury#not &NULL} causing a {liver_injury__names__noun}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%liver_injury%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {liver_injury#not &NULL} causing a {liver_injury__names__noun}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%liver_injury%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|causes|displays!} {liver_injury#no &NULL}{liver_injury__names__noun}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule causes {liver_injury#no &NULL}{liver_injury__names__noun}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {liver_injury#not &NULL}identified as causing a {liver_injury__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is causing {liver_injury#no &NULL}{liver_injury__names__noun}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that {#shows|causes!} {liver_injury__names__noun}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {liver_injury#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that {#shows|causes!} {liver_injury__names__noun}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {liver_injury#not &NULL}causing {liver_injury__names__noun}. 
+ - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that {#shows|causes!} {liver_injury__names__noun}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} {#shows|causes!} a {liver_injury__names__noun}? + Assistant: {liver_injury#No&Yes}, this molecule is {liver_injury#not &NULL}causing a {liver_injury__names__noun}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} causing a {liver_injury__names__noun}? + Assistant: {liver_injury#No&Yes}, it is {liver_injury#not &NULL}causing a {liver_injury__names__noun}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {liver_injury#not &NULL}{#showing|causing!} a {liver_injury__names__noun}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {liver_injury#not &NULL}{#showing|causing!} a {liver_injury__names__noun}. + Assistant: This is a molecule that is {liver_injury#not &NULL}causing a {liver_injury__names__noun}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {liver_injury#not &NULL}be causing a {liver_injury__names__noun}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {liver_injury#not &NULL}causing a {liver_injury__names__noun}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? 
+ User: Yes, the molecule should {liver_injury#not &NULL}be causing a {liver_injury__names__noun}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {liver_injury#not &NULL}causing a {liver_injury__names__noun}: {SMILES#} + - Is the {SMILES__description} {SMILES#} causing a {liver_injury__names__noun}:{liver_injury#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that {#shows|causes!} a {liver_injury__names__noun}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{liver_injury#False&True} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} causing a {liver_injury__names__noun}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {liver_injury%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} causing a {liver_injury__names__noun}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {liver_injury%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {liver_injury#not &NULL} causing a {liver_injury__names__noun}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%liver_injury%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. 
+ Question: Which molecules are {liver_injury#not &NULL} causing a {liver_injury__names__noun}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%liver_injury%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/drugchat_liang_zhang_et_al/meta.yaml b/data/tabular/drugchat_liang_zhang_et_al/meta.yaml index da9520a12..8584e8186 100644 --- a/data/tabular/drugchat_liang_zhang_et_al/meta.yaml +++ b/data/tabular/drugchat_liang_zhang_et_al/meta.yaml @@ -1,57 +1,56 @@ ---- name: drugchat_liang_zhang_et_al description: |- - Instruction tuning dataset used for the LLM component of DrugChat. - 10,834 compounds (3,8962 from ChEMBL and 6,942 from PubChem) containing - descriptive drug information were collected. 143,517 questions were generated - using the molecules' classification, properties and descriptions from ChEBI, LOTUS & YMDB. + Instruction tuning dataset used for the LLM component of DrugChat. + 10,834 compounds (3,892 from ChEMBL and 6,942 from PubChem) containing + descriptive drug information were collected. 143,517 questions were generated + using the molecules' classification, properties and descriptions from ChEBI, LOTUS & YMDB.
targets: - - id: answ - description: answer to the question about the SMILES - type: string + - id: answ + description: answer to the question about the SMILES + type: string identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: quest - type: string - description: Question about SMILES + - id: SMILES + type: SMILES + description: SMILES + - id: quest + type: string + description: Question about SMILES license: BSD-3-Clause links: - - url: https://www.techrxiv.org/articles/preprint/DrugChat_Towards_Enabling_ChatGPT-Like_Capabilities_on_Drug_Molecule_Graphs/22945922 - description: corresponding publication - - url: https://github.com/UCSD-AI4H/drugchat - description: rep & data source + - url: https://www.techrxiv.org/articles/preprint/DrugChat_Towards_Enabling_ChatGPT-Like_Capabilities_on_Drug_Molecule_Graphs/22945922 + description: corresponding publication + - url: https://github.com/UCSD-AI4H/drugchat + description: rep & data source num_points: 143,517 bibtex: - - |- - @article{Liang2023, - author = "Youwei Liang and Ruiyi Zhang and Li Zhang and Pengtao Xie", - title = "{DrugChat: Towards Enabling ChatGPT-Like Capabilities on Drug Molecule Graphs}", - year = "2023", - month = "5", - url = "https://www.techrxiv.org/articles/preprint/DrugChat_Towards_Enabling_ChatGPT-Like_Capabilities_on_Drug_Molecule_Graphs/22945922", - doi = "10.36227/techrxiv.22945922.v1"} + - |- + @article{Liang2023, + author = "Youwei Liang and Ruiyi Zhang and Li Zhang and Pengtao Xie", + title = "{DrugChat: Towards Enabling ChatGPT-Like Capabilities on Drug Molecule Graphs}", + year = "2023", + month = "5", + url = "https://www.techrxiv.org/articles/preprint/DrugChat_Towards_Enabling_ChatGPT-Like_Capabilities_on_Drug_Molecule_Graphs/22945922", + doi = "10.36227/techrxiv.22945922.v1"} templates: - - |- - Task: Please answer the following question about the molecule with {SMILES__description} {SMILES#}. 
- {#Description|Question|Request!}: {quest#} - {#Result|Answer|Completion!}: {answ#} - - |- - {#Question|Q!}: {quest#} - {#Constraint:|Description:|!} The {#Molecule|Compound|Chemical!} {#has the|can be represented with the!} {SMILES__description} {SMILES#}. - {#Answer|Result|Answer!}: {answ#} - - |- - User: I have a question about the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#}. - Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!} - User: {quest#} - Assistant: {#The answer is |!}{answ#} - - |- - User: I want to know more about the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#}. - Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!} - User: {quest#} - Assistant: {#The answer is |!}{answ#} - - |- - Task: Answer the following question about the molecule with {SMILES__description} {SMILES#}. - {#Description|Question|Request!}: {quest#} - {#Result|Answer|Completion!}: {answ#} + - |- + Task: Please answer the following question about the molecule with {SMILES__description} {SMILES#}. + {#Description|Question|Request!}: {quest#} + {#Result|Answer|Completion!}: {answ#} + - |- + {#Question|Q!}: {quest#} + {#Constraint:|Description:|!} The {#Molecule|Compound|Chemical!} {#has the|can be represented with the!} {SMILES__description} {SMILES#}. + {#Answer|Result|Answer!}: {answ#} + - |- + User: I have a question about the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#}. + Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!} + User: {quest#} + Assistant: {#The answer is |!}{answ#} + - |- + User: I want to know more about the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#}. 
+ Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!} + User: {quest#} + Assistant: {#The answer is |!}{answ#} + - |- + Task: Answer the following question about the molecule with {SMILES__description} {SMILES#}. + {#Description|Question|Request!}: {quest#} + {#Result|Answer|Completion!}: {answ#} diff --git a/data/tabular/fda_adverse_reactions/meta.yaml b/data/tabular/fda_adverse_reactions/meta.yaml index 0c5e97d06..d45659aec 100644 --- a/data/tabular/fda_adverse_reactions/meta.yaml +++ b/data/tabular/fda_adverse_reactions/meta.yaml @@ -1,32 +1,31 @@ ---- name: fda_adverse_reactions description: A dataset of adverse reaction statistics for drugs and reaction events. targets: - - id: count - description: A count of how many reaction events occurred for this chembl id. - units: - type: ordinal - names: - - noun: adverse reaction frequency - pubchem_aids: [] - uris: [] - - id: event - description: The type of event that occurred for this molecule interaction. - units: - type: string - names: - - noun: adverse event reaction - pubchem_aids: [] - uris: [] + - id: count + description: A count of how many reaction events occurred for this chembl id. + units: + type: ordinal + names: + - noun: adverse reaction frequency + pubchem_aids: [] + uris: [] + - id: event + description: The type of event that occurred for this molecule interaction. + units: + type: string + names: + - noun: adverse event reaction + pubchem_aids: [] + uris: [] identifiers: - - id: SMILES - type: SMILES - description: This is the SMILES identifier for a given molecule. + - id: SMILES + type: SMILES + description: This is the SMILES identifier for a given molecule. license: CC BY-SA 3.0 links: - - name: Dataset - url: https://platform.opentargets.org/downloads - description: The website which we download the dataset from during the transformation script. 
+ - name: Dataset + url: https://platform.opentargets.org/downloads + description: The website which we download the dataset from during the transformation script. benchmarks: [] num_points: 94910 bibtex: [] diff --git a/data/tabular/flashpoint/meta.yaml b/data/tabular/flashpoint/meta.yaml index 379a9e8e9..184fe3ba7 100644 --- a/data/tabular/flashpoint/meta.yaml +++ b/data/tabular/flashpoint/meta.yaml @@ -1,38 +1,37 @@ ---- name: flashpoint description: | - Curation of experimentally determined flash point values measured with open cup and closed cup methods. - The values are from academic papers, the Hazardous Chemicals Handbook, and the PubChem chemical database. - Differences from the stated sources in the paper are: - * Values from the DIPPR database are not included in their dataset as they are proprietary. - * There are appear to be no values from Lange's handbook of chemistry in their dataset. - * We did our own processing to resolve duplicate SMILES. + Curation of experimentally determined flash point values measured with open cup and closed cup methods. + The values are from academic papers, the Hazardous Chemicals Handbook, and the PubChem chemical database. + Differences from the stated sources in the paper are: + * Values from the DIPPR database are not included in their dataset as they are proprietary. + * There appear to be no values from Lange's handbook of chemistry in their dataset. + * We did our own processing to resolve duplicate SMILES.
targets: - - id: flashpoint - description: Experimental flash point value (K) - units: K - type: continuous - names: - - noun: flash point - uris: - - http://semanticscience.org/resource/CHEMINF_000417 + - id: flashpoint + description: Experimental flash point value (K) + units: K + type: continuous + names: + - noun: flash point + uris: + - http://semanticscience.org/resource/CHEMINF_000417 identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 num_points: 9878 # downloaded dataset has 14696 datapoints, but there are duplicate smiles links: - - url: https://figshare.com/ndownloader/files/18509711 - description: Original figshare dataset + - url: https://figshare.com/ndownloader/files/18509711 + description: Original figshare dataset bibtex: - - | - "@article{sun2020assessing, - title={Assessing Graph-based Deep Learning Models for Predicting Flash Point}, - author={Sun, Xiaoyu and Krakauer, Nathaniel J and Politowicz, Alexander and Chen, Wei-Ting and Li, Qiying and Li, Zuoyi and Shao, Xianjia and Sunaryo, Alfred and Shen, Mingren and Wang, James and others}, - journal={Molecular informatics}, - volume={39}, - number={6}, - pages={e1900101}, - year={2020} - }" + - | + "@article{sun2020assessing, + title={Assessing Graph-based Deep Learning Models for Predicting Flash Point}, + author={Sun, Xiaoyu and Krakauer, Nathaniel J and Politowicz, Alexander and Chen, Wei-Ting and Li, Qiying and Li, Zuoyi and Shao, Xianjia and Sunaryo, Alfred and Shen, Mingren and Wang, James and others}, + journal={Molecular informatics}, + volume={39}, + number={6}, + pages={e1900101}, + year={2020} + }" diff --git a/data/tabular/formation_energies/meta.yaml b/data/tabular/formation_energies/meta.yaml index ce2b19ef9..9490c1df9 100644 --- a/data/tabular/formation_energies/meta.yaml +++ b/data/tabular/formation_energies/meta.yaml @@ -1,86 +1,84 @@ ---- name: formation_energies description: |- - Formation and 
decomposition energies of inorganic solids mined from the Materials Project database. + Formation and decomposition energies of inorganic solids mined from the Materials Project database. targets: - - id: rxn - description: decomposition reaction - type: text - - id: Ed - description: decomposition enthalpy - units: eV/atom - type: continuous - significant_digits: 3 - - id: Ef - description: formation enthalpy - units: eV/atom - type: continuous - significant_digits: 3 - - id: stability - description: thermodynamic stability of material - type: boolean + - id: rxn + description: decomposition reaction + type: text + - id: Ed + description: decomposition enthalpy + units: eV/atom + type: continuous + significant_digits: 3 + - id: Ef + description: formation enthalpy + units: eV/atom + type: continuous + significant_digits: 3 + - id: stability + description: thermodynamic stability of material + type: boolean benchmarks: [] identifiers: - - id: composition - type: COMPOSITION - description: chemical formula + - id: composition + type: COMPOSITION + description: chemical formula license: CC BY 4.0 links: - - url: https://github.com/CJBartel/TestStabilityML/tree/master - description: original data source + - url: https://github.com/CJBartel/TestStabilityML/tree/master + description: original data source num_points: 85014 bibtex: - - |- - @article{Bartel_2020, - doi = {10.1038/s41524-020-00362-y}, - url = {https://doi.org/10.1038%2Fs41524-020-00362-y}, - year = 2020, - month = {jul}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {6}, - number = {1}, - author = {Christopher J. 
Bartel and Amalie Trewartha and Qi Wang and Alexander Dunn and Anubhav Jain and Gerbrand Ceder}, - title = {A critical examination of compound stability predictions from machine-learned formation energies}, - journal = {npj Comput Mater} - } + - |- + @article{Bartel_2020, + doi = {10.1038/s41524-020-00362-y}, + url = {https://doi.org/10.1038%2Fs41524-020-00362-y}, + year = 2020, + month = {jul}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {6}, + number = {1}, + author = {Christopher J. Bartel and Amalie Trewartha and Qi Wang and Alexander Dunn and Anubhav Jain and Gerbrand Ceder}, + title = {A critical examination of compound stability predictions from machine-learned formation energies}, + journal = {npj Comput Mater} + } templates: - - The decomposition enthalpy of {composition#} is {Ed#} {Ed__units}. - - The formation enthalpy of {composition#} is {Ef#} {Ef__units}. - - The decomposition enthalpy of {composition#} is {Ed#} {Ed__units}. - - The decomposition reaction of {composition#} is {rxn#}. - - The {#material|compound|crystal|solid|inorganic material!} with composition {composition#} is {stability#not &NULL}thermodynamically stable because - its decomposition enthalpy is {Ed#} {Ed__units}. - - |- - User: What is the decomposition enthalpy of {composition#}? - Assistant: {#The decomposition enthalpy of is |!}{Ed#} {Ed__units}. - - |- - User: What is the formation enthalpy of {composition#}? - Assistant: {#The formation enthalpy of is |!}{Ef#} {Ef__units}. - - |- - User: What is the decomposition enthalpy of {composition#}? - Assistant: {#The decomposition enthalpy is|!}is {Ed#} {Ed__units}. - - |- - User: {#I have a question about |I want to know something about| I need help with!}{composition#}. 
- Assistant: {#Sure.|How can I help?|How can I be of help?|How can I assist?|Happy to help.!} {#What is your question?|What do you want to know?|!} - User: {composition#} is {stability#not &NULL}thermodynamically stable because its decomposition enthalpy is {Ed#} {Ed__units}. - - |- - User: I want to design a {stability#not &NULL}thermodynamically stable {#material|structure|compound!} What {#chemical formula|composition!} should I use? - Assistant: {#I recommend using |I suggest using |I would use |I would recommend using !}{composition#}. - - |- - User: I want to design a {#material|structure|compound!} that is {stability#not &NULL}thermodynamically stable. What {#chemical formula|composition!} should I use? - Assistant: {#Do you have|Are there!} any other {#requirements|constraints|preferences!}? - User: The {#material|structure|compound!} should have a decomposition enthalpy of {Ed#} {Ed__units}. - Assistant: {#I recommend using |I suggest using |I would use |I would recommend using !}{composition#}. - - |- - User: I want to design a {#material|structure|compound!} that is {stability#not &NULL}thermodynamically stable. What {#chemical formula|composition!} should I use? - Assistant: {#Do you have|Are there!} any other {#requirements|constraints|preferences!}? - User: The {#material|structure|compound!} should have a formation enthalpy of {Ef#} {Ef__units}. - Assistant: {#I recommend using |I suggest using |I would use |I would recommend using !}{composition#}. - - |- - Task: Classify the stability of {composition#}. - Constraint: Give a reason for your answer. - Answer: {#The material is |The compound is |The crystal is |The solid is !}{stability#not &NULL}thermodynamically stable because its decomposition enthalpy is {Ed#} {Ed__units}. - - |- - Question: What is a compound with the following decomposition reaction? - Description: {rxn#} - Answer: {composition#} + - The decomposition enthalpy of {composition#} is {Ed#} {Ed__units}. 
+ - The formation enthalpy of {composition#} is {Ef#} {Ef__units}. + - The decomposition enthalpy of {composition#} is {Ed#} {Ed__units}. + - The decomposition reaction of {composition#} is {rxn#}. + - The {#material|compound|crystal|solid|inorganic material!} with composition {composition#} is {stability#not &NULL}thermodynamically stable because its decomposition enthalpy is {Ed#} {Ed__units}. + - |- + User: What is the decomposition enthalpy of {composition#}? + Assistant: {#The decomposition enthalpy is |!}{Ed#} {Ed__units}. + - |- + User: What is the formation enthalpy of {composition#}? + Assistant: {#The formation enthalpy is |!}{Ef#} {Ef__units}. + - |- + User: What is the decomposition enthalpy of {composition#}? + Assistant: {#The decomposition enthalpy |It !}is {Ed#} {Ed__units}. + - |- + User: {#I have a question about |I want to know something about |I need help with !}{composition#}. + Assistant: {#Sure.|How can I help?|How can I be of help?|How can I assist?|Happy to help.!} {#What is your question?|What do you want to know?|!} + User: {composition#} is {stability#not &NULL}thermodynamically stable because its decomposition enthalpy is {Ed#} {Ed__units}. + - |- + User: I want to design a {stability#not &NULL}thermodynamically stable {#material|structure|compound!}. What {#chemical formula|composition!} should I use? + Assistant: {#I recommend using |I suggest using |I would use |I would recommend using !}{composition#}. + - |- + User: I want to design a {#material|structure|compound!} that is {stability#not &NULL}thermodynamically stable. What {#chemical formula|composition!} should I use? + Assistant: {#Do you have|Are there!} any other {#requirements|constraints|preferences!}? + User: The {#material|structure|compound!} should have a decomposition enthalpy of {Ed#} {Ed__units}. + Assistant: {#I recommend using |I suggest using |I would use |I would recommend using !}{composition#}.
+ - |- + User: I want to design a {#material|structure|compound!} that is {stability#not &NULL}thermodynamically stable. What {#chemical formula|composition!} should I use? + Assistant: {#Do you have|Are there!} any other {#requirements|constraints|preferences!}? + User: The {#material|structure|compound!} should have a formation enthalpy of {Ef#} {Ef__units}. + Assistant: {#I recommend using |I suggest using |I would use |I would recommend using !}{composition#}. + - |- + Task: Classify the stability of {composition#}. + Constraint: Give a reason for your answer. + Answer: {#The material is |The compound is |The crystal is |The solid is !}{stability#not &NULL}thermodynamically stable because its decomposition enthalpy is {Ed#} {Ed__units}. + - |- + Question: What is a compound with the following decomposition reaction? + Description: {rxn#} + Answer: {composition#} diff --git a/data/tabular/freesolv/meta.yaml b/data/tabular/freesolv/meta.yaml index ffd29a3f1..04758b6d9 100644 --- a/data/tabular/freesolv/meta.yaml +++ b/data/tabular/freesolv/meta.yaml @@ -1,106 +1,76 @@ ---- name: freesolv description: Experimental and calculated small molecule hydration free energies targets: - - id: exp_value - description: experimental hydration free energy value - units: kcal/mol - type: continuous - names: - - noun: hydration free energy - - id: exp_uncertainty - description: experimental hydration free energy uncertainty - units: kcal/mol - type: continuous - names: - - noun: hydration free energy uncertainty - - id: GAFF - description: mobley group calculated value - units: kcal/mol - type: continuous - names: - - noun: hydration free energy computed using the GAFF force field - - id: calc_uncertainty - description: mobley group calculated value calculated uncertainty - units: kcal/mol - type: continuous - names: - - noun: uncertainty in hydration free energy computed using the GAFF force field + - id: exp_value + description: experimental hydration free energy value + 
units: kcal/mol + type: continuous + names: + - noun: hydration free energy + - id: exp_uncertainty + description: experimental hydration free energy uncertainty + units: kcal/mol + type: continuous + names: + - noun: hydration free energy uncertainty + - id: GAFF + description: mobley group calculated value + units: kcal/mol + type: continuous + names: + - noun: hydration free energy computed using the GAFF force field + - id: calc_uncertainty + description: mobley group calculated value calculated uncertainty + units: kcal/mol + type: continuous + names: + - noun: uncertainty in hydration free energy computed using the GAFF force field identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: iupac_name - type: IUPAC - description: IUPAC + - id: SMILES + type: SMILES + description: SMILES + - id: iupac_name + type: IUPAC + description: IUPAC license: CC BY-NC-SA 4.0 links: - - url: https://github.com/MobleyLab/FreeSolv - description: issue tracker and source data - - url: https://escholarship.org/uc/item/6sd403pz - description: repository with data + - url: https://github.com/MobleyLab/FreeSolv + description: issue tracker and source data + - url: https://escholarship.org/uc/item/6sd403pz + description: repository with data num_points: 642 bibtex: - - "@article{mobley2013experimental,\ntitle={Experimental and calculated small molecule hydration free energies},\nauthor={Mobley, David L},\nyear={2013}" + - "@article{mobley2013experimental,\ntitle={Experimental and calculated small molecule hydration free energies},\nauthor={Mobley, David L},\nyear={2013}" templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} has a {exp_value__names__noun} of {exp_value#} {exp_value__units}. - - Based on the {SMILES__description} {#representation of |!}{SMILES#}, the molecule has a {exp_value__names__noun} of {exp_value#} {exp_value__units}. 
- - The {SMILES__description} {SMILES#} {#represents|is representing!} a molecule {#that has a|with a!} {exp_value__names__noun} of {exp_value#} {exp_value__units}. - - The molecule with the {SMILES__description} {SMILES#} has a {exp_value__names__noun} of {exp_value#} {exp_value__units}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} has a {GAFF__names__noun} of {GAFF#} {GAFF__units}. - - Based on the {SMILES__description} {#representation of |!}{SMILES#}, the molecule has a {GAFF__names__noun} of {GAFF#} {GAFF__units}. - - The {SMILES__description} {SMILES#} {#represents|is representing!} a molecule {#that has a|with a!} {GAFF__names__noun} of {GAFF#} {GAFF__units}. - - The molecule with the {SMILES__description} {SMILES#} has a {GAFF__names__noun} of {GAFF#} {GAFF__units}. - - "Task: Please predict a molecule feature based on the description.\nDescription: Predict the {exp_value__names__noun} in {exp_value__units}.\n{#Molecule\ - \ |!}{SMILES__description}: {SMILES#}\nConstraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {exp_value__units}\ - \ without using any {#other|additional!} words.\nResult: {exp_value#} {exp_value__units}" - - "Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.\nDescription: A molecule\ - \ that has {exp_value__names__noun} of {exp_value#} {exp_value__units}.\nResult: {SMILES#}" - - "Task: Please predict a molecule feature based on the description.\nDescription: Predict the {GAFF__names__noun} in {GAFF__units}.\n{#Molecule |!}{SMILES__description}:\ - \ {SMILES#}\nConstraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {GAFF__units} without using any {#other|additional!}\ - \ words.\nResult: {GAFF#} {GAFF__units}" - - "Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.\nDescription: A 
molecule\ - \ that has {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nResult: {SMILES#}" - - "User: Can you {#tell me|derive|estimate!} the {exp_value__names__noun} in {exp_value__units} of the molecule with the {SMILES__description} {SMILES#}?\n\ - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, this molecule has a {exp_value__names__noun} of {exp_value#} {exp_value__units}." - - "User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}?\n\ - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}" - - "User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}.\n\ - Assistant: This is a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}: {SMILES#}" - - "User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.\nAssistant: {#This sounds very exciting. |This sounds very\ - \ interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?\nUser: Yes, please. The molecule should have\ - \ a {exp_value__names__noun} of {exp_value#} {exp_value__units}.\nAssistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} represents\ - \ a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}: {SMILES#}" - - "User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.\nAssistant: {#This sounds very exciting. |This sounds very\ - \ interesting. 
!}Should it be a special {#molecule|one!}?\nUser: Yes, the molecule should have a {exp_value__names__noun} of {exp_value#} {exp_value__units}.\n\ - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} represents a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}:\ - \ {SMILES#}" - - "User: Can you {#tell me|derive|estimate!} the {GAFF__names__noun} in {GAFF__units} of the molecule with the {SMILES__description} {SMILES#}?\nAssistant:\ - \ {#Yes|Of course|Sure|Yes, I'm happy to help!}, this molecule has a {GAFF__names__noun} of {GAFF#} {GAFF__units}." - - "User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}?\nAssistant:\ - \ {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}" - - "User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nAssistant:\ - \ This is a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}: {SMILES#}" - - "User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.\nAssistant: {#This sounds very exciting. |This sounds very\ - \ interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?\nUser: Yes, please. The molecule should have\ - \ a {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nAssistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} represents a molecule that\ - \ has a {GAFF__names__noun} of {GAFF#} {GAFF__units}: {SMILES#}" - - "User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.\nAssistant: {#This sounds very exciting. |This sounds very\ - \ interesting. 
!}Should it be a special {#molecule|one!}?\nUser: Yes, the molecule should have a {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nAssistant:\ - \ {#Understood|Got it|Ok!}, this {SMILES__description} represents a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}: {SMILES#}" - - The {exp_value__names__noun} of the molecule with the {SMILES__description} {SMILES#} is:{exp_value#} {exp_value__units} - - The {exp_value__names__noun} of the {SMILES__description} {SMILES#} is:{exp_value#} {exp_value__units} - - The {exp_value__names__noun} of the molecule {SMILES__description} {SMILES#} is:{exp_value#} {exp_value__units} - - "Task: Please predict a molecule feature based on the description.\nDescription: Predict the {exp_value__names__noun} in {exp_value__units} of a molecule.\n\ - {#Molecule |!}{SMILES__description}: {SMILES#}\nConstraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {exp_value__units}\ - \ without using any {#other|additional!} words.\nResult:{exp_value#} {exp_value__units}" - - "Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.\nDescription: A molecule\ - \ that has {exp_value__names__noun} of {exp_value#} {exp_value__units}.\nResult:{SMILES#}" - - The {GAFF__names__noun} of the molecule with the {SMILES__description} {SMILES#} is:{GAFF#} {GAFF__units} - - The {GAFF__names__noun} of the {SMILES__description} {SMILES#} is:{GAFF#} {GAFF__units} - - The {GAFF__names__noun} of the molecule {SMILES__description} {SMILES#} is:{GAFF#} {GAFF__units} - - "Task: Please predict a molecule feature based on the description.\nDescription: Predict the {GAFF__names__noun} in {GAFF__units} of a molecule.\n\ - {#Molecule |!}{SMILES__description}: {SMILES#}\nConstraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {GAFF__units}\ - \ without using any {#other|additional!} words.\nResult:{GAFF#} {GAFF__units}" - - 
"Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.\nDescription: A molecule\ - \ that has {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nResult:{SMILES#}" + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} has a {exp_value__names__noun} of {exp_value#} {exp_value__units}. + - Based on the {SMILES__description} {#representation of |!}{SMILES#}, the molecule has a {exp_value__names__noun} of {exp_value#} {exp_value__units}. + - The {SMILES__description} {SMILES#} {#represents|is representing!} a molecule {#that has a|with a!} {exp_value__names__noun} of {exp_value#} {exp_value__units}. + - The molecule with the {SMILES__description} {SMILES#} has a {exp_value__names__noun} of {exp_value#} {exp_value__units}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} has a {GAFF__names__noun} of {GAFF#} {GAFF__units}. + - Based on the {SMILES__description} {#representation of |!}{SMILES#}, the molecule has a {GAFF__names__noun} of {GAFF#} {GAFF__units}. + - The {SMILES__description} {SMILES#} {#represents|is representing!} a molecule {#that has a|with a!} {GAFF__names__noun} of {GAFF#} {GAFF__units}. + - The molecule with the {SMILES__description} {SMILES#} has a {GAFF__names__noun} of {GAFF#} {GAFF__units}. 
+ - "Task: Please predict a molecule feature based on the description.\nDescription: Predict the {exp_value__names__noun} in {exp_value__units}.\n{#Molecule |!}{SMILES__description}: {SMILES#}\nConstraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {exp_value__units} without using any {#other|additional!} words.\nResult: {exp_value#} {exp_value__units}" + - "Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.\nDescription: A molecule that has {exp_value__names__noun} of {exp_value#} {exp_value__units}.\nResult: {SMILES#}" + - "Task: Please predict a molecule feature based on the description.\nDescription: Predict the {GAFF__names__noun} in {GAFF__units}.\n{#Molecule |!}{SMILES__description}: {SMILES#}\nConstraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {GAFF__units} without using any {#other|additional!} words.\nResult: {GAFF#} {GAFF__units}" + - "Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.\nDescription: A molecule that has {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nResult: {SMILES#}" + - "User: Can you {#tell me|derive|estimate!} the {exp_value__names__noun} in {exp_value__units} of the molecule with the {SMILES__description} {SMILES#}?\nAssistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, this molecule has a {exp_value__names__noun} of {exp_value#} {exp_value__units}." 
+ - "User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}?\nAssistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}" + - "User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}.\nAssistant: This is a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}: {SMILES#}" + - "User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.\nAssistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?\nUser: Yes, please. The molecule should have a {exp_value__names__noun} of {exp_value#} {exp_value__units}.\nAssistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} represents a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}: {SMILES#}" + - "User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.\nAssistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?\nUser: Yes, the molecule should have a {exp_value__names__noun} of {exp_value#} {exp_value__units}.\nAssistant: {#Understood|Got it|Ok!}, this {SMILES__description} represents a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}: {SMILES#}" + - "User: Can you {#tell me|derive|estimate!} the {GAFF__names__noun} in {GAFF__units} of the molecule with the {SMILES__description} {SMILES#}?\nAssistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, this molecule has a {GAFF__names__noun} of {GAFF#} {GAFF__units}." 
+ - "User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}?\nAssistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}" + - "User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nAssistant: This is a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}: {SMILES#}" + - "User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.\nAssistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?\nUser: Yes, please. The molecule should have a {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nAssistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} represents a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}: {SMILES#}" + - "User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.\nAssistant: {#This sounds very exciting. |This sounds very interesting. 
!}Should it be a special {#molecule|one!}?\nUser: Yes, the molecule should have a {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nAssistant: {#Understood|Got it|Ok!}, this {SMILES__description} represents a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}: {SMILES#}" + - The {exp_value__names__noun} of the molecule with the {SMILES__description} {SMILES#} is:{exp_value#} {exp_value__units} + - The {exp_value__names__noun} of the {SMILES__description} {SMILES#} is:{exp_value#} {exp_value__units} + - The {exp_value__names__noun} of the molecule {SMILES__description} {SMILES#} is:{exp_value#} {exp_value__units} + - "Task: Please predict a molecule feature based on the description.\nDescription: Predict the {exp_value__names__noun} in {exp_value__units} of a molecule.\n{#Molecule |!}{SMILES__description}: {SMILES#}\nConstraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {exp_value__units} without using any {#other|additional!} words.\nResult:{exp_value#} {exp_value__units}" + - "Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.\nDescription: A molecule that has {exp_value__names__noun} of {exp_value#} {exp_value__units}.\nResult:{SMILES#}" + - The {GAFF__names__noun} of the molecule with the {SMILES__description} {SMILES#} is:{GAFF#} {GAFF__units} + - The {GAFF__names__noun} of the {SMILES__description} {SMILES#} is:{GAFF#} {GAFF__units} + - The {GAFF__names__noun} of the molecule {SMILES__description} {SMILES#} is:{GAFF#} {GAFF__units} + - "Task: Please predict a molecule feature based on the description.\nDescription: Predict the {GAFF__names__noun} in {GAFF__units} of a molecule.\n{#Molecule |!}{SMILES__description}: {SMILES#}\nConstraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {GAFF__units} without using any {#other|additional!} words.\nResult:{GAFF#} {GAFF__units}" + - "Task: Please {#give 
me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.\nDescription: A molecule that has {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nResult:{SMILES#}" diff --git a/data/tabular/h2_storage_materials/meta.yaml b/data/tabular/h2_storage_materials/meta.yaml index 6a464062b..b06e215a4 100644 --- a/data/tabular/h2_storage_materials/meta.yaml +++ b/data/tabular/h2_storage_materials/meta.yaml @@ -1,39 +1,38 @@ ---- name: h2_storage_reversible_hydrides description: synthetic procedures, experimental and theoretical h2 capacities of hydrides targets: - - id: h_weight_density_theory - description: theoretical hydrogen storage capacity - units: wt% - type: continuous - names: - - noun: theoretical hydrogen storage weight density - - id: h_weight_density_experiment - description: experimental hydrogen storage capacity - units: wt% - type: continuous - names: - - noun: experimental hydrogen storage capacity + - id: h_weight_density_theory + description: theoretical hydrogen storage capacity + units: wt% + type: continuous + names: + - noun: theoretical hydrogen storage weight density + - id: h_weight_density_experiment + description: experimental hydrogen storage capacity + units: wt% + type: continuous + names: + - noun: experimental hydrogen storage capacity identifiers: - - id: material_name - type: IUPAC - description: chemical name - - id: chemical_formula - type: COMPOSITION - names: - - noun: chemical formula - description: chemical formula - - id: synthetic_information - names: - - noun: synthesis procedure summary - description: brief description of synthetic procedure - type: Other + - id: material_name + type: IUPAC + description: chemical name + - id: chemical_formula + type: COMPOSITION + names: + - noun: chemical formula + description: chemical formula + - id: synthetic_information + names: + - noun: synthesis procedure summary + description: brief description of synthetic procedure + type: Other license: File 
links: - - url: https://datahub.hymarc.org/dataset/hydrogen-storage-materials-db/resource/4ef1c494-366e-43a3-bed4-a3985de5c374 - description: website with source data - - url: https://datahub.hymarc.org/dataset/ad580d95-e7e2-4ef4-a7f6-3b2f91a96eba/resource/4ef1c494-366e-43a3-bed4-a3985de5c374/download/hydstormatdb-reversible_hydrides.csv - description: original_dataset + - url: https://datahub.hymarc.org/dataset/hydrogen-storage-materials-db/resource/4ef1c494-366e-43a3-bed4-a3985de5c374 + description: website with source data + - url: https://datahub.hymarc.org/dataset/ad580d95-e7e2-4ef4-a7f6-3b2f91a96eba/resource/4ef1c494-366e-43a3-bed4-a3985de5c374/download/hydstormatdb-reversible_hydrides.csv + description: original_dataset num_points: 30 bibtex: - - "@online{hymarcReversibleHydrides,\ntitle={Hydrogen Storage Materials Database Reversible Hydrides},\nauthor={HyMARC},\nyear={2019}" + - "@online{hymarcReversibleHydrides,\ntitle={Hydrogen Storage Materials Database Reversible Hydrides},\nauthor={HyMARC},\nyear={2019}" diff --git a/data/tabular/h2_storage_materials/processing.ipynb b/data/tabular/h2_storage_materials/processing.ipynb index b9d6d8515..3095b0d51 100644 --- a/data/tabular/h2_storage_materials/processing.ipynb +++ b/data/tabular/h2_storage_materials/processing.ipynb @@ -35,7 +35,7 @@ "metadata": {}, "outputs": [], "source": [ - "data_path = 'https://datahub.hymarc.org/dataset/ad580d95-e7e2-4ef4-a7f6-3b2f91a96eba/resource/4ef1c494-366e-43a3-bed4-a3985de5c374/download/hydstormatdb-reversible_hydrides.csv' " + "data_path = \"https://datahub.hymarc.org/dataset/ad580d95-e7e2-4ef4-a7f6-3b2f91a96eba/resource/4ef1c494-366e-43a3-bed4-a3985de5c374/download/hydstormatdb-reversible_hydrides.csv\"" ] }, { @@ -44,10 +44,11 @@ "metadata": {}, "outputs": [], "source": [ - "data_path = ('https://datahub.hymarc.org/dataset/'\n", - " 'ad580d95-e7e2-4ef4-a7f6-3b2f91a96eba/resource/'\n", - " 
'4ef1c494-366e-43a3-bed4-a3985de5c374/download/hydstormatdb-reversible_hydrides.csv'\n", - " ) " + "data_path = (\n", + " \"https://datahub.hymarc.org/dataset/\"\n", + " \"ad580d95-e7e2-4ef4-a7f6-3b2f91a96eba/resource/\"\n", + " \"4ef1c494-366e-43a3-bed4-a3985de5c374/download/hydstormatdb-reversible_hydrides.csv\"\n", + ")" ] }, { @@ -76,7 +77,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = pd.read_csv(fn_data_original, sep=',')" + "df = pd.read_csv(fn_data_original, sep=\",\")" ] }, { @@ -112,18 +113,18 @@ "outputs": [], "source": [ "remove_columns = [\n", - " 'keywords',\n", - " 'activation',\n", - " 'principal_investigator',\n", - " 'institution',\n", - " 'reversible_capacity',\n", - " 'h_volume_density_theory',\n", - " 'h_volume_density_experiment',\n", - " 'h_volume_density_reference',\n", - " 'temperature_release_reference',\n", - " 'h_volume_density_reference',\n", - " 'entry_date',\n", - " 'precursors'\n", + " \"keywords\",\n", + " \"activation\",\n", + " \"principal_investigator\",\n", + " \"institution\",\n", + " \"reversible_capacity\",\n", + " \"h_volume_density_theory\",\n", + " \"h_volume_density_experiment\",\n", + " \"h_volume_density_reference\",\n", + " \"temperature_release_reference\",\n", + " \"h_volume_density_reference\",\n", + " \"entry_date\",\n", + " \"precursors\",\n", "]" ] }, @@ -152,7 +153,7 @@ "metadata": {}, "outputs": [], "source": [ - "df['synthesis_information'] = df['synthesis_method'] + ': ' + df['synthesis_conditions']" + "df[\"synthesis_information\"] = df[\"synthesis_method\"] + \": \" + df[\"synthesis_conditions\"]" ] }, { @@ -161,7 +162,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = df.drop(['synthesis_method', 'synthesis_conditions'], axis=1)" + "df = df.drop([\"synthesis_method\", \"synthesis_conditions\"], axis=1)" ] }, { @@ -170,7 +171,7 @@ "metadata": {}, "outputs": [], "source": [ - "string_columns = list(df.select_dtypes(include=['object']).columns)" + "string_columns = 
list(df.select_dtypes(include=[\"object\"]).columns)" ] }, { @@ -310,7 +311,7 @@ " \"license\": \"No License Provided\", # license under which the original dataset was published\n", " \"links\": [ # list of relevant links (original dataset, other uses, etc.)\n", " {\n", - " \"url\": 'https://datahub.hymarc.org/dataset/hydrogen-storage-materials-db/resource/4ef1c494-366e-43a3-bed4-a3985de5c374',\n", + " \"url\": \"https://datahub.hymarc.org/dataset/hydrogen-storage-materials-db/resource/4ef1c494-366e-43a3-bed4-a3985de5c374\",\n", " \"description\": \"website with source data\",\n", " },\n", " {\n", @@ -356,7 +357,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -365,7 +366,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -540,7 +541,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -575,13 +576,6 @@ "source": [ "!python3 transform.py" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -599,8 +593,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.15" + "pygments_lexer": "ipython3" } }, "nbformat": 4, diff --git a/data/tabular/half_life_obach/meta.yaml b/data/tabular/half_life_obach/meta.yaml index a9b9bcac8..161da87c4 100644 --- a/data/tabular/half_life_obach/meta.yaml +++ b/data/tabular/half_life_obach/meta.yaml @@ -1,55 +1,54 @@ ---- name: half_life_obach description: |- - Half life of a drug is the duration for the concentration of the drug - in the body to be reduced by half. It measures the duration of actions of a drug. - This dataset deposited version under CHEMBL assay 1614674. + Half life of a drug is the duration for the concentration of the drug + in the body to be reduced by half. 
It measures the duration of actions of a drug. + This dataset deposited version under CHEMBL assay 1614674. targets: - - id: half_life_duration - description: the time it takes for the plasma concentration of a drug in the body to be reduced by half - units: hours - type: continuous - significant_digits: 2 - names: - - noun: half life in humans after IV administration - - noun: half life time in humans after IV administration - - noun: drug half life time in humans after IV administration - uris: - - http://purl.bioontology.org/ontology/MESH/D006207 + - id: half_life_duration + description: the time it takes for the plasma concentration of a drug in the body to be reduced by half + units: hours + type: continuous + significant_digits: 2 + names: + - noun: half life in humans after IV administration + - noun: half life time in humans after IV administration + - noun: drug half life time in humans after IV administration + uris: + - http://purl.bioontology.org/ontology/MESH/D006207 benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: chembl_id - type: Other - names: - - noun: ChEMBL database id - - noun: ChEMBL identifier number - description: ChEMBL ids - sample: false + - id: SMILES + type: SMILES + description: SMILES + - id: chembl_id + type: Other + names: + - noun: ChEMBL database id + - noun: ChEMBL identifier number + description: ChEMBL ids + sample: false license: CC BY 4.0 links: - - url: https://doi.org/10.1124/dmd.108.020479 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#half-life-obach-et-al - description: data source + - url: https://doi.org/10.1124/dmd.108.020479 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#half-life-obach-et-al + description: data source num_points: 667 bibtex: - - |- - 
@article{Obach2008, - doi = {10.1124/dmd.108.020479}, - url = {https://doi.org/10.1124/dmd.108.020479}, - year = {2008}, - month = apr, - publisher = {American Society for Pharmacology and Experimental Therapeutics (ASPET)}, - volume = {36}, - number = {7}, - pages = {1385--1405}, - author = {R. Scott Obach and Franco Lombardo and Nigel J. Waters}, - title = {Trend Analysis of a Database of Intravenous Pharmacokinetic - Parameters in Humans for 670 Drug Compounds}, - journal = {Drug Metabolism and Disposition} + - |- + @article{Obach2008, + doi = {10.1124/dmd.108.020479}, + url = {https://doi.org/10.1124/dmd.108.020479}, + year = {2008}, + month = apr, + publisher = {American Society for Pharmacology and Experimental Therapeutics (ASPET)}, + volume = {36}, + number = {7}, + pages = {1385--1405}, + author = {R. Scott Obach and Franco Lombardo and Nigel J. Waters}, + title = {Trend Analysis of a Database of Intravenous Pharmacokinetic + Parameters in Humans for 670 Drug Compounds}, + journal = {Drug Metabolism and Disposition} diff --git a/data/tabular/herg_blockers/meta.yaml b/data/tabular/herg_blockers/meta.yaml index bf89b3e65..3d998c759 100644 --- a/data/tabular/herg_blockers/meta.yaml +++ b/data/tabular/herg_blockers/meta.yaml @@ -1,137 +1,134 @@ ---- name: herg_blockers -description: "Human ether-à-go-go related gene (hERG) is crucial for the coordination\nof the heart's beating. Thus, if a drug blocks the hERG, it could\ - \ lead to severe\nadverse effects. Therefore, reliable prediction of hERG liability in the early\nstages of drug design is quite important to reduce\ - \ the risk of cardiotoxicity\nrelated attritions in the later development stages." +description: "Human ether-à-go-go related gene (hERG) is crucial for the coordination\nof the heart's beating. Thus, if a drug blocks the hERG, it could lead to severe\nadverse effects. 
Therefore, reliable prediction of hERG liability in the early\nstages of drug design is quite important to reduce the risk of cardiotoxicity\nrelated attritions in the later development stages." targets: - - id: herg_blocker - description: whether it blocks hERG (1) or not (0) - units: - type: boolean - names: - - noun: hERG blocker - - noun: hERG blocking compound - - noun: hERG blocking compound (<10uM) - - noun: human ether-à-go-go related gene (hERG) blocker - - noun: human ether-à-go-go related gene (hERG) blocking compound - - verb: blocks hERG - - verb: blocks the human ether-à-go-go related gene (hERG) - - verb: is active against hERG (<10uM) - - verb: is active against the human ether-à-go-go related gene (hERG) - uris: - - http://purl.obolibrary.org/obo/MI_2136 + - id: herg_blocker + description: whether it blocks hERG (1) or not (0) + units: + type: boolean + names: + - noun: hERG blocker + - noun: hERG blocking compound + - noun: hERG blocking compound (<10uM) + - noun: human ether-à-go-go related gene (hERG) blocker + - noun: human ether-à-go-go related gene (hERG) blocking compound + - verb: blocks hERG + - verb: blocks the human ether-à-go-go related gene (hERG) + - verb: is active against hERG (<10uM) + - verb: is active against the human ether-à-go-go related gene (hERG) + uris: + - http://purl.obolibrary.org/obo/MI_2136 benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: compound_name - type: Other - description: compound name - names: - - noun: compound - - noun: compound name - - noun: drug + - id: SMILES + type: SMILES + description: SMILES + - id: compound_name + type: Other + description: compound name + names: + - noun: compound + - noun: compound name + - noun: drug license: CC BY 4.0 links: - - url: https://doi.org/10.1021/acs.molpharmaceut.6b00471 - description: corresponding 
publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-blockers - description: Data source + - url: https://doi.org/10.1021/acs.molpharmaceut.6b00471 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-blockers + description: Data source num_points: 655 bibtex: - - |- - @article{Wang2016, - doi = {10.1021/acs.molpharmaceut.6b00471}, - url = {https://doi.org/10.1021/acs.molpharmaceut.6b00471}, - year = {2016}, - month = jul, - publisher = {American Chemical Society (ACS)}, - volume = {13}, - number = {8}, - pages = {2855--2866}, - author = {Shuangquan Wang and Huiyong Sun and Hui Liu and Dan Li and - Youyong Li and Tingjun Hou}, - title = {ADMET Evaluation in Drug Discovery. 16. Predicting hERG Blockers - by Combining Multiple Pharmacophores and Machine Learning Approaches}, - journal = {Molecular Pharmaceutics} + - |- + @article{Wang2016, + doi = {10.1021/acs.molpharmaceut.6b00471}, + url = {https://doi.org/10.1021/acs.molpharmaceut.6b00471}, + year = {2016}, + month = jul, + publisher = {American Chemical Society (ACS)}, + volume = {13}, + number = {8}, + pages = {2855--2866}, + author = {Shuangquan Wang and Huiyong Sun and Hui Liu and Dan Li and + Youyong Li and Tingjun Hou}, + title = {ADMET Evaluation in Drug Discovery. 16. Predicting hERG Blockers + by Combining Multiple Pharmacophores and Machine Learning Approaches}, + journal = {Molecular Pharmaceutics} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. 
- - |- - Task: Please classify a molecule based on the description. - Description: A molecule that {herg_blocker__names__verb}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {herg_blocker#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is a {herg_blocker__names__noun}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {herg_blocker#no &NULL}{herg_blocker__names__noun}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is a {herg_blocker__names__noun}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {herg_blocker__names__noun}? - Assistant: {herg_blocker#No&Yes}, this molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} a {herg_blocker__names__noun}? - Assistant: {herg_blocker#No&Yes}, it is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that {herg_blocker#not &NULL}{herg_blocker__names__verb}? - Assistant: This is a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. 
|This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {herg_blocker#not &NULL}be a {herg_blocker__names__noun}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {herg_blocker#not &NULL}be a {herg_blocker__names__noun}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#} - - Is the {SMILES__description} {SMILES#} a {herg_blocker__names__noun}:{herg_blocker#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is a {herg_blocker__names__noun}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{herg_blocker#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is a {herg_blocker__names__noun}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result:This molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {herg_blocker__names__noun}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. 
- Options: - {herg_blocker%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {herg_blocker#not &NULL}a {herg_blocker__names__noun}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%herg_blocker%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {herg_blocker#not &NULL}a {herg_blocker__names__noun}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%herg_blocker%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that {herg_blocker__names__verb}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {herg_blocker#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is a {herg_blocker__names__noun}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {herg_blocker#no &NULL}{herg_blocker__names__noun}. 
+ - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is a {herg_blocker__names__noun}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {herg_blocker__names__noun}? + Assistant: {herg_blocker#No&Yes}, this molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} a {herg_blocker__names__noun}? + Assistant: {herg_blocker#No&Yes}, it is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that {herg_blocker#not &NULL}{herg_blocker__names__verb}? + Assistant: This is a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {herg_blocker#not &NULL}be a {herg_blocker__names__noun}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {herg_blocker#not &NULL}be a {herg_blocker__names__noun}. 
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#} + - Is the {SMILES__description} {SMILES#} a {herg_blocker__names__noun}:{herg_blocker#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is a {herg_blocker__names__noun}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{herg_blocker#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is a {herg_blocker__names__noun}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result:This molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {herg_blocker__names__noun}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {herg_blocker%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {herg_blocker#not &NULL}a {herg_blocker__names__noun}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%herg_blocker%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {herg_blocker#not &NULL}a {herg_blocker__names__noun}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {SMILES%herg_blocker%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/herg_central_at_10uM/meta.yaml b/data/tabular/herg_central_at_10uM/meta.yaml index 763653fc5..3a72f4ad6 100644 --- a/data/tabular/herg_central_at_10uM/meta.yaml +++ b/data/tabular/herg_central_at_10uM/meta.yaml @@ -1,37 +1,31 @@ ---- name: herg_central_at_10uM -description: "Human ether-à-go-go related gene (hERG) is crucial for the coordination\nof the heart's beating. Thus, if a drug blocks the hERG, it could\ - \ lead to severe\nadverse effects. Therefore, reliable prediction of hERG liability in the early\nstages of drug design is quite important to reduce\ - \ the risk of cardiotoxicity-related\nattritions in the later development stages. There are three targets: hERG_at_1microM,\nhERG_at_10microM, and herg_inhib." +description: "Human ether-à-go-go related gene (hERG) is crucial for the coordination\nof the heart's beating. Thus, if a drug blocks the hERG, it could lead to severe\nadverse effects. Therefore, reliable prediction of hERG liability in the early\nstages of drug design is quite important to reduce the risk of cardiotoxicity-related\nattritions in the later development stages. There are three targets: hERG_at_1microM,\nhERG_at_10microM, and herg_inhib." 
targets: - - id: herg_central_at_10uM - description: the percent inhibition of hERG at a 10uM concentration - units: '%' - type: continuous - names: - - noun: hERG inhibition at a concentration of 10uM - - noun: hERG inhibition at a concentration of 10uM - - noun: hERG inhibition at 10uM - - noun: human ether-à-go-go related gene (hERG) inhibition at a concentration of 10uM - - noun: human ether-à-go-go related gene (hERG) inhibition at 10uM - - noun: human ether-à-go-go related gene (hERG) inhibition at 10uM - uris: - - http://purl.obolibrary.org/obo/MI_2136 + - id: herg_central_at_10uM + description: the percent inhibition of hERG at a 10uM concentration + units: "%" + type: continuous + names: + - noun: hERG inhibition at a concentration of 10uM + - noun: hERG inhibition at a concentration of 10uM + - noun: hERG inhibition at 10uM + - noun: human ether-à-go-go related gene (hERG) inhibition at a concentration of 10uM + - noun: human ether-à-go-go related gene (hERG) inhibition at 10uM + - noun: human ether-à-go-go related gene (hERG) inhibition at 10uM + uris: + - http://purl.obolibrary.org/obo/MI_2136 identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://doi.org/10.1089/adt.2011.0425 - description: corresponding publication - - url: https://bbirnbaum.com/ - description: TDC Contributer - - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-central - description: Data source + - url: https://doi.org/10.1089/adt.2011.0425 + description: corresponding publication + - url: https://bbirnbaum.com/ + description: TDC Contributer + - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-central + description: Data source num_points: 306893 bibtex: - - "@article{Du2011,\ndoi = {10.1089/adt.2011.0425},\nurl = {https://doi.org/10.1089/adt.2011.0425},\nyear = {2011},\nmonth = dec,\npublisher = {Mary\ - \ Ann Liebert Inc},\nvolume = {9},\nnumber = {6},\npages = 
{580--588},\nauthor = {Fang Du and Haibo Yu and Beiyan Zou and Joseph Babcock\nand Shunyou\ - \ Long and Min Li},\ntitle = {hERGCentral: A Large Database to Store, Retrieve, and Analyze Compound Human\nEther-à-go-go Related Gene Channel Interactions\ - \ to Facilitate Cardiotoxicity Assessment in Drug Development},\njournal = {ASSAY and Drug Development Technologies}" + - "@article{Du2011,\ndoi = {10.1089/adt.2011.0425},\nurl = {https://doi.org/10.1089/adt.2011.0425},\nyear = {2011},\nmonth = dec,\npublisher = {Mary Ann Liebert Inc},\nvolume = {9},\nnumber = {6},\npages = {580--588},\nauthor = {Fang Du and Haibo Yu and Beiyan Zou and Joseph Babcock\nand Shunyou Long and Min Li},\ntitle = {hERGCentral: A Large Database to Store, Retrieve, and Analyze Compound Human\nEther-à-go-go Related Gene Channel Interactions to Facilitate Cardiotoxicity Assessment in Drug Development},\njournal = {ASSAY and Drug Development Technologies}" diff --git a/data/tabular/herg_central_at_10uM/transform.py b/data/tabular/herg_central_at_10uM/transform.py index 39c9080a1..d2870a332 100644 --- a/data/tabular/herg_central_at_10uM/transform.py +++ b/data/tabular/herg_central_at_10uM/transform.py @@ -96,7 +96,7 @@ def get_and_transform_data(): }, { "url": "https://bbirnbaum.com/", - "description": "TDC Contributer", + "description": "TDC Contributor", }, { "url": "https://tdcommons.ai/single_pred_tasks/tox/#herg-central", diff --git a/data/tabular/herg_central_at_1uM/meta.yaml b/data/tabular/herg_central_at_1uM/meta.yaml index 0c47d92d2..2817fd315 100644 --- a/data/tabular/herg_central_at_1uM/meta.yaml +++ b/data/tabular/herg_central_at_1uM/meta.yaml @@ -1,37 +1,31 @@ ---- name: herg_central_at_1uM -description: "Human ether-à-go-go related gene (hERG) is crucial for the coordination\nof the heart's beating. Thus, if a drug blocks the hERG, it could\ - \ lead to severe\nadverse effects. 
Therefore, reliable prediction of hERG liability in the early\nstages of drug design is quite important to reduce\ - \ the risk of cardiotoxicity-related\nattritions in the later development stages. There are three targets: hERG_at_1microM,\nhERG_at_10microM, and herg_inhib." +description: "Human ether-à-go-go related gene (hERG) is crucial for the coordination\nof the heart's beating. Thus, if a drug blocks the hERG, it could lead to severe\nadverse effects. Therefore, reliable prediction of hERG liability in the early\nstages of drug design is quite important to reduce the risk of cardiotoxicity-related\nattritions in the later development stages. There are three targets: hERG_at_1microM,\nhERG_at_10microM, and herg_inhib." targets: - - id: herg_central_at_1uM - description: the percent inhibition of hERG at a 1uM concentration - units: '%' - type: continuous - names: - - noun: hERG inhibition at a concentration of 1uM - - noun: hERG inhibition at a concentration of 1uM - - noun: hERG inhibition at 1uM - - noun: human ether-à-go-go related gene (hERG) inhibition at a concentration of 1uM - - noun: human ether-à-go-go related gene (hERG) inhibition at 1uM - - noun: human ether-à-go-go related gene (hERG) inhibition at 1uM - uris: - - http://purl.obolibrary.org/obo/MI_2136 + - id: herg_central_at_1uM + description: the percent inhibition of hERG at a 1uM concentration + units: "%" + type: continuous + names: + - noun: hERG inhibition at a concentration of 1uM + - noun: hERG inhibition at a concentration of 1uM + - noun: hERG inhibition at 1uM + - noun: human ether-à-go-go related gene (hERG) inhibition at a concentration of 1uM + - noun: human ether-à-go-go related gene (hERG) inhibition at 1uM + - noun: human ether-à-go-go related gene (hERG) inhibition at 1uM + uris: + - http://purl.obolibrary.org/obo/MI_2136 identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: 
https://doi.org/10.1089/adt.2011.0425 - description: corresponding publication - - url: https://bbirnbaum.com/ - description: TDC Contributer - - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-central - description: Data source + - url: https://doi.org/10.1089/adt.2011.0425 + description: corresponding publication + - url: https://bbirnbaum.com/ + description: TDC Contributer + - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-central + description: Data source num_points: 306893 bibtex: - - "@article{Du2011,\ndoi = {10.1089/adt.2011.0425},\nurl = {https://doi.org/10.1089/adt.2011.0425},\nyear = {2011},\nmonth = dec,\npublisher = {Mary\ - \ Ann Liebert Inc},\nvolume = {9},\nnumber = {6},\npages = {580--588},\nauthor = {Fang Du and Haibo Yu and Beiyan Zou and Joseph Babcock\nand Shunyou\ - \ Long and Min Li},\ntitle = {hERGCentral: A Large Database to Store, Retrieve, and Analyze Compound Human\nEther-à-go-go Related Gene Channel Interactions\ - \ to Facilitate Cardiotoxicity Assessment in Drug Development},\njournal = {ASSAY and Drug Development Technologies}" + - "@article{Du2011,\ndoi = {10.1089/adt.2011.0425},\nurl = {https://doi.org/10.1089/adt.2011.0425},\nyear = {2011},\nmonth = dec,\npublisher = {Mary Ann Liebert Inc},\nvolume = {9},\nnumber = {6},\npages = {580--588},\nauthor = {Fang Du and Haibo Yu and Beiyan Zou and Joseph Babcock\nand Shunyou Long and Min Li},\ntitle = {hERGCentral: A Large Database to Store, Retrieve, and Analyze Compound Human\nEther-à-go-go Related Gene Channel Interactions to Facilitate Cardiotoxicity Assessment in Drug Development},\njournal = {ASSAY and Drug Development Technologies}" diff --git a/data/tabular/herg_central_inhib/meta.yaml b/data/tabular/herg_central_inhib/meta.yaml index 5c62d550b..8c4514045 100644 --- a/data/tabular/herg_central_inhib/meta.yaml +++ b/data/tabular/herg_central_inhib/meta.yaml @@ -1,121 +1,115 @@ ---- name: herg_central_inhib -description: "Human ether-à-go-go related gene (hERG) is 
crucial for the coordination\nof the heart's beating. Thus, if a drug blocks the hERG, it could\ - \ lead to severe\nadverse effects. Therefore, reliable prediction of hERG liability in the early\nstages of drug design is quite important to reduce\ - \ the risk of cardiotoxicity-related\nattritions in the later development stages. There are three targets: hERG_at_1microM,\nhERG_at_10microM, and herg_inhib." +description: "Human ether-à-go-go related gene (hERG) is crucial for the coordination\nof the heart's beating. Thus, if a drug blocks the hERG, it could lead to severe\nadverse effects. Therefore, reliable prediction of hERG liability in the early\nstages of drug design is quite important to reduce the risk of cardiotoxicity-related\nattritions in the later development stages. There are three targets: hERG_at_1microM,\nhERG_at_10microM, and herg_inhib." targets: - - id: herg_inhib - description: |- - whether it blocks (1) or not blocks (0) hERG - (This is equivalent to whether hERG_at_10microM < -50, i.e., - whether the compound has an IC50 of less than 10microM.) - units: - type: boolean - names: - - noun: hERG blocker - - noun: hERG blocking compound - - noun: hERG blocking compound (IC50 < 10uM) - - noun: hERG blocking compound (IC50 less than 10uM) - - noun: human ether-à-go-go related gene (hERG) blocker - - noun: human ether-à-go-go related gene (hERG) blocking compound - - verb: block hERG - - verb: block hERG (IC50 < 10uM) - - verb: block hERG (IC50 less than 10uM) - - verb: block the human ether-à-go-go related gene (hERG) - uris: - - http://purl.obolibrary.org/obo/MI_2136 + - id: herg_inhib + description: |- + whether it blocks (1) or not blocks (0) hERG + (This is equivalent to whether hERG_at_10microM < -50, i.e., + whether the compound has an IC50 of less than 10microM.) 
+ units: + type: boolean + names: + - noun: hERG blocker + - noun: hERG blocking compound + - noun: hERG blocking compound (IC50 < 10uM) + - noun: hERG blocking compound (IC50 less than 10uM) + - noun: human ether-à-go-go related gene (hERG) blocker + - noun: human ether-à-go-go related gene (hERG) blocking compound + - verb: block hERG + - verb: block hERG (IC50 < 10uM) + - verb: block hERG (IC50 less than 10uM) + - verb: block the human ether-à-go-go related gene (hERG) + uris: + - http://purl.obolibrary.org/obo/MI_2136 identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://doi.org/10.1089/adt.2011.0425 - description: corresponding publication - - url: https://bbirnbaum.com/ - description: TDC Contributer - - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-central - description: Data source + - url: https://doi.org/10.1089/adt.2011.0425 + description: corresponding publication + - url: https://bbirnbaum.com/ + description: TDC Contributer + - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-central + description: Data source num_points: 306893 bibtex: - - "@article{Du2011,\ndoi = {10.1089/adt.2011.0425},\nurl = {https://doi.org/10.1089/adt.2011.0425},\nyear = {2011},\nmonth = dec,\npublisher = {Mary\ - \ Ann Liebert Inc},\nvolume = {9},\nnumber = {6},\npages = {580--588},\nauthor = {Fang Du and Haibo Yu and Beiyan Zou and Joseph Babcock\nand Shunyou\ - \ Long and Min Li},\ntitle = {hERGCentral: A Large Database to Store, Retrieve, and Analyze Compound Human\nEther-à-go-go Related Gene Channel Interactions\ - \ to Facilitate Cardiotoxicity Assessment in Drug Development},\njournal = {ASSAY and Drug Development Technologies}" + - "@article{Du2011,\ndoi = {10.1089/adt.2011.0425},\nurl = {https://doi.org/10.1089/adt.2011.0425},\nyear = {2011},\nmonth = dec,\npublisher = {Mary Ann Liebert Inc},\nvolume = {9},\nnumber = {6},\npages = {580--588},\nauthor = 
{Fang Du and Haibo Yu and Beiyan Zou and Joseph Babcock\nand Shunyou Long and Min Li},\ntitle = {hERGCentral: A Large Database to Store, Retrieve, and Analyze Compound Human\nEther-à-go-go Related Gene Channel Interactions to Facilitate Cardiotoxicity Assessment in Drug Development},\njournal = {ASSAY and Drug Development Technologies}" templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {herg_inhib#not &NULL}a {herg_inhib__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {herg_inhib#not &NULL}a {herg_inhib__names__noun}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {herg_inhib#not &NULL}a {herg_inhib__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {herg_inhib#not &NULL}a {herg_inhib__names__noun}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that {herg_inhib__names__verb}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {herg_inhib#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is a {herg_inhib__names__noun}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {herg_inhib#no &NULL}{herg_inhib__names__noun}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is a {herg_inhib__names__noun}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {herg_inhib__names__noun}? - Assistant: {herg_inhib#No&Yes}, this molecule is {herg_inhib#not &NULL}a {herg_inhib__names__noun}. 
- - |- - User: Is the molecule with the {SMILES__description} {SMILES#} a {herg_inhib__names__noun}? - Assistant: {herg_inhib#No&Yes}, it is {herg_inhib#not &NULL}a {herg_inhib__names__noun}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {herg_inhib#not &NULL}a {herg_inhib__names__noun}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that does {herg_inhib#not &NULL}{herg_inhib__names__verb}? - Assistant: This is a molecule that is {herg_inhib#not &NULL}a {herg_inhib__names__noun}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {herg_inhib#not &NULL}be a {herg_inhib__names__noun}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {herg_inhib#not &NULL}a {herg_inhib__names__noun}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {herg_inhib#not &NULL}be a {herg_inhib__names__noun}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {herg_inhib#not &NULL}a {herg_inhib__names__noun}: {SMILES#} - - Is the {SMILES__description} {SMILES#} a {herg_inhib__names__noun}:{herg_inhib#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is a {herg_inhib__names__noun}. 
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{herg_inhib#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is a {herg_inhib__names__noun}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result:This molecule is {herg_inhib#not &NULL}a {herg_inhib__names__noun}. - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {herg_inhib__names__noun}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {herg_inhib%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {herg_inhib#not &NULL}a {herg_inhib__names__noun}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%herg_inhib%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {herg_inhib#not &NULL}a {herg_inhib__names__noun}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%herg_inhib%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {herg_inhib#not &NULL}a {herg_inhib__names__noun}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {herg_inhib#not &NULL}a {herg_inhib__names__noun}. 
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {herg_inhib#not &NULL}a {herg_inhib__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {herg_inhib#not &NULL}a {herg_inhib__names__noun}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that {herg_inhib__names__verb}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {herg_inhib#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is a {herg_inhib__names__noun}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {herg_inhib#no &NULL}{herg_inhib__names__noun}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is a {herg_inhib__names__noun}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {herg_inhib__names__noun}? + Assistant: {herg_inhib#No&Yes}, this molecule is {herg_inhib#not &NULL}a {herg_inhib__names__noun}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} a {herg_inhib__names__noun}? + Assistant: {herg_inhib#No&Yes}, it is {herg_inhib#not &NULL}a {herg_inhib__names__noun}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {herg_inhib#not &NULL}a {herg_inhib__names__noun}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that does {herg_inhib#not &NULL}{herg_inhib__names__verb}? 
+ Assistant: This is a molecule that is {herg_inhib#not &NULL}a {herg_inhib__names__noun}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {herg_inhib#not &NULL}be a {herg_inhib__names__noun}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {herg_inhib#not &NULL}a {herg_inhib__names__noun}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {herg_inhib#not &NULL}be a {herg_inhib__names__noun}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {herg_inhib#not &NULL}a {herg_inhib__names__noun}: {SMILES#} + - Is the {SMILES__description} {SMILES#} a {herg_inhib__names__noun}:{herg_inhib#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is a {herg_inhib__names__noun}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{herg_inhib#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is a {herg_inhib__names__noun}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result:This molecule is {herg_inhib#not &NULL}a {herg_inhib__names__noun}. + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {herg_inhib__names__noun}? 
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {herg_inhib%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {herg_inhib#not &NULL}a {herg_inhib__names__noun}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%herg_inhib%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {herg_inhib#not &NULL}a {herg_inhib__names__noun}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%herg_inhib%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/herg_karim_et_al/meta.yaml b/data/tabular/herg_karim_et_al/meta.yaml index a419981bb..0220393be 100644 --- a/data/tabular/herg_karim_et_al/meta.yaml +++ b/data/tabular/herg_karim_et_al/meta.yaml @@ -1,126 +1,122 @@ ---- name: herg_karim_et_al -description: "A integrated Ether-à-go-go-related gene (hERG) dataset consisting\nof molecular structures labelled as hERG (<10uM) and non-hERG (>=10uM)\ - \ blockers in\nthe form of SMILES strings was obtained from the DeepHIT, the BindingDB database,\nChEMBL bioactivity database, and other literature." +description: "A integrated Ether-à-go-go-related gene (hERG) dataset consisting\nof molecular structures labelled as hERG (<10uM) and non-hERG (>=10uM) blockers in\nthe form of SMILES strings was obtained from the DeepHIT, the BindingDB database,\nChEMBL bioactivity database, and other literature." 
targets: - - id: herg_blocker - description: whether it blocks hERG (1, <10uM) or not (0, >=10uM) - units: - type: boolean - names: - - noun: hERG blocker (<10uM) - - noun: hERG blocking compound (<10uM) - - noun: human ether-à-go-go related gene (hERG) blocker (<10uM) - - noun: human ether-à-go-go related gene (hERG) blocking compound (<10uM) - - verb: block hERG (<10uM) - - verb: block the human ether-à-go-go related gene (hERG) (<10uM) - uris: - - http://purl.obolibrary.org/obo/MI_2136 + - id: herg_blocker + description: whether it blocks hERG (1, <10uM) or not (0, >=10uM) + units: + type: boolean + names: + - noun: hERG blocker (<10uM) + - noun: hERG blocking compound (<10uM) + - noun: human ether-à-go-go related gene (hERG) blocker (<10uM) + - noun: human ether-à-go-go related gene (hERG) blocking compound (<10uM) + - verb: block hERG (<10uM) + - verb: block the human ether-à-go-go related gene (hERG) (<10uM) + uris: + - http://purl.obolibrary.org/obo/MI_2136 benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://doi.org/10.1186/s13321-021-00541-z - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-karim-et-al - description: Data source + - url: https://doi.org/10.1186/s13321-021-00541-z + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-karim-et-al + description: Data source num_points: 13445 bibtex: - - |- - @article{Karim2021, - doi = {10.1186/s13321-021-00541-z}, - url = {https://doi.org/10.1186/s13321-021-00541-z}, - year = {2021}, - month = aug, - publisher = {Springer Science and Business Media LLC}, - volume = {13}, - number = {1}, - author = {Abdul Karim and Matthew Lee and Thomas Balle and Abdul Sattar}, - title 
= {CardioTox net: a robust predictor for hERG channel blockade - based on deep learning meta-feature ensembles}, - journal = {Journal of Cheminformatics} + - |- + @article{Karim2021, + doi = {10.1186/s13321-021-00541-z}, + url = {https://doi.org/10.1186/s13321-021-00541-z}, + year = {2021}, + month = aug, + publisher = {Springer Science and Business Media LLC}, + volume = {13}, + number = {1}, + author = {Abdul Karim and Matthew Lee and Thomas Balle and Abdul Sattar}, + title = {CardioTox net: a robust predictor for hERG channel blockade + based on deep learning meta-feature ensembles}, + journal = {Journal of Cheminformatics} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {herg_blocker#not a hERG blocker (>= 10uM)&a hERG blocker (<10uM)}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {herg_blocker#not a human ether-à-go-go related gene (hERG) blocker - (>= 10uM)&a human ether-à-go-go related gene (hERG) blocker (<10uM)}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {herg_blocker#not a hERG blocker (>= 10uM)&a hERG blocker (<10uM)}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {herg_blocker#not a human ether-à-go-go related gene (hERG) blocker (>= 10uM)&a human ether-à-go-go - related gene (hERG) blocker (<10uM)}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that does {herg_blocker__names__verb}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {herg_blocker#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is a {herg_blocker__names__noun}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. 
- Result: This molecule is {herg_blocker#no &NULL}{herg_blocker__names__noun}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is a {herg_blocker__names__noun}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {herg_blocker__names__noun}? - Assistant: {herg_blocker#No&Yes}, this molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} a {herg_blocker__names__noun}? - Assistant: {herg_blocker#No&Yes}, it is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that does {herg_blocker#not &NULL}{herg_blocker__names__verb}? - Assistant: This is a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {herg_blocker#not &NULL}be a {herg_blocker__names__noun}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? 
- User: Yes, the molecule should {herg_blocker#not &NULL}be a {herg_blocker__names__noun}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#} - - Is the {SMILES__description} {SMILES#} a {herg_blocker__names__noun}:{herg_blocker#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is a {herg_blocker__names__noun}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{herg_blocker#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is a {herg_blocker__names__noun}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result:This molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {herg_blocker__names__noun}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {herg_blocker%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {herg_blocker#not &NULL}a {herg_blocker__names__noun}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%herg_blocker%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {herg_blocker#not &NULL}a {herg_blocker__names__noun}? 
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%herg_blocker%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {herg_blocker#not a hERG blocker (>= 10uM)&a hERG blocker (<10uM)}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {herg_blocker#not a human ether-à-go-go related gene (hERG) blocker (>= 10uM)&a human ether-à-go-go related gene (hERG) blocker (<10uM)}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {herg_blocker#not a hERG blocker (>= 10uM)&a hERG blocker (<10uM)}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {herg_blocker#not a human ether-à-go-go related gene (hERG) blocker (>= 10uM)&a human ether-à-go-go related gene (hERG) blocker (<10uM)}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that does {herg_blocker__names__verb}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {herg_blocker#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is a {herg_blocker__names__noun}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {herg_blocker#no &NULL}{herg_blocker__names__noun}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is a {herg_blocker__names__noun}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {herg_blocker__names__noun}? 
+ Assistant: {herg_blocker#No&Yes}, this molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} a {herg_blocker__names__noun}? + Assistant: {herg_blocker#No&Yes}, it is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that does {herg_blocker#not &NULL}{herg_blocker__names__verb}? + Assistant: This is a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {herg_blocker#not &NULL}be a {herg_blocker__names__noun}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {herg_blocker#not &NULL}be a {herg_blocker__names__noun}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#} + - Is the {SMILES__description} {SMILES#} a {herg_blocker__names__noun}:{herg_blocker#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is a {herg_blocker__names__noun}. 
+ {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{herg_blocker#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is a {herg_blocker__names__noun}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result:This molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}. + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {herg_blocker__names__noun}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {herg_blocker%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {herg_blocker#not &NULL}a {herg_blocker__names__noun}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%herg_blocker%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {herg_blocker#not &NULL}a {herg_blocker__names__noun}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {SMILES%herg_blocker%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/hiv/meta.yaml b/data/tabular/hiv/meta.yaml index 595cd53f5..a94119ea7 100644 --- a/data/tabular/hiv/meta.yaml +++ b/data/tabular/hiv/meta.yaml @@ -1,124 +1,123 @@ ---- name: hiv description: |- - The HIV dataset was introduced by the Drug Therapeutics Program (DTP) - AIDS Antiviral Screen, which tested the ability to inhibit HIV replication for - over 40,000 compounds. + The HIV dataset was introduced by the Drug Therapeutics Program (DTP) + AIDS Antiviral Screen, which tested the ability to inhibit HIV replication for + over 40,000 compounds. targets: - - id: activity_HIV - description: whether it is active against HIV virus (1) or not (0). - units: - type: boolean - names: - - noun: activity against the human immunodeficiency virus - - noun: activity against HIV - - adjective: active against the human immunodeficiency virus - - adjective: active against HIV + - id: activity_HIV + description: whether it is active against HIV virus (1) or not (0). 
+ units: + type: boolean + names: + - noun: activity against the human immunodeficiency virus + - noun: activity against HIV + - adjective: active against the human immunodeficiency virus + - adjective: active against HIV benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://rb.gy/wphpqg - description: data source - - url: https://rb.gy/0xx91v - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/hts/#hiv - description: data source + - url: https://rb.gy/wphpqg + description: data source + - url: https://rb.gy/0xx91v + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/hts/#hiv + description: data source num_points: 41127 bibtex: - - |- - @article{Wu2018, - doi = {10.1039/c7sc02664a}, - url = {https://doi.org/10.1039/c7sc02664a}, - year = {2018}, - publisher = {Royal Society of Chemistry (RSC)}, - volume = {9}, - number = {2}, - pages = {513--530}, - author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg and Joseph Gomes - and Caleb Geniesse and Aneesh S. Pappu and Karl Leswing and Vijay Pande}, - title = {MoleculeNet: a benchmark for molecular machine learning}, - journal = {Chemical Science} + - |- + @article{Wu2018, + doi = {10.1039/c7sc02664a}, + url = {https://doi.org/10.1039/c7sc02664a}, + year = {2018}, + publisher = {Royal Society of Chemistry (RSC)}, + volume = {9}, + number = {2}, + pages = {513--530}, + author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg and Joseph Gomes + and Caleb Geniesse and Aneesh S. 
Pappu and Karl Leswing and Vijay Pande}, + title = {MoleculeNet: a benchmark for molecular machine learning}, + journal = {Chemical Science} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_HIV#no &NULL}{activity_HIV__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {activity_HIV#no &NULL}{activity_HIV__names__noun}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_HIV#no &NULL}{activity_HIV__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_HIV#not &NULL}{activity_HIV__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_HIV__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {activity_HIV#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_HIV__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {activity_HIV#not &NULL}{activity_HIV__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {activity_HIV__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_HIV__names__adjective}? - Assistant: Yes, this molecule is {activity_HIV#not &NULL}{activity_HIV__names__adjective}. 
- - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {activity_HIV__names__adjective}? - Assistant: Yes, it is {activity_HIV#not &NULL}{activity_HIV__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_HIV#not &NULL}{activity_HIV__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_HIV#not &NULL}{activity_HIV__names__adjective}? - Assistant: This is a molecule that is {activity_HIV#not &NULL}{activity_HIV__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {activity_HIV#not &NULL}be {activity_HIV__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_HIV#not &NULL}{activity_HIV__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {activity_HIV#not &NULL}be {activity_HIV__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_HIV#not &NULL}{activity_HIV__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {activity_HIV__names__adjective}:{activity_HIV#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_HIV__names__adjective}. 
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{activity_HIV#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_HIV__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result:This molecule is {activity_HIV#not &NULL}{activity_HIV__names__adjective}. - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_HIV__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {activity_HIV%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_HIV#not &NULL}{activity_HIV__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%activity_HIV%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_HIV#not &NULL}{activity_HIV__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%activity_HIV%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_HIV#no &NULL}{activity_HIV__names__noun}. 
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {activity_HIV#no &NULL}{activity_HIV__names__noun}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_HIV#no &NULL}{activity_HIV__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_HIV#not &NULL}{activity_HIV__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_HIV__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {activity_HIV#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_HIV__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {activity_HIV#not &NULL}{activity_HIV__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {activity_HIV__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_HIV__names__adjective}? + Assistant: {activity_HIV#No&Yes}, this molecule is {activity_HIV#not &NULL}{activity_HIV__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {activity_HIV__names__adjective}? + Assistant: {activity_HIV#No&Yes}, it is {activity_HIV#not &NULL}{activity_HIV__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_HIV#not &NULL}{activity_HIV__names__adjective}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_HIV#not &NULL}{activity_HIV__names__adjective}? + Assistant: This is a molecule that is {activity_HIV#not &NULL}{activity_HIV__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {activity_HIV#not &NULL}be {activity_HIV__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_HIV#not &NULL}{activity_HIV__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {activity_HIV#not &NULL}be {activity_HIV__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_HIV#not &NULL}{activity_HIV__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {activity_HIV__names__adjective}:{activity_HIV#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_HIV__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{activity_HIV#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_HIV__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. 
+ Result:This molecule is {activity_HIV#not &NULL}{activity_HIV__names__adjective}. + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_HIV__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {activity_HIV%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_HIV#not &NULL}{activity_HIV__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%activity_HIV%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_HIV#not &NULL}{activity_HIV__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%activity_HIV%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/human_intestinal_absorption/meta.yaml b/data/tabular/human_intestinal_absorption/meta.yaml index b26e11d4a..99e2a9234 100644 --- a/data/tabular/human_intestinal_absorption/meta.yaml +++ b/data/tabular/human_intestinal_absorption/meta.yaml @@ -1,136 +1,133 @@ ---- name: human_intestinal_absorption description: |- - When a drug is orally administered, it needs to be absorbed from the - human gastrointestinal system into the bloodstream of the human body. This ability - of absorption is called human intestinal absorption (HIA) and it is crucial - for a drug to be delivered to the target. + When a drug is orally administered, it needs to be absorbed from the + human gastrointestinal system into the bloodstream of the human body. 
This ability + of absorption is called human intestinal absorption (HIA) and it is crucial + for a drug to be delivered to the target. targets: - - id: absorption_HIA_Hou - description: whether it is absorbed from the human gastrointestinal system (1) or not (0) - units: - type: boolean - names: - - noun: human intestinal absorption - - noun: human intestinal absorption (HIA) - - noun: HIA - - adjective: absorbed from the human gastrointestinal system - uris: - - http://purl.bioontology.org/ontology/MESH/D007408 + - id: absorption_HIA_Hou + description: whether it is absorbed from the human gastrointestinal system (1) or not (0) + units: + type: boolean + names: + - noun: human intestinal absorption + - noun: human intestinal absorption (HIA) + - noun: HIA + - adjective: absorbed from the human gastrointestinal system + uris: + - http://purl.bioontology.org/ontology/MESH/D007408 benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: compound_name - type: Other - names: - - noun: compound name - - noun: drug name - - noun: generic drug name - description: drug name + - id: SMILES + type: SMILES + description: SMILES + - id: compound_name + type: Other + names: + - noun: compound name + - noun: drug name + - noun: generic drug name + description: drug name license: CC BY 4.0 links: - - url: https://doi.org/10.1021/ci600343x - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#hia-human-intestinal-absorption-hou-et-al - description: data source + - url: https://doi.org/10.1021/ci600343x + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#hia-human-intestinal-absorption-hou-et-al + description: data source num_points: 578 bibtex: - - |- - @article{Hou2006, - doi = {10.1021/ci600343x}, - url = 
{https://doi.org/10.1021/ci600343x}, - year = {2006}, - month = nov, - publisher = {American Chemical Society (ACS)}, - volume = {47}, - number = {1}, - pages = {208--218}, - author = {Tingjun Hou and Junmei Wang and Wei Zhang and Xiaojie Xu}, - title = {ADME Evaluation in Drug Discovery. 7. Prediction of Oral Absorption - by Correlation and Classification}, - journal = {Journal of Chemical Information and Modeling} + - |- + @article{Hou2006, + doi = {10.1021/ci600343x}, + url = {https://doi.org/10.1021/ci600343x}, + year = {2006}, + month = nov, + publisher = {American Chemical Society (ACS)}, + volume = {47}, + number = {1}, + pages = {208--218}, + author = {Tingjun Hou and Junmei Wang and Wei Zhang and Xiaojie Xu}, + title = {ADME Evaluation in Drug Discovery. 7. Prediction of Oral Absorption + by Correlation and Classification}, + journal = {Journal of Chemical Information and Modeling} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {absorption_HIA_Hou#no &NULL}{absorption_HIA_Hou__names__noun} - properties. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {absorption_HIA_Hou#no &NULL}{absorption_HIA_Hou__names__noun} - {#properties|characteristics|features!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {absorption_HIA_Hou__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. 
- Result: {absorption_HIA_Hou#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {absorption_HIA_Hou__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {absorption_HIA_Hou__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {absorption_HIA_Hou__names__adjective}? - Assistant: {absorption_HIA_Hou#No&Yes}, this molecule is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {absorption_HIA_Hou__names__adjective}? - Assistant: {absorption_HIA_Hou#No&Yes}, it is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}? - Assistant: This is a molecule that is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. 
The molecule should {absorption_HIA_Hou#not &NULL}be {absorption_HIA_Hou__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {absorption_HIA_Hou#not &NULL}be {absorption_HIA_Hou__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {absorption_HIA_Hou__names__adjective}:{absorption_HIA_Hou#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {absorption_HIA_Hou__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{absorption_HIA_Hou#False&True} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {absorption_HIA_Hou__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {absorption_HIA_Hou%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {absorption_HIA_Hou__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. 
- Options: - {absorption_HIA_Hou%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%absorption_HIA_Hou%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%absorption_HIA_Hou%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {absorption_HIA_Hou#no &NULL}{absorption_HIA_Hou__names__noun} properties. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {absorption_HIA_Hou#no &NULL}{absorption_HIA_Hou__names__noun} {#properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {absorption_HIA_Hou#no &NULL}{absorption_HIA_Hou__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {absorption_HIA_Hou__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {absorption_HIA_Hou#False&True} + - |- + Task: Please classify a molecule based on the description.
+ Description: A molecule that is {absorption_HIA_Hou__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {absorption_HIA_Hou__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {absorption_HIA_Hou__names__adjective}? + Assistant: {absorption_HIA_Hou#No&Yes}, this molecule is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {absorption_HIA_Hou__names__adjective}? + Assistant: {absorption_HIA_Hou#No&Yes}, it is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}? + Assistant: This is a molecule that is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {absorption_HIA_Hou#not &NULL}be {absorption_HIA_Hou__names__adjective}. 
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {absorption_HIA_Hou#not &NULL}be {absorption_HIA_Hou__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {absorption_HIA_Hou__names__adjective}:{absorption_HIA_Hou#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {absorption_HIA_Hou__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{absorption_HIA_Hou#False&True} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {absorption_HIA_Hou__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {absorption_HIA_Hou%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {absorption_HIA_Hou__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {absorption_HIA_Hou%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. 
+ Question: Which molecules are {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%absorption_HIA_Hou%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%absorption_HIA_Hou%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/inverse_1/meta.yaml b/data/tabular/inverse_1/meta.yaml index 44014874d..6dc0057b7 100644 --- a/data/tabular/inverse_1/meta.yaml +++ b/data/tabular/inverse_1/meta.yaml @@ -1,223 +1,222 @@ ---- name: inverse_1 description: |- - Inverse design task constructed by merging solubility_aqsoldb and - nr_ar_tox21 and augmenting it with molecular descriptors. + Inverse design task constructed by merging solubility_aqsoldb and + nr_ar_tox21 and augmenting it with molecular descriptors. 
targets: - - id: aqeuous_solubility - description: aqueous solubility - units: log(mol/L) - type: continuous - names: - - noun: aqueous solubility (logarithmic) - - noun: water solubility (measured in log(mol/L)) - - noun: water solubility (logarithmic) - - adjective: dissolves in a water - uris: - - http://purl.jp/bio/4/id/200906006880450101 - - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C60821 - - id: toxicity_NR-AR - description: whether it toxic in a androgen receptor toxicity assay (1) or not (0) - units: - type: boolean - names: - - noun: NR-AR toxicity - - noun: NR-androgen receptor toxicity - - verb: is toxic in a androgen receptor toxicity assay - - adjective: toxic in the NR-AR assay - - adjective: toxic in the NR-androgen receptor assay - - gerund: displaying toxicity in the NR-AR assay - - gerund: exhibiting toxicity in the NR-androgen assay - - gerund: demonstrating toxicity in the NR-androgen assay - uris: - - id: carboxyl_count - description: number of carboxyl groups - type: ordinal - names: - - noun: carboxyl groups - - id: carbonyl_count - description: number of carbonyl groups - type: ordinal - names: - - noun: carbonyl groups - - id: ether_count - description: number of ether groups - type: ordinal - names: - - noun: ether groups - - id: alkanol_count - description: number of alkanol groups - type: ordinal - names: - - noun: alkanol groups - - id: thiol_count - description: number of thiol groups - type: ordinal - names: - - noun: thiol groups - - id: halogen_count - description: number of halogen groups - type: ordinal - names: - - noun: halogen groups - - id: amine_count - description: number of amine groups - type: ordinal - names: - - noun: amine groups - - id: amide_count - description: number of amide groups - type: ordinal - names: - - noun: amide groups - - id: ketone_count - description: number of ketone groups - type: ordinal - names: - - noun: ketone group count - - id: num_valence_electrons - description: number of valence 
electrons - type: ordinal - names: - - noun: valence electrons - - id: molecular_formula - description: molecular formula - type: text - names: - - noun: molecular formula - - id: monoisotopic_molecular_mass - description: monoisotopic molecular mass - type: continuous - units: g/mol - names: - - noun: monoisotopic molecular mass - - id: carbon_mass - description: carbon mass - type: continuous - units: g/mol - names: - - noun: carbon mass - - id: hydrogen_mass - description: hydrogen mass - type: continuous - units: g/mol - names: - - noun: hydrogen mass - - id: nitrogen_mass - description: nitrogen mass - type: continuous - units: g/mol - names: - - noun: nitrogen mass - - id: oxygen_mass - description: oxygen mass - units: g/mol - type: continuous - names: - - noun: oxygen mass - - id: num_carbon_atoms - description: number of carbon atoms - type: ordinal - names: - - noun: carbon atoms - - id: num_hydrogen_atoms - type: ordinal - description: number of hydrogen atoms - names: - - noun: hydrogen atoms - - id: num_nitrogen_atoms - description: number of nitrogen atoms - type: ordinal - names: - - noun: nitrogen atoms - - id: num_oxygen_atoms - description: number of oxygen atoms - type: ordinal - names: - - noun: oxygen atoms - - id: num_hydrogen_bond_acceptors - description: number of hydrogen bond acceptors - type: ordinal - names: - - noun: hydrogen bond acceptors - - id: num_hydrogen_bond_donors - description: number of hydrogen bond donors - type: ordinal - names: - - noun: hydrogen bond donors - - id: num_lipinski_violations - description: number of Lipinski violations - type: ordinal - names: - - noun: Lipinski violations - - noun: Lipinski rule of five violations - - id: num_chiral_centers - description: number of chiral centers - type: ordinal - names: - - noun: chiral center count + - id: aqeuous_solubility + description: aqueous solubility + units: log(mol/L) + type: continuous + names: + - noun: aqueous solubility (logarithmic) + - noun: water 
solubility (measured in log(mol/L)) + - noun: water solubility (logarithmic) + - adjective: dissolves in water + uris: + - http://purl.jp/bio/4/id/200906006880450101 + - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C60821 + - id: toxicity_NR-AR + description: whether it is toxic in an androgen receptor toxicity assay (1) or not (0) + units: + type: boolean + names: + - noun: NR-AR toxicity + - noun: NR-androgen receptor toxicity + - verb: is toxic in an androgen receptor toxicity assay + - adjective: toxic in the NR-AR assay + - adjective: toxic in the NR-androgen receptor assay + - gerund: displaying toxicity in the NR-AR assay + - gerund: exhibiting toxicity in the NR-androgen assay + - gerund: demonstrating toxicity in the NR-androgen assay + uris: + - id: carboxyl_count + description: number of carboxyl groups + type: ordinal + names: + - noun: carboxyl groups + - id: carbonyl_count + description: number of carbonyl groups + type: ordinal + names: + - noun: carbonyl groups + - id: ether_count + description: number of ether groups + type: ordinal + names: + - noun: ether groups + - id: alkanol_count + description: number of alkanol groups + type: ordinal + names: + - noun: alkanol groups + - id: thiol_count + description: number of thiol groups + type: ordinal + names: + - noun: thiol groups + - id: halogen_count + description: number of halogen groups + type: ordinal + names: + - noun: halogen groups + - id: amine_count + description: number of amine groups + type: ordinal + names: + - noun: amine groups + - id: amide_count + description: number of amide groups + type: ordinal + names: + - noun: amide groups + - id: ketone_count + description: number of ketone groups + type: ordinal + names: + - noun: ketone group count + - id: num_valence_electrons + description: number of valence electrons + type: ordinal + names: + - noun: valence electrons + - id: molecular_formula + description: molecular formula + type: text + names: + - noun: molecular formula + - id:
monoisotopic_molecular_mass + description: monoisotopic molecular mass + type: continuous + units: g/mol + names: + - noun: monoisotopic molecular mass + - id: carbon_mass + description: carbon mass + type: continuous + units: g/mol + names: + - noun: carbon mass + - id: hydrogen_mass + description: hydrogen mass + type: continuous + units: g/mol + names: + - noun: hydrogen mass + - id: nitrogen_mass + description: nitrogen mass + type: continuous + units: g/mol + names: + - noun: nitrogen mass + - id: oxygen_mass + description: oxygen mass + units: g/mol + type: continuous + names: + - noun: oxygen mass + - id: num_carbon_atoms + description: number of carbon atoms + type: ordinal + names: + - noun: carbon atoms + - id: num_hydrogen_atoms + type: ordinal + description: number of hydrogen atoms + names: + - noun: hydrogen atoms + - id: num_nitrogen_atoms + description: number of nitrogen atoms + type: ordinal + names: + - noun: nitrogen atoms + - id: num_oxygen_atoms + description: number of oxygen atoms + type: ordinal + names: + - noun: oxygen atoms + - id: num_hydrogen_bond_acceptors + description: number of hydrogen bond acceptors + type: ordinal + names: + - noun: hydrogen bond acceptors + - id: num_hydrogen_bond_donors + description: number of hydrogen bond donors + type: ordinal + names: + - noun: hydrogen bond donors + - id: num_lipinski_violations + description: number of Lipinski violations + type: ordinal + names: + - noun: Lipinski violations + - noun: Lipinski rule of five violations + - id: num_chiral_centers + description: number of chiral centers + type: ordinal + names: + - noun: chiral center count benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://doi.org/10.1038/s41597-019-0151-1 - description: 
corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#aqeuous_solubility-aqsoldb - description: data source - - url: https://github.com/lamalab-org/chem-caption - description: software used to generate features + - url: https://doi.org/10.1038/s41597-019-0151-1 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#solubility-aqsoldb + description: data source + - url: https://github.com/lamalab-org/chem-caption + description: software used to generate features num_points: 2525 bibtex: - - |- - @article{Sorkun_2019, - doi = {10.1038/s41597-019-0151-1}, - url = {https://doi.org/10.1038%2Fs41597-019-0151-1}, - year = {2019}, - month = aug, - publisher = {Springer Science and Business Media LLC}, - volume = {6}, - number = {1}, - author = {Murat Cihan Sorkun and Abhishek Khetan and - Suleyman Er}, - title = {AqSolDB, a curated reference set of aqueous aqeuous_solubility - and 2D descriptors for a diverse set of compounds}, - journal = {Scientific Data} + - |- + @article{Sorkun_2019, + doi = {10.1038/s41597-019-0151-1}, + url = {https://doi.org/10.1038%2Fs41597-019-0151-1}, + year = {2019}, + month = aug, + publisher = {Springer Science and Business Media LLC}, + volume = {6}, + number = {1}, + author = {Murat Cihan Sorkun and Abhishek Khetan and + Suleyman Er}, + title = {AqSolDB, a curated reference set of aqueous solubility + and 2D descriptors for a diverse set of compounds}, + journal = {Scientific Data} templates: - - |- - User: {#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units} and is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. - Assistant: {#There might be multiple compounds that match these criteria.
Do you have additional constraints?|Do you have additional constraints?|Is there anything else I should consider?|Is there anything else I should know?!} - User: {#No|No, there are no additional constraints.|No, there are no other constraints.|No, there are no other criteria.|No, there are no other requirements.!} - Assistant: {#In this case, |OK, |Alright, |Understood, |Got it, |I see, !}the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#should|will|is expected to!} fit your criteria. - - |- - User: {#I am a medicinal chemist. |I work in drug-discovery. |!}{#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units} and is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. {#Additionally, |Moreover, |On top of that, |In addition, |Furthermore, !}{#I|we!} {#want|need|require|would like|would prefer!} the {#molecule|compound|chemical structure!} to have a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. {#I|We!} {#want|would like to|need to!} ensure that {#there are|the molecule contains|the compound contains!} {carboxyl_count#} {carbonyl_count__names__noun}. - Assistant: {#Thanks for the detailed description. |Thanks. |!}{#I suggest|I recommend|I propose|I would suggest|I would recommend|I would propose!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}.{# This compound should fulfill your needs.| This chemical should satisfy your constrains|!} - - |- - User: {#I am a medicinal chemist. |I work in drug-discovery. 
|!}{#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} and has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units}. {#Additionally, |Moreover, |On top of that, |In addition, |Furthermore, !}{#I|we!} {#want|need|require|would like|would prefer!} the {#molecule|compound|chemical structure!} to have a {carbon_mass__names__noun} of {carbon_mass#} {carbon_mass__units}. {#I|We!} {#want|would like to|need to!} ensure that {#there are|the molecule contains|the compound contains!} {carboxyl_count#} {carbonyl_count__names__noun} and {ether_count#} {ether_count__names__noun} as well as {alkanol_count#} {alkanol_count__names__noun} and {thiol_count#} {thiol_count__names__noun}. - Assistant: {#Thanks for the detailed description. |Thanks. |!}{#I suggest|I recommend|I propose|I would suggest|I would recommend|I would propose!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}.{# This compound should fulfill your needs.| This chemical should satisfy your constrains|!} - - |- - User: {#I am researching|I am investigating|I am studying!} {#pharmaceuticals|medicinal compounds|drug molecules!} and {#need|require|am looking for!} a {#molecule|compound|chemical structure!} with a {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units}. It should also be {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. {#Additionally,|Moreover,|Furthermore,!} it {#must|should|needs to!} have {amine_count#} {amine_count__names__noun}. - Assistant: {#To meet these requirements, |Considering your specifications, |Taking into account your needs, !}I {#recommend|suggest|propose!} a {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}. 
{#This should match your criteria.|This fits your described parameters.|This aligns with your requirements.!} - - |- - User: {#As a chemist|Being a chemical researcher,|In my chemical research,!} I {#am looking for|require|need!} a {#molecule|compound|chemical structure!} with {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun} and {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun}. It {#also needs|should also!} to be {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} and have a {oxygen_mass__names__noun} of {oxygen_mass#} {oxygen_mass__units}. - Assistant: {#I've got the ideal compound for you.|I have a compound that fits these specifications.|I suggest a molecule that meets your needs.!} The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#should work perfectly.|is exactly what you're looking for.|matches your requirements.!} - - |- - User: {#In my pharmaceutical research,|For my current drug discovery project,|In my medicinal chemistry studies,!} I {#require|need|am looking for!} a {#molecule|compound|chemical structure!} with {num_chiral_centers#} {num_chiral_centers__names__noun} and {num_lipinski_violations#} {num_lipinski_violations__names__noun}. It should {#also have|also possess|also contain!} a {nitrogen_mass__names__noun} of {nitrogen_mass#} {nitrogen_mass__units} and be {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. 
- Assistant: {#After considering your needs,|Based on your requirements,|Taking your specifications into account,!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would be a great fit.|seems to be a perfect match.|should meet all your criteria.!} - - |- - User: {#In my research on|For my study of|While investigating!} {#non-toxic chemicals|safe compounds|environment-friendly substances!}, I {#need|require|am looking for!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} with {num_carbon_atoms#} {num_carbon_atoms__names__noun}. {#Additionally,|Furthermore,|Moreover,!} it {#should have|must have|needs to have!} {num_nitrogen_atoms#} {num_nitrogen_atoms__names__noun}. - Assistant: {#I have a compound in mind|I can suggest a molecule|I've identified a chemical structure!} that {#fits|meets|aligns with!} these requirements. The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#is suitable for your needs.|meets your specified criteria.|should work well for your research.!} - - |- - User: {#As a pharmacologist,|In my pharmacological studies,|For my drug development work,!} I {#require|need|am in need of!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} and has {num_oxygen_atoms#} {num_oxygen_atoms__names__noun}. {#Also,|In addition,|Moreover,!} it {#must|should!} have a {molecular_formula__names__noun} of {molecular_formula#}. 
- Assistant: {#I recommend|I suggest|I propose!} a {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} that {#satisfies these conditions.|meets these criteria.|is aligned with your requirements.!} - - |- - User: {#In my environmental chemistry work,|For my eco-friendly compound research,|As part of my sustainable chemical studies,!} I {#am looking for|require|need!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} with a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. {#Also,|Additionally,|Moreover,!} it {#should possess|must contain|needs to have!} {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun}. - Assistant: {#Considering your needs,|Based on your specifications,|With your requirements in mind,!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would be ideal.|seems perfect.|is a great match.!} - - |- - User: {#For my bioactive molecule research,|In my study of pharmacologically active substances,|As I explore biologically active compounds,!} I {#need|require|am searching for!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. It {#should also have|must also feature|also needs to have!} {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun} and {halogen_count#} {halogen_count__names__noun}. - Assistant: {#I've found a compound|I have a molecule|I suggest a chemical structure!} that {#fulfills|meets|matches!} these criteria. 
The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would suit your research.|is in line with your needs.|fits your specifications perfectly.!} + - |- + User: {#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units} and is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. + Assistant: {#There might be multiple compounds that match these criteria. Do you have additional constraints?|Do you have additional constraints?|Is there anything else I should consider?|Is there anything else I should know?!} + User: {#No|No, there are no additional constraints.|No, there are no other constraints.|No, there are no other criteria.|No, there are no other requirements.!} + Assistant: {#In this case, |OK, |Alright, |Understood, |Got it, |I see, !}the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#should|will|is expected to!} fit your criteria. + - |- + User: {#I am a medicinal chemist. |I work in drug-discovery. |!}{#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units} and is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. {#Additionally, |Moreover, |On top of that, |In addition, |Furthermore, !}{#I|we!} {#want|need|require|would like|would prefer!} the {#molecule|compound|chemical structure!} to have a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. {#I|We!} {#want|would like to|need to!} ensure that {#there are|the molecule contains|the compound contains!} {carboxyl_count#} {carboxyl_count__names__noun}. + Assistant: {#Thanks for the detailed description. |Thanks.
|!}{#I suggest|I recommend|I propose|I would suggest|I would recommend|I would propose!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}.{# This compound should fulfill your needs.| This chemical should satisfy your constraints.|!} + - |- + User: {#I am a medicinal chemist. |I work in drug-discovery. |!}{#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} and has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units}. {#Additionally, |Moreover, |On top of that, |In addition, |Furthermore, !}{#I|we!} {#want|need|require|would like|would prefer!} the {#molecule|compound|chemical structure!} to have a {carbon_mass__names__noun} of {carbon_mass#} {carbon_mass__units}. {#I|We!} {#want|would like to|need to!} ensure that {#there are|the molecule contains|the compound contains!} {carboxyl_count#} {carboxyl_count__names__noun} and {ether_count#} {ether_count__names__noun} as well as {alkanol_count#} {alkanol_count__names__noun} and {thiol_count#} {thiol_count__names__noun}. + Assistant: {#Thanks for the detailed description. |Thanks. |!}{#I suggest|I recommend|I propose|I would suggest|I would recommend|I would propose!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}.{# This compound should fulfill your needs.| This chemical should satisfy your constraints.|!} + - |- + User: {#I am researching|I am investigating|I am studying!} {#pharmaceuticals|medicinal compounds|drug molecules!} and {#need|require|am looking for!} a {#molecule|compound|chemical structure!} with a {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units}. It should also be {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}.
{#Additionally,|Moreover,|Furthermore,!} it {#must|should|needs to!} have {amine_count#} {amine_count__names__noun}. + Assistant: {#To meet these requirements, |Considering your specifications, |Taking into account your needs, !}I {#recommend|suggest|propose!} a {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}. {#This should match your criteria.|This fits your described parameters.|This aligns with your requirements.!} + - |- + User: {#As a chemist|Being a chemical researcher,|In my chemical research,!} I {#am looking for|require|need!} a {#molecule|compound|chemical structure!} with {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun} and {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun}. It {#also needs to|should also!} be {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} and have an {oxygen_mass__names__noun} of {oxygen_mass#} {oxygen_mass__units}. + Assistant: {#I've got the ideal compound for you.|I have a compound that fits these specifications.|I suggest a molecule that meets your needs.!} The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#should work perfectly.|is exactly what you're looking for.|matches your requirements.!} + - |- + User: {#In my pharmaceutical research,|For my current drug discovery project,|In my medicinal chemistry studies,!} I {#require|need|am looking for!} a {#molecule|compound|chemical structure!} with {num_chiral_centers#} {num_chiral_centers__names__noun} and {num_lipinski_violations#} {num_lipinski_violations__names__noun}. It should {#also have|also possess|also contain!} a {nitrogen_mass__names__noun} of {nitrogen_mass#} {nitrogen_mass__units} and be {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}.
+ Assistant: {#After considering your needs,|Based on your requirements,|Taking your specifications into account,!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would be a great fit.|seems to be a perfect match.|should meet all your criteria.!} + - |- + User: {#In my research on|For my study of|While investigating!} {#non-toxic chemicals|safe compounds|environment-friendly substances!}, I {#need|require|am looking for!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} with {num_carbon_atoms#} {num_carbon_atoms__names__noun}. {#Additionally,|Furthermore,|Moreover,!} it {#should have|must have|needs to have!} {num_nitrogen_atoms#} {num_nitrogen_atoms__names__noun}. + Assistant: {#I have a compound in mind|I can suggest a molecule|I've identified a chemical structure!} that {#fits|meets|aligns with!} these requirements. The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#is suitable for your needs.|meets your specified criteria.|should work well for your research.!} + - |- + User: {#As a pharmacologist,|In my pharmacological studies,|For my drug development work,!} I {#require|need|am in need of!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} and has {num_oxygen_atoms#} {num_oxygen_atoms__names__noun}. {#Also,|In addition,|Moreover,!} it {#must|should!} have a {molecular_formula__names__noun} of {molecular_formula#}. 
+ Assistant: {#I recommend|I suggest|I propose!} a {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} that {#satisfies these conditions.|meets these criteria.|is aligned with your requirements.!} + - |- + User: {#In my environmental chemistry work,|For my eco-friendly compound research,|As part of my sustainable chemical studies,!} I {#am looking for|require|need!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} with a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. {#Also,|Additionally,|Moreover,!} it {#should possess|must contain|needs to have!} {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun}. + Assistant: {#Considering your needs,|Based on your specifications,|With your requirements in mind,!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would be ideal.|seems perfect.|is a great match.!} + - |- + User: {#For my bioactive molecule research,|In my study of pharmacologically active substances,|As I explore biologically active compounds,!} I {#need|require|am searching for!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. It {#should also have|must also feature|also needs to have!} {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun} and {halogen_count#} {halogen_count__names__noun}. + Assistant: {#I've found a compound|I have a molecule|I suggest a chemical structure!} that {#fulfills|meets|matches!} these criteria. 
The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would suit your research.|is in line with your needs.|fits your specifications perfectly.!} diff --git a/data/tabular/inverse_2/meta.yaml b/data/tabular/inverse_2/meta.yaml index 169beaa22..945040aac 100644 --- a/data/tabular/inverse_2/meta.yaml +++ b/data/tabular/inverse_2/meta.yaml @@ -1,225 +1,224 @@ ---- name: inverse_2 description: |- - Inverse design task constructed by merging solubility_aqsoldb and - sr_atad5_tox21 and augmenting it with molecular descriptors. + Inverse design task constructed by merging solubility_aqsoldb and + sr_atad5_tox21 and augmenting it with molecular descriptors. targets: - - id: aqeuous_solubility - description: aqueous solubility - units: log(mol/L) - type: continuous - names: - - noun: aqueous solubility (logarithmic) - - noun: water solubility (measured in log(mol/L)) - - noun: water solubility (logarithmic) - - adjective: dissolves in a water - uris: - - http://purl.jp/bio/4/id/200906006880450101 - - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C60821 - - id: toxicity_SR-ATAD5 - description: whether it shows activitiy in the SR-ATAD5 assay (1) or not (0) - units: - type: boolean - names: - - noun: SR-Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity - - noun: Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity - - noun: Luciferase-tagged ATAD5 toxicity - - verb: shows activity in the SR-Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity assay - - verb: is active in the Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity assay - - verb: is active in the Luciferase-tagged ATAD5 toxicity assay - - adjective: toxic in the SR-Luciferase-tagged ATAD5 in human embryonic kidney cells assay - - adjective: toxic in the Luciferase-tagged ATAD5 in human embryonic kidney cells assay - - adjective: toxic in the Luciferase-tagged ATAD5 assay - - gerund: showing SR-ATAD5 toxicity - uris: - - id: 
carboxyl_count - description: number of carboxyl groups - type: ordinal - names: - - noun: carboxyl groups - - id: carbonyl_count - description: number of carbonyl groups - type: ordinal - names: - - noun: carbonyl groups - - id: ether_count - description: number of ether groups - type: ordinal - names: - - noun: ether groups - - id: alkanol_count - description: number of alkanol groups - type: ordinal - names: - - noun: alkanol groups - - id: thiol_count - description: number of thiol groups - type: ordinal - names: - - noun: thiol groups - - id: halogen_count - description: number of halogen groups - type: ordinal - names: - - noun: halogen groups - - id: amine_count - description: number of amine groups - type: ordinal - names: - - noun: amine groups - - id: amide_count - description: number of amide groups - type: ordinal - names: - - noun: amide groups - - id: ketone_count - description: number of ketone groups - type: ordinal - names: - - noun: ketone group count - - id: num_valence_electrons - description: number of valence electrons - type: ordinal - names: - - noun: valence electrons - - id: molecular_formula - description: molecular formula - type: text - names: - - noun: molecular formula - - id: monoisotopic_molecular_mass - description: monoisotopic molecular mass - type: continuous - units: g/mol - names: - - noun: monoisotopic molecular mass - - id: carbon_mass - description: carbon mass - type: continuous - units: g/mol - names: - - noun: carbon mass - - id: hydrogen_mass - description: hydrogen mass - type: continuous - units: g/mol - names: - - noun: hydrogen mass - - id: nitrogen_mass - description: nitrogen mass - type: continuous - units: g/mol - names: - - noun: nitrogen mass - - id: oxygen_mass - description: oxygen mass - units: g/mol - type: continuous - names: - - noun: oxygen mass - - id: num_carbon_atoms - description: number of carbon atoms - type: ordinal - names: - - noun: carbon atoms - - id: num_hydrogen_atoms - type: ordinal - 
description: number of hydrogen atoms - names: - - noun: hydrogen atoms - - id: num_nitrogen_atoms - description: number of nitrogen atoms - type: ordinal - names: - - noun: nitrogen atoms - - id: num_oxygen_atoms - description: number of oxygen atoms - type: ordinal - names: - - noun: oxygen atoms - - id: num_hydrogen_bond_acceptors - description: number of hydrogen bond acceptors - type: ordinal - names: - - noun: hydrogen bond acceptors - - id: num_hydrogen_bond_donors - description: number of hydrogen bond donors - type: ordinal - names: - - noun: hydrogen bond donors - - id: num_lipinski_violations - description: number of Lipinski violations - type: ordinal - names: - - noun: Lipinski violations - - noun: Lipinski rule of five violations - - id: num_chiral_centers - description: number of chiral centers - type: ordinal - names: - - noun: chiral center count + - id: aqeuous_solubility + description: aqueous solubility + units: log(mol/L) + type: continuous + names: + - noun: aqueous solubility (logarithmic) + - noun: water solubility (measured in log(mol/L)) + - noun: water solubility (logarithmic) + - adjective: dissolves in a water + uris: + - http://purl.jp/bio/4/id/200906006880450101 + - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C60821 + - id: toxicity_SR-ATAD5 + description: whether it shows activity in the SR-ATAD5 assay (1) or not (0) + units: + type: boolean + names: + - noun: SR-Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity + - noun: Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity + - noun: Luciferase-tagged ATAD5 toxicity + - verb: shows activity in the SR-Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity assay + - verb: is active in the Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity assay + - verb: is active in the Luciferase-tagged ATAD5 toxicity assay + - adjective: toxic in the SR-Luciferase-tagged ATAD5 in human embryonic kidney cells assay + - adjective: toxic in the 
Luciferase-tagged ATAD5 in human embryonic kidney cells assay + - adjective: toxic in the Luciferase-tagged ATAD5 assay + - gerund: showing SR-ATAD5 toxicity + uris: + - id: carboxyl_count + description: number of carboxyl groups + type: ordinal + names: + - noun: carboxyl groups + - id: carbonyl_count + description: number of carbonyl groups + type: ordinal + names: + - noun: carbonyl groups + - id: ether_count + description: number of ether groups + type: ordinal + names: + - noun: ether groups + - id: alkanol_count + description: number of alkanol groups + type: ordinal + names: + - noun: alkanol groups + - id: thiol_count + description: number of thiol groups + type: ordinal + names: + - noun: thiol groups + - id: halogen_count + description: number of halogen groups + type: ordinal + names: + - noun: halogen groups + - id: amine_count + description: number of amine groups + type: ordinal + names: + - noun: amine groups + - id: amide_count + description: number of amide groups + type: ordinal + names: + - noun: amide groups + - id: ketone_count + description: number of ketone groups + type: ordinal + names: + - noun: ketone group count + - id: num_valence_electrons + description: number of valence electrons + type: ordinal + names: + - noun: valence electrons + - id: molecular_formula + description: molecular formula + type: text + names: + - noun: molecular formula + - id: monoisotopic_molecular_mass + description: monoisotopic molecular mass + type: continuous + units: g/mol + names: + - noun: monoisotopic molecular mass + - id: carbon_mass + description: carbon mass + type: continuous + units: g/mol + names: + - noun: carbon mass + - id: hydrogen_mass + description: hydrogen mass + type: continuous + units: g/mol + names: + - noun: hydrogen mass + - id: nitrogen_mass + description: nitrogen mass + type: continuous + units: g/mol + names: + - noun: nitrogen mass + - id: oxygen_mass + description: oxygen mass + units: g/mol + type: continuous + names: + - 
noun: oxygen mass + - id: num_carbon_atoms + description: number of carbon atoms + type: ordinal + names: + - noun: carbon atoms + - id: num_hydrogen_atoms + type: ordinal + description: number of hydrogen atoms + names: + - noun: hydrogen atoms + - id: num_nitrogen_atoms + description: number of nitrogen atoms + type: ordinal + names: + - noun: nitrogen atoms + - id: num_oxygen_atoms + description: number of oxygen atoms + type: ordinal + names: + - noun: oxygen atoms + - id: num_hydrogen_bond_acceptors + description: number of hydrogen bond acceptors + type: ordinal + names: + - noun: hydrogen bond acceptors + - id: num_hydrogen_bond_donors + description: number of hydrogen bond donors + type: ordinal + names: + - noun: hydrogen bond donors + - id: num_lipinski_violations + description: number of Lipinski violations + type: ordinal + names: + - noun: Lipinski violations + - noun: Lipinski rule of five violations + - id: num_chiral_centers + description: number of chiral centers + type: ordinal + names: + - noun: chiral center count benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://doi.org/10.1038/s41597-019-0151-1 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#aqeuous_solubility-aqsoldb - description: data source - - url: https://github.com/lamalab-org/chem-caption - description: software used to generate features + - url: https://doi.org/10.1038/s41597-019-0151-1 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#aqeuous_solubility-aqsoldb + description: data source + - url: https://github.com/lamalab-org/chem-caption + description: software used to generate features num_points: 2517 bibtex: - - |- - @article{Sorkun_2019, 
- doi = {10.1038/s41597-019-0151-1}, - url = {https://doi.org/10.1038%2Fs41597-019-0151-1}, - year = {2019}, - month = aug, - publisher = {Springer Science and Business Media LLC}, - volume = {6}, - number = {1}, - author = {Murat Cihan Sorkun and Abhishek Khetan and - Suleyman Er}, - title = {AqSolDB, a curated reference set of aqueous aqeuous_solubility - and 2D descriptors for a diverse set of compounds}, - journal = {Scientific Data} + - |- + @article{Sorkun_2019, + doi = {10.1038/s41597-019-0151-1}, + url = {https://doi.org/10.1038%2Fs41597-019-0151-1}, + year = {2019}, + month = aug, + publisher = {Springer Science and Business Media LLC}, + volume = {6}, + number = {1}, + author = {Murat Cihan Sorkun and Abhishek Khetan and + Suleyman Er}, + title = {AqSolDB, a curated reference set of aqueous aqeuous_solubility + and 2D descriptors for a diverse set of compounds}, + journal = {Scientific Data} templates: - - |- - User: {#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units} and is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. - Assistant: {#There might be multiple compounds that match these criteria. Do you have additional constraints?|Do you have additional constraints?|Is there anything else I should consider?|Is there anything else I should know?!} - User: {#No|No, there are no additional constraints.|No, there are no other constraints.|No, there are no other criteria.|No, there are no other requirements.!} - Assistant: {#In this case, |OK, |Alright, |Understood, |Got it, |I see, !}the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#should|will|is expected to!} fit your criteria. - - |- - User: {#I am a medicinal chemist. |I work in drug-discovery. 
|!}{#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units} and is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. {#Additionally, |Moreover, |On top of that, |In addition, |Furthermore, !}{#I|we!} {#want|need|require|would like|would prefer!} the {#molecule|compound|chemical structure!} to have a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. {#I|We!} {#want|would like to|need to!} ensure that {#there are|the molecule contains|the compound contains!} {carboxyl_count#} {carbonyl_count__names__noun}. - Assistant: {#Thanks for the detailed description. |Thanks. |!}{#I suggest|I recommend|I propose|I would suggest|I would recommend|I would propose!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}.{# This compound should fulfill your needs.| This chemical should satisfy your constrains|!} - - |- - User: {#I am a medicinal chemist. |I work in drug-discovery. |!}{#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} and has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units}. {#Additionally, |Moreover, |On top of that, |In addition, |Furthermore, !}{#I|we!} {#want|need|require|would like|would prefer!} the {#molecule|compound|chemical structure!} to have a {carbon_mass__names__noun} of {carbon_mass#} {carbon_mass__units}. 
{#I|We!} {#want|would like to|need to!} ensure that {#there are|the molecule contains|the compound contains!} {carboxyl_count#} {carbonyl_count__names__noun} and {ether_count#} {ether_count__names__noun} as well as {alkanol_count#} {alkanol_count__names__noun} and {thiol_count#} {thiol_count__names__noun}. - Assistant: {#Thanks for the detailed description. |Thanks. |!}{#I suggest|I recommend|I propose|I would suggest|I would recommend|I would propose!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}.{# This compound should fulfill your needs.| This chemical should satisfy your constrains|!} - - |- - User: {#I am researching|I am investigating|I am studying!} {#pharmaceuticals|medicinal compounds|drug molecules!} and {#need|require|am looking for!} a {#molecule|compound|chemical structure!} with a {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units}. It should also be {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. {#Additionally,|Moreover,|Furthermore,!} it {#must|should|needs to!} have {amine_count#} {amine_count__names__noun}. - Assistant: {#To meet these requirements, |Considering your specifications, |Taking into account your needs, !}I {#recommend|suggest|propose!} a {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}. {#This should match your criteria.|This fits your described parameters.|This aligns with your requirements.!} - - |- - User: {#As a chemist|Being a chemical researcher,|In my chemical research,!} I {#am looking for|require|need!} a {#molecule|compound|chemical structure!} with {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun} and {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun}. It {#also needs|should also!} to be {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} and have a {oxygen_mass__names__noun} of {oxygen_mass#} {oxygen_mass__units}. 
- Assistant: {#I've got the ideal compound for you.|I have a compound that fits these specifications.|I suggest a molecule that meets your needs.!} The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#should work perfectly.|is exactly what you're looking for.|matches your requirements.!} - - |- - User: {#In my pharmaceutical research,|For my current drug discovery project,|In my medicinal chemistry studies,!} I {#require|need|am looking for!} a {#molecule|compound|chemical structure!} with {num_chiral_centers#} {num_chiral_centers__names__noun} and {num_lipinski_violations#} {num_lipinski_violations__names__noun}. It should {#also have|also possess|also contain!} a {nitrogen_mass__names__noun} of {nitrogen_mass#} {nitrogen_mass__units} and be {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. - Assistant: {#After considering your needs,|Based on your requirements,|Taking your specifications into account,!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would be a great fit.|seems to be a perfect match.|should meet all your criteria.!} - - |- - User: {#In my research on|For my study of|While investigating!} {#non-toxic chemicals|safe compounds|environment-friendly substances!}, I {#need|require|am looking for!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} with {num_carbon_atoms#} {num_carbon_atoms__names__noun}. {#Additionally,|Furthermore,|Moreover,!} it {#should have|must have|needs to have!} {num_nitrogen_atoms#} {num_nitrogen_atoms__names__noun}. - Assistant: {#I have a compound in mind|I can suggest a molecule|I've identified a chemical structure!} that {#fits|meets|aligns with!} these requirements. 
The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#is suitable for your needs.|meets your specified criteria.|should work well for your research.!} - - |- - User: {#As a pharmacologist,|In my pharmacological studies,|For my drug development work,!} I {#require|need|am in need of!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} and has {num_oxygen_atoms#} {num_oxygen_atoms__names__noun}. {#Also,|In addition,|Moreover,!} it {#must|should!} have a {molecular_formula__names__noun} of {molecular_formula#}. - Assistant: {#I recommend|I suggest|I propose!} a {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} that {#satisfies these conditions.|meets these criteria.|is aligned with your requirements.!} - - |- - User: {#In my environmental chemistry work,|For my eco-friendly compound research,|As part of my sustainable chemical studies,!} I {#am looking for|require|need!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} with a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. {#Also,|Additionally,|Moreover,!} it {#should possess|must contain|needs to have!} {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun}. - Assistant: {#Considering your needs,|Based on your specifications,|With your requirements in mind,!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would be ideal.|seems perfect.|is a great match.!} - - |- - User: {#For my bioactive molecule research,|In my study of pharmacologically active substances,|As I explore biologically active compounds,!} I {#need|require|am searching for!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. 
It {#should also have|must also feature|also needs to have!} {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun} and {halogen_count#} {halogen_count__names__noun}. - Assistant: {#I've found a compound|I have a molecule|I suggest a chemical structure!} that {#fulfills|meets|matches!} these criteria. The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would suit your research.|is in line with your needs.|fits your specifications perfectly.!} + - |- + User: {#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units} and is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. + Assistant: {#There might be multiple compounds that match these criteria. Do you have additional constraints?|Do you have additional constraints?|Is there anything else I should consider?|Is there anything else I should know?!} + User: {#No|No, there are no additional constraints.|No, there are no other constraints.|No, there are no other criteria.|No, there are no other requirements.!} + Assistant: {#In this case, |OK, |Alright, |Understood, |Got it, |I see, !}the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#should|will|is expected to!} fit your criteria. + - |- + User: {#I am a medicinal chemist. |I work in drug-discovery. |!}{#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units} and is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. 
{#Additionally, |Moreover, |On top of that, |In addition, |Furthermore, !}{#I|we!} {#want|need|require|would like|would prefer!} the {#molecule|compound|chemical structure!} to have a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. {#I|We!} {#want|would like to|need to!} ensure that {#there are|the molecule contains|the compound contains!} {carboxyl_count#} {carbonyl_count__names__noun}. + Assistant: {#Thanks for the detailed description. |Thanks. |!}{#I suggest|I recommend|I propose|I would suggest|I would recommend|I would propose!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}.{# This compound should fulfill your needs.| This chemical should satisfy your constrains|!} + - |- + User: {#I am a medicinal chemist. |I work in drug-discovery. |!}{#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} and has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units}. {#Additionally, |Moreover, |On top of that, |In addition, |Furthermore, !}{#I|we!} {#want|need|require|would like|would prefer!} the {#molecule|compound|chemical structure!} to have a {carbon_mass__names__noun} of {carbon_mass#} {carbon_mass__units}. {#I|We!} {#want|would like to|need to!} ensure that {#there are|the molecule contains|the compound contains!} {carboxyl_count#} {carbonyl_count__names__noun} and {ether_count#} {ether_count__names__noun} as well as {alkanol_count#} {alkanol_count__names__noun} and {thiol_count#} {thiol_count__names__noun}. + Assistant: {#Thanks for the detailed description. |Thanks. 
|!}{#I suggest|I recommend|I propose|I would suggest|I would recommend|I would propose!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}.{# This compound should fulfill your needs.| This chemical should satisfy your constrains|!} + - |- + User: {#I am researching|I am investigating|I am studying!} {#pharmaceuticals|medicinal compounds|drug molecules!} and {#need|require|am looking for!} a {#molecule|compound|chemical structure!} with a {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units}. It should also be {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. {#Additionally,|Moreover,|Furthermore,!} it {#must|should|needs to!} have {amine_count#} {amine_count__names__noun}. + Assistant: {#To meet these requirements, |Considering your specifications, |Taking into account your needs, !}I {#recommend|suggest|propose!} a {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}. {#This should match your criteria.|This fits your described parameters.|This aligns with your requirements.!} + - |- + User: {#As a chemist|Being a chemical researcher,|In my chemical research,!} I {#am looking for|require|need!} a {#molecule|compound|chemical structure!} with {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun} and {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun}. It {#also needs|should also!} to be {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} and have a {oxygen_mass__names__noun} of {oxygen_mass#} {oxygen_mass__units}. 
+ Assistant: {#I've got the ideal compound for you.|I have a compound that fits these specifications.|I suggest a molecule that meets your needs.!} The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#should work perfectly.|is exactly what you're looking for.|matches your requirements.!} + - |- + User: {#In my pharmaceutical research,|For my current drug discovery project,|In my medicinal chemistry studies,!} I {#require|need|am looking for!} a {#molecule|compound|chemical structure!} with {num_chiral_centers#} {num_chiral_centers__names__noun} and {num_lipinski_violations#} {num_lipinski_violations__names__noun}. It should {#also have|also possess|also contain!} a {nitrogen_mass__names__noun} of {nitrogen_mass#} {nitrogen_mass__units} and be {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. + Assistant: {#After considering your needs,|Based on your requirements,|Taking your specifications into account,!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would be a great fit.|seems to be a perfect match.|should meet all your criteria.!} + - |- + User: {#In my research on|For my study of|While investigating!} {#non-toxic chemicals|safe compounds|environment-friendly substances!}, I {#need|require|am looking for!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} with {num_carbon_atoms#} {num_carbon_atoms__names__noun}. {#Additionally,|Furthermore,|Moreover,!} it {#should have|must have|needs to have!} {num_nitrogen_atoms#} {num_nitrogen_atoms__names__noun}. + Assistant: {#I have a compound in mind|I can suggest a molecule|I've identified a chemical structure!} that {#fits|meets|aligns with!} these requirements. 
The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#is suitable for your needs.|meets your specified criteria.|should work well for your research.!} + - |- + User: {#As a pharmacologist,|In my pharmacological studies,|For my drug development work,!} I {#require|need|am in need of!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} and has {num_oxygen_atoms#} {num_oxygen_atoms__names__noun}. {#Also,|In addition,|Moreover,!} it {#must|should!} have a {molecular_formula__names__noun} of {molecular_formula#}. + Assistant: {#I recommend|I suggest|I propose!} a {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} that {#satisfies these conditions.|meets these criteria.|is aligned with your requirements.!} + - |- + User: {#In my environmental chemistry work,|For my eco-friendly compound research,|As part of my sustainable chemical studies,!} I {#am looking for|require|need!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} with a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. {#Also,|Additionally,|Moreover,!} it {#should possess|must contain|needs to have!} {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun}. + Assistant: {#Considering your needs,|Based on your specifications,|With your requirements in mind,!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would be ideal.|seems perfect.|is a great match.!} + - |- + User: {#For my bioactive molecule research,|In my study of pharmacologically active substances,|As I explore biologically active compounds,!} I {#need|require|am searching for!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. 
It {#should also have|must also feature|also needs to have!} {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun} and {halogen_count#} {halogen_count__names__noun}. + Assistant: {#I've found a compound|I have a molecule|I suggest a chemical structure!} that {#fulfills|meets|matches!} these criteria. The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would suit your research.|is in line with your needs.|fits your specifications perfectly.!} diff --git a/data/tabular/inverse_3/meta.yaml b/data/tabular/inverse_3/meta.yaml index 9d3d20a5d..27775ce4f 100644 --- a/data/tabular/inverse_3/meta.yaml +++ b/data/tabular/inverse_3/meta.yaml @@ -1,240 +1,239 @@ ---- name: inverse_3 description: |- - Inverse design task constructed by merging kcnq2_potassium_channel_butkiewicz and - choline_transporter_butkiewicz and augmenting it with molecular descriptors. + Inverse design task constructed by merging kcnq2_potassium_channel_butkiewicz and + choline_transporter_butkiewicz and augmenting it with molecular descriptors. targets: - - id: activity_kcnq2_potassium_channel - description: whether it is active against kcnq2 potassium channel receptor (1) or not (0). - units: - type: boolean - names: - - adjective: kcnq2 potassium channel inhibiting - pubchem_aids: - - 2239 - - 2287 - - 2282 - - 2283 - - 2558 - uris: [] - - id: activity_choline_transporter - description: inhibition of choline transporter receptor (1) or not (0). 
- units: - type: boolean - names: - - adjective: choline transporter activity inhibiting - pubchem_aids: - - 488975 - - 493221 - - 504840 - - 588401 - - 493222 - - 602208 - - id: carboxyl_count - description: number of carboxyl groups - type: ordinal - significant_digits: 0 - names: - - noun: carboxyl groups - - id: carbonyl_count - description: number of carbonyl groups - type: ordinal - significant_digits: 0 - names: - - noun: carbonyl groups - - id: ether_count - description: number of ether groups - type: ordinal - names: - - noun: ether groups - - id: alkanol_count - description: number of alkanol groups - significant_digits: 0 - type: ordinal - names: - - noun: alkanol groups - - id: thiol_count - description: number of thiol groups - type: ordinal - significant_digits: 0 - names: - - noun: thiol groups - - id: halogen_count - description: number of halogen groups - type: ordinal - significant_digits: 0 - names: - - noun: halogen groups - - id: amine_count - description: number of amine groups - type: ordinal - significant_digits: 0 - names: - - noun: amine groups - - id: amide_count - description: number of amide groups - type: ordinal - significant_digits: 0 - names: - - noun: amide groups - - id: ketone_count - description: number of ketone groups - significant_digits: 0 - type: ordinal - names: - - noun: ketone group count - - id: num_valence_electrons - description: number of valence electrons - significant_digits: 0 - type: ordinal - names: - - noun: valence electrons - - id: molecular_formula - description: molecular formula - type: text - names: - - noun: molecular formula - - id: monoisotopic_molecular_mass - description: monoisotopic molecular mass - type: continuous - units: g/mol - names: - - noun: monoisotopic molecular mass - - id: carbon_mass - description: carbon mass - type: continuous - units: g/mol - names: - - noun: carbon mass - - id: hydrogen_mass - description: hydrogen mass - type: continuous - units: g/mol - names: - - noun: hydrogen 
mass - - id: nitrogen_mass - description: nitrogen mass - type: continuous - units: g/mol - names: - - noun: nitrogen mass - - id: oxygen_mass - description: oxygen mass - units: g/mol - type: continuous - names: - - noun: oxygen mass - - id: num_carbon_atoms - description: number of carbon atoms - type: ordinal - significant_digits: 0 - names: - - noun: carbon atoms - - id: num_hydrogen_atoms - type: ordinal - significant_digits: 0 - description: number of hydrogen atoms - names: - - noun: hydrogen atoms - - id: num_nitrogen_atoms - significant_digits: 0 - description: number of nitrogen atoms - type: ordinal - names: - - noun: nitrogen atoms - - id: num_oxygen_atoms - significant_digits: 0 - description: number of oxygen atoms - type: ordinal - names: - - noun: oxygen atoms - - id: num_hydrogen_bond_acceptors - significant_digits: 0 - description: number of hydrogen bond acceptors - type: ordinal - names: - - noun: hydrogen bond acceptors - - id: num_hydrogen_bond_donors - description: number of hydrogen bond donors - type: ordinal - significant_digits: 0 - names: - - noun: hydrogen bond donors - - id: num_lipinski_violations - description: number of Lipinski violations - type: ordinal - significant_digits: 0 - names: - - noun: Lipinski violations - - noun: Lipinski rule of five violations - - id: num_chiral_centers - description: number of chiral centers - significant_digits: 0 - type: ordinal - names: - - noun: chiral center count + - id: activity_kcnq2_potassium_channel + description: whether it is active against kcnq2 potassium channel receptor (1) or not (0). + units: + type: boolean + names: + - adjective: kcnq2 potassium channel inhibiting + pubchem_aids: + - 2239 + - 2287 + - 2282 + - 2283 + - 2558 + uris: [] + - id: activity_choline_transporter + description: inhibition of choline transporter receptor (1) or not (0). 
+ units: + type: boolean + names: + - adjective: choline transporter activity inhibiting + pubchem_aids: + - 488975 + - 493221 + - 504840 + - 588401 + - 493222 + - 602208 + - id: carboxyl_count + description: number of carboxyl groups + type: ordinal + significant_digits: 0 + names: + - noun: carboxyl groups + - id: carbonyl_count + description: number of carbonyl groups + type: ordinal + significant_digits: 0 + names: + - noun: carbonyl groups + - id: ether_count + description: number of ether groups + type: ordinal + names: + - noun: ether groups + - id: alkanol_count + description: number of alkanol groups + significant_digits: 0 + type: ordinal + names: + - noun: alkanol groups + - id: thiol_count + description: number of thiol groups + type: ordinal + significant_digits: 0 + names: + - noun: thiol groups + - id: halogen_count + description: number of halogen groups + type: ordinal + significant_digits: 0 + names: + - noun: halogen groups + - id: amine_count + description: number of amine groups + type: ordinal + significant_digits: 0 + names: + - noun: amine groups + - id: amide_count + description: number of amide groups + type: ordinal + significant_digits: 0 + names: + - noun: amide groups + - id: ketone_count + description: number of ketone groups + significant_digits: 0 + type: ordinal + names: + - noun: ketone group count + - id: num_valence_electrons + description: number of valence electrons + significant_digits: 0 + type: ordinal + names: + - noun: valence electrons + - id: molecular_formula + description: molecular formula + type: text + names: + - noun: molecular formula + - id: monoisotopic_molecular_mass + description: monoisotopic molecular mass + type: continuous + units: g/mol + names: + - noun: monoisotopic molecular mass + - id: carbon_mass + description: carbon mass + type: continuous + units: g/mol + names: + - noun: carbon mass + - id: hydrogen_mass + description: hydrogen mass + type: continuous + units: g/mol + names: + - noun: hydrogen 
mass + - id: nitrogen_mass + description: nitrogen mass + type: continuous + units: g/mol + names: + - noun: nitrogen mass + - id: oxygen_mass + description: oxygen mass + units: g/mol + type: continuous + names: + - noun: oxygen mass + - id: num_carbon_atoms + description: number of carbon atoms + type: ordinal + significant_digits: 0 + names: + - noun: carbon atoms + - id: num_hydrogen_atoms + type: ordinal + significant_digits: 0 + description: number of hydrogen atoms + names: + - noun: hydrogen atoms + - id: num_nitrogen_atoms + significant_digits: 0 + description: number of nitrogen atoms + type: ordinal + names: + - noun: nitrogen atoms + - id: num_oxygen_atoms + significant_digits: 0 + description: number of oxygen atoms + type: ordinal + names: + - noun: oxygen atoms + - id: num_hydrogen_bond_acceptors + significant_digits: 0 + description: number of hydrogen bond acceptors + type: ordinal + names: + - noun: hydrogen bond acceptors + - id: num_hydrogen_bond_donors + description: number of hydrogen bond donors + type: ordinal + significant_digits: 0 + names: + - noun: hydrogen bond donors + - id: num_lipinski_violations + description: number of Lipinski violations + type: ordinal + significant_digits: 0 + names: + - noun: Lipinski violations + - noun: Lipinski rule of five violations + - id: num_chiral_centers + description: number of chiral centers + significant_digits: 0 + type: ordinal + names: + - noun: chiral center count benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://doi.org/10.1038/s41597-019-0151-1 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#aqeuous_solubility-aqsoldb - description: data source - - url: https://github.com/lamalab-org/chem-caption 
- description: software used to generate features + - url: https://doi.org/10.1038/s41597-019-0151-1 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#aqeuous_solubility-aqsoldb + description: data source + - url: https://github.com/lamalab-org/chem-caption + description: software used to generate features num_points: 299452 bibtex: - - |- - @article{Sorkun_2019, - doi = {10.1038/s41597-019-0151-1}, - url = {https://doi.org/10.1038%2Fs41597-019-0151-1}, - year = {2019}, - month = aug, - publisher = {Springer Science and Business Media LLC}, - volume = {6}, - number = {1}, - author = {Murat Cihan Sorkun and Abhishek Khetan and - Suleyman Er}, - title = {AqSolDB, a curated reference set of aqueous aqeuous_solubility - and 2D descriptors for a diverse set of compounds}, - journal = {Scientific Data} + - |- + @article{Sorkun_2019, + doi = {10.1038/s41597-019-0151-1}, + url = {https://doi.org/10.1038%2Fs41597-019-0151-1}, + year = {2019}, + month = aug, + publisher = {Springer Science and Business Media LLC}, + volume = {6}, + number = {1}, + author = {Murat Cihan Sorkun and Abhishek Khetan and + Suleyman Er}, + title = {AqSolDB, a curated reference set of aqueous aqeuous_solubility + and 2D descriptors for a diverse set of compounds}, + journal = {Scientific Data} templates: - - |- - User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a specific number of carbon atoms? - Assistant: What is the desired {#count for|number of!} {num_carbon_atoms__names__noun} in the molecule? - User: {#Preferably |!}{num_carbon_atoms#}. 
- Assistant: For a count of {num_carbon_atoms#} {num_carbon_atoms__names__noun}, a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#} {#would be suitable|would satisfy the constraints!}. - - |- - User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a specific number of hydrogen atoms? - Assistant: What is the desired {#count for|number of!} {num_hydrogen_atoms__names__noun} in the molecule? - User: {#Preferably |!}{num_hydrogen_atoms#}. - Assistant: For a count of {num_hydrogen_atoms#} {num_hydrogen_atoms__names__noun}, a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#} {#would be suitable|would satisfy the constraints!}. - - |- - User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a specific number of nitrogen atoms and molecular formula? - Assistant: What is the desired {molecular_formula__names__noun}? - User: {#Preferably |!}{molecular_formula#}. - Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}. - - |- - User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with {num_chiral_centers__names__noun} and a specific molecular formula? 
- Assistant: What is the desired {molecular_formula__names__noun}? - User: {#Preferably |Ideally |!}{molecular_formula#}. - Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}. - - |- - User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun} and a {molecular_formula__names__noun} of {molecular_formula#}? - Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}. - - |- - User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun} and a {molecular_formula__names__noun} of {molecular_formula#}? - Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}. - - |- - User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. 
{#Can you|Could you!} suggest something with a {num_lipinski_violations#} {num_lipinski_violations__names__noun} and a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}? - Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}. - - |- - User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a {num_carbon_atoms#} {num_carbon_atoms__names__noun}, {num_hydrogen_atoms#} {num_hydrogen_atoms__names__noun} and {num_nitrogen_atoms#} {num_nitrogen_atoms__names__noun}? - Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}. + - |- + User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a specific number of carbon atoms? + Assistant: What is the desired {#count for|number of!} {num_carbon_atoms__names__noun} in the molecule? + User: {#Preferably |!}{num_carbon_atoms#}. + Assistant: For a count of {num_carbon_atoms#} {num_carbon_atoms__names__noun}, a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#} {#would be suitable|would satisfy the constraints!}. 
+ - |- + User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a specific number of hydrogen atoms? + Assistant: What is the desired {#count for|number of!} {num_hydrogen_atoms__names__noun} in the molecule? + User: {#Preferably |!}{num_hydrogen_atoms#}. + Assistant: For a count of {num_hydrogen_atoms#} {num_hydrogen_atoms__names__noun}, a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#} {#would be suitable|would satisfy the constraints!}. + - |- + User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a specific number of nitrogen atoms and molecular formula? + Assistant: What is the desired {molecular_formula__names__noun}? + User: {#Preferably |!}{molecular_formula#}. + Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}. + - |- + User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with {num_chiral_centers__names__noun} and a specific molecular formula? + Assistant: What is the desired {molecular_formula__names__noun}? + User: {#Preferably |Ideally |!}{molecular_formula#}. 
+ Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}. + - |- + User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun} and a {molecular_formula__names__noun} of {molecular_formula#}? + Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}. + - |- + User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun} and a {molecular_formula__names__noun} of {molecular_formula#}? + Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}. + - |- + User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a {num_lipinski_violations#} {num_lipinski_violations__names__noun} and a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}? 
+ Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}. + - |- + User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a {num_carbon_atoms#} {num_carbon_atoms__names__noun}, {num_hydrogen_atoms#} {num_hydrogen_atoms__names__noun} and {num_nitrogen_atoms#} {num_nitrogen_atoms__names__noun}? + Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}. diff --git a/data/tabular/iupac_goldbook/meta.yaml b/data/tabular/iupac_goldbook/meta.yaml index 32fbd8e2a..423db0011 100644 --- a/data/tabular/iupac_goldbook/meta.yaml +++ b/data/tabular/iupac_goldbook/meta.yaml @@ -1,95 +1,94 @@ ---- name: IUPAC Compendium of Chemical Terminology description: |- - The Compendium is popularly referred to as the Gold - Book, in recognition of the contribution of the late Victor Gold, who - initiated work on the first edition. It is one of the series of IUPAC - Colour Books on chemical nomenclature, terminology, symbols and units - (see the list of source documents), and collects together terminology - definitions from IUPAC recommendations already published in Pure and - Applied Chemistry and in the other Colour Books. Terminology - definitions published by IUPAC are drafted by international committees - of experts in the appropriate chemistry sub-disciplines, and ratified - by IUPAC's Interdivisional Committee on Terminology, Nomenclature and - Symbols (ICTNS). 
In this edition of the Compendium these IUPAC-approved - definitions are supplemented with some definitions from ISO and from - the International Vocabulary of Basic and General Terms in Metrology, - both these sources are recognised by IUPAC as authoritative. The result - is a collection of nearly 7000 terms, with authoritative definitions, - spanning the whole range of chemistry. + The Compendium is popularly referred to as the Gold + Book, in recognition of the contribution of the late Victor Gold, who + initiated work on the first edition. It is one of the series of IUPAC + Colour Books on chemical nomenclature, terminology, symbols and units + (see the list of source documents), and collects together terminology + definitions from IUPAC recommendations already published in Pure and + Applied Chemistry and in the other Colour Books. Terminology + definitions published by IUPAC are drafted by international committees + of experts in the appropriate chemistry sub-disciplines, and ratified + by IUPAC's Interdivisional Committee on Terminology, Nomenclature and + Symbols (ICTNS). In this edition of the Compendium these IUPAC-approved + definitions are supplemented with some definitions from ISO and from + the International Vocabulary of Basic and General Terms in Metrology, + both these sources are recognised by IUPAC as authoritative. The result + is a collection of nearly 7000 terms, with authoritative definitions, + spanning the whole range of chemistry. 
targets: - - id: definition - description: definition of a chemistry term - units: - type: string - names: - - noun: definition - - noun: text definition + - id: definition + description: definition of a chemistry term + units: + type: string + names: + - noun: definition + - noun: text definition identifiers: - - id: term - type: Other - description: chemistry term - names: - - noun: chemistry term + - id: term + type: Other + description: chemistry term + names: + - noun: chemistry term license: CC BY-NC-ND 4.0 links: - - url: https://goldbook.iupac.org - description: home page - - url: https://creativecommons.org/licenses/by-nc-nd/4.0/ - description: license description + - url: https://goldbook.iupac.org + description: home page + - url: https://creativecommons.org/licenses/by-nc-nd/4.0/ + description: license description num_points: 5551 bibtex: - - |- - @article{iupac2023,title={IUPAC Compendium of Chemical Terminology}, - publisher={International Union of Pure and Applied Chemistry}, - isbn={978-0865426849}, - doi={10.1351/goldbook}, - accessdate={2023-01-13T17:08:12+00:00}, - } + - |- + @article{iupac2023,title={IUPAC Compendium of Chemical Terminology}, + publisher={International Union of Pure and Applied Chemistry}, + isbn={978-0865426849}, + doi={10.1351/goldbook}, + accessdate={2023-01-13T17:08:12+00:00}, + } templates: - - |- - The {term__names__noun} "{term#}" can be {#described|defined!} {#by|as!}: - {#definition} - - |- - Task: Please {#give me|create|generate!} a {definition__names__noun} of a {term__names__noun}. - Term: {term#} - Constraint: Answer the question with {#full|complete!} sentences. 
- Result: {definition#} - - |- - Task: Please {#give me|create|generate!} a {term__names__noun} for the {#following |!}{definition__names__noun}: - Definition: {definition#} - Result: {term#} - - |- - User: Can you {#give me|create|generate!} a {term__names__noun} {#described|defined!} by: - {#definition} - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {term#} - - |- - User: Can you {#give me|create|generate!} the {definition__names__noun} for the following {term__names__noun}: - {#term} - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: - {#definition} - - |- - User: I'm {#searching|looking!} for the {term__names__noun} that can be described {#by|as!}: - {#definition} - Assistant: This {term__names__noun} fits {#your|this!} definition: {term#} - - |- - User: I want to {#come up with|create|generate!} a {definition__names__noun}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The {term__names__noun} can be described {#by|as!}: - {#term} - Assistant: {#Ok|Got it!},{# here you go,|!} this {definition__names__noun} fits {#your|this!} description: {definition#} - - |- - User: I want to {#come up with|create|generate!} a {term__names__noun}. - Assistant: {#This sounds very exciting. |This sounds very interesting. |!}How is the {term__names__noun} described? - User: The {term__names__noun} can be described {#by|as!}: - {#definition} - Assistant: {#Ok|Got it!},{# here you go,|!} this {term__names__noun} fits {#your|this!} description: {term#} - - |- - Task: Please {#give me|create|generate!} a {definition__names__noun} of a {term__names__noun}. - Term: {term#} - Constraint: Answer the question with {#full|complete!} sentences. 
- Result:{definition#} - - |- - Task: Please {#give me|create|generate!} a {term__names__noun} for the {#following |!}{definition__names__noun}: - Definition: {definition#} - Result:{term#} + - |- + The {term__names__noun} "{term#}" can be {#described|defined!} {#by|as!}: + {#definition} + - |- + Task: Please {#give me|create|generate!} a {definition__names__noun} of a {term__names__noun}. + Term: {term#} + Constraint: Answer the question with {#full|complete!} sentences. + Result: {definition#} + - |- + Task: Please {#give me|create|generate!} a {term__names__noun} for the {#following |!}{definition__names__noun}: + Definition: {definition#} + Result: {term#} + - |- + User: Can you {#give me|create|generate!} a {term__names__noun} {#described|defined!} by: + {#definition} + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {term#} + - |- + User: Can you {#give me|create|generate!} the {definition__names__noun} for the following {term__names__noun}: + {#term} + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: + {#definition} + - |- + User: I'm {#searching|looking!} for the {term__names__noun} that can be described {#by|as!}: + {#definition} + Assistant: This {term__names__noun} fits {#your|this!} definition: {term#} + - |- + User: I want to {#come up with|create|generate!} a {definition__names__noun}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The {term__names__noun} can be described {#by|as!}: + {#term} + Assistant: {#Ok|Got it!},{# here you go,|!} this {definition__names__noun} fits {#your|this!} description: {definition#} + - |- + User: I want to {#come up with|create|generate!} a {term__names__noun}. + Assistant: {#This sounds very exciting. |This sounds very interesting. |!}How is the {term__names__noun} described? 
+ User: The {term__names__noun} can be described {#by|as!}: + {#definition} + Assistant: {#Ok|Got it!},{# here you go,|!} this {term__names__noun} fits {#your|this!} description: {term#} + - |- + Task: Please {#give me|create|generate!} a {definition__names__noun} of a {term__names__noun}. + Term: {term#} + Constraint: Answer the question with {#full|complete!} sentences. + Result:{definition#} + - |- + Task: Please {#give me|create|generate!} a {term__names__noun} for the {#following |!}{definition__names__noun}: + Definition: {definition#} + Result:{term#} diff --git a/data/tabular/iupac_smiles/meta.yaml b/data/tabular/iupac_smiles/meta.yaml index fe6f3267d..e61c42cb2 100644 --- a/data/tabular/iupac_smiles/meta.yaml +++ b/data/tabular/iupac_smiles/meta.yaml @@ -1,73 +1,72 @@ ---- name: iupac_to_smiles description: |- - PubChem is an open chemistry database at the National Institutes of Health (NIH). - This dataset contains the SMILES and different versions of the IUPAC names + PubChem is an open chemistry database at the National Institutes of Health (NIH). 
+ This dataset contains the SMILES and different versions of the IUPAC names targets: - - id: Traditional - description: traditional IUPAC name - type: string - names: - - noun: traditional IUPAC name - - id: Systematic - description: systematic IUPAC name - type: string - names: - - noun: systematic IUPAC name - - id: CAS_like_Style - description: CAS-like name - type: string - names: - - noun: CAS-like IUPAC name - - noun: IUAPC name in CAS-like style - - id: Preferred - description: preferred IUPAC name - type: string - names: - - noun: preferred IUPAC name - - noun: IUPAC name + - id: Traditional + description: traditional IUPAC name + type: string + names: + - noun: traditional IUPAC name + - id: Systematic + description: systematic IUPAC name + type: string + names: + - noun: systematic IUPAC name + - id: CAS_like_Style + description: CAS-like name + type: string + names: + - noun: CAS-like IUPAC name + - noun: IUPAC name in CAS-like style + - id: Preferred + description: preferred IUPAC name + type: string + names: + - noun: preferred IUPAC name + - noun: IUPAC name identifiers: - - id: SMILES - type: SMILES - description: SMILES - names: - - noun: SMILES + - id: SMILES + type: SMILES + description: SMILES + names: + - noun: SMILES license: CC0 (Public Domain) links: - - url: https://pubchem.ncbi.nlm.nih.gov/ - description: original data source + - url: https://pubchem.ncbi.nlm.nih.gov/ + description: original data source num_points: 27224618 bibtex: - - |- - @article{Kim_2022, title={PubChem 2023 update}, - volume={51}, ISSN={1362-4962}, - url={http://dx.doi.org/10.1093/nar/gkac956}, - DOI={10.1093/nar/gkac956}, number={D1}, - journal={Nucleic Acids Research}, - publisher={Oxford University Press (OUP)}, - author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun - and Gindulyte, Asta and He, Jia and He, Siqian - and Li, Qingliang and Shoemaker, Benjamin A - and Thiessen, Paul A and Yu, Bo and Zaslavsky, Leonid - and Zhang, Jian and Bolton, Evan E}, -
year={2022}, month=oct, pages={D1373–D1380} } + - |- + @article{Kim_2022, title={PubChem 2023 update}, + volume={51}, ISSN={1362-4962}, + url={http://dx.doi.org/10.1093/nar/gkac956}, + DOI={10.1093/nar/gkac956}, number={D1}, + journal={Nucleic Acids Research}, + publisher={Oxford University Press (OUP)}, + author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun + and Gindulyte, Asta and He, Jia and He, Siqian + and Li, Qingliang and Shoemaker, Benjamin A + and Thiessen, Paul A and Yu, Bo and Zaslavsky, Leonid + and Zhang, Jian and Bolton, Evan E}, + year={2022}, month=oct, pages={D1373–D1380} } templates: - - The {Traditional__names__noun} of the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#} is {Traditional#}. - - The {CAS_like_Style__names__noun} of the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#} is {CAS_like_Style#}. - - The {Preferred__names__noun} of the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#} is {Preferred#}. - - The {SMILES__description} of the {#molecule|chemical|compound!} with {Traditional__names__noun} {Traditional#} is {SMILES#}. - - The {SMILES__description} of the {#molecule|chemical|compound!} with {Systematic__names__noun} {Systematic#} is {SMILES#}. - - The {SMILES__description} of the {#molecule|chemical|compound!} with {CAS_like_Style__names__noun} {CAS_like_Style#} is {SMILES#}. - - The {SMILES__description} of the {#molecule|chemical|compound!} with {Preferred__names__noun} {Preferred#} is {SMILES#}. - - |- - Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|compound!} {#given the|based on the!} {Traditional__names__noun}. - IUPAC name: {Traditional#} - Result: {SMILES#} - - |- - Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|compound!} {#given the|based on the!} {Systematic__names__noun}. 
- IUPAC name: {Systematic#} - Result: {SMILES#} - - |- - Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|compound!} {#given the|based on the!} {CAS_like_Style__names__noun}. - IUPAC name: {CAS_like_Style#} - Result: {SMILES#} + - The {Traditional__names__noun} of the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#} is {Traditional#}. + - The {CAS_like_Style__names__noun} of the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#} is {CAS_like_Style#}. + - The {Preferred__names__noun} of the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#} is {Preferred#}. + - The {SMILES__description} of the {#molecule|chemical|compound!} with {Traditional__names__noun} {Traditional#} is {SMILES#}. + - The {SMILES__description} of the {#molecule|chemical|compound!} with {Systematic__names__noun} {Systematic#} is {SMILES#}. + - The {SMILES__description} of the {#molecule|chemical|compound!} with {CAS_like_Style__names__noun} {CAS_like_Style#} is {SMILES#}. + - The {SMILES__description} of the {#molecule|chemical|compound!} with {Preferred__names__noun} {Preferred#} is {SMILES#}. + - |- + Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|compound!} {#given the|based on the!} {Traditional__names__noun}. + IUPAC name: {Traditional#} + Result: {SMILES#} + - |- + Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|compound!} {#given the|based on the!} {Systematic__names__noun}. + IUPAC name: {Systematic#} + Result: {SMILES#} + - |- + Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|compound!} {#given the|based on the!} {CAS_like_Style__names__noun}. 
+ IUPAC name: {CAS_like_Style#} + Result: {SMILES#} diff --git a/data/tabular/kcnq2_potassium_channel_butkiewicz/meta.yaml b/data/tabular/kcnq2_potassium_channel_butkiewicz/meta.yaml index 04a2b86c8..d5e61b49a 100644 --- a/data/tabular/kcnq2_potassium_channel_butkiewicz/meta.yaml +++ b/data/tabular/kcnq2_potassium_channel_butkiewicz/meta.yaml @@ -1,170 +1,169 @@ ---- name: kcnq2_potassium_channel_butkiewicz description: |- - This dataset was initially curated from HTS data at - the PubChem database. Details are reported by Butkiewicz et al. (2013). - Primary screen AID 2239, AID 2287 validated active compounds to be - potentiators. Counter screens are AID 2282, AID 2283, and AID 2558. - Final set of 213 active compounds was acquired by removing the active - compounds of AID 2282, AID 2283 and AID 2558 from the confirmatory - screen active set of compounds (AID 2287). + This dataset was initially curated from HTS data at + the PubChem database. Details are reported by Butkiewicz et al. (2013). + Primary screen AID 2239, AID 2287 validated active compounds to be + potentiators. Counter screens are AID 2282, AID 2283, and AID 2558. + Final set of 213 active compounds was acquired by removing the active + compounds of AID 2282, AID 2283 and AID 2558 from the confirmatory + screen active set of compounds (AID 2287). targets: - - id: activity_kcnq2_potassium_channel - description: whether it is active against kcnq2 potassium channel receptor (1) or not (0). - units: - type: boolean - names: - - noun: inhibition of the kcnq2 potassium channel activity - - adjective: kcnq2 potassium channel inhibition - - gerund: inhibiting the activity of kcnq2 potassium channels - - verb: blocks kcnq2 potassium channels - - verb: inhibits kcnq2 potassium channels - pubchem_aids: - - 2239 - - 2287 - - 2282 - - 2283 - - 2558 - uris: [] + - id: activity_kcnq2_potassium_channel + description: whether it is active against kcnq2 potassium channel receptor (1) or not (0). 
+ units: + type: boolean + names: + - noun: inhibition of the kcnq2 potassium channel activity + - adjective: kcnq2 potassium channel inhibition + - gerund: inhibiting the activity of kcnq2 potassium channels + - verb: blocks kcnq2 potassium channels + - verb: inhibits kcnq2 potassium channels + pubchem_aids: + - 2239 + - 2287 + - 2282 + - 2283 + - 2558 + uris: [] identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al - description: original dataset - - url: https://doi.org/10.3390/molecules18010735 - description: corresponding publication - - url: https://doi.org/10.1093/nar/gky1033 - description: corresponding publication - - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ - description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al + description: original dataset + - url: https://doi.org/10.3390/molecules18010735 + description: corresponding publication + - url: https://doi.org/10.1093/nar/gky1033 + description: corresponding publication + - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ + description: corresponding publication benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split num_points: 302405 bibtex: - - |- - @article{Butkiewicz2013, - doi = {10.3390/molecules18010735}, - url = {https://doi.org/10.3390/molecules18010735}, - year = {2013}, - month = jan, - publisher = {{MDPI} {AG}}, - volume = {18}, - number = {1}, - pages = {735--756}, - author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and - Jeffrey Mendenhall and Pedro Teixeira and C. 
Weaver and Jens - Meiler}, - title = {Benchmarking Ligand-Based Virtual High-Throughput - Screening with the {PubChem} Database}, - journal = {Molecules}} - - |- - @article{Kim2018, - doi = {10.1093/nar/gky1033}, - url = {https://doi.org/10.1093/nar/gky1033}, - year = {2018}, - month = oct, - publisher = {Oxford University Press ({OUP})}, - volume = {47}, - number = {D1}, - pages = {D1102--D1109}, - author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta - Gindulyte and Jia He and Siqian He and Qingliang Li and Benjamin - A Shoemaker and Paul A Thiessen and Bo Yu and Leonid Zaslavsky - and Jian Zhang and Evan E Bolton}, - title = {{PubChem} 2019 update: improved access to chemical data}, - journal = {Nucleic Acids Research}} - - |- - @article{Butkiewicz2017, - doi = {}, - url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, - year = {2017}, - publisher = {Chem Inform}, - volume = {3}, - number = {1}, - author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe, - E. W. and Weaver, D. C. and Meiler, J.}, - title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from - the {P}ub{C}hem {D}atabase}}, - journal = {Chemical Science}} + - |- + @article{Butkiewicz2013, + doi = {10.3390/molecules18010735}, + url = {https://doi.org/10.3390/molecules18010735}, + year = {2013}, + month = jan, + publisher = {{MDPI} {AG}}, + volume = {18}, + number = {1}, + pages = {735--756}, + author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and + Jeffrey Mendenhall and Pedro Teixeira and C. 
Weaver and Jens + Meiler}, + title = {Benchmarking Ligand-Based Virtual High-Throughput + Screening with the {PubChem} Database}, + journal = {Molecules}} + - |- + @article{Kim2018, + doi = {10.1093/nar/gky1033}, + url = {https://doi.org/10.1093/nar/gky1033}, + year = {2018}, + month = oct, + publisher = {Oxford University Press ({OUP})}, + volume = {47}, + number = {D1}, + pages = {D1102--D1109}, + author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta + Gindulyte and Jia He and Siqian He and Qingliang Li and Benjamin + A Shoemaker and Paul A Thiessen and Bo Yu and Leonid Zaslavsky + and Jian Zhang and Evan E Bolton}, + title = {{PubChem} 2019 update: improved access to chemical data}, + journal = {Nucleic Acids Research}} + - |- + @article{Butkiewicz2017, + doi = {}, + url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, + year = {2017}, + publisher = {Chem Inform}, + volume = {3}, + number = {1}, + author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe, + E. W. and Weaver, D. C. and Meiler, J.}, + title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from + the {P}ub{C}hem {D}atabase}, + journal = {Chemical Science}} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_kcnq2_potassium_channel#no &NULL}{activity_kcnq2_potassium_channel__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {activity_kcnq2_potassium_channel#no &NULL}{activity_kcnq2_potassium_channel__names__noun}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_kcnq2_potassium_channel#no &NULL}{activity_kcnq2_potassium_channel__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}.
- - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {activity_kcnq2_potassium_channel#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_kcnq2_potassium_channel__names__gerund}? - Assistant: {activity_kcnq2_potassium_channel#No&Yes}, this molecule is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {activity_kcnq2_potassium_channel__names__gerund}? - Assistant: {activity_kcnq2_potassium_channel#No&Yes}, it is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}? 
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}? - Assistant: This is a molecule that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {activity_kcnq2_potassium_channel#not &NULL}be {activity_kcnq2_potassium_channel__names__gerund}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {activity_kcnq2_potassium_channel#not &NULL}be {activity_kcnq2_potassium_channel__names__gerund}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {activity_kcnq2_potassium_channel__names__gerund}:{activity_kcnq2_potassium_channel#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. 
- Result:{activity_kcnq2_potassium_channel#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result:This molecule is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}. - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_kcnq2_potassium_channel__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {activity_kcnq2_potassium_channel%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%activity_kcnq2_potassium_channel%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%activity_kcnq2_potassium_channel%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_kcnq2_potassium_channel#no &NULL}{activity_kcnq2_potassium_channel__names__noun}. 
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {activity_kcnq2_potassium_channel#no &NULL}{activity_kcnq2_potassium_channel__names__noun}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_kcnq2_potassium_channel#no &NULL}{activity_kcnq2_potassium_channel__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {activity_kcnq2_potassium_channel#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_kcnq2_potassium_channel__names__gerund}? + Assistant: {activity_kcnq2_potassium_channel#No&Yes}, this molecule is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}. 
+ - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {activity_kcnq2_potassium_channel__names__gerund}? + Assistant: {activity_kcnq2_potassium_channel#No&Yes}, it is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}? + Assistant: This is a molecule that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {activity_kcnq2_potassium_channel#not &NULL}be {activity_kcnq2_potassium_channel__names__gerund}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {activity_kcnq2_potassium_channel#not &NULL}be {activity_kcnq2_potassium_channel__names__gerund}. 
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {activity_kcnq2_potassium_channel__names__gerund}:{activity_kcnq2_potassium_channel#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{activity_kcnq2_potassium_channel#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result:This molecule is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}. + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_kcnq2_potassium_channel__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {activity_kcnq2_potassium_channel%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {SMILES%activity_kcnq2_potassium_channel%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%activity_kcnq2_potassium_channel%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/ld50_zhu/example_processing_and_templates.ipynb b/data/tabular/ld50_zhu/example_processing_and_templates.ipynb index 5f2f66c1b..c2a09dcac 100644 --- a/data/tabular/ld50_zhu/example_processing_and_templates.ipynb +++ b/data/tabular/ld50_zhu/example_processing_and_templates.ipynb @@ -26,11 +26,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "cf59e3e9-8061-4022-9eae-e978311b4155", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", @@ -56,11 +54,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "7bb8eb5e-f513-40d2-a68c-7cda1a51ad31", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "fn_data_original = \"data_original.csv\"" @@ -68,11 +64,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "b39a142e-ccbc-49d2-98b0-a5f9bde9fd27", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stderr", @@ -86,16 +80,14 @@ } ], "source": [ - "data = Tox(name = 'LD50_Zhu')" + "data = Tox(name=\"LD50_Zhu\")" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "26d9f62a-07f5-4113-8161-d5dfcf0bfb71", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "data.get_data().to_csv(fn_data_original, index=False)" @@ -103,11 +95,9 @@ }, { "cell_type": "code", - "execution_count": 5, + 
"execution_count": null, "id": "43873fc3-20a8-487d-a7c5-33bd58414159", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -136,11 +126,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "77f614e7-b133-40bc-8759-2d930e4c120e", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -160,11 +148,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "8f5a0387-f9e3-4e1a-8d14-5df618195f70", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(fn_data_original, delimiter=\",\")" @@ -172,12 +158,9 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "55b0bd63-62a0-469e-9d8a-e9ada3fe01c4", - "metadata": { - "scrolled": true, - "tags": [] - }, + "metadata": {}, "outputs": [ { "data": { @@ -256,7 +239,7 @@ "4 S=C=Nc1ccc(Br)cc1 2.729 " ] }, - "execution_count": 8, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -276,11 +259,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "ec2458e5-455f-4f03-8ce9-c0d12e9ed371", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "data": { @@ -288,7 +269,7 @@ "['Drug_ID', 'Drug', 'Y']" ] }, - "execution_count": 9, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -300,21 +281,19 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "28c9b695", "metadata": {}, "outputs": [], "source": [ - "assert fields_orig == ['Drug_ID', 'Drug', 'Y']" + "assert fields_orig == [\"Drug_ID\", \"Drug\", \"Y\"]" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "a46dd8ff-37b3-4894-8226-3bf98226dd09", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "fields_clean = [\n", @@ -326,11 +305,9 @@ }, { "cell_type": "code", - "execution_count": 12, 
+ "execution_count": null, "id": "785d37cb-1fb4-4a91-a923-d5a78a37f36a", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "df.columns = fields_clean" @@ -338,7 +315,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "aaad8f07", "metadata": {}, "outputs": [], @@ -348,11 +325,9 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "1bf212cb-1653-457b-9f5d-416d4dd14b53", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "data": { @@ -431,7 +406,7 @@ "4 S=C=Nc1ccc(Br)cc1 2.729 " ] }, - "execution_count": 14, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -450,11 +425,9 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "7e746003-cb1f-434f-bba6-00f0c439c4ac", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "df.compound_name = (\n", @@ -464,11 +437,9 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "d544fa60-343e-40e1-bd0c-4750f07a7145", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "assert not df.duplicated().sum()" @@ -484,11 +455,9 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "d6d5efa5-b4b4-4a25-8626-e10f3d691e83", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "fn_data_csv = \"data_clean.csv\"" @@ -496,11 +465,9 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "727f8d7b-cbb6-43c7-9eab-9d4d65be6b3f", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "df.to_csv(fn_data_csv, index=False)" @@ -508,11 +475,9 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "63c8d4a4-906e-418d-be39-879365b4dfa0", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -528,11 +493,9 @@ }, { 
"cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "a51b9001-25d7-4e0e-a607-477cfc4a9f1c", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -552,11 +515,9 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "1a512943-4909-4d56-867d-50c151d8d607", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "data": { @@ -635,7 +596,7 @@ "4 S=C=Nc1ccc(Br)cc1 2.729 " ] }, - "execution_count": 21, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -654,11 +615,9 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "077b0c5f-8772-4879-9317-3fa28799689b", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "fn_data_csv = \"data_clean.csv\"" @@ -666,11 +625,9 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "6eaef0e6-2115-4793-ac43-a196b25d47a0", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(fn_data_csv)" @@ -678,11 +635,9 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "43619e7c-9c82-4ff0-ae25-403861304635", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "data": { @@ -761,7 +716,7 @@ "4 S=C=Nc1ccc(Br)cc1 2.729 " ] }, - "execution_count": 24, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -780,11 +735,9 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "49771077-471d-4d71-a9a7-d6b094bbc4f3", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "data": { @@ -863,7 +816,7 @@ "4 S=C=Nc1ccc(Br)cc1 2.729 " ] }, - "execution_count": 25, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -874,11 +827,9 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "d3890961-444e-4a26-b8fc-ed8c4e959af9", - 
"metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "meta = {\n", @@ -945,11 +896,9 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "ec455cf0-962a-4c0d-bb3e-066e415ffd9b", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "def str_presenter(dumper, data):\n", @@ -969,11 +918,9 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "580bbd79-4845-4515-be94-3e4a9815d048", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "fn_meta = \"meta.yaml\"" @@ -981,11 +928,9 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "873fa5dd-9b60-40f5-b537-4d7a206414ea", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "with open(fn_meta, \"w\") as f:\n", @@ -994,11 +939,9 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "d01686c0-6746-4fc4-b019-350270dfc26f", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1014,11 +957,9 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "ef6063c5-7a8b-4344-bccf-a073443feebf", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1087,11 +1028,9 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "9aab00fd-58a8-40b0-be30-1e269e0d323b", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "path_file = \"transform.py\"" @@ -1099,11 +1038,9 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "8368bb20-8e1c-4b7d-b0e2-b39da36b5972", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1245,11 +1182,9 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "d0474f26-70f3-4655-b81a-df4ada90e7a6", - "metadata": { - "tags": [] - }, + 
"metadata": {}, "outputs": [ { "name": "stdout", @@ -1268,11 +1203,9 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "id": "953e7bee-bd5e-41d0-a2be-506e0bc97727", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1316,8 +1249,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" + "pygments_lexer": "ipython3" } }, "nbformat": 4, diff --git a/data/tabular/ld50_zhu/meta.yaml b/data/tabular/ld50_zhu/meta.yaml index bf89726c0..7f77efa10 100644 --- a/data/tabular/ld50_zhu/meta.yaml +++ b/data/tabular/ld50_zhu/meta.yaml @@ -1,54 +1,53 @@ ---- name: ld50_zhu description: |- - Acute toxicity LD50 measures - the most conservative dose that can lead to lethal adverse effects. - The higher the dose, the more lethal of a drug. + Acute toxicity LD50 measures + the most conservative dose that can lead to lethal adverse effects. + The higher the dose, the more lethal of a drug. targets: - - id: acute_toxicity - description: Acute Toxicity LD50. - units: log10(1/(mol/kg)) - type: continuous - names: - - noun: acute oral toxicity rat LD50 - - noun: acute oral toxicity (LD50 in rats) - - noun: LD50 in rats (oral exposure) - - noun: rat LD50 (oral exposure) - uris: - - http://www.bioassayontology.org/bao#BAO_0002117 + - id: acute_toxicity + description: Acute Toxicity LD50. 
+ units: log10(1/(mol/kg)) + type: continuous + names: + - noun: acute oral toxicity rat LD50 + - noun: acute oral toxicity (LD50 in rats) + - noun: LD50 in rats (oral exposure) + - noun: rat LD50 (oral exposure) + uris: + - http://www.bioassayontology.org/bao#BAO_0002117 identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: compound_name - type: Other - description: compound name - names: - - noun: compound - - noun: compound name - - noun: drug + - id: SMILES + type: SMILES + description: SMILES + - id: compound_name + type: Other + description: compound name + names: + - noun: compound + - noun: compound name + - noun: drug license: CC BY 4.0 links: - - url: https://doi.org/10.1021/tx900189p - description: corresponding publication + - url: https://doi.org/10.1021/tx900189p + description: corresponding publication benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split num_points: 7385 bibtex: - - |- - @article{Zhu2009, - doi = {10.1021/tx900189p}, - url = {https://doi.org/10.1021/tx900189p}, - year = {2009}, - month = oct, - publisher = {American Chemical Society ({ACS})}, - volume = {22}, - number = {12}, - pages = {1913--1921}, - author = {Hao Zhu and Todd M. Martin and Lin Ye and Alexander - Sedykh and Douglas M. Young and Alexander Tropsha}, - title = {Quantitative Structure-Activity Relationship Modeling - of Rat Acute Toxicity by Oral Exposure}, - journal = {Chemical Research in Toxicology}} + - |- + @article{Zhu2009, + doi = {10.1021/tx900189p}, + url = {https://doi.org/10.1021/tx900189p}, + year = {2009}, + month = oct, + publisher = {American Chemical Society ({ACS})}, + volume = {22}, + number = {12}, + pages = {1913--1921}, + author = {Hao Zhu and Todd M. Martin and Lin Ye and Alexander + Sedykh and Douglas M. 
Young and Alexander Tropsha}, + title = {Quantitative Structure-Activity Relationship Modeling + of Rat Acute Toxicity by Oral Exposure}, + journal = {Chemical Research in Toxicology}} diff --git a/data/tabular/lipophilicity/meta.yaml b/data/tabular/lipophilicity/meta.yaml index c06a26d5c..a8ee84384 100644 --- a/data/tabular/lipophilicity/meta.yaml +++ b/data/tabular/lipophilicity/meta.yaml @@ -1,60 +1,59 @@ ---- name: lipophilicity description: Experimental results of octanol/water distribution coefficient (logD at pH 7.4). targets: - - id: exp - description: experimental results of octanol/water distribution coefficient (logD at pH 7.4) - units: (dimensionless) - type: continuous - names: - - noun: octanol/water distribution coefficient (logD at pH 7.4) - - noun: logD at pH 7.4 - - noun: octanol/water distribution coefficient - uris: - - http://www.bioassayontology.org/bao#BAO_0002129 - - http://purl.obolibrary.org/obo/MI_2107 + - id: exp + description: experimental results of octanol/water distribution coefficient (logD at pH 7.4) + units: (dimensionless) + type: continuous + names: + - noun: octanol/water distribution coefficient (logD at pH 7.4) + - noun: logD at pH 7.4 + - noun: octanol/water distribution coefficient + uris: + - http://www.bioassayontology.org/bao#BAO_0002129 + - http://purl.obolibrary.org/obo/MI_2107 identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY-SA 3.0 links: - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/Lipophilicity.csv - description: original dataset link - - url: https://github.com/cheminfo/molecule-features/blob/main/data/lipophilicity/meta.yaml - description: original meta data - - url: https://deepchem.readthedocs.io/en/latest/api_reference/moleculenet.html#lipo-datasets - description: original dataset link from moleculenet - - url: https://www.ebi.ac.uk/chembl/document_report_card/CHEMBL3301361/ - description: original report 
card - - url: https://chembl.gitbook.io/chembl-interface-documentation/about#data-licensing - description: original dataset license from chembl - - url: https://creativecommons.org/licenses/by-sa/3.0/ - description: used dataset license + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/Lipophilicity.csv + description: original dataset link + - url: https://github.com/cheminfo/molecule-features/blob/main/data/lipophilicity/meta.yaml + description: original meta data + - url: https://deepchem.readthedocs.io/en/latest/api_reference/moleculenet.html#lipo-datasets + description: original dataset link from moleculenet + - url: https://www.ebi.ac.uk/chembl/document_report_card/CHEMBL3301361/ + description: original report card + - url: https://chembl.gitbook.io/chembl-interface-documentation/about#data-licensing + description: original dataset license from chembl + - url: https://creativecommons.org/licenses/by-sa/3.0/ + description: used dataset license num_points: 4200 bibtex: - - |- - @techreport{hersey2015chembl, - title={ChEMBL Deposited Data Set-AZ dataset}, - author={Hersey, Anne}, - year={2015}, - institution={Technical Report, Technical report, EMBL-EBI, 2015. https://www. ebi. ac. uk} - } + - |- + @techreport{hersey2015chembl, + title={ChEMBL Deposited Data Set-AZ dataset}, + author={Hersey, Anne}, + year={2015}, + institution={Technical Report, Technical report, EMBL-EBI, 2015. https://www. ebi. ac. uk} + } templates: - - |- - Task: Please answer the multiple choice question below with {%multiple_choice_enum%3-6%aA1}. - Question: What is the {exp__names__noun} of the {SMILES__description} {SMILES#}? - Options: - {exp%} - Answer: {%multiple_choice_result} - - |- - Question: Please estimate the {exp__names__noun} of {SMILES#} by picking one choice of {%multiple_choice_enum%3-6%aA1}. - Options: - {exp%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question below with {%multiple_choice_enum%3-6%aA1}. 
- Question: What is the {exp__names__noun} of the {SMILES__description} {SMILES#}? - Options: - {exp%} - Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question below with {%multiple_choice_enum%3-6%aA1}. + Question: What is the {exp__names__noun} of the {SMILES__description} {SMILES#}? + Options: + {exp%} + Answer: {%multiple_choice_result} + - |- + Question: Please estimate the {exp__names__noun} of {SMILES#} by picking one choice of {%multiple_choice_enum%3-6%aA1}. + Options: + {exp%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question below with {%multiple_choice_enum%3-6%aA1}. + Question: What is the {exp__names__noun} of the {SMILES__description} {SMILES#}? + Options: + {exp%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/m1_muscarinic_receptor_agonists_butkiewicz/meta.yaml b/data/tabular/m1_muscarinic_receptor_agonists_butkiewicz/meta.yaml index 59a1f8b79..c222cdaa0 100644 --- a/data/tabular/m1_muscarinic_receptor_agonists_butkiewicz/meta.yaml +++ b/data/tabular/m1_muscarinic_receptor_agonists_butkiewicz/meta.yaml @@ -1,162 +1,161 @@ ---- name: m1_muscarinic_receptor_agonists_butkiewicz description: |- - Positive allosteric modulation of the M1 Muscarinic - receptor screened with AID626. Confirmed by screen AID 1488. A second - counter screen AID 1741. The final set of selective positive - allosteric modulators of M1 was obtained by removing compounds active - in AID 1741 from the compounds active in AID 1488 resulting in 188 - compounds. + Positive allosteric modulation of the M1 Muscarinic + receptor screened with AID626. Confirmed by screen AID 1488. A second + counter screen AID 1741. The final set of selective positive + allosteric modulators of M1 was obtained by removing compounds active + in AID 1741 from the compounds active in AID 1488 resulting in 188 + compounds. 
targets: - - id: m1_muscarinic_agonist - description: whether it agonist on m1 muscarinic receptor (1) or not (0). - units: - type: boolean - names: - - noun: positive allosteric modulation of the M1 muscarinic receptor activity - - gerund: modulating the M1 muscarinic receptor activity in a positive allosteric way - pubchem_aids: - - 626 - - 1488 - - 1741 - uris: [] + - id: m1_muscarinic_agonist + description: whether it agonist on m1 muscarinic receptor (1) or not (0). + units: + type: boolean + names: + - noun: positive allosteric modulation of the M1 muscarinic receptor activity + - gerund: modulating the M1 muscarinic receptor activity in a positive allosteric way + pubchem_aids: + - 626 + - 1488 + - 1741 + uris: [] identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al - description: original dataset - - url: https://doi.org/10.3390/molecules18010735 - description: corresponding publication - - url: https://doi.org/10.1093/nar/gky1033 - description: corresponding publication - - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ - description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al + description: original dataset + - url: https://doi.org/10.3390/molecules18010735 + description: corresponding publication + - url: https://doi.org/10.1093/nar/gky1033 + description: corresponding publication + - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ + description: corresponding publication benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split num_points: 61833 bibtex: - - |- - @article{Butkiewicz2013, - doi = {10.3390/molecules18010735}, - url = {https://doi.org/10.3390/molecules18010735}, - year = {2013}, - month = jan, - publisher = {{MDPI} {AG}}, - 
volume = {18}, - number = {1}, - pages = {735--756}, - author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller - and Jeffrey Mendenhall and Pedro Teixeira and C. Weaver and Jens Meiler}, - title = {Benchmarking Ligand-Based Virtual High-Throughput Screening - with the {PubChem} Database}, - journal = {Molecules}} - - |- - @article{Kim2018, - doi = {10.1093/nar/gky1033}, - url = {https://doi.org/10.1093/nar/gky1033}, - year = {2018}, - month = oct, - publisher = {Oxford University Press ({OUP})}, - volume = {47}, - number = {D1}, - pages = {D1102--D1109}, - author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta Gindulyte - and Jia He and Siqian He and Qingliang Li and Benjamin A Shoemaker - and Paul A Thiessen and Bo Yu and Leonid Zaslavsky and Jian Zhang and Evan E Bolton}, - title = {{PubChem} 2019 update: improved access to chemical data}, - journal = {Nucleic Acids Research}} - - |- - @article{Butkiewicz2017, - doi = {}, - url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, - year = {2017}, - publisher = {Chem Inform}, - volume = {3}, - number = {1}, - author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. - and Lowe, E. W. and Weaver, D. C. and Meiler, J.}, - title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets - from the {P}ub{C}hem {D}atabase}}, - journal = {Chemical Science}} + - |- + @article{Butkiewicz2013, + doi = {10.3390/molecules18010735}, + url = {https://doi.org/10.3390/molecules18010735}, + year = {2013}, + month = jan, + publisher = {{MDPI} {AG}}, + volume = {18}, + number = {1}, + pages = {735--756}, + author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller + and Jeffrey Mendenhall and Pedro Teixeira and C. 
Weaver and Jens Meiler}, + title = {Benchmarking Ligand-Based Virtual High-Throughput Screening + with the {PubChem} Database}, + journal = {Molecules}} + - |- + @article{Kim2018, + doi = {10.1093/nar/gky1033}, + url = {https://doi.org/10.1093/nar/gky1033}, + year = {2018}, + month = oct, + publisher = {Oxford University Press ({OUP})}, + volume = {47}, + number = {D1}, + pages = {D1102--D1109}, + author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta Gindulyte + and Jia He and Siqian He and Qingliang Li and Benjamin A Shoemaker + and Paul A Thiessen and Bo Yu and Leonid Zaslavsky and Jian Zhang and Evan E Bolton}, + title = {{PubChem} 2019 update: improved access to chemical data}, + journal = {Nucleic Acids Research}} + - |- + @article{Butkiewicz2017, + doi = {}, + url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, + year = {2017}, + publisher = {Chem Inform}, + volume = {3}, + number = {1}, + author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. + and Lowe, E. W. and Weaver, D. C. and Meiler, J.}, + title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets + from the {P}ub{C}hem {D}atabase}}, + journal = {Chemical Science}} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {m1_muscarinic_agonist#no &NULL}{m1_muscarinic_agonist__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {m1_muscarinic_agonist#no &NULL}{m1_muscarinic_agonist__names__noun}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {m1_muscarinic_agonist#no &NULL}{m1_muscarinic_agonist__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {m1_muscarinic_agonist__names__gerund}. 
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {m1_muscarinic_agonist#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {m1_muscarinic_agonist__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {m1_muscarinic_agonist__names__gerund}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {m1_muscarinic_agonist__names__gerund}? - Assistant: {m1_muscarinic_agonist#No&Yes}, this molecule is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {m1_muscarinic_agonist__names__gerund}? - Assistant: {m1_muscarinic_agonist#No&Yes}, it is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}? - Assistant: This is a molecule that is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. 
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {m1_muscarinic_agonist#not &NULL}be {m1_muscarinic_agonist__names__gerund}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {m1_muscarinic_agonist#not &NULL}be {m1_muscarinic_agonist__names__gerund}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {m1_muscarinic_agonist__names__gerund}:{m1_muscarinic_agonist#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {m1_muscarinic_agonist__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{m1_muscarinic_agonist#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {m1_muscarinic_agonist__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result:This molecule is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}. - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {m1_muscarinic_agonist__names__gerund}? 
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {m1_muscarinic_agonist%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%m1_muscarinic_agonist%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%m1_muscarinic_agonist%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {m1_muscarinic_agonist#no &NULL}{m1_muscarinic_agonist__names__noun}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {m1_muscarinic_agonist#no &NULL}{m1_muscarinic_agonist__names__noun}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {m1_muscarinic_agonist#no &NULL}{m1_muscarinic_agonist__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {m1_muscarinic_agonist__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. 
+ Result: {m1_muscarinic_agonist#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {m1_muscarinic_agonist__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {m1_muscarinic_agonist__names__gerund}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {m1_muscarinic_agonist__names__gerund}? + Assistant: {m1_muscarinic_agonist#No&Yes}, this molecule is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {m1_muscarinic_agonist__names__gerund}? + Assistant: {m1_muscarinic_agonist#No&Yes}, it is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}? + Assistant: This is a molecule that is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. 
The molecule should {m1_muscarinic_agonist#not &NULL}be {m1_muscarinic_agonist__names__gerund}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {m1_muscarinic_agonist#not &NULL}be {m1_muscarinic_agonist__names__gerund}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {m1_muscarinic_agonist__names__gerund}:{m1_muscarinic_agonist#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {m1_muscarinic_agonist__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{m1_muscarinic_agonist#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {m1_muscarinic_agonist__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result:This molecule is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}. + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {m1_muscarinic_agonist__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. 
+ Options: + {m1_muscarinic_agonist%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%m1_muscarinic_agonist%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%m1_muscarinic_agonist%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/m1_muscarinic_receptor_antagonists_butkiewicz/meta.yaml b/data/tabular/m1_muscarinic_receptor_antagonists_butkiewicz/meta.yaml index 848ee4cbd..188ad2067 100644 --- a/data/tabular/m1_muscarinic_receptor_antagonists_butkiewicz/meta.yaml +++ b/data/tabular/m1_muscarinic_receptor_antagonists_butkiewicz/meta.yaml @@ -1,164 +1,163 @@ ---- name: m1_muscarinic_receptor_antagonists_butkiewicz description: |- - Primary screen AID628 confirmed by screen AID677. - AID859 confirmed activity on rat M1 receptor. - The counter screen AID860 removed non-selective compounds - being active also at the rat M4 receptor. - Final set of active compoundsobtained by subtracting active compounds of AID860 - from those in AID677, resulting in 448 total active compounds. + Primary screen AID628 confirmed by screen AID677. + AID859 confirmed activity on rat M1 receptor. + The counter screen AID860 removed non-selective compounds + being active also at the rat M4 receptor. + Final set of active compoundsobtained by subtracting active compounds of AID860 + from those in AID677, resulting in 448 total active compounds. 
targets: - - id: m1_muscarinic_antagonist - description: whether it negatively modulates the m1 muscarinic receptor (1) or not (0). - units: - type: boolean - names: - - noun: negative modulation of the M1 muscarinic receptor activity - - gerund: modulating the M1 muscarinic receptor activity in a negative way - pubchem_aids: - - 628 - - 677 - - 860 - uris: [] + - id: m1_muscarinic_antagonist + description: whether it negatively modulates the m1 muscarinic receptor (1) or not (0). + units: + type: boolean + names: + - noun: negative modulation of the M1 muscarinic receptor activity + - gerund: modulating the M1 muscarinic receptor activity in a negative way + pubchem_aids: + - 628 + - 677 + - 860 + uris: [] identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al - description: original dataset - - url: https://doi.org/10.3390/molecules18010735 - description: corresponding publication - - url: https://doi.org/10.1093/nar/gky1033 - description: corresponding publication - - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ - description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al + description: original dataset + - url: https://doi.org/10.3390/molecules18010735 + description: corresponding publication + - url: https://doi.org/10.1093/nar/gky1033 + description: corresponding publication + - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ + description: corresponding publication benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split num_points: 61756 bibtex: - - |- - @article{Butkiewicz2013, - doi = {10.3390/molecules18010735}, - url = {https://doi.org/10.3390/molecules18010735}, - year = {2013}, - month = jan, - publisher = {{MDPI} {AG}}, - volume = {18}, 
- number = {1}, - pages = {735--756}, - author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and - Jeffrey Mendenhall and Pedro Teixeira and C. Weaver and Jens - Meiler}, - title = {Benchmarking Ligand-Based Virtual High-Throughput - Screening with the {PubChem} Database}, - journal = {Molecules}} - - |- - @article{Kim2018, - doi = {10.1093/nar/gky1033}, - url = {https://doi.org/10.1093/nar/gky1033}, - year = {2018}, - month = oct, - publisher = {Oxford University Press ({OUP})}, - volume = {47}, - number = {D1}, - pages = {D1102--D1109}, - author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta - Gindulyte and Jia He and Siqian He and Qingliang Li and Benjamin - A Shoemaker and Paul A Thiessen and Bo Yu and Leonid Zaslavsky - and Jian Zhang and Evan E Bolton}, - title = {{PubChem} 2019 update: improved access to chemical data}, - journal = {Nucleic Acids Research}} - - |- - @article{Butkiewicz2017, - doi = {}, - url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, - year = {2017}, - publisher = {Chem Inform}, - volume = {3}, - number = {1}, - author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe, - E. W. and Weaver, D. C. and Meiler, J.}, - title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from - the {P}ub{C}hem {D}atabase}}, - journal = {Chemical Science}} + - |- + @article{Butkiewicz2013, + doi = {10.3390/molecules18010735}, + url = {https://doi.org/10.3390/molecules18010735}, + year = {2013}, + month = jan, + publisher = {{MDPI} {AG}}, + volume = {18}, + number = {1}, + pages = {735--756}, + author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and + Jeffrey Mendenhall and Pedro Teixeira and C. 
Weaver and Jens + Meiler}, + title = {Benchmarking Ligand-Based Virtual High-Throughput + Screening with the {PubChem} Database}, + journal = {Molecules}} + - |- + @article{Kim2018, + doi = {10.1093/nar/gky1033}, + url = {https://doi.org/10.1093/nar/gky1033}, + year = {2018}, + month = oct, + publisher = {Oxford University Press ({OUP})}, + volume = {47}, + number = {D1}, + pages = {D1102--D1109}, + author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta + Gindulyte and Jia He and Siqian He and Qingliang Li and Benjamin + A Shoemaker and Paul A Thiessen and Bo Yu and Leonid Zaslavsky + and Jian Zhang and Evan E Bolton}, + title = {{PubChem} 2019 update: improved access to chemical data}, + journal = {Nucleic Acids Research}} + - |- + @article{Butkiewicz2017, + doi = {}, + url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, + year = {2017}, + publisher = {Chem Inform}, + volume = {3}, + number = {1}, + author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe, + E. W. and Weaver, D. C. and Meiler, J.}, + title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from + the {P}ub{C}hem {D}atabase}}, + journal = {Chemical Science}} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {m1_muscarinic_antagonist#no &NULL}{m1_muscarinic_antagonist__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {m1_muscarinic_antagonist#no &NULL}{m1_muscarinic_antagonist__names__noun}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {m1_muscarinic_antagonist#no &NULL}{m1_muscarinic_antagonist__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}. - - |- - Task: Please classify a molecule based on the description. 
- Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {m1_muscarinic_antagonist#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {m1_muscarinic_antagonist__names__gerund}? - Assistant: {m1_muscarinic_antagonist#No&Yes}, this molecule is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {m1_muscarinic_antagonist__names__gerund}? - Assistant: {m1_muscarinic_antagonist#No&Yes}, it is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}? 
- Assistant: This is a molecule that is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {m1_muscarinic_antagonist#not &NULL}be {m1_muscarinic_antagonist__names__gerund}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {m1_muscarinic_antagonist#not &NULL}be {m1_muscarinic_antagonist__names__gerund}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {m1_muscarinic_antagonist__names__gerund}:{m1_muscarinic_antagonist#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{m1_muscarinic_antagonist#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. 
- Result:This molecule is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}. - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {m1_muscarinic_antagonist__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {m1_muscarinic_antagonist%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%m1_muscarinic_antagonist%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%m1_muscarinic_antagonist%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {m1_muscarinic_antagonist#no &NULL}{m1_muscarinic_antagonist__names__noun}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {m1_muscarinic_antagonist#no &NULL}{m1_muscarinic_antagonist__names__noun}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {m1_muscarinic_antagonist#no &NULL}{m1_muscarinic_antagonist__names__noun}. 
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {m1_muscarinic_antagonist#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {m1_muscarinic_antagonist__names__gerund}? + Assistant: {m1_muscarinic_antagonist#No&Yes}, this molecule is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {m1_muscarinic_antagonist__names__gerund}? + Assistant: {m1_muscarinic_antagonist#No&Yes}, it is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}? 
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}? + Assistant: This is a molecule that is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {m1_muscarinic_antagonist#not &NULL}be {m1_muscarinic_antagonist__names__gerund}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {m1_muscarinic_antagonist#not &NULL}be {m1_muscarinic_antagonist__names__gerund}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {m1_muscarinic_antagonist__names__gerund}:{m1_muscarinic_antagonist#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{m1_muscarinic_antagonist#False&True} + - |- + Task: Please classify a molecule based on the description. 
+ Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result:This molecule is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}. + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {m1_muscarinic_antagonist__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {m1_muscarinic_antagonist%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%m1_muscarinic_antagonist%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%m1_muscarinic_antagonist%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/mattermodeling_stackexchange/meta.yaml b/data/tabular/mattermodeling_stackexchange/meta.yaml index 308073359..294c49de3 100644 --- a/data/tabular/mattermodeling_stackexchange/meta.yaml +++ b/data/tabular/mattermodeling_stackexchange/meta.yaml @@ -1,31 +1,30 @@ ---- name: mattermodeling_stackexchange description: |- - Questions and answers mined from mattermodeling.stackexchange.com. + Questions and answers mined from mattermodeling.stackexchange.com. 
targets: - - id: a - description: answer to the question - type: string - - id: title - description: title of the question - type: string + - id: a + description: answer to the question + type: string + - id: title + description: title of the question + type: string identifiers: - - id: q - type: string - description: question asked on mattermodeling.stackexchange.com + - id: q + type: string + description: question asked on mattermodeling.stackexchange.com license: CC BY-SA links: - - url: mattermodeling.stackexchange.com - description: original data source - - url: https://stackoverflow.com/help/licensing - description: information about the license + - url: mattermodeling.stackexchange.com + description: original data source + - url: https://stackoverflow.com/help/licensing + description: information about the license num_points: 664 templates: - - |- - {#Task: Please answer the question of the user.|Task: Provide a detailed response to the user's question.|Task: Address the user's query with a well-structured answer.|Task: Your role is to respond to the user's question with clarity.|Task: Offer a concise and informative answer to the user's question.|Task: Provide a clear and concise reply to the user's inquiry.!} - {#User: |Question: |Inquiry: |\n!}{#q} - {#Assistant: |Answer: !}{#a} - - |- - {#Task: Generate a title for this question.|Task: Create a meaningful title for this question.|Task: Summarize the question in a title.!} - {#Question: |Inquiry: |\n!}{#q} - {#Assistant: |Title: |Answer: !}{#title} + - |- + {#Task: Please answer the question of the user.|Task: Provide a detailed response to the user's question.|Task: Address the user's query with a well-structured answer.|Task: Your role is to respond to the user's question with clarity.|Task: Offer a concise and informative answer to the user's question.|Task: Provide a clear and concise reply to the user's inquiry.!} + {#User: |Question: |Inquiry: |\n!}{#q} + {#Assistant: |Answer: !}{#a} + - |- + 
{#Task: Generate a title for this question.|Task: Create a meaningful title for this question.|Task: Summarize the question in a title.!} + {#Question: |Inquiry: |\n!}{#q} + {#Assistant: |Title: |Answer: !}{#title} diff --git a/data/tabular/melting_points/meta.yaml b/data/tabular/melting_points/meta.yaml index dee983d9c..531cdd75f 100644 --- a/data/tabular/melting_points/meta.yaml +++ b/data/tabular/melting_points/meta.yaml @@ -1,90 +1,89 @@ ---- name: melting_points description: |- - Literature mined data on melting points of organic compounds. + Literature mined data on melting points of organic compounds. targets: - - id: mp - description: mean melting point - units: deg C - type: continuous - names: - - noun: mean melting point - uris: - - id: mp_range - description: melting point range - units: deg C - type: text - names: - - noun: melting point range + - id: mp + description: mean melting point + units: deg C + type: continuous + names: + - noun: mean melting point + uris: + - id: mp_range + description: melting point range + units: deg C + type: text + names: + - noun: melting point range benchmarks: [] identifiers: - - id: SMILES - type: text - description: SMILES - - id: NAME - type: text - description: name + - id: SMILES + type: text + description: SMILES + - id: NAME + type: text + description: name license: CC BY 4.0 links: - - url: https://ochem.eu/home/show.do?render-mode=popup - description: original data source + - url: https://ochem.eu/home/show.do?render-mode=popup + description: original data source num_points: 274983 bibtex: - - |- - @article{Tetko_2014, - doi = {10.1021/ci5005288}, - url = {https://doi.org/10.1021%2Fci5005288}, - year = 2014, - month = {dec}, - publisher = {American Chemical Society ({ACS})}, - volume = {54}, - number = {12}, - pages = {3320--3329}, - author = {Igor V. Tetko and Yurii Sushko and Sergii Novotarskyi and Luc Patiny and Ivan Kondratov and Alexander E. Petrenko and Larisa Charochkina and Abdullah M. 
Asiri}, - title = {How Accurately Can We Predict the Melting Points of Drug-like Compounds?}, - journal = {J. Chem. Inf. Model.} - } + - |- + @article{Tetko_2014, + doi = {10.1021/ci5005288}, + url = {https://doi.org/10.1021%2Fci5005288}, + year = 2014, + month = {dec}, + publisher = {American Chemical Society ({ACS})}, + volume = {54}, + number = {12}, + pages = {3320--3329}, + author = {Igor V. Tetko and Yurii Sushko and Sergii Novotarskyi and Luc Patiny and Ivan Kondratov and Alexander E. Petrenko and Larisa Charochkina and Abdullah M. Asiri}, + title = {How Accurately Can We Predict the Melting Points of Drug-like Compounds?}, + journal = {J. Chem. Inf. Model.} + } templates: - - |- - {#Task: |Task: |!}{#Predict|Estimate!} the melting point of {NAME#}. - {#Answer: |A: |!}The melting point is {mp#} deg C. - - |- - {#Task: |Task: |!}{#Predict|Estimate!} the melting point of a {#molecule|compound!} with the {SMILES__description} {SMILES#}? - {#Answer: |A: |!}{#The melting point is |!}{mp#} deg C. - - |- - {#Question: |Q: !}What is the melting point of {NAME#}? - {#Answer: |A: |!}{#The melting point is |!}{mp#} deg C. - - |- - {#Question: |Q: !}What is the melting point of a {#molecule|compound!} with the {SMILES__description} {SMILES#}? - {#Answer: |A: |!}{#The melting point is |!}{mp#} deg C. - - |- - {#Question: |Q: !}What is the melting point of {NAME#}? - {#Answer: |A: |!}{#The melting point is in the range |!}{mp_range#} deg C. - - |- - {#Question: |Q: !}What is the melting point of a {#molecule|compound!} with the {SMILES__description} {SMILES#}? - {#Answer: |A: |!}{#The melting point is in the range |!}{mp_range#} deg C. - - |- - {#Question: |Q: !}What is a compound with a melting point of {mp#} deg C? - {#Answer: |A: |!}{NAME#} - - |- - {#Question: |Q: !}What is a compound with a melting point in the range {mp_range#} deg C? - {#Answer: |A: |!}{NAME#} - - |- - User: I have a question about {NAME#}. 
- Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!} - User: What is the melting point of {#this compound|this molecule!}? - Assistant: {#The melting point is |!}{mp#} deg C. - - |- - User: I have a question about a {#compound|molecule!} with the {SMILES__description} {SMILES#}. - Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!} - User: What is the melting point of {#this compound|this molecule!}? - Assistant: {#The melting point is |!}{mp#} deg C. {#Is there anything else I can help you with?|Do you have any other questions?|Do you have any other questions for me?|Is there anything else I can help you with today?|Do you have any other questions for me today?!} - User: {#Yes,|Indeed,!} what is the name of {#this compound|this molecule!}? - Assistant: {NAME#} - - |- - User: I have a question about {NAME#}. - Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!} - User: What is the melting point of {#this compound|this molecule!}? - Assistant: {#The melting point is |!}{mp_range#} deg C. {#Is there anything else I can help you with?|Do you have any other questions?|Do you have any other questions for me?|Is there anything else I can help you with today?|Do you have any other questions for me today?!} - User: {#Yes,|Indeed,!} what is the {SMILES__description} of {#this compound|this molecule!}? - Assistant: {SMILES#} + - |- + {#Task: |Task: |!}{#Predict|Estimate!} the melting point of {NAME#}. + {#Answer: |A: |!}The melting point is {mp#} deg C. + - |- + {#Task: |Task: |!}{#Predict|Estimate!} the melting point of a {#molecule|compound!} with the {SMILES__description} {SMILES#}? + {#Answer: |A: |!}{#The melting point is |!}{mp#} deg C. + - |- + {#Question: |Q: !}What is the melting point of {NAME#}? 
+ {#Answer: |A: |!}{#The melting point is |!}{mp#} deg C. + - |- + {#Question: |Q: !}What is the melting point of a {#molecule|compound!} with the {SMILES__description} {SMILES#}? + {#Answer: |A: |!}{#The melting point is |!}{mp#} deg C. + - |- + {#Question: |Q: !}What is the melting point of {NAME#}? + {#Answer: |A: |!}{#The melting point is in the range |!}{mp_range#} deg C. + - |- + {#Question: |Q: !}What is the melting point of a {#molecule|compound!} with the {SMILES__description} {SMILES#}? + {#Answer: |A: |!}{#The melting point is in the range |!}{mp_range#} deg C. + - |- + {#Question: |Q: !}What is a compound with a melting point of {mp#} deg C? + {#Answer: |A: |!}{NAME#} + - |- + {#Question: |Q: !}What is a compound with a melting point in the range {mp_range#} deg C? + {#Answer: |A: |!}{NAME#} + - |- + User: I have a question about {NAME#}. + Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!} + User: What is the melting point of {#this compound|this molecule!}? + Assistant: {#The melting point is |!}{mp#} deg C. + - |- + User: I have a question about a {#compound|molecule!} with the {SMILES__description} {SMILES#}. + Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!} + User: What is the melting point of {#this compound|this molecule!}? + Assistant: {#The melting point is |!}{mp#} deg C. {#Is there anything else I can help you with?|Do you have any other questions?|Do you have any other questions for me?|Is there anything else I can help you with today?|Do you have any other questions for me today?!} + User: {#Yes,|Indeed,!} what is the name of {#this compound|this molecule!}? + Assistant: {NAME#} + - |- + User: I have a question about {NAME#}. 
+ Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!} + User: What is the melting point of {#this compound|this molecule!}? + Assistant: {#The melting point is |!}{mp_range#} deg C. {#Is there anything else I can help you with?|Do you have any other questions?|Do you have any other questions for me?|Is there anything else I can help you with today?|Do you have any other questions for me today?!} + User: {#Yes,|Indeed,!} what is the {SMILES__description} of {#this compound|this molecule!}? + Assistant: {SMILES#} diff --git a/data/tabular/mofdscribe/meta.yaml b/data/tabular/mofdscribe/meta.yaml index 28bd50d84..e437af8ed 100644 --- a/data/tabular/mofdscribe/meta.yaml +++ b/data/tabular/mofdscribe/meta.yaml @@ -1,61 +1,60 @@ ---- name: mofdscribe description: |- - Text descriptions of MOF structures. + Text descriptions of MOF structures. targets: - - id: description - description: description - type: text - names: - - noun: description + - id: description + description: description + type: text + names: + - noun: description benchmarks: [] identifiers: - - id: cif - type: text - description: CIFFILE - names: - - noun: CIF file - - noun: Crystallographic Information File (CIF) - - noun: CIF card + - id: cif + type: text + description: CIFFILE + names: + - noun: CIF file + - noun: Crystallographic Information File (CIF) + - noun: CIF card license: CC BY 4.0 links: - - url: https://github.com/kjappelbaum/mofdscribe - description: codebase used to generate this dataset + - url: https://github.com/kjappelbaum/mofdscribe + description: codebase used to generate this dataset num_points: 1267 bibtex: - - |- - @article{Jablonka_2023, - doi = {10.1021/acscentsci.2c01177}, - url = {https://doi.org/10.1021%2Facscentsci.2c01177}, - year = 2023, - month = {mar}, - publisher = {American Chemical Society ({ACS})}, - volume = {9}, - number = {4}, - pages = {563--581}, - author = {Kevin Maik Jablonka and 
Andrew S. Rosen and Aditi S. Krishnapriyan and Berend Smit}, - title = {An Ecosystem for Digital Reticular Chemistry}, - journal = {ACS Cent. Sci.} - } - - |- - @article{Ganose_2019, - doi = {10.1557/mrc.2019.94}, - url = {https://doi.org/10.1557%2Fmrc.2019.94}, - year = 2019, - month = {sep}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {9}, - number = {3}, - pages = {874--881}, - author = {Alex M. Ganose and Anubhav Jain}, - title = {Robocrystallographer: automated crystal structure text descriptions and analysis}, - journal = {MRS Communications} - } + - |- + @article{Jablonka_2023, + doi = {10.1021/acscentsci.2c01177}, + url = {https://doi.org/10.1021%2Facscentsci.2c01177}, + year = 2023, + month = {mar}, + publisher = {American Chemical Society ({ACS})}, + volume = {9}, + number = {4}, + pages = {563--581}, + author = {Kevin Maik Jablonka and Andrew S. Rosen and Aditi S. Krishnapriyan and Berend Smit}, + title = {An Ecosystem for Digital Reticular Chemistry}, + journal = {ACS Cent. Sci.} + } + - |- + @article{Ganose_2019, + doi = {10.1557/mrc.2019.94}, + url = {https://doi.org/10.1557%2Fmrc.2019.94}, + year = 2019, + month = {sep}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {9}, + number = {3}, + pages = {874--881}, + author = {Alex M. Ganose and Anubhav Jain}, + title = {Robocrystallographer: automated crystal structure text descriptions and analysis}, + journal = {MRS Communications} + } templates: - - |- - Task: {#Describe|Write a description of!} the structure with the {cif__names__noun} {cif#}. - {#Answer: |A: |!}{description#} - - |- - Task: {#Create|Generate|Propose!} a {cif__names__noun} of a {#metal-organic framework|MOF|crystal structure|structure|material!} with the following description - {description#}. - {#Answer: |A: |!}{cif#} + - |- + Task: {#Describe|Write a description of!} the structure with the {cif__names__noun} {cif#}. 
+ {#Answer: |A: |!}{description#} + - |- + Task: {#Create|Generate|Propose!} a {cif__names__noun} of a {#metal-organic framework|MOF|crystal structure|structure|material!} with the following description + {description#}. + {#Answer: |A: |!}{cif#} diff --git a/data/tabular/mol2svg/meta.yaml b/data/tabular/mol2svg/meta.yaml index 4cb8400a0..1653d05d0 100644 --- a/data/tabular/mol2svg/meta.yaml +++ b/data/tabular/mol2svg/meta.yaml @@ -1,25 +1,24 @@ ---- name: mol2svg description: |- - This dataset contains SVG images of molecules, including some with substructures - highlighted. + This dataset contains SVG images of molecules, including some with substructures + highlighted. targets: - - id: completion - type: text - description: completion + - id: completion + type: text + description: completion identifiers: - - id: prompt - type: text - description: prompt - - id: SMILES - type: SMILES - description: SMILES + - id: prompt + type: text + description: prompt + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 num_points: 16019 links: - - url: https://github.com/lamalab-org/chem-caption - description: Original codebase used to generate this dataset + - url: https://github.com/lamalab-org/chem-caption + description: Original codebase used to generate this dataset templates: - - |- - {prompt#} - {completion#} + - |- + {prompt#} + {completion#} diff --git a/data/tabular/mona/example_processing_and_templates.ipynb b/data/tabular/mona/example_processing_and_templates.ipynb index 2b2e073c4..5f12a6f7f 100644 --- a/data/tabular/mona/example_processing_and_templates.ipynb +++ b/data/tabular/mona/example_processing_and_templates.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "ee8efc5a", "metadata": {}, "outputs": [], @@ -18,6 +18,7 @@ "from zipfile import ZipFile\n", "import os\n", "from tqdm import tqdm\n", + "\n", "# import datasets\n", "import rdkit\n", "import rdkit.Chem as Chem\n", @@ 
-26,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "a7de2c14", "metadata": {}, "outputs": [], @@ -36,17 +37,17 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "f3a7b111", "metadata": {}, "outputs": [], "source": [ - "RDLogger.DisableLog('rdApp.*')" + "RDLogger.DisableLog(\"rdApp.*\")" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "83e1ea15", "metadata": {}, "outputs": [], @@ -54,15 +55,15 @@ "# download the raw data\n", "url = \"https://mona.fiehnlab.ucdavis.edu/rest/downloads/retrieve/19a23fd5-4e06-4122-ae9d-169198ee9794\"\n", "response = requests.get(url)\n", - "tmp_dir = os.path.join(os.getcwd(),\"tmp\")\n", - "os.makedirs(tmp_dir,exist_ok=True)\n", + "tmp_dir = os.path.join(os.getcwd(), \"tmp\")\n", + "os.makedirs(tmp_dir, exist_ok=True)\n", "with ZipFile(BytesIO(response.content)) as my_zip:\n", " my_zip.extractall(path=tmp_dir)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "e18ebb68", "metadata": {}, "outputs": [ @@ -75,26 +76,26 @@ } ], "source": [ - "mona_file = os.path.join(tmp_dir,\"MoNA-export-Experimental_Spectra.json\")\n", + "mona_file = os.path.join(tmp_dir, \"MoNA-export-Experimental_Spectra.json\")\n", "print(os.path.isfile(mona_file))" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "5624add5", "metadata": {}, "outputs": [], "source": [ - "with open(mona_file,\"r\") as json_file:\n", - " _ = json_file.readline() # first line is garbage\n", + "with open(mona_file, \"r\") as json_file:\n", + " _ = json_file.readline() # first line is garbage\n", " line = json_file.readline()\n", "entry = json.loads(line[:-2])" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "10c7cdeb", "metadata": {}, "outputs": [ @@ -634,7 +635,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "94ea06e9", 
"metadata": {}, "outputs": [ @@ -653,7 +654,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "2c0f8b13", "metadata": {}, "outputs": [ @@ -1036,7 +1037,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "bcad758d", "metadata": {}, "outputs": [ @@ -1054,7 +1055,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "id": "b96d3353", "metadata": {}, "outputs": [ @@ -1085,7 +1086,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "b78de6f0", "metadata": {}, "outputs": [ @@ -1233,7 +1234,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "564659a3", "metadata": {}, "outputs": [ @@ -1251,7 +1252,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "1b0843a9", "metadata": {}, "outputs": [ @@ -1269,7 +1270,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "id": "bf226ff9", "metadata": {}, "outputs": [ @@ -1289,7 +1290,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "id": "563e8af1", "metadata": {}, "outputs": [ @@ -1391,19 +1392,19 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "9c117a3f", "metadata": {}, "outputs": [], "source": [ "# all keys should be camelcase\n", "def transform_key(key):\n", - " return key.lower().replace(\" \",\"_\")" + " return key.lower().replace(\" \", \"_\")" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "id": "96c159cc", "metadata": {}, "outputs": [ @@ -1421,7 +1422,7 @@ "# process the rows\n", "df_rows = []\n", "md_key_counter = Counter()\n", - "with open(mona_file,\"r\") as json_file:\n", + "with open(mona_file, \"r\") as json_file:\n", " line = json_file.readline().strip().rstrip(\",\")\n", " count = 0\n", " multi_compound_count, no_compound_count = 0, 0\n", @@ -1432,28 +1433,28 @@ " entry = 
None\n", " if entry is not None:\n", " df_row = {}\n", - " df_row[\"spectrum\"] = entry.get(\"spectrum\",np.nan)\n", - " df_row[\"id\"] = entry.get(\"id\",np.nan)\n", - " df_row[\"score\"] = entry.get(\"score\",{}).get(\"score\",np.nan)\n", - " df_row[\"library\"]= entry.get(\"library\",{}).get(\"library\",np.nan)\n", - " mds = entry.get(\"metaData\",[])\n", + " df_row[\"spectrum\"] = entry.get(\"spectrum\", np.nan)\n", + " df_row[\"id\"] = entry.get(\"id\", np.nan)\n", + " df_row[\"score\"] = entry.get(\"score\", {}).get(\"score\", np.nan)\n", + " df_row[\"library\"] = entry.get(\"library\", {}).get(\"library\", np.nan)\n", + " mds = entry.get(\"metaData\", [])\n", " # get all metadata properties\n", " md_keys = []\n", " for md in mds:\n", " k = md[\"name\"]\n", - " v = md.get(\"value\",np.nan)\n", - " df_row[\"md_\"+transform_key(k)] = v\n", + " v = md.get(\"value\", np.nan)\n", + " df_row[\"md_\" + transform_key(k)] = v\n", " if not (v is np.nan):\n", " md_keys.append(k)\n", " md_key_counter.update(md_keys)\n", - " compounds = entry.get(\"compound\",[])\n", + " compounds = entry.get(\"compound\", [])\n", " if len(compounds) > 0:\n", " if len(compounds) > 1:\n", - " multi_compound_count +=1\n", + " multi_compound_count += 1\n", " compound = compounds[0]\n", - " df_row[\"inchi\"] = compound.get(\"inchi\",np.nan)\n", - " df_row[\"inchikey\"] = compound.get(\"inchiKey\",np.nan)\n", - " compound_mds = compound.get(\"metaData\",[])\n", + " df_row[\"inchi\"] = compound.get(\"inchi\", np.nan)\n", + " df_row[\"inchikey\"] = compound.get(\"inchiKey\", np.nan)\n", + " compound_mds = compound.get(\"metaData\", [])\n", " for compound_md in compound_mds:\n", " k = compound_md[\"name\"]\n", " if k == \"SMILES\":\n", @@ -1465,14 +1466,16 @@ " no_compound_count += 1\n", " df_rows.append(df_row)\n", " if count % 100000 == 0:\n", - " print(f\"> line_count = {count}, num_rows = {len(df_rows)}, num_md_keys = {len(md_key_counter)}\")\n", + " print(\n", + " f\"> line_count = {count}, 
num_rows = {len(df_rows)}, num_md_keys = {len(md_key_counter)}\"\n", + " )\n", " line = json_file.readline().strip().rstrip(\",\")\n", " count += 1" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "cd0858cf", "metadata": {}, "outputs": [ @@ -1485,12 +1488,12 @@ } ], "source": [ - "print(no_compound_count,multi_compound_count)" + "print(no_compound_count, multi_compound_count)" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "1498e050", "metadata": {}, "outputs": [ @@ -1603,13 +1606,13 @@ ], "source": [ "# find the 100 most frequent metadata keys\n", - "md_top_keys = [\"md_\"+transform_key(k) for k,v in md_key_counter.most_common(100)]\n", + "md_top_keys = [\"md_\" + transform_key(k) for k, v in md_key_counter.most_common(100)]\n", "pprint(md_top_keys)" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "f2876070", "metadata": {}, "outputs": [], @@ -1631,12 +1634,12 @@ " \"md_license\",\n", " \"md_date\",\n", " \"md_accession\",\n", - " \"md_precursor_mz\", # alias for precursor_m/z\n", - " \"md_ion_type\", # alias for prec_type\n", + " \"md_precursor_mz\", # alias for precursor_m/z\n", + " \"md_ion_type\", # alias for prec_type\n", " \"md_ionization_energy\",\n", - " \"md_collision_energy_voltage\", # alias for collision energy\n", - " \"md_adduct\", # alias for prec_type\n", - " \"md_derivatization_type\"\n", + " \"md_collision_energy_voltage\", # alias for collision energy\n", + " \"md_adduct\", # alias for prec_type\n", + " \"md_derivatization_type\",\n", "]\n", "for i in range(len(df_rows)):\n", " cur_row = df_rows[i]\n", @@ -1652,7 +1655,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "b4296b41", "metadata": {}, "outputs": [ @@ -1716,7 +1719,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "6aab8be3", "metadata": {}, "outputs": [], @@ -1726,7 +1729,7 @@ }, { "cell_type": 
"code", - "execution_count": 24, + "execution_count": null, "id": "19ca503e", "metadata": {}, "outputs": [ @@ -1744,7 +1747,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "e9172622", "metadata": {}, "outputs": [ @@ -1790,7 +1793,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "d5ed018b", "metadata": {}, "outputs": [ @@ -1838,7 +1841,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "baff4039", "metadata": {}, "outputs": [ @@ -1853,13 +1856,13 @@ ], "source": [ "# look at compound id\n", - "print(df[\"inchikey\"].isna().sum(),df[\"inchi\"].isna().sum(),df[\"smiles\"].isna().sum())\n", - "print(df[[\"inchikey\",\"inchi\",\"smiles\"]].isna().all(axis=1).sum())" + "print(df[\"inchikey\"].isna().sum(), df[\"inchi\"].isna().sum(), df[\"smiles\"].isna().sum())\n", + "print(df[[\"inchikey\", \"inchi\", \"smiles\"]].isna().all(axis=1).sum())" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "2e7b2c73", "metadata": {}, "outputs": [ @@ -1900,14 +1903,15 @@ " except:\n", " pass\n", " return mol\n", - " \n", - "mol = df.progress_apply(get_mol,axis=1)\n", + "\n", + "\n", + "mol = df.progress_apply(get_mol, axis=1)\n", "print(mol.isna().sum())" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "905b3ee8", "metadata": {}, "outputs": [ @@ -1927,12 +1931,14 @@ " except:\n", " pass\n", " return smiles\n", + "\n", + "\n", "smiles = mol.progress_apply(get_smiles)" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "f3deabce", "metadata": {}, "outputs": [ @@ -1952,12 +1958,14 @@ " except:\n", " pass\n", " return inchi\n", + "\n", + "\n", "inchi = mol.progress_apply(get_inchi)" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "9cab140c", "metadata": {}, "outputs": [ @@ -1977,12 +1985,14 @@ " except:\n", " pass\n", " return 
inchikey\n", + "\n", + "\n", "inchikey = mol.progress_apply(get_inchikey)" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "1d9345d5", "metadata": {}, "outputs": [ @@ -1998,18 +2008,18 @@ "source": [ "# update dataframe\n", "proc_df = df.copy()\n", - "print(proc_df[[\"smiles\",\"inchi\",\"inchikey\"]].isna().any(axis=1).mean())\n", - "proc_df.loc[:,\"smiles\"] = smiles\n", - "proc_df.loc[:,\"inchi\"] = inchi\n", - "proc_df.loc[:,\"inchikey\"] = inchikey\n", - "print(proc_df[[\"smiles\",\"inchi\",\"inchikey\"]].isna().any(axis=1).mean())\n", + "print(proc_df[[\"smiles\", \"inchi\", \"inchikey\"]].isna().any(axis=1).mean())\n", + "proc_df.loc[:, \"smiles\"] = smiles\n", + "proc_df.loc[:, \"inchi\"] = inchi\n", + "proc_df.loc[:, \"inchikey\"] = inchikey\n", + "print(proc_df[[\"smiles\", \"inchi\", \"inchikey\"]].isna().any(axis=1).mean())\n", "# drop entries with invalid smiles/inchi/inchikey\n", - "proc_df = proc_df[proc_df[[\"smiles\",\"inchi\",\"inchikey\"]].notna().all(axis=1)]" + "proc_df = proc_df[proc_df[[\"smiles\", \"inchi\", \"inchikey\"]].notna().all(axis=1)]" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "b3deec0c", "metadata": {}, "outputs": [ @@ -2047,7 +2057,7 @@ " 'precursor_mz']" ] }, - "execution_count": 33, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2058,7 +2068,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "7bb650de", "metadata": {}, "outputs": [], @@ -2069,7 +2079,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "id": "152b7ab1", "metadata": {}, "outputs": [ @@ -2091,7 +2101,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 35, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2102,22 +2112,17 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "id": "852e61c2", "metadata": {}, "outputs": 
[], "source": [ - "proc_df.loc[:,\"ms_level\"] = proc_df[\"ms_level\"].replace(\n", - " {\n", - " \"2\": \"MS2\",\n", - " \"MS\": \"MS1\"\n", - " }\n", - ")" + "proc_df.loc[:, \"ms_level\"] = proc_df[\"ms_level\"].replace({\"2\": \"MS2\", \"MS\": \"MS1\"})" ] }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "id": "d59afd34", "metadata": {}, "outputs": [ @@ -2137,7 +2142,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 37, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2148,22 +2153,21 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "id": "ce43cc9a", "metadata": {}, "outputs": [], "source": [ "# merge precursor_type aliases\n", - "proc_df.loc[:,\"collision_energy\"] = proc_df[\"collision_energy\"].where(\n", - " ~(proc_df[\"collision_energy\"].isna()),\n", - " proc_df[\"collision_energy_voltage\"]\n", + "proc_df.loc[:, \"collision_energy\"] = proc_df[\"collision_energy\"].where(\n", + " ~(proc_df[\"collision_energy\"].isna()), proc_df[\"collision_energy_voltage\"]\n", ")\n", "proc_df = proc_df.drop(columns=\"collision_energy_voltage\")" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "id": "ef22cf11", "metadata": {}, "outputs": [ @@ -2185,7 +2189,7 @@ "Name: count, Length: 948, dtype: int64" ] }, - "execution_count": 39, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2196,24 +2200,19 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "id": "e6507c37", "metadata": {}, "outputs": [], "source": [ - "proc_df.loc[:,\"ionization_mode\"] = proc_df[\"ionization_mode\"].replace(\n", - " {\n", - " \"N/A\": np.nan,\n", - " \"ESI\": np.nan,\n", - " \"Positive\":\"positive\",\n", - " \"Negative\":\"negative\"\n", - " }\n", + "proc_df.loc[:, \"ionization_mode\"] = proc_df[\"ionization_mode\"].replace(\n", + " {\"N/A\": np.nan, \"ESI\": np.nan, \"Positive\": \"positive\", 
\"Negative\": \"negative\"}\n", ")" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": null, "id": "0f4262b6", "metadata": {}, "outputs": [ @@ -2226,7 +2225,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 41, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2237,7 +2236,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "id": "df43b303", "metadata": {}, "outputs": [ @@ -2259,7 +2258,7 @@ "Name: count, Length: 77, dtype: int64" ] }, - "execution_count": 42, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2270,7 +2269,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "id": "c4f5d0bf", "metadata": {}, "outputs": [ @@ -2285,7 +2284,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 43, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2296,26 +2295,24 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": null, "id": "ab8624fb", "metadata": {}, "outputs": [], "source": [ "# merge precursor_type aliases\n", - "proc_df.loc[:,\"precursor_type\"] = proc_df[\"precursor_type\"].where(\n", - " ~(proc_df[\"precursor_type\"].isna()),\n", - " proc_df[\"adduct\"]\n", + "proc_df.loc[:, \"precursor_type\"] = proc_df[\"precursor_type\"].where(\n", + " ~(proc_df[\"precursor_type\"].isna()), proc_df[\"adduct\"]\n", ")\n", - "proc_df.loc[:,\"precursor_type\"] = proc_df[\"precursor_type\"].where(\n", - " ~(proc_df[\"precursor_type\"].isna()),\n", - " proc_df[\"ion_type\"]\n", + "proc_df.loc[:, \"precursor_type\"] = proc_df[\"precursor_type\"].where(\n", + " ~(proc_df[\"precursor_type\"].isna()), proc_df[\"ion_type\"]\n", ")\n", - "proc_df = proc_df.drop(columns=[\"adduct\",\"ion_type\"])" + "proc_df = proc_df.drop(columns=[\"adduct\", \"ion_type\"])" ] }, { "cell_type": "code", - "execution_count": 45, + "execution_count": null, "id": "d667ec16", "metadata": {}, 
"outputs": [ @@ -2337,7 +2334,7 @@ "Name: count, Length: 137, dtype: int64" ] }, - "execution_count": 45, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2348,22 +2345,21 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": null, "id": "c7fd8802", "metadata": {}, "outputs": [], "source": [ "# merge precursor_m/z aliases\n", - "proc_df.loc[:,\"precursor_m/z\"] = proc_df[\"precursor_m/z\"].where(\n", - " ~(proc_df[\"precursor_m/z\"].isna()),\n", - " proc_df[\"precursor_mz\"]\n", + "proc_df.loc[:, \"precursor_m/z\"] = proc_df[\"precursor_m/z\"].where(\n", + " ~(proc_df[\"precursor_m/z\"].isna()), proc_df[\"precursor_mz\"]\n", ")\n", "proc_df = proc_df.drop(columns=\"precursor_mz\")" ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": null, "id": "5aeef3ff", "metadata": {}, "outputs": [ @@ -2385,7 +2381,7 @@ "Name: count, Length: 48388, dtype: int64" ] }, - "execution_count": 47, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2396,7 +2392,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": null, "id": "3433a1b6", "metadata": {}, "outputs": [ @@ -2455,7 +2451,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 48, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2466,7 +2462,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": null, "id": "e243e8aa", "metadata": {}, "outputs": [ @@ -2508,7 +2504,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 49, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2519,7 +2515,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": null, "id": "da163298", "metadata": {}, "outputs": [ @@ -2554,7 +2550,7 @@ "dtype: float64" ] }, - "execution_count": 50, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -2565,7 +2561,7 @@ }, { "cell_type": "code", - 
"execution_count": 51, + "execution_count": null, "id": "0fbfb46a", "metadata": {}, "outputs": [], @@ -2590,8 +2586,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" + "pygments_lexer": "ipython3" } }, "nbformat": 4, diff --git a/data/tabular/mona/meta.yaml b/data/tabular/mona/meta.yaml index ab9f4ae2b..3f991800a 100644 --- a/data/tabular/mona/meta.yaml +++ b/data/tabular/mona/meta.yaml @@ -1,87 +1,84 @@ ---- name: mona description: MassBank of North America, public repository of mass spectra for small molecules targets: - - id: spectral_entropy - type: continuous - units: nat - names: - - noun: spectral entropy - description: The entropy of the spectrum. - - id: normalized_entropy - type: continuous - units: - names: - - noun: normalized entropy - description: The normalized entropy of the spectrum (ratio of spectral entropy to maximum possible entropy for a spectrum with the same number of - peaks). + - id: spectral_entropy + type: continuous + units: nat + names: + - noun: spectral entropy + description: The entropy of the spectrum. + - id: normalized_entropy + type: continuous + units: + names: + - noun: normalized entropy + description: The normalized entropy of the spectrum (ratio of spectral entropy to maximum possible entropy for a spectrum with the same number of peaks). 
identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: inchi - type: InChI - description: InChI - - id: id - type: Other - description: MassBank ID - sample: 'False' + - id: SMILES + type: SMILES + description: SMILES + - id: inchi + type: InChI + description: InChI + - id: id + type: Other + description: MassBank ID + sample: "False" license: CC BY 4.0 links: - - name: MassBank of North America - url: https://mona.fiehnlab.ucdavis.edu/ - description: original repository - - name: HuggingFace dataset upload - url: https://huggingface.co/datasets/adamoyoung/mona - description: HuggingFace dataset upload + - name: MassBank of North America + url: https://mona.fiehnlab.ucdavis.edu/ + description: original repository + - name: HuggingFace dataset upload + url: https://huggingface.co/datasets/adamoyoung/mona + description: HuggingFace dataset upload benchmarks: [] num_points: 194721 bibtex: [] templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. - - Based on the {SMILES__description} {#representation of |!}{SMILES#}, the molecule has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. - - The {SMILES__description} {SMILES#} {#represents|is representing!} a molecule {#that has a|with a!} {spectral_entropy__names__noun} of {spectral_entropy#} - {spectral_entropy__units}. - - The molecule with the {SMILES__description} {SMILES#} has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. - - |- - Task: Please predict a molecule feature based on the description. - Description: Predict the {spectral_entropy__names__noun} in {spectral_entropy__units}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {spectral_entropy__units} without using any {#other|additional!} words. 
- Result: {spectral_entropy#} {spectral_entropy__units} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} the {spectral_entropy__names__noun} in {spectral_entropy__units} of the molecule with the {SMILES__description} {SMILES#}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, this molecule has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. - Assistant: This is a molecule that has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should have a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} represents a molecule that has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. 
|This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should have a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} represents a molecule that has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}: {SMILES#} - - The {spectral_entropy__names__noun} of the molecule with the {SMILES__description} {SMILES#} is:{spectral_entropy#} {spectral_entropy__units} - - The {spectral_entropy__names__noun} of the {SMILES__description} {SMILES#} is:{spectral_entropy#} {spectral_entropy__units} - - The {spectral_entropy__names__noun} of the molecule {SMILES__description} {SMILES#} is:{spectral_entropy#} {spectral_entropy__units} - - |- - Task: Please predict a molecule feature based on the description. - Description: Predict the {spectral_entropy__names__noun} in {spectral_entropy__units} of a molecule. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {spectral_entropy__units} without using any {#other|additional!} words. - Result:{spectral_entropy#} {spectral_entropy__units} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that has {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. - Result:{SMILES#} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. + - Based on the {SMILES__description} {#representation of |!}{SMILES#}, the molecule has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. 
+ - The {SMILES__description} {SMILES#} {#represents|is representing!} a molecule {#that has a|with a!} {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. + - The molecule with the {SMILES__description} {SMILES#} has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. + - |- + Task: Please predict a molecule feature based on the description. + Description: Predict the {spectral_entropy__names__noun} in {spectral_entropy__units}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {spectral_entropy__units} without using any {#other|additional!} words. + Result: {spectral_entropy#} {spectral_entropy__units} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} the {spectral_entropy__names__noun} in {spectral_entropy__units} of the molecule with the {SMILES__description} {SMILES#}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, this molecule has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. 
+ Assistant: This is a molecule that has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should have a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} represents a molecule that has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should have a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} represents a molecule that has a {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}: {SMILES#} + - The {spectral_entropy__names__noun} of the molecule with the {SMILES__description} {SMILES#} is:{spectral_entropy#} {spectral_entropy__units} + - The {spectral_entropy__names__noun} of the {SMILES__description} {SMILES#} is:{spectral_entropy#} {spectral_entropy__units} + - The {spectral_entropy__names__noun} of the molecule {SMILES__description} {SMILES#} is:{spectral_entropy#} {spectral_entropy__units} + - |- + Task: Please predict a molecule feature based on the description. + Description: Predict the {spectral_entropy__names__noun} in {spectral_entropy__units} of a molecule. 
+ {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {spectral_entropy__units} without using any {#other|additional!} words. + Result:{spectral_entropy#} {spectral_entropy__units} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that has {spectral_entropy__names__noun} of {spectral_entropy#} {spectral_entropy__units}. + Result:{SMILES#} diff --git a/data/tabular/moses/meta.yaml b/data/tabular/moses/meta.yaml index 98e824fb6..e709cc923 100644 --- a/data/tabular/moses/meta.yaml +++ b/data/tabular/moses/meta.yaml @@ -1,35 +1,34 @@ ---- name: moses description: |- - Molecular Sets (MOSES) is a benchmark platform - for distribution learning based molecule generation. - Within this benchmark, MOSES provides a cleaned dataset of molecules that are ideal of optimization. - It is processed from the ZINC Clean Leads dataset. + Molecular Sets (MOSES) is a benchmark platform + for distribution learning based molecule generation. + Within this benchmark, MOSES provides a cleaned dataset of molecules that are ideal for optimization. + It is processed from the ZINC Clean Leads dataset. 
benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://arxiv.org/abs/1811.12823 - description: Article about original dataset - - url: https://pubs.acs.org/doi/abs/10.1021/acs.jcim.5b00559 - description: Link to publication of associated dataset - zinc - - url: https://github.com/molecularsets/moses - description: Github repository concering the datset + - url: https://arxiv.org/abs/1811.12823 + description: Article about original dataset + - url: https://pubs.acs.org/doi/abs/10.1021/acs.jcim.5b00559 + description: Link to publication of associated dataset - zinc + - url: https://github.com/molecularsets/moses + description: Github repository concerning the dataset num_points: 1936962 bibtex: - - |- - @article{10.3389/fphar.2020.565644, - title={{M}olecular {S}ets ({MOSES}): {A} {B}enchmarking {P}latform for {M}olecular {G}eneration {M}odels}, - author={Polykovskiy, Daniil and Zhebrak, Alexander and Sanchez-Lengeling, Benjamin and Golovanov, - Sergey and Tatanov, Oktai and Belyaev, Stanislav and Kurbanov, Rauf and Artamonov, - Aleksey and Aladinskiy, Vladimir and Veselov, Mark and Kadurin, Artur and Johansson, - Simon and Chen, Hongming and Nikolenko, Sergey and Aspuru-Guzik, Alan and Zhavoronkov, Alex}, - journal={Frontiers in Pharmacology}, - year={2020} - } + - |- + @article{10.3389/fphar.2020.565644, + title={{M}olecular {S}ets ({MOSES}): {A} {B}enchmarking {P}latform for {M}olecular {G}eneration {M}odels}, + author={Polykovskiy, Daniil and Zhebrak, Alexander and Sanchez-Lengeling, Benjamin and Golovanov, + Sergey and Tatanov, Oktai and Belyaev, Stanislav and Kurbanov, Rauf and Artamonov, + Aleksey and Aladinskiy, Vladimir and Veselov, Mark and Kadurin, Artur and Johansson, + Simon and Chen, Hongming and Nikolenko, 
Sergey and Aspuru-Guzik, Alan and Zhavoronkov, Alex}, + journal={Frontiers in Pharmacology}, + year={2020} + } diff --git a/data/tabular/moses/transform.py b/data/tabular/moses/transform.py index 207e76f02..5d5de23a2 100644 --- a/data/tabular/moses/transform.py +++ b/data/tabular/moses/transform.py @@ -69,7 +69,7 @@ def get_single_dataset(dataset_name): }, { "url": "https://github.com/molecularsets/moses", - "description": "Github repository concering the datset", + "description": "Github repository concerning the dataset", }, ], "num_points": len(df), # number of datapoints in this dataset diff --git a/data/tabular/mp_anisotropy/meta.yaml b/data/tabular/mp_anisotropy/meta.yaml index 6920ca79f..c11de6c80 100644 --- a/data/tabular/mp_anisotropy/meta.yaml +++ b/data/tabular/mp_anisotropy/meta.yaml @@ -1,94 +1,93 @@ ---- name: mp_elastic_anisotropy description: |- - DFT-computed shear moduli of inorganic solids + DFT-computed shear moduli of inorganic solids targets: - - id: elastic_anisotropy - description: elastic anisotropic index - units: - type: continuous - significant_figures: 3 - names: - - noun: elastic anisotropy index computed using DFT with the PBE GGA functional - - noun: elastic anisotropy index computed using DFT with the PBE functional - - noun: elastic anisotropy index derived from DFT simulations with the PBE functional - uris: + - id: elastic_anisotropy + description: elastic anisotropic index + units: + type: continuous + significant_figures: 3 + names: + - noun: elastic anisotropy index computed using DFT with the PBE GGA functional + - noun: elastic anisotropy index computed using DFT with the PBE functional + - noun: elastic anisotropy index derived from DFT simulations with the PBE functional + uris: benchmarks: - - name: CrabNet - link: https://github.com/anthony-wang/CrabNet/tree/master/data/benchmark_data - split_column: split + - name: CrabNet + link: https://github.com/anthony-wang/CrabNet/tree/master/data/benchmark_data + split_column: 
split identifiers: - - id: formula - type: COMPOSITION - description: composition + - id: formula + type: COMPOSITION + description: composition license: CC BY 4.0 links: - - url: https://next-gen.materialsproject.org/ - description: original data source + - url: https://next-gen.materialsproject.org/ + description: original data source num_points: 6184 bibtex: - - |- - @article{Jain_2013, - doi = {10.1063/1.4812323}, - url = {https://doi.org/10.1063%2F1.4812323}, - year = 2013, - month = {jul}, - publisher = {{AIP} Publishing}, - volume = {1}, - number = {1}, - author = {Anubhav Jain and Shyue Ping Ong and Geoffroy Hautier - and Wei Chen and William Davidson Richards and Stephen Dacek - and Shreyas Cholia and Dan Gunter and David Skinner - and Gerbrand Ceder and Kristin A. Persson}, - title = {Commentary: The Materials Project: - A materials genome approach to accelerating materials innovation}, - journal = {{APL} Materials} - } - - |- - @article{Ong_2015, - doi = {10.1016/j.commatsci.2014.10.037}, - url = {https://doi.org/10.1016%2Fj.commatsci.2014.10.037}, - year = 2015, - month = {feb}, - publisher = {Elsevier {BV}}, - volume = {97}, - pages = {209--215}, - author = {Shyue Ping Ong and Shreyas Cholia and Anubhav Jain - and Miriam Brafman and Dan Gunter and Gerbrand Ceder and Kristin A. 
Persson}, - title = {The Materials Application Programming Interface ({API}): - A simple, flexible and efficient {API} for materials data based - on {REpresentational} State Transfer ({REST}) principles}, - journal = {Computational Materials Science} - } - - |- - @article{de_Jong_2015, - doi = {10.1038/sdata.2015.9}, - url = {https://doi.org/10.1038%2Fsdata.2015.9}, - year = 2015, - month = {mar}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {2}, - number = {1}, - author = {Maarten de Jong and Wei Chen and Thomas Angsten - and Anubhav Jain and Randy Notestine and Anthony Gamst - and Marcel Sluiter and Chaitanya Krishna Ande - and Sybrand van der Zwaag and Jose J Plata and Cormac Toher - and Stefano Curtarolo and Gerbrand Ceder and Kristin A. Persson and Mark Asta}, - title = {Charting the complete elastic properties of inorganic crystalline compounds}, - journal = {Sci Data} - } + - |- + @article{Jain_2013, + doi = {10.1063/1.4812323}, + url = {https://doi.org/10.1063%2F1.4812323}, + year = 2013, + month = {jul}, + publisher = {{AIP} Publishing}, + volume = {1}, + number = {1}, + author = {Anubhav Jain and Shyue Ping Ong and Geoffroy Hautier + and Wei Chen and William Davidson Richards and Stephen Dacek + and Shreyas Cholia and Dan Gunter and David Skinner + and Gerbrand Ceder and Kristin A. Persson}, + title = {Commentary: The Materials Project: + A materials genome approach to accelerating materials innovation}, + journal = {{APL} Materials} + } + - |- + @article{Ong_2015, + doi = {10.1016/j.commatsci.2014.10.037}, + url = {https://doi.org/10.1016%2Fj.commatsci.2014.10.037}, + year = 2015, + month = {feb}, + publisher = {Elsevier {BV}}, + volume = {97}, + pages = {209--215}, + author = {Shyue Ping Ong and Shreyas Cholia and Anubhav Jain + and Miriam Brafman and Dan Gunter and Gerbrand Ceder and Kristin A. 
Persson}, + title = {The Materials Application Programming Interface ({API}): + A simple, flexible and efficient {API} for materials data based + on {REpresentational} State Transfer ({REST}) principles}, + journal = {Computational Materials Science} + } + - |- + @article{de_Jong_2015, + doi = {10.1038/sdata.2015.9}, + url = {https://doi.org/10.1038%2Fsdata.2015.9}, + year = 2015, + month = {mar}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {2}, + number = {1}, + author = {Maarten de Jong and Wei Chen and Thomas Angsten + and Anubhav Jain and Randy Notestine and Anthony Gamst + and Marcel Sluiter and Chaitanya Krishna Ande + and Sybrand van der Zwaag and Jose J Plata and Cormac Toher + and Stefano Curtarolo and Gerbrand Ceder and Kristin A. Persson and Mark Asta}, + title = {Charting the complete elastic properties of inorganic crystalline compounds}, + journal = {Sci Data} + } templates: - - The {elastic_anisotropy__names__noun} of {#the compound|the solid!} {formula#} is {elastic_anisotropy#}. - - |- - Question: How large is the {elastic_anisotropy__names__noun} of {#the compound|the solid!} {formula#}? - Answer: The {elastic_anisotropy__names__noun} of {#the compound|the solid!} {formula#} is {elastic_anisotropy#}. - - |- - User: {#I would like to|I want to!} know the {elastic_anisotropy__names__noun} of {#the compound|the solid!} {formula#}. - Assistant: The {elastic_anisotropy__names__noun} of {#the compound|the solid!} {formula#} is {elastic_anisotropy#}. - - |- - User: {#I would like to|I want to!} design a {#compound|material|solid!} with a {elastic_anisotropy__names__noun} of {elastic_anisotropy#}. - Assistant: {#I found|Here is|I have found|Here is!} a {#compound|material|solid!} with a {elastic_anisotropy__names__noun} of {elastic_anisotropy#}: {formula#}. - - A {#compound|material|solid!} with a {elastic_anisotropy__names__noun} of {elastic_anisotropy#} is {formula#}. 
- - |- - Task: Please {#give me|create|generate!} a {#compound|material|solid!} with a {elastic_anisotropy__names__noun} of {elastic_anisotropy#}. - Result: {formula#} + - The {elastic_anisotropy__names__noun} of {#the compound|the solid!} {formula#} is {elastic_anisotropy#}. + - |- + Question: How large is the {elastic_anisotropy__names__noun} of {#the compound|the solid!} {formula#}? + Answer: The {elastic_anisotropy__names__noun} of {#the compound|the solid!} {formula#} is {elastic_anisotropy#}. + - |- + User: {#I would like to|I want to!} know the {elastic_anisotropy__names__noun} of {#the compound|the solid!} {formula#}. + Assistant: The {elastic_anisotropy__names__noun} of {#the compound|the solid!} {formula#} is {elastic_anisotropy#}. + - |- + User: {#I would like to|I want to!} design a {#compound|material|solid!} with a {elastic_anisotropy__names__noun} of {elastic_anisotropy#}. + Assistant: {#I found|Here is|I have found|Here is!} a {#compound|material|solid!} with a {elastic_anisotropy__names__noun} of {elastic_anisotropy#}: {formula#}. + - A {#compound|material|solid!} with a {elastic_anisotropy__names__noun} of {elastic_anisotropy#} is {formula#}. + - |- + Task: Please {#give me|create|generate!} a {#compound|material|solid!} with a {elastic_anisotropy__names__noun} of {elastic_anisotropy#}. 
+ Result: {formula#} diff --git a/data/tabular/mp_bulk_modulus/meta.yaml b/data/tabular/mp_bulk_modulus/meta.yaml index 6de8525a0..3ec3f02d0 100644 --- a/data/tabular/mp_bulk_modulus/meta.yaml +++ b/data/tabular/mp_bulk_modulus/meta.yaml @@ -1,94 +1,93 @@ ---- name: mp_bulk_modulus description: |- - DFT-computed bulk moduli of inorganic solids + DFT-computed bulk moduli of inorganic solids targets: - - id: bulk_modulus - description: bulk modulus of materials - units: GPa - type: continuous - significant_figures: 3 - names: - - noun: bulk modulus computed using DFT with the PBE GGA functional - - noun: bulk modulus computed using DFT with the PBE functional - - noun: bulk modulus derived from DFT simulations with the PBE functional - uris: + - id: bulk_modulus + description: bulk modulus of materials + units: GPa + type: continuous + significant_figures: 3 + names: + - noun: bulk modulus computed using DFT with the PBE GGA functional + - noun: bulk modulus computed using DFT with the PBE functional + - noun: bulk modulus derived from DFT simulations with the PBE functional + uris: benchmarks: - - name: CrabNet - link: https://github.com/anthony-wang/CrabNet/tree/master/data/benchmark_data - split_column: split + - name: CrabNet + link: https://github.com/anthony-wang/CrabNet/tree/master/data/benchmark_data + split_column: split identifiers: - - id: formula - type: text - description: composition + - id: formula + type: text + description: composition license: CC BY 4.0 links: - - url: https://next-gen.materialsproject.org/ - description: original data source + - url: https://next-gen.materialsproject.org/ + description: original data source num_points: 6307 bibtex: - - |- - @article{Jain_2013, - doi = {10.1063/1.4812323}, - url = {https://doi.org/10.1063%2F1.4812323}, - year = 2013, - month = {jul}, - publisher = {{AIP} Publishing}, - volume = {1}, - number = {1}, - author = {Anubhav Jain and Shyue Ping Ong and Geoffroy Hautier - and Wei Chen and William Davidson 
Richards and Stephen Dacek - and Shreyas Cholia and Dan Gunter and David Skinner - and Gerbrand Ceder and Kristin A. Persson}, - title = {Commentary: The Materials Project: - A materials genome approach to accelerating materials innovation}, - journal = {{APL} Materials} - } - - |- - @article{Ong_2015, - doi = {10.1016/j.commatsci.2014.10.037}, - url = {https://doi.org/10.1016%2Fj.commatsci.2014.10.037}, - year = 2015, - month = {feb}, - publisher = {Elsevier {BV}}, - volume = {97}, - pages = {209--215}, - author = {Shyue Ping Ong and Shreyas Cholia and Anubhav Jain - and Miriam Brafman and Dan Gunter and Gerbrand Ceder and Kristin A. Persson}, - title = {The Materials Application Programming Interface ({API}): - A simple, flexible and efficient {API} for materials data based - on {REpresentational} State Transfer ({REST}) principles}, - journal = {Computational Materials Science} - } - - |- - @article{de_Jong_2015, - doi = {10.1038/sdata.2015.9}, - url = {https://doi.org/10.1038%2Fsdata.2015.9}, - year = 2015, - month = {mar}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {2}, - number = {1}, - author = {Maarten de Jong and Wei Chen and Thomas Angsten - and Anubhav Jain and Randy Notestine and Anthony Gamst - and Marcel Sluiter and Chaitanya Krishna Ande - and Sybrand van der Zwaag and Jose J Plata and Cormac Toher - and Stefano Curtarolo and Gerbrand Ceder and Kristin A. Persson and Mark Asta}, - title = {Charting the complete elastic properties of inorganic crystalline compounds}, - journal = {Sci Data} - } + - |- + @article{Jain_2013, + doi = {10.1063/1.4812323}, + url = {https://doi.org/10.1063%2F1.4812323}, + year = 2013, + month = {jul}, + publisher = {{AIP} Publishing}, + volume = {1}, + number = {1}, + author = {Anubhav Jain and Shyue Ping Ong and Geoffroy Hautier + and Wei Chen and William Davidson Richards and Stephen Dacek + and Shreyas Cholia and Dan Gunter and David Skinner + and Gerbrand Ceder and Kristin A. 
Persson}, + title = {Commentary: The Materials Project: + A materials genome approach to accelerating materials innovation}, + journal = {{APL} Materials} + } + - |- + @article{Ong_2015, + doi = {10.1016/j.commatsci.2014.10.037}, + url = {https://doi.org/10.1016%2Fj.commatsci.2014.10.037}, + year = 2015, + month = {feb}, + publisher = {Elsevier {BV}}, + volume = {97}, + pages = {209--215}, + author = {Shyue Ping Ong and Shreyas Cholia and Anubhav Jain + and Miriam Brafman and Dan Gunter and Gerbrand Ceder and Kristin A. Persson}, + title = {The Materials Application Programming Interface ({API}): + A simple, flexible and efficient {API} for materials data based + on {REpresentational} State Transfer ({REST}) principles}, + journal = {Computational Materials Science} + } + - |- + @article{de_Jong_2015, + doi = {10.1038/sdata.2015.9}, + url = {https://doi.org/10.1038%2Fsdata.2015.9}, + year = 2015, + month = {mar}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {2}, + number = {1}, + author = {Maarten de Jong and Wei Chen and Thomas Angsten + and Anubhav Jain and Randy Notestine and Anthony Gamst + and Marcel Sluiter and Chaitanya Krishna Ande + and Sybrand van der Zwaag and Jose J Plata and Cormac Toher + and Stefano Curtarolo and Gerbrand Ceder and Kristin A. Persson and Mark Asta}, + title = {Charting the complete elastic properties of inorganic crystalline compounds}, + journal = {Sci Data} + } templates: - - The {bulk_modulus__names__noun} of {#the compound|the solid!} {formula#} is {bulk_modulus#} {bulk_modulus__units}. - - |- - Question: How large is the {bulk_modulus__names__noun} of {#the compound|the solid!} {formula#}? - Answer: The {bulk_modulus__names__noun} of {#the compound|the solid!} {formula#} is {bulk_modulus#} {bulk_modulus__units}. - - |- - User: {#I would like to|I want to!} know the {bulk_modulus__names__noun} of {#the compound|the solid!} {formula#}. 
- Assistant: The {bulk_modulus__names__noun} of {#the compound|the solid!} {formula#} is {bulk_modulus#} {bulk_modulus__units}. - - |- - User: {#I would like to|I want to!} design a {#compound|material|solid!} with a {bulk_modulus__names__noun} of {bulk_modulus#} {bulk_modulus__units}. - Assistant: {#I found|Here is|I have found|Here is!} a {#compound|material|solid!} with a {bulk_modulus__names__noun} of {bulk_modulus#} {bulk_modulus__units}: {formula#}. - - A {#compound|material|solid!} with a {bulk_modulus__names__noun} of {bulk_modulus#} {bulk_modulus__units} is {formula#}. - - |- - Task: Please {#give me|create|generate!} a {#compound|material|solid!} with a {bulk_modulus__names__noun} of {bulk_modulus#} {bulk_modulus__units}. - Result: {formula#} + - The {bulk_modulus__names__noun} of {#the compound|the solid!} {formula#} is {bulk_modulus#} {bulk_modulus__units}. + - |- + Question: How large is the {bulk_modulus__names__noun} of {#the compound|the solid!} {formula#}? + Answer: The {bulk_modulus__names__noun} of {#the compound|the solid!} {formula#} is {bulk_modulus#} {bulk_modulus__units}. + - |- + User: {#I would like to|I want to!} know the {bulk_modulus__names__noun} of {#the compound|the solid!} {formula#}. + Assistant: The {bulk_modulus__names__noun} of {#the compound|the solid!} {formula#} is {bulk_modulus#} {bulk_modulus__units}. + - |- + User: {#I would like to|I want to!} design a {#compound|material|solid!} with a {bulk_modulus__names__noun} of {bulk_modulus#} {bulk_modulus__units}. + Assistant: {#I found|Here is|I have found|Here is!} a {#compound|material|solid!} with a {bulk_modulus__names__noun} of {bulk_modulus#} {bulk_modulus__units}: {formula#}. + - A {#compound|material|solid!} with a {bulk_modulus__names__noun} of {bulk_modulus#} {bulk_modulus__units} is {formula#}. + - |- + Task: Please {#give me|create|generate!} a {#compound|material|solid!} with a {bulk_modulus__names__noun} of {bulk_modulus#} {bulk_modulus__units}. 
+ Result: {formula#} diff --git a/data/tabular/mp_descriptions/meta.yaml b/data/tabular/mp_descriptions/meta.yaml index ffbc3ab65..e61c31814 100644 --- a/data/tabular/mp_descriptions/meta.yaml +++ b/data/tabular/mp_descriptions/meta.yaml @@ -1,98 +1,97 @@ ---- name: mp_descriptions description: |- - Text descriptions of materials. + Text descriptions of materials. targets: - - id: description - description: description - type: text - names: - - noun: description - - id: description_w_bondlengths - description: description with bond lengths - type: text - names: - - noun: description with bond lengths + - id: description + description: description + type: text + names: + - noun: description + - id: description_w_bondlengths + description: description with bond lengths + type: text + names: + - noun: description with bond lengths identifiers: - - id: formula - type: text - description: composition - - id: cifstr - type: CIFFILE - description: CIF file - names: - - noun: CIF file - - noun: Crystallographic Information File (CIF) - - noun: CIF card + - id: formula + type: text + description: composition + - id: cifstr + type: CIFFILE + description: CIF file + names: + - noun: CIF file + - noun: Crystallographic Information File (CIF) + - noun: CIF card license: CC BY 4.0 links: - - url: https://next-gen.materialsproject.org/ - description: original data source + - url: https://next-gen.materialsproject.org/ + description: original data source num_points: 117576 bibtex: - - |- - @article{Jain_2013, - doi = {10.1063/1.4812323}, - url = {https://doi.org/10.1063%2F1.4812323}, - year = 2013, - month = {jul}, - publisher = {{AIP} Publishing}, - volume = {1}, - number = {1}, - author = {Anubhav Jain and Shyue Ping Ong and Geoffroy Hautier - and Wei Chen and William Davidson Richards and Stephen Dacek - and Shreyas Cholia and Dan Gunter and David Skinner - and Gerbrand Ceder and Kristin A. 
Persson}, - title = {Commentary: The Materials Project: - A materials genome approach to accelerating materials innovation}, - journal = {{APL} Materials} - } - - |- - @article{Ong_2015, - doi = {10.1016/j.commatsci.2014.10.037}, - url = {https://doi.org/10.1016%2Fj.commatsci.2014.10.037}, - year = 2015, - month = {feb}, - publisher = {Elsevier {BV}}, - volume = {97}, - pages = {209--215}, - author = {Shyue Ping Ong and Shreyas Cholia and Anubhav Jain - and Miriam Brafman and Dan Gunter and Gerbrand Ceder and Kristin A. Persson}, - title = {The Materials Application Programming Interface ({API}): - A simple, flexible and efficient {API} for materials data based - on {REpresentational} State Transfer ({REST}) principles}, - journal = {Computational Materials Science} - } - - |- - @article{Ganose_2019, - doi = {10.1557/mrc.2019.94}, - url = {https://doi.org/10.1557%2Fmrc.2019.94}, - year = 2019, - month = {sep}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {9}, - number = {3}, - pages = {874--881}, - author = {Alex M. Ganose and Anubhav Jain}, - title = {Robocrystallographer: automated crystal structure text descriptions and analysis}, - journal = {MRS Communications} - } + - |- + @article{Jain_2013, + doi = {10.1063/1.4812323}, + url = {https://doi.org/10.1063%2F1.4812323}, + year = 2013, + month = {jul}, + publisher = {{AIP} Publishing}, + volume = {1}, + number = {1}, + author = {Anubhav Jain and Shyue Ping Ong and Geoffroy Hautier + and Wei Chen and William Davidson Richards and Stephen Dacek + and Shreyas Cholia and Dan Gunter and David Skinner + and Gerbrand Ceder and Kristin A. 
Persson}, + title = {Commentary: The Materials Project: + A materials genome approach to accelerating materials innovation}, + journal = {{APL} Materials} + } + - |- + @article{Ong_2015, + doi = {10.1016/j.commatsci.2014.10.037}, + url = {https://doi.org/10.1016%2Fj.commatsci.2014.10.037}, + year = 2015, + month = {feb}, + publisher = {Elsevier {BV}}, + volume = {97}, + pages = {209--215}, + author = {Shyue Ping Ong and Shreyas Cholia and Anubhav Jain + and Miriam Brafman and Dan Gunter and Gerbrand Ceder and Kristin A. Persson}, + title = {The Materials Application Programming Interface ({API}): + A simple, flexible and efficient {API} for materials data based + on {REpresentational} State Transfer ({REST}) principles}, + journal = {Computational Materials Science} + } + - |- + @article{Ganose_2019, + doi = {10.1557/mrc.2019.94}, + url = {https://doi.org/10.1557%2Fmrc.2019.94}, + year = 2019, + month = {sep}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {9}, + number = {3}, + pages = {874--881}, + author = {Alex M. Ganose and Anubhav Jain}, + title = {Robocrystallographer: automated crystal structure text descriptions and analysis}, + journal = {MRS Communications} + } templates: - - |- - Task: {#Please design|Design!} a {#crystal structure|material|compound|material structure|structure!} based on the {cifstr__names__noun}. - CIF: {cifstr#} - {#Description|Answer!}: {description#} - - |- - Task: {#Please design|Design!} a {cifstr__names__noun} that matches the description below. - Description: {description#} - {#Answer|CIF!}: {cifstr#} - - |- - User: {#Can|Could!} you describe a {#crystal structure|material|compound|material structure|structure!} based on the {cifstr__names__noun}? - Assistant: {#Sure, |I can give it a try, |!} I {#would need|need|require!} the {cifstr__names__noun} to do that. 
- User: {cifstr#} - Assistant: {description#} - - |- - User: {#Can|Could!} you design a {cifstr__names__noun} that matches a description of a {#crystal structure|material|compound|material structure|structure!}? - Assistant: {#Sure, |I can give it a try, |!} I {#would need|need|require!} the description of the {#crystal structure|material|compound|material structure|structure!} to do that. - User: {description#} - Assistant: {cifstr#} + - |- + Task: {#Please design|Design!} a {#crystal structure|material|compound|material structure|structure!} based on the {cifstr__names__noun}. + CIF: {cifstr#} + {#Description|Answer!}: {description#} + - |- + Task: {#Please design|Design!} a {cifstr__names__noun} that matches the description below. + Description: {description#} + {#Answer|CIF!}: {cifstr#} + - |- + User: {#Can|Could!} you describe a {#crystal structure|material|compound|material structure|structure!} based on the {cifstr__names__noun}? + Assistant: {#Sure, |I can give it a try, |!} I {#would need|need|require!} the {cifstr__names__noun} to do that. + User: {cifstr#} + Assistant: {description#} + - |- + User: {#Can|Could!} you design a {cifstr__names__noun} that matches a description of a {#crystal structure|material|compound|material structure|structure!}? + Assistant: {#Sure, |I can give it a try, |!} I {#would need|need|require!} the description of the {#crystal structure|material|compound|material structure|structure!} to do that. + User: {description#} + Assistant: {cifstr#} diff --git a/data/tabular/mp_self_supervised/meta.yaml b/data/tabular/mp_self_supervised/meta.yaml index 85a0d9807..ba545ad05 100644 --- a/data/tabular/mp_self_supervised/meta.yaml +++ b/data/tabular/mp_self_supervised/meta.yaml @@ -1,84 +1,83 @@ ---- name: mp_self_supervised description: |- - The materials project is a dabase of computed properties of materials. + The materials project is a database of computed properties of materials.
targets: - - id: density - description: Density of the material - units: g/cm^3 - type: continuous - names: - - noun: density of the material - - noun: density - uris: - significant_digits: 2 - - id: spacegroup - description: Spacegroup of the material - type: categorical - names: - - noun: spacegroup of the material - - noun: spacegroup - uris: - significant_digits: 2 - - id: spacegroup_number - description: Spacegroup number of the material - type: categorical - names: - - noun: spacegroup number of the material - - noun: spacegroup number - - noun: number of the spacegroup in the International Tables for Crystallography + - id: density + description: Density of the material + units: g/cm^3 + type: continuous + names: + - noun: density of the material + - noun: density + uris: + significant_digits: 2 + - id: spacegroup + description: Spacegroup of the material + type: categorical + names: + - noun: spacegroup of the material + - noun: spacegroup + uris: + significant_digits: 2 + - id: spacegroup_number + description: Spacegroup number of the material + type: categorical + names: + - noun: spacegroup number of the material + - noun: spacegroup number + - noun: number of the spacegroup in the International Tables for Crystallography identifiers: - - id: cif - type: cif - description: CIF - - id: formula - type: COMPOSITION - description: reduced formula + - id: cif + type: cif + description: CIF + - id: formula + type: COMPOSITION + description: reduced formula license: CC BY 4.0 num_points: 130542 links: - - url: https://materialsproject.org/ - description: original data source + - url: https://materialsproject.org/ + description: original data source bibtex: - - |- - @article{jain2013commentary, - title={Commentary: The Materials Project: A materials genome approach to accelerating materials innovation}, - author={Jain, Anubhav and Ong, Shyue Ping and Hautier, Geoffroy and Chen, Wei and Richards, William Davidson and Dacek, Stephen and Cholia, Shreyas and Gunter, 
Dan and Skinner, David and Ceder, Gerbrand and others}, - journal={APL materials}, - volume={1}, - number={1}, - year={2013}, - publisher={AIP Publishing} - } + - |- + @article{jain2013commentary, + title={Commentary: The Materials Project: A materials genome approach to accelerating materials innovation}, + author={Jain, Anubhav and Ong, Shyue Ping and Hautier, Geoffroy and Chen, Wei and Richards, William Davidson and Dacek, Stephen and Cholia, Shreyas and Gunter, Dan and Skinner, David and Ceder, Gerbrand and others}, + journal={APL materials}, + volume={1}, + number={1}, + year={2013}, + publisher={AIP Publishing} + } templates: - - The {spacegroup__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {spacegroup#}. - - The {density__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {density#} {density__units}. - - The {#chemical formula|composition|reduced formula!} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {formula#}. - - The {spacegroup_number__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {spacegroup_number#}. - - |- - User: I want to design a material with a particular {density__names__noun}, {spacegroup__names__noun}, and {#chemical formula|composition|reduced formula!}. - Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}I would need to know the {density__names__noun}, {spacegroup__names__noun}, and {#chemical formula|composition|reduced formula!} of the material you want to design. - User: The {density__names__noun} should be {density#} {density__units}, the {spacegroup__names__noun} should be {spacegroup#}, and the {#chemical formula|composition|reduced formula!} should be {formula#}. - Assistant: I {#recommend|suggest|propose|advise|!} the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}. 
- - |- - Question: What is the {density__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? - Answer: {density#} {density__units} - - |- - Question: What is the {spacegroup__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? - Answer: {spacegroup#} - - |- - Question: What is the {#chemical formula|composition|reduced formula!} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? - Answer: {formula#} - - |- - Question: What is the {spacegroup_number__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? - Answer: {spacegroup_number#} - - |- - User: I want to design a {#material|compound|solid!} with a {density__names__noun} of {density#} {density__units}, and a {#chemical formula|composition|reduced formula!} of {formula#}. - Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}I suggest the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}. {#Is there anything else I can do for you?|Do you need anything else?|Anything else?|!} - User: {#Yes, |Yeah, |Yep, |Indeed, |!}I also want to know the {spacegroup__names__noun} of this {#material|compound|solid!}. - Assistant: The {spacegroup__names__noun} of the {#material|compound|solid!} is {spacegroup#}. - - |- - User: I have a {#material|compound|solid|structure!} with the following {#CIF|CIF file|CIF card!} {cif#}. {#Can you tell me the density?|What is the density?|!} - Assistant: The {density__names__noun} of the {#material|compound|solid!} is {density#} {density__units}. {#Is there anything else I can do for you?|Do you need anything else?|Anything else?|!} - User: {#Yes, |Yeah, |Yep, |Indeed, |!}I also want to know the {spacegroup__names__noun} of this {#material|compound|solid!}. - Assistant: The {spacegroup__names__noun} of the {#material|compound|solid!} is {spacegroup#}. 
+ - The {spacegroup__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {spacegroup#}. + - The {density__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {density#} {density__units}. + - The {#chemical formula|composition|reduced formula!} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {formula#}. + - The {spacegroup_number__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {spacegroup_number#}. + - |- + User: I want to design a material with a particular {density__names__noun}, {spacegroup__names__noun}, and {#chemical formula|composition|reduced formula!}. + Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}I would need to know the {density__names__noun}, {spacegroup__names__noun}, and {#chemical formula|composition|reduced formula!} of the material you want to design. + User: The {density__names__noun} should be {density#} {density__units}, the {spacegroup__names__noun} should be {spacegroup#}, and the {#chemical formula|composition|reduced formula!} should be {formula#}. + Assistant: I {#recommend|suggest|propose|advise|!} the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}. + - |- + Question: What is the {density__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? + Answer: {density#} {density__units} + - |- + Question: What is the {spacegroup__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? + Answer: {spacegroup#} + - |- + Question: What is the {#chemical formula|composition|reduced formula!} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? + Answer: {formula#} + - |- + Question: What is the {spacegroup_number__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? 
+ Answer: {spacegroup_number#} + - |- + User: I want to design a {#material|compound|solid!} with a {density__names__noun} of {density#} {density__units}, and a {#chemical formula|composition|reduced formula!} of {formula#}. + Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}I suggest the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}. {#Is there anything else I can do for you?|Do you need anything else?|Anything else?|!} + User: {#Yes, |Yeah, |Yep, |Indeed, |!}I also want to know the {spacegroup__names__noun} of this {#material|compound|solid!}. + Assistant: The {spacegroup__names__noun} of the {#material|compound|solid!} is {spacegroup#}. + - |- + User: I have a {#material|compound|solid|structure!} with the following {#CIF|CIF file|CIF card!} {cif#}. {#Can you tell me the density?|What is the density?|!} + Assistant: The {density__names__noun} of the {#material|compound|solid!} is {density#} {density__units}. {#Is there anything else I can do for you?|Do you need anything else?|Anything else?|!} + User: {#Yes, |Yeah, |Yep, |Indeed, |!}I also want to know the {spacegroup__names__noun} of this {#material|compound|solid!}. + Assistant: The {spacegroup__names__noun} of the {#material|compound|solid!} is {spacegroup#}. 
diff --git a/data/tabular/mp_shear_modulus/meta.yaml b/data/tabular/mp_shear_modulus/meta.yaml index 5fe3878f3..d3c6ccd05 100644 --- a/data/tabular/mp_shear_modulus/meta.yaml +++ b/data/tabular/mp_shear_modulus/meta.yaml @@ -1,94 +1,93 @@ ---- name: mp_shear_modulus description: |- - DFT-computed shear moduli of inorganic solids + DFT-computed shear moduli of inorganic solids targets: - - id: shear_modulus - description: shear modulus of materials - units: GPa - type: continuous - significant_figures: 3 - names: - - noun: shear modulus computed using DFT with the PBE GGA functional - - noun: shear modulus computed using DFT with the PBE functional - - noun: shear modulus derived from DFT simulations with the PBE functional - uris: + - id: shear_modulus + description: shear modulus of materials + units: GPa + type: continuous + significant_figures: 3 + names: + - noun: shear modulus computed using DFT with the PBE GGA functional + - noun: shear modulus computed using DFT with the PBE functional + - noun: shear modulus derived from DFT simulations with the PBE functional + uris: benchmarks: - - name: CrabNet - link: https://github.com/anthony-wang/CrabNet/tree/master/data/benchmark_data - split_column: split + - name: CrabNet + link: https://github.com/anthony-wang/CrabNet/tree/master/data/benchmark_data + split_column: split identifiers: - - id: formula - type: COMPOSITION - description: composition + - id: formula + type: COMPOSITION + description: composition license: CC BY 4.0 links: - - url: https://next-gen.materialsproject.org/ - description: original data source + - url: https://next-gen.materialsproject.org/ + description: original data source num_points: 6307 bibtex: - - |- - @article{Jain_2013, - doi = {10.1063/1.4812323}, - url = {https://doi.org/10.1063%2F1.4812323}, - year = 2013, - month = {jul}, - publisher = {{AIP} Publishing}, - volume = {1}, - number = {1}, - author = {Anubhav Jain and Shyue Ping Ong and Geoffroy Hautier - and Wei Chen and William 
Davidson Richards and Stephen Dacek - and Shreyas Cholia and Dan Gunter and David Skinner - and Gerbrand Ceder and Kristin A. Persson}, - title = {Commentary: The Materials Project: - A materials genome approach to accelerating materials innovation}, - journal = {{APL} Materials} - } - - |- - @article{Ong_2015, - doi = {10.1016/j.commatsci.2014.10.037}, - url = {https://doi.org/10.1016%2Fj.commatsci.2014.10.037}, - year = 2015, - month = {feb}, - publisher = {Elsevier {BV}}, - volume = {97}, - pages = {209--215}, - author = {Shyue Ping Ong and Shreyas Cholia and Anubhav Jain - and Miriam Brafman and Dan Gunter and Gerbrand Ceder and Kristin A. Persson}, - title = {The Materials Application Programming Interface ({API}): - A simple, flexible and efficient {API} for materials data based - on {REpresentational} State Transfer ({REST}) principles}, - journal = {Computational Materials Science} - } - - |- - @article{de_Jong_2015, - doi = {10.1038/sdata.2015.9}, - url = {https://doi.org/10.1038%2Fsdata.2015.9}, - year = 2015, - month = {mar}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {2}, - number = {1}, - author = {Maarten de Jong and Wei Chen and Thomas Angsten - and Anubhav Jain and Randy Notestine and Anthony Gamst - and Marcel Sluiter and Chaitanya Krishna Ande - and Sybrand van der Zwaag and Jose J Plata and Cormac Toher - and Stefano Curtarolo and Gerbrand Ceder and Kristin A. Persson and Mark Asta}, - title = {Charting the complete elastic properties of inorganic crystalline compounds}, - journal = {Sci Data} - } + - |- + @article{Jain_2013, + doi = {10.1063/1.4812323}, + url = {https://doi.org/10.1063%2F1.4812323}, + year = 2013, + month = {jul}, + publisher = {{AIP} Publishing}, + volume = {1}, + number = {1}, + author = {Anubhav Jain and Shyue Ping Ong and Geoffroy Hautier + and Wei Chen and William Davidson Richards and Stephen Dacek + and Shreyas Cholia and Dan Gunter and David Skinner + and Gerbrand Ceder and Kristin A. 
Persson}, + title = {Commentary: The Materials Project: + A materials genome approach to accelerating materials innovation}, + journal = {{APL} Materials} + } + - |- + @article{Ong_2015, + doi = {10.1016/j.commatsci.2014.10.037}, + url = {https://doi.org/10.1016%2Fj.commatsci.2014.10.037}, + year = 2015, + month = {feb}, + publisher = {Elsevier {BV}}, + volume = {97}, + pages = {209--215}, + author = {Shyue Ping Ong and Shreyas Cholia and Anubhav Jain + and Miriam Brafman and Dan Gunter and Gerbrand Ceder and Kristin A. Persson}, + title = {The Materials Application Programming Interface ({API}): + A simple, flexible and efficient {API} for materials data based + on {REpresentational} State Transfer ({REST}) principles}, + journal = {Computational Materials Science} + } + - |- + @article{de_Jong_2015, + doi = {10.1038/sdata.2015.9}, + url = {https://doi.org/10.1038%2Fsdata.2015.9}, + year = 2015, + month = {mar}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {2}, + number = {1}, + author = {Maarten de Jong and Wei Chen and Thomas Angsten + and Anubhav Jain and Randy Notestine and Anthony Gamst + and Marcel Sluiter and Chaitanya Krishna Ande + and Sybrand van der Zwaag and Jose J Plata and Cormac Toher + and Stefano Curtarolo and Gerbrand Ceder and Kristin A. Persson and Mark Asta}, + title = {Charting the complete elastic properties of inorganic crystalline compounds}, + journal = {Sci Data} + } templates: - - The {shear_modulus__names__noun} of {#the compound|the solid|!} {formula#} is {shear_modulus#} {shear_modulus__units}. - - |- - Question: How large is the {shear_modulus__names__noun} of {#the compound|the solid|!} {formula#}? - Answer: The {shear_modulus__names__noun} of {#the compound|the solid|!} {formula#} is {shear_modulus#} {shear_modulus__units}. - - |- - User: {#I would like to|I want to!} know the {shear_modulus__names__noun} of {#the compound|the solid|!} {formula#}. 
- Assistant: The {shear_modulus__names__noun} of {#the compound|the solid|!} {formula#} is {shear_modulus#} {shear_modulus__units}. - - |- - User: {#I would like to|I want to!} design a {#compound|material|solid|!} with a {shear_modulus__names__noun} of {shear_modulus#} {shear_modulus__units}. - Assistant: {#I found|Here is|I have found|Here is!} a {#compound|material|solid|!} with a {shear_modulus__names__noun} of {shear_modulus#} {shear_modulus__units}: {formula#}. - - A {#compound|material|solid|!} with a {shear_modulus__names__noun} of {shear_modulus#} {shear_modulus__units} is {formula#}. - - |- - Task: Please {#give me|create|generate!} a {#compound|material|solid|!} with a {shear_modulus__names__noun} of {shear_modulus#} {shear_modulus__units}. - Result: {formula#} + - The {shear_modulus__names__noun} of {#the compound|the solid|!} {formula#} is {shear_modulus#} {shear_modulus__units}. + - |- + Question: How large is the {shear_modulus__names__noun} of {#the compound|the solid|!} {formula#}? + Answer: The {shear_modulus__names__noun} of {#the compound|the solid|!} {formula#} is {shear_modulus#} {shear_modulus__units}. + - |- + User: {#I would like to|I want to!} know the {shear_modulus__names__noun} of {#the compound|the solid|!} {formula#}. + Assistant: The {shear_modulus__names__noun} of {#the compound|the solid|!} {formula#} is {shear_modulus#} {shear_modulus__units}. + - |- + User: {#I would like to|I want to!} design a {#compound|material|solid|!} with a {shear_modulus__names__noun} of {shear_modulus#} {shear_modulus__units}. + Assistant: {#I found|Here is|I have found|Here is!} a {#compound|material|solid|!} with a {shear_modulus__names__noun} of {shear_modulus#} {shear_modulus__units}: {formula#}. + - A {#compound|material|solid|!} with a {shear_modulus__names__noun} of {shear_modulus#} {shear_modulus__units} is {formula#}. 
+ - |- + Task: Please {#give me|create|generate!} a {#compound|material|solid|!} with a {shear_modulus__names__noun} of {shear_modulus#} {shear_modulus__units}. + Result: {formula#} diff --git a/data/tabular/ncbi_disease/meta.yaml b/data/tabular/ncbi_disease/meta.yaml index 8cd0a7692..fb26f760d 100644 --- a/data/tabular/ncbi_disease/meta.yaml +++ b/data/tabular/ncbi_disease/meta.yaml @@ -1,55 +1,54 @@ ---- name: ncbi_disease description: |- - ncbi_disease is a named entity recognition dataset for disease mentions. + ncbi_disease is a named entity recognition dataset for disease mentions. targets: - - id: matched_words - description: matched words - type: text - names: - - noun: entity - - noun: matched entity + - id: matched_words + description: matched words + type: text + names: + - noun: entity + - noun: matched entity identifiers: - - id: sentence - description: Sentence - type: text - names: - - noun: sentence - - noun: text + - id: sentence + description: Sentence + type: text + names: + - noun: sentence + - noun: text license: https://huggingface.co/datasets/bigbio/blurb/blob/main/LICENSE links: - - url: https://huggingface.co/datasets/bigbio/blurb - description: original dataset + - url: https://huggingface.co/datasets/bigbio/blurb + description: original dataset benchmarks: - - name: ncbi_disease - link: hhttps://huggingface.co/datasets/bigbio/blurb - split_column: split + - name: ncbi_disease + link: https://huggingface.co/datasets/bigbio/blurb + split_column: split num_points: 7075 bibtex: - - |- - @article{gu2021domain, - title = { - Domain-specific language model pretraining for biomedical natural - language processing - }, - author = { - Gu, Yu and Tinn, Robert and Cheng, Hao and Lucas, Michael and - Usuyama, Naoto and Liu, Xiaodong and Naumann, Tristan and Gao, - Jianfeng and Poon, Hoifung - }, - year = 2021, - journal = {ACM Transactions on Computing for Healthcare (HEALTH)}, - publisher = {ACM New York, NY}, - volume = 3, - number = 1, - pages =
{1--23} - } + - |- + @article{gu2021domain, + title = { + Domain-specific language model pretraining for biomedical natural + language processing + }, + author = { + Gu, Yu and Tinn, Robert and Cheng, Hao and Lucas, Michael and + Usuyama, Naoto and Liu, Xiaodong and Naumann, Tristan and Gao, + Jianfeng and Poon, Hoifung + }, + year = 2021, + journal = {ACM Transactions on Computing for Healthcare (HEALTH)}, + publisher = {ACM New York, NY}, + volume = 3, + number = 1, + pages = {1--23} + } templates: - - |- - Task: Find all the mentions of diseases in the {#following|subsequent!} {#text|sentence!}. Return the matching {#words|entities!}. If there is no {#match|mention of a disease|matching entity!}, return `no match`. - {#Sentence|Description!}: {sentence#} - Answer: {matched_words#} - - |- - User: Does the following text contain mentions of diseases?{# Can you return matches?| Can you output matches?!} - {#Text: |!}{sentence#} - Assistant: {#I found|There is!} {matched_words#} + - |- + Task: Find all the mentions of diseases in the {#following|subsequent!} {#text|sentence!}. Return the matching {#words|entities!}. If there is no {#match|mention of a disease|matching entity!}, return `no match`. + {#Sentence|Description!}: {sentence#} + Answer: {matched_words#} + - |- + User: Does the following text contain mentions of diseases?{# Can you return matches?| Can you output matches?!} + {#Text: |!}{sentence#} + Assistant: {#I found|There is!} {matched_words#} diff --git a/data/tabular/nlmchem/meta.yaml b/data/tabular/nlmchem/meta.yaml index 54834424e..88cbbd8ad 100644 --- a/data/tabular/nlmchem/meta.yaml +++ b/data/tabular/nlmchem/meta.yaml @@ -1,71 +1,70 @@ ---- name: NLM-Chem description: NLM-Chem is a new resource for chemical entity recognition in PubMed full text literature. 
identifiers: - - id: Abbreviation - description: abbreviation of a Abbreviation - type: Other - names: - - noun: abbreviation - - id: MeSH_Identifier - description: unique codes for Medical Subject Headings - type: categorical - names: - - noun: MeSH identifier - sample: false + - id: Abbreviation + description: abbreviation of a Abbreviation + type: Other + names: + - noun: abbreviation + - id: MeSH_Identifier + description: unique codes for Medical Subject Headings + type: categorical + names: + - noun: MeSH identifier + sample: false targets: - - id: Full_Form - description: full form or meaning of the abbreviation - type: categorical - names: - - noun: full form or meaning + - id: Full_Form + description: full form or meaning of the abbreviation + type: categorical + names: + - noun: full form or meaning license: CC BY 4.0 links: - - url: https://ftp.ncbi.nlm.nih.gov/pub/lu/NLMChem/ - description: data source - - url: https://www.nature.com/articles/s41597-021-00875-1 - description: publication + - url: https://ftp.ncbi.nlm.nih.gov/pub/lu/NLMChem/ + description: data source + - url: https://www.nature.com/articles/s41597-021-00875-1 + description: publication num_points: 2695 bibtex: - - |- - @article{Islamaj2021, - author = {Islamaj, R. and Leaman, R. and Kim, S. and Lu, Z.}, - title = {NLM-Chem, a new resource for chemical entity recognition in PubMed full text literature}, - journal = {Nature Scientific Data}, - volume = {8}, - number = {91}, - year = {2021}, - doi = {10.1038/s41597-021-00875-1}, - url = {https://doi.org/10.1038/s41597-021-00875-1} - } + - |- + @article{Islamaj2021, + author = {Islamaj, R. and Leaman, R. and Kim, S. 
and Lu, Z.}, + title = {NLM-Chem, a new resource for chemical entity recognition in PubMed full text literature}, + journal = {Nature Scientific Data}, + volume = {8}, + number = {91}, + year = {2021}, + doi = {10.1038/s41597-021-00875-1}, + url = {https://doi.org/10.1038/s41597-021-00875-1} + } templates: - - The {Abbreviation__names__noun} "{Abbreviation#}" stands for "{#Full_Form}". - - |- - Task: Please give me the {Full_Form__names__noun} of the {Abbreviation__names__noun}. - Abbreviation: {Abbreviation#} - Constraint: Answer the question with {#full|complete!} words. - Result: {Full_Form#} - - |- - Task: Please give me the {Abbreviation__names__noun} of the following {Full_Form__names__noun}. - Full form or meaning of the abbreviation: {Full_Form#} - Constraint: Answer the question with an {Abbreviation__names__noun}. - Result: {Abbreviation#} - - |- - User: Can you give me the {Abbreviation__names__noun} of the following {Full_Form__names__noun}: {#Full_Form} - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {Abbreviation#} - - |- - User: Can you give me the {Full_Form__names__noun} of the following {Abbreviation__names__noun}: {#Abbreviation} - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {Full_Form#} - - |- - User: I'm {#searching|looking!} for the {Abbreviation__names__noun} for: {#Full_Form} - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {Abbreviation#} - - |- - Task: Please give me the {Full_Form__names__noun} of the {Abbreviation__names__noun}. - Abbreviation: {Abbreviation#} - Constraint: Answer the question with {#full|complete!} words. - Result:{Full_Form#} - - |- - Task: Please give me the {Abbreviation__names__noun} of the following {Full_Form__names__noun}. - Full form or meaning of the abbreviation: {Full_Form#} - Constraint: Answer the question with an {Abbreviation__names__noun}. 
- Result:{Abbreviation#} + - The {Abbreviation__names__noun} "{Abbreviation#}" stands for "{#Full_Form}". + - |- + Task: Please give me the {Full_Form__names__noun} of the {Abbreviation__names__noun}. + Abbreviation: {Abbreviation#} + Constraint: Answer the question with {#full|complete!} words. + Result: {Full_Form#} + - |- + Task: Please give me the {Abbreviation__names__noun} of the following {Full_Form__names__noun}. + Full form or meaning of the abbreviation: {Full_Form#} + Constraint: Answer the question with an {Abbreviation__names__noun}. + Result: {Abbreviation#} + - |- + User: Can you give me the {Abbreviation__names__noun} of the following {Full_Form__names__noun}: {#Full_Form} + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {Abbreviation#} + - |- + User: Can you give me the {Full_Form__names__noun} of the following {Abbreviation__names__noun}: {#Abbreviation} + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {Full_Form#} + - |- + User: I'm {#searching|looking!} for the {Abbreviation__names__noun} for: {#Full_Form} + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {Abbreviation#} + - |- + Task: Please give me the {Full_Form__names__noun} of the {Abbreviation__names__noun}. + Abbreviation: {Abbreviation#} + Constraint: Answer the question with {#full|complete!} words. + Result:{Full_Form#} + - |- + Task: Please give me the {Abbreviation__names__noun} of the following {Full_Form__names__noun}. + Full form or meaning of the abbreviation: {Full_Form#} + Constraint: Answer the question with an {Abbreviation__names__noun}. 
+ Result:{Abbreviation#} diff --git a/data/tabular/nomad_structure/meta.yaml b/data/tabular/nomad_structure/meta.yaml index 5812c76c5..95d38a7ec 100644 --- a/data/tabular/nomad_structure/meta.yaml +++ b/data/tabular/nomad_structure/meta.yaml @@ -1,146 +1,144 @@ ---- name: nomad-structure description: |- - A subset from NOMAD dataset, which is a database of DFT computed results of materials. - This subset consists of cif structures of around 0.5 million bulk stable materials and their geometric and structural information. - All materials in this dataset are modeled using Density Functional Theory using GGA functional. + A subset from NOMAD dataset, which is a database of DFT computed results of materials. + This subset consists of cif structures of around 0.5 million bulk stable materials and their geometric and structural information. + All materials in this dataset are modeled using Density Functional Theory using GGA functional. targets: - - id: density - description: Density of the material - units: kg/m^3 - type: continuous - significant_digits: 3 - names: - - noun: density - uris: - - id: crystal_system - description: Geometric arrangement of atoms within a crystal - type: categorical - names: - - noun: crystal system - uris: - - id: spacegroup - description: Spacegroup of the material - type: categorical - names: - - noun: spacegroup - uris: - - id: pointgroup - description: Pointgroup of the material - type: categorical - names: - - noun: pointgroup - uris: - - id: spacegroup_number - description: Spacegroup number of the material - type: categorical - names: - - noun: spacegroup number - - noun: number of the spacegroup in the International Tables for Crystallography - uris: - - id: cif_masked - description: CIF file of the material - type: text - names: - - noun: CIF file with masked rows - - noun: CIF card with masked rows + - id: density + description: Density of the material + units: kg/m^3 + type: continuous + significant_digits: 3 + names: + - noun: 
density + uris: + - id: crystal_system + description: Geometric arrangement of atoms within a crystal + type: categorical + names: + - noun: crystal system + uris: + - id: spacegroup + description: Spacegroup of the material + type: categorical + names: + - noun: spacegroup + uris: + - id: pointgroup + description: Pointgroup of the material + type: categorical + names: + - noun: pointgroup + uris: + - id: spacegroup_number + description: Spacegroup number of the material + type: categorical + names: + - noun: spacegroup number + - noun: number of the spacegroup in the International Tables for Crystallography + uris: + - id: cif_masked + description: CIF file of the material + type: text + names: + - noun: CIF file with masked rows + - noun: CIF card with masked rows identifiers: - - id: cif - type: cif - description: CIF - - id: formula - type: COMPOSITION - description: reduced formula - names: - - noun: chemical formula - - noun: composition - - noun: reduced formula + - id: cif + type: cif + description: CIF + - id: formula + type: COMPOSITION + description: reduced formula + names: + - noun: chemical formula + - noun: composition + - noun: reduced formula license: CC BY 4.0 num_points: 527984 links: - - url: https://nomad-lab.eu/nomad-lab/ - description: original data source + - url: https://nomad-lab.eu/nomad-lab/ + description: original data source bibtex: - - |- - @article{scheidgen2023nomad, - title={NOMAD: A distributed web-based platform for managing materials science research data}, - author={Scheidgen, Markus and Himanen, Lauri and Ladines, Alvin Noe and Sikter, David and Nakhaee, Mohammad and Fekete, {\'A}d{\'a}m and Chang, Theodore and Golparvar, Amir and M{\'a}rquez, Jos{\'e} A and Brockhauser, Sandor and others}, - journal={Journal of Open Source Software}, - volume={8}, - number={90}, - pages={5388}, - year={2023} - } + - |- + @article{scheidgen2023nomad, + title={NOMAD: A distributed web-based platform for managing materials science research 
data}, + author={Scheidgen, Markus and Himanen, Lauri and Ladines, Alvin Noe and Sikter, David and Nakhaee, Mohammad and Fekete, {\'A}d{\'a}m and Chang, Theodore and Golparvar, Amir and M{\'a}rquez, Jos{\'e} A and Brockhauser, Sandor and others}, + journal={Journal of Open Source Software}, + volume={8}, + number={90}, + pages={5388}, + year={2023} + } templates: - - The {spacegroup__names__noun} of the symmetrized version of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {spacegroup#}. - - The {crystal_system__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {crystal_system#}. - - The {density__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {density#} {density__units}. - - The {#chemical formula|composition|reduced formula!} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {formula#}. - - The {spacegroup_number__names__noun} of the symmetrized version of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {spacegroup_number#}. - - The {#CIF|CIF file|CIF card!} of the material with {#chemical formula|composition|reduced formula!} {formula#}, {spacegroup_number__names__noun} {spacegroup_number#} - and {density__names__noun} {density#} {density__units} is {cif#}. - - |- - Question: {#What is the|What's the!} structure of {#material|compound|solid!} with {#chemical formula|composition|reduced formula!} {formula#} and {spacegroup_number__names__noun} {spacegroup_number#}? - Constraint: Return a {#CIF|CIF file|CIF card!}. - Answer: {cif#} - - |- - User: In the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}, what is the {pointgroup__names__noun}? - Assistant: The {pointgroup__names__noun} of the symmetrized version of the {#material|compound|solid!} is {pointgroup#}. 
- - |- - Question: {#What is the|What's the!} {density__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? - Answer: The {density__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {density#} {density__units}. - - |- - User: I want to design a material with a particular {density__names__noun}, {spacegroup__names__noun}, and {#chemical formula|composition|reduced formula!}. - Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}I would need to know the {density__names__noun}, {spacegroup__names__noun}, and {#chemical formula|composition|reduced formula!} of the material you want to design. - User: The {density__names__noun} should be {density#} {density__units}, the {spacegroup__names__noun} should be {spacegroup#}, and the {#chemical formula|composition|reduced formula!} should be {formula#}. - Assistant: I {#recommend|suggest|propose|advise|!} the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}. - - |- - Question: What is the {density__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? - Answer: {density#} {density__units}. - - |- - Question: What is the {spacegroup__names__noun} of the symmetrized version of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? - Answer: {spacegroup#}. - - |- - Question: What is the {#chemical formula|composition|reduced formula!} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? - Answer: {formula#}. - - |- - Question: What is the {spacegroup_number__names__noun} of the symmetrized version of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? - Answer: {spacegroup_number#}. - - |- - User: The {#CIF|CIF file|CIF card!} of the {#material|compound|solid!} is {cif#}. {#Tell|Show|Give|!} me the {#chemical formula|composition|reduced formula!}. 
- Assistant: {#Certainly|Sure|Of course!}, the {formula__names__noun} is {formula#} - - |- - User: The {#CIF|CIF file|CIF card!} of the {#material|compound|solid!} is {cif#}. {#Tell|Show|Give|!} me the {density__names__noun}. - Assistant: {#Certainly|Sure|Of course!}, {density__names__noun} of the {#material|compound|solid!} is {density#} {density__units} - - |- - User: I want to design a {#material|compound|solid!} with a {density__names__noun} of {density#} {density__units}, and a {#chemical formula|composition|reduced formula!} of {formula#}. - Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}I suggest the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}. {#Is there anything else I can do for you?|Do you need anything else?|Anything else?!} - User: {#Yes, |Yeah, |Yep, |Indeed, |!}I also want to know the {spacegroup__names__noun} of the symmetrized version of this {#material|compound|solid!}. - Assistant: The {spacegroup__names__noun} of the {#material|compound|solid!} is {spacegroup#}. - - |- - User: I have a {#material|compound|solid|structure!} with the following {#CIF|CIF file|CIF card!} {cif#}. {#Can you tell me the density?|What is the density?!} - Assistant: The {density__names__noun} of the {#material|compound|solid!} is {density#} {density__units}. {#Is there anything else I can do for you?|Do you need anything else?|Anything else?!} - User: {#Yes, |Yeah, |Yep, |Indeed, |!}I also want to know the {spacegroup__names__noun} of the symmetrized version of this {#material|compound|solid!}. - Assistant: The {spacegroup__names__noun} of the {#material|compound|solid!} is {spacegroup#}. - - |- - User: For a {#material|compound|solid|structure!} with {spacegroup_number__names__noun} {spacegroup#}, can you estimate the {density__names__noun} in {density__units}, and the {pointgroup__names__noun}? 
- Assistant: Certainly, the {density__names__noun} is {density#} {density__units}, and the {pointgroup__names__noun} is {pointgroup#}. - - |- - User: I want you to {#write|tell|suggest!} the {#CIF|CIF file|CIF card!} of a {#material|compound|solid!} with a {density__names__noun} of {density#} {density__units}, and {#chemical formula|composition|reduced formula!} {formula#}. Also the {spacegroup__names__noun} of the {#material|compound|solid!} should be {spacegroup#} - Assistant: {#Certainly|Sure|Of course!}, the {#CIF|CIF file|CIF card!} is {cif#}. - - |- - Task: Fill the rows masked with `[MASK]` in this {#CIF|CIF file|CIF card!} to fulfill the given constraints. Return the {#CIF|CIF file|CIF card!} with the masked rows filled. - Masked {#CIF|CIF file|CIF card!}: {cif_masked#} - Constraint: The {density__names__noun} should be {density#} {density__units}, and the {#chemical formula|composition|reduced formula!} should be {formula#}. - Answer: {cif#} - - |- - Task: Fill the rows masked with `[MASK]` in this {#CIF|CIF file|CIF card!} to fulfill the given constraints. Return the {#CIF|CIF file|CIF card!} with the masked rows filled. - Masked {#CIF|CIF file|CIF card!}: {cif_masked#} - Constraint: The {density__names__noun} should be {density#} {density__units}, the {#chemical formula|composition|reduced formula!} should be {formula#}, and the {spacegroup__names__noun} should be {spacegroup#}. - Answer: {cif#} - - |- - Question: {#What is the|What's the!} complete {#CIF|CIF file|CIF card!} of the {#material|compound|solid!} with the masked {#CIF|CIF file|CIF card!} {cif_masked#} and {density__names__noun} {density#} {density__units}? - Answer: {cif#} - - |- - Question: {#What is the|What's the!} complete {#CIF|CIF file|CIF card!} of the {#material|compound|solid!} with the masked {#CIF|CIF file|CIF card!} {cif_masked#}, {density__names__noun} {density#} {density__units}, and {#chemical formula|composition|reduced formula!} {formula#}? 
- Answer: {cif#} + - The {spacegroup__names__noun} of the symmetrized version of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {spacegroup#}. + - The {crystal_system__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {crystal_system#}. + - The {density__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {density#} {density__units}. + - The {#chemical formula|composition|reduced formula!} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {formula#}. + - The {spacegroup_number__names__noun} of the symmetrized version of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {spacegroup_number#}. + - The {#CIF|CIF file|CIF card!} of the material with {#chemical formula|composition|reduced formula!} {formula#}, {spacegroup_number__names__noun} {spacegroup_number#} and {density__names__noun} {density#} {density__units} is {cif#}. + - |- + Question: {#What is the|What's the!} structure of {#material|compound|solid!} with {#chemical formula|composition|reduced formula!} {formula#} and {spacegroup_number__names__noun} {spacegroup_number#}? + Constraint: Return a {#CIF|CIF file|CIF card!}. + Answer: {cif#} + - |- + User: In the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}, what is the {pointgroup__names__noun}? + Assistant: The {pointgroup__names__noun} of the symmetrized version of the {#material|compound|solid!} is {pointgroup#}. + - |- + Question: {#What is the|What's the!} {density__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? + Answer: The {density__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#} is {density#} {density__units}. + - |- + User: I want to design a material with a particular {density__names__noun}, {spacegroup__names__noun}, and {#chemical formula|composition|reduced formula!}. 
+ Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}I would need to know the {density__names__noun}, {spacegroup__names__noun}, and {#chemical formula|composition|reduced formula!} of the material you want to design. + User: The {density__names__noun} should be {density#} {density__units}, the {spacegroup__names__noun} should be {spacegroup#}, and the {#chemical formula|composition|reduced formula!} should be {formula#}. + Assistant: I {#recommend|suggest|propose|advise|!} the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}. + - |- + Question: What is the {density__names__noun} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? + Answer: {density#} {density__units}. + - |- + Question: What is the {spacegroup__names__noun} of the symmetrized version of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? + Answer: {spacegroup#}. + - |- + Question: What is the {#chemical formula|composition|reduced formula!} of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? + Answer: {formula#}. + - |- + Question: What is the {spacegroup_number__names__noun} of the symmetrized version of the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}? + Answer: {spacegroup_number#}. + - |- + User: The {#CIF|CIF file|CIF card!} of the {#material|compound|solid!} is {cif#}. {#Tell|Show|Give|!} me the {#chemical formula|composition|reduced formula!}. + Assistant: {#Certainly|Sure|Of course!}, the {formula__names__noun} is {formula#} + - |- + User: The {#CIF|CIF file|CIF card!} of the {#material|compound|solid!} is {cif#}. {#Tell|Show|Give|!} me the {density__names__noun}. 
+ Assistant: {#Certainly|Sure|Of course!}, the {density__names__noun} of the {#material|compound|solid!} is {density#} {density__units} + - |- + User: I want to design a {#material|compound|solid!} with a {density__names__noun} of {density#} {density__units}, and a {#chemical formula|composition|reduced formula!} of {formula#}. + Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}I suggest the {#material|compound|solid!} with the {#CIF|CIF file|CIF card!} {cif#}. {#Is there anything else I can do for you?|Do you need anything else?|Anything else?!} + User: {#Yes, |Yeah, |Yep, |Indeed, |!}I also want to know the {spacegroup__names__noun} of the symmetrized version of this {#material|compound|solid!}. + Assistant: The {spacegroup__names__noun} of the {#material|compound|solid!} is {spacegroup#}. + - |- + User: I have a {#material|compound|solid|structure!} with the following {#CIF|CIF file|CIF card!} {cif#}. {#Can you tell me the density?|What is the density?!} + Assistant: The {density__names__noun} of the {#material|compound|solid!} is {density#} {density__units}. {#Is there anything else I can do for you?|Do you need anything else?|Anything else?!} + User: {#Yes, |Yeah, |Yep, |Indeed, |!}I also want to know the {spacegroup__names__noun} of the symmetrized version of this {#material|compound|solid!}. + Assistant: The {spacegroup__names__noun} of the {#material|compound|solid!} is {spacegroup#}. + - |- + User: For a {#material|compound|solid|structure!} with {spacegroup_number__names__noun} {spacegroup_number#}, can you estimate the {density__names__noun} in {density__units}, and the {pointgroup__names__noun}? + Assistant: Certainly, the {density__names__noun} is {density#} {density__units}, and the {pointgroup__names__noun} is {pointgroup#}.
+ - |- + User: I want you to {#write|tell|suggest!} the {#CIF|CIF file|CIF card!} of a {#material|compound|solid!} with a {density__names__noun} of {density#} {density__units}, and {#chemical formula|composition|reduced formula!} {formula#}. Also the {spacegroup__names__noun} of the {#material|compound|solid!} should be {spacegroup#} + Assistant: {#Certainly|Sure|Of course!}, the {#CIF|CIF file|CIF card!} is {cif#}. + - |- + Task: Fill the rows masked with `[MASK]` in this {#CIF|CIF file|CIF card!} to fulfill the given constraints. Return the {#CIF|CIF file|CIF card!} with the masked rows filled. + Masked {#CIF|CIF file|CIF card!}: {cif_masked#} + Constraint: The {density__names__noun} should be {density#} {density__units}, and the {#chemical formula|composition|reduced formula!} should be {formula#}. + Answer: {cif#} + - |- + Task: Fill the rows masked with `[MASK]` in this {#CIF|CIF file|CIF card!} to fulfill the given constraints. Return the {#CIF|CIF file|CIF card!} with the masked rows filled. + Masked {#CIF|CIF file|CIF card!}: {cif_masked#} + Constraint: The {density__names__noun} should be {density#} {density__units}, the {#chemical formula|composition|reduced formula!} should be {formula#}, and the {spacegroup__names__noun} should be {spacegroup#}. + Answer: {cif#} + - |- + Question: {#What is the|What's the!} complete {#CIF|CIF file|CIF card!} of the {#material|compound|solid!} with the masked {#CIF|CIF file|CIF card!} {cif_masked#} and {density__names__noun} {density#} {density__units}? + Answer: {cif#} + - |- + Question: {#What is the|What's the!} complete {#CIF|CIF file|CIF card!} of the {#material|compound|solid!} with the masked {#CIF|CIF file|CIF card!} {cif_masked#}, {density__names__noun} {density#} {density__units}, and {#chemical formula|composition|reduced formula!} {formula#}? 
+ Answer: {cif#} diff --git a/data/tabular/nr_ahr_tox21/meta.yaml b/data/tabular/nr_ahr_tox21/meta.yaml index d3ab8db35..bc964782c 100644 --- a/data/tabular/nr_ahr_tox21/meta.yaml +++ b/data/tabular/nr_ahr_tox21/meta.yaml @@ -1,134 +1,133 @@ ---- name: nr_ahr_tox21 description: |- - Tox21 is a data challenge which contains qualitative toxicity measurements - for 7,831 compounds on 12 different targets, such as nuclear receptors and stress - response pathways. + Tox21 is a data challenge which contains qualitative toxicity measurements + for 7,831 compounds on 12 different targets, such as nuclear receptors and stress + response pathways. targets: - - id: toxicity_NR-AhR - description: whether it shows activity in a aryl hydrocarbon receptor toxicity assay (1) or not (0) - units: - type: boolean - names: - - noun: NR-AhR toxicity - - noun: aryl hydrocarbon receptor toxicity - - verb: is active in the aryl hydrocarbon receptor toxicity assay - - adjective: toxic in the NR-AhR assay - - adjective: toxic in the aryl hydrocarbon receptor assay - - gerund: displaying toxicity in the NR-AhR assay - - gerund: exhibiting toxicity in the NR-AhR assay - - gerund: demonstrating toxicity in the NR-aryl hydrocarbon receptor assay - uris: + - id: toxicity_NR-AhR + description: whether it shows activity in an aryl hydrocarbon receptor toxicity assay (1) or not (0) + units: + type: boolean + names: + - noun: NR-AhR toxicity + - noun: aryl hydrocarbon receptor toxicity + - verb: is active in the aryl hydrocarbon receptor toxicity assay + - adjective: toxic in the NR-AhR assay + - adjective: toxic in the aryl hydrocarbon receptor assay + - gerund: displaying toxicity in the NR-AhR assay + - gerund: exhibiting toxicity in the NR-AhR assay + - gerund: demonstrating toxicity in the NR-aryl hydrocarbon receptor assay + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id:
SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: http://dx.doi.org/10.3389/fenvs.2017.00003 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 - description: data source - - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 - description: assay name + - url: http://dx.doi.org/10.3389/fenvs.2017.00003 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 + description: data source + - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 + description: assay name num_points: 6549 bibtex: - - |- - @article{Huang2017, - doi = {10.3389/fenvs.2017.00003}, - url = {https://doi.org/10.3389/fenvs.2017.00003}, - year = {2017}, - month = jan, - publisher = {Frontiers Media SA}, - volume = {5}, - author = {Ruili Huang and Menghang Xia}, - title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor - and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, - journal = {Frontiers in Environmental Science} + - |- + @article{Huang2017, + doi = {10.3389/fenvs.2017.00003}, + url = {https://doi.org/10.3389/fenvs.2017.00003}, + year = {2017}, + month = jan, + publisher = {Frontiers Media SA}, + volume = {5}, + author = {Ruili Huang and Menghang Xia}, + title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor + and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, + journal = {Frontiers in Environmental Science} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__gerund}. 
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_NR-AhR#no &NULL}{toxicity_NR-AhR__names__noun} {#properties|characteristics|features!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_NR-AhR#not &NULL}identified as {toxicity_NR-AhR__names__adjective}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-AhR__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. - Result: {toxicity_NR-AhR#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-AhR__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_NR-AhR__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_NR-AhR__names__adjective}? - Assistant: {toxicity_NR-AhR#No&Yes}, this molecule is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_NR-AhR__names__adjective}? - Assistant: {toxicity_NR-AhR#No&Yes}, it is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}. 
- - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}? - Assistant: This is a molecule that is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {toxicity_NR-AhR#not &NULL}be {toxicity_NR-AhR__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {toxicity_NR-AhR#not &NULL}be {toxicity_NR-AhR__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {toxicity_NR-AhR__names__adjective}:{toxicity_NR-AhR#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-AhR__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. 
- Result:{toxicity_NR-AhR#False&True} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_NR-AhR__names__adjective}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-AhR__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_NR-AhR%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-AhR__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_NR-AhR%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_NR-AhR%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_NR-AhR%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}. 
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__gerund}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_NR-AhR#no &NULL}{toxicity_NR-AhR__names__noun} {#properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_NR-AhR#not &NULL}identified as {toxicity_NR-AhR__names__adjective}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-AhR__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. + Result: {toxicity_NR-AhR#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-AhR__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_NR-AhR__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_NR-AhR__names__adjective}? + Assistant: {toxicity_NR-AhR#No&Yes}, this molecule is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_NR-AhR__names__adjective}? 
+ Assistant: {toxicity_NR-AhR#No&Yes}, it is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}? + Assistant: This is a molecule that is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {toxicity_NR-AhR#not &NULL}be {toxicity_NR-AhR__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {toxicity_NR-AhR#not &NULL}be {toxicity_NR-AhR__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {toxicity_NR-AhR__names__adjective}:{toxicity_NR-AhR#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-AhR__names__adjective}. 
+ {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{toxicity_NR-AhR#False&True} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_NR-AhR__names__adjective}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-AhR__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_NR-AhR%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-AhR__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_NR-AhR%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%toxicity_NR-AhR%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_NR-AhR#not &NULL}{toxicity_NR-AhR__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {SMILES%toxicity_NR-AhR%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/nr_ar_lbd_tox21/meta.yaml b/data/tabular/nr_ar_lbd_tox21/meta.yaml index 3aead6d38..ac6226ed8 100644 --- a/data/tabular/nr_ar_lbd_tox21/meta.yaml +++ b/data/tabular/nr_ar_lbd_tox21/meta.yaml @@ -1,135 +1,133 @@ ---- name: nr_ar_lbd_tox21 description: |- - Tox21 is a data challenge which contains qualitative toxicity measurements - for 7,831 compounds on 12 different targets, such as nuclear receptors and stress - response pathways. + Tox21 is a data challenge which contains qualitative toxicity measurements + for 7,831 compounds on 12 different targets, such as nuclear receptors and stress + response pathways. targets: - - id: toxicity_NR-AR-LBD - description: whether it shows activity in the NR-AR-LBD toxicity assay (1) or not (0) - units: - type: boolean - names: - - noun: NR-AR-LBD toxicity - - noun: androgen receptor ligand-binding domain toxicity - - verb: is active in the NR-AR-LBD toxicity assay - - adjective: toxic in the NR-AR-LBD assay - - adjective: toxic in the androgen receptor ligand-binding domain assay - - gerund: displaying toxicity in the NR-AR ligand binding domain assay - - gerund: exhibiting toxicity in the NR-androgen-LBD receptor alpha assay - - gerund: demonstrating toxicity in the NR-androgen-LBD receptor alpha assay - uris: + - id: toxicity_NR-AR-LBD + description: whether it shows activity in the NR-AR-LBD toxicity assay (1) or not (0) + units: + type: boolean + names: + - noun: NR-AR-LBD toxicity + - noun: androgen receptor ligand-binding domain toxicity + - verb: is active in the NR-AR-LBD toxicity assay + - adjective: toxic in the NR-AR-LBD assay + - adjective: toxic in the androgen receptor ligand-binding domain assay + - gerund: displaying toxicity in the NR-AR ligand binding domain assay + - gerund: exhibiting toxicity in the NR-androgen-LBD receptor alpha assay + - gerund: demonstrating toxicity in the NR-androgen-LBD receptor alpha 
assay + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: http://dx.doi.org/10.3389/fenvs.2017.00003 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 - description: data source - - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 - description: assay name + - url: http://dx.doi.org/10.3389/fenvs.2017.00003 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 + description: data source + - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 + description: assay name num_points: 6758 bibtex: - - |- - @article{Huang2017, - doi = {10.3389/fenvs.2017.00003}, - url = {https://doi.org/10.3389/fenvs.2017.00003}, - year = {2017}, - month = jan, - publisher = {Frontiers Media SA}, - volume = {5}, - author = {Ruili Huang and Menghang Xia}, - title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor - and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, - journal = {Frontiers in Environmental Science} + - |- + @article{Huang2017, + doi = {10.3389/fenvs.2017.00003}, + url = {https://doi.org/10.3389/fenvs.2017.00003}, + year = {2017}, + month = jan, + publisher = {Frontiers Media SA}, + volume = {5}, + author = {Ruili Huang and Menghang Xia}, + title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor + and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, + journal = {Frontiers in Environmental Science} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-AR-LBD#not 
&NULL}{toxicity_NR-AR-LBD__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__gerund}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_NR-AR-LBD#no &NULL}{toxicity_NR-AR-LBD__names__noun} - {#properties|characteristics|features!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_NR-AR-LBD#not &NULL}identified as {toxicity_NR-AR-LBD__names__adjective}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-AR-LBD__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. - Result: {toxicity_NR-AR-LBD#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-AR-LBD__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_NR-AR-LBD__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_NR-AR-LBD__names__adjective}? - Assistant: {toxicity_NR-AR-LBD#No&Yes}, this molecule is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}. 
- - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_NR-AR-LBD__names__adjective}? - Assistant: {toxicity_NR-AR-LBD#No&Yes}, it is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}? - Assistant: This is a molecule that is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {toxicity_NR-AR-LBD#not &NULL}be {toxicity_NR-AR-LBD__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {toxicity_NR-AR-LBD#not &NULL}be {toxicity_NR-AR-LBD__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {toxicity_NR-AR-LBD__names__adjective}:{toxicity_NR-AR-LBD#no&yes} - - |- - Task: Please classify a molecule based on the description. 
- Description: A molecule that is {toxicity_NR-AR-LBD__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{toxicity_NR-AR-LBD#False&True} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_NR-AR-LBD__names__adjective}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-AR-LBD__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_NR-AR-LBD%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-AR-LBD__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_NR-AR-LBD%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_NR-AR-LBD%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}? 
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_NR-AR-LBD%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__gerund}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_NR-AR-LBD#no &NULL}{toxicity_NR-AR-LBD__names__noun} {#properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_NR-AR-LBD#not &NULL}identified as {toxicity_NR-AR-LBD__names__adjective}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-AR-LBD__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. + Result: {toxicity_NR-AR-LBD#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-AR-LBD__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_NR-AR-LBD__names__adjective}. 
+ Result: {SMILES#} + - |- + User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_NR-AR-LBD__names__adjective}? + Assistant: {toxicity_NR-AR-LBD#No&Yes}, this molecule is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_NR-AR-LBD__names__adjective}? + Assistant: {toxicity_NR-AR-LBD#No&Yes}, it is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}? + Assistant: This is a molecule that is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {toxicity_NR-AR-LBD#not &NULL}be {toxicity_NR-AR-LBD__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {toxicity_NR-AR-LBD#not &NULL}be {toxicity_NR-AR-LBD__names__adjective}. 
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {toxicity_NR-AR-LBD__names__adjective}:{toxicity_NR-AR-LBD#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-AR-LBD__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{toxicity_NR-AR-LBD#False&True} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_NR-AR-LBD__names__adjective}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-AR-LBD__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_NR-AR-LBD%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-AR-LBD__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_NR-AR-LBD%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {SMILES%toxicity_NR-AR-LBD%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_NR-AR-LBD#not &NULL}{toxicity_NR-AR-LBD__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%toxicity_NR-AR-LBD%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/nr_ar_tox21/meta.yaml b/data/tabular/nr_ar_tox21/meta.yaml index 95586ce19..7e09db035 100644 --- a/data/tabular/nr_ar_tox21/meta.yaml +++ b/data/tabular/nr_ar_tox21/meta.yaml @@ -1,134 +1,133 @@ ---- name: nr_ar_tox21 description: |- - Tox21 is a data challenge which contains qualitative toxicity measurements - for 7,831 compounds on 12 different targets, such as nuclear receptors and stress - response pathways. + Tox21 is a data challenge which contains qualitative toxicity measurements + for 7,831 compounds on 12 different targets, such as nuclear receptors and stress + response pathways. 
targets: - - id: toxicity_NR-AR - description: whether it toxic in a androgen receptor toxicity assay (1) or not (0) - units: - type: boolean - names: - - noun: NR-AR toxicity - - noun: NR-androgen receptor toxicity - - verb: is toxic in a androgen receptor toxicity assay - - adjective: toxic in the NR-AR assay - - adjective: toxic in the NR-androgen receptor assay - - gerund: displaying toxicity in the NR-AR assay - - gerund: exhibiting toxicity in the NR-androgen assay - - gerund: demonstrating toxicity in the NR-androgen assay - uris: + - id: toxicity_NR-AR + description: whether it is toxic in an androgen receptor toxicity assay (1) or not (0) + units: + type: boolean + names: + - noun: NR-AR toxicity + - noun: NR-androgen receptor toxicity + - verb: is toxic in an androgen receptor toxicity assay + - adjective: toxic in the NR-AR assay + - adjective: toxic in the NR-androgen receptor assay + - gerund: displaying toxicity in the NR-AR assay + - gerund: exhibiting toxicity in the NR-androgen assay + - gerund: demonstrating toxicity in the NR-androgen assay + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: http://dx.doi.org/10.3389/fenvs.2017.00003 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 - description: data source - - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 - description: assay name + - url: http://dx.doi.org/10.3389/fenvs.2017.00003 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 + description: data source + - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 + description: assay name num_points: 7265 bibtex: - -
|- - @article{Huang2017, - doi = {10.3389/fenvs.2017.00003}, - url = {https://doi.org/10.3389/fenvs.2017.00003}, - year = {2017}, - month = jan, - publisher = {Frontiers Media SA}, - volume = {5}, - author = {Ruili Huang and Menghang Xia}, - title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor - and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, - journal = {Frontiers in Environmental Science} + - |- + @article{Huang2017, + doi = {10.3389/fenvs.2017.00003}, + url = {https://doi.org/10.3389/fenvs.2017.00003}, + year = {2017}, + month = jan, + publisher = {Frontiers Media SA}, + volume = {5}, + author = {Ruili Huang and Menghang Xia}, + title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor + and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, + journal = {Frontiers in Environmental Science} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__gerund}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_NR-AR#no &NULL}{toxicity_NR-AR__names__noun} {#properties|characteristics|features!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_NR-AR#not &NULL}identified as {toxicity_NR-AR__names__adjective}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-AR__names__adjective}. 
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. - Result: {toxicity_NR-AR#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-AR__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_NR-AR__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_NR-AR__names__adjective}? - Assistant: {toxicity_NR-AR#No&Yes}, this molecule is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_NR-AR__names__adjective}? - Assistant: {toxicity_NR-AR#No&Yes}, it is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}? - Assistant: This is a molecule that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: This sounds {#very exciting. |very interesting. | very curious. 
!}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {toxicity_NR-AR#not &NULL}be {toxicity_NR-AR__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {toxicity_NR-AR#not &NULL}be {toxicity_NR-AR__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {toxicity_NR-AR__names__adjective}:{toxicity_NR-AR#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-AR__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{toxicity_NR-AR#False&True} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_NR-AR__names__adjective}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-AR__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_NR-AR%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. 
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-AR__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_NR-AR%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_NR-AR%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_NR-AR%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__gerund}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_NR-AR#no &NULL}{toxicity_NR-AR__names__noun} {#properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_NR-AR#not &NULL}identified as {toxicity_NR-AR__names__adjective}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-AR__names__adjective}. 
+ {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. + Result: {toxicity_NR-AR#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-AR__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_NR-AR__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_NR-AR__names__adjective}? + Assistant: {toxicity_NR-AR#No&Yes}, this molecule is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_NR-AR__names__adjective}? + Assistant: {toxicity_NR-AR#No&Yes}, it is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}? + Assistant: This is a molecule that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: This sounds {#very exciting. |very interesting. | very curious. 
!}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {toxicity_NR-AR#not &NULL}be {toxicity_NR-AR__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {toxicity_NR-AR#not &NULL}be {toxicity_NR-AR__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {toxicity_NR-AR__names__adjective}:{toxicity_NR-AR#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-AR__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{toxicity_NR-AR#False&True} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_NR-AR__names__adjective}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-AR__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_NR-AR%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. 
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-AR__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_NR-AR%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%toxicity_NR-AR%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%toxicity_NR-AR%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/nr_aromatase_tox21/meta.yaml b/data/tabular/nr_aromatase_tox21/meta.yaml index 2419fee5d..9f8a2527a 100644 --- a/data/tabular/nr_aromatase_tox21/meta.yaml +++ b/data/tabular/nr_aromatase_tox21/meta.yaml @@ -1,135 +1,133 @@ ---- name: nr_aromatase_tox21 description: |- - Tox21 is a data challenge which contains qualitative toxicity measurements - for 7,831 compounds on 12 different targets, such as nuclear receptors and stress - response pathways. + Tox21 is a data challenge which contains qualitative toxicity measurements + for 7,831 compounds on 12 different targets, such as nuclear receptors and stress + response pathways. 
targets: - - id: toxicity_NR-Aromatase - description: whether it shows activity in the NR-Aromatase enzyme toxicity assay (1) or not (0) - units: - type: boolean - names: - - noun: NR-Aromatase enzyme toxicity - - noun: Aromatase enzyme toxicity - - verb: is active in the NR-Aromatase enzyme toxicity assay - - adjective: toxic in the NR-Aromatase enzyme assay - - adjective: toxic in the Aromatase enzyme assay - - gerund: displaying toxicity in the NR-Aromatase assay - - gerund: exhibiting toxicity in the NR-Aromatase enzyme assay - - gerund: demonstrating toxicity in the NR-aromatase enzyme assay - uris: + - id: toxicity_NR-Aromatase + description: whether it shows activity in the NR-Aromatase enzyme toxicity assay (1) or not (0) + units: + type: boolean + names: + - noun: NR-Aromatase enzyme toxicity + - noun: Aromatase enzyme toxicity + - verb: is active in the NR-Aromatase enzyme toxicity assay + - adjective: toxic in the NR-Aromatase enzyme assay + - adjective: toxic in the Aromatase enzyme assay + - gerund: displaying toxicity in the NR-Aromatase assay + - gerund: exhibiting toxicity in the NR-Aromatase enzyme assay + - gerund: demonstrating toxicity in the NR-aromatase enzyme assay + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: http://dx.doi.org/10.3389/fenvs.2017.00003 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 - description: data source - - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 - description: assay name + - url: http://dx.doi.org/10.3389/fenvs.2017.00003 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 + description: data source + - url: 
https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 + description: assay name num_points: 5821 bibtex: - - |- - @article{Huang2017, - doi = {10.3389/fenvs.2017.00003}, - url = {https://doi.org/10.3389/fenvs.2017.00003}, - year = {2017}, - month = jan, - publisher = {Frontiers Media SA}, - volume = {5}, - author = {Ruili Huang and Menghang Xia}, - title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor - and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, - journal = {Frontiers in Environmental Science} + - |- + @article{Huang2017, + doi = {10.3389/fenvs.2017.00003}, + url = {https://doi.org/10.3389/fenvs.2017.00003}, + year = {2017}, + month = jan, + publisher = {Frontiers Media SA}, + volume = {5}, + author = {Ruili Huang and Menghang Xia}, + title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor + and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, + journal = {Frontiers in Environmental Science} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__gerund}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_NR-Aromatase#no &NULL}{toxicity_NR-Aromatase__names__noun} - {#properties|characteristics|features!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_NR-Aromatase#not &NULL}identified as {toxicity_NR-Aromatase__names__adjective}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}. - - |- - Task: Please classify a molecule based on the description. 
- Description: A molecule that is {toxicity_NR-Aromatase__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. - Result: {toxicity_NR-Aromatase#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-Aromatase__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_NR-Aromatase__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_NR-Aromatase__names__adjective}? - Assistant: {toxicity_NR-Aromatase#No&Yes}, this molecule is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_NR-Aromatase__names__adjective}? - Assistant: {toxicity_NR-Aromatase#No&Yes}, it is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}? 
- Assistant: This is a molecule that is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {toxicity_NR-Aromatase#not &NULL}be {toxicity_NR-Aromatase__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {toxicity_NR-Aromatase#not &NULL}be {toxicity_NR-Aromatase__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {toxicity_NR-Aromatase__names__adjective}:{toxicity_NR-Aromatase#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-Aromatase__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{toxicity_NR-Aromatase#False&True} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_NR-Aromatase__names__adjective}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. 
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-Aromatase__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_NR-Aromatase%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-Aromatase__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_NR-Aromatase%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_NR-Aromatase%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_NR-Aromatase%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__gerund}. 
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_NR-Aromatase#no &NULL}{toxicity_NR-Aromatase__names__noun} {#properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_NR-Aromatase#not &NULL}identified as {toxicity_NR-Aromatase__names__adjective}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-Aromatase__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. + Result: {toxicity_NR-Aromatase#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-Aromatase__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_NR-Aromatase__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_NR-Aromatase__names__adjective}? + Assistant: {toxicity_NR-Aromatase#No&Yes}, this molecule is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_NR-Aromatase__names__adjective}? 
+ Assistant: {toxicity_NR-Aromatase#No&Yes}, it is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}? + Assistant: This is a molecule that is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {toxicity_NR-Aromatase#not &NULL}be {toxicity_NR-Aromatase__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {toxicity_NR-Aromatase#not &NULL}be {toxicity_NR-Aromatase__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {toxicity_NR-Aromatase__names__adjective}:{toxicity_NR-Aromatase#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-Aromatase__names__adjective}. 
+ {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{toxicity_NR-Aromatase#False&True} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_NR-Aromatase__names__adjective}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-Aromatase__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_NR-Aromatase%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-Aromatase__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_NR-Aromatase%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%toxicity_NR-Aromatase%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_NR-Aromatase#not &NULL}{toxicity_NR-Aromatase__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {SMILES%toxicity_NR-Aromatase%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/nr_er_lbd_tox21/meta.yaml b/data/tabular/nr_er_lbd_tox21/meta.yaml index 406a5de29..1fdcb8b5e 100644 --- a/data/tabular/nr_er_lbd_tox21/meta.yaml +++ b/data/tabular/nr_er_lbd_tox21/meta.yaml @@ -1,140 +1,138 @@ ---- name: nr_er_lbd_tox21 description: |- - Tox21 is a data challenge which contains qualitative toxicity measurements - for 7,831 compounds on 12 different targets, such as nuclear receptors and stress - response pathways. + Tox21 is a data challenge which contains qualitative toxicity measurements + for 7,831 compounds on 12 different targets, such as nuclear receptors and stress + response pathways. targets: - - id: toxicity_NR-ER-LBD - description: whether it shows activity in the NR-ER-LBD toxicity assay (1) or not (0) - units: - type: boolean - names: - - noun: NR-ER-LBD toxicity - - noun: NR-estrogen receptor alpha ligand binding domain toxicity - - noun: estrogen receptor alpha ligand binding domain toxicity - - verb: is active in the NR-ER-LBD toxicity assay - - verb: is active in the NR-estrogen receptor alpha ligand binding domain assay - - verb: is active in the estrogen receptor alpha ligand binding domain assay - - adjective: toxic in the NR-ER-LBD assay - - adjective: toxic in the NR-estrogen receptor alpha ligand binding domain assay - - adjective: toxic in the estrogen receptor alpha ligand binding domain assay - - gerund: showing toxicity in the NR-ER-LBD assay - - gerund: displaying toxicity in the NR-ER ligand binding domain assay - - gerund: exhibiting toxicity in the NR-Estrogen-LBD receptor alpha assay - - gerund: demonstrating toxicity in the NR-estrogen-LBD receptor alpha assay - uris: + - id: toxicity_NR-ER-LBD + description: whether it shows activity in the NR-ER-LBD toxicity assay (1) or not (0) + units: + type: boolean + names: + - noun: NR-ER-LBD toxicity + - noun: NR-estrogen receptor alpha ligand binding domain toxicity + 
- noun: estrogen receptor alpha ligand binding domain toxicity + - verb: is active in the NR-ER-LBD toxicity assay + - verb: is active in the NR-estrogen receptor alpha ligand binding domain assay + - verb: is active in the estrogen receptor alpha ligand binding domain assay + - adjective: toxic in the NR-ER-LBD assay + - adjective: toxic in the NR-estrogen receptor alpha ligand binding domain assay + - adjective: toxic in the estrogen receptor alpha ligand binding domain assay + - gerund: showing toxicity in the NR-ER-LBD assay + - gerund: displaying toxicity in the NR-ER ligand binding domain assay + - gerund: exhibiting toxicity in the NR-Estrogen-LBD receptor alpha assay + - gerund: demonstrating toxicity in the NR-estrogen-LBD receptor alpha assay + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: http://dx.doi.org/10.3389/fenvs.2017.00003 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 - description: data source - - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 - description: assay name + - url: http://dx.doi.org/10.3389/fenvs.2017.00003 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 + description: data source + - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 + description: assay name num_points: 6955 bibtex: - - |- - @article{Huang2017, - doi = {10.3389/fenvs.2017.00003}, - url = {https://doi.org/10.3389/fenvs.2017.00003}, - year = {2017}, - month = jan, - publisher = {Frontiers Media SA}, - volume = {5}, - author = {Ruili Huang and Menghang Xia}, - title = {Editorial: Tox21 Challenge to Build Predictive Models of 
Nuclear Receptor - and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, - journal = {Frontiers in Environmental Science} + - |- + @article{Huang2017, + doi = {10.3389/fenvs.2017.00003}, + url = {https://doi.org/10.3389/fenvs.2017.00003}, + year = {2017}, + month = jan, + publisher = {Frontiers Media SA}, + volume = {5}, + author = {Ruili Huang and Menghang Xia}, + title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor + and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, + journal = {Frontiers in Environmental Science} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__gerund}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_NR-ER-LBD#no &NULL}{toxicity_NR-ER-LBD__names__noun} - {#properties|characteristics|features!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_NR-ER-LBD#not &NULL}identified as {toxicity_NR-ER-LBD__names__adjective}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-ER-LBD__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. - Result: {toxicity_NR-ER-LBD#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-ER-LBD__names__adjective}. 
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_NR-ER-LBD__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_NR-ER-LBD__names__adjective}? - Assistant: {toxicity_NR-ER-LBD#No&Yes}, this molecule is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_NR-ER-LBD__names__adjective}? - Assistant: {toxicity_NR-ER-LBD#No&Yes}, it is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}? - Assistant: This is a molecule that is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {toxicity_NR-ER-LBD#not &NULL}be {toxicity_NR-ER-LBD__names__adjective}. 
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {toxicity_NR-ER-LBD#not &NULL}be {toxicity_NR-ER-LBD__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {toxicity_NR-ER-LBD__names__adjective}:{toxicity_NR-ER-LBD#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-ER-LBD__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{toxicity_NR-ER-LBD#False&True} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_NR-ER-LBD__names__adjective}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-ER-LBD__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_NR-ER-LBD%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-ER-LBD__names__adjective}? 
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_NR-ER-LBD%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_NR-ER-LBD%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_NR-ER-LBD%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__gerund}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_NR-ER-LBD#no &NULL}{toxicity_NR-ER-LBD__names__noun} {#properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_NR-ER-LBD#not &NULL}identified as {toxicity_NR-ER-LBD__names__adjective}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-ER-LBD__names__adjective}. 
+ {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. + Result: {toxicity_NR-ER-LBD#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-ER-LBD__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_NR-ER-LBD__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_NR-ER-LBD__names__adjective}? + Assistant: {toxicity_NR-ER-LBD#No&Yes}, this molecule is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_NR-ER-LBD__names__adjective}? + Assistant: {toxicity_NR-ER-LBD#No&Yes}, it is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}? + Assistant: This is a molecule that is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. 
+ Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {toxicity_NR-ER-LBD#not &NULL}be {toxicity_NR-ER-LBD__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {toxicity_NR-ER-LBD#not &NULL}be {toxicity_NR-ER-LBD__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {toxicity_NR-ER-LBD__names__adjective}:{toxicity_NR-ER-LBD#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-ER-LBD__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{toxicity_NR-ER-LBD#False&True} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_NR-ER-LBD__names__adjective}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-ER-LBD__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. 
+ Options: + {toxicity_NR-ER-LBD%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-ER-LBD__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_NR-ER-LBD%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%toxicity_NR-ER-LBD%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_NR-ER-LBD#not &NULL}{toxicity_NR-ER-LBD__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%toxicity_NR-ER-LBD%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/nr_er_tox21/meta.yaml b/data/tabular/nr_er_tox21/meta.yaml index cd3363285..1cd12711d 100644 --- a/data/tabular/nr_er_tox21/meta.yaml +++ b/data/tabular/nr_er_tox21/meta.yaml @@ -1,139 +1,138 @@ ---- name: nr_er_tox21 description: |- - Tox21 is a data challenge which contains qualitative toxicity measurements - for 7,831 compounds on 12 different targets, such as nuclear receptors and stress - response pathways. + Tox21 is a data challenge which contains qualitative toxicity measurements + for 7,831 compounds on 12 different targets, such as nuclear receptors and stress + response pathways. 
targets: - - id: toxicity_NR-ER - description: whether it shows activity in the NR-ER toxicity assay (1) or not (0) - units: - type: boolean - names: - - noun: NR-ER toxicity - - noun: NR-Estrogen receptor alpha toxicity - - noun: estrogen receptor alpha toxicity - - verb: is active in the NR-ER toxicity assay - - verb: is active in the NR-Estrogen receptor alpha toxicity assay - - verb: is active in the estrogen receptor alpha toxicity assay - - adjective: toxic in the NR-ER assay - - adjective: toxic in the NR-Estrogen receptor alpha assay - - adjective: toxic in the estrogen receptor alpha assay - - gerund: showing toxicity in the NR-ER assay - - gerund: displaying toxicity in the NR-ER assay - - gerund: exhibiting toxicity in the NR-Estrogen receptor alpha assay - - gerund: demonstrating toxicity in the NR-estrogen receptor alpha assay - uris: + - id: toxicity_NR-ER + description: whether it shows activity in the NR-ER toxicity assay (1) or not (0) + units: + type: boolean + names: + - noun: NR-ER toxicity + - noun: NR-Estrogen receptor alpha toxicity + - noun: estrogen receptor alpha toxicity + - verb: is active in the NR-ER toxicity assay + - verb: is active in the NR-Estrogen receptor alpha toxicity assay + - verb: is active in the estrogen receptor alpha toxicity assay + - adjective: toxic in the NR-ER assay + - adjective: toxic in the NR-Estrogen receptor alpha assay + - adjective: toxic in the estrogen receptor alpha assay + - gerund: showing toxicity in the NR-ER assay + - gerund: displaying toxicity in the NR-ER assay + - gerund: exhibiting toxicity in the NR-Estrogen receptor alpha assay + - gerund: demonstrating toxicity in the NR-estrogen receptor alpha assay + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - 
- url: http://dx.doi.org/10.3389/fenvs.2017.00003 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 - description: data source - - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 - description: assay name + - url: http://dx.doi.org/10.3389/fenvs.2017.00003 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 + description: data source + - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 + description: assay name num_points: 6193 bibtex: - - |- - @article{Huang2017, - doi = {10.3389/fenvs.2017.00003}, - url = {https://doi.org/10.3389/fenvs.2017.00003}, - year = {2017}, - month = jan, - publisher = {Frontiers Media SA}, - volume = {5}, - author = {Ruili Huang and Menghang Xia}, - title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor - and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, - journal = {Frontiers in Environmental Science} + - |- + @article{Huang2017, + doi = {10.3389/fenvs.2017.00003}, + url = {https://doi.org/10.3389/fenvs.2017.00003}, + year = {2017}, + month = jan, + publisher = {Frontiers Media SA}, + volume = {5}, + author = {Ruili Huang and Menghang Xia}, + title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor + and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, + journal = {Frontiers in Environmental Science} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__gerund}. 
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_NR-ER#no &NULL}{toxicity_NR-ER__names__noun} {#properties|characteristics|features!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_NR-ER#not &NULL}identified as {toxicity_NR-ER__names__adjective}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-ER__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. - Result: {toxicity_NR-ER#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-ER__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_NR-ER__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_NR-ER__names__adjective}? - Assistant: {toxicity_NR-ER#No&Yes}, this molecule is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_NR-ER__names__adjective}? - Assistant: {toxicity_NR-ER#No&Yes}, it is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}. 
- - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}? - Assistant: This is a molecule that is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {toxicity_NR-ER#not &NULL}be {toxicity_NR-ER__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {toxicity_NR-ER#not &NULL}be {toxicity_NR-ER__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {toxicity_NR-ER__names__adjective}:{toxicity_NR-ER#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-ER__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. 
- Result:{toxicity_NR-ER#False&True} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_NR-ER__names__adjective}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-ER__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_NR-ER%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-ER__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_NR-ER%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_NR-ER%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_NR-ER%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}. 
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__gerund}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_NR-ER#no &NULL}{toxicity_NR-ER__names__noun} {#properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_NR-ER#not &NULL}identified as {toxicity_NR-ER__names__adjective}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-ER__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. + Result: {toxicity_NR-ER#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-ER__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_NR-ER__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_NR-ER__names__adjective}? + Assistant: {toxicity_NR-ER#No&Yes}, this molecule is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_NR-ER__names__adjective}? 
+ Assistant: {toxicity_NR-ER#No&Yes}, it is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}? + Assistant: This is a molecule that is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {toxicity_NR-ER#not &NULL}be {toxicity_NR-ER__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {toxicity_NR-ER#not &NULL}be {toxicity_NR-ER__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {toxicity_NR-ER__names__adjective}:{toxicity_NR-ER#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-ER__names__adjective}. 
+ {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{toxicity_NR-ER#False&True} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_NR-ER__names__adjective}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-ER__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_NR-ER%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-ER__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_NR-ER%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%toxicity_NR-ER%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_NR-ER#not &NULL}{toxicity_NR-ER__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {SMILES%toxicity_NR-ER%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/nr_ppar_gamma_tox21/meta.yaml b/data/tabular/nr_ppar_gamma_tox21/meta.yaml index 659ec815f..ba3f47178 100644 --- a/data/tabular/nr_ppar_gamma_tox21/meta.yaml +++ b/data/tabular/nr_ppar_gamma_tox21/meta.yaml @@ -1,136 +1,134 @@ ---- name: nr_ppar_gamma_tox21 description: |- - Tox21 is a data challenge which contains qualitative toxicity measurements - for 7,831 compounds on 12 different targets, such as nuclear receptors and stress - response pathways. + Tox21 is a data challenge which contains qualitative toxicity measurements + for 7,831 compounds on 12 different targets, such as nuclear receptors and stress + response pathways. targets: - - id: toxicity_NR-PPAR-gamma - description: whether it shows activity in NR-PPAR-gamma toxicity assay (1) or not (0) - units: - type: boolean - names: - - noun: NR-PPAR-gamma toxicity - - noun: NR-peroxisome proliferator-activated receptor gamma toxicity - - noun: peroxisome proliferator-activated receptor gamma toxicity - - adjective: toxic in the NR-PPAR-gamma assay - - adjective: toxic in the NR-peroxisome proliferator-activated receptor gamma assay - - adjective: toxic in the peroxisome proliferator-activated receptor gamma assay - - gerund: showing toxicity in the NR-PPAR-gamma assay - - gerund: displaying toxicity in the NR-PPAR-gamma assay - - gerund: exhibiting toxicity in the NR-PPAR-gamma assay - uris: + - id: toxicity_NR-PPAR-gamma + description: whether it shows activity in NR-PPAR-gamma toxicity assay (1) or not (0) + units: + type: boolean + names: + - noun: NR-PPAR-gamma toxicity + - noun: NR-peroxisome proliferator-activated receptor gamma toxicity + - noun: peroxisome proliferator-activated receptor gamma toxicity + - adjective: toxic in the NR-PPAR-gamma assay + - adjective: toxic in the NR-peroxisome proliferator-activated receptor gamma assay + - adjective: toxic in the peroxisome proliferator-activated receptor 
gamma assay + - gerund: showing toxicity in the NR-PPAR-gamma assay + - gerund: displaying toxicity in the NR-PPAR-gamma assay + - gerund: exhibiting toxicity in the NR-PPAR-gamma assay + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: http://dx.doi.org/10.3389/fenvs.2017.00003 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 - description: data source - - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 - description: assay name + - url: http://dx.doi.org/10.3389/fenvs.2017.00003 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 + description: data source + - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 + description: assay name num_points: 6450 bibtex: - - |- - @article{Huang2017, - doi = {10.3389/fenvs.2017.00003}, - url = {https://doi.org/10.3389/fenvs.2017.00003}, - year = {2017}, - month = jan, - publisher = {Frontiers Media SA}, - volume = {5}, - author = {Ruili Huang and Menghang Xia}, - title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor - and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, - journal = {Frontiers in Environmental Science} + - |- + @article{Huang2017, + doi = {10.3389/fenvs.2017.00003}, + url = {https://doi.org/10.3389/fenvs.2017.00003}, + year = {2017}, + month = jan, + publisher = {Frontiers Media SA}, + volume = {5}, + author = {Ruili Huang and Menghang Xia}, + title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor + and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and 
Drugs}, + journal = {Frontiers in Environmental Science} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__gerund}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_NR-PPAR-gamma#no &NULL}{toxicity_NR-PPAR-gamma__names__noun} - {#properties|characteristics|features!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_NR-PPAR-gamma#not &NULL}identified as {toxicity_NR-PPAR-gamma__names__adjective}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-PPAR-gamma__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. - Result: {toxicity_NR-PPAR-gamma#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-PPAR-gamma__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_NR-PPAR-gamma__names__adjective}. 
- Result: {SMILES#} - - |- - User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_NR-PPAR-gamma__names__adjective}? - Assistant: {toxicity_NR-PPAR-gamma#No&Yes}, this molecule is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_NR-PPAR-gamma__names__adjective}? - Assistant: {toxicity_NR-PPAR-gamma#No&Yes}, it is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}? - Assistant: This is a molecule that is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {toxicity_NR-PPAR-gamma#not &NULL}be {toxicity_NR-PPAR-gamma__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? 
- User: Yes, the molecule should {toxicity_NR-PPAR-gamma#not &NULL}be {toxicity_NR-PPAR-gamma__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {toxicity_NR-PPAR-gamma__names__adjective}:{toxicity_NR-PPAR-gamma#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_NR-PPAR-gamma__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{toxicity_NR-PPAR-gamma#False&True} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_NR-PPAR-gamma__names__adjective}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-PPAR-gamma__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_NR-PPAR-gamma%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-PPAR-gamma__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_NR-PPAR-gamma%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. 
- Question: Which molecules are {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_NR-PPAR-gamma%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_NR-PPAR-gamma%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__gerund}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_NR-PPAR-gamma#no &NULL}{toxicity_NR-PPAR-gamma__names__noun} {#properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_NR-PPAR-gamma#not &NULL}identified as {toxicity_NR-PPAR-gamma__names__adjective}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-PPAR-gamma__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. 
+ Result: {toxicity_NR-PPAR-gamma#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-PPAR-gamma__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_NR-PPAR-gamma__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_NR-PPAR-gamma__names__adjective}? + Assistant: {toxicity_NR-PPAR-gamma#No&Yes}, this molecule is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_NR-PPAR-gamma__names__adjective}? + Assistant: {toxicity_NR-PPAR-gamma#No&Yes}, it is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}? + Assistant: This is a molecule that is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: This sounds {#very exciting. |very interesting. | very curious. 
!}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {toxicity_NR-PPAR-gamma#not &NULL}be {toxicity_NR-PPAR-gamma__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {toxicity_NR-PPAR-gamma#not &NULL}be {toxicity_NR-PPAR-gamma__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {toxicity_NR-PPAR-gamma__names__adjective}:{toxicity_NR-PPAR-gamma#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_NR-PPAR-gamma__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{toxicity_NR-PPAR-gamma#False&True} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_NR-PPAR-gamma__names__adjective}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-PPAR-gamma__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. 
+ Options: + {toxicity_NR-PPAR-gamma%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_NR-PPAR-gamma__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_NR-PPAR-gamma%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%toxicity_NR-PPAR-gamma%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_NR-PPAR-gamma#not &NULL}{toxicity_NR-PPAR-gamma__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%toxicity_NR-PPAR-gamma%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/ocp/meta.yaml b/data/tabular/ocp/meta.yaml index d4668ac8f..bc414d6aa 100644 --- a/data/tabular/ocp/meta.yaml +++ b/data/tabular/ocp/meta.yaml @@ -1,46 +1,45 @@ ---- name: ocp description: |- - CatBerta training data. + CatBerta training data. 
targets: - - id: target - description: target - type: continuous - units: eV - significant_digits: 4 - names: - - noun: adsorption energy + - id: target + description: target + type: continuous + units: eV + significant_digits: 4 + names: + - noun: adsorption energy identifiers: - - id: text - type: text - description: description + - id: text + type: text + description: description license: MIT (based on ocp) links: - - url: https://drive.google.com/drive/folders/1puiJ9FbLEA3QIHmZromecEndlemag9hg?usp=sharing - description: original data source + - url: https://drive.google.com/drive/folders/1puiJ9FbLEA3QIHmZromecEndlemag9hg?usp=sharing + description: original data source num_points: 125000 bibtex: - - |- - @article{ock2023catalyst, - title={Catalyst Property Prediction with CatBERTa: Unveiling Feature Exploration Strategies through Large Language Models}, - author={Ock, Janghoon and Guntuboina, Chakradhar and Farimani, Amir Barati}, - journal={arXiv preprint arXiv:2309.00563}, - year={2023} - } - - |- - @article{ocp_dataset, - author = {Chanussot*, Lowik and Das*, Abhishek and Goyal*, Siddharth and Lavril*, Thibaut and Shuaibi*, Muhammed and Riviere, Morgane and Tran, Kevin and Heras-Domingo, Javier and Ho, Caleb and Hu, Weihua and Palizhati, Aini and Sriram, Anuroop and Wood, Brandon and Yoon, Junwoong and Parikh, Devi and Zitnick, C. 
Lawrence and Ulissi, Zachary}, - title = {Open Catalyst 2020 (OC20) Dataset and Community Challenges}, - journal = {ACS Catalysis}, - year = {2021}, - doi = {10.1021/acscatal.0c04525}, - } + - |- + @article{ock2023catalyst, + title={Catalyst Property Prediction with CatBERTa: Unveiling Feature Exploration Strategies through Large Language Models}, + author={Ock, Janghoon and Guntuboina, Chakradhar and Farimani, Amir Barati}, + journal={arXiv preprint arXiv:2309.00563}, + year={2023} + } + - |- + @article{ocp_dataset, + author = {Chanussot*, Lowik and Das*, Abhishek and Goyal*, Siddharth and Lavril*, Thibaut and Shuaibi*, Muhammed and Riviere, Morgane and Tran, Kevin and Heras-Domingo, Javier and Ho, Caleb and Hu, Weihua and Palizhati, Aini and Sriram, Anuroop and Wood, Brandon and Yoon, Junwoong and Parikh, Devi and Zitnick, C. Lawrence and Ulissi, Zachary}, + title = {Open Catalyst 2020 (OC20) Dataset and Community Challenges}, + journal = {ACS Catalysis}, + year = {2021}, + doi = {10.1021/acscatal.0c04525}, + } templates: - - |- - Question: What is the adsorption energy of the following adsorbate-adsorbent pair? - Text: {text#} - Answer: {target#} {target__units} - - |- - Task: {#Predict|Estimate|Calculate|Compute|Determine!} the adsorption energy of the following adsorbate-adsorbent pair. - Text: {text#} - Answer: {target#} {target__units} + - |- + Question: What is the adsorption energy of the following adsorbate-adsorbent pair? + Text: {text#} + Answer: {target#} {target__units} + - |- + Task: {#Predict|Estimate|Calculate|Compute|Determine!} the adsorption energy of the following adsorbate-adsorbent pair. 
+ Text: {text#} + Answer: {target#} {target__units} diff --git a/data/tabular/odd_one_out/meta.yaml b/data/tabular/odd_one_out/meta.yaml index 779ccead1..da2738b90 100644 --- a/data/tabular/odd_one_out/meta.yaml +++ b/data/tabular/odd_one_out/meta.yaml @@ -1,93 +1,92 @@ ---- name: odd_one_out description: |- - Tanimoto distance between Morgan fingerprints of SMILES in the ZINC dataset. - We performed filtering to exclude sequences of molecules where there is no strong difference. + Tanimoto distance between Morgan fingerprints of SMILES in the ZINC dataset. + We performed filtering to exclude sequences of molecules where there is no strong difference. targets: - - id: smallest_similarities - type: continuous - description: smallest Tanimoto similarity between Morgan fingerprints - names: - - noun: smallest Tanimoto similarity between Morgan fingerprints - - id: biggest_similarities - type: continuous - description: largest Tanimoto similarity between Morgan fingerprints - names: - - noun: largest Tanimoto similarity between Morgan fingerprints + - id: smallest_similarities + type: continuous + description: smallest Tanimoto similarity between Morgan fingerprints + names: + - noun: smallest Tanimoto similarity between Morgan fingerprints + - id: biggest_similarities + type: continuous + description: largest Tanimoto similarity between Morgan fingerprints + names: + - noun: largest Tanimoto similarity between Morgan fingerprints benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: smi_1 - type: SMILES - description: SMILES - - id: smi_2 - type: SMILES - description: SMILES - - id: smi_3 - type: SMILES - description: SMILES - - id: smi_4 - type: SMILES - description: SMILES - - id: odd_one_out_mol - type: SMILES - description: SMILES - - id: biggest_sim_0 - description: SMILES - type: SMILES - - id: biggest_sim_1 - type: SMILES - description: SMILES - - id: 
most_diff_0 - type: SMILES - description: SMILES - - id: most_diff_1 - type: SMILES - description: SMILES + - id: smi_1 + type: SMILES + description: SMILES + - id: smi_2 + type: SMILES + description: SMILES + - id: smi_3 + type: SMILES + description: SMILES + - id: smi_4 + type: SMILES + description: SMILES + - id: odd_one_out_mol + type: SMILES + description: SMILES + - id: biggest_sim_0 + description: SMILES + type: SMILES + - id: biggest_sim_1 + type: SMILES + description: SMILES + - id: most_diff_0 + type: SMILES + description: SMILES + - id: most_diff_1 + type: SMILES + description: SMILES license: MIT num_points: 98715 bibtex: - - |- - @article{Irwin_2020, - doi = {10.1021/acs.jcim.0c00675}, - url = {https://doi.org/10.1021%2Facs.jcim.0c00675}, - year = 2020, - month = {oct}, - publisher = {American Chemical Society ({ACS})}, - volume = {60}, - number = {12}, - pages = {6065--6073}, - author = {John J. Irwin and Khanh G. Tang and Jennifer Young and Chinzorig Dandarchuluun - and Benjamin R. Wong and Munkhzul Khurelbaatar and Yurii S. Moroz and John Mayfield and Roger A. Sayle}, - title = {{ZINC}20{\textemdash}A Free Ultralarge-Scale Chemical Database for Ligand Discovery}, - journal = {J. Chem. Inf. Model.} - } + - |- + @article{Irwin_2020, + doi = {10.1021/acs.jcim.0c00675}, + url = {https://doi.org/10.1021%2Facs.jcim.0c00675}, + year = 2020, + month = {oct}, + publisher = {American Chemical Society ({ACS})}, + volume = {60}, + number = {12}, + pages = {6065--6073}, + author = {John J. Irwin and Khanh G. Tang and Jennifer Young and Chinzorig Dandarchuluun + and Benjamin R. Wong and Munkhzul Khurelbaatar and Yurii S. Moroz and John Mayfield and Roger A. Sayle}, + title = {{ZINC}20{\textemdash}A Free Ultralarge-Scale Chemical Database for Ligand Discovery}, + journal = {J. Chem. Inf. 
Model.} + } templates: - - |- - Task: You are given a {#list|sequence!} of SMILES of {#molecules|chemicals|chemical compounds!} and {#must|are asked to!} find the {#molecule|chemical|compound!} that is {#most|maximally!} different from the others. - Molecules: {smi_1#}, {smi_2#}, {smi_3#}, and {smi_4#} - Constraint: Answer by returning the SMILES string. Similarity is measured in terms of Tanimoto distance between Morgan fingerprints of radius {#two|2!}. - Answer: {odd_one_out_mol#} - - |- - Task: You are given a {#list|sequence!} of SMILES of {#molecules|chemicals|chemical compounds!} and {#must|are asked to!} find the pair {#molecule|chemical|compound!} that is {#most|maximally!} different from each other. - Molecules: {smi_1#}, {smi_2#}, {smi_3#}, and {smi_4#} - Constraint: Answer by returning two SMILES strings separated by a comma. Similarity is measured in terms of Tanimoto distance between Morgan fingerprints of radius {#two|2!}. - Answer: {most_diff_0#}, {most_diff_1#} - - |- - Task: You are given a {#list|sequence!} of SMILES of {#molecules|chemicals|chemical compounds!} and {#must|are asked to!} find the pair {#molecule|chemical|compound!} that is {#most|maximally!} similar to each other. - Molecules: {smi_1#}, {smi_2#}, {smi_3#}, and {smi_4#} - Constraint: Answer by returning two SMILES strings separated by a comma. Similarity is measured in terms of Tanimoto distance between Morgan fingerprints of radius {#two|2!}. - Answer: {biggest_sim_1#}, {biggest_sim_0#} - - |- - Question: I have a {#list|sequence!} of SMILES for {#molecules|chemicals|chemical compounds!}: {smi_1#}, {smi_2#}, {smi_3#}, and {smi_4#}. Which two molecules have the highest similarity based on their Tanimoto distance calculated from Morgan fingerprints of radius {#two|2!}? - Answer: The two most similar molecules are {biggest_sim_1#} and {biggest_sim_0#}. - - |- - Question: I have the following SMILES strings: {smi_1#}, {smi_2#}, {smi_3#}, and {smi_4#}. 
Which {#molecule|chemical|chemical compound|compound!} is the most {#dissimilar|different!} from the all others based on Tanimoto distance of their Morgan fingerprints of radius {#two|2!}? - Answer: The most dissimilar {#molecule|chemical|chemical compound|compound!} is {odd_one_out_mol#}. - - |- - User: I have the following SMILES strings: {smi_1#}, {smi_2#}, {smi_3#}, and {smi_4#}. Which is the odd one in this list? - Assistant: {#Interesting question, what do you|Interesting, what do you|Cool, what do you|What do you!} {#mean by|understand as!} "odd one"? - User: {#For now, we|Let's assume we|We!} measure similarity in terms of Tanimoto distance between Morgan fingerprints of radius two. The "odd one" is the molecule that is most different from the others. - Assistant: {#In that case,|Then,!} I {#think|believe|propose!} that {odd_one_out_mol#} is the "odd one" you're looking for. + - |- + Task: You are given a {#list|sequence!} of SMILES of {#molecules|chemicals|chemical compounds!} and {#must|are asked to!} find the {#molecule|chemical|compound!} that is {#most|maximally!} different from the others. + Molecules: {smi_1#}, {smi_2#}, {smi_3#}, and {smi_4#} + Constraint: Answer by returning the SMILES string. Similarity is measured in terms of Tanimoto distance between Morgan fingerprints of radius {#two|2!}. + Answer: {odd_one_out_mol#} + - |- + Task: You are given a {#list|sequence!} of SMILES of {#molecules|chemicals|chemical compounds!} and {#must|are asked to!} find the pair {#molecule|chemical|compound!} that is {#most|maximally!} different from each other. + Molecules: {smi_1#}, {smi_2#}, {smi_3#}, and {smi_4#} + Constraint: Answer by returning two SMILES strings separated by a comma. Similarity is measured in terms of Tanimoto distance between Morgan fingerprints of radius {#two|2!}. 
+ Answer: {most_diff_0#}, {most_diff_1#} + - |- + Task: You are given a {#list|sequence!} of SMILES of {#molecules|chemicals|chemical compounds!} and {#must|are asked to!} find the pair {#molecule|chemical|compound!} that is {#most|maximally!} similar to each other. + Molecules: {smi_1#}, {smi_2#}, {smi_3#}, and {smi_4#} + Constraint: Answer by returning two SMILES strings separated by a comma. Similarity is measured in terms of Tanimoto distance between Morgan fingerprints of radius {#two|2!}. + Answer: {biggest_sim_1#}, {biggest_sim_0#} + - |- + Question: I have a {#list|sequence!} of SMILES for {#molecules|chemicals|chemical compounds!}: {smi_1#}, {smi_2#}, {smi_3#}, and {smi_4#}. Which two molecules have the highest similarity based on their Tanimoto distance calculated from Morgan fingerprints of radius {#two|2!}? + Answer: The two most similar molecules are {biggest_sim_1#} and {biggest_sim_0#}. + - |- + Question: I have the following SMILES strings: {smi_1#}, {smi_2#}, {smi_3#}, and {smi_4#}. Which {#molecule|chemical|chemical compound|compound!} is the most {#dissimilar|different!} from the all others based on Tanimoto distance of their Morgan fingerprints of radius {#two|2!}? + Answer: The most dissimilar {#molecule|chemical|chemical compound|compound!} is {odd_one_out_mol#}. + - |- + User: I have the following SMILES strings: {smi_1#}, {smi_2#}, {smi_3#}, and {smi_4#}. Which is the odd one in this list? + Assistant: {#Interesting question, what do you|Interesting, what do you|Cool, what do you|What do you!} {#mean by|understand as!} "odd one"? + User: {#For now, we|Let's assume we|We!} measure similarity in terms of Tanimoto distance between Morgan fingerprints of radius two. The "odd one" is the molecule that is most different from the others. + Assistant: {#In that case,|Then,!} I {#think|believe|propose!} that {odd_one_out_mol#} is the "odd one" you're looking for. 
diff --git a/data/tabular/opv/meta.yaml b/data/tabular/opv/meta.yaml index d3ec78513..067854bf7 100644 --- a/data/tabular/opv/meta.yaml +++ b/data/tabular/opv/meta.yaml @@ -1,165 +1,158 @@ ---- name: opv description: |- - Database of organic nonfullerene solar cells with their composition, device stacks, and performance. + Database of organic nonfullerene solar cells with their composition, device stacks, and performance. targets: - - id: PCE_max(%) - description: Maximum power conversion efficiency of devices with this polymer - units: '%' - type: continuous - names: - - noun: maximum power conversion efficiency of tested devices - - noun: maximum achievable power conversion efficiency - uris: - significant_digits: 2 - sample: false - - id: PCE_ave - description: Average power conversion efficiency of devices with this polymer - units: '%' - names: - - noun: power conversion efficiency (PCE) - - noun: power conversion efficiency - type: continuous - significant_digits: 2 - - id: Voc - description: Open-circuit voltage of devices with this polymer - units: V - type: continuous - names: - - noun: open-circuit voltage of tested devices - - noun: open-circuit voltage - uris: - significant_digits: 2 - - id: Jsc - description: Short-circuit current density of devices with this polymer - units: mA/cm^2 - type: continuous - names: - - noun: short-circuit current density of tested devices - - noun: short-circuit current density - uris: - significant_digits: 2 - - id: FF - description: Fill factor of devices with this polymer - units: (dimensionless) - type: continuous - names: - - noun: fill factor of tested devices - - noun: fill factor - uris: - significant_digits: 2 - - id: bandgap - description: Bandgap of the polymer - units: eV - type: continuous - names: - - noun: bandgap of the polymer - - noun: bandgap - uris: - significant_digits: 2 - - id: HOMO - description: HOMO of the system, measured experimentally - type: continuous - units: eV - significant_digits: 2 - 
names: - - noun: highest-occupied molecular orbital energy - - noun: HOMO energy - - noun: highest-occupied molecular orbital (HOMO) energy - - id: LUMO - description: LUMO of the system, measured experimentally - type: continuous - units: eV - significant_digits: 2 - names: - - noun: lowest-unoccupied molecular orbital energy - - noun: LUMO energy - - noun: lowest-unoccupied molecular orbital (LUMO) energy + - id: PCE_max(%) + description: Maximum power conversion efficiency of devices with this polymer + units: "%" + type: continuous + names: + - noun: maximum power conversion efficiency of tested devices + - noun: maximum achievable power conversion efficiency + uris: + significant_digits: 2 + sample: false + - id: PCE_ave + description: Average power conversion efficiency of devices with this polymer + units: "%" + names: + - noun: power conversion efficiency (PCE) + - noun: power conversion efficiency + type: continuous + significant_digits: 2 + - id: Voc + description: Open-circuit voltage of devices with this polymer + units: V + type: continuous + names: + - noun: open-circuit voltage of tested devices + - noun: open-circuit voltage + uris: + significant_digits: 2 + - id: Jsc + description: Short-circuit current density of devices with this polymer + units: mA/cm^2 + type: continuous + names: + - noun: short-circuit current density of tested devices + - noun: short-circuit current density + uris: + significant_digits: 2 + - id: FF + description: Fill factor of devices with this polymer + units: (dimensionless) + type: continuous + names: + - noun: fill factor of tested devices + - noun: fill factor + uris: + significant_digits: 2 + - id: bandgap + description: Bandgap of the polymer + units: eV + type: continuous + names: + - noun: bandgap of the polymer + - noun: bandgap + uris: + significant_digits: 2 + - id: HOMO + description: HOMO of the system, measured experimentally + type: continuous + units: eV + significant_digits: 2 + names: + - noun: 
highest-occupied molecular orbital energy + - noun: HOMO energy + - noun: highest-occupied molecular orbital (HOMO) energy + - id: LUMO + description: LUMO of the system, measured experimentally + type: continuous + units: eV + significant_digits: 2 + names: + - noun: lowest-unoccupied molecular orbital energy + - noun: LUMO energy + - noun: lowest-unoccupied molecular orbital (LUMO) energy benchmarks: [] links: - - url: https://pubs.acs.org/doi/full/10.1021/acs.jpclett.8b00635 - description: original data source + - url: https://pubs.acs.org/doi/full/10.1021/acs.jpclett.8b00635 + description: original data source identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: Mw - type: Other - description: molecular weight - names: - - noun: molecular weight - - noun: molecular weight (Mw) - - noun: Mw - - id: PDI - type: Other - description: polydispersity index - names: - - noun: polydispersity index - - noun: polydispersity index (PDI) - - noun: PDI + - id: SMILES + type: SMILES + description: SMILES + - id: Mw + type: Other + description: molecular weight + names: + - noun: molecular weight + - noun: molecular weight (Mw) + - noun: Mw + - id: PDI + type: Other + description: polydispersity index + names: + - noun: polydispersity index + - noun: polydispersity index (PDI) + - noun: PDI license: num_points: 1098 bibtex: - - |- - @article{Nagasawa_2018, - doi = {10.1021/acs.jpclett.8b00635}, - url = {https://doi.org/10.1021%2Facs.jpclett.8b00635}, - year = 2018, - month = {may}, - publisher = {American Chemical Society ({ACS})}, - volume = {9}, - number = {10}, - pages = {2639--2646}, - author = {Shinji Nagasawa and Eman Al-Naamani and Akinori Saeki}, - title = {Computer-Aided Screening of Conjugated Polymers for Organic Solar Cell: - Classification by Random Forest}, - journal = {J. Phys. Chem. 
Lett.} - } + - |- + @article{Nagasawa_2018, + doi = {10.1021/acs.jpclett.8b00635}, + url = {https://doi.org/10.1021%2Facs.jpclett.8b00635}, + year = 2018, + month = {may}, + publisher = {American Chemical Society ({ACS})}, + volume = {9}, + number = {10}, + pages = {2639--2646}, + author = {Shinji Nagasawa and Eman Al-Naamani and Akinori Saeki}, + title = {Computer-Aided Screening of Conjugated Polymers for Organic Solar Cell: + Classification by Random Forest}, + journal = {J. Phys. Chem. Lett.} + } templates: - - |- - Question: What is the {PCE_ave__names__noun} of a {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a donor polymer with monomer {SMILES__description} {SMILES#} and {#Mw|weight-average molecular weight|weight-average molecular weight (Mw)!} {Mw#} g/mol and {#PDI|polydispersity index|polydispersity index (PDI)!} of {PDI#}? - Answer: {#The power conversion efficiency is |The PCE is!}{PCE_ave#} %. - - |- - Question: What is the {Voc__names__noun} of a {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a donor polymer with monomer {SMILES__description} {SMILES#} and {#Mw|weight-average molecular weight|weight-average molecular weight (Mw)!} {Mw#} g/mol and {#PDI|polydispersity index|polydispersity index (PDI)!} of {PDI#}? - Answer: {#The open-circuit voltage is |The Voc is!}{Voc#} {Voc__units}. - - |- - Question: What is the {Jsc__names__noun} of a {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a donor polymer with monomer {SMILES__description} {SMILES#} and {#Mw|weight-average molecular weight|weight-average molecular weight (Mw)!} {Mw#} g/mol and {#PDI|polydispersity index|polydispersity index (PDI)!} of {PDI#}? - Answer: {#The short-circuit current density is |The Jsc is!}{Jsc#} {Jsc__units}. 
- - |- - Question: What is the {FF__names__noun} of a {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a donor polymer with monomer {SMILES__description} {SMILES#} and {#Mw|weight-average molecular weight|weight-average molecular weight (Mw)!} {Mw#} g/mol and {#PDI|polydispersity index|polydispersity index (PDI)!} of {PDI#}? - Answer: {#The fill factor is |The FF is !}{FF#}. - - |- - Question: What is the {bandgap__names__noun} of a polymer with monomer {SMILES__description} {SMILES#}? - Answer: {#The bandgap is |The bandgap of the polymer is!}{bandgap#} {bandgap__units}. - - |- - Question: What is the {HOMO__names__noun} of a polymer with monomer {SMILES__description} {SMILES#}? - Answer: The {HOMO__names__noun} {#of the polymer|!} is {HOMO#} {HOMO__units}. - - |- - Question: What is the {LUMO__names__noun} of a polymer with monomer {SMILES__description} {SMILES#}? - Answer: The {LUMO__names__noun} {#of the polymer|!} is {LUMO#} {LUMO__units}. - - The {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a donor polymer with monomer - {SMILES__description} {SMILES#} and {#Mw|weight-average molecular weight|weight-average molecular weight (Mw)!} {Mw#} g/mol and {#PDI|polydispersity - index|polydispersity index (PDI)!} of {PDI#} has a {PCE_ave__names__noun} of {PCE_ave#}%. - - The {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a donor polymer with monomer - {SMILES__description} {SMILES#} and {#Mw|weight-average molecular weight|weight-average molecular weight (Mw)!} {Mw#} g/mol and {#PDI|polydispersity - index|polydispersity index (PDI)!} of {PDI#} has a {Jsc__names__noun} of {Jsc#} {Jsc__units}. 
- - The {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a donor polymer with monomer - {SMILES__description} {SMILES#} and {#Mw|weight-average molecular weight|weight-average molecular weight (Mw)!} {Mw#} g/mol and {#PDI|polydispersity - index|polydispersity index (PDI)!} of {PDI#} has a {FF__names__noun} of {FF#}. - - |- - User: I {#want to|would like to|aim to|wish to!} {#design|create|build!} a {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a {PCE_ave__names__noun} of {PCE_ave#}%. - Assistant: {#That's interesting.|Cool.|!} Do you have a donor polymer in mind? - User: Yes, I would like to use a polymer with monomer {SMILES__description} {SMILES#} and {#would like to|need to|must!} know the {PDI__names__noun} and {Mw__names__noun} of the polymer I should use. - Assistant: {#I recommend|I suggest|I propose!} trying a {Mw__names__noun} of {Mw#} g/mol and a {PDI__names__noun} of {PDI#}. - - |- - User: I {#want to|would like to|aim to|wish to!} {#design|create|build!} a {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a {PCE_ave__names__noun} of {PCE_ave#}%. - Assistant: {#That's interesting.|Cool.|!} Do you have additional constraints? - User: {#Yes, |Yeah, |Indeed, |!}I would like to have a {Jsc__names__noun} of {Jsc#} {Jsc__units}. - Assistant: {#I recommend|I suggest|I propose!} trying a {Mw__names__noun} of {Mw#} g/mol and {PDI__names__noun} of {PDI#} of a polymer with monomer {SMILES__description} {SMILES#}. - - |- - User: Can you {#recommend|suggest|propose!} a donor polymer for a {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a {PCE_ave__names__noun} of {PCE_ave#}% and a {Jsc__names__noun} of {Jsc#} {Jsc__units}? 
- Assistant: {#I recommend|I suggest|I propose!} trying a {Mw__names__noun} of {Mw#} g/mol and {PDI__names__noun} of {PDI#} of a polymer with monomer {SMILES__description} {SMILES#}. - - |- - Task: Predict the {PCE_ave__names__noun} of a {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device based on a description of the donor polymer. - Description: The donor polymer has monomer {SMILES__description} {SMILES#} and {#Mw|weight-average molecular weight|weight-average molecular weight (Mw)!} {Mw#} g/mol and {#PDI|polydispersity index|polydispersity index (PDI)!} of {PDI#}. - Solution: {#The power conversion efficiency is |The PCE is !}{PCE_ave#} %. + - |- + Question: What is the {PCE_ave__names__noun} of a {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a donor polymer with monomer {SMILES__description} {SMILES#} and {#Mw|weight-average molecular weight|weight-average molecular weight (Mw)!} {Mw#} g/mol and {#PDI|polydispersity index|polydispersity index (PDI)!} of {PDI#}? + Answer: {#The power conversion efficiency is |The PCE is!}{PCE_ave#} %. + - |- + Question: What is the {Voc__names__noun} of a {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a donor polymer with monomer {SMILES__description} {SMILES#} and {#Mw|weight-average molecular weight|weight-average molecular weight (Mw)!} {Mw#} g/mol and {#PDI|polydispersity index|polydispersity index (PDI)!} of {PDI#}? + Answer: {#The open-circuit voltage is |The Voc is!}{Voc#} {Voc__units}. 
+ - |- + Question: What is the {Jsc__names__noun} of a {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a donor polymer with monomer {SMILES__description} {SMILES#} and {#Mw|weight-average molecular weight|weight-average molecular weight (Mw)!} {Mw#} g/mol and {#PDI|polydispersity index|polydispersity index (PDI)!} of {PDI#}? + Answer: {#The short-circuit current density is |The Jsc is!}{Jsc#} {Jsc__units}. + - |- + Question: What is the {FF__names__noun} of a {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a donor polymer with monomer {SMILES__description} {SMILES#} and {#Mw|weight-average molecular weight|weight-average molecular weight (Mw)!} {Mw#} g/mol and {#PDI|polydispersity index|polydispersity index (PDI)!} of {PDI#}? + Answer: {#The fill factor is |The FF is !}{FF#}. + - |- + Question: What is the {bandgap__names__noun} of a polymer with monomer {SMILES__description} {SMILES#}? + Answer: {#The bandgap is |The bandgap of the polymer is!}{bandgap#} {bandgap__units}. + - |- + Question: What is the {HOMO__names__noun} of a polymer with monomer {SMILES__description} {SMILES#}? + Answer: The {HOMO__names__noun} {#of the polymer|!} is {HOMO#} {HOMO__units}. + - |- + Question: What is the {LUMO__names__noun} of a polymer with monomer {SMILES__description} {SMILES#}? + Answer: The {LUMO__names__noun} {#of the polymer|!} is {LUMO#} {LUMO__units}. + - The {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a donor polymer with monomer {SMILES__description} {SMILES#} and {#Mw|weight-average molecular weight|weight-average molecular weight (Mw)!} {Mw#} g/mol and {#PDI|polydispersity index|polydispersity index (PDI)!} of {PDI#} has a {PCE_ave__names__noun} of {PCE_ave#}%. 
+ - The {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a donor polymer with monomer {SMILES__description} {SMILES#} and {#Mw|weight-average molecular weight|weight-average molecular weight (Mw)!} {Mw#} g/mol and {#PDI|polydispersity index|polydispersity index (PDI)!} of {PDI#} has a {Jsc__names__noun} of {Jsc#} {Jsc__units}. + - The {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a donor polymer with monomer {SMILES__description} {SMILES#} and {#Mw|weight-average molecular weight|weight-average molecular weight (Mw)!} {Mw#} g/mol and {#PDI|polydispersity index|polydispersity index (PDI)!} of {PDI#} has a {FF__names__noun} of {FF#}. + - |- + User: I {#want to|would like to|aim to|wish to!} {#design|create|build!} a {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a {PCE_ave__names__noun} of {PCE_ave#}%. + Assistant: {#That's interesting.|Cool.|!} Do you have a donor polymer in mind? + User: Yes, I would like to use a polymer with monomer {SMILES__description} {SMILES#} and {#would like to|need to|must!} know the {PDI__names__noun} and {Mw__names__noun} of the polymer I should use. + Assistant: {#I recommend|I suggest|I propose!} trying a {Mw__names__noun} of {Mw#} g/mol and a {PDI__names__noun} of {PDI#}. + - |- + User: I {#want to|would like to|aim to|wish to!} {#design|create|build!} a {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a {PCE_ave__names__noun} of {PCE_ave#}%. + Assistant: {#That's interesting.|Cool.|!} Do you have additional constraints? + User: {#Yes, |Yeah, |Indeed, |!}I would like to have a {Jsc__names__noun} of {Jsc#} {Jsc__units}. 
+ Assistant: {#I recommend|I suggest|I propose!} trying a {Mw__names__noun} of {Mw#} g/mol and {PDI__names__noun} of {PDI#} of a polymer with monomer {SMILES__description} {SMILES#}. + - |- + User: Can you {#recommend|suggest|propose!} a donor polymer for a {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device with a {PCE_ave__names__noun} of {PCE_ave#}% and a {Jsc__names__noun} of {Jsc#} {Jsc__units}? + Assistant: {#I recommend|I suggest|I propose!} trying a {Mw__names__noun} of {Mw#} g/mol and {PDI__names__noun} of {PDI#} of a polymer with monomer {SMILES__description} {SMILES#}. + - |- + Task: Predict the {PCE_ave__names__noun} of a {#non-fullerene|PC71BM|PCBM!} {#organic photovoltaics|OPV|organic solar cell|organic photovoltaics (OPV)!} device based on a description of the donor polymer. + Description: The donor polymer has monomer {SMILES__description} {SMILES#} and {#Mw|weight-average molecular weight|weight-average molecular weight (Mw)!} {Mw#} g/mol and {#PDI|polydispersity index|polydispersity index (PDI)!} of {PDI#}. + Solution: {#The power conversion efficiency is |The PCE is !}{PCE_ave#} %. diff --git a/data/tabular/oqmd/meta.yaml b/data/tabular/oqmd/meta.yaml index d4624d8cc..98153e7c3 100644 --- a/data/tabular/oqmd/meta.yaml +++ b/data/tabular/oqmd/meta.yaml @@ -1,149 +1,143 @@ ---- name: oqmd description: |- - Open Quantum Materials Database (OQMD) is a database of DFT-computed thermodynamic and structural properties of materials. - We used a compilation of a prior version of this database. + Open Quantum Materials Database (OQMD) is a database of DFT-computed thermodynamic and structural properties of materials. + We used a compilation of a prior version of this database. 
targets: - - id: spacegroup - description: space group number - type: integer - names: - - noun: space group with the International Tables number - - noun: space group - - id: energy_per_atom - type: continuous - units: eV / atom - significant_digits: 4 - names: - - noun: energy per atom computed using DFT (with the PAW method as implemented in VASP) using the PBE functional - - noun: energy per atom computed using the PBE GGA functional - - noun: energy per atom computed using PAW-PBE - - id: formation_energy_per_atom - type: continuous - units: eV / atom - significant_digits: 4 - names: - - noun: formation energy per atom computed using DFT (with the PAW method as implemented in VASP) using the PBE functional - - noun: formation energy per atom computed using the PBE GGA functional - - noun: formation energy per atom computed using PAW-PBE - - id: band_gap - type: continuous - units: eV - significant_digits: 4 - names: - - noun: band gap computed using DFT (with the PAW method as implemented in VASP) using the PBE functional - - noun: band gap computed using the PBE GGA functional - - noun: PBE-computed band gap - - noun: PAW-PBE-computed band gap - - noun: band gap computed using PAW-PBE - - id: volume_per_atom - type: continuous - units: \AA^3 / atom - significant_digits: 4 - names: - - noun: volume per atom computed using DFT (with the PAW method as implemented in VASP) using the PBE functional - - noun: volume per atom computed using the PBE GGA functional - - noun: volume per atom computed using PAW-PBE - - id: magnetization_per_atom - type: continuous - units: \mu B - significant_digits: 4 - names: - - noun: magnetization per atom computed using DFT (with the PAW method as implemented in VASP) using the PBE functional - - noun: magnetization per atom computed using the PBE GGA functional - - noun: magnetization per atom computed using PAW-PBE + - id: spacegroup + description: space group number + type: integer + names: + - noun: space group with the 
International Tables number + - noun: space group + - id: energy_per_atom + type: continuous + units: eV / atom + significant_digits: 4 + names: + - noun: energy per atom computed using DFT (with the PAW method as implemented in VASP) using the PBE functional + - noun: energy per atom computed using the PBE GGA functional + - noun: energy per atom computed using PAW-PBE + - id: formation_energy_per_atom + type: continuous + units: eV / atom + significant_digits: 4 + names: + - noun: formation energy per atom computed using DFT (with the PAW method as implemented in VASP) using the PBE functional + - noun: formation energy per atom computed using the PBE GGA functional + - noun: formation energy per atom computed using PAW-PBE + - id: band_gap + type: continuous + units: eV + significant_digits: 4 + names: + - noun: band gap computed using DFT (with the PAW method as implemented in VASP) using the PBE functional + - noun: band gap computed using the PBE GGA functional + - noun: PBE-computed band gap + - noun: PAW-PBE-computed band gap + - noun: band gap computed using PAW-PBE + - id: volume_per_atom + type: continuous + units: \AA^3 / atom + significant_digits: 4 + names: + - noun: volume per atom computed using DFT (with the PAW method as implemented in VASP) using the PBE functional + - noun: volume per atom computed using the PBE GGA functional + - noun: volume per atom computed using PAW-PBE + - id: magnetization_per_atom + type: continuous + units: \mu B + significant_digits: 4 + names: + - noun: magnetization per atom computed using DFT (with the PAW method as implemented in VASP) using the PBE functional + - noun: magnetization per atom computed using the PBE GGA functional + - noun: magnetization per atom computed using PAW-PBE identifiers: - - id: name - description: OQMD identifier - sample: false - type: Other - - id: formula - description: composition - type: COMPOSITION + - id: name + description: OQMD identifier + sample: false + type: Other + - id: 
formula + description: composition + type: COMPOSITION license: CC-BY 4.0 num_points: 561882 links: - - url: https://oqmd.org/ - description: raw data source - - urL: https://zenodo.org/records/7118055 - description: original data source + - url: https://oqmd.org/ + description: raw data source + - url: https://zenodo.org/records/7118055 + description: original data source bibtex: - - |- - @article{yamamoto2019crystal, - title={Crystal graph neural networks for data mining in materials science}, - author={Yamamoto, Takenori}, - journal={Research Institute for Mathematical and Computational Sciences, LLC}, - year={2019} - } - - |- - @article{kirklin2015open, - title={The Open Quantum Materials Database (OQMD): assessing the accuracy of DFT formation energies}, - author={Kirklin, Scott and Saal, James E and Meredig, Bryce and Thompson, Alex and Doak, Jeff W and Aykol, Muratahan and R{\"u}hl, Stephan and Wolverton, Chris}, - journal={npj Computational Materials}, - volume={1}, - number={1}, - pages={1--15}, - year={2015}, - publisher={Nature Publishing Group} - }{spa} + - |- + @article{yamamoto2019crystal, + title={Crystal graph neural networks for data mining in materials science}, + author={Yamamoto, Takenori}, + journal={Research Institute for Mathematical and Computational Sciences, LLC}, + year={2019} + } + - |- + @article{kirklin2015open, + title={The Open Quantum Materials Database (OQMD): assessing the accuracy of DFT formation energies}, + author={Kirklin, Scott and Saal, James E and Meredig, Bryce and Thompson, Alex and Doak, Jeff W and Aykol, Muratahan and R{\"u}hl, Stephan and Wolverton, Chris}, + journal={npj Computational Materials}, + volume={1}, + number={1}, + pages={1--15}, + year={2015}, + publisher={Nature Publishing Group} + }{spa} templates: - - The {#material|compound|structure!} with composition {formula#} has a {band_gap__names__noun} of {band_gap#} {band_gap__units}.
- - The {#material|compound|structure!} with composition {formula#} and {spacegroup__names__noun} {spacegroup#} has a {band_gap__names__noun} of {band_gap#} - {band_gap__units}. - - The {#material|compound|structure!} with composition {formula#} has a {band_gap__names__noun} of {band_gap#} {band_gap__units} and a {energy_per_atom__names__noun} - of {energy_per_atom#} {energy_per_atom__units}. - - The {#material|compound|structure!} with composition {formula#} and {spacegroup__names__noun} {spacegroup#} has a {band_gap__names__noun} of {band_gap#} - {band_gap__units} and a {magnetization_per_atom__names__noun} of {magnetization_per_atom#} {magnetization_per_atom__units}. - - The {#material|compound|structure!} with composition {formula#} has a {band_gap__names__noun} of {band_gap#} {band_gap__units} and a {volume_per_atom__names__noun} - of {volume_per_atom#} {volume_per_atom__units}. - - The {#material|compound|structure!} with composition {formula#} {#can be found|occurs|crystallizes!} in the {spacegroup__names__noun} {spacegroup#}. - - The {#material|compound|structure!} with composition {formula#} {#can be found|occurs|crystallizes!} in the {spacegroup__names__noun} {spacegroup#} - and has a {band_gap__names__noun} of {band_gap#} {band_gap__units}. - - |- - Question: What is the {band_gap__names__noun} of the {#material|compound|structure!} with composition {formula#}? - Answer: The {band_gap__names__noun} of the {#material|compound|structure!} with composition {formula#} is {band_gap#} {band_gap__units}. - - |- - Question: What is the {band_gap__names__noun} of the {#material|compound|structure!} with composition {formula#} and {spacegroup__names__noun} {spacegroup#}? - Answer: The {band_gap__names__noun} of the {#material|compound|structure!} with composition {formula#} and {spacegroup__names__noun} {spacegroup#} is {band_gap#} {band_gap__units}. 
- - |- - Question: What is the {volume_per_atom__names__noun} of the {#material|compound|structure!} with composition {formula#}? - Answer: The {volume_per_atom__names__noun} of the {#material|compound|structure!} with composition {formula#} is {volume_per_atom#} {volume_per_atom__units}. - - |- - Question: What is the {formation_energy_per_atom__names__noun} of the {#material|compound|structure!} with composition {formula#}? - Answer: The {formation_energy_per_atom__names__noun} of the {#material|compound|structure!} with composition {formula#} is {formation_energy_per_atom#} {formation_energy_per_atom__units}. - - |- - Question: What is the {magnetization_per_atom__names__noun} of the {#material|compound|structure!} with composition {formula#}? - Answer: The {magnetization_per_atom__names__noun} of the {#material|compound|structure!} with composition {formula#} is {magnetization_per_atom#} {magnetization_per_atom__units}. - - |- - User: {#I am looking for|I need!} a {#material|compound|structure!} with a {band_gap__names__noun} of {band_gap#} {band_gap__units}. - Assistant: {#Do you have other constraints?|Are there are requirements?|Is there anything else you need the compound to have?!} - User: I would like it to have a {volume_per_atom__names__noun} of {volume_per_atom#} {volume_per_atom__units}. - Assistant: {#Do you have other constraints?|Are there are requirements?|Is there anything else you need the compound to have?!} - User: I would like it to {#crystallize|occur|be found!} in the {spacegroup__names__noun} {spacegroup#}. - Assistant: {#Okay|In this case|In that case!}, I {#recommend|suggest!} the {#material|compound|structure!} with composition {formula#}. - - |- - User: {#I am looking for|I need!} a {#material|compound|structure!} with a {band_gap__names__noun} of {band_gap#} {band_gap__units}. 
- Assistant: {#Do you have other constraints?|Are there are requirements?|Is there anything else you need the compound to have?!} - User: I would like it to {#crystallize|occur|be found!} in the {spacegroup__names__noun} {spacegroup#}. - Assistant: {#Great|Okay|In this case|In that case!}, I {#recommend|suggest!} the {#material|compound|structure!} with composition {formula#}. - - |- - User: {#I am looking for|I need!} a {#material|compound|structure!} with a {volume_per_atom__names__noun} of {volume_per_atom#} {volume_per_atom__units}. - Assistant: {#Do you have other constraints?|Are there are requirements?|Is there anything else you need the compound to have?!} - User: I would like it to {#crystallize|occur|be found!} in the {spacegroup__names__noun} {spacegroup#}. - Assistant: {#Great|Okay|In this case|In that case!}, I {#recommend|suggest!} the {#material|compound|structure!} with composition {formula#}. - - |- - User: {#I am looking for|I need!} a {#material|compound|structure!} with a {band_gap__names__noun} of {band_gap#} {band_gap__units}. - Assistant: {#Do you have other constraints?|Are there are requirements?|Is there anything else you need the compound to have?!} - User: {#No|Nope|None|Nothing|No other constraints|No other requirements|No other needs!} - Assistant: {#Okay|In this case|In that case!}, I {#recommend|suggest!} the {#material|compound|structure!} with composition {formula#} and {spacegroup__names__noun} {spacegroup#}. 
- - |- - User: {#I want to design|I need to design|I need a!} material with a {band_gap__names__noun} of {band_gap#} {band_gap__units} and a {volume_per_atom__names__noun} of {volume_per_atom#} {volume_per_atom__units}, {#what should I do?|what should I do?|what do you suggest?|what do you recommend?!} - Assistant: {#I found|Here is|I have found|Here is!} a {#material|compound|structure!} with a {band_gap__names__noun} of {band_gap#} {band_gap__units} and a {volume_per_atom__names__noun} of {volume_per_atom#} {volume_per_atom__units}: {formula#}. - - |- - Task: Predict a property of a material based on the description of the material. - Description: Predict the {band_gap__names__noun} of the {#material|compound|structure!} with composition {formula#}. - Result: {band_gap#} {band_gap__units} - - |- - Task: Predict a property of a material based on the description of the material. - Description: Predict the {band_gap__names__noun} of the {#material|compound|structure!} with composition {formula#} and {spacegroup__names__noun} {spacegroup#} and a {volume_per_atom__names__noun} of {volume_per_atom#} {volume_per_atom__units}. - Result: {band_gap#} {band_gap__units} + - The {#material|compound|structure!} with composition {formula#} has a {band_gap__names__noun} of {band_gap#} {band_gap__units}. + - The {#material|compound|structure!} with composition {formula#} and {spacegroup__names__noun} {spacegroup#} has a {band_gap__names__noun} of {band_gap#} {band_gap__units}. + - The {#material|compound|structure!} with composition {formula#} has a {band_gap__names__noun} of {band_gap#} {band_gap__units} and a {energy_per_atom__names__noun} of {energy_per_atom#} {energy_per_atom__units}. + - The {#material|compound|structure!} with composition {formula#} and {spacegroup__names__noun} {spacegroup#} has a {band_gap__names__noun} of {band_gap#} {band_gap__units} and a {magnetization_per_atom__names__noun} of {magnetization_per_atom#} {magnetization_per_atom__units}. 
+ - The {#material|compound|structure!} with composition {formula#} has a {band_gap__names__noun} of {band_gap#} {band_gap__units} and a {volume_per_atom__names__noun} of {volume_per_atom#} {volume_per_atom__units}. + - The {#material|compound|structure!} with composition {formula#} {#can be found|occurs|crystallizes!} in the {spacegroup__names__noun} {spacegroup#}. + - The {#material|compound|structure!} with composition {formula#} {#can be found|occurs|crystallizes!} in the {spacegroup__names__noun} {spacegroup#} and has a {band_gap__names__noun} of {band_gap#} {band_gap__units}. + - |- + Question: What is the {band_gap__names__noun} of the {#material|compound|structure!} with composition {formula#}? + Answer: The {band_gap__names__noun} of the {#material|compound|structure!} with composition {formula#} is {band_gap#} {band_gap__units}. + - |- + Question: What is the {band_gap__names__noun} of the {#material|compound|structure!} with composition {formula#} and {spacegroup__names__noun} {spacegroup#}? + Answer: The {band_gap__names__noun} of the {#material|compound|structure!} with composition {formula#} and {spacegroup__names__noun} {spacegroup#} is {band_gap#} {band_gap__units}. + - |- + Question: What is the {volume_per_atom__names__noun} of the {#material|compound|structure!} with composition {formula#}? + Answer: The {volume_per_atom__names__noun} of the {#material|compound|structure!} with composition {formula#} is {volume_per_atom#} {volume_per_atom__units}. + - |- + Question: What is the {formation_energy_per_atom__names__noun} of the {#material|compound|structure!} with composition {formula#}? + Answer: The {formation_energy_per_atom__names__noun} of the {#material|compound|structure!} with composition {formula#} is {formation_energy_per_atom#} {formation_energy_per_atom__units}. + - |- + Question: What is the {magnetization_per_atom__names__noun} of the {#material|compound|structure!} with composition {formula#}? 
+ Answer: The {magnetization_per_atom__names__noun} of the {#material|compound|structure!} with composition {formula#} is {magnetization_per_atom#} {magnetization_per_atom__units}. + - |- + User: {#I am looking for|I need!} a {#material|compound|structure!} with a {band_gap__names__noun} of {band_gap#} {band_gap__units}. + Assistant: {#Do you have other constraints?|Are there other requirements?|Is there anything else you need the compound to have?!} + User: I would like it to have a {volume_per_atom__names__noun} of {volume_per_atom#} {volume_per_atom__units}. + Assistant: {#Do you have other constraints?|Are there other requirements?|Is there anything else you need the compound to have?!} + User: I would like it to {#crystallize|occur|be found!} in the {spacegroup__names__noun} {spacegroup#}. + Assistant: {#Okay|In this case|In that case!}, I {#recommend|suggest!} the {#material|compound|structure!} with composition {formula#}. + - |- + User: {#I am looking for|I need!} a {#material|compound|structure!} with a {band_gap__names__noun} of {band_gap#} {band_gap__units}. + Assistant: {#Do you have other constraints?|Are there other requirements?|Is there anything else you need the compound to have?!} + User: I would like it to {#crystallize|occur|be found!} in the {spacegroup__names__noun} {spacegroup#}. + Assistant: {#Great|Okay|In this case|In that case!}, I {#recommend|suggest!} the {#material|compound|structure!} with composition {formula#}. + - |- + User: {#I am looking for|I need!} a {#material|compound|structure!} with a {volume_per_atom__names__noun} of {volume_per_atom#} {volume_per_atom__units}. + Assistant: {#Do you have other constraints?|Are there other requirements?|Is there anything else you need the compound to have?!} + User: I would like it to {#crystallize|occur|be found!} in the {spacegroup__names__noun} {spacegroup#}.
+ Assistant: {#Great|Okay|In this case|In that case!}, I {#recommend|suggest!} the {#material|compound|structure!} with composition {formula#}. + - |- + User: {#I am looking for|I need!} a {#material|compound|structure!} with a {band_gap__names__noun} of {band_gap#} {band_gap__units}. + Assistant: {#Do you have other constraints?|Are there other requirements?|Is there anything else you need the compound to have?!} + User: {#No|Nope|None|Nothing|No other constraints|No other requirements|No other needs!} + Assistant: {#Okay|In this case|In that case!}, I {#recommend|suggest!} the {#material|compound|structure!} with composition {formula#} and {spacegroup__names__noun} {spacegroup#}. + - |- + User: {#I want to design|I need to design|I need a!} material with a {band_gap__names__noun} of {band_gap#} {band_gap__units} and a {volume_per_atom__names__noun} of {volume_per_atom#} {volume_per_atom__units}, {#what should I do?|what should I do?|what do you suggest?|what do you recommend?!} + Assistant: {#I found|Here is|I have found|Here is!} a {#material|compound|structure!} with a {band_gap__names__noun} of {band_gap#} {band_gap__units} and a {volume_per_atom__names__noun} of {volume_per_atom#} {volume_per_atom__units}: {formula#}. + - |- + Task: Predict a property of a material based on the description of the material. + Description: Predict the {band_gap__names__noun} of the {#material|compound|structure!} with composition {formula#}. + Result: {band_gap#} {band_gap__units} + - |- + Task: Predict a property of a material based on the description of the material. + Description: Predict the {band_gap__names__noun} of the {#material|compound|structure!} with composition {formula#} and {spacegroup__names__noun} {spacegroup#} and a {volume_per_atom__names__noun} of {volume_per_atom#} {volume_per_atom__units}.
+ Result: {band_gap#} {band_gap__units} diff --git a/data/tabular/orbnet_denali/develop_transform.ipynb b/data/tabular/orbnet_denali/develop_transform.ipynb index d2e9b3ab3..039c60f89 100644 --- a/data/tabular/orbnet_denali/develop_transform.ipynb +++ b/data/tabular/orbnet_denali/develop_transform.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -15,28 +15,28 @@ } ], "source": [ - "%load_ext autoreload \n", + "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", - "from rdkit import Chem \n", - "import matplotlib.pyplot as plt \n", - "import numpy as np \n", - "import os \n", - "import pandas as pd \n", + "from rdkit import Chem\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import os\n", + "import pandas as pd\n", "from glob import glob" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -45,18 +45,18 @@ "2338891" ] }, - "execution_count": 9, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "len(glob('xyz_files/**/*.xyz'))" + "len(glob(\"xyz_files/**/*.xyz\"))" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -65,12 +65,13 @@ "\n", "from io import BytesIO\n", "\n", + "\n", "def extract_tarball(url, output_dir):\n", " # Download the tarball from the URL\n", " response = requests.get(url)\n", " if response.status_code != 200:\n", " raise ValueError(f\"Failed to download tarball from {url}\")\n", - " \n", + "\n", " # Extract the contents of the tarball to the output directory\n", " with tarfile.open(fileobj=BytesIO(response.content), mode=\"r|gz\") as tar:\n", " tar.extractall(output_dir)" @@ -78,7 +79,7 @@ }, { "cell_type": "code", - 
"execution_count": 27, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -112,12 +113,12 @@ } ], "source": [ - "extract_tarball('https://figshare.com/ndownloader/files/28672248', 'labels')" + "extract_tarball(\"https://figshare.com/ndownloader/files/28672248\", \"labels\")" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -126,16 +127,18 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "config = load_config('/Users/kevinmaikjablonka/git/openbioml/chemnlp/data/orbnet_denali/orbnet_denali_structures/meta.yaml')" + "config = load_config(\n", + " \"/Users/kevinmaikjablonka/git/openbioml/chemnlp/data/orbnet_denali/orbnet_denali_structures/meta.yaml\"\n", + ")" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -145,13 +148,13 @@ " 'description': 'structure download'}]" ] }, - "execution_count": 15, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "list(filter(lambda x: x['description'] == 'structure download', config['links']))\n" + "list(filter(lambda x: x[\"description\"] == \"structure download\", config[\"links\"]))" ] }, { @@ -164,16 +167,16 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "df = pd.read_csv('labels/denali_labels.csv')" + "df = pd.read_csv(\"labels/denali_labels.csv\")" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -308,7 +311,7 @@ "4 True 1 -1175.659620 -60.622829 " ] }, - "execution_count": 29, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -319,7 +322,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -328,7 +331,7 @@ "2344594" ] }, - 
"execution_count": 30, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -339,7 +342,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -348,16 +351,16 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "len(glob('/Users/kevinmaikjablonka/Downloads/xyz_files/**/*.xyz'))" + "len(glob(\"/Users/kevinmaikjablonka/Downloads/xyz_files/**/*.xyz\"))" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -367,18 +370,18 @@ " 'jsch', 'ssi'], dtype=object)" ] }, - "execution_count": 10, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df['subset'].unique()" + "df[\"subset\"].unique()" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -387,27 +390,27 @@ "0.005750249296893194" ] }, - "execution_count": 21, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "len(df[df['subset']=='ssi'])/len(df)" + "len(df[df[\"subset\"] == \"ssi\"]) / len(df)" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "protomer_subset = df[(df['subset']=='protomers') | (df['subset']=='qm7bprotomers')]" + "protomer_subset = df[(df[\"subset\"] == \"protomers\") | (df[\"subset\"] == \"qm7bprotomers\")]" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -416,7 +419,7 @@ "0.09206967176406662" ] }, - "execution_count": 17, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -435,16 +438,18 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "mol = 
Chem.MolFromXYZFile(\"/Users/kevinmaikjablonka/Downloads/xyz_files/CHEMBL10003_protomer_1/8ce922b35ecfcab2d4d5a5483424dcdbfc005711ae5294a97d59268dd3b77c81.xyz\")" + "mol = Chem.MolFromXYZFile(\n", + " \"/Users/kevinmaikjablonka/Downloads/xyz_files/CHEMBL10003_protomer_1/8ce922b35ecfcab2d4d5a5483424dcdbfc005711ae5294a97d59268dd3b77c81.xyz\"\n", + ")" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -454,7 +459,7 @@ "" ] }, - "execution_count": 4, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -465,26 +470,30 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from rdkit.Chem import rdDetermineBonds\n", - "from chemnlp.utils import xyz_to_mol \n" + "from chemnlp.utils import xyz_to_mol" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "mol = xyz_to_mol(\"/Users/kevinmaikjablonka/Downloads/xyz_files/CHEMBL10003_protomer_1/8ce922b35ecfcab2d4d5a5483424dcdbfc005711ae5294a97d59268dd3b77c81.xyz\", 1, True)\n" + "mol = xyz_to_mol(\n", + " \"/Users/kevinmaikjablonka/Downloads/xyz_files/CHEMBL10003_protomer_1/8ce922b35ecfcab2d4d5a5483424dcdbfc005711ae5294a97d59268dd3b77c81.xyz\",\n", + " 1,\n", + " True,\n", + ")" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -493,7 +502,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -502,7 +511,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -512,7 +521,7 @@ "" ] }, - "execution_count": 35, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -523,7 +532,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": 
{}, "outputs": [ { @@ -532,7 +541,7 @@ "-2.423494526704" ] }, - "execution_count": 29, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -543,7 +552,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -553,7 +562,7 @@ "" ] }, - "execution_count": 30, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -564,7 +573,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -573,7 +582,7 @@ "'O=c1oc([NH2+]CCc2ccccc2I)nc2ccccc12'" ] }, - "execution_count": 22, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -584,7 +593,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -602,13 +611,6 @@ "source": [ "len(df)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -626,10 +628,8 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "orig_nbformat": 4 + "pygments_lexer": "ipython3" + } }, "nbformat": 4, "nbformat_minor": 2 diff --git a/data/tabular/orbnet_denali/meta.yaml b/data/tabular/orbnet_denali/meta.yaml index 4120879ec..e3f843571 100644 --- a/data/tabular/orbnet_denali/meta.yaml +++ b/data/tabular/orbnet_denali/meta.yaml @@ -1,104 +1,103 @@ ---- name: orbnet_denali description: |- - Structures (including conformers, protomers, ...) of structures. - Has been used for training of OrbNet Denali. + Structures (including conformers, protomers, ...) of structures. + Has been used for training of OrbNet Denali. 
targets: - - id: charge - type: ordinal - description: integer charge of the molecule - names: - - noun: charge - - id: xtb1_energy - type: continuous - description: XTB1 energy - units: Hartree - significant_digits: 5 - names: - - noun: total energy computed at the GFN1-xTB level of theory - - id: dft_energy - type: continuous - description: DFT energy - units: Hartree - significant_digits: 5 - names: - - noun: total energy computed at the {\omega}B97X-D3/def2-TZVP level of theory - - lot: the {\omega}B97X-D3/def2-TZVP + - id: charge + type: ordinal + description: integer charge of the molecule + names: + - noun: charge + - id: xtb1_energy + type: continuous + description: XTB1 energy + units: Hartree + significant_digits: 5 + names: + - noun: total energy computed at the GFN1-xTB level of theory + - id: dft_energy + type: continuous + description: DFT energy + units: Hartree + significant_digits: 5 + names: + - noun: total energy computed at the {\omega}B97X-D3/def2-TZVP level of theory + - lot: the {\omega}B97X-D3/def2-TZVP identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: xyz - type: XYZFILE - description: XYZ file - names: - - noun: XYZ file - - id: mol2000 - type: MOL2000FILE - description: MOL2000 file - names: - - noun: MOL2000 file - - id: mol3000 - type: MOL3000FILE - description: MOL3000 file - names: - - noun: MOL3000 file + - id: SMILES + type: SMILES + description: SMILES + - id: xyz + type: XYZFILE + description: XYZ file + names: + - noun: XYZ file + - id: mol2000 + type: MOL2000FILE + description: MOL2000 file + names: + - noun: MOL2000 file + - id: mol3000 + type: MOL3000FILE + description: MOL3000 file + names: + - noun: MOL3000 file license: CC BY 4.0 links: - - url: https://arxiv.org/abs/2107.00299 - description: corresponding publication - - url: https://figshare.com/ndownloader/files/28672287 - description: structure download - md5: edd35e95a018836d5f174a3431a751df - - url: 
https://figshare.com/ndownloader/files/28672248 - description: label download - md5: bc9b612f75373d1d191ce7493eebfd62 - - url: https://figshare.com/articles/dataset/OrbNet_Denali_Training_Data/14883867?file=28672248 - description: data source + - url: https://arxiv.org/abs/2107.00299 + description: corresponding publication + - url: https://figshare.com/ndownloader/files/28672287 + description: structure download + md5: edd35e95a018836d5f174a3431a751df + - url: https://figshare.com/ndownloader/files/28672248 + description: label download + md5: bc9b612f75373d1d191ce7493eebfd62 + - url: https://figshare.com/articles/dataset/OrbNet_Denali_Training_Data/14883867?file=28672248 + description: data source num_points: 1050713 bibtex: - - |- - @article{Christensen_2021, - doi = {10.1063/5.0061990}, - url = {https://doi.org/10.1063%2F5.0061990}, - year = 2021, - month = {nov}, - publisher = {{AIP} Publishing}, - volume = {155}, - number = {20}, - author = {Anders S. Christensen and Sai Krishna Sirumalla and Zhuoran Qiao and Michael B. O'Connor and Daniel G. A. Smith and Feizhi Ding and Peter J. Bygrave and Animashree Anandkumar and Matthew Welborn and Frederick R. Manby and Thomas F. Miller}, - title = {{OrbNet} Denali: A machine learning potential for biological and organic chemistry with semi-empirical cost and {DFT} accuracy}, - journal = {The Journal of Chemical Physics} - } + - |- + @article{Christensen_2021, + doi = {10.1063/5.0061990}, + url = {https://doi.org/10.1063%2F5.0061990}, + year = 2021, + month = {nov}, + publisher = {{AIP} Publishing}, + volume = {155}, + number = {20}, + author = {Anders S. Christensen and Sai Krishna Sirumalla and Zhuoran Qiao and Michael B. O'Connor and Daniel G. A. Smith and Feizhi Ding and Peter J. Bygrave and Animashree Anandkumar and Matthew Welborn and Frederick R. Manby and Thomas F. 
Miller}, + title = {{OrbNet} Denali: A machine learning potential for biological and organic chemistry with semi-empirical cost and {DFT} accuracy}, + journal = {The Journal of Chemical Physics} + } templates: - - The {#molecule|chemical|compound|chemical structure!} with {SMILES__description} {SMILES#} has a charge of {charge#}. - - |- - Question: {#What is the|What's the!} structure of a conformer of the {#molecule|chemical|compound|chemical structure!} with {SMILES__description} {SMILES#}? - Constraint: Return a {xyz__names__noun}. - Answer: {xyz#} - - |- - Question: {#What is the|What's the!} structure of a conformer of the {#molecule|chemical|compound|chemical structure!} with {SMILES__description} {SMILES#}? - Constraint: Return a {mol2000__names__noun}. - Answer: {mol2000#} - - |- - Question: {#What is the|What's the!} structure of a conformer of the {#molecule|chemical|compound|chemical structure!} with {SMILES__description} {SMILES#}? - Constraint: Return a {mol3000__names__noun}. - Answer: {mol3000#} - - |- - Task: Return the total energy of a {#molecule|chemical|compound|chemical structure!} computed at the GFN1-xTB level of theory. - Description: The {#molecule|chemical|compound|chemical structure!} has the {xyz__names__noun} {xyz#}. - Answer: {xtb1_energy#} {xtb1_energy__units} - - |- - Task: Return the total energy of a {#molecule|chemical|compound|chemical structure!} computed at {dft_energy__names__lot} level of theory. - Description: The {#molecule|chemical|compound|chemical structure!} has the {xyz__names__noun} {xyz#}. - Answer: {dft_energy#} {dft_energy__units} - - |- - User: {#I want to|I have to|I must|I would like to!} know the GFN1-xTB total energy of the {#molecule|chemical|compound|chemical structure!} with {SMILES__description} {SMILES#}. - Assistant: Do you have the {xyz__names__noun} file of a conformer of the {#molecule|chemical|compound|chemical structure!}? 
- User: {#Yes:|Here it is:|I have it:|I do:!} {xyz#} - Assistant: The GFN1-xTB total energy of the {#molecule|chemical|compound|chemical structure!} is {xtb1_energy#} {xtb1_energy__units}. - - |- - User: {#I want to|I have to|I must|I would like to!} know {dft_energy__names__lot} total energy of the {#molecule|chemical|compound|chemical structure!} with {SMILES__description} {SMILES#}. - Assistant: Do you have the {xyz__names__noun} file of a conformer of the {#molecule|chemical|compound|chemical structure!}? - User: {#Yes:|Here it is:|I have it:|I do:!} {xyz#} - Assistant: The total energy on {dft_energy__names__lot} level of theory of the {#molecule|chemical|compound|chemical structure!} is {dft_energy#} {dft_energy__units}. + - The {#molecule|chemical|compound|chemical structure!} with {SMILES__description} {SMILES#} has a charge of {charge#}. + - |- + Question: {#What is the|What's the!} structure of a conformer of the {#molecule|chemical|compound|chemical structure!} with {SMILES__description} {SMILES#}? + Constraint: Return a {xyz__names__noun}. + Answer: {xyz#} + - |- + Question: {#What is the|What's the!} structure of a conformer of the {#molecule|chemical|compound|chemical structure!} with {SMILES__description} {SMILES#}? + Constraint: Return a {mol2000__names__noun}. + Answer: {mol2000#} + - |- + Question: {#What is the|What's the!} structure of a conformer of the {#molecule|chemical|compound|chemical structure!} with {SMILES__description} {SMILES#}? + Constraint: Return a {mol3000__names__noun}. + Answer: {mol3000#} + - |- + Task: Return the total energy of a {#molecule|chemical|compound|chemical structure!} computed at the GFN1-xTB level of theory. + Description: The {#molecule|chemical|compound|chemical structure!} has the {xyz__names__noun} {xyz#}. + Answer: {xtb1_energy#} {xtb1_energy__units} + - |- + Task: Return the total energy of a {#molecule|chemical|compound|chemical structure!} computed at {dft_energy__names__lot} level of theory. 
+ Description: The {#molecule|chemical|compound|chemical structure!} has the {xyz__names__noun} {xyz#}. + Answer: {dft_energy#} {dft_energy__units} + - |- + User: {#I want to|I have to|I must|I would like to!} know the GFN1-xTB total energy of the {#molecule|chemical|compound|chemical structure!} with {SMILES__description} {SMILES#}. + Assistant: Do you have the {xyz__names__noun} file of a conformer of the {#molecule|chemical|compound|chemical structure!}? + User: {#Yes:|Here it is:|I have it:|I do:!} {xyz#} + Assistant: The GFN1-xTB total energy of the {#molecule|chemical|compound|chemical structure!} is {xtb1_energy#} {xtb1_energy__units}. + - |- + User: {#I want to|I have to|I must|I would like to!} know {dft_energy__names__lot} total energy of the {#molecule|chemical|compound|chemical structure!} with {SMILES__description} {SMILES#}. + Assistant: Do you have the {xyz__names__noun} file of a conformer of the {#molecule|chemical|compound|chemical structure!}? + User: {#Yes:|Here it is:|I have it:|I do:!} {xyz#} + Assistant: The total energy on {dft_energy__names__lot} level of theory of the {#molecule|chemical|compound|chemical structure!} is {dft_energy#} {dft_energy__units}. 
diff --git a/data/tabular/ord_masked/meta.yaml b/data/tabular/ord_masked/meta.yaml index 3871ec22f..8c1486436 100644 --- a/data/tabular/ord_masked/meta.yaml +++ b/data/tabular/ord_masked/meta.yaml @@ -1,51 +1,50 @@ ---- name: ord_rxn_smiles_yield_pred description: |- - The open reaction database is a database of chemical reactions and their conditions + The open reaction database is a database of chemical reactions and their conditions identifiers: - - id: masked_rxn_smiles - type: text - description: reaction SMILES with one element masked - names: - - noun: reaction SMILES with one element masked as `MASK` - - noun: reaction SMILES with one element hidden as `MASK` - - noun: masked reaction SMILES (one component masked as `MASK`) - - noun: masked reaction SMILES string (one component masked as `MASK`) - - noun: masked RXNSMILES (one component masked as `MASK`) + - id: masked_rxn_smiles + type: text + description: reaction SMILES with one element masked + names: + - noun: reaction SMILES with one element masked as `MASK` + - noun: reaction SMILES with one element hidden as `MASK` + - noun: masked reaction SMILES (one component masked as `MASK`) + - noun: masked reaction SMILES string (one component masked as `MASK`) + - noun: masked RXNSMILES (one component masked as `MASK`) targets: - - id: missing_component - type: text - description: masked element + - id: missing_component + type: text + description: masked element license: CC BY SA 4.0 links: - - url: https://github.com/open-reaction-database/ord-data - description: original data source + - url: https://github.com/open-reaction-database/ord-data + description: original data source num_points: 2263983 bibtex: - - |- - @article{Kearnes_2021, - doi = {10.1021/jacs.1c09820}, - url = {https://doi.org/10.1021%2Fjacs.1c09820}, - year = 2021, - month = {nov}, - publisher = {American Chemical Society ({ACS})}, - volume = {143}, - number = {45}, - pages = {18820--18826}, - author = {Steven M. Kearnes and Michael R. 
Maser - and Michael Wleklinski and Anton Kast and Abigail G. Doyle - and Spencer D. Dreher and Joel M. Hawkins - and Klavs F. Jensen and Connor W. Coley}, - title = {The Open Reaction Database}, - journal = {J. Am. Chem. Soc.} - } + - |- + @article{Kearnes_2021, + doi = {10.1021/jacs.1c09820}, + url = {https://doi.org/10.1021%2Fjacs.1c09820}, + year = 2021, + month = {nov}, + publisher = {American Chemical Society ({ACS})}, + volume = {143}, + number = {45}, + pages = {18820--18826}, + author = {Steven M. Kearnes and Michael R. Maser + and Michael Wleklinski and Anton Kast and Abigail G. Doyle + and Spencer D. Dreher and Joel M. Hawkins + and Klavs F. Jensen and Connor W. Coley}, + title = {The Open Reaction Database}, + journal = {J. Am. Chem. Soc.} + } templates: - - The masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#} is {missing_component#}. - - The {#chemical|compound!} with SMILES {missing_component#} is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}. - - |- - Question: What is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}? - Answer: {missing_component#}. - - |- - Task: Predict the masked component in a {masked_rxn_smiles__names__noun}. - Description: {masked_rxn_smiles#} - {#Answer|Solution!}: {missing_component#} + - The masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#} is {missing_component#}. + - The {#chemical|compound!} with SMILES {missing_component#} is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}. + - |- + Question: What is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}? + Answer: {missing_component#}. + - |- + Task: Predict the masked component in a {masked_rxn_smiles__names__noun}. 
+ Description: {masked_rxn_smiles#} + {#Answer|Solution!}: {missing_component#} diff --git a/data/tabular/ord_predictions/meta.yaml b/data/tabular/ord_predictions/meta.yaml index 19f0689a8..abc0c10ab 100644 --- a/data/tabular/ord_predictions/meta.yaml +++ b/data/tabular/ord_predictions/meta.yaml @@ -1,64 +1,63 @@ ---- name: ord_rxn_smiles_yield_pred description: |- - The open reaction database is a database of chemical reactions and their conditions + The open reaction database is a database of chemical reactions and their conditions identifiers: - - id: educt_string - type: text - description: reaction educts - names: - - noun: reaction educts - - noun: educts - - noun: starting materials - - id: RXNSMILES - type: RXNSMILES - description: reaction SMILES - names: - - noun: reaction SMILES - - noun: reaction SMILES string - - noun: RXNSMILES - - noun: reaction SMILES (RXNSMILES) + - id: educt_string + type: text + description: reaction educts + names: + - noun: reaction educts + - noun: educts + - noun: starting materials + - id: RXNSMILES + type: RXNSMILES + description: reaction SMILES + names: + - noun: reaction SMILES + - noun: reaction SMILES string + - noun: RXNSMILES + - noun: reaction SMILES (RXNSMILES) targets: - - id: product_string - type: text - description: reaction products - names: - - noun: reaction products - - noun: products + - id: product_string + type: text + description: reaction products + names: + - noun: reaction products + - noun: products license: CC BY SA 4.0 links: - - url: https://github.com/open-reaction-database/ord-data - description: original data source + - url: https://github.com/open-reaction-database/ord-data + description: original data source num_points: 2263057 bibtex: - - |- - @article{Kearnes_2021, - doi = {10.1021/jacs.1c09820}, - url = {https://doi.org/10.1021%2Fjacs.1c09820}, - year = 2021, - month = {nov}, - publisher = {American Chemical Society ({ACS})}, - volume = {143}, - number = {45}, - pages = {18820--18826}, - 
author = {Steven M. Kearnes and Michael R. Maser - and Michael Wleklinski and Anton Kast and Abigail G. Doyle - and Spencer D. Dreher and Joel M. Hawkins - and Klavs F. Jensen and Connor W. Coley}, - title = {The Open Reaction Database}, - journal = {J. Am. Chem. Soc.} - } + - |- + @article{Kearnes_2021, + doi = {10.1021/jacs.1c09820}, + url = {https://doi.org/10.1021%2Fjacs.1c09820}, + year = 2021, + month = {nov}, + publisher = {American Chemical Society ({ACS})}, + volume = {143}, + number = {45}, + pages = {18820--18826}, + author = {Steven M. Kearnes and Michael R. Maser + and Michael Wleklinski and Anton Kast and Abigail G. Doyle + and Spencer D. Dreher and Joel M. Hawkins + and Klavs F. Jensen and Connor W. Coley}, + title = {The Open Reaction Database}, + journal = {J. Am. Chem. Soc.} + } templates: - - The {RXNSMILES__names__noun} {RXNSMILES#} has the {educt_string__names__noun} {educt_string#} and the {product_string__names__noun} {product_string#}. - - The {RXNSMILES__names__noun} {RXNSMILES#} has the {product_string__names__noun} {product_string#} and the {educt_string__names__noun} {educt_string#}. - - |- - Question: {#What|Which!} {educt_string__names__noun} are {#needed|required!} to {#produce|synthesize!} {product_string#}? - Answer: {educt_string#}. - - |- - Question: {#What|Which!} {product_string__names__noun} are produced from the {educt_string__names__noun} {educt_string#}? - Answer: {product_string#}. - - |- - User: I {#want|would like to|must|need to!} {#synthesize|produce!} {product_string#}. - Assistant: {#Cool, is|That's interesting, is|Great, is|Is!} there anything else I can do for you? - User: {#Yes, |!}I would like to know the {educt_string__names__noun} I need to produce the {product_string__names__noun} {product_string#}. - Assistant: {#I recommend|I suggest|I propose|I advise!} the following {educt_string__names__noun}: {educt_string#}. 
+ - The {RXNSMILES__names__noun} {RXNSMILES#} has the {educt_string__names__noun} {educt_string#} and the {product_string__names__noun} {product_string#}. + - The {RXNSMILES__names__noun} {RXNSMILES#} has the {product_string__names__noun} {product_string#} and the {educt_string__names__noun} {educt_string#}. + - |- + Question: {#What|Which!} {educt_string__names__noun} are {#needed|required!} to {#produce|synthesize!} {product_string#}? + Answer: {educt_string#}. + - |- + Question: {#What|Which!} {product_string__names__noun} are produced from the {educt_string__names__noun} {educt_string#}? + Answer: {product_string#}. + - |- + User: I {#want|would like to|must|need to!} {#synthesize|produce!} {product_string#}. + Assistant: {#Cool, is|That's interesting, is|Great, is|Is!} there anything else I can do for you? + User: {#Yes, |!}I would like to know the {educt_string__names__noun} I need to produce the {product_string__names__noun} {product_string#}. + Assistant: {#I recommend|I suggest|I propose|I advise!} the following {educt_string__names__noun}: {educt_string#}. 
diff --git a/data/tabular/ord_procedure_steps/meta.yaml b/data/tabular/ord_procedure_steps/meta.yaml index 7b8568f53..4c2102f4e 100644 --- a/data/tabular/ord_procedure_steps/meta.yaml +++ b/data/tabular/ord_procedure_steps/meta.yaml @@ -1,54 +1,53 @@ ---- name: ord_procedure_steps description: |- - The open reaction database is a database of chemical reactions and their conditions + The open reaction database is a database of chemical reactions and their conditions identifiers: - - id: steps_string - type: text - description: reaction action sequence - names: - - noun: reaction action sequence - - noun: reaction action steps + - id: steps_string + type: text + description: reaction action sequence + names: + - noun: reaction action sequence + - noun: reaction action steps targets: - - id: procedure - type: text - description: reaction procedure - names: - - noun: reaction procedure - - noun: description of reaction procedure - - noun: reaction procedure description - - noun: procedure + - id: procedure + type: text + description: reaction procedure + names: + - noun: reaction procedure + - noun: description of reaction procedure + - noun: reaction procedure description + - noun: procedure license: CC BY SA 4.0 links: - - url: https://github.com/open-reaction-database/ord-data - description: original data source + - url: https://github.com/open-reaction-database/ord-data + description: original data source num_points: 76815 bibtex: - - |- - @article{Kearnes_2021, - doi = {10.1021/jacs.1c09820}, - url = {https://doi.org/10.1021%2Fjacs.1c09820}, - year = 2021, - month = {nov}, - publisher = {American Chemical Society ({ACS})}, - volume = {143}, - number = {45}, - pages = {18820--18826}, - author = {Steven M. Kearnes and Michael R. Maser - and Michael Wleklinski and Anton Kast and Abigail G. Doyle - and Spencer D. Dreher and Joel M. Hawkins - and Klavs F. Jensen and Connor W. Coley}, - title = {The Open Reaction Database}, - journal = {J. Am. Chem. 
Soc.} - } + - |- + @article{Kearnes_2021, + doi = {10.1021/jacs.1c09820}, + url = {https://doi.org/10.1021%2Fjacs.1c09820}, + year = 2021, + month = {nov}, + publisher = {American Chemical Society ({ACS})}, + volume = {143}, + number = {45}, + pages = {18820--18826}, + author = {Steven M. Kearnes and Michael R. Maser + and Michael Wleklinski and Anton Kast and Abigail G. Doyle + and Spencer D. Dreher and Joel M. Hawkins + and Klavs F. Jensen and Connor W. Coley}, + title = {The Open Reaction Database}, + journal = {J. Am. Chem. Soc.} + } templates: - - |- - User: {#Can you|Could you!} {#tell me|give me|show me!} the {procedure__names__noun} for the {steps_string__names__noun} {steps_string#}? - Assistant: {#I propose|I suggest!} the {procedure__names__noun} {procedure#} - - |- - User: {#Can you|Could you!} {#tell me|give me|show me!} the {steps_string__names__noun} for the {procedure__names__noun} {procedure#}? - Assistant: {#I propose|I suggest!} the {steps_string__names__noun} {steps_string#} - - |- - Task: Convert a {procedure__names__noun} into a {steps_string__names__noun}. - Procedure: {procedure#} - Answer: {steps_string#} + - |- + User: {#Can you|Could you!} {#tell me|give me|show me!} the {procedure__names__noun} for the {steps_string__names__noun} {steps_string#}? + Assistant: {#I propose|I suggest!} the {procedure__names__noun} {procedure#} + - |- + User: {#Can you|Could you!} {#tell me|give me|show me!} the {steps_string__names__noun} for the {procedure__names__noun} {procedure#}? + Assistant: {#I propose|I suggest!} the {steps_string__names__noun} {steps_string#} + - |- + Task: Convert a {procedure__names__noun} into a {steps_string__names__noun}. 
+ Procedure: {procedure#} + Answer: {steps_string#} diff --git a/data/tabular/ord_rxn_smiles_procedure/meta.yaml b/data/tabular/ord_rxn_smiles_procedure/meta.yaml index 09f4780db..0a72528b9 100644 --- a/data/tabular/ord_rxn_smiles_procedure/meta.yaml +++ b/data/tabular/ord_rxn_smiles_procedure/meta.yaml @@ -1,65 +1,64 @@ ---- name: ord_rxn_smiles_procedure description: |- - The open reaction database is a database of chemical reactions and their conditions + The open reaction database is a database of chemical reactions and their conditions targets: - - id: procedure - type: text - description: reaction procedure - names: - - noun: reaction procedure - - noun: description of reaction procedure - - noun: reaction procedure description - - noun: procedure + - id: procedure + type: text + description: reaction procedure + names: + - noun: reaction procedure + - noun: description of reaction procedure + - noun: reaction procedure description + - noun: procedure identifiers: - - id: RXNSMILES - type: RXNSMILES - description: reaction SMILES - names: - - noun: reaction SMILES - - noun: reaction SMILES string - - noun: RXNSMILES - - noun: reaction SMILES (RXNSMILES) + - id: RXNSMILES + type: RXNSMILES + description: reaction SMILES + names: + - noun: reaction SMILES + - noun: reaction SMILES string + - noun: RXNSMILES + - noun: reaction SMILES (RXNSMILES) license: CC BY SA 4.0 links: - - url: https://github.com/open-reaction-database/ord-data - description: original data source + - url: https://github.com/open-reaction-database/ord-data + description: original data source num_points: 76648 bibtex: - - |- - @article{Kearnes_2021, - doi = {10.1021/jacs.1c09820}, - url = {https://doi.org/10.1021%2Fjacs.1c09820}, - year = 2021, - month = {nov}, - publisher = {American Chemical Society ({ACS})}, - volume = {143}, - number = {45}, - pages = {18820--18826}, - author = {Steven M. Kearnes and Michael R. Maser - and Michael Wleklinski and Anton Kast and Abigail G. 
Doyle - and Spencer D. Dreher and Joel M. Hawkins - and Klavs F. Jensen and Connor W. Coley}, - title = {The Open Reaction Database}, - journal = {J. Am. Chem. Soc.} - } + - |- + @article{Kearnes_2021, + doi = {10.1021/jacs.1c09820}, + url = {https://doi.org/10.1021%2Fjacs.1c09820}, + year = 2021, + month = {nov}, + publisher = {American Chemical Society ({ACS})}, + volume = {143}, + number = {45}, + pages = {18820--18826}, + author = {Steven M. Kearnes and Michael R. Maser + and Michael Wleklinski and Anton Kast and Abigail G. Doyle + and Spencer D. Dreher and Joel M. Hawkins + and Klavs F. Jensen and Connor W. Coley}, + title = {The Open Reaction Database}, + journal = {J. Am. Chem. Soc.} + } templates: - - |- - The {RXNSMILES__names__noun} of a reaction with the {procedure__names__noun} below is {RXNSMILES#}. - Procedure: {procedure#} - - |- - The {procedure__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#} is: - {procedure#} - - |- - User: {#I want|I need|I would like!} to run a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}. - Assistant: {#Cool, is|That's interesting, is|Great, is|Is!} there anything else I can do for you? - User: {#Yes, |!}I would like to know the {procedure__names__noun} I should follow to run the reaction. - Assistant: {#I recommend|I suggest|I propose|I advise!} the following procedure: {procedure#} - - |- - User: {#I want|I need|I would like!} to run a reaction with the {procedure__names__noun} below and now need to know the {RXNSMILES__names__noun}. - Procedure: {procedure#} - Assistant: The {RXNSMILES__names__noun} of the reaction is {RXNSMILES#}. - - |- - Task: Extract the {RXNSMILES__names__noun} of a reaction based on its {procedure__names__noun}. - Procedure: {procedure#} - Answer: {RXNSMILES#} + - |- + The {RXNSMILES__names__noun} of a reaction with the {procedure__names__noun} below is {RXNSMILES#}. 
+ Procedure: {procedure#} + - |- + The {procedure__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#} is: + {procedure#} + - |- + User: {#I want|I need|I would like!} to run a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}. + Assistant: {#Cool, is|That's interesting, is|Great, is|Is!} there anything else I can do for you? + User: {#Yes, |!}I would like to know the {procedure__names__noun} I should follow to run the reaction. + Assistant: {#I recommend|I suggest|I propose|I advise!} the following procedure: {procedure#} + - |- + User: {#I want|I need|I would like!} to run a reaction with the {procedure__names__noun} below and now need to know the {RXNSMILES__names__noun}. + Procedure: {procedure#} + Assistant: The {RXNSMILES__names__noun} of the reaction is {RXNSMILES#}. + - |- + Task: Extract the {RXNSMILES__names__noun} of a reaction based on its {procedure__names__noun}. + Procedure: {procedure#} + Answer: {RXNSMILES#} diff --git a/data/tabular/ord_rxn_smiles_yield_pred/meta.yaml b/data/tabular/ord_rxn_smiles_yield_pred/meta.yaml index a4c44563c..92ce5a078 100644 --- a/data/tabular/ord_rxn_smiles_yield_pred/meta.yaml +++ b/data/tabular/ord_rxn_smiles_yield_pred/meta.yaml @@ -1,53 +1,52 @@ ---- name: ord_rxn_smiles_yield_pred description: |- - The open reaction database is a database of chemical reactions and their conditions + The open reaction database is a database of chemical reactions and their conditions targets: - - id: yield - type: continuous - significant_digits: 0 - description: reaction yield - units: \% - names: - - noun: yield - - noun: reaction yield + - id: yield + type: continuous + significant_digits: 0 + description: reaction yield + units: \% + names: + - noun: yield + - noun: reaction yield identifiers: - - id: RXNSMILES - type: RXNSMILES - description: reaction SMILES - names: - - noun: reaction SMILES - - noun: reaction SMILES string - - noun: RXNSMILES - - noun: reaction SMILES (RXNSMILES) + - id: RXNSMILES + 
type: RXNSMILES + description: reaction SMILES + names: + - noun: reaction SMILES + - noun: reaction SMILES string + - noun: RXNSMILES + - noun: reaction SMILES (RXNSMILES) license: CC BY SA 4.0 links: - - url: https://github.com/open-reaction-database/ord-data - description: original data source + - url: https://github.com/open-reaction-database/ord-data + description: original data source num_points: 28 bibtex: - - |- - @article{Kearnes_2021, - doi = {10.1021/jacs.1c09820}, - url = {https://doi.org/10.1021%2Fjacs.1c09820}, - year = 2021, - month = {nov}, - publisher = {American Chemical Society ({ACS})}, - volume = {143}, - number = {45}, - pages = {18820--18826}, - author = {Steven M. Kearnes and Michael R. Maser - and Michael Wleklinski and Anton Kast and Abigail G. Doyle - and Spencer D. Dreher and Joel M. Hawkins - and Klavs F. Jensen and Connor W. Coley}, - title = {The Open Reaction Database}, - journal = {J. Am. Chem. Soc.} - } + - |- + @article{Kearnes_2021, + doi = {10.1021/jacs.1c09820}, + url = {https://doi.org/10.1021%2Fjacs.1c09820}, + year = 2021, + month = {nov}, + publisher = {American Chemical Society ({ACS})}, + volume = {143}, + number = {45}, + pages = {18820--18826}, + author = {Steven M. Kearnes and Michael R. Maser + and Michael Wleklinski and Anton Kast and Abigail G. Doyle + and Spencer D. Dreher and Joel M. Hawkins + and Klavs F. Jensen and Connor W. Coley}, + title = {The Open Reaction Database}, + journal = {J. Am. Chem. Soc.} + } templates: - - The {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#} is {yield#}{yield__units}. - - |- - User: {#I need|I want|I would like!} to run a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}. What is the {yield__names__noun} {#I can expect|I should expect|I should get|I can get!}? - Assistant: {#The|The expected|The predicted|The estimated!} {yield__names__noun} is {yield#}{yield__units}. 
- - |- - Question: {#What is|What's|What is the|What's the!} {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}? - Answer: {yield#}{yield__units}. + - The {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#} is {yield#}{yield__units}. + - |- + User: {#I need|I want|I would like!} to run a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}. What is the {yield__names__noun} {#I can expect|I should expect|I should get|I can get!}? + Assistant: {#The|The expected|The predicted|The estimated!} {yield__names__noun} is {yield#}{yield__units}. + - |- + Question: {#What is|What's|What is the|What's the!} {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}? + Answer: {yield#}{yield__units}. diff --git a/data/tabular/ord_steps_yield/meta.yaml b/data/tabular/ord_steps_yield/meta.yaml index d69ab54ca..40087f18d 100644 --- a/data/tabular/ord_steps_yield/meta.yaml +++ b/data/tabular/ord_steps_yield/meta.yaml @@ -1,54 +1,53 @@ ---- name: ord_steps_yield description: |- - The open reaction database is a database of chemical reactions and their conditions + The open reaction database is a database of chemical reactions and their conditions identifiers: - - id: non_yield_steps_string - type: text - description: reaction action sequence - names: - - noun: reaction action sequence - - noun: reaction action steps + - id: non_yield_steps_string + type: text + description: reaction action sequence + names: + - noun: reaction action sequence + - noun: reaction action steps targets: - - id: yield - type: continuous - significant_digits: 0 - description: reaction yield - units: \% - names: - - noun: yield - - noun: reaction yield + - id: yield + type: continuous + significant_digits: 0 + description: reaction yield + units: \% + names: + - noun: yield + - noun: reaction yield license: CC BY SA 4.0 links: - - url: https://github.com/open-reaction-database/ord-data - description: original data source 
+ - url: https://github.com/open-reaction-database/ord-data + description: original data source num_points: 30 bibtex: - - |- - @article{Kearnes_2021, - doi = {10.1021/jacs.1c09820}, - url = {https://doi.org/10.1021%2Fjacs.1c09820}, - year = 2021, - month = {nov}, - publisher = {American Chemical Society ({ACS})}, - volume = {143}, - number = {45}, - pages = {18820--18826}, - author = {Steven M. Kearnes and Michael R. Maser - and Michael Wleklinski and Anton Kast and Abigail G. Doyle - and Spencer D. Dreher and Joel M. Hawkins - and Klavs F. Jensen and Connor W. Coley}, - title = {The Open Reaction Database}, - journal = {J. Am. Chem. Soc.} - } + - |- + @article{Kearnes_2021, + doi = {10.1021/jacs.1c09820}, + url = {https://doi.org/10.1021%2Fjacs.1c09820}, + year = 2021, + month = {nov}, + publisher = {American Chemical Society ({ACS})}, + volume = {143}, + number = {45}, + pages = {18820--18826}, + author = {Steven M. Kearnes and Michael R. Maser + and Michael Wleklinski and Anton Kast and Abigail G. Doyle + and Spencer D. Dreher and Joel M. Hawkins + and Klavs F. Jensen and Connor W. Coley}, + title = {The Open Reaction Database}, + journal = {J. Am. Chem. Soc.} + } templates: - - |- - The {yield__names__noun} of a reaction with the {non_yield_steps_string__names__noun} below is {yield#}{yield__units}. - {non_yield_steps_string__names__noun}: {non_yield_steps_string#} - - |- - User: {#I need|I want|I would like!} to run a reaction with the {non_yield_steps_string__names__noun} {non_yield_steps_string#}. What is the {yield__names__noun} {#I can expect|I should expect|I should get|I can get!}? - Assistant: {#The|The expected|The predicted|The estimated!} {yield__names__noun} is {yield#}{yield__units}. - - |- - Task: {#Predict|Estimate!} the {yield__names__noun} of a reaction based on the {non_yield_steps_string__names__noun}. 
- Description: {non_yield_steps_string#} - Answer: {yield#}{yield__units} + - |- + The {yield__names__noun} of a reaction with the {non_yield_steps_string__names__noun} below is {yield#}{yield__units}. + {non_yield_steps_string__names__noun}: {non_yield_steps_string#} + - |- + User: {#I need|I want|I would like!} to run a reaction with the {non_yield_steps_string__names__noun} {non_yield_steps_string#}. What is the {yield__names__noun} {#I can expect|I should expect|I should get|I can get!}? + Assistant: {#The|The expected|The predicted|The estimated!} {yield__names__noun} is {yield#}{yield__units}. + - |- + Task: {#Predict|Estimate!} the {yield__names__noun} of a reaction based on the {non_yield_steps_string__names__noun}. + Description: {non_yield_steps_string#} + Answer: {yield#}{yield__units} diff --git a/data/tabular/orexin1_receptor_butkiewicz/meta.yaml b/data/tabular/orexin1_receptor_butkiewicz/meta.yaml index da2695735..8284c38ef 100644 --- a/data/tabular/orexin1_receptor_butkiewicz/meta.yaml +++ b/data/tabular/orexin1_receptor_butkiewicz/meta.yaml @@ -1,176 +1,175 @@ ---- name: orexin1_receptor_butkiewicz description: |- - "GPCR Orexin 1 is relevant for behavioral plasticity, - the sleep-wake cycle, and gastric acid secretion.Three primary screens, - AID 485270, AID 463079, AID 434989, were performed. Validation assay - AID504701, AD492963. Counter screen 493232. More specific assay - AID504699. AID504701 and AID504699 were combined to identify 234 active - compounds excluding an overlap of 155 molecules. + "GPCR Orexin 1 is relevant for behavioral plasticity, + the sleep-wake cycle, and gastric acid secretion.Three primary screens, + AID 485270, AID 463079, AID 434989, were performed. Validation assay + AID504701, AD492963. Counter screen 493232. More specific assay + AID504699. AID504701 and AID504699 were combined to identify 234 active + compounds excluding an overlap of 155 molecules. 
targets: - - id: activity_orexin1 - description: whether it is active against orexin1 receptor (1) or not (0). - units: - type: boolean - names: - - noun: orexin 1 inhibitor - - noun: a orexin 1 receptor antagonist - - gerund: inhibiting orexin 1 receptor - - adjective: orexin-1 inhibitory - pubchem_aids: - - 485270 - - 463079 - - 434989 - - 504701 - - 493232 - - 504699 - uris: - - http://purl.bioontology.org/ontology/SNOMEDCT/838464006 + - id: activity_orexin1 + description: whether it is active against orexin1 receptor (1) or not (0). + units: + type: boolean + names: + - noun: orexin 1 inhibitor + - noun: a orexin 1 receptor antagonist + - gerund: inhibiting orexin 1 receptor + - adjective: orexin-1 inhibitory + pubchem_aids: + - 485270 + - 463079 + - 434989 + - 504701 + - 493232 + - 504699 + uris: + - http://purl.bioontology.org/ontology/SNOMEDCT/838464006 identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split license: CC BY 4.0 links: - - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al - description: original dataset - - url: https://doi.org/10.3390/molecules18010735 - description: corresponding publication - - url: https://doi.org/10.1093/nar/gky1033 - description: corresponding publication - - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ - description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al + description: original dataset + - url: https://doi.org/10.3390/molecules18010735 + description: corresponding publication + - url: https://doi.org/10.1093/nar/gky1033 + description: corresponding publication + - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ + description: corresponding publication num_points: 218158 bibtex: - - |- - @article{Butkiewicz2013, - doi = 
{10.3390/molecules18010735}, - url = {https://doi.org/10.3390/molecules18010735}, - year = {2013}, - month = jan, - publisher = {{MDPI} {AG}}, - volume = {18}, - number = {1}, - pages = {735--756}, - author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and - Jeffrey Mendenhall and Pedro Teixeira and C. Weaver and Jens - Meiler}, - title = {Benchmarking Ligand-Based Virtual High-Throughput - Screening with the {PubChem} Database}, - journal = {Molecules}} - - |- - @article{Kim2018, - doi = {10.1093/nar/gky1033}, - url = {https://doi.org/10.1093/nar/gky1033}, - year = {2018}, - month = oct, - publisher = {Oxford University Press ({OUP})}, - volume = {47}, - number = {D1}, - pages = {D1102--D1109}, - author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta - Gindulyte and Jia He and Siqian He and Qingliang Li and Benjamin - A Shoemaker and Paul A Thiessen and Bo Yu and Leonid Zaslavsky - and Jian Zhang and Evan E Bolton}, - title = {{PubChem} 2019 update: improved access to chemical data}, - journal = {Nucleic Acids Research}} - - |- - @article{Butkiewicz2017, - doi = {}, - url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, - year = {2017}, - publisher = {Chem Inform}, - volume = {3}, - number = {1}, - author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe, - E. W. and Weaver, D. C. and Meiler, J.}, - title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from - the {P}ub{C}hem {D}atabase}}, - journal = {Chemical Science}} + - |- + @article{Butkiewicz2013, + doi = {10.3390/molecules18010735}, + url = {https://doi.org/10.3390/molecules18010735}, + year = {2013}, + month = jan, + publisher = {{MDPI} {AG}}, + volume = {18}, + number = {1}, + pages = {735--756}, + author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and + Jeffrey Mendenhall and Pedro Teixeira and C. 
Weaver and Jens + Meiler}, + title = {Benchmarking Ligand-Based Virtual High-Throughput + Screening with the {PubChem} Database}, + journal = {Molecules}} + - |- + @article{Kim2018, + doi = {10.1093/nar/gky1033}, + url = {https://doi.org/10.1093/nar/gky1033}, + year = {2018}, + month = oct, + publisher = {Oxford University Press ({OUP})}, + volume = {47}, + number = {D1}, + pages = {D1102--D1109}, + author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta + Gindulyte and Jia He and Siqian He and Qingliang Li and Benjamin + A Shoemaker and Paul A Thiessen and Bo Yu and Leonid Zaslavsky + and Jian Zhang and Evan E Bolton}, + title = {{PubChem} 2019 update: improved access to chemical data}, + journal = {Nucleic Acids Research}} + - |- + @article{Butkiewicz2017, + doi = {}, + url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, + year = {2017}, + publisher = {Chem Inform}, + volume = {3}, + number = {1}, + author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe, + E. W. and Weaver, D. C. and Meiler, J.}, + title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from + the {P}ub{C}hem {D}atabase}}, + journal = {Chemical Science}} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {activity_orexin1#not &NULL}{activity_orexin1__names__gerund}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {activity_orexin1#no &NULL}{activity_orexin1__names__noun} {#properties|characteristics|features!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {activity_orexin1#not &NULL}identified as {activity_orexin1__names__adjective}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}. 
- - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_orexin1__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. - Result: {activity_orexin1#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_orexin1__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {activity_orexin1__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_orexin1__names__adjective}? - Assistant: {activity_orexin1#No&Yes}, this molecule is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {activity_orexin1__names__adjective}? - Assistant: {activity_orexin1#No&Yes}, it is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}? 
- Assistant: This is a molecule that is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} the {SMILES__description} of a {#molecule|chemical|chemical structure!}. - Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {activity_orexin1#not &NULL}be {activity_orexin1__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} the {SMILES__description} of a {#molecule|chemical|chemical structure!}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {activity_orexin1#not &NULL}be {activity_orexin1__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {activity_orexin1__names__adjective}:{activity_orexin1#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_orexin1__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{activity_orexin1#False&True} - - |- - Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|chemical structure!} based on the {#text |!}description{# below|!}. - Description: A molecule that is {activity_orexin1__names__adjective}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. 
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_orexin1__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {activity_orexin1%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_orexin1__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {activity_orexin1%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%activity_orexin1%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional|extra!} words. - Options: - {SMILES%activity_orexin1%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {activity_orexin1#not &NULL}{activity_orexin1__names__gerund}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {activity_orexin1#no &NULL}{activity_orexin1__names__noun} {#properties|characteristics|features!}. 
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {activity_orexin1#not &NULL}identified as {activity_orexin1__names__adjective}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_orexin1__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. + Result: {activity_orexin1#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_orexin1__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {activity_orexin1__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_orexin1__names__adjective}? + Assistant: {activity_orexin1#No&Yes}, this molecule is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {activity_orexin1__names__adjective}? + Assistant: {activity_orexin1#No&Yes}, it is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}? 
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}? + Assistant: This is a molecule that is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} the {SMILES__description} of a {#molecule|chemical|chemical structure!}. + Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {activity_orexin1#not &NULL}be {activity_orexin1__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} the {SMILES__description} of a {#molecule|chemical|chemical structure!}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {activity_orexin1#not &NULL}be {activity_orexin1__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {activity_orexin1__names__adjective}:{activity_orexin1#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_orexin1__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. 
+ Result:{activity_orexin1#False&True} + - |- + Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|chemical structure!} based on the {#text |!}description{# below|!}. + Description: A molecule that is {activity_orexin1__names__adjective}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_orexin1__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {activity_orexin1%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_orexin1__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {activity_orexin1%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%activity_orexin1%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_orexin1#not &NULL}{activity_orexin1__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional|extra!} words. 
+ Options: + {SMILES%activity_orexin1%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/p_glycoprotein_inhibition_broccatelli_et_al/meta.yaml b/data/tabular/p_glycoprotein_inhibition_broccatelli_et_al/meta.yaml index 1d8f4f652..eee4e831f 100644 --- a/data/tabular/p_glycoprotein_inhibition_broccatelli_et_al/meta.yaml +++ b/data/tabular/p_glycoprotein_inhibition_broccatelli_et_al/meta.yaml @@ -1,141 +1,140 @@ ---- name: p_glycoprotein_inhibition_broccatelli_et_al description: |- - P-glycoprotein (Pgp) is an ABC transporter protein involved in intestinal - absorption, drug metabolism, and brain penetration, and its inhibition can seriously - alter a drug's bioavailability and safety. In addition, inhibitors of Pgp can - be used to overcome multidrug resistance. + P-glycoprotein (Pgp) is an ABC transporter protein involved in intestinal + absorption, drug metabolism, and brain penetration, and its inhibition can seriously + alter a drug's bioavailability and safety. In addition, inhibitors of Pgp can + be used to overcome multidrug resistance. 
targets: - - id: Pgp_inhibition - description: whether it shows Pgp inhibition (1) or not (0) - units: - type: boolean - names: - - noun: P-glycoprotein inhibition - - noun: Pgp inhibition - - gerund: showing P-glycoprotein inhibition - - gerund: showing Pgp inhibition - - adjective: Pgp inhibitory - - adjective: P-glycoprotein inhibitory - uris: - - http://purl.bioontology.org/ontology/CSP/4000-0278 + - id: Pgp_inhibition + description: whether it shows Pgp inhibition (1) or not (0) + units: + type: boolean + names: + - noun: P-glycoprotein inhibition + - noun: Pgp inhibition + - gerund: showing P-glycoprotein inhibition + - gerund: showing Pgp inhibition + - adjective: Pgp inhibitory + - adjective: P-glycoprotein inhibitory + uris: + - http://purl.bioontology.org/ontology/CSP/4000-0278 benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: compound_name - type: Other - names: - - noun: compound name - - noun: drug name - - noun: generic drug name - description: drug name + - id: SMILES + type: SMILES + description: SMILES + - id: compound_name + type: Other + names: + - noun: compound name + - noun: drug name + - noun: generic drug name + description: drug name license: CC BY 4.0 links: - - url: https://doi.org/10.1021/jm101421d - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#pgp-p-glycoprotein-inhibition-broccatelli-et-al - description: data source + - url: https://doi.org/10.1021/jm101421d + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#pgp-p-glycoprotein-inhibition-broccatelli-et-al + description: data source num_points: 1218 bibtex: - - |- - @article{Broccatelli2011, - doi = {10.1021/jm101421d}, - url = {https://doi.org/10.1021/jm101421d}, - year = {2011}, - month = feb, - publisher = {American Chemical 
Society (ACS)}, - volume = {54}, - number = {6}, - author = {Fabio Broccatelli and Emanuele Carosati and Annalisa Neri and - Maria Frosini and Laura Goracci and Tudor I. Oprea and Gabriele Cruciani}, - title = {A Novel Approach for Predicting P-Glycoprotein (ABCB1) Inhibition - Using Molecular Interaction Fields}, - journal = {Journal of Medicinal Chemistry} + - |- + @article{Broccatelli2011, + doi = {10.1021/jm101421d}, + url = {https://doi.org/10.1021/jm101421d}, + year = {2011}, + month = feb, + publisher = {American Chemical Society (ACS)}, + volume = {54}, + number = {6}, + author = {Fabio Broccatelli and Emanuele Carosati and Annalisa Neri and + Maria Frosini and Laura Goracci and Tudor I. Oprea and Gabriele Cruciani}, + title = {A Novel Approach for Predicting P-Glycoprotein (ABCB1) Inhibition + Using Molecular Interaction Fields}, + journal = {Journal of Medicinal Chemistry} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__gerund}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {Pgp_inhibition#no &NULL}{Pgp_inhibition__names__noun} {#properties|characteristics|features!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {Pgp_inhibition#not &NULL}identified as {Pgp_inhibition__names__adjective}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {Pgp_inhibition__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. 
- Result: {Pgp_inhibition#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {Pgp_inhibition__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {SMILES__description} of a {#molecule|chemical|chemical compound!} based on the {#text |!}description{# below|!}. - Description: A molecule that is {Pgp_inhibition__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {Pgp_inhibition__names__adjective}? - Assistant: {Pgp_inhibition#No&Yes}, this molecule is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {Pgp_inhibition__names__adjective}? - Assistant: {Pgp_inhibition#No&Yes}, it is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}? - Assistant: This is a molecule that is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {SMILES__description} of a {#molecule|chemical|chemical compound!}. - Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. 
The molecule should {Pgp_inhibition#not &NULL}be {Pgp_inhibition__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {SMILES__description} of a {#molecule|chemical|chemical compound!}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {Pgp_inhibition#not &NULL}be {Pgp_inhibition__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {Pgp_inhibition__names__adjective}:{Pgp_inhibition#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {Pgp_inhibition__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{Pgp_inhibition#False&True} - - |- - Task: Please {#give me|create|generate!} a {SMILES__description} of a {#molecule|chemical|chemical compound!} based on the {#text |!}description{# below|!}. - Description: A molecule that is {Pgp_inhibition__names__adjective}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {Pgp_inhibition__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {Pgp_inhibition%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. 
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {Pgp_inhibition__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {Pgp_inhibition%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%Pgp_inhibition%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%Pgp_inhibition%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__gerund}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {Pgp_inhibition#no &NULL}{Pgp_inhibition__names__noun} {#properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {Pgp_inhibition#not &NULL}identified as {Pgp_inhibition__names__adjective}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {Pgp_inhibition__names__adjective}. 
+ {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. + Result: {Pgp_inhibition#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {Pgp_inhibition__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {SMILES__description} of a {#molecule|chemical|chemical compound!} based on the {#text |!}description{# below|!}. + Description: A molecule that is {Pgp_inhibition__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {Pgp_inhibition__names__adjective}? + Assistant: {Pgp_inhibition#No&Yes}, this molecule is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {Pgp_inhibition__names__adjective}? + Assistant: {Pgp_inhibition#No&Yes}, it is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}? + Assistant: This is a molecule that is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {SMILES__description} of a {#molecule|chemical|chemical compound!}. + Assistant: This sounds {#very exciting. 
|very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {Pgp_inhibition#not &NULL}be {Pgp_inhibition__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {SMILES__description} of a {#molecule|chemical|chemical compound!}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {Pgp_inhibition#not &NULL}be {Pgp_inhibition__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {Pgp_inhibition__names__adjective}:{Pgp_inhibition#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {Pgp_inhibition__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{Pgp_inhibition#False&True} + - |- + Task: Please {#give me|create|generate!} a {SMILES__description} of a {#molecule|chemical|chemical compound!} based on the {#text |!}description{# below|!}. + Description: A molecule that is {Pgp_inhibition__names__adjective}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {Pgp_inhibition__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. 
+ Options: + {Pgp_inhibition%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {Pgp_inhibition__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {Pgp_inhibition%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%Pgp_inhibition%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {Pgp_inhibition#not &NULL}{Pgp_inhibition__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {SMILES%Pgp_inhibition%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/pampa_ncats/example_processing_and_templates.ipynb b/data/tabular/pampa_ncats/example_processing_and_templates.ipynb index 07b2d8094..625a862cb 100644 --- a/data/tabular/pampa_ncats/example_processing_and_templates.ipynb +++ b/data/tabular/pampa_ncats/example_processing_and_templates.ipynb @@ -26,11 +26,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "cf59e3e9-8061-4022-9eae-e978311b4155", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", @@ -56,11 +54,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "7bb8eb5e-f513-40d2-a68c-7cda1a51ad31", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "fn_data_original = \"data_original.csv\"" @@ -68,11 +64,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "b39a142e-ccbc-49d2-98b0-a5f9bde9fd27", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stderr", @@ -91,11 +85,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "26d9f62a-07f5-4113-8161-d5dfcf0bfb71", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "data.get_data().to_csv(fn_data_original, index=False)" @@ -105,9 +97,7 @@ "cell_type": "code", "execution_count": null, "id": "43873fc3-20a8-487d-a7c5-33bd58414159", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "!ls -lh" @@ -123,11 +113,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "77f614e7-b133-40bc-8759-2d930e4c120e", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -147,11 +135,9 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "id": "8f5a0387-f9e3-4e1a-8d14-5df618195f70", - "metadata": { - 
"tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(fn_data_original, delimiter=\",\")" @@ -168,11 +154,9 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "ec2458e5-455f-4f03-8ce9-c0d12e9ed371", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "data": { @@ -180,7 +164,7 @@ "['Drug_ID', 'Drug', 'Y']" ] }, - "execution_count": 23, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -192,11 +176,9 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "a46dd8ff-37b3-4894-8226-3bf98226dd09", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "fields_clean = [\n", @@ -208,11 +190,9 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "785d37cb-1fb4-4a91-a923-d5a78a37f36a", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "df.columns = fields_clean" @@ -220,11 +200,9 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "1bf212cb-1653-457b-9f5d-416d4dd14b53", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "data": { @@ -303,7 +281,7 @@ "4 1 " ] }, - "execution_count": 26, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -322,11 +300,9 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "03fcdcb3-2af2-47cc-81fd-f350b3f268d3", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "df.drop(columns=[\"compound_id\"], inplace=True)" @@ -334,11 +310,9 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "d544fa60-343e-40e1-bd0c-4750f07a7145", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "assert not df.duplicated().sum()" @@ -354,11 +328,9 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": 
"d6d5efa5-b4b4-4a25-8626-e10f3d691e83", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "fn_data_csv = \"data_clean.csv\"" @@ -366,11 +338,9 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "727f8d7b-cbb6-43c7-9eab-9d4d65be6b3f", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "df.to_csv(fn_data_csv, index=False)" @@ -380,9 +350,7 @@ "cell_type": "code", "execution_count": null, "id": "63c8d4a4-906e-418d-be39-879365b4dfa0", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "!ls -lh {fn_data_csv}" @@ -390,11 +358,9 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "a51b9001-25d7-4e0e-a607-477cfc4a9f1c", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -414,11 +380,9 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "1a512943-4909-4d56-867d-50c151d8d607", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "data": { @@ -484,7 +448,7 @@ "4 CN1C2=CC=CC=C2C(=O)C3=C1N=C(N(C3=O)C4=CC=CC=C4... 
1" ] }, - "execution_count": 33, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -503,11 +467,9 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "077b0c5f-8772-4879-9317-3fa28799689b", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "fn_data_csv = \"data_clean.csv\"" @@ -515,11 +477,9 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "id": "6eaef0e6-2115-4793-ac43-a196b25d47a0", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(fn_data_csv)" @@ -527,11 +487,9 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "id": "43619e7c-9c82-4ff0-ae25-403861304635", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "data": { @@ -597,7 +555,7 @@ "4 CN1C2=CC=CC=C2C(=O)C3=C1N=C(N(C3=O)C4=CC=CC=C4... 1" ] }, - "execution_count": 36, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -616,11 +574,9 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "id": "49771077-471d-4d71-a9a7-d6b094bbc4f3", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "data": { @@ -686,7 +642,7 @@ "4 CN1C2=CC=CC=C2C(=O)C3=C1N=C(N(C3=O)C4=CC=CC=C4... 
1" ] }, - "execution_count": 37, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -697,11 +653,9 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "id": "d3890961-444e-4a26-b8fc-ed8c4e959af9", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "meta = {\n", @@ -762,11 +716,9 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "id": "580bbd79-4845-4515-be94-3e4a9815d048", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "fn_meta = \"meta.yaml\"" @@ -796,11 +748,9 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "id": "873fa5dd-9b60-40f5-b537-4d7a206414ea", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "with open(fn_meta, \"w\") as f:\n", @@ -811,9 +761,7 @@ "cell_type": "code", "execution_count": null, "id": "d01686c0-6746-4fc4-b019-350270dfc26f", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "!ls -lh {fn_meta}" @@ -821,11 +769,9 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "id": "ef6063c5-7a8b-4344-bccf-a073443feebf", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -883,11 +829,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "9aab00fd-58a8-40b0-be30-1e269e0d323b", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "path_file = \"transform.py\"" @@ -895,11 +839,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "8368bb20-8e1c-4b7d-b0e2-b39da36b5972", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -1035,9 +977,7 @@ "cell_type": "code", "execution_count": null, "id": "d0474f26-70f3-4655-b81a-df4ada90e7a6", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "!python3 transform.py" 
@@ -1047,9 +987,7 @@ "cell_type": "code", "execution_count": null, "id": "953e7bee-bd5e-41d0-a2be-506e0bc97727", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "ls -lh # fmt: skip" @@ -1079,8 +1017,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" + "pygments_lexer": "ipython3" } }, "nbformat": 4, diff --git a/data/tabular/pampa_ncats/meta.yaml b/data/tabular/pampa_ncats/meta.yaml index 0fd68d7b5..8ba6dc79b 100644 --- a/data/tabular/pampa_ncats/meta.yaml +++ b/data/tabular/pampa_ncats/meta.yaml @@ -1,135 +1,134 @@ ---- name: pampa_ncats description: |- - PAMPA (parallel artificial membrane permeability assay) is a commonly - employed assay to evaluate drug permeability across the cellular membrane. - PAMPA is a non-cell-based, low-cost and high-throughput alternative to cellular models. - Although PAMPA does not model active and efflux transporters, it still provides permeability values - that are useful for absorption prediction because the majority of drugs are absorbed - by passive diffusion through the membrane. + PAMPA (parallel artificial membrane permeability assay) is a commonly + employed assay to evaluate drug permeability across the cellular membrane. + PAMPA is a non-cell-based, low-cost and high-throughput alternative to cellular models. + Although PAMPA does not model active and efflux transporters, it still provides permeability values + that are useful for absorption prediction because the majority of drugs are absorbed + by passive diffusion through the membrane. targets: - - id: permeability - description: Binary permeability in PAMPA assay. 
- units: - type: boolean - names: - - noun: permeability - - verb: is permeable in the PAMPA assay - - verb: shows permeability in parallel artificial membrane permeability assay (PAMPA) assay - - adjective: permeable in the PAMPA assay - - gerund: permeating in the PAMPA assay - pubchem_aids: - - 1508612 - uris: - - http://purl.bioontology.org/ontology/MESH/D002463 + - id: permeability + description: Binary permeability in PAMPA assay. + units: + type: boolean + names: + - noun: permeability + - verb: is permeable in the PAMPA assay + - verb: shows permeability in parallel artificial membrane permeability assay (PAMPA) assay + - adjective: permeable in the PAMPA assay + - gerund: permeating in the PAMPA assay + pubchem_aids: + - 1508612 + uris: + - http://purl.bioontology.org/ontology/MESH/D002463 identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://tdcommons.ai/single_pred_tasks/adme/#pampa-permeability-ncats - description: original dataset link - - url: https://journals.sagepub.com/doi/full/10.1177/24725552211017520 - description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#pampa-permeability-ncats + description: original dataset link + - url: https://journals.sagepub.com/doi/full/10.1177/24725552211017520 + description: corresponding publication benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split num_points: 2034 bibtex: - - |- - @article{siramshetty2021validating, - title={Validating ADME QSAR Models Using Marketed Drugs}, - author={Siramshetty, Vishal and Williams, Jordan and Nguyen, DHac-Trung and Neyra, Jorge and Southall, - Noel and Math'e, Ewy and Xu, Xin and Shah, Pranav}, - journal={SLAS DISCOVERY: Advancing the Science of Drug Discovery}, - volume={26}, - number={10}, - pages={1326--1336}, - year={2021}, - publisher={SAGE 
Publications Sage CA: Los Angeles, CA} - } + - |- + @article{siramshetty2021validating, + title={Validating ADME QSAR Models Using Marketed Drugs}, + author={Siramshetty, Vishal and Williams, Jordan and Nguyen, DHac-Trung and Neyra, Jorge and Southall, + Noel and Math'e, Ewy and Xu, Xin and Shah, Pranav}, + journal={SLAS DISCOVERY: Advancing the Science of Drug Discovery}, + volume={26}, + number={10}, + pages={1326--1336}, + year={2021}, + publisher={SAGE Publications Sage CA: Los Angeles, CA} + } templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {permeability#not &NULL}{permeability__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {permeability#not &NULL}{permeability__names__gerund}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {permeability#no &NULL}{permeability__names__noun} {#properties|characteristics|features!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {permeability#not &NULL}identified as {permeability__names__adjective}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {permeability#not &NULL}{permeability__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {permeability__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. - Result: {permeability#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {permeability__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {permeability#not &NULL}{permeability__names__adjective}. 
- - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {permeability__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {permeability__names__adjective}? - Assistant: {permeability#No&Yes}, this molecule is {permeability#not &NULL}{permeability__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {permeability__names__adjective}? - Assistant: {permeability#No&Yes}, it is {permeability#not &NULL}{permeability__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {permeability#not &NULL}{permeability__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {permeability#not &NULL}{permeability__names__adjective}? - Assistant: This is a molecule that is {permeability#not &NULL}{permeability__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {permeability#not &NULL}be {permeability__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {permeability#not &NULL}{permeability__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? 
- User: Yes, the molecule should {permeability#not &NULL}be {permeability__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {permeability#not &NULL}{permeability__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {permeability__names__adjective}:{permeability#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {permeability__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{permeability#False&True} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {permeability__names__adjective}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {permeability__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {permeability%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {permeability__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {permeability%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {permeability#not &NULL}{permeability__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
- Options: - {SMILES%permeability%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {permeability#not &NULL}{permeability__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%permeability%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {permeability#not &NULL}{permeability__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {permeability#not &NULL}{permeability__names__gerund}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {permeability#no &NULL}{permeability__names__noun} {#properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {permeability#not &NULL}identified as {permeability__names__adjective}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {permeability#not &NULL}{permeability__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {permeability__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. + Result: {permeability#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {permeability__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {permeability#not &NULL}{permeability__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. 
+ Description: A molecule that is {permeability__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {permeability__names__adjective}? + Assistant: {permeability#No&Yes}, this molecule is {permeability#not &NULL}{permeability__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {permeability__names__adjective}? + Assistant: {permeability#No&Yes}, it is {permeability#not &NULL}{permeability__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {permeability#not &NULL}{permeability__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {permeability#not &NULL}{permeability__names__adjective}? + Assistant: This is a molecule that is {permeability#not &NULL}{permeability__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {permeability#not &NULL}be {permeability__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {permeability#not &NULL}{permeability__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {permeability#not &NULL}be {permeability__names__adjective}. 
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {permeability#not &NULL}{permeability__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {permeability__names__adjective}:{permeability#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {permeability__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{permeability#False&True} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {permeability__names__adjective}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {permeability__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {permeability%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {permeability__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {permeability%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {permeability#not &NULL}{permeability__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {SMILES%permeability%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {permeability#not &NULL}{permeability__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%permeability%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/peptides_hemolytic/meta.yaml b/data/tabular/peptides_hemolytic/meta.yaml index 4163cd150..cee39ecc5 100644 --- a/data/tabular/peptides_hemolytic/meta.yaml +++ b/data/tabular/peptides_hemolytic/meta.yaml @@ -1,146 +1,137 @@ ---- name: peptides_hemolytic -description: "Hemolysis is referred to the disruption of erythrocyte\nmembranes that decrease the life span of red blood cells and causes\nthe release of\ - \ Hemoglobin. It is critical to identify non-hemolytic\nantimicrobial peptides as a non-toxic and safe measure against bacterial\ninfections. However,\ - \ distinguishing between hemolytic and non-hemolytic\npeptides is a challenge, since they primarily exert their activity at the\ncharged surface of\ - \ the bacterial plasma membrane.\nThe data here comes from the Database of Antimicrobial Activity and Structure of\nPeptides (DBAASP v3). Hemolytic\ - \ activity is defined by extrapolating a measurement\nassuming dose response curves to the point\nat which 50% of red blood cells are lysed. Activities\ - \ below 100 mu g/ml, are\nconsidered hemolytic.\nThe data contains sequences of only L- and canonical amino acids. Each measurement\nis treated independently,\ - \ so sequences can appear multiple times. This experimental\ndataset contains noise, and in some observations (40%), an identical sequence appears\n\ - in both negative and positive class. As an example, sequence \"RVKRVWPLVIRTVIAGYNLYRAIKKK\"\nis found to be both hemolytic and\nnon-hemolytic in two\ - \ different lab experiments (i.e. 
two different training examples). " +description: "Hemolysis is referred to the disruption of erythrocyte\nmembranes that decrease the life span of red blood cells and causes\nthe release of Hemoglobin. It is critical to identify non-hemolytic\nantimicrobial peptides as a non-toxic and safe measure against bacterial\ninfections. However, distinguishing between hemolytic and non-hemolytic\npeptides is a challenge, since they primarily exert their activity at the\ncharged surface of the bacterial plasma membrane.\nThe data here comes from the Database of Antimicrobial Activity and Structure of\nPeptides (DBAASP v3). Hemolytic activity is defined by extrapolating a measurement\nassuming dose response curves to the point\nat which 50% of red blood cells are lysed. Activities below 100 mu g/ml, are\nconsidered hemolytic.\nThe data contains sequences of only L- and canonical amino acids. Each measurement\nis treated independently, so sequences can appear multiple times. This experimental\ndataset contains noise, and in some observations (40%), an identical sequence appears\nin both negative and positive class. As an example, sequence \"RVKRVWPLVIRTVIAGYNLYRAIKKK\"\nis found to be both hemolytic and\nnon-hemolytic in two different lab experiments (i.e. two different training examples). " targets: - - id: hemolytic - description: The ability of a peptide sequence to lyse red blood cells (1) or not (0). - units: - type: boolean - names: - - noun: hemolytic activity - - noun: hemolysis - - verb: lyse red blood cells - - adjective: hemolytic - - gerund: lysing red blood cells - uris: + - id: hemolytic + description: The ability of a peptide sequence to lyse red blood cells (1) or not (0). 
+ units: + type: boolean + names: + - noun: hemolytic activity + - noun: hemolysis + - verb: lyse red blood cells + - adjective: hemolytic + - gerund: lysing red blood cells + uris: benchmarks: [] identifiers: - - id: sequence - type: AS_SEQUENCE - description: amino acid sequence + - id: sequence + type: AS_SEQUENCE + description: amino acid sequence license: CC BY 4.0 links: - - url: https://doi.org/10.1021/acs.jcim.2c01317 - description: corresponding publication - - url: https://doi.org/10.1093/nar/gkaa991 - description: data source + - url: https://doi.org/10.1021/acs.jcim.2c01317 + description: corresponding publication + - url: https://doi.org/10.1093/nar/gkaa991 + description: data source num_points: 6541 bibtex: - - |- - @article{Martins2012, - doi = {10.1021/ci300124c}, - url = {https://doi.org/10.1021/ci300124c}, - year = {2012}, - month = jun, - publisher = {American Chemical Society (ACS)}, - volume = {52}, - number = {6}, - pages = {1686--1697}, - author = {Ines Filipa Martins and Ana L. Teixeira and Luis Pinheiro - and Andre O. Falcao}, - title = {A Bayesian Approach to in Silico Blood-Brain Barrier Penetration Modeling}, - journal = {Journal of Chemical Information and Modeling} - - |- - @article{Wu2018, - doi = {10.1039/c7sc02664a}, - url = {https://doi.org/10.1039/c7sc02664a}, - year = {2018}, - publisher = {Royal Society of Chemistry (RSC)}, - volume = {9}, - number = {2}, - pages = {513--530}, - author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg and Joseph - Gomes and Caleb Geniesse and Aneesh S. Pappu and Karl Leswing and Vijay Pande}, - title = {MoleculeNet: a benchmark for molecular machine learning}, - journal = {Chemical Science} + - |- + @article{Martins2012, + doi = {10.1021/ci300124c}, + url = {https://doi.org/10.1021/ci300124c}, + year = {2012}, + month = jun, + publisher = {American Chemical Society (ACS)}, + volume = {52}, + number = {6}, + pages = {1686--1697}, + author = {Ines Filipa Martins and Ana L. 
Teixeira and Luis Pinheiro + and Andre O. Falcao}, + title = {A Bayesian Approach to in Silico Blood-Brain Barrier Penetration Modeling}, + journal = {Journal of Chemical Information and Modeling} + - |- + @article{Wu2018, + doi = {10.1039/c7sc02664a}, + url = {https://doi.org/10.1039/c7sc02664a}, + year = {2018}, + publisher = {Royal Society of Chemistry (RSC)}, + volume = {9}, + number = {2}, + pages = {513--530}, + author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg and Joseph + Gomes and Caleb Geniesse and Aneesh S. Pappu and Karl Leswing and Vijay Pande}, + title = {MoleculeNet: a benchmark for molecular machine learning}, + journal = {Chemical Science} templates: - - The sequence of {#amino acids|AAs!} {sequence#} {#shows|exhibits|demonstrates!} {hemolytic#no &NULL}{hemolytic__names__adjective} properties. - - The amino acid sequence {sequence#} {#shows|exhibits|displays!} {hemolytic#no &NULL}{hemolytic__names__adjective} properties. - - Based on the {#amino acid sequence |sequence of amino acids !}{sequence#}, the peptide has {hemolytic#no &NULL}{hemolytic__names__adjective} {#properties|characteristics|features!}. - - The {sequence__description} {sequence#} {#represents|is from!} a peptide that is {hemolytic#not &NULL}identified as {hemolytic__names__adjective}. - - The {#amino acid sequence|sequence of amino acids!} {sequence#} is {hemolytic#not &NULL}{hemolytic__names__adjective}. - - |- - Task: Please classify a peptide based on the description. - Description: A amino acid sequence that is {hemolytic__names__adjective}. - {#amino acid sequence |sequence of amino acids!}: {sequence#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {hemolytic#False&True} - - |- - Task: Please classify a amino acid sequence based on the description. - Description: A amino acid sequence that is {hemolytic__names__adjective}. 
- {#amino acid sequence |sequence of amino acids !}: {sequence#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This amino acid sequence is {hemolytic#not &NULL}{hemolytic__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#amino acid sequence|sequence of amino acids!} based on the {#text |!}description{# below|!}. - Description: A amino acid sequence that is {hemolytic__names__adjective}. - Result: {sequence#} - - |- - User: Can you {#tell me|derive|estimate!} if the peptide with the {#amino acid sequence|sequence of amino acids!} {sequence#} is {hemolytic__names__adjective}? - Assistant: {hemolytic#No&Yes}, this amino acid sequence is {hemolytic#not &NULL}{hemolytic__names__adjective}. - - |- - User: Is the peptide with the {#amino acid sequence|sequence of amino acids!} {sequence#} {hemolytic__names__adjective}? - Assistant: {hemolytic#No&Yes}, it is {hemolytic#not &NULL}{hemolytic__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {sequence__description} of a peptide that is {hemolytic#not &NULL}{hemolytic__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {sequence#} - - |- - User: I'm {#searching|looking!} for the {sequence__description} of a peptide that is {hemolytic#not &NULL}{hemolytic__names__adjective}? - Assistant: This is a amino acid sequence that is {hemolytic#not &NULL}{hemolytic__names__adjective}: {sequence#} - - |- - User: I want to {#come up with|create|generate!} a {#amino acid sequence|sequence of amino acids|peptide!}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The amino acid sequence should {hemolytic#not &NULL}be {hemolytic__names__adjective}. 
- Assistant: {#Ok|Got it!},{# here you go,|!} this {sequence__description} is {hemolytic#not &NULL}{hemolytic__names__adjective}: {sequence#} - - |- - User: I want to {#come up with|create|generate!} a {#amino acid sequence|sequence of amino acids|peptide!}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#amino acid sequence|one!}? - User: Yes, the amino acid sequence should {hemolytic#not &NULL}be {hemolytic__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {sequence__description} is {hemolytic#not &NULL}{hemolytic__names__adjective}: {sequence#} - - Is the {sequence__description} {sequence#} {hemolytic__names__adjective}:{hemolytic#no&yes} - - |- - Task: Please classify a {#amino acid sequence|sequence of amino acids|peptide!} based on the description. - Description: A amino acid sequence that is {hemolytic__names__adjective}. - {#amino acid sequence|sequence of amino acids!}: {sequence#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{hemolytic#False&True} - - |- - Task: Please {#give me|create|generate!} a {#amino acid sequence|sequence of amino acids|peptide!} based on the {#text |!}description{# below|!}. - Description: A {#amino acid sequence|sequence of amino acids|peptide!} that is {hemolytic__names__adjective}. - Result:{sequence#} - - |- - Task: Please answer the multiple choice question. - Question: Is the peptide with the {sequence__description} {#representation of |!}{sequence#} {hemolytic__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {hemolytic%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. 
- Question: Is the peptide with the {sequence__description} {#representation of |!}{sequence#} {hemolytic__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {hemolytic%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which amino acid sequences are {hemolytic#not &NULL}{hemolytic__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {sequence%hemolytic%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which amino acid sequences are {hemolytic#not &NULL}{hemolytic__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {sequence%hemolytic%} - Answer:{%multiple_choice_result} + - The sequence of {#amino acids|AAs!} {sequence#} {#shows|exhibits|demonstrates!} {hemolytic#no &NULL}{hemolytic__names__adjective} properties. + - The amino acid sequence {sequence#} {#shows|exhibits|displays!} {hemolytic#no &NULL}{hemolytic__names__adjective} properties. + - Based on the {#amino acid sequence |sequence of amino acids !}{sequence#}, the peptide has {hemolytic#no &NULL}{hemolytic__names__adjective} {#properties|characteristics|features!}. + - The {sequence__description} {sequence#} {#represents|is from!} a peptide that is {hemolytic#not &NULL}identified as {hemolytic__names__adjective}. + - The {#amino acid sequence|sequence of amino acids!} {sequence#} is {hemolytic#not &NULL}{hemolytic__names__adjective}. + - |- + Task: Please classify a peptide based on the description. + Description: A amino acid sequence that is {hemolytic__names__adjective}. 
+ {#amino acid sequence |sequence of amino acids!}: {sequence#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {hemolytic#False&True} + - |- + Task: Please classify a amino acid sequence based on the description. + Description: A amino acid sequence that is {hemolytic__names__adjective}. + {#amino acid sequence |sequence of amino acids !}: {sequence#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This amino acid sequence is {hemolytic#not &NULL}{hemolytic__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#amino acid sequence|sequence of amino acids!} based on the {#text |!}description{# below|!}. + Description: A amino acid sequence that is {hemolytic__names__adjective}. + Result: {sequence#} + - |- + User: Can you {#tell me|derive|estimate!} if the peptide with the {#amino acid sequence|sequence of amino acids!} {sequence#} is {hemolytic__names__adjective}? + Assistant: {hemolytic#No&Yes}, this amino acid sequence is {hemolytic#not &NULL}{hemolytic__names__adjective}. + - |- + User: Is the peptide with the {#amino acid sequence|sequence of amino acids!} {sequence#} {hemolytic__names__adjective}? + Assistant: {hemolytic#No&Yes}, it is {hemolytic#not &NULL}{hemolytic__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {sequence__description} of a peptide that is {hemolytic#not &NULL}{hemolytic__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {sequence#} + - |- + User: I'm {#searching|looking!} for the {sequence__description} of a peptide that is {hemolytic#not &NULL}{hemolytic__names__adjective}? + Assistant: This is a amino acid sequence that is {hemolytic#not &NULL}{hemolytic__names__adjective}: {sequence#} + - |- + User: I want to {#come up with|create|generate!} a {#amino acid sequence|sequence of amino acids|peptide!}. 
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The amino acid sequence should {hemolytic#not &NULL}be {hemolytic__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {sequence__description} is {hemolytic#not &NULL}{hemolytic__names__adjective}: {sequence#} + - |- + User: I want to {#come up with|create|generate!} a {#amino acid sequence|sequence of amino acids|peptide!}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#amino acid sequence|one!}? + User: Yes, the amino acid sequence should {hemolytic#not &NULL}be {hemolytic__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this {sequence__description} is {hemolytic#not &NULL}{hemolytic__names__adjective}: {sequence#} + - Is the {sequence__description} {sequence#} {hemolytic__names__adjective}:{hemolytic#no&yes} + - |- + Task: Please classify a {#amino acid sequence|sequence of amino acids|peptide!} based on the description. + Description: A amino acid sequence that is {hemolytic__names__adjective}. + {#amino acid sequence|sequence of amino acids!}: {sequence#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{hemolytic#False&True} + - |- + Task: Please {#give me|create|generate!} a {#amino acid sequence|sequence of amino acids|peptide!} based on the {#text |!}description{# below|!}. + Description: A {#amino acid sequence|sequence of amino acids|peptide!} that is {hemolytic__names__adjective}. + Result:{sequence#} + - |- + Task: Please answer the multiple choice question. + Question: Is the peptide with the {sequence__description} {#representation of |!}{sequence#} {hemolytic__names__adjective}? 
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {hemolytic%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the peptide with the {sequence__description} {#representation of |!}{sequence#} {hemolytic__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {hemolytic%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which amino acid sequences are {hemolytic#not &NULL}{hemolytic__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {sequence%hemolytic%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which amino acid sequences are {hemolytic#not &NULL}{hemolytic__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {sequence%hemolytic%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/peptides_nonfouling/meta.yaml b/data/tabular/peptides_nonfouling/meta.yaml index dfc2814b8..a04f3a862 100644 --- a/data/tabular/peptides_nonfouling/meta.yaml +++ b/data/tabular/peptides_nonfouling/meta.yaml @@ -1,137 +1,136 @@ ---- name: peptides_nonfouling description: |- - Non-fouling is defined as resistance to non-specific interactions. - A non-fouling peptide (positive example) is defined using the mechanism proposed in - ref white2012decoding. 
Briefly, ref white2012decoding, showed that the exterior surfaces - of proteins have a significantly different frequency of amino acids, and this increases - in aggregation prone environments, like the cytoplasm. Synthesizing self-assembling peptides - that follow this amino acid distribution and coating surfaces with the peptides creates - non-fouling surfaces. This pattern was also found inside chaperone proteins, - another area where resistance to non-specific interactions is important (ref white2012role). + Non-fouling is defined as resistance to non-specific interactions. + A non-fouling peptide (positive example) is defined using the mechanism proposed in + ref white2012decoding. Briefly, ref white2012decoding, showed that the exterior surfaces + of proteins have a significantly different frequency of amino acids, and this increases + in aggregation prone environments, like the cytoplasm. Synthesizing self-assembling peptides + that follow this amino acid distribution and coating surfaces with the peptides creates + non-fouling surfaces. This pattern was also found inside chaperone proteins, + another area where resistance to non-specific interactions is important (ref white2012role). targets: - - id: nonfouling - description: The nonfouling activity of a peptide sequence (1) or not (0). - units: - type: boolean - names: - - noun: nonfouling activity - - adjective: nonfouling - uris: + - id: nonfouling + description: The nonfouling activity of a peptide sequence (1) or not (0). 
+ units: + type: boolean + names: + - noun: nonfouling activity + - adjective: nonfouling + uris: benchmarks: [] identifiers: - - id: sequence - type: AS_SEQUENCE - description: amino acid sequence + - id: sequence + type: AS_SEQUENCE + description: amino acid sequence license: CC BY 4.0 links: - - url: https://doi.org/10.1021/acs.jcim.2c01317 - description: corresponding publication - - url: https://doi.org/10.18653/v1/K18-1030 - description: data source + - url: https://doi.org/10.1021/acs.jcim.2c01317 + description: corresponding publication + - url: https://doi.org/10.18653/v1/K18-1030 + description: data source num_points: 6541 bibtex: - - |- - @article{white2012decoding, - title={Decoding nonspecific interactions from nature}, - author={White, Andrew D and Nowinski, Ann K and Huang, Wenjun and Keefe, - Andrew J and Sun, Fang and Jiang, Shaoyi}, - journal={Chemical Science}, - volume={3}, - number={12}, - pages={3488--3494}, - year={2012}, - publisher={Royal Society of Chemistry} - - |- - @article{barrett2018classifying, - title={Classifying antimicrobial and multifunctional peptides with Bayesian network models}, - author={Barrett, Rainier and Jiang, Shaoyi and White, Andrew D}, - journal={Peptide Science}, - volume={110}, - number={4}, - pages={e24079}, - year={2018}, - publisher={Wiley Online Library} + - |- + @article{white2012decoding, + title={Decoding nonspecific interactions from nature}, + author={White, Andrew D and Nowinski, Ann K and Huang, Wenjun and Keefe, + Andrew J and Sun, Fang and Jiang, Shaoyi}, + journal={Chemical Science}, + volume={3}, + number={12}, + pages={3488--3494}, + year={2012}, + publisher={Royal Society of Chemistry} + - |- + @article{barrett2018classifying, + title={Classifying antimicrobial and multifunctional peptides with Bayesian network models}, + author={Barrett, Rainier and Jiang, Shaoyi and White, Andrew D}, + journal={Peptide Science}, + volume={110}, + number={4}, + pages={e24079}, + year={2018}, + publisher={Wiley 
Online Library} templates: - - The sequence of {#amino acid|AAs!} {sequence#} {#shows|exhibits|demonstrates!} {nonfouling#no &NULL}{nonfouling__names__adjective} properties. - - The amino acid sequence {sequence#} {#shows|exhibits|displays!} {nonfouling#no &NULL}{nonfouling__names__adjective} properties. - - Based on the {sequence__description} {#representation |!}{sequence#}, the peptide has {nonfouling#no &NULL}{nonfouling__names__adjective} {#properties|characteristics|features!}. - - The {sequence__description} {sequence#} {#represents|is from!} a peptide that is {nonfouling#not &NULL}identified as {nonfouling__names__adjective}. - - The {#amino acid sequence|sequence of AAs!} {sequence#} is {nonfouling#not &NULL}{nonfouling__names__adjective}. - - |- - Task: Please classify a amino acid sequence based on the description. - Description: A amino acid sequence that is {nonfouling__names__adjective}. - {#amino acid sequence |!}: {sequence#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {nonfouling#False&True} - - |- - Task: Please classify a amino acid sequence based on the description. - Description: A amino acid sequence that is {nonfouling__names__adjective}. - {#amino acid sequence |!}: {sequence#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This amino acid sequence is {nonfouling#not &NULL}{nonfouling__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#amino acid sequence |sequence of amino acids|AA sequence!} based on the {#text |!}description{# below|!}. - Description: A amino acid sequence of a peptide that is {nonfouling__names__adjective}. - Result: {sequence#} - - |- - User: Can you {#tell me|derive|estimate!} if the amino acid sequence {sequence#} is {nonfouling__names__adjective}? - Assistant: {nonfouling#No&Yes}, this amino acid sequence is {nonfouling#not &NULL}{nonfouling__names__adjective}. 
- - |- - User: Is the amino acid sequence {sequence#} {nonfouling__names__adjective}? - Assistant: {nonfouling#No&Yes}, it is {nonfouling#not &NULL}{nonfouling__names__adjective}. - - |- - User: Can you {#give me|create|generate!} a amino acid sequence that is {nonfouling#not &NULL}{nonfouling__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {sequence#} - - |- - User: I'm {#searching|looking!} for a amino acid sequence that is {nonfouling#not &NULL}{nonfouling__names__adjective}? - Assistant: This is a amino acid sequence that is {nonfouling#not &NULL}{nonfouling__names__adjective}: {sequence#} - - |- - User: I want to {#come up with|create|generate!} a {#amino acid sequence |sequence of amino acids|AA sequence!}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The amino acid sequence should {nonfouling#not &NULL}be {nonfouling__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this is {nonfouling#not &NULL}{nonfouling__names__adjective}: {sequence#} - - |- - User: I want to {#come up with|create|generate!} a {#amino acid sequence |sequence of amino acids|AA sequence!}. - Assistant: {#This sounds very exciting. |Nice. | Very interesting. |I would love to help you. |This sounds very interesting. !}Should it be a special {#amino acid sequence|one!}? - User: Yes, the amino acid sequence should {nonfouling#not &NULL}be {nonfouling__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this is {nonfouling#not &NULL}{nonfouling__names__adjective}: {sequence#} - - Is the {sequence#} {nonfouling__names__adjective}:{nonfouling#no&yes} - - |- - Task: Please classify a amino acid sequence based on the description. - Description: A amino acid sequence that is {nonfouling__names__adjective}. 
- {#amino acid sequence |!}: {sequence#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{nonfouling#False&True} - - |- - Task: Please {#give me|create|generate!} a {#amino acid sequence |!} based on the {#text |!}description{# below|!}. - Description: A amino acid sequence that is {nonfouling__names__adjective}. - Result:{sequence#} - - |- - Task: Please answer the multiple choice question. - Question: Is the amino acid sequence with the {#representation of |!}{sequence#} {nonfouling__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {nonfouling%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the amino acid sequence with the {#representation of |!}{sequence#} {nonfouling__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {nonfouling%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which amino acid sequences are {nonfouling#not &NULL}{nonfouling__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {sequence%nonfouling%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which amino acid sequences are {nonfouling#not &NULL}{nonfouling__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
- Options: - {sequence%nonfouling%} - Answer:{%multiple_choice_result} + - The sequence of {#amino acid|AAs!} {sequence#} {#shows|exhibits|demonstrates!} {nonfouling#no &NULL}{nonfouling__names__adjective} properties. + - The amino acid sequence {sequence#} {#shows|exhibits|displays!} {nonfouling#no &NULL}{nonfouling__names__adjective} properties. + - Based on the {sequence__description} {#representation |!}{sequence#}, the peptide has {nonfouling#no &NULL}{nonfouling__names__adjective} {#properties|characteristics|features!}. + - The {sequence__description} {sequence#} {#represents|is from!} a peptide that is {nonfouling#not &NULL}identified as {nonfouling__names__adjective}. + - The {#amino acid sequence|sequence of AAs!} {sequence#} is {nonfouling#not &NULL}{nonfouling__names__adjective}. + - |- + Task: Please classify a amino acid sequence based on the description. + Description: A amino acid sequence that is {nonfouling__names__adjective}. + {#amino acid sequence |!}: {sequence#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {nonfouling#False&True} + - |- + Task: Please classify a amino acid sequence based on the description. + Description: A amino acid sequence that is {nonfouling__names__adjective}. + {#amino acid sequence |!}: {sequence#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This amino acid sequence is {nonfouling#not &NULL}{nonfouling__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#amino acid sequence |sequence of amino acids|AA sequence!} based on the {#text |!}description{# below|!}. + Description: A amino acid sequence of a peptide that is {nonfouling__names__adjective}. + Result: {sequence#} + - |- + User: Can you {#tell me|derive|estimate!} if the amino acid sequence {sequence#} is {nonfouling__names__adjective}? 
+ Assistant: {nonfouling#No&Yes}, this amino acid sequence is {nonfouling#not &NULL}{nonfouling__names__adjective}. + - |- + User: Is the amino acid sequence {sequence#} {nonfouling__names__adjective}? + Assistant: {nonfouling#No&Yes}, it is {nonfouling#not &NULL}{nonfouling__names__adjective}. + - |- + User: Can you {#give me|create|generate!} a amino acid sequence that is {nonfouling#not &NULL}{nonfouling__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {sequence#} + - |- + User: I'm {#searching|looking!} for a amino acid sequence that is {nonfouling#not &NULL}{nonfouling__names__adjective}? + Assistant: This is a amino acid sequence that is {nonfouling#not &NULL}{nonfouling__names__adjective}: {sequence#} + - |- + User: I want to {#come up with|create|generate!} a {#amino acid sequence |sequence of amino acids|AA sequence!}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The amino acid sequence should {nonfouling#not &NULL}be {nonfouling__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this is {nonfouling#not &NULL}{nonfouling__names__adjective}: {sequence#} + - |- + User: I want to {#come up with|create|generate!} a {#amino acid sequence |sequence of amino acids|AA sequence!}. + Assistant: {#This sounds very exciting. |Nice. | Very interesting. |I would love to help you. |This sounds very interesting. !}Should it be a special {#amino acid sequence|one!}? + User: Yes, the amino acid sequence should {nonfouling#not &NULL}be {nonfouling__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this is {nonfouling#not &NULL}{nonfouling__names__adjective}: {sequence#} + - Is the {sequence#} {nonfouling__names__adjective}:{nonfouling#no&yes} + - |- + Task: Please classify a amino acid sequence based on the description. 
+ Description: A amino acid sequence that is {nonfouling__names__adjective}. + {#amino acid sequence |!}: {sequence#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{nonfouling#False&True} + - |- + Task: Please {#give me|create|generate!} a {#amino acid sequence |!} based on the {#text |!}description{# below|!}. + Description: A amino acid sequence that is {nonfouling__names__adjective}. + Result:{sequence#} + - |- + Task: Please answer the multiple choice question. + Question: Is the amino acid sequence with the {#representation of |!}{sequence#} {nonfouling__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {nonfouling%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the amino acid sequence with the {#representation of |!}{sequence#} {nonfouling__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {nonfouling%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which amino acid sequences are {nonfouling#not &NULL}{nonfouling__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {sequence%nonfouling%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which amino acid sequences are {nonfouling#not &NULL}{nonfouling__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {sequence%nonfouling%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/peptides_soluble/meta.yaml b/data/tabular/peptides_soluble/meta.yaml index 5ba569fa4..f656e0995 100644 --- a/data/tabular/peptides_soluble/meta.yaml +++ b/data/tabular/peptides_soluble/meta.yaml @@ -1,138 +1,137 @@ ---- name: peptides_soluble description: |- - Solubility was estimated by retrospective analysis of electronic laboratory notebooks. - The notebooks were part of a large effort called the Protein Structure Initiative and consider sequences - linearly through the following stages: Selected, Cloned, Expressed, Soluble, Purified, Crystallized, - HSQC (heteronuclear single quantum coherence), Structure, and deposited in PDB. The peptides were identified - as soluble or insoluble by "Comparing the experimental status at two time points, September 2009 and May 2010, - we were able to derive a set of insoluble proteins defined as those which were not - soluble in September 2009 and still remained in that state 8 months later." + Solubility was estimated by retrospective analysis of electronic laboratory notebooks. + The notebooks were part of a large effort called the Protein Structure Initiative and consider sequences + linearly through the following stages: Selected, Cloned, Expressed, Soluble, Purified, Crystallized, + HSQC (heteronuclear single quantum coherence), Structure, and deposited in PDB. The peptides were identified + as soluble or insoluble by "Comparing the experimental status at two time points, September 2009 and May 2010, + we were able to derive a set of insoluble proteins defined as those which were not + soluble in September 2009 and still remained in that state 8 months later." targets: - - id: soluble - description: The solubility of a peptide sequence (1) or not (0). - units: - type: boolean - names: - - noun: solubility - - adjective: soluble - uris: + - id: soluble + description: The solubility of a peptide sequence (1) or not (0). 
+ units: + type: boolean + names: + - noun: solubility + - adjective: soluble + uris: benchmarks: [] identifiers: - - id: sequence - type: AS_SEQUENCE - description: amino acid sequence + - id: sequence + type: AS_SEQUENCE + description: amino acid sequence license: CC BY 4.0 links: - - url: https://doi.org/10.1021/acs.jcim.2c01317 - description: corresponding publication - - url: https://doi.org/10.1111/j.1742-4658.2012.08603.x - description: data source + - url: https://doi.org/10.1021/acs.jcim.2c01317 + description: corresponding publication + - url: https://doi.org/10.1111/j.1742-4658.2012.08603.x + description: data source num_points: 6541 bibtex: - - |- - @article{berman2009protein, - title={The protein structure initiative structural genomics knowledgebase}, - author={Berman, Helen M and Westbrook, John D and Gabanyi, Margaret J and Tao, - Wendy and Shah, Raship and Kouranov, Andrei and Schwede, Torsten and Arnold, - Konstantin and Kiefer, Florian and Bordoli, Lorenza and others}, - journal={Nucleic acids research}, - volume={37}, - number={suppl1}, - pages={D365--D368}, - year={2009}, - publisher={Oxford University Press} - - |- - @article{smialowski2012proso, - title={PROSO II--a new method for protein solubility prediction}, - author={Smialowski, Pawel and Doose, Gero and Torkler, Phillipp and Kaufmann, - Stefanie and Frishman, Dmitrij}, - journal={The FEBS journal}, - volume={279}, - number={12}, - pages={2192--2200}, - year={2012}, - publisher={Wiley Online Library} + - |- + @article{berman2009protein, + title={The protein structure initiative structural genomics knowledgebase}, + author={Berman, Helen M and Westbrook, John D and Gabanyi, Margaret J and Tao, + Wendy and Shah, Raship and Kouranov, Andrei and Schwede, Torsten and Arnold, + Konstantin and Kiefer, Florian and Bordoli, Lorenza and others}, + journal={Nucleic acids research}, + volume={37}, + number={suppl1}, + pages={D365--D368}, + year={2009}, + publisher={Oxford University Press} + - |- + 
@article{smialowski2012proso, + title={PROSO II--a new method for protein solubility prediction}, + author={Smialowski, Pawel and Doose, Gero and Torkler, Phillipp and Kaufmann, + Stefanie and Frishman, Dmitrij}, + journal={The FEBS journal}, + volume={279}, + number={12}, + pages={2192--2200}, + year={2012}, + publisher={Wiley Online Library} templates: - - The sequence of {#amino acids|AAs!} {sequence#} {#shows|exhibits|demonstrates!} {soluble#no &NULL}{soluble__names__adjective} properties. - - The amino acid sequence {sequence#} {#shows|exhibits|displays!} {soluble#no &NULL}{soluble__names__adjective} properties. - - Based on the {sequence__description} {#representation |!}{sequence#}, the peptide has {soluble#no &NULL}{soluble__names__adjective} {#properties|characteristics|features!}. - - The {sequence__description} {sequence#} {#represents|is from!} a peptide that is {soluble#not &NULL}identified as {soluble__names__adjective}. - - The {#amino acid sequence|sequence of amino acids (AAs)|AA sequence|peptide with amino acid sequence!} {sequence#} is {soluble#not &NULL}{soluble__names__adjective}. - - |- - Task: Please classify a amino acid sequence based on the description. - Description: A amino acid sequence that is {soluble__names__adjective}. - {#amino acid sequence |sequence|AA sequence!}: {sequence#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {soluble#False&True} - - |- - Task: Please classify a amino acid sequence based on the description. - Description: A amino acid sequence that is {soluble__names__adjective}. - {#amino acid sequence |sequence|AA sequence!}: {sequence#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This amino acid sequence is {soluble#not &NULL}{soluble__names__adjective}. 
- - |- - Task: Please {#give me|create|generate!} a {#amino acid sequence |sequence|AA sequence!} based on the {#text |!}description{# below|!}. - Description: A amino acid sequence that is {soluble__names__adjective}. - Result: {sequence#} - - |- - User: Can you {#tell me|derive|estimate!} if the peptide with the {sequence__description} {sequence#} is {soluble__names__adjective}? - Assistant: {soluble#No&Yes}, this amino acid sequence is {soluble#not &NULL}{soluble__names__adjective}. - - |- - User: Is the peptide with the {sequence__description} {sequence#} {soluble__names__adjective}? - Assistant: {soluble#No&Yes}, it is {soluble#not &NULL}{soluble__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {sequence__description} of a peptide that is {soluble#not &NULL}{soluble__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {sequence#} - - |- - User: I'm {#searching|looking!} for the {sequence__description} of a peptide that is {soluble#not &NULL}{soluble__names__adjective}? - Assistant: This is a amino acid sequence that is {soluble#not &NULL}{soluble__names__adjective}: {sequence#} - - |- - User: I want to {#come up with|create|generate!} a {#amino acid sequence|AA sequence!}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The amino acid sequence should {soluble#not &NULL}be {soluble__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {sequence__description} is {soluble#not &NULL}{soluble__names__adjective}: {sequence#} - - |- - User: I want to {#come up with|create|generate!} a {#amino acid sequence|sequence|AA sequence!}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#amino acid sequence|one!}? - User: Yes, the amino acid sequence should {soluble#not &NULL}be {soluble__names__adjective}. 
- Assistant: {#Understood|Got it|Ok!}, this {sequence__description} is {soluble#not &NULL}{soluble__names__adjective}: {sequence#} - - Is the {sequence__description} {sequence#} {soluble__names__adjective}:{soluble#no&yes} - - |- - Task: Please classify a amino acid sequence based on the description. - Description: A amino acid sequence that is {soluble__names__adjective}. - {#amino acid sequence |sequence|AA sequence!}: {sequence#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{soluble#False&True} - - |- - Task: Please {#give me|create|generate!} a {#amino acid sequence|sequence|AA sequence!} based on the {#text |!}description{# below|!}. - Description: A amino acid sequence that is {soluble__names__adjective}. - Result:{sequence#} - - |- - Task: Please answer the multiple choice question. - Question: Is the peptide with the {sequence__description} {#representation of |!}{sequence#} {soluble__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {soluble%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the peptide with the {sequence__description} {#representation of |!}{sequence#} {soluble__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {soluble%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which amino acid sequences are {soluble#not &NULL}{soluble__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
- Options: - {sequence%soluble%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which amino acid sequences are {soluble#not &NULL}{soluble__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {sequence%soluble%} - Answer:{%multiple_choice_result} + - The sequence of {#amino acids|AAs!} {sequence#} {#shows|exhibits|demonstrates!} {soluble#no &NULL}{soluble__names__adjective} properties. + - The amino acid sequence {sequence#} {#shows|exhibits|displays!} {soluble#no &NULL}{soluble__names__adjective} properties. + - Based on the {sequence__description} {#representation |!}{sequence#}, the peptide has {soluble#no &NULL}{soluble__names__adjective} {#properties|characteristics|features!}. + - The {sequence__description} {sequence#} {#represents|is from!} a peptide that is {soluble#not &NULL}identified as {soluble__names__adjective}. + - The {#amino acid sequence|sequence of amino acids (AAs)|AA sequence|peptide with amino acid sequence!} {sequence#} is {soluble#not &NULL}{soluble__names__adjective}. + - |- + Task: Please classify a amino acid sequence based on the description. + Description: A amino acid sequence that is {soluble__names__adjective}. + {#amino acid sequence |sequence|AA sequence!}: {sequence#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {soluble#False&True} + - |- + Task: Please classify a amino acid sequence based on the description. + Description: A amino acid sequence that is {soluble__names__adjective}. + {#amino acid sequence |sequence|AA sequence!}: {sequence#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This amino acid sequence is {soluble#not &NULL}{soluble__names__adjective}. 
+ - |- + Task: Please {#give me|create|generate!} a {#amino acid sequence |sequence|AA sequence!} based on the {#text |!}description{# below|!}. + Description: A amino acid sequence that is {soluble__names__adjective}. + Result: {sequence#} + - |- + User: Can you {#tell me|derive|estimate!} if the peptide with the {sequence__description} {sequence#} is {soluble__names__adjective}? + Assistant: {soluble#No&Yes}, this amino acid sequence is {soluble#not &NULL}{soluble__names__adjective}. + - |- + User: Is the peptide with the {sequence__description} {sequence#} {soluble__names__adjective}? + Assistant: {soluble#No&Yes}, it is {soluble#not &NULL}{soluble__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {sequence__description} of a peptide that is {soluble#not &NULL}{soluble__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {sequence#} + - |- + User: I'm {#searching|looking!} for the {sequence__description} of a peptide that is {soluble#not &NULL}{soluble__names__adjective}? + Assistant: This is a amino acid sequence that is {soluble#not &NULL}{soluble__names__adjective}: {sequence#} + - |- + User: I want to {#come up with|create|generate!} a {#amino acid sequence|AA sequence!}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The amino acid sequence should {soluble#not &NULL}be {soluble__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {sequence__description} is {soluble#not &NULL}{soluble__names__adjective}: {sequence#} + - |- + User: I want to {#come up with|create|generate!} a {#amino acid sequence|sequence|AA sequence!}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#amino acid sequence|one!}? + User: Yes, the amino acid sequence should {soluble#not &NULL}be {soluble__names__adjective}. 
+ Assistant: {#Understood|Got it|Ok!}, this {sequence__description} is {soluble#not &NULL}{soluble__names__adjective}: {sequence#} + - Is the {sequence__description} {sequence#} {soluble__names__adjective}:{soluble#no&yes} + - |- + Task: Please classify a amino acid sequence based on the description. + Description: A amino acid sequence that is {soluble__names__adjective}. + {#amino acid sequence |sequence|AA sequence!}: {sequence#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{soluble#False&True} + - |- + Task: Please {#give me|create|generate!} a {#amino acid sequence|sequence|AA sequence!} based on the {#text |!}description{# below|!}. + Description: A amino acid sequence that is {soluble__names__adjective}. + Result:{sequence#} + - |- + Task: Please answer the multiple choice question. + Question: Is the peptide with the {sequence__description} {#representation of |!}{sequence#} {soluble__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {soluble%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the peptide with the {sequence__description} {#representation of |!}{sequence#} {soluble__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {soluble%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which amino acid sequences are {soluble#not &NULL}{soluble__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {sequence%soluble%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which amino acid sequences are {soluble#not &NULL}{soluble__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {sequence%soluble%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/perovskite_db/meta.yaml b/data/tabular/perovskite_db/meta.yaml index 319dfa4df..4ba892e3e 100644 --- a/data/tabular/perovskite_db/meta.yaml +++ b/data/tabular/perovskite_db/meta.yaml @@ -1,165 +1,159 @@ ---- name: perovskite_db description: |- - Database of perovskite solar cells with their composition, device stacks, and performance. + Database of perovskite solar cells with their composition, device stacks, and performance. targets: - - id: bandgap - description: bandgap of the perovskite material - units: eV - type: continuous - names: - - noun: bandgap - uris: - significant_digits: 2 - - id: voc - description: open-circuit voltage of the solar cell - units: V - type: continuous - names: - - noun: open-circuit voltage - uris: - significant_digits: 2 - - id: jsc - description: short-circuit current density of the solar cell - units: mA/cm^2 - type: continuous - names: - - noun: short-circuit current density - uris: - significant_digits: 2 - - id: ff - description: fill factor of the solar cell - units: percent - type: continuous - names: - - noun: fill factor - uris: - significant_digits: 2 - - id: pce - description: power conversion efficiency of the solar cell - units: percent - type: continuous - names: - - noun: power conversion efficiency - uris: - significant_digits: 2 + - id: bandgap + description: bandgap of the perovskite material + units: eV + type: continuous + names: + - noun: bandgap + uris: + significant_digits: 2 + - id: voc + description: open-circuit voltage of the solar cell + units: V + type: 
continuous + names: + - noun: open-circuit voltage + uris: + significant_digits: 2 + - id: jsc + description: short-circuit current density of the solar cell + units: mA/cm^2 + type: continuous + names: + - noun: short-circuit current density + uris: + significant_digits: 2 + - id: ff + description: fill factor of the solar cell + units: percent + type: continuous + names: + - noun: fill factor + uris: + significant_digits: 2 + - id: pce + description: power conversion efficiency of the solar cell + units: percent + type: continuous + names: + - noun: power conversion efficiency + uris: + significant_digits: 2 benchmarks: [] identifiers: - - id: reduced_formulas - type: COMPOSITION - description: reduced chemical formula - - id: iupac_formulas - type: Other - description: IUPAC chemical formula - - id: descriptive_formulas - type: Other - description: descriptive chemical formula - - id: device_stack_string - type: Other - description: device stack + - id: reduced_formulas + type: COMPOSITION + description: reduced chemical formula + - id: iupac_formulas + type: Other + description: IUPAC chemical formula + - id: descriptive_formulas + type: Other + description: descriptive chemical formula + - id: device_stack_string + type: Other + description: device stack license: CC BY 4.0 links: - - url: http://www.perovskitedatabase.com - description: original data source + - url: http://www.perovskitedatabase.com + description: original data source num_points: 29601 bibtex: - - |- - @article{Jacobsson_2021, - doi = {10.1038/s41560-021-00941-3}, - url = {https://doi.org/10.1038%2Fs41560-021-00941-3}, - year = 2021, - month = {dec}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {7}, - number = {1}, - pages = {107--115}, - author = {T. 
Jesper Jacobsson and Adam Hultqvist and Alberto Garc{\'{\i}}a-Fern{\'{a}}ndez and Aman Anand and Amran Al-Ashouri and Anders Hagfeldt and Andrea Crovetto and Antonio Abate and Antonio Gaetano Ricciardulli and Anuja Vijayan and Ashish Kulkarni and Assaf Y. Anderson and Barbara Primera Darwich and Bowen Yang and Brendan L. Coles and Carlo A. R. Perini and Carolin Rehermann and Daniel Ramirez and David Fairen-Jimenez and Diego Di Girolamo and Donglin Jia and Elena Avila and Emilio J. Juarez-Perez and Fanny Baumann and Florian Mathies and G. S. Anaya Gonz{\'{a}}lez and Gerrit Boschloo and Giuseppe Nasti and Gopinath Paramasivam and Guillermo Mart{\'{\i}}nez-Denegri and Hampus Näsström and Hannes Michaels and Hans Köbler and Hua Wu and Iacopo Benesperi and M. Ibrahim Dar and Ilknur Bayrak Pehlivan and Isaac E. Gould and Jacob N. Vagott and Janardan Dagar and Jeff Kettle and Jie Yang and Jinzhao Li and Joel A. Smith and Jorge Pascual and Jose J. Jer{\'{o}}nimo-Rend{\'{o}}n and Juan Felipe Montoya and Juan-Pablo Correa-Baena and Junming Qiu and Junxin Wang and K{\'{a}}ri Sveinbjörnsson and Katrin Hirselandt and Krishanu Dey and Kyle Frohna and Lena Mathies and Luigi A. Castriotta and Mahmoud. H. Aldamasy and Manuel Vasquez-Montoya and Marco A. Ruiz-Preciado and Marion A. Flatken and Mark V. Khenkin and Max Grischek and Mayank Kedia and Michael Saliba and Miguel Anaya and Misha Veldhoen and Neha Arora and Oleksandra Shargaieva and Oliver Maus and Onkar S. Game and Ori Yudilevich and Paul Fassl and Qisen Zhou and Rafael Betancur and Rahim Munir and Rahul Patidar and Samuel D. Stranks and Shahidul Alam and Shaoni Kar and Thomas Unold and Tobias Abzieher and Tomas Edvinsson and Tudur Wyn David and Ulrich W. Paetzold and Waqas Zia and Weifei Fu and Weiwei Zuo and Vincent R. F. 
Schröder and Wolfgang Tress and Xiaoliang Zhang and Yu-Hsien Chiang and Zafar Iqbal and Zhiqiang Xie and Eva Unger}, - title = {An open-access database and analysis tool for perovskite solar cells based on the {FAIR} data principles}, - journal = {Nat Energy} - } + - |- + @article{Jacobsson_2021, + doi = {10.1038/s41560-021-00941-3}, + url = {https://doi.org/10.1038%2Fs41560-021-00941-3}, + year = 2021, + month = {dec}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {7}, + number = {1}, + pages = {107--115}, + author = {T. Jesper Jacobsson and Adam Hultqvist and Alberto Garc{\'{\i}}a-Fern{\'{a}}ndez and Aman Anand and Amran Al-Ashouri and Anders Hagfeldt and Andrea Crovetto and Antonio Abate and Antonio Gaetano Ricciardulli and Anuja Vijayan and Ashish Kulkarni and Assaf Y. Anderson and Barbara Primera Darwich and Bowen Yang and Brendan L. Coles and Carlo A. R. Perini and Carolin Rehermann and Daniel Ramirez and David Fairen-Jimenez and Diego Di Girolamo and Donglin Jia and Elena Avila and Emilio J. Juarez-Perez and Fanny Baumann and Florian Mathies and G. S. Anaya Gonz{\'{a}}lez and Gerrit Boschloo and Giuseppe Nasti and Gopinath Paramasivam and Guillermo Mart{\'{\i}}nez-Denegri and Hampus Näsström and Hannes Michaels and Hans Köbler and Hua Wu and Iacopo Benesperi and M. Ibrahim Dar and Ilknur Bayrak Pehlivan and Isaac E. Gould and Jacob N. Vagott and Janardan Dagar and Jeff Kettle and Jie Yang and Jinzhao Li and Joel A. Smith and Jorge Pascual and Jose J. Jer{\'{o}}nimo-Rend{\'{o}}n and Juan Felipe Montoya and Juan-Pablo Correa-Baena and Junming Qiu and Junxin Wang and K{\'{a}}ri Sveinbjörnsson and Katrin Hirselandt and Krishanu Dey and Kyle Frohna and Lena Mathies and Luigi A. Castriotta and Mahmoud. H. Aldamasy and Manuel Vasquez-Montoya and Marco A. Ruiz-Preciado and Marion A. Flatken and Mark V. 
Khenkin and Max Grischek and Mayank Kedia and Michael Saliba and Miguel Anaya and Misha Veldhoen and Neha Arora and Oleksandra Shargaieva and Oliver Maus and Onkar S. Game and Ori Yudilevich and Paul Fassl and Qisen Zhou and Rafael Betancur and Rahim Munir and Rahul Patidar and Samuel D. Stranks and Shahidul Alam and Shaoni Kar and Thomas Unold and Tobias Abzieher and Tomas Edvinsson and Tudur Wyn David and Ulrich W. Paetzold and Waqas Zia and Weifei Fu and Weiwei Zuo and Vincent R. F. Schröder and Wolfgang Tress and Xiaoliang Zhang and Yu-Hsien Chiang and Zafar Iqbal and Zhiqiang Xie and Eva Unger}, + title = {An open-access database and analysis tool for perovskite solar cells based on the {FAIR} data principles}, + journal = {Nat Energy} + } templates: - - The perovskite material with the {reduced_formulas__description} of {reduced_formulas#} has a bandgap of {bandgap#} {bandgap__units}. - - The perovskite material with the {descriptive_formulas__description} of {descriptive_formulas#} has an open-circuit voltage of {voc#} {voc__units}. - - The {#perovskite solar cell|solar cell!} with the device stack of {device_stack_string#} has a bandgap of {bandgap#} {bandgap__units}. - - The {#perovskite solar cell|solar cell!} with the device stack of {device_stack_string#} {#has an|achieves an!} {#open-circuit voltage|OCV|open-circuit - voltage (OCV)!} of {voc#} {voc__units}. - - The {#perovskite solar cell|solar cell!} with the device stack of {device_stack_string#} {#has a|achieves a!} {#short-circuit voltage|JSC|short-circuit - voltage (JSC)!} of {jsc#} {jsc__units}. - - The {#perovskite solar cell|solar cell!} with the device stack of {device_stack_string#} {#has a|achieves a!} {#fill factor|FF|fill factor (FF)!} - of {ff#} percent. - - The {#perovskite solar cell|solar cell!} with the device stack of {device_stack_string#} {#has a|achieves a!} {#power conversion efficiency|PCE|power - conversion efficiency (PCE)!} of {pce#} percent. 
- - The {#perovskite solar cell|solar cell!} with the device stack of {device_stack_string#} {#has an|achieves an!} {#open-circuit voltage|OCV|open-circuit - voltage (OCV)!} of {voc#} V and {#a|achieves a!} {#short-circuit voltage|JSC|short-circuit voltage (JSC)!} of {jsc#} {jsc__units}. - - |- - Question: What is a perovskite material with a bandgap of {bandgap#} {bandgap__units}? - Answer: A perovskite material with {reduced_formulas__description} {reduced_formulas#} - - |- - Question: What is a perovskite solar cell with a an {#open-circuit voltage|OCV|open-circuit voltage (OCV)!} of {voc#} {voc__units}? - Answer: A {#perovskite solar cell|solar cell|device!} with the device stack of {device_stack_string#} - - |- - Question: What is a perovskite solar cell with a a {#short-circuit voltage|JSC|short-circuit voltage (JSC)!} of {jsc#} {jsc__units}? - Answer: A {#perovskite solar cell|solar cell|device!} with the device stack of {device_stack_string#} - - |- - Question: What is a perovskite solar cell with a a {#fill factor|FF|fill factor (FF)!} of {ff#} percent? - Answer: A {#perovskite solar cell|solar cell|device!} with the device stack of {device_stack_string#} - - |- - Question: What is a perovskite solar cell with a a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent? - Answer: A {#perovskite solar cell|solar cell|device!} with the device stack of {device_stack_string#} - - |- - Question: What is a perovskite solar cell with a an {#open-circuit voltage|OCV|open-circuit voltage (OCV)!} of {voc#} {voc__units} and a {#short-circuit voltage|JSC|short-circuit voltage (JSC)!} of {jsc#} {jsc__units}? - Answer: A {#perovskite solar cell|solar cell|device!} with the device stack of {device_stack_string#} - - |- - Question: What is a perovskite solar cell with a an {#open-circuit voltage|OCV|open-circuit voltage (OCV)!} of {voc#} {voc__units} and a {#fill factor|FF|fill factor (FF)!} of {ff#} percent? 
- Answer: A {#perovskite solar cell|solar cell|device!} with the device stack of {device_stack_string#} - - |- - Question: What is a perovskite solar cell with a an {#open-circuit voltage|OCV|open-circuit voltage (OCV)!} of {voc#} {voc__units} and a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent? - Answer: A {#perovskite solar cell|solar cell|device!} with the device stack of {device_stack_string#} - - |- - Question: What is a perovskite solar cell with a a {#short-circuit voltage|JSC|short-circuit voltage (JSC)!} of {jsc#} {jsc__units} and a {#fill factor|FF|fill factor (FF)!} of {ff#} percent? - Answer: A {#perovskite solar cell|solar cell|device!} with the device stack of {device_stack_string#} - - |- - User: {#I would like to|I want to|I would love to!} know the bandgap of a perovskite material. - Assistant: {#That's interesting|Cool|Interesting|Awesome!}! What material are you interested in? - User: I am interested in the material with the {reduced_formulas__description} of {reduced_formulas#}. - Assistant: The bandgap of the material with {reduced_formulas__description} {reduced_formulas#} is {bandgap#} {bandgap__units}. - - |- - User: {#I would like to|I want to|I would love to!} know the bandgap of a perovskite material. - Assistant: {#That's interesting|Cool|Interesting|Awesome!}! What material are you interested in? - User: I am interested in the material with the {descriptive_formulas__description} of {descriptive_formulas#}. - Assistant: The bandgap of the material with {descriptive_formulas__description} {descriptive_formulas#} is {bandgap#} {bandgap__units}. - - |- - User: {#I would like to|I want to|I would love to!} design a {#perovskite solar cell|solar cell!} with a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent. - Assistant: {#That's interesting|Cool|Interesting|Awesome!}! Do you have other requirements? 
- User: {#I would like|I want!} the {#perovskite solar cell|solar cell!} to have a {#fill factor|FF|fill factor (FF)!} of {ff#} percent. - Assistant: In that case, you should use the device stack of {device_stack_string#}. - - |- - User: {#I would like to|I want to|I would love to!} design a {#perovskite solar cell|solar cell!} with a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent. - Assistant: {#That's interesting|Cool|Interesting|Awesome!}! Do you have other {#requirements|constraints|needs!}? - User: {#I would like|I want!} the {#perovskite solar cell|solar cell!} to have a {#short-circuit voltage|JSC|short-circuit voltage (JSC)!} of {jsc#} {jsc__units}. - Assistant: {#In that case, you should use the device stack of|In your case, I recommend|I would try the device stack!} {device_stack_string#}. - - |- - User: {#I would like to|I want to|I would love to!} design a {#perovskite solar cell|solar cell!} with a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent. - Assistant: {#That's interesting|Cool|Interesting|Awesome!}! Do you have other {#requirements|constraints|needs!}? - User: {#I would like|I want!} the {#perovskite solar cell|solar cell!} to have an {#open-circuit voltage|OCV|open-circuit voltage (OCV)!} of {voc#} {voc__units}. - Assistant: {#In that case, you should use the device stack of|In your case, I recommend|I would try the device stack!} {device_stack_string#}. - - |- - User: {#I would like to|I want to|I would love to!} design a {#perovskite solar cell|solar cell!} with a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent. - Assistant: {#That's interesting|Cool|Interesting|Awesome!}! Do you have other {#requirements|constraints|needs!}? 
- User: {#I would like|I want!} the {#perovskite solar cell|solar cell!} to have an {#open-circuit voltage|OCV|open-circuit voltage (OCV)!} of {voc#} {voc__units} and a {#short-circuit voltage|JSC|short-circuit voltage (JSC)!} of {jsc#} {jsc__units}. - Assistant: {#In that case, you should use the device stack of|In your case, I recommend|I would try the device stack!} {device_stack_string#}. - - |- - User: {#I would like to|I want to|I would love to!} design a {#perovskite solar cell|solar cell!} with a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent. - Assistant: {#That's interesting|Cool|Interesting|Awesome!}! Do you have other {#requirements|constraints|needs!}? - User: {#I would like|I want!} the {#perovskite solar cell|solar cell!} to have an {#open-circuit voltage|OCV|open-circuit voltage (OCV)!} of {voc#} {voc__units} and a {#fill factor|FF|fill factor (FF)!} of {ff#} percent. - Assistant: {#In that case, you should use the device stack of|In your case, I recommend|I would try the device stack!} {device_stack_string#}. - - |- - User: {#I would like to|I want to|I would love to!} design a {#perovskite solar cell|solar cell!} with a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent. - Assistant: {#That's interesting|Cool|Interesting|Awesome!}! Do you have other {#requirements|constraints|needs!}? - User: {#Yes,|Indeed,!} {#I also want|I want|I need!} the {#perovskite solar cell|solar cell!} to have {descriptive_formulas#} as the {#perovskite material|absorber!}. - Assistant: {#In that case, you should use the device stack of|In your case, I recommend|I would try the device stack!} {device_stack_string#}. - - |- - User: {#I would like to|I want to|I would love to!} design a {#perovskite solar cell|solar cell!} with a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent. - Assistant: {#That's interesting|Cool|Interesting|Awesome!}! 
Do you have other {#requirements|constraints|needs!}? - User: {#Yes,|Indeed,!} {#I also want|I want|I need!} the {#perovskite solar cell|solar cell!} to have {reduced_formulas#} as the {#perovskite material|absorber!}. - Assistant: {#In that case, you should use the device stack of|In your case, I recommend|I would try the device stack!} {device_stack_string#}. + - The perovskite material with the {reduced_formulas__description} of {reduced_formulas#} has a bandgap of {bandgap#} {bandgap__units}. + - The perovskite material with the {descriptive_formulas__description} of {descriptive_formulas#} has an open-circuit voltage of {voc#} {voc__units}. + - The {#perovskite solar cell|solar cell!} with the device stack of {device_stack_string#} has a bandgap of {bandgap#} {bandgap__units}. + - The {#perovskite solar cell|solar cell!} with the device stack of {device_stack_string#} {#has an|achieves an!} {#open-circuit voltage|OCV|open-circuit voltage (OCV)!} of {voc#} {voc__units}. + - The {#perovskite solar cell|solar cell!} with the device stack of {device_stack_string#} {#has a|achieves a!} {#short-circuit current density|JSC|short-circuit current density (JSC)!} of {jsc#} {jsc__units}. + - The {#perovskite solar cell|solar cell!} with the device stack of {device_stack_string#} {#has a|achieves a!} {#fill factor|FF|fill factor (FF)!} of {ff#} percent. + - The {#perovskite solar cell|solar cell!} with the device stack of {device_stack_string#} {#has a|achieves a!} {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent. + - The {#perovskite solar cell|solar cell!} with the device stack of {device_stack_string#} {#has an|achieves an!} {#open-circuit voltage|OCV|open-circuit voltage (OCV)!} of {voc#} V and {#a|achieves a!} {#short-circuit current density|JSC|short-circuit current density (JSC)!} of {jsc#} {jsc__units}. + - |- + Question: What is a perovskite material with a bandgap of {bandgap#} {bandgap__units}?
+ Answer: A perovskite material with {reduced_formulas__description} {reduced_formulas#} + - |- + Question: What is a perovskite solar cell with an {#open-circuit voltage|OCV|open-circuit voltage (OCV)!} of {voc#} {voc__units}? + Answer: A {#perovskite solar cell|solar cell|device!} with the device stack of {device_stack_string#} + - |- + Question: What is a perovskite solar cell with a {#short-circuit current density|JSC|short-circuit current density (JSC)!} of {jsc#} {jsc__units}? + Answer: A {#perovskite solar cell|solar cell|device!} with the device stack of {device_stack_string#} + - |- + Question: What is a perovskite solar cell with a {#fill factor|FF|fill factor (FF)!} of {ff#} percent? + Answer: A {#perovskite solar cell|solar cell|device!} with the device stack of {device_stack_string#} + - |- + Question: What is a perovskite solar cell with a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent? + Answer: A {#perovskite solar cell|solar cell|device!} with the device stack of {device_stack_string#} + - |- + Question: What is a perovskite solar cell with an {#open-circuit voltage|OCV|open-circuit voltage (OCV)!} of {voc#} {voc__units} and a {#short-circuit current density|JSC|short-circuit current density (JSC)!} of {jsc#} {jsc__units}? + Answer: A {#perovskite solar cell|solar cell|device!} with the device stack of {device_stack_string#} + - |- + Question: What is a perovskite solar cell with an {#open-circuit voltage|OCV|open-circuit voltage (OCV)!} of {voc#} {voc__units} and a {#fill factor|FF|fill factor (FF)!} of {ff#} percent? + Answer: A {#perovskite solar cell|solar cell|device!} with the device stack of {device_stack_string#} + - |- + Question: What is a perovskite solar cell with an {#open-circuit voltage|OCV|open-circuit voltage (OCV)!} of {voc#} {voc__units} and a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent?
+ Answer: A {#perovskite solar cell|solar cell|device!} with the device stack of {device_stack_string#} + - |- + Question: What is a perovskite solar cell with a {#short-circuit current density|JSC|short-circuit current density (JSC)!} of {jsc#} {jsc__units} and a {#fill factor|FF|fill factor (FF)!} of {ff#} percent? + Answer: A {#perovskite solar cell|solar cell|device!} with the device stack of {device_stack_string#} + - |- + User: {#I would like to|I want to|I would love to!} know the bandgap of a perovskite material. + Assistant: {#That's interesting|Cool|Interesting|Awesome!}! What material are you interested in? + User: I am interested in the material with the {reduced_formulas__description} of {reduced_formulas#}. + Assistant: The bandgap of the material with {reduced_formulas__description} {reduced_formulas#} is {bandgap#} {bandgap__units}. + - |- + User: {#I would like to|I want to|I would love to!} know the bandgap of a perovskite material. + Assistant: {#That's interesting|Cool|Interesting|Awesome!}! What material are you interested in? + User: I am interested in the material with the {descriptive_formulas__description} of {descriptive_formulas#}. + Assistant: The bandgap of the material with {descriptive_formulas__description} {descriptive_formulas#} is {bandgap#} {bandgap__units}. + - |- + User: {#I would like to|I want to|I would love to!} design a {#perovskite solar cell|solar cell!} with a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent. + Assistant: {#That's interesting|Cool|Interesting|Awesome!}! Do you have other requirements? + User: {#I would like|I want!} the {#perovskite solar cell|solar cell!} to have a {#fill factor|FF|fill factor (FF)!} of {ff#} percent. + Assistant: In that case, you should use the device stack of {device_stack_string#}.
+ - |- + User: {#I would like to|I want to|I would love to!} design a {#perovskite solar cell|solar cell!} with a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent. + Assistant: {#That's interesting|Cool|Interesting|Awesome!}! Do you have other {#requirements|constraints|needs!}? + User: {#I would like|I want!} the {#perovskite solar cell|solar cell!} to have a {#short-circuit current density|JSC|short-circuit current density (JSC)!} of {jsc#} {jsc__units}. + Assistant: {#In that case, you should use the device stack of|In your case, I recommend|I would try the device stack!} {device_stack_string#}. + - |- + User: {#I would like to|I want to|I would love to!} design a {#perovskite solar cell|solar cell!} with a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent. + Assistant: {#That's interesting|Cool|Interesting|Awesome!}! Do you have other {#requirements|constraints|needs!}? + User: {#I would like|I want!} the {#perovskite solar cell|solar cell!} to have an {#open-circuit voltage|OCV|open-circuit voltage (OCV)!} of {voc#} {voc__units}. + Assistant: {#In that case, you should use the device stack of|In your case, I recommend|I would try the device stack!} {device_stack_string#}. + - |- + User: {#I would like to|I want to|I would love to!} design a {#perovskite solar cell|solar cell!} with a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent. + Assistant: {#That's interesting|Cool|Interesting|Awesome!}! Do you have other {#requirements|constraints|needs!}? + User: {#I would like|I want!} the {#perovskite solar cell|solar cell!} to have an {#open-circuit voltage|OCV|open-circuit voltage (OCV)!} of {voc#} {voc__units} and a {#short-circuit current density|JSC|short-circuit current density (JSC)!} of {jsc#} {jsc__units}. + Assistant: {#In that case, you should use the device stack of|In your case, I recommend|I would try the device stack!} {device_stack_string#}.
+ - |- + User: {#I would like to|I want to|I would love to!} design a {#perovskite solar cell|solar cell!} with a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent. + Assistant: {#That's interesting|Cool|Interesting|Awesome!}! Do you have other {#requirements|constraints|needs!}? + User: {#I would like|I want!} the {#perovskite solar cell|solar cell!} to have an {#open-circuit voltage|OCV|open-circuit voltage (OCV)!} of {voc#} {voc__units} and a {#fill factor|FF|fill factor (FF)!} of {ff#} percent. + Assistant: {#In that case, you should use the device stack of|In your case, I recommend|I would try the device stack!} {device_stack_string#}. + - |- + User: {#I would like to|I want to|I would love to!} design a {#perovskite solar cell|solar cell!} with a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent. + Assistant: {#That's interesting|Cool|Interesting|Awesome!}! Do you have other {#requirements|constraints|needs!}? + User: {#Yes,|Indeed,!} {#I also want|I want|I need!} the {#perovskite solar cell|solar cell!} to have {descriptive_formulas#} as the {#perovskite material|absorber!}. + Assistant: {#In that case, you should use the device stack of|In your case, I recommend|I would try the device stack!} {device_stack_string#}. + - |- + User: {#I would like to|I want to|I would love to!} design a {#perovskite solar cell|solar cell!} with a {#power conversion efficiency|PCE|power conversion efficiency (PCE)!} of {pce#} percent. + Assistant: {#That's interesting|Cool|Interesting|Awesome!}! Do you have other {#requirements|constraints|needs!}? + User: {#Yes,|Indeed,!} {#I also want|I want|I need!} the {#perovskite solar cell|solar cell!} to have {reduced_formulas#} as the {#perovskite material|absorber!}. + Assistant: {#In that case, you should use the device stack of|In your case, I recommend|I would try the device stack!} {device_stack_string#}. 
diff --git a/data/tabular/physics_stackexchange/explore.ipynb b/data/tabular/physics_stackexchange/explore.ipynb index 8d060f363..67c06adae 100644 --- a/data/tabular/physics_stackexchange/explore.ipynb +++ b/data/tabular/physics_stackexchange/explore.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -35,16 +35,16 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "df = dataset['train'].to_pandas()" + "df = dataset[\"train\"].to_pandas()" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -59,18 +59,18 @@ " dtype=object)" ] }, - "execution_count": 7, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df.iloc[3361]['answers']" + "df.iloc[3361][\"answers\"]" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -79,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -88,52 +88,64 @@ " # almost empty means that there is only punctuation, or special characters or spaces\n", " # repeated means that the line is the same as the previous one\n", " # return the text without the repeated almost empty lines\n", - " lines = text.split('\\n')\n", + " lines = text.split(\"\\n\")\n", " new_lines = []\n", - " previous_line = ''\n", + " previous_line = \"\"\n", " for line in lines:\n", - " if line.strip() == '':\n", + " if line.strip() == \"\":\n", " continue\n", " if line.strip() == previous_line:\n", " continue\n", " new_lines.append(line)\n", " previous_line = line.strip()\n", - " return '\\n'.join(new_lines)" + " return \"\\n\".join(new_lines)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, "outputs": [], "source": 
[ "questions_w_answer = []\n", "\n", - "# we do the following, if there is no answer, we drop this question \n", + "# we do the following, if there is no answer, we drop this question\n", "# if there is one answer, we keep it\n", "# if there are multiple we keep the ones that do not have a score of 0\n", "# the answers are in an array of arrays, the first element is the answer, the second is the score\n", - "# we then also only keep two columns, the question and the answer, both as string on which we also \n", + "# we then also only keep two columns, the question and the answer, both as string on which we also\n", "# call the strip function to remove leading and trailing whitespaces\n", "\n", "for i, row in df.iterrows():\n", - " if len(row['answers']) == 0:\n", + " if len(row[\"answers\"]) == 0:\n", " continue\n", - " if len(row['answers']) == 1:\n", - " questions_w_answer.append([remove_repeated_almost_empty_lines(row['question_text'].strip()), remove_repeated_almost_empty_lines(row['answers'][0][0].strip())])\n", + " if len(row[\"answers\"]) == 1:\n", + " questions_w_answer.append(\n", + " [\n", + " remove_repeated_almost_empty_lines(row[\"question_text\"].strip()),\n", + " remove_repeated_almost_empty_lines(row[\"answers\"][0][0].strip()),\n", + " ]\n", + " )\n", " else:\n", - " for answer in row['answers']:\n", + " for answer in row[\"answers\"]:\n", " if answer[1] != 0:\n", - " questions_w_answer.append([remove_repeated_almost_empty_lines(row['question_text'].strip()), remove_repeated_almost_empty_lines(answer[0].strip())])\n", + " questions_w_answer.append(\n", + " [\n", + " remove_repeated_almost_empty_lines(\n", + " row[\"question_text\"].strip()\n", + " ),\n", + " remove_repeated_almost_empty_lines(answer[0].strip()),\n", + " ]\n", + " )\n", " break\n", "\n", "# we then create a dataframe from the list of questions and answers\n", - "df_qa = pd.DataFrame(questions_w_answer, columns=['q', 'a'])" + "df_qa = pd.DataFrame(questions_w_answer, columns=[\"q\", 
\"a\"])" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -149,12 +161,12 @@ } ], "source": [ - "print(remove_repeated_almost_empty_lines(df_qa.iloc[5]['a']))" + "print(remove_repeated_almost_empty_lines(df_qa.iloc[5][\"a\"]))" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -163,7 +175,7 @@ "2571" ] }, - "execution_count": 12, + "execution_count": null, "metadata": {}, "output_type": "execute_result" } @@ -171,20 +183,6 @@ "source": [ "len(df_qa)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -202,10 +200,8 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.16" - }, - "orig_nbformat": 4 + "pygments_lexer": "ipython3" + } }, "nbformat": 4, "nbformat_minor": 2 diff --git a/data/tabular/physics_stackexchange/meta.yaml b/data/tabular/physics_stackexchange/meta.yaml index 9b87efb17..2aca00745 100644 --- a/data/tabular/physics_stackexchange/meta.yaml +++ b/data/tabular/physics_stackexchange/meta.yaml @@ -1,31 +1,30 @@ ---- name: physics_stackexchange description: |- - Questions and answers mined from physics.stackexchange.com. + Questions and answers mined from physics.stackexchange.com. 
targets: - - id: a - description: answer to the question - type: string - - id: title - description: title of the question - type: string + - id: a + description: answer to the question + type: string + - id: title + description: title of the question + type: string identifiers: - - id: q - type: string - description: question asked on physics.stackexchange.com + - id: q + type: string + description: question asked on physics.stackexchange.com license: CC BY-SA links: - - url: physics.stackexchange.com - description: original data source - - url: https://stackoverflow.com/help/licensing - description: information about the license + - url: physics.stackexchange.com + description: original data source + - url: https://stackoverflow.com/help/licensing + description: information about the license num_points: 6732 templates: - - |- - {#Task: Please answer the question of the user.|Task: Provide a detailed response to the user's question.|Task: Address the user's query with a well-structured answer.|Task: Your role is to respond to the user's question with clarity.|Task: Offer a concise and informative answer to the user's question.|Task: Provide a clear and concise reply to the user's inquiry.!} - {#User: |Question: |Inquiry: |\n!}{#q} - {#Assistant: |Answer: !}{#a} - - |- - {#Task: Generate a title for this question.|Task: Create a meaningful title for this question.|Task: Summarize the question in a title.!} - {#Question: |Inquiry: |\n!}{#q} - {#Assistant: |Title: |Answer: |!}{#title} + - |- + {#Task: Please answer the question of the user.|Task: Provide a detailed response to the user's question.|Task: Address the user's query with a well-structured answer.|Task: Your role is to respond to the user's question with clarity.|Task: Offer a concise and informative answer to the user's question.|Task: Provide a clear and concise reply to the user's inquiry.!} + {#User: |Question: |Inquiry: |\n!}{#q} + {#Assistant: |Answer: !}{#a} + - |- + {#Task: Generate a title for 
this question.|Task: Create a meaningful title for this question.|Task: Summarize the question in a title.!} + {#Question: |Inquiry: |\n!}{#q} + {#Assistant: |Title: |Answer: |!}{#title} diff --git a/data/tabular/potassium_ion_channel_kir2_1_butkiewicz/meta.yaml b/data/tabular/potassium_ion_channel_kir2_1_butkiewicz/meta.yaml index 556c0b6c9..d77f89cbc 100644 --- a/data/tabular/potassium_ion_channel_kir2_1_butkiewicz/meta.yaml +++ b/data/tabular/potassium_ion_channel_kir2_1_butkiewicz/meta.yaml @@ -1,178 +1,176 @@ ---- name: potassium_ion_channel_kir2_1_butkiewicz description: |- - The Kir2.1 inward-rectifier potassium ion channel is - a target in the treatment of cardiovascular, neurological, renal and - metabolic disorders. Primary assay AID 1672. Validation screens AID - 2032 and AID 463252. Counter screens AID 2105, AID 2345, AID 2236, and - AID 2329. The final set of 172 active compounds was constructed - subtracting the actives in AID 2105, AID 2345, AID 2236, and AID 2329 - from the molecules found active in both, AID 2032 and AID 463252 + The Kir2.1 inward-rectifier potassium ion channel is + a target in the treatment of cardiovascular, neurological, renal and + metabolic disorders. Primary assay AID 1672. Validation screens AID + 2032 and AID 463252. Counter screens AID 2105, AID 2345, AID 2236, and + AID 2329. The final set of 172 active compounds was constructed + subtracting the actives in AID 2105, AID 2345, AID 2236, and AID 2329 + from the molecules found active in both, AID 2032 and AID 463252 targets: - - id: activity_potassium_ion_channel - description: whether it is active against potassium ion channel (1) or not (0). 
- units: - type: boolean - names: - - gerund: blocking the potassium ion channel - - noun: blocker of the potassium ion channel activity - - adjective: effective for the blocking potassium ion channel activity - - adjective: reliable for potassium ion channel blocking - pubchem_aids: - - 1672 - - 2032 - - 463252 - - 2105 - - 2345 - - 2236 - - 2329 - uris: - - http://purl.obolibrary.org/obo/XCO_0000225 + - id: activity_potassium_ion_channel + description: whether it is active against potassium ion channel (1) or not (0). + units: + type: boolean + names: + - gerund: blocking the potassium ion channel + - noun: blocker of the potassium ion channel activity + - adjective: effective for the blocking potassium ion channel activity + - adjective: reliable for potassium ion channel blocking + pubchem_aids: + - 1672 + - 2032 + - 463252 + - 2105 + - 2345 + - 2236 + - 2329 + uris: + - http://purl.obolibrary.org/obo/XCO_0000225 identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split license: CC BY 4.0 links: - - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al - description: original dataset - - url: https://doi.org/10.3390/molecules18010735 - description: corresponding publication - - url: https://doi.org/10.1093/nar/gky1033 - description: corresponding publication - - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ - description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al + description: original dataset + - url: https://doi.org/10.3390/molecules18010735 + description: corresponding publication + - url: https://doi.org/10.1093/nar/gky1033 + description: corresponding publication + - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ + description: corresponding publication num_points: 
301493 bibtex: - - |- - @article{Butkiewicz2013, - doi = {10.3390/molecules18010735}, - url = {https://doi.org/10.3390/molecules18010735}, - year = {2013}, - month = jan, - publisher = {{MDPI} {AG}}, - volume = {18}, - number = {1}, - pages = {735--756}, - author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller - and Jeffrey Mendenhall and Pedro Teixeira and C. Weaver and Jens Meiler}, - title = {Benchmarking Ligand-Based Virtual High-Throughput Screening - with the {PubChem} Database}, - journal = {Molecules}} - - |- - @article{Kim2018, - doi = {10.1093/nar/gky1033}, - url = {https://doi.org/10.1093/nar/gky1033}, - year = {2018}, - month = oct, - publisher = {Oxford University Press ({OUP})}, - volume = {47}, - number = {D1}, - pages = {D1102--D1109}, - author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta Gindulyte - and Jia He and Siqian He and Qingliang Li and Benjamin A Shoemaker - and Paul A Thiessen and Bo Yu and Leonid Zaslavsky and Jian Zhang - and Evan E Bolton}, - title = {{PubChem} 2019 update: improved access to chemical data}, - journal = {Nucleic Acids Research}} - - |- - @article{Butkiewicz2017, - doi = {}, - url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, - year = {2017}, - publisher = {Chem Inform}, - volume = {3}, - number = {1}, - author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. - and Lowe, E. W. and Weaver, D. C. and Meiler, J.}, - title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets - from the {P}ub{C}hem {D}atabase}}, - journal = {Chemical Science}} + - |- + @article{Butkiewicz2013, + doi = {10.3390/molecules18010735}, + url = {https://doi.org/10.3390/molecules18010735}, + year = {2013}, + month = jan, + publisher = {{MDPI} {AG}}, + volume = {18}, + number = {1}, + pages = {735--756}, + author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller + and Jeffrey Mendenhall and Pedro Teixeira and C. 
Weaver and Jens Meiler}, + title = {Benchmarking Ligand-Based Virtual High-Throughput Screening + with the {PubChem} Database}, + journal = {Molecules}} + - |- + @article{Kim2018, + doi = {10.1093/nar/gky1033}, + url = {https://doi.org/10.1093/nar/gky1033}, + year = {2018}, + month = oct, + publisher = {Oxford University Press ({OUP})}, + volume = {47}, + number = {D1}, + pages = {D1102--D1109}, + author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta Gindulyte + and Jia He and Siqian He and Qingliang Li and Benjamin A Shoemaker + and Paul A Thiessen and Bo Yu and Leonid Zaslavsky and Jian Zhang + and Evan E Bolton}, + title = {{PubChem} 2019 update: improved access to chemical data}, + journal = {Nucleic Acids Research}} + - |- + @article{Butkiewicz2017, + doi = {}, + url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, + year = {2017}, + publisher = {Chem Inform}, + volume = {3}, + number = {1}, + author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. + and Lowe, E. W. and Weaver, D. C. and Meiler, J.}, + title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets + from the {P}ub{C}hem {D}atabase}, + journal = {Chemical Science}} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_potassium_ion_channel#no &NULL}{activity_potassium_ion_channel__names__gerund}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {activity_potassium_ion_channel#no &NULL}{activity_potassium_ion_channel__names__adjective} - {#properties|characteristics|features!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {activity_potassium_ion_channel#not &NULL}identified as {activity_potassium_ion_channel__names__adjective}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_potassium_ion_channel__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. - Result: {activity_potassium_ion_channel#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_potassium_ion_channel__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {activity_potassium_ion_channel__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_potassium_ion_channel__names__adjective}? - Assistant: {activity_potassium_ion_channel#No&Yes}, this molecule is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {activity_potassium_ion_channel__names__adjective}? - Assistant: {activity_potassium_ion_channel#No&Yes}, it is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}? 
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}? - Assistant: This is a molecule that is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {activity_potassium_ion_channel#not &NULL}be {activity_potassium_ion_channel__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {activity_potassium_ion_channel#not &NULL}be {activity_potassium_ion_channel__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {activity_potassium_ion_channel__names__adjective}:{activity_potassium_ion_channel#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_potassium_ion_channel__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. 
- Result:{activity_potassium_ion_channel#False&True} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {activity_potassium_ion_channel__names__adjective}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_potassium_ion_channel__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {activity_potassium_ion_channel%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_potassium_ion_channel__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {activity_potassium_ion_channel%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%activity_potassium_ion_channel%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
- Options: - {SMILES%activity_potassium_ion_channel%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_potassium_ion_channel#no &NULL}{activity_potassium_ion_channel__names__gerund}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {activity_potassium_ion_channel#no &NULL}{activity_potassium_ion_channel__names__adjective} {#properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {activity_potassium_ion_channel#not &NULL}identified as {activity_potassium_ion_channel__names__adjective}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_potassium_ion_channel__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. + Result: {activity_potassium_ion_channel#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_potassium_ion_channel__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. 
+ Description: A molecule that is {activity_potassium_ion_channel__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_potassium_ion_channel__names__adjective}? + Assistant: {activity_potassium_ion_channel#No&Yes}, this molecule is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {activity_potassium_ion_channel__names__adjective}? + Assistant: {activity_potassium_ion_channel#No&Yes}, it is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}? + Assistant: This is a molecule that is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {activity_potassium_ion_channel#not &NULL}be {activity_potassium_ion_channel__names__adjective}. 
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {activity_potassium_ion_channel#not &NULL}be {activity_potassium_ion_channel__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {activity_potassium_ion_channel__names__adjective}:{activity_potassium_ion_channel#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_potassium_ion_channel__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{activity_potassium_ion_channel#False&True} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {activity_potassium_ion_channel__names__adjective}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_potassium_ion_channel__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {activity_potassium_ion_channel%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. 
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_potassium_ion_channel__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {activity_potassium_ion_channel%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%activity_potassium_ion_channel%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_potassium_ion_channel#not &NULL}{activity_potassium_ion_channel__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%activity_potassium_ion_channel%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/qm8/meta.yaml b/data/tabular/qm8/meta.yaml index a453f81a2..61b9f9dc9 100644 --- a/data/tabular/qm8/meta.yaml +++ b/data/tabular/qm8/meta.yaml @@ -1,276 +1,275 @@ ---- name: qm8 description: |- - QM8 is a dataset of quantum mechanical simulations of electronic spectra - and the energy levels of excited states in small molecules. - The dataset involves the application of various techniques, - such as time-dependent density functional theories (TDDFT) - and second-order approximate coupled-cluster (CC2), - to a group of molecules, which encompasses those containing as many as eight heavy atoms. - These molecules also form a subset of the GDB-17 database. 
+ QM8 is a dataset of quantum mechanical simulations of electronic spectra + and the energy levels of excited states in small molecules. + The dataset involves the application of various techniques, + such as time-dependent density functional theories (TDDFT) + and second-order approximate coupled-cluster (CC2), + to a group of molecules, which encompasses those containing as many as eight heavy atoms. + These molecules also form a subset of the GDB-17 database. targets: - - id: E1-CC2 - description: Excitation energy of the first excited state, computed using CC2 - units: a. u. - type: continuous - siginificant_digits: 5 - names: - - noun: S0 -> S1 transition energy computed using second-order approximate coupled-cluster theory (CC2) - - noun: S0 -> S1 transition energy computed using RI-CC2/def2TZVP - - noun: RI-CC2/def2TZVP-computed S0 -> S1 transition energy - - id: E2-CC2 - description: Excitation energy of the second excited state, computed using CC2 - units: a. u. - type: continuous - significant_digits: 5 - names: - - noun: S0 -> S2 transition energy computed using second-order approximate coupled-cluster theory (CC2) - - noun: S0 -> S2 transition energy computed using RI-CC2/def2TZVP - - noun: RI-CC2/def2TZVP-computed S0 -> S2 transition energy - - id: f1-CC2 - description: Oscillator strength of the first excited state, computed using CC2 - units: a. u. - type: continuous - significant_digits: 5 - names: - - noun: S0 -> S1 transition oscillator strength computed using second-order approximate coupled-cluster theory (CC2) - - noun: S0 -> S1 transition oscillator strength computed using RI-CC2/def2TZVP - - noun: RI-CC2/def2TZVP-computed S0 -> S1 transition oscillator strength - - id: f2-CC2 - description: Oscillator strength of the second excited state, computed using CC2 - units: a. u. 
- type: continuous - significant_digits: 5 - names: - - noun: S0 -> S2 transition oscillator strength computed using second-order approximate coupled-cluster theory (CC2) - - noun: S0 -> S2 transition oscillator strength computed using RI-CC2/def2TZVP - - noun: RI-CC2/def2TZVP-computed S0 -> S2 transition oscillator strength - - id: E1-PBE0 - description: Excitation energy of the first excited state, computed using LR-TDPBE0/def2SVP - units: a. u. - type: continuous - significant_digits: 5 - names: - - noun: S0 -> S1 transition energy computed using linear-response time-dependent density functional theory (LR-TDPBE0/def2SVP) - - noun: S0 -> S1 transition energy computed using LR-TDPBE0/def2SVP - - noun: LR-TDPBE0/def2SVP-computed S0 -> S1 transition energy - - id: E2-PBE0 - description: Excitation energy of the second excited state, computed using LR-TDPBE0/def2SVP - units: a. u. - type: continuous - significant_digits: 5 - names: - - noun: S0 -> S2 transition energy computed using linear-response time-dependent density functional theory (LR-TDPBE0/def2SVP) - - noun: S0 -> S2 transition energy computed using LR-TDPBE0/def2SVP - - noun: LR-TDPBE0/def2SVP-computed S0 -> S2 transition energy - - id: f1-PBE0 - description: Oscillator strength of the first excited state, computed using LR-TDPBE0/def2SVP - units: a. u. - type: continuous - significant_digits: 5 - names: - - noun: S0 -> S1 transition oscillator strength computed using linear-response time-dependent density functional theory (LR-TDPBE0/def2SVP) - - noun: S0 -> S1 transition oscillator strength computed using LR-TDPBE0/def2SVP - - noun: LR-TDPBE0/def2SVP-computed S0 -> S1 transition oscillator strength - - id: f2-PBE0 - description: Oscillator strength of the second excited state, computed using LR-TDPBE0/def2SVP - units: a. u. 
- type: continuous - significant_digits: 4 - names: - - noun: S0 -> S2 transition oscillator strength computed using linear-response time-dependent density functional theory (LR-TDPBE0/def2SVP) - - noun: S0 -> S2 transition oscillator strength computed using LR-TDPBE0/def2SVP - - noun: LR-TDPBE0/def2SVP-computed S0 -> S2 transition oscillator strength - - id: E1-CAM - description: Excitation energy of the first excited state, computed using CAM-B3LYP/def2TZVP - units: a. u. - type: continuous - significant_digits: 5 - names: - - noun: S0 -> S1 transition energy computed using Coulomb-attenuated B3LYP density functional theory (CAM-B3LYP/def2TZVP) - - noun: S0 -> S1 transition energy computed using CAM-B3LYP/def2TZVP - - noun: CAM-B3LYP/def2TZVP-computed S0 -> S1 transition energy - - id: E2-CAM - description: Excitation energy of the second excited state, computed using CAM-B3LYP/def2TZVP - units: a. u. - type: continuous - significant_digits: 5 - names: - - noun: S0 -> S2 transition energy computed using Coulomb-attenuated B3LYP density functional theory (CAM-B3LYP/def2TZVP) - - noun: S0 -> S2 transition energy computed using CAM-B3LYP/def2TZVP - - noun: CAM-B3LYP/def2TZVP-computed S0 -> S2 transition energy - - id: f1-CAM - description: Oscillator strength of the first excited state, computed using CAM-B3LYP/def2TZVP - units: a. u. - type: continuous - significant_digits: 5 - names: - - noun: S0 -> S1 transition oscillator strength computed using Coulomb-attenuated B3LYP density functional theory (CAM-B3LYP/def2TZVP) - - noun: S0 -> S1 transition oscillator strength computed using CAM-B3LYP/def2TZVP - - noun: CAM-B3LYP/def2TZVP-computed S0 -> S1 transition oscillator strength - - id: f2-CAM - description: Oscillator strength of the second excited state, computed using CAM-B3LYP/def2TZVP - units: a. u. 
- type: continuous - significant_digits: 4 - names: - - noun: S0 -> S2 transition oscillator strength computed using Coulomb-attenuated B3LYP density functional theory (CAM-B3LYP/def2TZVP) - - noun: S0 -> S2 transition oscillator strength computed using CAM-B3LYP/def2TZVP - - noun: CAM-B3LYP/def2TZVP-computed S0 -> S2 transition oscillator strength + - id: E1-CC2 + description: Excitation energy of the first excited state, computed using CC2 + units: a. u. + type: continuous + significant_digits: 5 + names: + - noun: S0 -> S1 transition energy computed using second-order approximate coupled-cluster theory (CC2) + - noun: S0 -> S1 transition energy computed using RI-CC2/def2TZVP + - noun: RI-CC2/def2TZVP-computed S0 -> S1 transition energy + - id: E2-CC2 + description: Excitation energy of the second excited state, computed using CC2 + units: a. u. + type: continuous + significant_digits: 5 + names: + - noun: S0 -> S2 transition energy computed using second-order approximate coupled-cluster theory (CC2) + - noun: S0 -> S2 transition energy computed using RI-CC2/def2TZVP + - noun: RI-CC2/def2TZVP-computed S0 -> S2 transition energy + - id: f1-CC2 + description: Oscillator strength of the first excited state, computed using CC2 + units: a. u. + type: continuous + significant_digits: 5 + names: + - noun: S0 -> S1 transition oscillator strength computed using second-order approximate coupled-cluster theory (CC2) + - noun: S0 -> S1 transition oscillator strength computed using RI-CC2/def2TZVP + - noun: RI-CC2/def2TZVP-computed S0 -> S1 transition oscillator strength + - id: f2-CC2 + description: Oscillator strength of the second excited state, computed using CC2 + units: a. u.
+ type: continuous + significant_digits: 5 + names: + - noun: S0 -> S2 transition oscillator strength computed using second-order approximate coupled-cluster theory (CC2) + - noun: S0 -> S2 transition oscillator strength computed using RI-CC2/def2TZVP + - noun: RI-CC2/def2TZVP-computed S0 -> S2 transition oscillator strength + - id: E1-PBE0 + description: Excitation energy of the first excited state, computed using LR-TDPBE0/def2SVP + units: a. u. + type: continuous + significant_digits: 5 + names: + - noun: S0 -> S1 transition energy computed using linear-response time-dependent density functional theory (LR-TDPBE0/def2SVP) + - noun: S0 -> S1 transition energy computed using LR-TDPBE0/def2SVP + - noun: LR-TDPBE0/def2SVP-computed S0 -> S1 transition energy + - id: E2-PBE0 + description: Excitation energy of the second excited state, computed using LR-TDPBE0/def2SVP + units: a. u. + type: continuous + significant_digits: 5 + names: + - noun: S0 -> S2 transition energy computed using linear-response time-dependent density functional theory (LR-TDPBE0/def2SVP) + - noun: S0 -> S2 transition energy computed using LR-TDPBE0/def2SVP + - noun: LR-TDPBE0/def2SVP-computed S0 -> S2 transition energy + - id: f1-PBE0 + description: Oscillator strength of the first excited state, computed using LR-TDPBE0/def2SVP + units: a. u. + type: continuous + significant_digits: 5 + names: + - noun: S0 -> S1 transition oscillator strength computed using linear-response time-dependent density functional theory (LR-TDPBE0/def2SVP) + - noun: S0 -> S1 transition oscillator strength computed using LR-TDPBE0/def2SVP + - noun: LR-TDPBE0/def2SVP-computed S0 -> S1 transition oscillator strength + - id: f2-PBE0 + description: Oscillator strength of the second excited state, computed using LR-TDPBE0/def2SVP + units: a. u. 
+ type: continuous + significant_digits: 4 + names: + - noun: S0 -> S2 transition oscillator strength computed using linear-response time-dependent density functional theory (LR-TDPBE0/def2SVP) + - noun: S0 -> S2 transition oscillator strength computed using LR-TDPBE0/def2SVP + - noun: LR-TDPBE0/def2SVP-computed S0 -> S2 transition oscillator strength + - id: E1-CAM + description: Excitation energy of the first excited state, computed using CAM-B3LYP/def2TZVP + units: a. u. + type: continuous + significant_digits: 5 + names: + - noun: S0 -> S1 transition energy computed using Coulomb-attenuated B3LYP density functional theory (CAM-B3LYP/def2TZVP) + - noun: S0 -> S1 transition energy computed using CAM-B3LYP/def2TZVP + - noun: CAM-B3LYP/def2TZVP-computed S0 -> S1 transition energy + - id: E2-CAM + description: Excitation energy of the second excited state, computed using CAM-B3LYP/def2TZVP + units: a. u. + type: continuous + significant_digits: 5 + names: + - noun: S0 -> S2 transition energy computed using Coulomb-attenuated B3LYP density functional theory (CAM-B3LYP/def2TZVP) + - noun: S0 -> S2 transition energy computed using CAM-B3LYP/def2TZVP + - noun: CAM-B3LYP/def2TZVP-computed S0 -> S2 transition energy + - id: f1-CAM + description: Oscillator strength of the first excited state, computed using CAM-B3LYP/def2TZVP + units: a. u. + type: continuous + significant_digits: 5 + names: + - noun: S0 -> S1 transition oscillator strength computed using Coulomb-attenuated B3LYP density functional theory (CAM-B3LYP/def2TZVP) + - noun: S0 -> S1 transition oscillator strength computed using CAM-B3LYP/def2TZVP + - noun: CAM-B3LYP/def2TZVP-computed S0 -> S1 transition oscillator strength + - id: f2-CAM + description: Oscillator strength of the second excited state, computed using CAM-B3LYP/def2TZVP + units: a. u. 
+ type: continuous + significant_digits: 4 + names: + - noun: S0 -> S2 transition oscillator strength computed using Coulomb-attenuated B3LYP density functional theory (CAM-B3LYP/def2TZVP) + - noun: S0 -> S2 transition oscillator strength computed using CAM-B3LYP/def2TZVP + - noun: CAM-B3LYP/def2TZVP-computed S0 -> S2 transition oscillator strength identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: XYZ - type: XYZ - description: XYZ file - - id: MOL2000 - type: MOL2000 - description: MOL2000 file - - id: MOL3000 - type: MOL3000 - description: MOL3000 file + - id: SMILES + type: SMILES + description: SMILES + - id: XYZ + type: XYZ + description: XYZ file + - id: MOL2000 + type: MOL2000 + description: MOL2000 file + - id: MOL3000 + type: MOL3000 + description: MOL3000 file license: CC BY 4.0 links: - - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/gdb8.tar.gz - description: original dataset + - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/gdb8.tar.gz + description: original dataset num_points: 7015 bibtex: - - |- - @article{Blum_2009, - doi = {10.1021/ja902302h}, - url = {https://doi.org/10.1021%2Fja902302h}, - year = 2009, - month = {jun}, - publisher = {American Chemical Society ({ACS})}, - volume = {131}, - number = {25}, - pages = {8732--8733}, - author = {Lorenz C. Blum and Jean-Louis Reymond}, - title = {970 Million Druglike Small Molecules for - Virtual Screening in the Chemical Universe Database {GDB}-13}, - journal = {J. Am. Chem. Soc.} - } - - |- - @article{Ramakrishnan_2015, - doi = {10.1063/1.4928757}, - url = {https://doi.org/10.1063%2F1.4928757}, - year = 2015, - month = {aug}, - publisher = {{AIP} Publishing}, - volume = {143}, - number = {8}, - author = {Raghunathan Ramakrishnan and Mia Hartmann - and Enrico Tapavicza and O. 
Anatole von Lilienfeld}, - title = {Electronic spectra from {TDDFT} - and machine learning in chemical space}, - journal = {The Journal of Chemical Physics} - } - - |- - @article{Wu_2018, - doi = {10.1039/c7sc02664a}, - url = {https://doi.org/10.1039%2Fc7sc02664a}, - year = 2018, - publisher = {Royal Society of Chemistry ({RSC})}, - volume = {9}, - number = {2}, - pages = {513--530}, - author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg - and Joseph Gomes and Caleb Geniesse and Aneesh S. Pappu - and Karl Leswing and Vijay Pande}, - title = {{MoleculeNet}: a benchmark for molecular machine learning}, - journal = {Chem. Sci.} - } + - |- + @article{Blum_2009, + doi = {10.1021/ja902302h}, + url = {https://doi.org/10.1021%2Fja902302h}, + year = 2009, + month = {jun}, + publisher = {American Chemical Society ({ACS})}, + volume = {131}, + number = {25}, + pages = {8732--8733}, + author = {Lorenz C. Blum and Jean-Louis Reymond}, + title = {970 Million Druglike Small Molecules for + Virtual Screening in the Chemical Universe Database {GDB}-13}, + journal = {J. Am. Chem. Soc.} + } + - |- + @article{Ramakrishnan_2015, + doi = {10.1063/1.4928757}, + url = {https://doi.org/10.1063%2F1.4928757}, + year = 2015, + month = {aug}, + publisher = {{AIP} Publishing}, + volume = {143}, + number = {8}, + author = {Raghunathan Ramakrishnan and Mia Hartmann + and Enrico Tapavicza and O. Anatole von Lilienfeld}, + title = {Electronic spectra from {TDDFT} + and machine learning in chemical space}, + journal = {The Journal of Chemical Physics} + } + - |- + @article{Wu_2018, + doi = {10.1039/c7sc02664a}, + url = {https://doi.org/10.1039%2Fc7sc02664a}, + year = 2018, + publisher = {Royal Society of Chemistry ({RSC})}, + volume = {9}, + number = {2}, + pages = {513--530}, + author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg + and Joseph Gomes and Caleb Geniesse and Aneesh S. 
Pappu + and Karl Leswing and Vijay Pande}, + title = {{MoleculeNet}: a benchmark for molecular machine learning}, + journal = {Chem. Sci.} + } templates: - - The {E1-CC2__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {E1-CC2#} {E1-CC2__units} - - The {E2-CC2__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {E2-CC2#} {E2-CC2__units} - - The {f1-CC2__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {f1-CC2#} {f1-CC2__units} - - The {f2-CC2__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {f2-CC2#} {f2-CC2__units} - - The {E1-PBE0__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {E1-PBE0#} {E1-PBE0__units} - - The {E2-PBE0__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {E2-PBE0#} {E2-PBE0__units} - - The {f1-PBE0__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {f1-PBE0#} {f1-PBE0__units} - - The {f2-PBE0__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {f2-PBE0#} {f2-PBE0__units} - - The {E1-CAM__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {E1-CAM#} {E1-CAM__units} - - The {E2-CAM__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {E2-CAM#} {E2-CAM__units} - - The {f1-CAM__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {f1-CAM#} {f1-CAM__units} - - The {f2-CAM__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {f2-CAM#} {f2-CAM__units} - - |- - Question: What is the {E1-CC2__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? 
- Description: The content of the V2000 Molfile is {MOL2000#}. - Answer: {E1-CC2#} {E1-CC2__units} - - |- - Question: What is the {E2-CC2__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? - Description: The content of the V2000 Molfile is {MOL2000#}. - Answer: {E2-CC2#} {E2-CC2__units} - - |- - Question: What is the {f1-CC2__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? - Description: The content of the V2000 Molfile is {MOL2000#}. - Answer: {f1-CC2#} {f1-CC2__units} - - |- - Question: What is the {f2-CC2__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? - Description: The content of the V2000 Molfile is {MOL2000#}. - Answer: {f2-CC2#} {f2-CC2__units} - - |- - Question: What is the {E1-PBE0__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? - Description: The content of the V2000 Molfile is {MOL2000#}. - Answer: {E1-PBE0#} {E1-PBE0__units} - - |- - Question: What is the {E2-PBE0__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? - Description: The content of the V2000 Molfile is {MOL2000#}. - Answer: {E2-PBE0#} {E2-PBE0__units} - - |- - Question: What is the {f1-PBE0__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? - Description: The content of the V2000 Molfile is {MOL2000#}. - Answer: {f1-PBE0#} {f1-PBE0__units} - - |- - Question: What is the {f2-PBE0__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? - Description: The content of the V2000 Molfile is {MOL2000#}. - Answer: {f2-PBE0#} {f2-PBE0__units} - - |- - Question: What is the {E1-CAM__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? 
- Description: The content of the V2000 Molfile is {MOL2000#}. - Answer: {E1-CAM#} {E1-CAM__units} - - |- - Question: What is the {E2-CAM__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? - Description: The content of the V2000 Molfile is {MOL2000#}. - Answer: {E2-CAM#} {E2-CAM__units} - - |- - Question: What is the {f1-CAM__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? - Description: The content of the V2000 Molfile is {MOL2000#}. - Answer: {f1-CAM#} {f1-CAM__units} - - |- - Question: What is the {f2-CAM__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? - Description: The content of the V2000 Molfile is {MOL2000#}. - Answer: {f2-CAM#} {f2-CAM__units} - - |- - Question: What is the {E1-CC2__names__noun} of the {#molecule|chemical|compound!} with the V3000 Molfile with the following content? - Description: The content of the V3000 Molfile is {MOL3000#}. - Answer: {E1-CC2#} {E1-CC2__units} - - |- - Question: What is the {SMILES__description} of the {#molecule|chemical|compound!} with the V3000 Molfile with the following content? - Description: The content of the V3000 Molfile is {MOL3000#}. - Answer: {SMILES#}. - - |- - Question: What is the {E2-CC2__names__noun} of the {#molecule|chemical|compound!} with the V3000 Molfile with the following content? - Description: The content of the V3000 Molfile is {MOL3000#}. - Answer: {E2-CC2#} {E2-CC2__units} - - |- - User: I want to design a {#molecule|molecule|compound!} with a particular {E1-CC2__names__noun}, {E2-CC2__names__noun}, and {f1-CC2__names__noun}. - Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}I would need to know the {E1-CC2__names__noun}, {E2-CC2__names__noun}, and {f1-CC2__names__noun} of the molecule you want to design. 
- User: The {E1-CC2__names__noun} should be {E1-CC2#} {E1-CC2__units}, the {E2-CC2__names__noun} should be {E2-CC2#} {E2-CC2__units}, and the {f1-CC2__names__noun} should be {f1-CC2#} {f1-CC2__units} - Assistant: I {#recommend|suggest|propose|advise!} the {#molecule|chemical|compound!} with the V2000 Molfile with the following content: {MOL2000#}. - - |- - User: I have computed the {E1-CC2__names__noun}, {E2-CC2__names__noun}, and {f1-CC2__names__noun} of a {#molecule|molecule|compound!} and want to know its {SMILES__description}. - Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}I would need to know the {E1-CC2__names__noun}, {E2-CC2__names__noun}, and {f1-CC2__names__noun} of the molecule you want to know the {SMILES__description} of. - User: The {E1-CC2__names__noun} is {E1-CC2#} {E1-CC2__units}, the {E2-CC2__names__noun} is {E2-CC2#} {E2-CC2__units}, and the {f1-CC2__names__noun} is {f1-CC2#} {f1-CC2__units} - Assistant: The {SMILES__description} of the molecule is {SMILES#}. - - |- - User: I want to design a {#molecule|molecule|compound!} that has a {E1-CC2__names__noun} of {E1-CC2#} {E1-CC2__units} and a {E2-CC2__names__noun} of {E2-CC2#} {E2-CC2__units} - Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}{#Do you have any other constraints?|Do you have other requirements?|What else should I take into account?!} - User: {#No, |Nope, |No, I don't, |!}I only want to know the {SMILES__description} of the molecule. - Assistant: I {#recommend|suggest|propose|advise!} the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#}. 
- - |- - User: I want to design a {#molecule|molecule|compound!} that has a {E1-CC2__names__noun} of {E1-CC2#} {E1-CC2__units} and a {E2-CC2__names__noun} of {E2-CC2#} {E2-CC2__units} - Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}{#Do you have any other constraints?|Do you have other requirements?|What else should I take into account?!} - User: {#Yes, |Yeah, |Yep, |Indeed, |!}I {#want|would like!} the {f1-CC2__names__noun} to be {f1-CC2#} {f1-CC2__units} - Assistant: I {#recommend|suggest|propose|advise!} the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#}. + - The {E1-CC2__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {E1-CC2#} {E1-CC2__units} + - The {E2-CC2__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {E2-CC2#} {E2-CC2__units} + - The {f1-CC2__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {f1-CC2#} {f1-CC2__units} + - The {f2-CC2__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {f2-CC2#} {f2-CC2__units} + - The {E1-PBE0__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {E1-PBE0#} {E1-PBE0__units} + - The {E2-PBE0__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {E2-PBE0#} {E2-PBE0__units} + - The {f1-PBE0__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {f1-PBE0#} {f1-PBE0__units} + - The {f2-PBE0__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {f2-PBE0#} {f2-PBE0__units} + - The {E1-CAM__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {E1-CAM#} {E1-CAM__units} + - The {E2-CAM__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {E2-CAM#} 
{E2-CAM__units} + - The {f1-CAM__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {f1-CAM#} {f1-CAM__units} + - The {f2-CAM__names__noun} of the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#} is {f2-CAM#} {f2-CAM__units} + - |- + Question: What is the {E1-CC2__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? + Description: The content of the V2000 Molfile is {MOL2000#}. + Answer: {E1-CC2#} {E1-CC2__units} + - |- + Question: What is the {E2-CC2__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? + Description: The content of the V2000 Molfile is {MOL2000#}. + Answer: {E2-CC2#} {E2-CC2__units} + - |- + Question: What is the {f1-CC2__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? + Description: The content of the V2000 Molfile is {MOL2000#}. + Answer: {f1-CC2#} {f1-CC2__units} + - |- + Question: What is the {f2-CC2__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? + Description: The content of the V2000 Molfile is {MOL2000#}. + Answer: {f2-CC2#} {f2-CC2__units} + - |- + Question: What is the {E1-PBE0__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? + Description: The content of the V2000 Molfile is {MOL2000#}. + Answer: {E1-PBE0#} {E1-PBE0__units} + - |- + Question: What is the {E2-PBE0__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? + Description: The content of the V2000 Molfile is {MOL2000#}. + Answer: {E2-PBE0#} {E2-PBE0__units} + - |- + Question: What is the {f1-PBE0__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? + Description: The content of the V2000 Molfile is {MOL2000#}. 
+ Answer: {f1-PBE0#} {f1-PBE0__units} + - |- + Question: What is the {f2-PBE0__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? + Description: The content of the V2000 Molfile is {MOL2000#}. + Answer: {f2-PBE0#} {f2-PBE0__units} + - |- + Question: What is the {E1-CAM__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? + Description: The content of the V2000 Molfile is {MOL2000#}. + Answer: {E1-CAM#} {E1-CAM__units} + - |- + Question: What is the {E2-CAM__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? + Description: The content of the V2000 Molfile is {MOL2000#}. + Answer: {E2-CAM#} {E2-CAM__units} + - |- + Question: What is the {f1-CAM__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? + Description: The content of the V2000 Molfile is {MOL2000#}. + Answer: {f1-CAM#} {f1-CAM__units} + - |- + Question: What is the {f2-CAM__names__noun} of the {#molecule|chemical|compound!} with the V2000 Molfile with the following content? + Description: The content of the V2000 Molfile is {MOL2000#}. + Answer: {f2-CAM#} {f2-CAM__units} + - |- + Question: What is the {E1-CC2__names__noun} of the {#molecule|chemical|compound!} with the V3000 Molfile with the following content? + Description: The content of the V3000 Molfile is {MOL3000#}. + Answer: {E1-CC2#} {E1-CC2__units} + - |- + Question: What is the {SMILES__description} of the {#molecule|chemical|compound!} with the V3000 Molfile with the following content? + Description: The content of the V3000 Molfile is {MOL3000#}. + Answer: {SMILES#}. + - |- + Question: What is the {E2-CC2__names__noun} of the {#molecule|chemical|compound!} with the V3000 Molfile with the following content? + Description: The content of the V3000 Molfile is {MOL3000#}. 
+ Answer: {E2-CC2#} {E2-CC2__units} + - |- + User: I want to design a {#molecule|molecule|compound!} with a particular {E1-CC2__names__noun}, {E2-CC2__names__noun}, and {f1-CC2__names__noun}. + Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}I would need to know the {E1-CC2__names__noun}, {E2-CC2__names__noun}, and {f1-CC2__names__noun} of the molecule you want to design. + User: The {E1-CC2__names__noun} should be {E1-CC2#} {E1-CC2__units}, the {E2-CC2__names__noun} should be {E2-CC2#} {E2-CC2__units}, and the {f1-CC2__names__noun} should be {f1-CC2#} {f1-CC2__units} + Assistant: I {#recommend|suggest|propose|advise!} the {#molecule|chemical|compound!} with the V2000 Molfile with the following content: {MOL2000#}. + - |- + User: I have computed the {E1-CC2__names__noun}, {E2-CC2__names__noun}, and {f1-CC2__names__noun} of a {#molecule|molecule|compound!} and want to know its {SMILES__description}. + Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}I would need to know the {E1-CC2__names__noun}, {E2-CC2__names__noun}, and {f1-CC2__names__noun} of the molecule you want to know the {SMILES__description} of. + User: The {E1-CC2__names__noun} is {E1-CC2#} {E1-CC2__units}, the {E2-CC2__names__noun} is {E2-CC2#} {E2-CC2__units}, and the {f1-CC2__names__noun} is {f1-CC2#} {f1-CC2__units} + Assistant: The {SMILES__description} of the molecule is {SMILES#}. + - |- + User: I want to design a {#molecule|molecule|compound!} that has a {E1-CC2__names__noun} of {E1-CC2#} {E1-CC2__units} and a {E2-CC2__names__noun} of {E2-CC2#} {E2-CC2__units} + Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}{#Do you have any other constraints?|Do you have other requirements?|What else should I take into account?!} + User: {#No, |Nope, |No, I don't, |!}I only want to know the {SMILES__description} of the molecule. + Assistant: I {#recommend|suggest|propose|advise!} the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#}. 
+ - |- + User: I want to design a {#molecule|molecule|compound!} that has a {E1-CC2__names__noun} of {E1-CC2#} {E1-CC2__units} and a {E2-CC2__names__noun} of {E2-CC2#} {E2-CC2__units} + Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}{#Do you have any other constraints?|Do you have other requirements?|What else should I take into account?!} + User: {#Yes, |Yeah, |Yep, |Indeed, |!}I {#want|would like!} the {f1-CC2__names__noun} to be {f1-CC2#} {f1-CC2__units} + Assistant: I {#recommend|suggest|propose|advise!} the {#molecule|chemical|compound!} with the {SMILES__description} {SMILES#}. diff --git a/data/tabular/qm9/meta.yaml b/data/tabular/qm9/meta.yaml index d6dd53f4e..857ca6611 100644 --- a/data/tabular/qm9/meta.yaml +++ b/data/tabular/qm9/meta.yaml @@ -1,192 +1,176 @@ ---- name: qm9 description: |- - QM9 is a comprehensive dataset that provides geometric, energetic, - electronic and thermodynamic properties for a subset of GDB-17 - database, comprising 134 thousand stable organic molecules with up - to 9 heavy atoms. All molecules are modeled using density - functional theory (B3LYP/6-31G(2df,p) based DFT). + QM9 is a comprehensive dataset that provides geometric, energetic, + electronic and thermodynamic properties for a subset of GDB-17 + database, comprising 134 thousand stable organic molecules with up + to 9 heavy atoms. All molecules are modeled using density + functional theory (B3LYP/6-31G(2df,p) based DFT). 
targets: - - id: rotational_constant_a - description: Rotational constant A or the moment of inertia of the molecule along its principal axis of rotation - units: GHz - type: float - names: - - noun: Rotational constant A - - noun: moment of inertia along principal axis of rotation - - id: rotational_constant_b - description: Rotational constant B or the moment of inertia of the molecule along an axis perpendicular to the principal axis - units: GHz - type: float - names: - - noun: Rotational constant B - - noun: moment of inertia along an axis perpendicular to the principal axis - - id: rotational_constant_c - description: Rotational constant C or the moment of inertia of the molecule along an axis perpendicular to the principal axis - units: GHz - type: float - names: - - noun: Rotational constant C - - noun: moment of inertia along the third perpendicular to the principal axis - - id: dipole_moment - description: dipole moment - units: Debye - type: float - names: - - noun: dipole moment - - noun: mu - - id: polarizability - description: isotropic polarizability - units: Bohr^3 - type: float - names: - - noun: isotropic polarizability - - noun: alpha - - id: homo - description: energy of Highest Occupied Molecular Orbital (HOMO) - units: Hartree - type: float - names: - - noun: homo - - noun: HOMO - - noun: highest occupied molecular orbital - - id: lumo - description: energy of Lowest Unoccupied Molecular Orbital (LUMO) - units: Hartree - type: float - names: - - noun: lumo - - noun: LUMO - - noun: lowest unoccupied molecular orbital - - id: gap - description: HOMO-LUMO gap, difference between HOMO and LUMO - units: Hartree - type: float - names: - - noun: gap - - noun: HOMO-LUMO gap - - noun: homo lumo gap - - id: r2 - description: electronic spatial extent - units: Bohr^2 - type: float - names: - - noun: r2 - - noun: electronic spatial extent - - id: zero_point_energy - description: zero point vibrational energy - units: Hartree - type: float - names: - - 
noun: zero point vibrational energy - - noun: zero point energy - - noun: zpve - - id: u0 - description: internal energy at 0 K - units: Hartree - type: float - names: - - noun: internal energy at 0 K - - id: u298 - description: internal energy at 298.15 K - units: Hartree - type: float - names: - - noun: internal energy at 298.15 K - - id: h298 - description: enthalpy at 298.15 K - units: Hartree - type: float - names: - - noun: enthalpy at 298.15 K - - id: g298 - description: Gibbs free energy at 298.15 K - units: Hartree - type: float - names: - - noun: free energy at 298.15 K - - noun: Gibbs free energy at 298.15 K - - id: heat_capacity - description: heat capacity at 298.15 K - units: cal/(mol K) - type: float - names: - - noun: heat capacity at 298.15 K + - id: rotational_constant_a + description: Rotational constant A or the moment of inertia of the molecule along its principal axis of rotation + units: GHz + type: float + names: + - noun: Rotational constant A + - noun: moment of inertia along principal axis of rotation + - id: rotational_constant_b + description: Rotational constant B or the moment of inertia of the molecule along an axis perpendicular to the principal axis + units: GHz + type: float + names: + - noun: Rotational constant B + - noun: moment of inertia along an axis perpendicular to the principal axis + - id: rotational_constant_c + description: Rotational constant C or the moment of inertia of the molecule along an axis perpendicular to the principal axis + units: GHz + type: float + names: + - noun: Rotational constant C + - noun: moment of inertia along the third perpendicular to the principal axis + - id: dipole_moment + description: dipole moment + units: Debye + type: float + names: + - noun: dipole moment + - noun: mu + - id: polarizability + description: isotropic polarizability + units: Bohr^3 + type: float + names: + - noun: isotropic polarizability + - noun: alpha + - id: homo + description: energy of Highest Occupied Molecular 
Orbital (HOMO) + units: Hartree + type: float + names: + - noun: homo + - noun: HOMO + - noun: highest occupied molecular orbital + - id: lumo + description: energy of Lowest Unoccupied Molecular Orbital (LUMO) + units: Hartree + type: float + names: + - noun: lumo + - noun: LUMO + - noun: lowest unoccupied molecular orbital + - id: gap + description: HOMO-LUMO gap, difference between HOMO and LUMO + units: Hartree + type: float + names: + - noun: gap + - noun: HOMO-LUMO gap + - noun: homo lumo gap + - id: r2 + description: electronic spatial extent + units: Bohr^2 + type: float + names: + - noun: r2 + - noun: electronic spatial extent + - id: zero_point_energy + description: zero point vibrational energy + units: Hartree + type: float + names: + - noun: zero point vibrational energy + - noun: zero point energy + - noun: zpve + - id: u0 + description: internal energy at 0 K + units: Hartree + type: float + names: + - noun: internal energy at 0 K + - id: u298 + description: internal energy at 298.15 K + units: Hartree + type: float + names: + - noun: internal energy at 298.15 K + - id: h298 + description: enthalpy at 298.15 K + units: Hartree + type: float + names: + - noun: enthalpy at 298.15 K + - id: g298 + description: Gibbs free energy at 298.15 K + units: Hartree + type: float + names: + - noun: free energy at 298.15 K + - noun: Gibbs free energy at 298.15 K + - id: heat_capacity + description: heat capacity at 298.15 K + units: cal/(mol K) + type: float + names: + - noun: heat capacity at 298.15 K identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: inchi - type: InChI - description: International Chemical Identifier + - id: SMILES + type: SMILES + description: SMILES + - id: inchi + type: InChI + description: International Chemical Identifier license: CC0 1.0 num_points: 133885 links: - - url: https://figshare.com/collections/Quantum_chemistry_structures_and_properties_of_134_kilo_molecules/978904 - description: original data source - - 
url: https://huggingface.co/datasets/n0w0f/qm9-csv/blob/main/qm9_dataset.csv - description: parsed dataset in csv format + - url: https://figshare.com/collections/Quantum_chemistry_structures_and_properties_of_134_kilo_molecules/978904 + description: original data source + - url: https://huggingface.co/datasets/n0w0f/qm9-csv/blob/main/qm9_dataset.csv + description: parsed dataset in csv format bibtex: - - |- - @article{ramakrishnan2014quantum, - title={Quantum chemistry structures and properties of 134 kilo molecules}, - author={Ramakrishnan, Raghunathan and Dral, Pavlo O and Rupp, Matthias and Von Lilienfeld, O Anatole}, - journal={Scientific data}, - volume={1}, - number={1}, - pages={1--7}, - year={2014}, - publisher={Nature Publishing Group}} - - |- - @article{ruddigkeit2012enumeration, - title={Enumeration of 166 billion organic small molecules in the chemical universe database GDB-17}, - author={Ruddigkeit, Lars and Van Deursen, Ruud and Blum, Lorenz C and Reymond, Jean-Louis}, - journal={Journal of chemical information and modeling}, - volume={52}, - number={11}, - pages={2864--2875}, - year={2012}, - publisher={ACS Publications}} + - |- + @article{ramakrishnan2014quantum, + title={Quantum chemistry structures and properties of 134 kilo molecules}, + author={Ramakrishnan, Raghunathan and Dral, Pavlo O and Rupp, Matthias and Von Lilienfeld, O Anatole}, + journal={Scientific data}, + volume={1}, + number={1}, + pages={1--7}, + year={2014}, + publisher={Nature Publishing Group}} + - |- + @article{ruddigkeit2012enumeration, + title={Enumeration of 166 billion organic small molecules in the chemical universe database GDB-17}, + author={Ruddigkeit, Lars and Van Deursen, Ruud and Blum, Lorenz C and Reymond, Jean-Louis}, + journal={Journal of chemical information and modeling}, + volume={52}, + number={11}, + pages={2864--2875}, + year={2012}, + publisher={ACS Publications}} templates: - - The {#molecule|compound|chemical|molecular species|chemical compound!} with 
the {SMILES__description} {#representation of |!}{SMILES#} has a dipole - moment of {dipole_moment#} Debye, {#calculated|simulated!} computationally using {#Density Functional Theory|DFT!} with B3LYP {#exchange correlation - functional|functional|accuracy!}. - - The {#isotropic polarizability|polarizability|polarizability!} of {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} - {SMILES#} is {polarizability#} Bohr^3 calculated using {#Density Functional Theory|DFT!} with B3LYP {#exchange correlation functional|functional|accuracy!}. - - The {#molecule|compound|chemical!} with the {SMILES__description} {SMILES#} has a {rotational_constant_a__names__noun} of {rotational_constant_a#} - GHz calculated computationally. - - Based on {#Density Functional Theory|DFT!} {#calculation|simulation!} with B3LYP {#exchange correlation functional|functional|accuracy!}, the {#molecule|compound|chemical!} - with the {SMILES__description} {SMILES#} has an energy of highest occupied molecular orbital {homo#} Hartree. - - The {lumo__names__noun} {#computed|calculated!} using {#Density Functional Theory|DFT!} {#calculation|simulation!} and B3LYP {#exchange correlation - functional|functional|accuracy!} is {lumo#} Hartree. - - The {#molecule|compound|chemical|molecular species|chemical compound!} {#described|represented!} by its {SMILES__description} {#notation|representation!} - {SMILES#} possesses a HOMO-LUMO gap measuring {gap#} Hartree as per {#Density Functional Theory|DFT!} results calculated with B3LYP {#exchange correlation - functional|functional|accuracy!}. - - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} representation of {SMILES#} has an electronic - spatial extent of {r2#} Bohr^2 computed using {#Density Functional Theory|DFT!}. 
- - The {#molecule|compound|chemical!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {zero_point_energy__names__noun} of {zero_point_energy#} - Hartree when computed using {#Density Functional Theory|DFT!} with B3LYP functional. - - As per {#Density Functional Theory|DFT!} {#calculation|simulation!} the {#molecule|compound|chemical|molecular species|chemical compound!} with {SMILES__description} - {SMILES#} has an internal energy of {u0#} Hartree at 0 K. - - The {#molecule|compound|chemical|molecular species|chemical compound!} represented in {SMILES__description} as {SMILES#} has an internal energy of - {u298#} Hartree at 298.15 K when calculated using {#Density Functional Theory|DFT!} with B3LYP {#exchange correlation functional|functional|accuracy!}. - - The {#molecule|compound|chemical!} with the {SMILES__description} representation of {SMILES#} when calculated with B3LYP DFT simlulations has an enthalpy - of {h298#} Hartree at 298.15 K. - - The {SMILES__description} {SMILES#} {#represents|is from!} a {#molecule|compound|chemical|molecular species|chemical compound!} that has a Gibbs free - energy of {g298#} Hartree at 298.15 K, calculated computationally using {#Density Functional Theory|DFT!} with B3LYP {#exchange correlation functional|functional|accuracy!}. - - At temperature 298.15 K, the {#Density Functional Theory|DFT!} calculated value of heat capacity is {heat_capacity#} cal/(mol K) for the {#molecule|compound|chemical|molecular - species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#}. - - |- - 'Question: What is a {#molecule|compound|chemical|molecular species|chemical compound!} with a {gap__names__noun} of {gap#} Hartree and an energy of highest occupied molecular orbital {homo#} Hartree? 
- Answer: A {#molecule|compound|chemical|molecular species|chemical compound!} with {SMILES__description} {SMILES#}' - - |- - 'Question: What is a molecule with an electronic spatial extent of {r2#} Bohr^2 and an energy of lowest unoccupied molecular orbital {lumo#} Hartree? - Answer: A molecule with {SMILES__description} {SMILES#}' - - |- - Task: Please {#give me|create|generate!} a molecule with the {SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule with a dipole moment of {dipole_moment#} Debye and an isotropic polarizability of {polarizability#} Bohr^3. - Result: {SMILES#} - - |- - Task: Please {#give me|create|generate!} a {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} based on the {#text |!}description{# below|!}. - Description: It has a heat capacity of {heat_capacity#} cal/(mol K) at 298.15 K and a dipole moment of {dipole_moment#} Debye. - Result: {SMILES#} + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a dipole moment of {dipole_moment#} Debye, {#calculated|simulated!} computationally using {#Density Functional Theory|DFT!} with B3LYP {#exchange correlation functional|functional|accuracy!}. + - The {#isotropic polarizability|polarizability|polarizability!} of {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {SMILES#} is {polarizability#} Bohr^3 calculated using {#Density Functional Theory|DFT!} with B3LYP {#exchange correlation functional|functional|accuracy!}. + - The {#molecule|compound|chemical!} with the {SMILES__description} {SMILES#} has a {rotational_constant_a__names__noun} of {rotational_constant_a#} GHz calculated computationally. 
+ - Based on {#Density Functional Theory|DFT!} {#calculation|simulation!} with B3LYP {#exchange correlation functional|functional|accuracy!}, the {#molecule|compound|chemical!} with the {SMILES__description} {SMILES#} has an energy of highest occupied molecular orbital {homo#} Hartree. + - The {lumo__names__noun} {#computed|calculated!} using {#Density Functional Theory|DFT!} {#calculation|simulation!} and B3LYP {#exchange correlation functional|functional|accuracy!} is {lumo#} Hartree. + - The {#molecule|compound|chemical|molecular species|chemical compound!} {#described|represented!} by its {SMILES__description} {#notation|representation!} {SMILES#} possesses a HOMO-LUMO gap measuring {gap#} Hartree as per {#Density Functional Theory|DFT!} results calculated with B3LYP {#exchange correlation functional|functional|accuracy!}. + - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} representation of {SMILES#} has an electronic spatial extent of {r2#} Bohr^2 computed using {#Density Functional Theory|DFT!}. + - The {#molecule|compound|chemical!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {zero_point_energy__names__noun} of {zero_point_energy#} Hartree when computed using {#Density Functional Theory|DFT!} with B3LYP functional. + - As per {#Density Functional Theory|DFT!} {#calculation|simulation!} the {#molecule|compound|chemical|molecular species|chemical compound!} with {SMILES__description} {SMILES#} has an internal energy of {u0#} Hartree at 0 K. + - The {#molecule|compound|chemical|molecular species|chemical compound!} represented in {SMILES__description} as {SMILES#} has an internal energy of {u298#} Hartree at 298.15 K when calculated using {#Density Functional Theory|DFT!} with B3LYP {#exchange correlation functional|functional|accuracy!}. 
+ - The {#molecule|compound|chemical!} with the {SMILES__description} representation of {SMILES#} when calculated with B3LYP DFT simulations has an enthalpy of {h298#} Hartree at 298.15 K. + - The {SMILES__description} {SMILES#} {#represents|is from!} a {#molecule|compound|chemical|molecular species|chemical compound!} that has a Gibbs free energy of {g298#} Hartree at 298.15 K, calculated computationally using {#Density Functional Theory|DFT!} with B3LYP {#exchange correlation functional|functional|accuracy!}. + - At temperature 298.15 K, the {#Density Functional Theory|DFT!} calculated value of heat capacity is {heat_capacity#} cal/(mol K) for the {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#}. + - |- + 'Question: What is a {#molecule|compound|chemical|molecular species|chemical compound!} with a {gap__names__noun} of {gap#} Hartree and an energy of highest occupied molecular orbital {homo#} Hartree? + Answer: A {#molecule|compound|chemical|molecular species|chemical compound!} with {SMILES__description} {SMILES#}' + - |- + 'Question: What is a molecule with an electronic spatial extent of {r2#} Bohr^2 and an energy of lowest unoccupied molecular orbital {lumo#} Hartree? + Answer: A molecule with {SMILES__description} {SMILES#}' + - |- + Task: Please {#give me|create|generate!} a molecule with the {SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule with a dipole moment of {dipole_moment#} Debye and an isotropic polarizability of {polarizability#} Bohr^3. + Result: {SMILES#} + - |- + Task: Please {#give me|create|generate!} a {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} based on the {#text |!}description{# below|!}. + Description: It has a heat capacity of {heat_capacity#} cal/(mol K) at 298.15 K and a dipole moment of {dipole_moment#} Debye.
+ Result: {SMILES#} diff --git a/data/tabular/qmof_gcmc/meta.yaml b/data/tabular/qmof_gcmc/meta.yaml index 5bfa323fb..2160810ba 100644 --- a/data/tabular/qmof_gcmc/meta.yaml +++ b/data/tabular/qmof_gcmc/meta.yaml @@ -1,442 +1,369 @@ ---- name: qmof_gcmc description: |- - QMOF is a database of electronic properties of MOFs, assembled by Rosen et al. - Jablonka et al. added gas adsorption properties. + QMOF is a database of electronic properties of MOFs, assembled by Rosen et al. + Jablonka et al. added gas adsorption properties. targets: - - id: lg10_CO2_Henry - description: CO2 Henry coefficient - type: continuous - units: mol/kg/Pa - significant_digits: 3 - names: - - noun: 10-based logarithm of CO2 Henry coefficient (computed using grand canonical Monte Carlo) - - noun: 10-based logarithm of carbon dioxide Henry coefficient (computed using grand canonical Monte Carlo) - - id: lg10_N2_Henry - description: N2 Henry coefficient - type: continuous - units: mol/kg/Pa - significant_digits: 3 - names: - - noun: 10-based logarithm of N2 Henry coefficient (computed using grand canonical Monte Carlo) - - noun: 10-based logarithm of nitrogen Henry coefficient (computed using grand canonical Monte Carlo) - - id: lg10_CH4_Henry - description: CH4 Henry coefficient - type: continuous - units: mol/kg/Pa - significant_digits: 3 - names: - - noun: 10-based logarithm of CH4 Henry coefficient (computed using grand canonical Monte Carlo) - - noun: 10-based logarithm of methane Henry coefficient (computed using grand canonical Monte Carlo) - - id: lg10_O2_Henry - description: O2 Henry coefficient - type: continuous - units: mol/kg/Pa - significant_digits: 3 - names: - - noun: 10-based logarithm of O2 Henry coefficient (computed using grand canonical Monte Carlo) - - noun: 10-based logarithm of oxygen Henry coefficient using grand canonical Monte Carlo - - id: lg10_Xe_Henry - description: Xe Henry coefficient - type: continuous - units: mol/kg/Pa - significant_digits: 3 - names: - - 
noun: 10-based logarithm of Xe Henry coefficient (computed using grand canonical Monte Carlo) - - noun: 10-based logarithm of xenon Henry coefficient (computed using grand canonical Monte Carlo) - - id: lg10_Kr_Henry - description: Kr Henry coefficient - type: continuous - units: mol/kg/Pa - significant_digits: 3 - names: - - noun: 10-based logarithm of Kr Henry coefficient (computed using grand canonical Monte Carlo) - - noun: 10-based logarithm of krypton Henry coefficient (computed using grand canonical Monte Carlo) - - id: lg10_H2S_Henry - description: H2S Henry coefficient - type: continuous - units: mol/kg/Pa - significant_digits: 3 - names: - - noun: 10-based logarithm of H2S Henry coefficient (computed using grand canonical Monte Carlo) - - noun: 10-based logarithm of hydrogen sulfide Henry coefficient (computed using grand canonical Monte Carlo) - - id: lg10_H2O_Henry - description: H2O Henry coefficient - type: continuous - units: mol/kg/Pa - significant_digits: 3 - names: - - noun: 10-based logarithm of H2O Henry coefficient (computed using grand canonical Monte Carlo) - - noun: 10-based logarithm of water Henry coefficient (computed using grand canonical Monte Carlo) - - id: outputs.CO2-adsorption_energy-kJ--mol - description: CO2 adsorption energy - type: continuous - units: kJ/mol - significant_digits: 3 - names: - - noun: CO2 adsorption energy (computed using grand canonical Monte Carlo) - - noun: carbon dioxide adsorption energy (computed using grand canonical Monte Carlo) - - id: outputs.N2-adsorption_energy-kJ--mol - description: N2 adsorption energy - type: continuous - units: kJ/mol - significant_digits: 3 - names: - - noun: N2 adsorption energy (computed using grand canonical Monte Carlo) - - noun: nitrogen adsorption energy (computed using grand canonical Monte Carlo) - - id: outputs.CH4-adsorption_energy-kJ--mol - description: CH4 adsorption energy - type: continuous - units: kJ/mol - significant_digits: 3 - names: - - noun: CH4 adsorption 
energy (computed using grand canonical Monte Carlo) - - noun: methane adsorption energy (computed using grand canonical Monte Carlo) - - id: outputs.O2-adsorption_energy-kJ--mol - description: O2 adsorption energy - type: continuous - units: kJ/mol - significant_digits: 3 - names: - - noun: O2 adsorption energy (computed using grand canonical Monte Carlo) - - noun: oxygen adsorption energy (computed using grand canonical Monte Carlo) - - id: outputs.Xe-adsorption_energy-kJ--mol - description: Xe adsorption energy - type: continuous - units: kJ/mol - significant_digits: 3 - names: - - noun: Xe adsorption energy (computed using grand canonical Monte Carlo) - - noun: xenon adsorption energy (computed using grand canonical Monte Carlo) - - id: outputs.Kr-adsorption_energy-kJ--mol - description: Kr adsorption energy - type: continuous - units: kJ/mol - significant_digits: 3 - names: - - noun: Kr adsorption energy (computed using grand canonical Monte Carlo) - - noun: krypton adsorption energy (computed using grand canonical Monte Carlo) - - id: outputs.H2S-adsorption_energy-kJ--mol - description: H2S adsorption energy - type: continuous - units: kJ/mol - significant_digits: 3 - names: - - noun: H2S adsorption energy (computed using grand canonical Monte Carlo) - - noun: hydrogen sulfide adsorption energy (computed using grand canonical Monte Carlo) - - id: outputs.H2O-adsorption_energy-kJ--mol - description: H2O adsorption energy - type: continuous - units: kJ/mol - significant_digits: 3 - names: - - noun: H2O adsorption energy (computed using grand canonical Monte Carlo) - - noun: water adsorption energy (computed using grand canonical Monte Carlo) - - id: outputs.H2S--H2O-selectivity_298_K- - description: H2S/H2O selectivity - type: continuous - units: dimensionless - significant_digits: 3 - names: - - noun: H2S/H2O selectivity (computed using grand canonical Monte Carlo) - - noun: hydrogen sulfide/water selectivity (computed using grand canonical Monte Carlo) - - id: 
outputs.CH4--N2-selectivity_298_K- - description: CH4/N2 selectivity - type: continuous - units: dimensionless - significant_digits: 3 - names: - - noun: CH4/N2 selectivity (computed using grand canonical Monte Carlo) - - noun: methane/nitrogen selectivity (computed using grand canonical Monte Carlo) - - id: outputs.Xe--Kr-selectivity_298_K- - description: Xe/Kr selectivity - type: continuous - units: dimensionless - significant_digits: 3 - names: - - noun: Xe/Kr selectivity (computed using grand canonical Monte Carlo) - - noun: xenon/krypton selectivity (computed using grand canonical Monte Carlo) - - id: outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3 - description: CH4 working capacity - type: continuous - units: cm^3 STP/cm^3 - significant_digits: 3 - names: - - noun: CH4 working capacity between 58 and 65 bar at 298 K (computed using grand canonical Monte Carlo) - - noun: methane working capacity between 58 and 65 bar at 298 K (computed using grand canonical Monte Carlo) - - id: outputs.CH4-working_capacity_mol_58_to_65_bar_298_K-mol--kg - description: CH4 working capacity - type: continuous - units: mol/kg - significant_digits: 3 - names: - - noun: CH4 working capacity between 58 and 65 bar at 298 K (computed using grand canonical Monte Carlo) - - noun: methane working capacity between 58 and 65 bar at 298 K (computed using grand canonical Monte Carlo) - - id: outputs.O2-working_capacity_vol_5_to_140_bar_298_K-cm3_STP--cm3 - description: O2 working capacity - type: continuous - units: cm^3 STP/cm^3 - significant_digits: 3 - names: - - noun: O2 working capacity between 5 and 140 bar at 298 K (computed using grand canonical Monte Carlo) - - noun: oxygen working capacity between 5 and 140 bar at 298 K (computed using grand canonical Monte Carlo) - - id: outputs.O2-working_capacity_mol_5_to_140_bar_298_K-mol--kg - description: O2 working capacity - type: continuous - units: mol/kg - significant_digits: 3 - names: - - noun: O2 working capacity 
between 5 and 140 bar at 298 K (computed using grand canonical Monte Carlo) - - noun: oxygen working capacity between 5 and 140 bar at 298 K (computed using grand canonical Monte Carlo) - - id: outputs.H2-working_capacity_5_to_100_bar_298_to_198_K-g--L - description: H2 working capacity - type: continuous - units: g/L - significant_digits: 3 - names: - - noun: H2 working capacity between 5 and 100 bar at 298 K (computed using grand canonical Monte Carlo) - - noun: hydrogen working capacity between 5 and 100 bar at 298 K (computed using grand canonical Monte Carlo) - - id: outputs.H2-working_capacity_5_to_100_bar_77_K-g--L - description: H2 working capacity - type: continuous - units: g/L - significant_digits: 3 - names: - - noun: H2 working capacity between 5 and 100 bar at 77 K (computed using grand canonical Monte Carlo) - - noun: hydrogen working capacity between 5 and 100 bar at 77 K (computed using grand canonical Monte Carlo) - - id: outputs.H2-working_capacity_1_to_100_bar_77_K-g--L - description: H2 working capacity - type: continuous - units: g/L - significant_digits: 3 - names: - - noun: H2 working capacity between 1 and 100 bar at 77 K (computed using grand canonical Monte Carlo) - - noun: hydrogen working capacity between 1 and 100 bar at 77 K (computed using grand canonical Monte Carlo) - - id: info.pld - type: continuous - units: \AA - significant_digits: 3 - names: - - noun: pore limiting diameter - - noun: pore limiting diameter (PLD) - - id: info.lcd - type: continuous - units: \AA - significant_digits: 3 - names: - - noun: largest cavity diameter - - noun: largest cavity diameter (LCD) - - id: info.density - type: continuous - units: g/cm^3 - significant_digits: 3 - names: - - noun: density + - id: lg10_CO2_Henry + description: CO2 Henry coefficient + type: continuous + units: mol/kg/Pa + significant_digits: 3 + names: + - noun: 10-based logarithm of CO2 Henry coefficient (computed using grand canonical Monte Carlo) + - noun: 10-based logarithm of 
carbon dioxide Henry coefficient (computed using grand canonical Monte Carlo) + - id: lg10_N2_Henry + description: N2 Henry coefficient + type: continuous + units: mol/kg/Pa + significant_digits: 3 + names: + - noun: 10-based logarithm of N2 Henry coefficient (computed using grand canonical Monte Carlo) + - noun: 10-based logarithm of nitrogen Henry coefficient (computed using grand canonical Monte Carlo) + - id: lg10_CH4_Henry + description: CH4 Henry coefficient + type: continuous + units: mol/kg/Pa + significant_digits: 3 + names: + - noun: 10-based logarithm of CH4 Henry coefficient (computed using grand canonical Monte Carlo) + - noun: 10-based logarithm of methane Henry coefficient (computed using grand canonical Monte Carlo) + - id: lg10_O2_Henry + description: O2 Henry coefficient + type: continuous + units: mol/kg/Pa + significant_digits: 3 + names: + - noun: 10-based logarithm of O2 Henry coefficient (computed using grand canonical Monte Carlo) + - noun: 10-based logarithm of oxygen Henry coefficient (computed using grand canonical Monte Carlo) + - id: lg10_Xe_Henry + description: Xe Henry coefficient + type: continuous + units: mol/kg/Pa + significant_digits: 3 + names: + - noun: 10-based logarithm of Xe Henry coefficient (computed using grand canonical Monte Carlo) + - noun: 10-based logarithm of xenon Henry coefficient (computed using grand canonical Monte Carlo) + - id: lg10_Kr_Henry + description: Kr Henry coefficient + type: continuous + units: mol/kg/Pa + significant_digits: 3 + names: + - noun: 10-based logarithm of Kr Henry coefficient (computed using grand canonical Monte Carlo) + - noun: 10-based logarithm of krypton Henry coefficient (computed using grand canonical Monte Carlo) + - id: lg10_H2S_Henry + description: H2S Henry coefficient + type: continuous + units: mol/kg/Pa + significant_digits: 3 + names: + - noun: 10-based logarithm of H2S Henry coefficient (computed using grand canonical Monte Carlo) + - noun: 10-based logarithm of hydrogen sulfide
Henry coefficient (computed using grand canonical Monte Carlo) + - id: lg10_H2O_Henry + description: H2O Henry coefficient + type: continuous + units: mol/kg/Pa + significant_digits: 3 + names: + - noun: 10-based logarithm of H2O Henry coefficient (computed using grand canonical Monte Carlo) + - noun: 10-based logarithm of water Henry coefficient (computed using grand canonical Monte Carlo) + - id: outputs.CO2-adsorption_energy-kJ--mol + description: CO2 adsorption energy + type: continuous + units: kJ/mol + significant_digits: 3 + names: + - noun: CO2 adsorption energy (computed using grand canonical Monte Carlo) + - noun: carbon dioxide adsorption energy (computed using grand canonical Monte Carlo) + - id: outputs.N2-adsorption_energy-kJ--mol + description: N2 adsorption energy + type: continuous + units: kJ/mol + significant_digits: 3 + names: + - noun: N2 adsorption energy (computed using grand canonical Monte Carlo) + - noun: nitrogen adsorption energy (computed using grand canonical Monte Carlo) + - id: outputs.CH4-adsorption_energy-kJ--mol + description: CH4 adsorption energy + type: continuous + units: kJ/mol + significant_digits: 3 + names: + - noun: CH4 adsorption energy (computed using grand canonical Monte Carlo) + - noun: methane adsorption energy (computed using grand canonical Monte Carlo) + - id: outputs.O2-adsorption_energy-kJ--mol + description: O2 adsorption energy + type: continuous + units: kJ/mol + significant_digits: 3 + names: + - noun: O2 adsorption energy (computed using grand canonical Monte Carlo) + - noun: oxygen adsorption energy (computed using grand canonical Monte Carlo) + - id: outputs.Xe-adsorption_energy-kJ--mol + description: Xe adsorption energy + type: continuous + units: kJ/mol + significant_digits: 3 + names: + - noun: Xe adsorption energy (computed using grand canonical Monte Carlo) + - noun: xenon adsorption energy (computed using grand canonical Monte Carlo) + - id: outputs.Kr-adsorption_energy-kJ--mol + description: Kr 
adsorption energy + type: continuous + units: kJ/mol + significant_digits: 3 + names: + - noun: Kr adsorption energy (computed using grand canonical Monte Carlo) + - noun: krypton adsorption energy (computed using grand canonical Monte Carlo) + - id: outputs.H2S-adsorption_energy-kJ--mol + description: H2S adsorption energy + type: continuous + units: kJ/mol + significant_digits: 3 + names: + - noun: H2S adsorption energy (computed using grand canonical Monte Carlo) + - noun: hydrogen sulfide adsorption energy (computed using grand canonical Monte Carlo) + - id: outputs.H2O-adsorption_energy-kJ--mol + description: H2O adsorption energy + type: continuous + units: kJ/mol + significant_digits: 3 + names: + - noun: H2O adsorption energy (computed using grand canonical Monte Carlo) + - noun: water adsorption energy (computed using grand canonical Monte Carlo) + - id: outputs.H2S--H2O-selectivity_298_K- + description: H2S/H2O selectivity + type: continuous + units: dimensionless + significant_digits: 3 + names: + - noun: H2S/H2O selectivity (computed using grand canonical Monte Carlo) + - noun: hydrogen sulfide/water selectivity (computed using grand canonical Monte Carlo) + - id: outputs.CH4--N2-selectivity_298_K- + description: CH4/N2 selectivity + type: continuous + units: dimensionless + significant_digits: 3 + names: + - noun: CH4/N2 selectivity (computed using grand canonical Monte Carlo) + - noun: methane/nitrogen selectivity (computed using grand canonical Monte Carlo) + - id: outputs.Xe--Kr-selectivity_298_K- + description: Xe/Kr selectivity + type: continuous + units: dimensionless + significant_digits: 3 + names: + - noun: Xe/Kr selectivity (computed using grand canonical Monte Carlo) + - noun: xenon/krypton selectivity (computed using grand canonical Monte Carlo) + - id: outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3 + description: CH4 working capacity + type: continuous + units: cm^3 STP/cm^3 + significant_digits: 3 + names: + - noun: CH4 
working capacity between 58 and 65 bar at 298 K (computed using grand canonical Monte Carlo) + - noun: methane working capacity between 58 and 65 bar at 298 K (computed using grand canonical Monte Carlo) + - id: outputs.CH4-working_capacity_mol_58_to_65_bar_298_K-mol--kg + description: CH4 working capacity + type: continuous + units: mol/kg + significant_digits: 3 + names: + - noun: CH4 working capacity between 58 and 65 bar at 298 K (computed using grand canonical Monte Carlo) + - noun: methane working capacity between 58 and 65 bar at 298 K (computed using grand canonical Monte Carlo) + - id: outputs.O2-working_capacity_vol_5_to_140_bar_298_K-cm3_STP--cm3 + description: O2 working capacity + type: continuous + units: cm^3 STP/cm^3 + significant_digits: 3 + names: + - noun: O2 working capacity between 5 and 140 bar at 298 K (computed using grand canonical Monte Carlo) + - noun: oxygen working capacity between 5 and 140 bar at 298 K (computed using grand canonical Monte Carlo) + - id: outputs.O2-working_capacity_mol_5_to_140_bar_298_K-mol--kg + description: O2 working capacity + type: continuous + units: mol/kg + significant_digits: 3 + names: + - noun: O2 working capacity between 5 and 140 bar at 298 K (computed using grand canonical Monte Carlo) + - noun: oxygen working capacity between 5 and 140 bar at 298 K (computed using grand canonical Monte Carlo) + - id: outputs.H2-working_capacity_5_to_100_bar_298_to_198_K-g--L + description: H2 working capacity + type: continuous + units: g/L + significant_digits: 3 + names: + - noun: H2 working capacity between 5 and 100 bar at 298 K (computed using grand canonical Monte Carlo) + - noun: hydrogen working capacity between 5 and 100 bar at 298 K (computed using grand canonical Monte Carlo) + - id: outputs.H2-working_capacity_5_to_100_bar_77_K-g--L + description: H2 working capacity + type: continuous + units: g/L + significant_digits: 3 + names: + - noun: H2 working capacity between 5 and 100 bar at 77 K (computed using 
grand canonical Monte Carlo) + - noun: hydrogen working capacity between 5 and 100 bar at 77 K (computed using grand canonical Monte Carlo) + - id: outputs.H2-working_capacity_1_to_100_bar_77_K-g--L + description: H2 working capacity + type: continuous + units: g/L + significant_digits: 3 + names: + - noun: H2 working capacity between 1 and 100 bar at 77 K (computed using grand canonical Monte Carlo) + - noun: hydrogen working capacity between 1 and 100 bar at 77 K (computed using grand canonical Monte Carlo) + - id: info.pld + type: continuous + units: \AA + significant_digits: 3 + names: + - noun: pore limiting diameter + - noun: pore limiting diameter (PLD) + - id: info.lcd + type: continuous + units: \AA + significant_digits: 3 + names: + - noun: largest cavity diameter + - noun: largest cavity diameter (LCD) + - id: info.density + type: continuous + units: g/cm^3 + significant_digits: 3 + names: + - noun: density identifiers: - - id: info.mofid.mofid - type: Other - description: MOFId - - id: info.mofid.smiles_nodes - type: Other - description: SMILES of nodes - - id: info.mofid.smiles_linkers - type: Other - description: SMILES of linkers - - id: info.mofid.smiles - type: Other - description: SMILES - - id: info.mofid.topology - type: Other - description: Topology RCSR ID - - id: info.symmetry.spacegroup_number - type: Other - description: Spacegroup number - names: - - noun: space group number + - id: info.mofid.mofid + type: Other + description: MOFId + - id: info.mofid.smiles_nodes + type: Other + description: SMILES of nodes + - id: info.mofid.smiles_linkers + type: Other + description: SMILES of linkers + - id: info.mofid.smiles + type: Other + description: SMILES + - id: info.mofid.topology + type: Other + description: Topology RCSR ID + - id: info.symmetry.spacegroup_number + type: Other + description: Spacegroup number + names: + - noun: space group number license: CC-BY-4.0 num_points: 88 bibtex: - - |- - @article{Rosen_2021, - doi = 
{10.1016/j.matt.2021.02.015}, - url = {https://doi.org/10.1016%2Fj.matt.2021.02.015}, - year = 2021, - month = {may}, - publisher = {Elsevier {BV}}, - volume = {4}, - number = {5}, - pages = {1578--1597}, - author = {Andrew S. Rosen and Shaelyn M. Iyer and Debmalya Ray and Zhenpeng Yao and Al{\'{a}}n Aspuru-Guzik and Laura Gagliardi and Justin M. Notestein and Randall Q. Snurr}, - title = {Machine learning the quantum-chemical properties of metal{\textendash}organic frameworks for accelerated materials discovery}, - journal = {Matter} - } - - |- - @article{Rosen_2022, - doi = {10.1038/s41524-022-00796-6}, - url = {https://doi.org/10.1038%2Fs41524-022-00796-6}, - year = 2022, - month = {may}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {8}, - number = {1}, - author = {Andrew S. Rosen and Victor Fung and Patrick Huck and Cody T. O'Donnell and Matthew K. Horton and Donald G. Truhlar and Kristin A. Persson and Justin M. Notestein and Randall Q. Snurr}, - title = {High-throughput predictions of metal{\textendash}organic framework electronic properties: theoretical challenges, graph neural networks, and data exploration}, - journal = {npj Comput Mater} - } - - |- - @article{Jablonka_2023, - doi = {10.1021/acscentsci.2c01177}, - url = {https://doi.org/10.1021%2Facscentsci.2c01177}, - year = 2023, - month = {mar}, - publisher = {American Chemical Society ({ACS})}, - volume = {9}, - number = {4}, - pages = {563--581}, - author = {Kevin Maik Jablonka and Andrew S. Rosen and Aditi S. Krishnapriyan and Berend Smit}, - title = {An Ecosystem for Digital Reticular Chemistry}, - journal = {ACS Cent. Sci.} Central Science} - } + - |- + @article{Rosen_2021, + doi = {10.1016/j.matt.2021.02.015}, + url = {https://doi.org/10.1016%2Fj.matt.2021.02.015}, + year = 2021, + month = {may}, + publisher = {Elsevier {BV}}, + volume = {4}, + number = {5}, + pages = {1578--1597}, + author = {Andrew S. Rosen and Shaelyn M. 
Iyer and Debmalya Ray and Zhenpeng Yao and Al{\'{a}}n Aspuru-Guzik and Laura Gagliardi and Justin M. Notestein and Randall Q. Snurr}, + title = {Machine learning the quantum-chemical properties of metal{\textendash}organic frameworks for accelerated materials discovery}, + journal = {Matter} + } + - |- + @article{Rosen_2022, + doi = {10.1038/s41524-022-00796-6}, + url = {https://doi.org/10.1038%2Fs41524-022-00796-6}, + year = 2022, + month = {may}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {8}, + number = {1}, + author = {Andrew S. Rosen and Victor Fung and Patrick Huck and Cody T. O'Donnell and Matthew K. Horton and Donald G. Truhlar and Kristin A. Persson and Justin M. Notestein and Randall Q. Snurr}, + title = {High-throughput predictions of metal{\textendash}organic framework electronic properties: theoretical challenges, graph neural networks, and data exploration}, + journal = {npj Comput Mater} + } + - |- + @article{Jablonka_2023, + doi = {10.1021/acscentsci.2c01177}, + url = {https://doi.org/10.1021%2Facscentsci.2c01177}, + year = 2023, + month = {mar}, + publisher = {American Chemical Society ({ACS})}, + volume = {9}, + number = {4}, + pages = {563--581}, + author = {Kevin Maik Jablonka and Andrew S. Rosen and Aditi S. Krishnapriyan and Berend Smit}, + title = {An Ecosystem for Digital Reticular Chemistry}, + journal = {ACS Central Science} + } templates: - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {lg10_CO2_Henry__names__noun} of {lg10_CO2_Henry#} {lg10_CO2_Henry__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {lg10_CO2_Henry__names__noun} - of {lg10_CO2_Henry#} {lg10_CO2_Henry__units}.
- - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {lg10_N2_Henry__names__noun} of {lg10_N2_Henry#} {lg10_N2_Henry__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {lg10_N2_Henry__names__noun} - of {lg10_N2_Henry#} {lg10_N2_Henry__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {lg10_CH4_Henry__names__noun} of {lg10_CH4_Henry#} {lg10_CH4_Henry__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {lg10_CH4_Henry__names__noun} - of {lg10_CH4_Henry#} {lg10_CH4_Henry__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {lg10_O2_Henry__names__noun} of {lg10_O2_Henry#} {lg10_O2_Henry__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {lg10_O2_Henry__names__noun} - of {lg10_O2_Henry#} {lg10_O2_Henry__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {lg10_Xe_Henry__names__noun} of {lg10_Xe_Henry#} {lg10_Xe_Henry__units}. 
- - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {lg10_Xe_Henry__names__noun} - of {lg10_Xe_Henry#} {lg10_Xe_Henry__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {lg10_Kr_Henry__names__noun} of {lg10_Kr_Henry#} {lg10_Kr_Henry__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {lg10_Kr_Henry__names__noun} - of {lg10_Kr_Henry#} {lg10_Kr_Henry__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {lg10_H2S_Henry__names__noun} of {lg10_H2S_Henry#} {lg10_H2S_Henry__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {lg10_H2S_Henry__names__noun} - of {lg10_H2S_Henry#} {lg10_H2S_Henry__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {lg10_H2O_Henry__names__noun} of {lg10_H2O_Henry#} {lg10_H2O_Henry__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {lg10_H2O_Henry__names__noun} - of {lg10_H2O_Henry#} {lg10_H2O_Henry__units}. 
- - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.CO2-adsorption_energy-kJ--mol__names__noun} - of {outputs.CO2-adsorption_energy-kJ--mol#} {outputs.CO2-adsorption_energy-kJ--mol__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.CO2-adsorption_energy-kJ--mol__names__noun} - of {outputs.CO2-adsorption_energy-kJ--mol#} {outputs.CO2-adsorption_energy-kJ--mol__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.N2-adsorption_energy-kJ--mol__names__noun} - of {outputs.N2-adsorption_energy-kJ--mol#} {outputs.N2-adsorption_energy-kJ--mol__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.N2-adsorption_energy-kJ--mol__names__noun} - of {outputs.N2-adsorption_energy-kJ--mol#} {outputs.N2-adsorption_energy-kJ--mol__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.CH4-adsorption_energy-kJ--mol__names__noun} - of {outputs.CH4-adsorption_energy-kJ--mol#} {outputs.CH4-adsorption_energy-kJ--mol__units}. 
- - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.CH4-adsorption_energy-kJ--mol__names__noun} - of {outputs.CH4-adsorption_energy-kJ--mol#} {outputs.CH4-adsorption_energy-kJ--mol__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.O2-adsorption_energy-kJ--mol__names__noun} - of {outputs.O2-adsorption_energy-kJ--mol#} {outputs.O2-adsorption_energy-kJ--mol__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.O2-adsorption_energy-kJ--mol__names__noun} - of {outputs.O2-adsorption_energy-kJ--mol#} {outputs.O2-adsorption_energy-kJ--mol__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.Xe-adsorption_energy-kJ--mol__names__noun} - of {outputs.Xe-adsorption_energy-kJ--mol#} {outputs.Xe-adsorption_energy-kJ--mol__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.Xe-adsorption_energy-kJ--mol__names__noun} - of {outputs.Xe-adsorption_energy-kJ--mol#} {outputs.Xe-adsorption_energy-kJ--mol__units}. 
- - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.Kr-adsorption_energy-kJ--mol__names__noun} - of {outputs.Kr-adsorption_energy-kJ--mol#} {outputs.Kr-adsorption_energy-kJ--mol__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.Kr-adsorption_energy-kJ--mol__names__noun} - of {outputs.Kr-adsorption_energy-kJ--mol#} {outputs.Kr-adsorption_energy-kJ--mol__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.H2S-adsorption_energy-kJ--mol__names__noun} - of {outputs.H2S-adsorption_energy-kJ--mol#} {outputs.H2S-adsorption_energy-kJ--mol__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.H2S-adsorption_energy-kJ--mol__names__noun} - of {outputs.H2S-adsorption_energy-kJ--mol#} {outputs.H2S-adsorption_energy-kJ--mol__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.H2O-adsorption_energy-kJ--mol__names__noun} - of {outputs.H2O-adsorption_energy-kJ--mol#} {outputs.H2O-adsorption_energy-kJ--mol__units}. 
- - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.H2O-adsorption_energy-kJ--mol__names__noun} - of {outputs.H2O-adsorption_energy-kJ--mol#} {outputs.H2O-adsorption_energy-kJ--mol__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.H2S--H2O-selectivity_298_K-__names__noun} of - {outputs.H2S--H2O-selectivity_298_K-#}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {outputs.H2S--H2O-selectivity_298_K-__names__noun} - of {outputs.H2S--H2O-selectivity_298_K-#}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.CH4--N2-selectivity_298_K-__names__noun} of - {outputs.CH4--N2-selectivity_298_K-#}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {outputs.CH4--N2-selectivity_298_K-__names__noun} - of {outputs.CH4--N2-selectivity_298_K-#}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.Xe--Kr-selectivity_298_K-__names__noun} of {outputs.Xe--Kr-selectivity_298_K-#}. 
- - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {outputs.Xe--Kr-selectivity_298_K-__names__noun} - of {outputs.Xe--Kr-selectivity_298_K-#}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3__names__noun} - of {outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3#} {outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3__names__noun} - of {outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3#} {outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.CH4-working_capacity_mol_58_to_65_bar_298_K-mol--kg__names__noun} - of {outputs.CH4-working_capacity_mol_58_to_65_bar_298_K-mol--kg#} {outputs.CH4-working_capacity_mol_58_to_65_bar_298_K-mol--kg__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {outputs.CH4-working_capacity_mol_58_to_65_bar_298_K-mol--kg__names__noun} - of {outputs.CH4-working_capacity_mol_58_to_65_bar_298_K-mol--kg#} {outputs.CH4-working_capacity_mol_58_to_65_bar_298_K-mol--kg__units}. 
- - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.O2-working_capacity_vol_5_to_140_bar_298_K-cm3_STP--cm3__names__noun} - of {outputs.O2-working_capacity_vol_5_to_140_bar_298_K-cm3_STP--cm3#} {outputs.O2-working_capacity_vol_5_to_140_bar_298_K-cm3_STP--cm3__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.O2-working_capacity_vol_5_to_140_bar_298_K-cm3_STP--cm3__names__noun} - of {outputs.O2-working_capacity_vol_5_to_140_bar_298_K-cm3_STP--cm3#} {outputs.O2-working_capacity_vol_5_to_140_bar_298_K-cm3_STP--cm3__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.O2-working_capacity_mol_5_to_140_bar_298_K-mol--kg__names__noun} - of {outputs.O2-working_capacity_mol_5_to_140_bar_298_K-mol--kg#} {outputs.O2-working_capacity_mol_5_to_140_bar_298_K-mol--kg__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.O2-working_capacity_mol_5_to_140_bar_298_K-mol--kg__names__noun} - of {outputs.O2-working_capacity_mol_5_to_140_bar_298_K-mol--kg#} {outputs.O2-working_capacity_mol_5_to_140_bar_298_K-mol--kg__units}. 
- - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.H2-working_capacity_5_to_100_bar_298_to_198_K-g--L__names__noun} - of {outputs.H2-working_capacity_5_to_100_bar_298_to_198_K-g--L#} {outputs.H2-working_capacity_5_to_100_bar_298_to_198_K-g--L__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.H2-working_capacity_5_to_100_bar_298_to_198_K-g--L__names__noun} - of {outputs.H2-working_capacity_5_to_100_bar_298_to_198_K-g--L#} {outputs.H2-working_capacity_5_to_100_bar_298_to_198_K-g--L__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.H2-working_capacity_5_to_100_bar_77_K-g--L__names__noun} - of {outputs.H2-working_capacity_5_to_100_bar_77_K-g--L#} {outputs.H2-working_capacity_5_to_100_bar_77_K-g--L__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.H2-working_capacity_5_to_100_bar_77_K-g--L__names__noun} - of {outputs.H2-working_capacity_5_to_100_bar_77_K-g--L#} {outputs.H2-working_capacity_5_to_100_bar_77_K-g--L__units}. 
- - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.H2-working_capacity_1_to_100_bar_77_K-g--L__names__noun} - of {outputs.H2-working_capacity_1_to_100_bar_77_K-g--L#} {outputs.H2-working_capacity_1_to_100_bar_77_K-g--L__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.H2-working_capacity_1_to_100_bar_77_K-g--L__names__noun} - of {outputs.H2-working_capacity_1_to_100_bar_77_K-g--L#} {outputs.H2-working_capacity_1_to_100_bar_77_K-g--L__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {info.pld__names__noun} - of {info.pld#} {info.pld__units} - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {info.lcd__names__noun} - of {info.lcd#} {info.lcd__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {info.density__names__noun} - of {info.density#} {info.density__units}. - - |- - User: I {#want|would like|have|need|must!} to {#design|synthesize|find!} a {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with specific gas adsorption properties. - Assistant: {#How can I help you?|How can I be of assistance?|What can I do for you?|That's cool, how can I help?|Interesting, can I be of any help?|Seems interesting, how can I support you?!} - User: I {#want|would like!} the {outputs.H2S--H2O-selectivity_298_K-__names__noun} to be {outputs.H2S--H2O-selectivity_298_K-#} and the {outputs.CH4--N2-selectivity_298_K-__names__noun} to be {outputs.CH4--N2-selectivity_298_K-#}. 
- Assistant: {#In this case, I would recommend the following MOF:|I found the following MOF for you:|I suggest|I recommend!} {info.mofid.mofid#}. - - |- - User: I {#want|would like|have|need|must!} to {#design|synthesize|find!} a {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with specific gas adsorption properties. - Assistant: {#How can I help you?|How can I be of assistance?|What can I do for you?|That's cool, how can I help?|Interesting, can I be of any help?|Seems interesting, how can I support you?!} - User: I {#want|would like!} the {outputs.CH4--N2-selectivity_298_K-__names__noun} to be {outputs.CH4--N2-selectivity_298_K-#}, the {outputs.H2S--H2O-selectivity_298_K-__names__noun} to be {outputs.H2S--H2O-selectivity_298_K-#}, and the {outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3__names__noun} to be {outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3#}. - Assistant: {#In this case, I would recommend the following MOF:|I found the following MOF for you:|I suggest|I recommend!} {info.mofid.mofid#}. - - |- - User: I {#want|would like|have|need|must!} to {#design|synthesize|find!} a {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with specific gas adsorption properties. - Assistant: {#How can I help you?|How can I be of assistance?|What can I do for you?|That's cool, how can I help?|Interesting, can I be of any help?|Seems interesting, how can I support you?!} - User: I {#want|would like!} the {outputs.CO2-adsorption_energy-kJ--mol__names__noun} to be {outputs.CO2-adsorption_energy-kJ--mol#}, the {outputs.N2-adsorption_energy-kJ--mol__names__noun} to be {outputs.N2-adsorption_energy-kJ--mol#}, and the {outputs.CH4-adsorption_energy-kJ--mol__names__noun} to be {outputs.CH4-adsorption_energy-kJ--mol#}. - Assistant: {#In this case, I would recommend the following MOF:|I found the following MOF for you:|I suggest|I recommend!} {info.mofid.mofid#}. 
+ - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {lg10_CO2_Henry__names__noun} of {lg10_CO2_Henry#} {lg10_CO2_Henry__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {lg10_CO2_Henry__names__noun} of {lg10_CO2_Henry#} {lg10_CO2_Henry__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {lg10_N2_Henry__names__noun} of {lg10_N2_Henry#} {lg10_N2_Henry__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {lg10_N2_Henry__names__noun} of {lg10_N2_Henry#} {lg10_N2_Henry__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {lg10_CH4_Henry__names__noun} of {lg10_CH4_Henry#} {lg10_CH4_Henry__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {lg10_CH4_Henry__names__noun} of {lg10_CH4_Henry#} {lg10_CH4_Henry__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {lg10_O2_Henry__names__noun} of {lg10_O2_Henry#} {lg10_O2_Henry__units}. 
+ - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {lg10_O2_Henry__names__noun} of {lg10_O2_Henry#} {lg10_O2_Henry__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {lg10_Xe_Henry__names__noun} of {lg10_Xe_Henry#} {lg10_Xe_Henry__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {lg10_Xe_Henry__names__noun} of {lg10_Xe_Henry#} {lg10_Xe_Henry__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {lg10_Kr_Henry__names__noun} of {lg10_Kr_Henry#} {lg10_Kr_Henry__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {lg10_Kr_Henry__names__noun} of {lg10_Kr_Henry#} {lg10_Kr_Henry__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {lg10_H2S_Henry__names__noun} of {lg10_H2S_Henry#} {lg10_H2S_Henry__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {lg10_H2S_Henry__names__noun} of {lg10_H2S_Henry#} {lg10_H2S_Henry__units}. 
+ - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {lg10_H2O_Henry__names__noun} of {lg10_H2O_Henry#} {lg10_H2O_Henry__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {lg10_H2O_Henry__names__noun} of {lg10_H2O_Henry#} {lg10_H2O_Henry__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.CO2-adsorption_energy-kJ--mol__names__noun} of {outputs.CO2-adsorption_energy-kJ--mol#} {outputs.CO2-adsorption_energy-kJ--mol__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.CO2-adsorption_energy-kJ--mol__names__noun} of {outputs.CO2-adsorption_energy-kJ--mol#} {outputs.CO2-adsorption_energy-kJ--mol__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.N2-adsorption_energy-kJ--mol__names__noun} of {outputs.N2-adsorption_energy-kJ--mol#} {outputs.N2-adsorption_energy-kJ--mol__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.N2-adsorption_energy-kJ--mol__names__noun} of {outputs.N2-adsorption_energy-kJ--mol#} {outputs.N2-adsorption_energy-kJ--mol__units}. 
+ - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.CH4-adsorption_energy-kJ--mol__names__noun} of {outputs.CH4-adsorption_energy-kJ--mol#} {outputs.CH4-adsorption_energy-kJ--mol__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.CH4-adsorption_energy-kJ--mol__names__noun} of {outputs.CH4-adsorption_energy-kJ--mol#} {outputs.CH4-adsorption_energy-kJ--mol__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.O2-adsorption_energy-kJ--mol__names__noun} of {outputs.O2-adsorption_energy-kJ--mol#} {outputs.O2-adsorption_energy-kJ--mol__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.O2-adsorption_energy-kJ--mol__names__noun} of {outputs.O2-adsorption_energy-kJ--mol#} {outputs.O2-adsorption_energy-kJ--mol__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.Xe-adsorption_energy-kJ--mol__names__noun} of {outputs.Xe-adsorption_energy-kJ--mol#} {outputs.Xe-adsorption_energy-kJ--mol__units}. 
+ - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.Xe-adsorption_energy-kJ--mol__names__noun} of {outputs.Xe-adsorption_energy-kJ--mol#} {outputs.Xe-adsorption_energy-kJ--mol__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.Kr-adsorption_energy-kJ--mol__names__noun} of {outputs.Kr-adsorption_energy-kJ--mol#} {outputs.Kr-adsorption_energy-kJ--mol__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.Kr-adsorption_energy-kJ--mol__names__noun} of {outputs.Kr-adsorption_energy-kJ--mol#} {outputs.Kr-adsorption_energy-kJ--mol__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.H2S-adsorption_energy-kJ--mol__names__noun} of {outputs.H2S-adsorption_energy-kJ--mol#} {outputs.H2S-adsorption_energy-kJ--mol__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.H2S-adsorption_energy-kJ--mol__names__noun} of {outputs.H2S-adsorption_energy-kJ--mol#} {outputs.H2S-adsorption_energy-kJ--mol__units}. 
+ - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.H2O-adsorption_energy-kJ--mol__names__noun} of {outputs.H2O-adsorption_energy-kJ--mol#} {outputs.H2O-adsorption_energy-kJ--mol__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.H2O-adsorption_energy-kJ--mol__names__noun} of {outputs.H2O-adsorption_energy-kJ--mol#} {outputs.H2O-adsorption_energy-kJ--mol__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.H2S--H2O-selectivity_298_K-__names__noun} of {outputs.H2S--H2O-selectivity_298_K-#}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {outputs.H2S--H2O-selectivity_298_K-__names__noun} of {outputs.H2S--H2O-selectivity_298_K-#}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.CH4--N2-selectivity_298_K-__names__noun} of {outputs.CH4--N2-selectivity_298_K-#}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {outputs.CH4--N2-selectivity_298_K-__names__noun} of {outputs.CH4--N2-selectivity_298_K-#}. 
+ - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.Xe--Kr-selectivity_298_K-__names__noun} of {outputs.Xe--Kr-selectivity_298_K-#}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {outputs.Xe--Kr-selectivity_298_K-__names__noun} of {outputs.Xe--Kr-selectivity_298_K-#}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3__names__noun} of {outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3#} {outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3__names__noun} of {outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3#} {outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.CH4-working_capacity_mol_58_to_65_bar_298_K-mol--kg__names__noun} of {outputs.CH4-working_capacity_mol_58_to_65_bar_298_K-mol--kg#} {outputs.CH4-working_capacity_mol_58_to_65_bar_298_K-mol--kg__units}. 
+ - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {outputs.CH4-working_capacity_mol_58_to_65_bar_298_K-mol--kg__names__noun} of {outputs.CH4-working_capacity_mol_58_to_65_bar_298_K-mol--kg#} {outputs.CH4-working_capacity_mol_58_to_65_bar_298_K-mol--kg__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.O2-working_capacity_vol_5_to_140_bar_298_K-cm3_STP--cm3__names__noun} of {outputs.O2-working_capacity_vol_5_to_140_bar_298_K-cm3_STP--cm3#} {outputs.O2-working_capacity_vol_5_to_140_bar_298_K-cm3_STP--cm3__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.O2-working_capacity_vol_5_to_140_bar_298_K-cm3_STP--cm3__names__noun} of {outputs.O2-working_capacity_vol_5_to_140_bar_298_K-cm3_STP--cm3#} {outputs.O2-working_capacity_vol_5_to_140_bar_298_K-cm3_STP--cm3__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.O2-working_capacity_mol_5_to_140_bar_298_K-mol--kg__names__noun} of {outputs.O2-working_capacity_mol_5_to_140_bar_298_K-mol--kg#} {outputs.O2-working_capacity_mol_5_to_140_bar_298_K-mol--kg__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.O2-working_capacity_mol_5_to_140_bar_298_K-mol--kg__names__noun} of {outputs.O2-working_capacity_mol_5_to_140_bar_298_K-mol--kg#} {outputs.O2-working_capacity_mol_5_to_140_bar_298_K-mol--kg__units}. 
+ - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.H2-working_capacity_5_to_100_bar_298_to_198_K-g--L__names__noun} of {outputs.H2-working_capacity_5_to_100_bar_298_to_198_K-g--L#} {outputs.H2-working_capacity_5_to_100_bar_298_to_198_K-g--L__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.H2-working_capacity_5_to_100_bar_298_to_198_K-g--L__names__noun} of {outputs.H2-working_capacity_5_to_100_bar_298_to_198_K-g--L#} {outputs.H2-working_capacity_5_to_100_bar_298_to_198_K-g--L__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.H2-working_capacity_5_to_100_bar_77_K-g--L__names__noun} of {outputs.H2-working_capacity_5_to_100_bar_77_K-g--L#} {outputs.H2-working_capacity_5_to_100_bar_77_K-g--L__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.H2-working_capacity_5_to_100_bar_77_K-g--L__names__noun} of {outputs.H2-working_capacity_5_to_100_bar_77_K-g--L#} {outputs.H2-working_capacity_5_to_100_bar_77_K-g--L__units}. 
+ - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has an {outputs.H2-working_capacity_1_to_100_bar_77_K-g--L__names__noun} of {outputs.H2-working_capacity_1_to_100_bar_77_K-g--L#} {outputs.H2-working_capacity_1_to_100_bar_77_K-g--L__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has an {outputs.H2-working_capacity_1_to_100_bar_77_K-g--L__names__noun} of {outputs.H2-working_capacity_1_to_100_bar_77_K-g--L#} {outputs.H2-working_capacity_1_to_100_bar_77_K-g--L__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {info.pld__names__noun} of {info.pld#} {info.pld__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {info.lcd__names__noun} of {info.lcd#} {info.lcd__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFId {info.mofid.mofid#} has a {info.density__names__noun} of {info.density#} {info.density__units}. + - |- + User: I {#want|would like|have|need|must!} to {#design|synthesize|find!} a {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with specific gas adsorption properties. + Assistant: {#How can I help you?|How can I be of assistance?|What can I do for you?|That's cool, how can I help?|Interesting, can I be of any help?|Seems interesting, how can I support you?!} + User: I {#want|would like!} the {outputs.H2S--H2O-selectivity_298_K-__names__noun} to be {outputs.H2S--H2O-selectivity_298_K-#} and the {outputs.CH4--N2-selectivity_298_K-__names__noun} to be {outputs.CH4--N2-selectivity_298_K-#}.
+ Assistant: {#In this case, I would recommend the following MOF:|I found the following MOF for you:|I suggest|I recommend!} {info.mofid.mofid#}. + - |- + User: I {#want|would like|have|need|must!} to {#design|synthesize|find!} a {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with specific gas adsorption properties. + Assistant: {#How can I help you?|How can I be of assistance?|What can I do for you?|That's cool, how can I help?|Interesting, can I be of any help?|Seems interesting, how can I support you?!} + User: I {#want|would like!} the {outputs.CH4--N2-selectivity_298_K-__names__noun} to be {outputs.CH4--N2-selectivity_298_K-#}, the {outputs.H2S--H2O-selectivity_298_K-__names__noun} to be {outputs.H2S--H2O-selectivity_298_K-#}, and the {outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3__names__noun} to be {outputs.CH4-working_capacity_vol_58_to_65_bar_298_K-cm3_STP--cm3#}. + Assistant: {#In this case, I would recommend the following MOF:|I found the following MOF for you:|I suggest|I recommend!} {info.mofid.mofid#}. + - |- + User: I {#want|would like|have|need|must!} to {#design|synthesize|find!} a {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with specific gas adsorption properties. + Assistant: {#How can I help you?|How can I be of assistance?|What can I do for you?|That's cool, how can I help?|Interesting, can I be of any help?|Seems interesting, how can I support you?!} + User: I {#want|would like!} the {outputs.CO2-adsorption_energy-kJ--mol__names__noun} to be {outputs.CO2-adsorption_energy-kJ--mol#}, the {outputs.N2-adsorption_energy-kJ--mol__names__noun} to be {outputs.N2-adsorption_energy-kJ--mol#}, and the {outputs.CH4-adsorption_energy-kJ--mol__names__noun} to be {outputs.CH4-adsorption_energy-kJ--mol#}. + Assistant: {#In this case, I would recommend the following MOF:|I found the following MOF for you:|I suggest|I recommend!} {info.mofid.mofid#}. 
diff --git a/data/tabular/qmof_quantum/meta.yaml b/data/tabular/qmof_quantum/meta.yaml index f59ef16cb..6b83628b7 100644 --- a/data/tabular/qmof_quantum/meta.yaml +++ b/data/tabular/qmof_quantum/meta.yaml @@ -1,348 +1,309 @@ ---- name: qmof_quantum description: |- - QMOF is a database of electronic properties of MOFs, assembled by Rosen et al. - Jablonka et al. added gas adsorption properties. + QMOF is a database of electronic properties of MOFs, assembled by Rosen et al. + Jablonka et al. added gas adsorption properties. targets: - - id: outputs.pbe.bandgap - type: continuous - significant_digits: 3 - units: eV - names: - - noun: PAW-PBE-D3(BJ) calculated band gap - - noun: PAW-PBE-D3(BJ) computed band gap - - id: outputs.pbe.cbm - type: continuous - significant_digits: 3 - units: eV - names: - - noun: PAW-PBE-D3(BJ) calculated conduction band minimum - - noun: PAW-PBE-D3(BJ) computed conduction band minimum - - noun: PAW-PBE-D3(BJ) calculated conduction band minimum (CBM) - - noun: PAW-PBE-D3(BJ) computed conduction band minimum (CBM) - - noun: PAW-PBE-D3(BJ) calculated energy of conduction band minimum - - id: outputs.pbe.vbm - type: continuous - significant_digits: 3 - units: eV - names: - - noun: PAW-PBE-D3(BJ) calculated valence band maximum - - noun: PAW-PBE-D3(BJ) computed valence band maximum - - noun: PAW-PBE-D3(BJ) calculated valence band maximum (VBM) - - noun: PAW-PBE-D3(BJ) computed valence band maximum (VBM) - - noun: PAW-PBE-D3(BJ) computed energy of valence band maximum (VBM) - - id: outputs.hle17.bandgap - type: continuous - significant_digits: 3 - units: eV - names: - - noun: HLE17 calculated band gap of a PBE-D3(BJ) optimized structure - - noun: HLE17 computed band gap of a PBE-D3(BJ) optimized structure - - noun: HLE17 computed band gap (HLE17 single-point after PBE-D3(BJ) optimization) - - id: outputs.hle17.cbm - type: continuous - significant_digits: 3 - units: eV - names: - - noun: HLE17 calculated conduction band minimum of a PBE-D3(BJ) 
optimized structure - - noun: HLE17 computed conduction band minimum of a PBE-D3(BJ) optimized structure - - id: outputs.hle17.vbm - type: continuous - units: eV - significant_digits: 3 - names: - - noun: HLE17 calculated valence band maximum of a PBE-D3(BJ) optimized structure - - noun: HLE17 computed valence band maximum of a PBE-D3(BJ) optimized structure - - id: outputs.hse06.bandgap - type: continuous - units: eV - significant_digits: 3 - names: - - noun: HSE06 calculated band gap of a PBE-D3(BJ) optimized structure - - noun: HSE06 computed band gap of a PBE-D3(BJ) optimized structure - - noun: HSE06 computed band gap (HSE06 single-point after PBE-D3(BJ) optimization) - - id: outputs.hse06.cbm - type: continuous - units: eV - significant_digits: 3 - names: - - noun: HSE06 calculated conduction band minimum of a PBE-D3(BJ) optimized structure - - noun: HSE06 computed conduction band minimum of a PBE-D3(BJ) optimized structure - - id: outputs.hse06.vbm - type: continuous - units: eV - significant_digits: 3 - names: - - noun: HSE06 calculated valence band maximum of a PBE-D3(BJ) optimized structure - - noun: HSE06 computed valence band maximum of a PBE-D3(BJ) optimized structure - - id: info.pld - type: continuous - units: \AA - significant_digits: 3 - names: - - noun: pore limiting diameter - - noun: pore limiting diameter (PLD) - - id: info.lcd - type: continuous - units: \AA - significant_digits: 3 - names: - - noun: largest cavity diameter - - noun: largest cavity diameter (LCD) - - id: info.density - type: continuous - units: g/cm^3 - significant_digits: 3 - names: - - noun: density + - id: outputs.pbe.bandgap + type: continuous + significant_digits: 3 + units: eV + names: + - noun: PAW-PBE-D3(BJ) calculated band gap + - noun: PAW-PBE-D3(BJ) computed band gap + - id: outputs.pbe.cbm + type: continuous + significant_digits: 3 + units: eV + names: + - noun: PAW-PBE-D3(BJ) calculated conduction band minimum + - noun: PAW-PBE-D3(BJ) computed conduction band 
minimum + - noun: PAW-PBE-D3(BJ) calculated conduction band minimum (CBM) + - noun: PAW-PBE-D3(BJ) computed conduction band minimum (CBM) + - noun: PAW-PBE-D3(BJ) calculated energy of conduction band minimum + - id: outputs.pbe.vbm + type: continuous + significant_digits: 3 + units: eV + names: + - noun: PAW-PBE-D3(BJ) calculated valence band maximum + - noun: PAW-PBE-D3(BJ) computed valence band maximum + - noun: PAW-PBE-D3(BJ) calculated valence band maximum (VBM) + - noun: PAW-PBE-D3(BJ) computed valence band maximum (VBM) + - noun: PAW-PBE-D3(BJ) computed energy of valence band maximum (VBM) + - id: outputs.hle17.bandgap + type: continuous + significant_digits: 3 + units: eV + names: + - noun: HLE17 calculated band gap of a PBE-D3(BJ) optimized structure + - noun: HLE17 computed band gap of a PBE-D3(BJ) optimized structure + - noun: HLE17 computed band gap (HLE17 single-point after PBE-D3(BJ) optimization) + - id: outputs.hle17.cbm + type: continuous + significant_digits: 3 + units: eV + names: + - noun: HLE17 calculated conduction band minimum of a PBE-D3(BJ) optimized structure + - noun: HLE17 computed conduction band minimum of a PBE-D3(BJ) optimized structure + - id: outputs.hle17.vbm + type: continuous + units: eV + significant_digits: 3 + names: + - noun: HLE17 calculated valence band maximum of a PBE-D3(BJ) optimized structure + - noun: HLE17 computed valence band maximum of a PBE-D3(BJ) optimized structure + - id: outputs.hse06.bandgap + type: continuous + units: eV + significant_digits: 3 + names: + - noun: HSE06 calculated band gap of a PBE-D3(BJ) optimized structure + - noun: HSE06 computed band gap of a PBE-D3(BJ) optimized structure + - noun: HSE06 computed band gap (HSE06 single-point after PBE-D3(BJ) optimization) + - id: outputs.hse06.cbm + type: continuous + units: eV + significant_digits: 3 + names: + - noun: HSE06 calculated conduction band minimum of a PBE-D3(BJ) optimized structure + - noun: HSE06 computed conduction band minimum of a 
PBE-D3(BJ) optimized structure + - id: outputs.hse06.vbm + type: continuous + units: eV + significant_digits: 3 + names: + - noun: HSE06 calculated valence band maximum of a PBE-D3(BJ) optimized structure + - noun: HSE06 computed valence band maximum of a PBE-D3(BJ) optimized structure + - id: info.pld + type: continuous + units: \AA + significant_digits: 3 + names: + - noun: pore limiting diameter + - noun: pore limiting diameter (PLD) + - id: info.lcd + type: continuous + units: \AA + significant_digits: 3 + names: + - noun: largest cavity diameter + - noun: largest cavity diameter (LCD) + - id: info.density + type: continuous + units: g/cm^3 + significant_digits: 3 + names: + - noun: density identifiers: - - id: info.mofid.mofid - type: Other - description: MOF ID - - id: info.mofid.smiles_nodes - type: Other - description: SMILES of nodes - - id: info.mofid.smiles_linkers - type: Other - description: SMILES of linkers - - id: info.mofid.smiles - type: Other - description: SMILES - - id: info.mofid.topology - type: Other - description: Topology RCSR ID - - id: info.symmetry.spacegroup_number - type: Other - description: Spacegroup number - names: - - noun: space group number + - id: info.mofid.mofid + type: Other + description: MOF ID + - id: info.mofid.smiles_nodes + type: Other + description: SMILES of nodes + - id: info.mofid.smiles_linkers + type: Other + description: SMILES of linkers + - id: info.mofid.smiles + type: Other + description: SMILES + - id: info.mofid.topology + type: Other + description: Topology RCSR ID + - id: info.symmetry.spacegroup_number + type: Other + description: Spacegroup number + names: + - noun: space group number license: CC-BY-4.0 num_points: 1986 bibtex: - - |- - @article{Rosen_2021, - doi = {10.1016/j.matt.2021.02.015}, - url = {https://doi.org/10.1016%2Fj.matt.2021.02.015}, - year = 2021, - month = {may}, - publisher = {Elsevier {BV}}, - volume = {4}, - number = {5}, - pages = {1578--1597}, - author = {Andrew S. 
Rosen and Shaelyn M. Iyer and Debmalya Ray and Zhenpeng Yao and Al{\'{a}}n Aspuru-Guzik and Laura Gagliardi and Justin M. Notestein and Randall Q. Snurr}, - title = {Machine learning the quantum-chemical properties of metal{\textendash}organic frameworks for accelerated materials discovery}, - journal = {Matter} - } - - |- - @article{Rosen_2022, - doi = {10.1038/s41524-022-00796-6}, - url = {https://doi.org/10.1038%2Fs41524-022-00796-6}, - year = 2022, - month = {may}, - publisher = {Springer Science and Business Media {LLC}}, - volume = {8}, - number = {1}, - author = {Andrew S. Rosen and Victor Fung and Patrick Huck and Cody T. O'Donnell and Matthew K. Horton and Donald G. Truhlar and Kristin A. Persson and Justin M. Notestein and Randall Q. Snurr}, - title = {High-throughput predictions of metal{\textendash}organic framework electronic properties: theoretical challenges, graph neural networks, and data exploration}, - journal = {npj Comput Mater} - } - - |- - @article{Jablonka_2023, - doi = {10.1021/acscentsci.2c01177}, - url = {https://doi.org/10.1021%2Facscentsci.2c01177}, - year = 2023, - month = {mar}, - publisher = {American Chemical Society ({ACS})}, - volume = {9}, - number = {4}, - pages = {563--581}, - author = {Kevin Maik Jablonka and Andrew S. Rosen and Aditi S. Krishnapriyan and Berend Smit}, - title = {An Ecosystem for Digital Reticular Chemistry}, - journal = {ACS Cent. Sci.} Central Science} - } + - |- + @article{Rosen_2021, + doi = {10.1016/j.matt.2021.02.015}, + url = {https://doi.org/10.1016%2Fj.matt.2021.02.015}, + year = 2021, + month = {may}, + publisher = {Elsevier {BV}}, + volume = {4}, + number = {5}, + pages = {1578--1597}, + author = {Andrew S. Rosen and Shaelyn M. Iyer and Debmalya Ray and Zhenpeng Yao and Al{\'{a}}n Aspuru-Guzik and Laura Gagliardi and Justin M. Notestein and Randall Q. 
Snurr}, + title = {Machine learning the quantum-chemical properties of metal{\textendash}organic frameworks for accelerated materials discovery}, + journal = {Matter} + } + - |- + @article{Rosen_2022, + doi = {10.1038/s41524-022-00796-6}, + url = {https://doi.org/10.1038%2Fs41524-022-00796-6}, + year = 2022, + month = {may}, + publisher = {Springer Science and Business Media {LLC}}, + volume = {8}, + number = {1}, + author = {Andrew S. Rosen and Victor Fung and Patrick Huck and Cody T. O'Donnell and Matthew K. Horton and Donald G. Truhlar and Kristin A. Persson and Justin M. Notestein and Randall Q. Snurr}, + title = {High-throughput predictions of metal{\textendash}organic framework electronic properties: theoretical challenges, graph neural networks, and data exploration}, + journal = {npj Comput Mater} + } + - |- + @article{Jablonka_2023, + doi = {10.1021/acscentsci.2c01177}, + url = {https://doi.org/10.1021%2Facscentsci.2c01177}, + year = 2023, + month = {mar}, + publisher = {American Chemical Society ({ACS})}, + volume = {9}, + number = {4}, + pages = {563--581}, + author = {Kevin Maik Jablonka and Andrew S. Rosen and Aditi S. Krishnapriyan and Berend Smit}, + title = {An Ecosystem for Digital Reticular Chemistry}, + journal = {ACS Central Science} + } templates: - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {#density|mass density!} of {info.density#} {info.density__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {#density|mass density|mass - density (density)!} of {info.density#} {info.density__units}.
- - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {#pore limiting diameter|pore limiting diameter (PLD)!} - of {info.pld#} {info.pld__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {#pore limiting diameter|pore - limiting diameter (PLD)!} of {info.pld#} {info.pld__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {#largest cavity diameter|largest cavity diameter (LCD)!} - of {info.lcd#} {info.lcd__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {#largest cavity diameter|largest - cavity diameter (LCD)!} of {info.lcd#} {info.lcd__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.pbe.bandgap__names__noun} of {outputs.pbe.bandgap#} - {outputs.pbe.bandgap__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.pbe.bandgap__names__noun} - of {outputs.pbe.bandgap#} {outputs.pbe.bandgap__units}. 
- - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.pbe.cbm__names__noun} of {outputs.pbe.cbm#} - {outputs.pbe.cbm__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.pbe.cbm__names__noun} - of {outputs.pbe.cbm#} {outputs.pbe.cbm__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.pbe.vbm__names__noun} of {outputs.pbe.vbm#} - {outputs.pbe.vbm__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.pbe.vbm__names__noun} - of {outputs.pbe.vbm#} {outputs.pbe.vbm__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.hle17.bandgap__names__noun} of {outputs.hle17.bandgap#} - {outputs.hle17.bandgap__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.hle17.bandgap__names__noun} - of {outputs.hle17.bandgap#} {outputs.hle17.bandgap__units}. 
- - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.hle17.cbm__names__noun} of {outputs.hle17.cbm#} - {outputs.hle17.cbm__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.hle17.cbm__names__noun} - of {outputs.hle17.cbm#} {outputs.hle17.cbm__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.hle17.vbm__names__noun} of {outputs.hle17.vbm#} - {outputs.hle17.vbm__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.hle17.vbm__names__noun} - of {outputs.hle17.vbm#} {outputs.hle17.vbm__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.hse06.bandgap__names__noun} of {outputs.hse06.bandgap#} - {outputs.hse06.bandgap__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.hse06.bandgap__names__noun} - of {outputs.hse06.bandgap#} {outputs.hse06.bandgap__units}. 
- - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.hse06.cbm__names__noun} of {outputs.hse06.cbm#} - {outputs.hse06.cbm__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.hse06.cbm__names__noun} - of {outputs.hse06.cbm#} {outputs.hse06.cbm__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.hse06.vbm__names__noun} of {outputs.hse06.vbm#} - {outputs.hse06.vbm__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.hse06.vbm__names__noun} - of {outputs.hse06.vbm#} {outputs.hse06.vbm__units}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {#spacegroup number|space - group number!} of {info.symmetry.spacegroup_number#}. - - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, - linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} is a {#metal-organic framework|metal-organic framework (MOF)|reticular - material!} with the MOFid {info.mofid.mofid#}. 
- - |- - Question: {#What is|How large is!} the {#density|mass density!} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? - Answer: {#The density is |The mass density is |!}{info.density#} {info.density__units}. - - |- - Question: {#What is|How large is!} the {#density|mass density!} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? - Answer: {#The density is |The mass density is |!}{info.density#} {info.density__units}. - - |- - Question: {#What is|How large is!} the {#pore limiting diameter|pore limiting diameter (PLD)!} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? - Answer: {#The pore limiting diameter is |The pore limiting diameter (PLD) is |!}{info.pld#} {info.pld__units}. - - |- - Question: {#What is|How large is!} the {#pore limiting diameter|pore limiting diameter (PLD)!} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? - Answer: {#The pore limiting diameter is |The pore limiting diameter (PLD) is |!}{info.pld#} {info.pld__units}. - - |- - Question: {#What is|How large is!} the {#largest cavity diameter|largest cavity diameter (LCD)!} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? - Answer: {#The largest cavity diameter is |The largest cavity diameter (LCD) is |!}{info.lcd#} {info.lcd__units}. 
- - |- - Question: {#What is|How large is!} the {#largest cavity diameter|largest cavity diameter (LCD)!} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? - Answer: {#The largest cavity diameter is |The largest cavity diameter (LCD) is |!}{info.lcd#} {info.lcd__units}. - - |- - Question: {#What is|How large is!} the {outputs.pbe.bandgap__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? - Answer: {#The band gap is |The band gap of the MOF is |!}{outputs.pbe.bandgap#} {outputs.pbe.bandgap__units}. - - |- - Question: {#What is|How large is!} the {outputs.pbe.bandgap__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? - Answer: {#The band gap is |The band gap of the MOF is |!}{outputs.pbe.bandgap#} {outputs.pbe.bandgap__units}. - - |- - Question: {#What is|How large is!} the {outputs.pbe.cbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? - Answer: {#The conduction band minimum is |The conduction band minimum of the MOF is |!}{outputs.pbe.cbm#} {outputs.pbe.cbm__units}. - - |- - Question: {#What is|How large is!} the {outputs.pbe.cbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? - Answer: {#The conduction band minimum is |The conduction band minimum of the MOF is |!}{outputs.pbe.cbm#} {outputs.pbe.cbm__units}. 
- - |- - Question: {#What is|How large is!} the {outputs.pbe.vbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? - Answer: {#The valence band maximum is |The valence band maximum of the MOF is |!}{outputs.pbe.vbm#} {outputs.pbe.vbm__units}. - - |- - Question: {#What is|How large is!} the {outputs.pbe.vbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? - Answer: {#The valence band maximum is |The valence band maximum of the MOF is |!}{outputs.pbe.vbm#} {outputs.pbe.vbm__units}. - - |- - Question: {#What is|How large is!} the {outputs.hle17.bandgap__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? - Answer: {#The band gap is |The band gap of the MOF is |!}{outputs.hle17.bandgap#} {outputs.hle17.bandgap__units}. - - |- - Question: {#What is|How large is!} the {outputs.hle17.bandgap__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? - Answer: {#The band gap is |The band gap of the MOF is |!}{outputs.hle17.bandgap#} {outputs.hle17.bandgap__units}. - - |- - Question: {#What is|How large is!} the {outputs.hle17.cbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? 
- Answer: {#The conduction band minimum is |The conduction band minimum of the MOF is |!}{outputs.hle17.cbm#} {outputs.hle17.cbm__units}. - - |- - Question: {#What is|How large is!} the {outputs.hle17.cbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? - Answer: {#The conduction band minimum is |The conduction band minimum of the MOF is |!}{outputs.hle17.cbm#} {outputs.hle17.cbm__units}. - - |- - Question: {#What is|How large is!} the {outputs.hle17.vbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? - Answer: {#The valence band maximum is |The valence band maximum of the MOF is |!}{outputs.hle17.vbm#} {outputs.hle17.vbm__units}. - - |- - Question: {#What is|How large is!} the {outputs.hle17.vbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? - Answer: {#The valence band maximum is |The valence band maximum of the MOF is |!}{outputs.hle17.vbm#} {outputs.hle17.vbm__units}. - - |- - Question: {#What is|How large is!} the {outputs.hse06.bandgap__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? - Answer: {#The band gap is |The band gap of the MOF is |!}{outputs.hse06.bandgap#} {outputs.hse06.bandgap__units}. - - |- - Question: {#What is|How large is!} the {outputs.hse06.bandgap__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? 
- Answer: {#The band gap is |The band gap of the MOF is |!}{outputs.hse06.bandgap#} {outputs.hse06.bandgap__units}. - - |- - Question: {#What is|How large is!} the {outputs.hse06.cbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? - Answer: {#The conduction band minimum is |The conduction band minimum of the MOF is |!}{outputs.hse06.cbm#} {outputs.hse06.cbm__units}. - - |- - Question: {#What is|How large is!} the {outputs.hse06.cbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? - Answer: {#The conduction band minimum is |The conduction band minimum of the MOF is |!}{outputs.hse06.cbm#} {outputs.hse06.cbm__units}. - - |- - Question: {#What is|How large is!} the {outputs.hse06.vbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? - Answer: {#The valence band maximum is |The valence band maximum of the MOF is |!}{outputs.hse06.vbm#} {outputs.hse06.vbm__units}. - - |- - Question: In which {#topology|net!} do the linkers with SMILES {info.mofid.smiles_linkers#} and nodes with SMILES {info.mofid.smiles_nodes#} self-assemble to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? - Answer: The {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} self-assembles to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net!} {info.mofid.topology#}. 
- - |- - User: With which linkers do I have to combine my nodes with SMILES {info.mofid.smiles_nodes#} to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net!} {info.mofid.topology#}? - Assistant: You have to combine your nodes with SMILES {info.mofid.smiles_nodes#} with linkers with SMILES {info.mofid.smiles_linkers#} to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {info.mofid.topology#} {#topology|net!}. - - |- - User: With which nodes do I have to combine my linkers with SMILES {info.mofid.smiles_linkers#} to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net!} {info.mofid.topology#}? - Assistant: You have to combine your linkers with SMILES {info.mofid.smiles_linkers#} with nodes with SMILES {info.mofid.smiles_nodes#} to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {info.mofid.topology#} {#topology|net!}. - - |- - User: Which linkers and nodes do I have to combine to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {info.lcd__names__noun} {info.lcd#} {info.lcd__units}? - Assistant: {#Do you have other constraints?|Do you have other requirements?|Are there additional constraints?|Is there anything else I should take into account?!} - User: {#Yes, |Indeed, |Thanks, |!}I want to use linkers with SMILES {info.mofid.smiles_linkers#}. - Assistant: You have to combine your linkers with SMILES {info.mofid.smiles_linkers#} with nodes with SMILES {info.mofid.smiles_nodes#} to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {info.lcd__names__noun} {info.lcd#} {info.lcd__units}. 
- - |- - User: Which linkers and nodes do I have to combine to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {info.pld__names__noun} {info.pld#} {info.pld__units}? - Assistant: {#Do you have other constraints?|Do you have other requirements?|Are there additional constraints?|Is there anything else I should take into account?!} - User: {#Yes, |Indeed, |Thanks, |!}I want to use linkers with SMILES {info.mofid.smiles_linkers#} and want a {outputs.pbe.bandgap__names__noun} of {outputs.pbe.bandgap#} {outputs.pbe.bandgap__units}. - Assistant: You have to combine your linkers with SMILES {info.mofid.smiles_linkers#} with nodes with SMILES {info.mofid.smiles_nodes#}. - - |- - User: Which linkers and nodes do I have to combine to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {outputs.pbe.bandgap__names__noun} of {outputs.pbe.bandgap#} {outputs.pbe.bandgap__units}? - Assistant: {#Do you have other constraints?|Do you have other requirements?|Are there additional constraints?|Is there anything else I should take into account?!} - User: {#Yes, |Indeed, |Thanks, |!}I want to have a {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#} and {outputs.pbe.cbm__names__noun} of {outputs.pbe.cbm#} {outputs.pbe.cbm__units}. - Assistant: You have to combine your linkers with SMILES {info.mofid.smiles_linkers#} with nodes with SMILES {info.mofid.smiles_nodes#}. - User: {#One more thing, |I have one more question, |!}What is the {info.density__names__noun} and the {info.symmetry.spacegroup_number__names__noun}? - Assistant: The {info.density__names__noun} is {info.density#} {info.density__units} and the {info.symmetry.spacegroup_number__names__noun} is {info.symmetry.spacegroup_number#}. 
- - |- - User: What {#suggestion|proposals|ideas|recommendations!} do you have if I {#want|need!} a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {outputs.pbe.bandgap__names__noun} of {outputs.pbe.bandgap#} {outputs.pbe.bandgap__units}? - Assistant: {#Do you have other constraints?|Do you have other requirements?|Are there additional constraints?|Is there anything else I should take into account?!} - User: {#Yes, |Indeed, |Thanks, |!}I want to have a {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#} and {outputs.pbe.cbm__names__noun} of {outputs.pbe.cbm#} {outputs.pbe.cbm__units}. - Assistant: {#Is that it?|Is there anything else I should take into account?|Do you have other constraints?|Do you have other requirements?|Are there additional constraints?!} - User: {#One more thing, |I have one more request, |!}The {info.density__names__noun} should be {info.density#} {info.density__units} and the {info.symmetry.spacegroup_number__names__noun} should be {info.symmetry.spacegroup_number#}. - Assistant: {#I recommend that you |I propose that you |I suggest that you |!} combine your linkers with SMILES {info.mofid.smiles_linkers#} with nodes with SMILES {info.mofid.smiles_nodes#}. - - |- - User: I'm thinking about the band gaps of {#metal-organic frameworks|metal-organic frameworks (MOFs)|reticular materials!}. - Assistant: {#That's interesting.|How can I help?|How can I be of assistance?|Is there anything I can do?|What can I do for you?!} - User: How does the {outputs.pbe.bandgap__names__noun} of the {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} compare to the {outputs.hse06.bandgap__names__noun} and the {outputs.hle17.bandgap__names__noun}? 
- Assistant: The {outputs.pbe.bandgap__names__noun} is {outputs.pbe.bandgap#} {outputs.pbe.bandgap__units}, the {outputs.hse06.bandgap__names__noun} is {outputs.hse06.bandgap#} {outputs.hse06.bandgap__units} and the {outputs.hle17.bandgap__names__noun} is {outputs.hle17.bandgap#} {outputs.hle17.bandgap__units}. - - |- - User: I'm thinking about the conduction band minima and valence band maxima of {#metal-organic frameworks|metal-organic frameworks (MOFs)|reticular materials!}. - Assistant: {#That's interesting.|How can I help?|How can I be of assistance?|Is there anything I can do?|What can I do for you?!} - User: How does the {outputs.pbe.cbm__names__noun} of the {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} compare to the {outputs.hse06.cbm__names__noun} and the {outputs.hle17.cbm__names__noun}? - Assistant: The {outputs.pbe.cbm__names__noun} is {outputs.pbe.cbm#} {outputs.pbe.cbm__units}, the {outputs.hse06.cbm__names__noun} is {outputs.hse06.cbm#} {outputs.hse06.cbm__units} and the {outputs.hle17.cbm__names__noun} is {outputs.hle17.cbm#} {outputs.hle17.cbm__units}. - User: {#And how does it look like for|And how about|How about!} the {outputs.pbe.vbm__names__noun}? - Assistant: The {outputs.pbe.vbm__names__noun} is {outputs.pbe.vbm#} {outputs.pbe.vbm__units}, the {outputs.hse06.vbm__names__noun} is {outputs.hse06.vbm#} {outputs.hse06.vbm__units} and the {outputs.hle17.vbm__names__noun} is {outputs.hle17.vbm#} {outputs.hle17.vbm__units}. - - |- - User: I'm {#thinking|wondering!} about the {#topology|net|RCSR code|RCSR identifier!} of {#metal-organic frameworks|metal-organic frameworks (MOFs)|reticular materials!}. 
- Assistant: {#That's interesting.|How can I help?|How can I be of assistance?|Is there anything I can do?|What can I do for you?!} - User: Which {#topology|net|RCSR code|RCSR identifier!} do the linkers with SMILES {info.mofid.smiles_linkers#} and nodes with SMILES {info.mofid.smiles_nodes#} self-assemble to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!}? - Assistant: Into the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}. - User: {#One more thing, |I have one more request, |Another question, |I have one more question, |!}What density do you {#predict|expect|estimate!} for this {#metal-organic framework|metal-organic framework (MOF)|reticular material|material!}? - Assistant: The {info.density__names__noun} is {info.density#} {info.density__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {#density|mass density!} of {info.density#} {info.density__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {#density|mass density|mass density (density)!} of {info.density#} {info.density__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {#pore limiting diameter|pore limiting diameter (PLD)!} of {info.pld#} {info.pld__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {#pore limiting diameter|pore limiting diameter (PLD)!} of {info.pld#} {info.pld__units}. 
+ - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {#largest cavity diameter|largest cavity diameter (LCD)!} of {info.lcd#} {info.lcd__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {#largest cavity diameter|largest cavity diameter (LCD)!} of {info.lcd#} {info.lcd__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.pbe.bandgap__names__noun} of {outputs.pbe.bandgap#} {outputs.pbe.bandgap__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.pbe.bandgap__names__noun} of {outputs.pbe.bandgap#} {outputs.pbe.bandgap__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.pbe.cbm__names__noun} of {outputs.pbe.cbm#} {outputs.pbe.cbm__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.pbe.cbm__names__noun} of {outputs.pbe.cbm#} {outputs.pbe.cbm__units}. 
+ - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.pbe.vbm__names__noun} of {outputs.pbe.vbm#} {outputs.pbe.vbm__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.pbe.vbm__names__noun} of {outputs.pbe.vbm#} {outputs.pbe.vbm__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.hle17.bandgap__names__noun} of {outputs.hle17.bandgap#} {outputs.hle17.bandgap__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.hle17.bandgap__names__noun} of {outputs.hle17.bandgap#} {outputs.hle17.bandgap__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.hle17.cbm__names__noun} of {outputs.hle17.cbm#} {outputs.hle17.cbm__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.hle17.cbm__names__noun} of {outputs.hle17.cbm#} {outputs.hle17.cbm__units}. 
+ - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.hle17.vbm__names__noun} of {outputs.hle17.vbm#} {outputs.hle17.vbm__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.hle17.vbm__names__noun} of {outputs.hle17.vbm#} {outputs.hle17.vbm__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.hse06.bandgap__names__noun} of {outputs.hse06.bandgap#} {outputs.hse06.bandgap__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.hse06.bandgap__names__noun} of {outputs.hse06.bandgap#} {outputs.hse06.bandgap__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.hse06.cbm__names__noun} of {outputs.hse06.cbm#} {outputs.hse06.cbm__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.hse06.cbm__names__noun} of {outputs.hse06.cbm#} {outputs.hse06.cbm__units}. 
+ - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} has a {outputs.hse06.vbm__names__noun} of {outputs.hse06.vbm#} {outputs.hse06.vbm__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {outputs.hse06.vbm__names__noun} of {outputs.hse06.vbm#} {outputs.hse06.vbm__units}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} has a {#spacegroup number|space group number!} of {info.symmetry.spacegroup_number#}. + - The {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#} is a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}. + - |- + Question: {#What is|How large is!} the {#density|mass density!} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? + Answer: {#The density is |The mass density is |!}{info.density#} {info.density__units}. + - |- + Question: {#What is|How large is!} the {#density|mass density!} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? + Answer: {#The density is |The mass density is |!}{info.density#} {info.density__units}. 
+ - |- + Question: {#What is|How large is!} the {#pore limiting diameter|pore limiting diameter (PLD)!} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? + Answer: {#The pore limiting diameter is |The pore limiting diameter (PLD) is |!}{info.pld#} {info.pld__units}. + - |- + Question: {#What is|How large is!} the {#pore limiting diameter|pore limiting diameter (PLD)!} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? + Answer: {#The pore limiting diameter is |The pore limiting diameter (PLD) is |!}{info.pld#} {info.pld__units}. + - |- + Question: {#What is|How large is!} the {#largest cavity diameter|largest cavity diameter (LCD)!} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? + Answer: {#The largest cavity diameter is |The largest cavity diameter (LCD) is |!}{info.lcd#} {info.lcd__units}. + - |- + Question: {#What is|How large is!} the {#largest cavity diameter|largest cavity diameter (LCD)!} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? + Answer: {#The largest cavity diameter is |The largest cavity diameter (LCD) is |!}{info.lcd#} {info.lcd__units}. + - |- + Question: {#What is|How large is!} the {outputs.pbe.bandgap__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? 
+ Answer: {#The band gap is |The band gap of the MOF is |!}{outputs.pbe.bandgap#} {outputs.pbe.bandgap__units}. + - |- + Question: {#What is|How large is!} the {outputs.pbe.bandgap__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? + Answer: {#The band gap is |The band gap of the MOF is |!}{outputs.pbe.bandgap#} {outputs.pbe.bandgap__units}. + - |- + Question: {#What is|How large is!} the {outputs.pbe.cbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? + Answer: {#The conduction band minimum is |The conduction band minimum of the MOF is |!}{outputs.pbe.cbm#} {outputs.pbe.cbm__units}. + - |- + Question: {#What is|How large is!} the {outputs.pbe.cbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? + Answer: {#The conduction band minimum is |The conduction band minimum of the MOF is |!}{outputs.pbe.cbm#} {outputs.pbe.cbm__units}. + - |- + Question: {#What is|How large is!} the {outputs.pbe.vbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? + Answer: {#The valence band maximum is |The valence band maximum of the MOF is |!}{outputs.pbe.vbm#} {outputs.pbe.vbm__units}. + - |- + Question: {#What is|How large is!} the {outputs.pbe.vbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? 
+ Answer: {#The valence band maximum is |The valence band maximum of the MOF is |!}{outputs.pbe.vbm#} {outputs.pbe.vbm__units}. + - |- + Question: {#What is|How large is!} the {outputs.hle17.bandgap__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? + Answer: {#The band gap is |The band gap of the MOF is |!}{outputs.hle17.bandgap#} {outputs.hle17.bandgap__units}. + - |- + Question: {#What is|How large is!} the {outputs.hle17.bandgap__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? + Answer: {#The band gap is |The band gap of the MOF is |!}{outputs.hle17.bandgap#} {outputs.hle17.bandgap__units}. + - |- + Question: {#What is|How large is!} the {outputs.hle17.cbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? + Answer: {#The conduction band minimum is |The conduction band minimum of the MOF is |!}{outputs.hle17.cbm#} {outputs.hle17.cbm__units}. + - |- + Question: {#What is|How large is!} the {outputs.hle17.cbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? + Answer: {#The conduction band minimum is |The conduction band minimum of the MOF is |!}{outputs.hle17.cbm#} {outputs.hle17.cbm__units}. 
+ - |- + Question: {#What is|How large is!} the {outputs.hle17.vbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? + Answer: {#The valence band maximum is |The valence band maximum of the MOF is |!}{outputs.hle17.vbm#} {outputs.hle17.vbm__units}. + - |- + Question: {#What is|How large is!} the {outputs.hle17.vbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? + Answer: {#The valence band maximum is |The valence band maximum of the MOF is |!}{outputs.hle17.vbm#} {outputs.hle17.vbm__units}. + - |- + Question: {#What is|How large is!} the {outputs.hse06.bandgap__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? + Answer: {#The band gap is |The band gap of the MOF is |!}{outputs.hse06.bandgap#} {outputs.hse06.bandgap__units}. + - |- + Question: {#What is|How large is!} the {outputs.hse06.bandgap__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? + Answer: {#The band gap is |The band gap of the MOF is |!}{outputs.hse06.bandgap#} {outputs.hse06.bandgap__units}. + - |- + Question: {#What is|How large is!} the {outputs.hse06.cbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? 
+ Answer: {#The conduction band minimum is |The conduction band minimum of the MOF is |!}{outputs.hse06.cbm#} {outputs.hse06.cbm__units}. + - |- + Question: {#What is|How large is!} the {outputs.hse06.cbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? + Answer: {#The conduction band minimum is |The conduction band minimum of the MOF is |!}{outputs.hse06.cbm#} {outputs.hse06.cbm__units}. + - |- + Question: {#What is|How large is!} the {outputs.hse06.vbm__names__noun} of the {#MOF|metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}, linker SMILES {info.mofid.smiles_linkers#}, and node SMILES {info.mofid.smiles_nodes#}? + Answer: {#The valence band maximum is |The valence band maximum of the MOF is |!}{outputs.hse06.vbm#} {outputs.hse06.vbm__units}. + - |- + Question: In which {#topology|net!} do the linkers with SMILES {info.mofid.smiles_linkers#} and nodes with SMILES {info.mofid.smiles_nodes#} self-assemble to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#}? + Answer: The {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} self-assembles to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net!} {info.mofid.topology#}. + - |- + User: With which linkers do I have to combine my nodes with SMILES {info.mofid.smiles_nodes#} to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net!} {info.mofid.topology#}? 
+ Assistant: You have to combine your nodes with SMILES {info.mofid.smiles_nodes#} with linkers with SMILES {info.mofid.smiles_linkers#} to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {info.mofid.topology#} {#topology|net!}. + - |- + User: With which nodes do I have to combine my linkers with SMILES {info.mofid.smiles_linkers#} to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {#topology|net!} {info.mofid.topology#}? + Assistant: You have to combine your linkers with SMILES {info.mofid.smiles_linkers#} with nodes with SMILES {info.mofid.smiles_nodes#} to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {info.mofid.topology#} {#topology|net!}. + - |- + User: Which linkers and nodes do I have to combine to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {info.lcd__names__noun} {info.lcd#} {info.lcd__units}? + Assistant: {#Do you have other constraints?|Do you have other requirements?|Are there additional constraints?|Is there anything else I should take into account?!} + User: {#Yes, |Indeed, |Thanks, |!}I want to use linkers with SMILES {info.mofid.smiles_linkers#}. + Assistant: You have to combine your linkers with SMILES {info.mofid.smiles_linkers#} with nodes with SMILES {info.mofid.smiles_nodes#} to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {info.lcd__names__noun} {info.lcd#} {info.lcd__units}. + - |- + User: Which linkers and nodes do I have to combine to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {info.pld__names__noun} {info.pld#} {info.pld__units}? 
+ Assistant: {#Do you have other constraints?|Do you have other requirements?|Are there additional constraints?|Is there anything else I should take into account?!} + User: {#Yes, |Indeed, |Thanks, |!}I want to use linkers with SMILES {info.mofid.smiles_linkers#} and want a {outputs.pbe.bandgap__names__noun} of {outputs.pbe.bandgap#} {outputs.pbe.bandgap__units}. + Assistant: You have to combine your linkers with SMILES {info.mofid.smiles_linkers#} with nodes with SMILES {info.mofid.smiles_nodes#}. + - |- + User: Which linkers and nodes do I have to combine to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {outputs.pbe.bandgap__names__noun} of {outputs.pbe.bandgap#} {outputs.pbe.bandgap__units}? + Assistant: {#Do you have other constraints?|Do you have other requirements?|Are there additional constraints?|Is there anything else I should take into account?!} + User: {#Yes, |Indeed, |Thanks, |!}I want to have a {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#} and {outputs.pbe.cbm__names__noun} of {outputs.pbe.cbm#} {outputs.pbe.cbm__units}. + Assistant: You have to combine your linkers with SMILES {info.mofid.smiles_linkers#} with nodes with SMILES {info.mofid.smiles_nodes#}. + User: {#One more thing, |I have one more question, |!}What are the {info.density__names__noun} and the {info.symmetry.spacegroup_number__names__noun}? + Assistant: The {info.density__names__noun} is {info.density#} {info.density__units} and the {info.symmetry.spacegroup_number__names__noun} is {info.symmetry.spacegroup_number#}. + - |- + User: What {#suggestions|proposals|ideas|recommendations!} do you have if I {#want|need!} a {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the {outputs.pbe.bandgap__names__noun} of {outputs.pbe.bandgap#} {outputs.pbe.bandgap__units}?
+ Assistant: {#Do you have other constraints?|Do you have other requirements?|Are there additional constraints?|Is there anything else I should take into account?!} + User: {#Yes, |Indeed, |Thanks, |!}I want to have a {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#} and {outputs.pbe.cbm__names__noun} of {outputs.pbe.cbm#} {outputs.pbe.cbm__units}. + Assistant: {#Is that it?|Is there anything else I should take into account?|Do you have other constraints?|Do you have other requirements?|Are there additional constraints?!} + User: {#One more thing, |I have one more request, |!}The {info.density__names__noun} should be {info.density#} {info.density__units} and the {info.symmetry.spacegroup_number__names__noun} should be {info.symmetry.spacegroup_number#}. + Assistant: {#I recommend that you |I propose that you |I suggest that you |You can !}combine your linkers with SMILES {info.mofid.smiles_linkers#} with nodes with SMILES {info.mofid.smiles_nodes#}. + - |- + User: I'm thinking about the band gaps of {#metal-organic frameworks|metal-organic frameworks (MOFs)|reticular materials!}. + Assistant: {#That's interesting.|How can I help?|How can I be of assistance?|Is there anything I can do?|What can I do for you?!} + User: How does the {outputs.pbe.bandgap__names__noun} of the {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} compare to the {outputs.hse06.bandgap__names__noun} and the {outputs.hle17.bandgap__names__noun}? + Assistant: The {outputs.pbe.bandgap__names__noun} is {outputs.pbe.bandgap#} {outputs.pbe.bandgap__units}, the {outputs.hse06.bandgap__names__noun} is {outputs.hse06.bandgap#} {outputs.hse06.bandgap__units} and the {outputs.hle17.bandgap__names__noun} is {outputs.hle17.bandgap#} {outputs.hle17.bandgap__units}. + - |- + User: I'm thinking about the conduction band minima and valence band maxima of {#metal-organic frameworks|metal-organic frameworks (MOFs)|reticular materials!}.
+ Assistant: {#That's interesting.|How can I help?|How can I be of assistance?|Is there anything I can do?|What can I do for you?!} + User: How does the {outputs.pbe.cbm__names__noun} of the {#metal-organic framework|metal-organic framework (MOF)|reticular material!} with the MOFid {info.mofid.mofid#} compare to the {outputs.hse06.cbm__names__noun} and the {outputs.hle17.cbm__names__noun}? + Assistant: The {outputs.pbe.cbm__names__noun} is {outputs.pbe.cbm#} {outputs.pbe.cbm__units}, the {outputs.hse06.cbm__names__noun} is {outputs.hse06.cbm#} {outputs.hse06.cbm__units} and the {outputs.hle17.cbm__names__noun} is {outputs.hle17.cbm#} {outputs.hle17.cbm__units}. + User: {#And what does it look like for|And how about|How about!} the {outputs.pbe.vbm__names__noun}? + Assistant: The {outputs.pbe.vbm__names__noun} is {outputs.pbe.vbm#} {outputs.pbe.vbm__units}, the {outputs.hse06.vbm__names__noun} is {outputs.hse06.vbm#} {outputs.hse06.vbm__units} and the {outputs.hle17.vbm__names__noun} is {outputs.hle17.vbm#} {outputs.hle17.vbm__units}. + - |- + User: I'm {#thinking|wondering!} about the {#topology|net|RCSR code|RCSR identifier!} of {#metal-organic frameworks|metal-organic frameworks (MOFs)|reticular materials!}. + Assistant: {#That's interesting.|How can I help?|How can I be of assistance?|Is there anything I can do?|What can I do for you?!} + User: Into which {#topology|net|RCSR code|RCSR identifier!} do the linkers with SMILES {info.mofid.smiles_linkers#} and nodes with SMILES {info.mofid.smiles_nodes#} self-assemble to form a {#metal-organic framework|metal-organic framework (MOF)|reticular material!}? + Assistant: Into the {#topology|net|RCSR code|RCSR identifier!} {info.mofid.topology#}. + User: {#One more thing, |I have one more request, |Another question, |I have one more question, |!}What density do you {#predict|expect|estimate!} for this {#metal-organic framework|metal-organic framework (MOF)|reticular material|material!}?
+ Assistant: The {info.density__names__noun} is {info.density#} {info.density__units}. diff --git a/data/tabular/rdkit_features/meta.yaml b/data/tabular/rdkit_features/meta.yaml index dcc7750b6..78316effd 100644 --- a/data/tabular/rdkit_features/meta.yaml +++ b/data/tabular/rdkit_features/meta.yaml @@ -1,151 +1,150 @@ ---- name: rdkit_features description: |- - Molecular descriptors computed using RDKit + Molecular descriptors computed using RDKit targets: - - id: formula - type: formula - names: - - noun: formula - - noun: chemical formula - - noun: chemical formula - - noun: molecular formula - - id: NumHDonors - type: continuous - significant_digits: 0 - names: - - noun: number of hydrogen bond donors - - noun: number of hydrogen bond donor sites - - noun: count of hydrogen bond donors - - id: NumHAcceptors - type: continuous - significant_digits: 0 - names: - - noun: number of hydrogen bond acceptors - - noun: number of hydrogen bond acceptor sites - - noun: count of hydrogen bond acceptors - - id: NumHeteroatoms - type: continuous - significant_digits: 0 - names: - - noun: number of heteroatoms - - noun: count of heteroatoms - - noun: heteroatom count - - id: RingCount - type: continuous - significant_digits: 0 - names: - - noun: number of rings - - noun: count of rings - - noun: ring count - - id: NumRotatableBonds - type: continuous - significant_digits: 0 - names: - - noun: number of rotatable bonds - - noun: count of rotatable bonds - - noun: rotatable bond count - - id: NumAromaticBonds - type: continuous - significant_digits: 0 - names: - - noun: number of aromatic bonds - - noun: count of aromatic bonds - - noun: aromatic bond count - - id: NumAcidGroups - type: continuous - significant_digits: 0 - names: - - noun: number of acid groups - - noun: count of acid groups - - noun: acid group count - - id: NumBasicGroups - type: continuous - significant_digits: 0 - names: - - noun: number of basic groups - - noun: count of basic groups - - noun: basic group 
count - - id: Apol - type: continuous - significant_digits: 2 - names: - - noun: sum of atomic polarizabilities - - noun: total sum of atomic polarizabilities - - id: MolLogP - type: continuous - significant_digits: 2 - names: - - noun: Wildman-Crippen LogP value computed using RDKit - - noun: Wildman-Crippen LogP value - - noun: LogP value computed using the Wildman-Crippen method + - id: formula + type: formula + names: + - noun: formula + - noun: chemical formula + - noun: chemical formula + - noun: molecular formula + - id: NumHDonors + type: continuous + significant_digits: 0 + names: + - noun: number of hydrogen bond donors + - noun: number of hydrogen bond donor sites + - noun: count of hydrogen bond donors + - id: NumHAcceptors + type: continuous + significant_digits: 0 + names: + - noun: number of hydrogen bond acceptors + - noun: number of hydrogen bond acceptor sites + - noun: count of hydrogen bond acceptors + - id: NumHeteroatoms + type: continuous + significant_digits: 0 + names: + - noun: number of heteroatoms + - noun: count of heteroatoms + - noun: heteroatom count + - id: RingCount + type: continuous + significant_digits: 0 + names: + - noun: number of rings + - noun: count of rings + - noun: ring count + - id: NumRotatableBonds + type: continuous + significant_digits: 0 + names: + - noun: number of rotatable bonds + - noun: count of rotatable bonds + - noun: rotatable bond count + - id: NumAromaticBonds + type: continuous + significant_digits: 0 + names: + - noun: number of aromatic bonds + - noun: count of aromatic bonds + - noun: aromatic bond count + - id: NumAcidGroups + type: continuous + significant_digits: 0 + names: + - noun: number of acid groups + - noun: count of acid groups + - noun: acid group count + - id: NumBasicGroups + type: continuous + significant_digits: 0 + names: + - noun: number of basic groups + - noun: count of basic groups + - noun: basic group count + - id: Apol + type: continuous + significant_digits: 2 + names: + - 
noun: sum of atomic polarizabilities + - noun: total sum of atomic polarizabilities + - id: MolLogP + type: continuous + significant_digits: 2 + names: + - noun: Wildman-Crippen LogP value computed using RDKit + - noun: Wildman-Crippen LogP value + - noun: LogP value computed using the Wildman-Crippen method benchmarks: - - names: WhiteLab - link: https://huggingface.co/datasets/maykcaldas/smiles-transformers/viewer/default/train?p=9080867 - split_column: split + - names: WhiteLab + link: https://huggingface.co/datasets/maykcaldas/smiles-transformers/viewer/default/train?p=9080867 + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: MIT links: - - url: https://huggingface.co/datasets/maykcaldas/smiles-transformers/viewer/default/train?p=9080867 - description: Data source + - url: https://huggingface.co/datasets/maykcaldas/smiles-transformers/viewer/default/train?p=9080867 + description: Data source num_points: 1009179703 templates: - - The {formula__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {formula#}. - - The {NumHDonors__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {NumHDonors#}. - - The {NumHAcceptors__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {NumHAcceptors#}. - - The {NumHeteroatoms__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {NumHeteroatoms#}. - - The {RingCount__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {RingCount#}. - - The {NumRotatableBonds__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {NumRotatableBonds#}. - - The {NumAromaticBonds__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {NumAromaticBonds#}. 
- - The {NumAcidGroups__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {NumAcidGroups#}. - - The {NumBasicGroups__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {NumBasicGroups#}. - - The {Apol__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {Apol#}. - - The {MolLogP__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {MolLogP#}. - - |- - User: I want to {#design|create|make|synthesize|analyze!} a {#molecule|compound|chemical!} with a {formula__names__noun} of {formula#}. - Assistant: {#Cool, do|Nice, do|Interesting, do|That's interesting, do|That is a very interesting question, do|Do!} you have some additional {#constraints|requirements|conditions|limitations!}{# I should take into account| that help me narrow down the search| that I should consider| I should consider| I should take into account|!}? - User: {#Yes, |Indeed, |Yeah, |Yea, |Yep, |!}I want the {NumHDonors__names__noun} to be {NumHDonors#}, the {NumHAcceptors__names__noun} to be {NumHAcceptors#}. - Assistant: {#Then, |In that case, |In that situation, |In that scenario, |!}I {#recommend|suggest|propose|advise!} the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}. - - |- - Question: What is the {formula__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? - Answer: {formula#} - - |- - Question: What is the {NumHDonors__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? - Answer: {NumHDonors#} - - |- - Question: What is the {NumHAcceptors__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? - Answer: {NumHAcceptors#} - - |- - Question: What is the {NumHeteroatoms__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? 
- Answer: {NumHeteroatoms#} - - |- - Question: What is the {RingCount__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? - Answer: {RingCount#} - - |- - Question: What is the {NumRotatableBonds__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? - Answer: {NumRotatableBonds#} - - |- - Question: What is the {NumAromaticBonds__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? - Answer: {NumAromaticBonds#} - - |- - Question: What is the {NumAcidGroups__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? - Answer: {NumAcidGroups#} - - |- - Question: What is the {NumBasicGroups__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? - Answer: {NumBasicGroups#} - - |- - Question: What is the {Apol__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? - Answer: {Apol#} - - |- - User: I want to {#design|create|make|synthesize|analyze!} a {#molecule|compound|chemical!} with a {NumHDonors__names__noun} of {NumHDonors#} and a {NumHAcceptors__names__noun} of {NumHAcceptors#}. - Assistant: {#Cool, do|Nice, do|Interesting, do|That's interesting, do|That is a very interesting question, do|Do!} you have some additional {#constraints|requirements|conditions|limitations!}{# I should take into account| that help me narrow down the search| that I should consider| I should consider| I should take into account!}? - User: {#Yes, |Indeed, |Yeah, |Yea, |Yep, |!}I want the {NumHeteroatoms__names__noun} to be {NumHeteroatoms#}. - Assistant: {#Then, |In that case, |In that situation, |In that scenario, |!}I {#recommend|suggest|propose|advise!} the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}. 
- - |- - User: I want to {#design|create|make|synthesize|analyze!} a {#molecule|compound|chemical!} with a {NumHDonors__names__noun} of {NumHDonors#}, a {NumHAcceptors__names__noun} of {NumHAcceptors#} and a {MolLogP__names__noun} of {MolLogP#}. - Assistant: {#Cool, do|Nice, do|Interesting, do|That's interesting, do|That is a very interesting question, do|Do!} you have some additional {#constraints|requirements|conditions|limitations!}{# I should take into account| that help me narrow down the search| that I should consider| I should consider| I should take into account|!}? - User: {#Yes, |Indeed, |Yeah, |Yea, |Yep, |!}I want the {formula__names__noun} to be {formula#}. - Assistant: {#Then, |In that case, |In that situation, |In that scenario, |!}I {#recommend|suggest|propose|advise!} the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}. + - The {formula__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {formula#}. + - The {NumHDonors__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {NumHDonors#}. + - The {NumHAcceptors__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {NumHAcceptors#}. + - The {NumHeteroatoms__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {NumHeteroatoms#}. + - The {RingCount__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {RingCount#}. + - The {NumRotatableBonds__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {NumRotatableBonds#}. + - The {NumAromaticBonds__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {NumAromaticBonds#}. + - The {NumAcidGroups__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {NumAcidGroups#}. 
+ - The {NumBasicGroups__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {NumBasicGroups#}. + - The {Apol__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {Apol#}. + - The {MolLogP__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#} is {MolLogP#}. + - |- + User: I want to {#design|create|make|synthesize|analyze!} a {#molecule|compound|chemical!} with a {formula__names__noun} of {formula#}. + Assistant: {#Cool, do|Nice, do|Interesting, do|That's interesting, do|That is a very interesting question, do|Do!} you have some additional {#constraints|requirements|conditions|limitations!}{# I should take into account| that help me narrow down the search| that I should consider| I should consider| I should take into account|!}? + User: {#Yes, |Indeed, |Yeah, |Yea, |Yep, |!}I want the {NumHDonors__names__noun} to be {NumHDonors#}, the {NumHAcceptors__names__noun} to be {NumHAcceptors#}. + Assistant: {#Then, |In that case, |In that situation, |In that scenario, |!}I {#recommend|suggest|propose|advise!} the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}. + - |- + Question: What is the {formula__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? + Answer: {formula#} + - |- + Question: What is the {NumHDonors__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? + Answer: {NumHDonors#} + - |- + Question: What is the {NumHAcceptors__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? + Answer: {NumHAcceptors#} + - |- + Question: What is the {NumHeteroatoms__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? + Answer: {NumHeteroatoms#} + - |- + Question: What is the {RingCount__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? 
+ Answer: {RingCount#} + - |- + Question: What is the {NumRotatableBonds__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? + Answer: {NumRotatableBonds#} + - |- + Question: What is the {NumAromaticBonds__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? + Answer: {NumAromaticBonds#} + - |- + Question: What is the {NumAcidGroups__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? + Answer: {NumAcidGroups#} + - |- + Question: What is the {NumBasicGroups__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? + Answer: {NumBasicGroups#} + - |- + Question: What is the {Apol__names__noun} of the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}? + Answer: {Apol#} + - |- + User: I want to {#design|create|make|synthesize|analyze!} a {#molecule|compound|chemical!} with a {NumHDonors__names__noun} of {NumHDonors#} and a {NumHAcceptors__names__noun} of {NumHAcceptors#}. + Assistant: {#Cool, do|Nice, do|Interesting, do|That's interesting, do|That is a very interesting question, do|Do!} you have some additional {#constraints|requirements|conditions|limitations!}{# I should take into account| that help me narrow down the search| that I should consider| I should consider| I should take into account!}? + User: {#Yes, |Indeed, |Yeah, |Yea, |Yep, |!}I want the {NumHeteroatoms__names__noun} to be {NumHeteroatoms#}. + Assistant: {#Then, |In that case, |In that situation, |In that scenario, |!}I {#recommend|suggest|propose|advise!} the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}. + - |- + User: I want to {#design|create|make|synthesize|analyze!} a {#molecule|compound|chemical!} with a {NumHDonors__names__noun} of {NumHDonors#}, a {NumHAcceptors__names__noun} of {NumHAcceptors#} and a {MolLogP__names__noun} of {MolLogP#}. 
+ Assistant: {#Cool, do|Nice, do|Interesting, do|That's interesting, do|That is a very interesting question, do|Do!} you have some additional {#constraints|requirements|conditions|limitations!}{# I should take into account| that help me narrow down the search| that I should consider| I should consider| I should take into account|!}? + User: {#Yes, |Indeed, |Yeah, |Yea, |Yep, |!}I want the {formula__names__noun} to be {formula#}. + Assistant: {#Then, |In that case, |In that situation, |In that scenario, |!}I {#recommend|suggest|propose|advise!} the {#molecule|compound|chemical!} with {SMILES__description} {SMILES#}. diff --git a/data/tabular/rhea_db_masked/meta.yaml b/data/tabular/rhea_db_masked/meta.yaml index b95bfc5f2..032502b4b 100644 --- a/data/tabular/rhea_db_masked/meta.yaml +++ b/data/tabular/rhea_db_masked/meta.yaml @@ -1,70 +1,69 @@ ---- name: ord_procedure_steps description: |- - The open reaction database is a database of chemical reactions and their conditions + The open reaction database is a database of chemical reactions and their conditions identifiers: - - id: masked_rxn_smiles - type: text - description: reaction SMILES with one element masked - names: - - noun: reaction SMILES with one element masked as `MASK` - - noun: reaction SMILES with one element hidden as `MASK` - - noun: masked reaction SMILES (one component masked as `MASK`) - - noun: masked reaction SMILES string (one component masked as `MASK`) - - noun: masked RXNSMILES (one component masked as `MASK`) + - id: masked_rxn_smiles + type: text + description: reaction SMILES with one element masked + names: + - noun: reaction SMILES with one element masked as `MASK` + - noun: reaction SMILES with one element hidden as `MASK` + - noun: masked reaction SMILES (one component masked as `MASK`) + - noun: masked reaction SMILES string (one component masked as `MASK`) + - noun: masked RXNSMILES (one component masked as `MASK`) targets: - - id: missing_component - type: text - description: masked 
element + - id: missing_component + type: text + description: masked element license: CC BY SA 4.0 links: - - url: https://github.com/open-reaction-database/ord-data - description: original data source + - url: https://github.com/open-reaction-database/ord-data + description: original data source num_points: 31348 bibtex: - - |- - @article{Bansal_2021, - doi = {10.1093/nar/gkab1016}, - url = {https://doi.org/10.1093%2Fnar%2Fgkab1016}, - year = 2021, - month = {nov}, - publisher = {Oxford University Press ({OUP})}, - volume = {50}, - number = {D1}, - pages = {D693--D700}, - author = {Parit Bansal and Anne Morgat and Kristian B Axelsen - and Venkatesh Muthukrishnan and Elisabeth Coudert and Lucila Aimo - and Nevila Hyka-Nouspikel and Elisabeth Gasteiger and Arnaud Kerhornou - and Teresa Batista Neto and Monica Pozzato and Marie-Claude Blatter - and Alex Ignatchenko and Nicole Redaschi and Alan Bridge}, - title = {Rhea, the reaction knowledgebase in 2022}, - journal = {Nucleic Acids Research} - } - - |- - @article{Alc_ntara_2011, - doi = {10.1093/nar/gkr1126}, - url = {https://doi.org/10.1093%2Fnar%2Fgkr1126}, - year = 2011, - month = {nov}, - publisher = {Oxford University Press ({OUP})}, - volume = {40}, - number = {D1}, - pages = {D754--D760}, - author = {Rafael Alc{\'{a}}ntara and Kristian B. 
Axelsen - and Anne Morgat and Eugeni Belda and Elisabeth Coudert - and Alan Bridge and Hong Cao and Paula de Matos and Marcus Ennis - and Steve Turner and Gareth Owen and Lydie Bougueleret - and Ioannis Xenarios and Christoph Steinbeck}, - title = {Rhea{\textemdash}a manually curated resource of biochemical reactions}, - journal = {Nucleic Acids Research} - } + - |- + @article{Bansal_2021, + doi = {10.1093/nar/gkab1016}, + url = {https://doi.org/10.1093%2Fnar%2Fgkab1016}, + year = 2021, + month = {nov}, + publisher = {Oxford University Press ({OUP})}, + volume = {50}, + number = {D1}, + pages = {D693--D700}, + author = {Parit Bansal and Anne Morgat and Kristian B Axelsen + and Venkatesh Muthukrishnan and Elisabeth Coudert and Lucila Aimo + and Nevila Hyka-Nouspikel and Elisabeth Gasteiger and Arnaud Kerhornou + and Teresa Batista Neto and Monica Pozzato and Marie-Claude Blatter + and Alex Ignatchenko and Nicole Redaschi and Alan Bridge}, + title = {Rhea, the reaction knowledgebase in 2022}, + journal = {Nucleic Acids Research} + } + - |- + @article{Alc_ntara_2011, + doi = {10.1093/nar/gkr1126}, + url = {https://doi.org/10.1093%2Fnar%2Fgkr1126}, + year = 2011, + month = {nov}, + publisher = {Oxford University Press ({OUP})}, + volume = {40}, + number = {D1}, + pages = {D754--D760}, + author = {Rafael Alc{\'{a}}ntara and Kristian B. Axelsen + and Anne Morgat and Eugeni Belda and Elisabeth Coudert + and Alan Bridge and Hong Cao and Paula de Matos and Marcus Ennis + and Steve Turner and Gareth Owen and Lydie Bougueleret + and Ioannis Xenarios and Christoph Steinbeck}, + title = {Rhea{\textemdash}a manually curated resource of biochemical reactions}, + journal = {Nucleic Acids Research} + } templates: - - The masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#} is {missing_component#}. - - The {#chemical|compound!} with SMILES {missing_component#} is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}. 
- - |- - Question: What is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}? - Answer: {missing_component#}. - - |- - Task: Predict the masked component in a {masked_rxn_smiles__names__noun}. - Description: {masked_rxn_smiles#} - {#Answer|Solution!}: {missing_component#} + - The masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#} is {missing_component#}. + - The {#chemical|compound!} with SMILES {missing_component#} is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}. + - |- + Question: What is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}? + Answer: {missing_component#}. + - |- + Task: Predict the masked component in a {masked_rxn_smiles__names__noun}. + Description: {masked_rxn_smiles#} + {#Answer|Solution!}: {missing_component#} diff --git a/data/tabular/rhea_db_predictions/meta.yaml b/data/tabular/rhea_db_predictions/meta.yaml index be4b104ce..6048ea126 100644 --- a/data/tabular/rhea_db_predictions/meta.yaml +++ b/data/tabular/rhea_db_predictions/meta.yaml @@ -1,83 +1,82 @@ ---- name: ord_procedure_steps description: |- - The open reaction database is a database of chemical reactions and their conditions + The open reaction database is a database of chemical reactions and their conditions identifiers: - - id: educt_string - type: text - description: reaction educts - names: - - noun: reaction educts - - noun: educts - - noun: starting materials - - id: RXNSMILES - type: RXNSMILES - description: reaction SMILES - names: - - noun: reaction SMILES - - noun: reaction SMILES string - - noun: RXNSMILES - - noun: reaction SMILES (RXNSMILES) + - id: educt_string + type: text + description: reaction educts + names: + - noun: reaction educts + - noun: educts + - noun: starting materials + - id: RXNSMILES + type: RXNSMILES + description: reaction SMILES + names: + - noun: reaction SMILES + - noun: reaction SMILES string + - noun: RXNSMILES + - 
noun: reaction SMILES (RXNSMILES) targets: - - id: product_string - type: text - description: reaction products - names: - - noun: reaction products - - noun: products + - id: product_string + type: text + description: reaction products + names: + - noun: reaction products + - noun: products license: CC BY SA 4.0 links: - - url: https://github.com/open-reaction-database/ord-data - description: original data source + - url: https://github.com/open-reaction-database/ord-data + description: original data source num_points: 31348 bibtex: - - |- - @article{Bansal_2021, - doi = {10.1093/nar/gkab1016}, - url = {https://doi.org/10.1093%2Fnar%2Fgkab1016}, - year = 2021, - month = {nov}, - publisher = {Oxford University Press ({OUP})}, - volume = {50}, - number = {D1}, - pages = {D693--D700}, - author = {Parit Bansal and Anne Morgat and Kristian B Axelsen - and Venkatesh Muthukrishnan and Elisabeth Coudert and Lucila Aimo - and Nevila Hyka-Nouspikel and Elisabeth Gasteiger and Arnaud Kerhornou - and Teresa Batista Neto and Monica Pozzato and Marie-Claude Blatter - and Alex Ignatchenko and Nicole Redaschi and Alan Bridge}, - title = {Rhea, the reaction knowledgebase in 2022}, - journal = {Nucleic Acids Research} - } - - |- - @article{Alc_ntara_2011, - doi = {10.1093/nar/gkr1126}, - url = {https://doi.org/10.1093%2Fnar%2Fgkr1126}, - year = 2011, - month = {nov}, - publisher = {Oxford University Press ({OUP})}, - volume = {40}, - number = {D1}, - pages = {D754--D760}, - author = {Rafael Alc{\'{a}}ntara and Kristian B. 
Axelsen - and Anne Morgat and Eugeni Belda and Elisabeth Coudert - and Alan Bridge and Hong Cao and Paula de Matos and Marcus Ennis - and Steve Turner and Gareth Owen and Lydie Bougueleret - and Ioannis Xenarios and Christoph Steinbeck}, - title = {Rhea{\textemdash}a manually curated resource of biochemical reactions}, - journal = {Nucleic Acids Research} - } + - |- + @article{Bansal_2021, + doi = {10.1093/nar/gkab1016}, + url = {https://doi.org/10.1093%2Fnar%2Fgkab1016}, + year = 2021, + month = {nov}, + publisher = {Oxford University Press ({OUP})}, + volume = {50}, + number = {D1}, + pages = {D693--D700}, + author = {Parit Bansal and Anne Morgat and Kristian B Axelsen + and Venkatesh Muthukrishnan and Elisabeth Coudert and Lucila Aimo + and Nevila Hyka-Nouspikel and Elisabeth Gasteiger and Arnaud Kerhornou + and Teresa Batista Neto and Monica Pozzato and Marie-Claude Blatter + and Alex Ignatchenko and Nicole Redaschi and Alan Bridge}, + title = {Rhea, the reaction knowledgebase in 2022}, + journal = {Nucleic Acids Research} + } + - |- + @article{Alc_ntara_2011, + doi = {10.1093/nar/gkr1126}, + url = {https://doi.org/10.1093%2Fnar%2Fgkr1126}, + year = 2011, + month = {nov}, + publisher = {Oxford University Press ({OUP})}, + volume = {40}, + number = {D1}, + pages = {D754--D760}, + author = {Rafael Alc{\'{a}}ntara and Kristian B. Axelsen + and Anne Morgat and Eugeni Belda and Elisabeth Coudert + and Alan Bridge and Hong Cao and Paula de Matos and Marcus Ennis + and Steve Turner and Gareth Owen and Lydie Bougueleret + and Ioannis Xenarios and Christoph Steinbeck}, + title = {Rhea{\textemdash}a manually curated resource of biochemical reactions}, + journal = {Nucleic Acids Research} + } templates: - - The {RXNSMILES__names__noun} {RXNSMILES#} has the {educt_string__names__noun} {educt_string#} and the {product_string__names__noun} {product_string#}. 
- - The {RXNSMILES__names__noun} {RXNSMILES#} has the {product_string__names__noun} {product_string#} and the {educt_string__names__noun} {educt_string#}. - - |- - Question: {#What|Which!} {educt_string__names__noun} are {#needed|required!} to {#produce|synthesize!} the {product_string__names__noun} {product_string#}? - Answer: {educt_string#}. - - |- - Question: {#What|Which!} {product_string__names__noun} are produced from the {educt_string__names__noun} {educt_string#}? - Answer: {product_string#}. - - |- - User: I {#want|would like to|must|need to!} {#synthesize|produce!} the {product_string__names__noun} {product_string#}. - Assistant: {#Cool, is|That's interesting, is|Great, is|Is!} there anything else I can do for you? - User: {#Yes, |!}I would like to know the {educt_string__names__noun} I need to produce the {product_string__names__noun} {product_string#}. - Assistant: {#I recommend|I suggest|I propose|I advise!} the following {educt_string__names__noun}: {educt_string#}. + - The {RXNSMILES__names__noun} {RXNSMILES#} has the {educt_string__names__noun} {educt_string#} and the {product_string__names__noun} {product_string#}. + - The {RXNSMILES__names__noun} {RXNSMILES#} has the {product_string__names__noun} {product_string#} and the {educt_string__names__noun} {educt_string#}. + - |- + Question: {#What|Which!} {educt_string__names__noun} are {#needed|required!} to {#produce|synthesize!} the {product_string__names__noun} {product_string#}? + Answer: {educt_string#}. + - |- + Question: {#What|Which!} {product_string__names__noun} are produced from the {educt_string__names__noun} {educt_string#}? + Answer: {product_string#}. + - |- + User: I {#want|would like to|must|need to!} {#synthesize|produce!} the {product_string__names__noun} {product_string#}. + Assistant: {#Cool, is|That's interesting, is|Great, is|Is!} there anything else I can do for you? 
+ User: {#Yes, |!}I would like to know the {educt_string__names__noun} I need to produce the {product_string__names__noun} {product_string#}. + Assistant: {#I recommend|I suggest|I propose|I advise!} the following {educt_string__names__noun}: {educt_string#}. diff --git a/data/tabular/sarscov2_3clpro_diamond/meta.yaml b/data/tabular/sarscov2_3clpro_diamond/meta.yaml index a61ee1454..97b7e0f9a 100644 --- a/data/tabular/sarscov2_3clpro_diamond/meta.yaml +++ b/data/tabular/sarscov2_3clpro_diamond/meta.yaml @@ -1,154 +1,153 @@ ---- name: sarscov2_3clpro_diamond description: |- - A large XChem crystallographic fragment screen against SARS-CoV-2 - main protease at high resolution. From MIT AiCures. + A large XChem crystallographic fragment screen against SARS-CoV-2 + main protease at high resolution. From MIT AiCures. targets: - - id: activity_SARSCoV2_3CLPro - description: activity against the SARSCoV2 3CL protease (1) or not (0) - units: - type: boolean - names: - - noun: activity against the SARSCoV2 3CL protease - - noun: activity against the SARS-CoV-2 3CL protease - - adjective: active against the SARSCoV2 3CL protease - - adjective: active against the SARS-CoV-2 3CL protease - - gerund: targeting the SARSCoV2 3CL protease - - gerund: acting against the SARSCoV2 3CL protease - - gerund: successfully targeting the SARSCoV2 3CL protease - uris: + - id: activity_SARSCoV2_3CLPro + description: activity against the SARSCoV2 3CL protease (1) or not (0) + units: + type: boolean + names: + - noun: activity against the SARSCoV2 3CL protease + - noun: activity against the SARS-CoV-2 3CL protease + - adjective: active against the SARSCoV2 3CL protease + - adjective: active against the SARS-CoV-2 3CL protease + - gerund: targeting the SARSCoV2 3CL protease + - gerund: acting against the SARSCoV2 3CL protease + - gerund: successfully targeting the SARSCoV2 3CL protease + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: 
https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://www.diamond.ac.uk/covid-19/for-scientists/Main-protease-structure-and-XChem.html - description: data source - - url: https://www.diamond.ac.uk/dam/jcr:9fdc4297-15b6-47e2-8d53-befb0970bf7c/COVID19-summary-20200324.xlsx - description: data source - - url: http://doi.org/10.1021/jacs.9b02822 - description: corresponding publication - - url: https://doi.org/10.1016/j.jmb.2006.11.073 - description: corresponding publication + - url: https://www.diamond.ac.uk/covid-19/for-scientists/Main-protease-structure-and-XChem.html + description: data source + - url: https://www.diamond.ac.uk/dam/jcr:9fdc4297-15b6-47e2-8d53-befb0970bf7c/COVID19-summary-20200324.xlsx + description: data source + - url: http://doi.org/10.1021/jacs.9b02822 + description: corresponding publication + - url: https://doi.org/10.1016/j.jmb.2006.11.073 + description: corresponding publication num_points: 880 bibtex: - - |- - @article{Resnick_2019, - doi = {10.1021/jacs.9b02822}, - url = {https://doi.org/10.1021%2Fjacs.9b02822}, - year = {2019}, - month = may, - publisher = {American Chemical Society (ACS)}, - volume = {141}, - number = {22}, - pages = {8951--8968}, - author = {Efrat Resnick and Anthony Bradley and Jinrui Gan and Alice Douangamath - and Tobias Krojer and Ritika Sethi and Paul P. Geurink and Anthony Aimon and Gabriel Amitai - and Dom Bellini and James Bennett and Michael Fairhead and Oleg Fedorov and Ronen Gabizon and Jin Gan - and Jingxu Guo and Alexander Plotnikov and Nava Reznik and Gian Filippo Ruda and Laura Diaz-Saez and - Verena M. Straub and Tamas Szommer and Srikannathasan Velupillai and Daniel Zaidman and Yanling Zhang - and Alun R. Coker and Christopher G. Dowson and Haim M. Barr and Chu Wang and Kilian V.M. Huber - and Paul E. 
Brennan and Huib Ovaa and Frank von Delft and Nir London}, - title = {Rapid Covalent-Probe Discovery by Electrophile-Fragment Screening}, - journal = {Journal of the American Chemical Society} - - |- - @article{Xue_2007, - doi = {10.1016/j.jmb.2006.11.073}, - url = {https://doi.org/10.1016%2Fj.jmb.2006.11.073}, - year = {2007}, - month = feb, - publisher = {Elsevier BV}, - volume = {366}, - number = {3}, - pages = {965--975}, - author = {Xiaoyu Xue and Haitao Yang and Wei Shen and Qi Zhao and Jun Li and Kailin Yang and - Cheng Chen and Yinghua Jin and Mark Bartlam and Zihe Rao}, - title = {Production of Authentic {SARS}-{CoV} Mpro with Enhanced Activity: Application as - a Novel Tag-cleavage Endopeptidase for Protein Overproduction}, - journal = {Journal of Molecular Biology} + - |- + @article{Resnick_2019, + doi = {10.1021/jacs.9b02822}, + url = {https://doi.org/10.1021%2Fjacs.9b02822}, + year = {2019}, + month = may, + publisher = {American Chemical Society (ACS)}, + volume = {141}, + number = {22}, + pages = {8951--8968}, + author = {Efrat Resnick and Anthony Bradley and Jinrui Gan and Alice Douangamath + and Tobias Krojer and Ritika Sethi and Paul P. Geurink and Anthony Aimon and Gabriel Amitai + and Dom Bellini and James Bennett and Michael Fairhead and Oleg Fedorov and Ronen Gabizon and Jin Gan + and Jingxu Guo and Alexander Plotnikov and Nava Reznik and Gian Filippo Ruda and Laura Diaz-Saez and + Verena M. Straub and Tamas Szommer and Srikannathasan Velupillai and Daniel Zaidman and Yanling Zhang + and Alun R. Coker and Christopher G. Dowson and Haim M. Barr and Chu Wang and Kilian V.M. Huber + and Paul E. 
Brennan and Huib Ovaa and Frank von Delft and Nir London}, + title = {Rapid Covalent-Probe Discovery by Electrophile-Fragment Screening}, + journal = {Journal of the American Chemical Society}} + - |- + @article{Xue_2007, + doi = {10.1016/j.jmb.2006.11.073}, + url = {https://doi.org/10.1016%2Fj.jmb.2006.11.073}, + year = {2007}, + month = feb, + publisher = {Elsevier BV}, + volume = {366}, + number = {3}, + pages = {965--975}, + author = {Xiaoyu Xue and Haitao Yang and Wei Shen and Qi Zhao and Jun Li and Kailin Yang and + Cheng Chen and Yinghua Jin and Mark Bartlam and Zihe Rao}, + title = {Production of Authentic {SARS}-{CoV} Mpro with Enhanced Activity: Application as + a Novel Tag-cleavage Endopeptidase for Protein Overproduction}, + journal = {Journal of Molecular Biology}} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_SARSCoV2_3CLPro#no &NULL}{activity_SARSCoV2_3CLPro__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_SARSCoV2_3CLPro#no &NULL}{activity_SARSCoV2_3CLPro__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_SARSCoV2_3CLPro__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {activity_SARSCoV2_3CLPro__names__gerund}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_SARSCoV2_3CLPro__names__gerund}? - Assistant: {activity_SARSCoV2_3CLPro#No&Yes}, this molecule is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {activity_SARSCoV2_3CLPro__names__gerund}? - Assistant: {activity_SARSCoV2_3CLPro#No&Yes}, it is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}? - Assistant: This is a molecule that is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {activity_SARSCoV2_3CLPro#not &NULL}be {activity_SARSCoV2_3CLPro__names__gerund}. 
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {activity_SARSCoV2_3CLPro#not &NULL}be {activity_SARSCoV2_3CLPro__names__gerund}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {activity_SARSCoV2_3CLPro__names__gerund}:{activity_SARSCoV2_3CLPro#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_SARSCoV2_3CLPro__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{activity_SARSCoV2_3CLPro#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_SARSCoV2_3CLPro__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result:This molecule is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {activity_SARSCoV2_3CLPro__names__gerund}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_SARSCoV2_3CLPro__names__gerund}? 
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {activity_SARSCoV2_3CLPro%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%activity_SARSCoV2_3CLPro%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%activity_SARSCoV2_3CLPro%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_SARSCoV2_3CLPro#no &NULL}{activity_SARSCoV2_3CLPro__names__noun}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_SARSCoV2_3CLPro#no &NULL}{activity_SARSCoV2_3CLPro__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_SARSCoV2_3CLPro__names__gerund}. 
+ {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra|!} words. + Result: {activity_SARSCoV2_3CLPro#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_SARSCoV2_3CLPro__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {activity_SARSCoV2_3CLPro__names__gerund}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_SARSCoV2_3CLPro__names__gerund}? + Assistant: {activity_SARSCoV2_3CLPro#No&Yes}, this molecule is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {activity_SARSCoV2_3CLPro__names__gerund}? + Assistant: {activity_SARSCoV2_3CLPro#No&Yes}, it is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}? 
+ Assistant: This is a molecule that is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {activity_SARSCoV2_3CLPro#not &NULL}be {activity_SARSCoV2_3CLPro__names__gerund}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {activity_SARSCoV2_3CLPro#not &NULL}be {activity_SARSCoV2_3CLPro__names__gerund}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {activity_SARSCoV2_3CLPro__names__gerund}:{activity_SARSCoV2_3CLPro#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_SARSCoV2_3CLPro__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{activity_SARSCoV2_3CLPro#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_SARSCoV2_3CLPro__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. 
+ Result:This molecule is {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {activity_SARSCoV2_3CLPro__names__gerund}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_SARSCoV2_3CLPro__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {activity_SARSCoV2_3CLPro%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%activity_SARSCoV2_3CLPro%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_SARSCoV2_3CLPro#not &NULL}{activity_SARSCoV2_3CLPro__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%activity_SARSCoV2_3CLPro%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/sarscov2_vitro_touret/meta.yaml b/data/tabular/sarscov2_vitro_touret/meta.yaml index af1e9afab..c0000923b 100644 --- a/data/tabular/sarscov2_vitro_touret/meta.yaml +++ b/data/tabular/sarscov2_vitro_touret/meta.yaml @@ -1,129 +1,128 @@ ---- name: sarscov2_vitro_touret description: |- - An in-vitro screen of the Prestwick chemical library composed of 1,480 - approved drugs in an infected cell-based assay. 
+ An in-vitro screen of the Prestwick chemical library composed of 1,480 + approved drugs in an infected cell-based assay. targets: - - id: activity_SARSCoV2 - description: whether it is active against SARSCoV2 (1) or not (0). - units: - type: boolean - names: - - noun: activity against the Corona virus - - noun: activity against SARSCoV2 - - noun: activity against COVID19 - - adjective: active against the Corona virus - - adjective: active against SARSCoV2 - - adjective: active against COVID19 - - gerund: mitigating the effects of the Corona virus + - id: activity_SARSCoV2 + description: whether it is active against SARSCoV2 (1) or not (0). + units: + type: boolean + names: + - noun: activity against the Corona virus + - noun: activity against SARSCoV2 + - noun: activity against COVID19 + - adjective: active against the Corona virus + - adjective: active against SARSCoV2 + - adjective: active against COVID19 + - gerund: mitigating the effects of the Corona virus benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://doi.org/10.1038/s41598-020-70143-6 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/hts/#sars-cov-2-in-vitro-touret-et-al - description: Data source + - url: https://doi.org/10.1038/s41598-020-70143-6 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/hts/#sars-cov-2-in-vitro-touret-et-al + description: Data source num_points: 1484 bibtex: - - |- - @article{Touret2020, - doi = {10.1038/s41598-020-70143-6}, - url = {https://doi.org/10.1038/s41598-020-70143-6}, - year = {2020}, - month = aug, - publisher = {Springer Science and Business Media LLC}, - volume = {10}, - number = {1}, - author = {Franck Touret and Magali Gilles and 
Karine Barral and Antoine Nougairede - and Jacques van Helden and Etienne Decroly and Xavier de Lamballerie and Bruno Coutard}, - title = {In vitro screening of a FDA approved chemical library reveals potential inhibitors of - SARS-CoV-2 replication}, - journal = {Scientific Reports} + - |- + @article{Touret2020, + doi = {10.1038/s41598-020-70143-6}, + url = {https://doi.org/10.1038/s41598-020-70143-6}, + year = {2020}, + month = aug, + publisher = {Springer Science and Business Media LLC}, + volume = {10}, + number = {1}, + author = {Franck Touret and Magali Gilles and Karine Barral and Antoine Nougairede + and Jacques van Helden and Etienne Decroly and Xavier de Lamballerie and Bruno Coutard}, + title = {In vitro screening of a FDA approved chemical library reveals potential inhibitors of + SARS-CoV-2 replication}, + journal = {Scientific Reports}} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_SARSCoV2#no &NULL}{activity_SARSCoV2__names__noun}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {activity_SARSCoV2#effectively &ineffectevely}{activity_SARSCoV2__names__gerund}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_SARSCoV2#no &NULL}{activity_SARSCoV2__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}. - - |- - Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_SARSCoV2__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {activity_SARSCoV2__names__gerund}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_SARSCoV2__names__gerund}? - Assistant: {activity_SARSCoV2#No&Yes}, this molecule is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {activity_SARSCoV2__names__gerund}? - Assistant: {activity_SARSCoV2#No&Yes}, it is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}? - Assistant: This is a molecule that is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {activity_SARSCoV2#not &NULL}be {activity_SARSCoV2__names__gerund}. 
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {activity_SARSCoV2#not &NULL}be {activity_SARSCoV2__names__gerund}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {activity_SARSCoV2__names__gerund}:{activity_SARSCoV2#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_SARSCoV2__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{activity_SARSCoV2#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_SARSCoV2__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result:This molecule is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {activity_SARSCoV2__names__gerund}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_SARSCoV2__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. 
- Options: - {activity_SARSCoV2%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%activity_SARSCoV2%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%activity_SARSCoV2%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_SARSCoV2#no &NULL}{activity_SARSCoV2__names__noun}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_SARSCoV2#no &NULL}{activity_SARSCoV2__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_SARSCoV2__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {activity_SARSCoV2__names__gerund}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_SARSCoV2__names__gerund}? + Assistant: {activity_SARSCoV2#No&Yes}, this molecule is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {activity_SARSCoV2__names__gerund}? + Assistant: {activity_SARSCoV2#No&Yes}, it is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}? + Assistant: This is a molecule that is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {activity_SARSCoV2#not &NULL}be {activity_SARSCoV2__names__gerund}. 
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {activity_SARSCoV2#not &NULL}be {activity_SARSCoV2__names__gerund}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {activity_SARSCoV2__names__gerund}:{activity_SARSCoV2#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_SARSCoV2__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{activity_SARSCoV2#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_SARSCoV2__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result:This molecule is {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {activity_SARSCoV2__names__gerund}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_SARSCoV2__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. 
+ Options: + {activity_SARSCoV2%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%activity_SARSCoV2%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_SARSCoV2#not &NULL}{activity_SARSCoV2__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%activity_SARSCoV2%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/serine_threonine_kinase_33_butkiewicz/meta.yaml b/data/tabular/serine_threonine_kinase_33_butkiewicz/meta.yaml index 926c206a8..889c01852 100644 --- a/data/tabular/serine_threonine_kinase_33_butkiewicz/meta.yaml +++ b/data/tabular/serine_threonine_kinase_33_butkiewicz/meta.yaml @@ -1,170 +1,168 @@ ---- name: serine_threonine_kinase_33_butkiewicz description: |- - The serine/threonine kinase, STK33, has been shown to - be relevant for proliferation of mutant KRAS-dependent cells involved - in cancer. Primary screen AID 2661. Counter screen AID 2821. AID504583 - as validation screen. Actives in AID 2821 subtracted by the actives - from screen AID504583 resulted in the final set of 172 active - compounds. + The serine/threonine kinase, STK33, has been shown to + be relevant for proliferation of mutant KRAS-dependent cells involved + in cancer. Primary screen AID 2661. Counter screen AID 2821. AID504583 + as validation screen. Actives in AID 2821 subtracted by the actives + from screen AID504583 resulted in the final set of 172 active + compounds. 
targets: - - id: activity_serine_threonine_kinase33 - description: whether it is active against the serine threonine kinase 33 receptor (1) or not (0). - units: - type: boolean - names: - - noun: a serine/threonine kinase, STK3 inhibitor - - gerund: inhibiting the activity of the serine/threonine kinase, STK3 - - adjective: active against the serine threonine kinase 33 receptor - pubchem_aids: - - 2661 - - 2821 - - 504583 - uris: [] + - id: activity_serine_threonine_kinase33 + description: whether it is active against the serine threonine kinase 33 receptor (1) or not (0). + units: + type: boolean + names: + - noun: a serine/threonine kinase, STK33 inhibitor + - gerund: inhibiting the activity of the serine/threonine kinase, STK33 + - adjective: active against the serine threonine kinase 33 receptor + pubchem_aids: + - 2661 + - 2821 + - 504583 + uris: [] identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split license: CC BY 4.0 links: - - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al - description: original dataset - - url: https://doi.org/10.3390/molecules18010735 - description: corresponding publication - - url: https://doi.org/10.1093/nar/gky1033 - description: corresponding publication - - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ - description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al + description: original dataset + - url: https://doi.org/10.3390/molecules18010735 + description: corresponding publication + - url: https://doi.org/10.1093/nar/gky1033 + description: corresponding publication + - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ + description: corresponding publication num_points: 319792 bibtex: - - |- - @article{Butkiewicz2013, - doi =
{10.3390/molecules18010735}, - url = {https://doi.org/10.3390/molecules18010735}, - year = {2013}, - month = jan, - publisher = {{MDPI} {AG}}, - volume = {18}, - number = {1}, - pages = {735--756}, - author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and - Jeffrey Mendenhall and Pedro Teixeira and C. Weaver and Jens - Meiler}, - title = {Benchmarking Ligand-Based Virtual High-Throughput - Screening with the {PubChem} Database}, - journal = {Molecules}} - - |- - @article{Kim2018, - doi = {10.1093/nar/gky1033}, - url = {https://doi.org/10.1093/nar/gky1033}, - year = {2018}, - month = oct, - publisher = {Oxford University Press ({OUP})}, - volume = {47}, - number = {D1}, - pages = {D1102--D1109}, - author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta - Gindulyte and Jia He and Siqian He and Qingliang Li and Benjamin - A Shoemaker and Paul A Thiessen and Bo Yu and Leonid Zaslavsky - and Jian Zhang and Evan E Bolton}, - title = {{PubChem} 2019 update: improved access to chemical data}, - journal = {Nucleic Acids Research}} - - |- - @article{Butkiewicz2017, - doi = {}, - url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, - year = {2017}, - publisher = {Chem Inform}, - volume = {3}, - number = {1}, - author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe, - E. W. and Weaver, D. C. and Meiler, J.}, - title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from - the {P}ub{C}hem {D}atabase}}, - journal = {Chemical Science}} + - |- + @article{Butkiewicz2013, + doi = {10.3390/molecules18010735}, + url = {https://doi.org/10.3390/molecules18010735}, + year = {2013}, + month = jan, + publisher = {{MDPI} {AG}}, + volume = {18}, + number = {1}, + pages = {735--756}, + author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and + Jeffrey Mendenhall and Pedro Teixeira and C. 
Weaver and Jens + Meiler}, + title = {Benchmarking Ligand-Based Virtual High-Throughput + Screening with the {PubChem} Database}, + journal = {Molecules}} + - |- + @article{Kim2018, + doi = {10.1093/nar/gky1033}, + url = {https://doi.org/10.1093/nar/gky1033}, + year = {2018}, + month = oct, + publisher = {Oxford University Press ({OUP})}, + volume = {47}, + number = {D1}, + pages = {D1102--D1109}, + author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta + Gindulyte and Jia He and Siqian He and Qingliang Li and Benjamin + A Shoemaker and Paul A Thiessen and Bo Yu and Leonid Zaslavsky + and Jian Zhang and Evan E Bolton}, + title = {{PubChem} 2019 update: improved access to chemical data}, + journal = {Nucleic Acids Research}} + - |- + @article{Butkiewicz2017, + doi = {}, + url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, + year = {2017}, + publisher = {Chem Inform}, + volume = {3}, + number = {1}, + author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe, + E. W. and Weaver, D. C. and Meiler, J.}, + title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from + the {P}ub{C}hem {D}atabase}, + journal = {Chemical Science}} templates: - - 'The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {activity_serine_threonine_kinase33#not &NULL}{#showing|exhibiting|displaying!} - activity against any of the following: serine kinase /threonine kinase.' - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__noun}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}.
- - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_serine_threonine_kinase33__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra|!} words. - Result: {activity_serine_threonine_kinase33#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_serine_threonine_kinase33__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {activity_serine_threonine_kinase33__names__gerund}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_serine_threonine_kinase33__names__gerund}? - Assistant: {activity_serine_threonine_kinase33#No&Yes}, this molecule is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {activity_serine_threonine_kinase33__names__gerund}? - Assistant: {activity_serine_threonine_kinase33#No&Yes}, it is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}? 
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}? - Assistant: This is a molecule that is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {activity_serine_threonine_kinase33#not &NULL}be {activity_serine_threonine_kinase33__names__gerund}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {activity_serine_threonine_kinase33#not &NULL}be {activity_serine_threonine_kinase33__names__gerund}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {activity_serine_threonine_kinase33__names__gerund}:{activity_serine_threonine_kinase33#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_serine_threonine_kinase33__names__gerund}. 
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{activity_serine_threonine_kinase33#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_serine_threonine_kinase33__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result:This molecule is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {activity_serine_threonine_kinase33__names__gerund}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_serine_threonine_kinase33__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {activity_serine_threonine_kinase33%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%activity_serine_threonine_kinase33%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}? 
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%activity_serine_threonine_kinase33%} - Answer:{%multiple_choice_result} + - "The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {activity_serine_threonine_kinase33#not &NULL}{#showing|exhibiting|displaying!} activity against any of the following: serine kinase /threonine kinase." + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__noun}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_serine_threonine_kinase33__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra|!} words. + Result: {activity_serine_threonine_kinase33#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_serine_threonine_kinase33__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. 
+ Description: A molecule that is {activity_serine_threonine_kinase33__names__gerund}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_serine_threonine_kinase33__names__gerund}? + Assistant: {activity_serine_threonine_kinase33#No&Yes}, this molecule is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {activity_serine_threonine_kinase33__names__gerund}? + Assistant: {activity_serine_threonine_kinase33#No&Yes}, it is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}? + Assistant: This is a molecule that is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {activity_serine_threonine_kinase33#not &NULL}be {activity_serine_threonine_kinase33__names__gerund}. 
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {activity_serine_threonine_kinase33#not &NULL}be {activity_serine_threonine_kinase33__names__gerund}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {activity_serine_threonine_kinase33__names__gerund}:{activity_serine_threonine_kinase33#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_serine_threonine_kinase33__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{activity_serine_threonine_kinase33#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_serine_threonine_kinase33__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result:This molecule is {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {activity_serine_threonine_kinase33__names__gerund}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. 
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_serine_threonine_kinase33__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {activity_serine_threonine_kinase33%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%activity_serine_threonine_kinase33%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_serine_threonine_kinase33#not &NULL}{activity_serine_threonine_kinase33__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%activity_serine_threonine_kinase33%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/sigma_aldrich_safety_data/meta.yaml b/data/tabular/sigma_aldrich_safety_data/meta.yaml index 61dabc3e7..0ca0b350e 100644 --- a/data/tabular/sigma_aldrich_safety_data/meta.yaml +++ b/data/tabular/sigma_aldrich_safety_data/meta.yaml @@ -1,935 +1,934 @@ ---- name: sigma_aldrich_safety_data description: |- - "H-statements parsed from the safety datasheets (SDS) coming from Sigma-Aldrich. This dataset refers only to pure compounds." + "H-statements parsed from the safety datasheets (SDS) coming from Sigma-Aldrich. This dataset refers only to pure compounds." 
targets: - - id: H206 - description: fire, blast or projection hazard - type: boolean - names: - - noun: a fire, blast or projection hazard - - id: H208 - description: fire hazard - type: boolean - names: - - noun: a fire hazard - - id: H211 - description: may be sensitive - type: boolean - names: - - adjective: potentially sensitive - - id: H220 - description: extremely flammable gas - type: boolean - names: - - noun: an extremely flammable gas - - id: H221 - description: flammable gas - type: boolean - names: - - adjective: flammable gas - - id: H224 - description: extremely flammable liquid or vapor - type: boolean - names: - - noun: an extremely flammable liquid or vapor - - id: H225 - description: highly flammable liquid and vapor - type: boolean - names: - - noun: a highly flammable liquid or vapor - - id: H226 - description: flammable liquid and vapor - type: boolean - names: - - noun: a flammable liquid or vapor - - id: H228 - description: flammable solid - type: boolean - names: - - noun: a flammable solid - - id: H240 - description: heating may cause an explosion - type: boolean - names: - - noun: potential cause for an explosion - - id: H241 - description: heating may cause a fire or explosion - type: boolean - names: - - adjective: potentially explosive when heated - - id: H242 - description: heating may cause a fire - type: boolean - names: - - adjective: potentially can cause a fire when heated - - id: H250 - description: catches fire spontaneously if exposed to air - type: boolean - names: - - adjective: spontaneously catches fire if exposed to air - - id: H251 - description: self-heating; may catch fire - type: boolean - names: - - adjective: self-heating - - adjective: potentially going to catch fire - - id: H252 - description: self-heating in large quantities; may catch fire - type: boolean - names: - - adjective: self-heating in large quantities - - adjective: potentially going to catch fire - - id: H260 - description: in contact with water 
releases flammable gases which may ignite spontaneously - type: boolean - names: - - adjective: releasing flammable gases that may ignite spontaneously when in contact with water - - id: H261 - description: in contact with water releases flammable gas - type: boolean - names: - - adjective: releasing flammable gas when in contact with water - - id: H270 - description: may cause or intensify fire; oxidizer - type: boolean - names: - - noun: potential cause or intensifier for fire - - noun: an oxidizer - - id: H271 - description: may cause fire or explosion; strong oxidizer - type: boolean - names: - - noun: potential cause for fire or explosion - - noun: strong oxidizer - - id: H272 - description: may intensify fire; oxidizer - type: boolean - names: - - noun: potential fire intensifier fire - - noun: an oxidizer - - id: H280 - description: contains gas under pressure; may explode if heated - type: boolean - names: - - adjective: contains gas under pressure and may explode if heated - - id: H282 - description: is an extremely flammable chemical under pressure, may explode if heated - type: boolean - names: - - adjective: is an extremely flammable chemical under pressure, may explode if heated - - id: H284 - description: chemical under pressure, may explode if heated - type: boolean - names: - - adjective: is a chemical under pressure, may explode if heated - - id: H290 - description: may be corrosive to metals - type: boolean - names: - - adjective: potenttially corrosive to metals - - id: H300 - description: fatal if swallowed - type: boolean - names: - - adjective: fatal if swallowed - - id: H301 - description: toxic if swallowed - type: boolean - names: - - adjective: toxic if swallowed - - id: H302 - description: harmful if swallowed - type: boolean - names: - - adjective: harmful if swallowed - - id: H304 - description: may be fatal if swallowed and enters airways - type: boolean - names: - - adjective: probably fatal if swallowed and enters airways - - id: 
H310 - description: fatal in contact with skin - type: boolean - names: - - adjective: fatal in contact with skin - - id: H311 - description: toxic in contact with skin - type: boolean - names: - - adjective: is toxic in contact with skin - - id: H312 - description: harmful in contact with skin - type: boolean - names: - - adjective: is harmful in contact with skin - - id: H314 - description: causes severe skin burns and eye damage - type: boolean - names: - - adjective: causing severe skin burns and eye damage - - id: H315 - description: causes skin irritation - type: boolean - names: - - adjective: causing skin irritation - - id: H317 - description: may cause an allergic skin reaction - type: boolean - names: - - noun: potential cause for an allergic skin reaction - - id: H318 - description: causes serious eye damage - type: boolean - names: - - adjective: causing serious eye damage - - id: H319 - description: causes serious eye irritation - type: boolean - names: - - adjective: causing serious eye irritation - - id: H330 - description: fatal if inhaled - type: boolean - names: - - adjective: fatal if inhaled - - id: H331 - description: toxic if inhaled - type: boolean - names: - - adjective: toxic if inhaled - - id: H332 - description: harmful if inhaled - type: boolean - names: - - adjective: harmful if inhaled - - id: H334 - description: may cause allergy or asthma symptoms or breathing difficulties if inhaled - type: boolean - names: - - adjective: may cause allergy or asthma symptoms or breathing difficulties if inhaled - - id: H335 - description: may cause respiratory irritation - type: boolean - names: - - noun: potential cause for respiratory irritation - - id: H336 - description: may cause drowsiness or dizziness - type: boolean - names: - - noun: potential cause for drowsiness or dizziness - - id: H340 - description: may cause genetic defects - type: boolean - names: - - noun: potential cause for genetic defects - - id: H341 - description: suspected of 
causing genetic defects - type: boolean - names: - - adjective: suspected of causing genetic defects - - adjective: suspected of causing genetic alterations - - adjective: suspected of causing mutations - - id: H350 - description: may cause cancer - type: boolean - names: - - noun: potential cause for cancer - - id: H351 - description: suspected of causing cancer - type: boolean - names: - - adjective: suspected of causing cancer - - id: H360 - description: may damage fertility or the unborn child - type: boolean - names: - - noun: potential cause for fertility damage or the unborn child - - id: H361 - description: suspected of damaging fertility or the unborn child - type: boolean - names: - - adjective: suspected of damaging fertility or the unborn child - - id: H370 - description: causes damage to organs - type: boolean - names: - - adjective: causing damage to organs - - id: H371 - description: may cause damage to organs - type: boolean - names: - - noun: potential cause for damage to organs - - adjective: potentially causing organ damage - - id: H372 - description: causes damage to organs through prolonged or repeated exposure - type: boolean - names: - - adjective: causing damage to organs through prolonged or repeated exposure - - adjective: causing damage to organs - - id: H373 - description: may cause damage to organs through prolonged or repeated exposure - type: boolean - names: - - noun: potential cause for damage to organs through prolonged or repeated exposure - - id: H400 - description: very toxic to aquatic life - type: boolean - names: - - adjective: very toxic to aquatic life - - id: H410 - description: very toxic to aquatic life with long lasting effects - type: boolean - names: - - adjective: very toxic to aquatic life with long lasting effects - - id: H411 - description: toxic to aquatic life with long lasting effects - type: boolean - names: - - adjective: toxic to aquatic life with long lasting effects - - adjective: toxic to aquatic life - - 
adjective: inducing long lasting effects on aquatic life - - id: H420 - description: harms public health and the environment by destroying ozone in the upper atmosphere - type: boolean - names: - - adjective: harming public health and the environment by destroying the ozone in the upper atmosphere - - adjective: destroying the ozone in the upper atmosphere - - adjective: harming public health and the environment + - id: H206 + description: fire, blast or projection hazard + type: boolean + names: + - noun: a fire, blast or projection hazard + - id: H208 + description: fire hazard + type: boolean + names: + - noun: a fire hazard + - id: H211 + description: may be sensitive + type: boolean + names: + - adjective: potentially sensitive + - id: H220 + description: extremely flammable gas + type: boolean + names: + - noun: an extremely flammable gas + - id: H221 + description: flammable gas + type: boolean + names: + - adjective: flammable gas + - id: H224 + description: extremely flammable liquid or vapor + type: boolean + names: + - noun: an extremely flammable liquid or vapor + - id: H225 + description: highly flammable liquid and vapor + type: boolean + names: + - noun: a highly flammable liquid or vapor + - id: H226 + description: flammable liquid and vapor + type: boolean + names: + - noun: a flammable liquid or vapor + - id: H228 + description: flammable solid + type: boolean + names: + - noun: a flammable solid + - id: H240 + description: heating may cause an explosion + type: boolean + names: + - noun: potential cause for an explosion + - id: H241 + description: heating may cause a fire or explosion + type: boolean + names: + - adjective: potentially explosive when heated + - id: H242 + description: heating may cause a fire + type: boolean + names: + - adjective: potentially can cause a fire when heated + - id: H250 + description: catches fire spontaneously if exposed to air + type: boolean + names: + - adjective: spontaneously catches fire if exposed to air + 
- id: H251 + description: self-heating; may catch fire + type: boolean + names: + - adjective: self-heating + - adjective: potentially going to catch fire + - id: H252 + description: self-heating in large quantities; may catch fire + type: boolean + names: + - adjective: self-heating in large quantities + - adjective: potentially going to catch fire + - id: H260 + description: in contact with water releases flammable gases which may ignite spontaneously + type: boolean + names: + - adjective: releasing flammable gases that may ignite spontaneously when in contact with water + - id: H261 + description: in contact with water releases flammable gas + type: boolean + names: + - adjective: releasing flammable gas when in contact with water + - id: H270 + description: may cause or intensify fire; oxidizer + type: boolean + names: + - noun: potential cause or intensifier for fire + - noun: an oxidizer + - id: H271 + description: may cause fire or explosion; strong oxidizer + type: boolean + names: + - noun: potential cause for fire or explosion + - noun: strong oxidizer + - id: H272 + description: may intensify fire; oxidizer + type: boolean + names: + - noun: potential fire intensifier + - noun: an oxidizer + - id: H280 + description: contains gas under pressure; may explode if heated + type: boolean + names: + - adjective: contains gas under pressure and may explode if heated + - id: H282 + description: is an extremely flammable chemical under pressure, may explode if heated + type: boolean + names: + - adjective: is an extremely flammable chemical under pressure, may explode if heated + - id: H284 + description: chemical under pressure, may explode if heated + type: boolean + names: + - adjective: is a chemical under pressure, may explode if heated + - id: H290 + description: may be corrosive to metals + type: boolean + names: + - adjective: potentially corrosive to metals + - id: H300 + description: fatal if swallowed + type: boolean + names: + - adjective: 
fatal if swallowed + - id: H301 + description: toxic if swallowed + type: boolean + names: + - adjective: toxic if swallowed + - id: H302 + description: harmful if swallowed + type: boolean + names: + - adjective: harmful if swallowed + - id: H304 + description: may be fatal if swallowed and enters airways + type: boolean + names: + - adjective: probably fatal if swallowed and enters airways + - id: H310 + description: fatal in contact with skin + type: boolean + names: + - adjective: fatal in contact with skin + - id: H311 + description: toxic in contact with skin + type: boolean + names: + - adjective: is toxic in contact with skin + - id: H312 + description: harmful in contact with skin + type: boolean + names: + - adjective: is harmful in contact with skin + - id: H314 + description: causes severe skin burns and eye damage + type: boolean + names: + - adjective: causing severe skin burns and eye damage + - id: H315 + description: causes skin irritation + type: boolean + names: + - adjective: causing skin irritation + - id: H317 + description: may cause an allergic skin reaction + type: boolean + names: + - noun: potential cause for an allergic skin reaction + - id: H318 + description: causes serious eye damage + type: boolean + names: + - adjective: causing serious eye damage + - id: H319 + description: causes serious eye irritation + type: boolean + names: + - adjective: causing serious eye irritation + - id: H330 + description: fatal if inhaled + type: boolean + names: + - adjective: fatal if inhaled + - id: H331 + description: toxic if inhaled + type: boolean + names: + - adjective: toxic if inhaled + - id: H332 + description: harmful if inhaled + type: boolean + names: + - adjective: harmful if inhaled + - id: H334 + description: may cause allergy or asthma symptoms or breathing difficulties if inhaled + type: boolean + names: + - adjective: may cause allergy or asthma symptoms or breathing difficulties if inhaled + - id: H335 + description: may cause 
respiratory irritation + type: boolean + names: + - noun: potential cause for respiratory irritation + - id: H336 + description: may cause drowsiness or dizziness + type: boolean + names: + - noun: potential cause for drowsiness or dizziness + - id: H340 + description: may cause genetic defects + type: boolean + names: + - noun: potential cause for genetic defects + - id: H341 + description: suspected of causing genetic defects + type: boolean + names: + - adjective: suspected of causing genetic defects + - adjective: suspected of causing genetic alterations + - adjective: suspected of causing mutations + - id: H350 + description: may cause cancer + type: boolean + names: + - noun: potential cause for cancer + - id: H351 + description: suspected of causing cancer + type: boolean + names: + - adjective: suspected of causing cancer + - id: H360 + description: may damage fertility or the unborn child + type: boolean + names: + - noun: potential cause for fertility damage or the unborn child + - id: H361 + description: suspected of damaging fertility or the unborn child + type: boolean + names: + - adjective: suspected of damaging fertility or the unborn child + - id: H370 + description: causes damage to organs + type: boolean + names: + - adjective: causing damage to organs + - id: H371 + description: may cause damage to organs + type: boolean + names: + - noun: potential cause for damage to organs + - adjective: potentially causing organ damage + - id: H372 + description: causes damage to organs through prolonged or repeated exposure + type: boolean + names: + - adjective: causing damage to organs through prolonged or repeated exposure + - adjective: causing damage to organs + - id: H373 + description: may cause damage to organs through prolonged or repeated exposure + type: boolean + names: + - noun: potential cause for damage to organs through prolonged or repeated exposure + - id: H400 + description: very toxic to aquatic life + type: boolean + names: + - 
adjective: very toxic to aquatic life + - id: H410 + description: very toxic to aquatic life with long lasting effects + type: boolean + names: + - adjective: very toxic to aquatic life with long lasting effects + - id: H411 + description: toxic to aquatic life with long lasting effects + type: boolean + names: + - adjective: toxic to aquatic life with long lasting effects + - adjective: toxic to aquatic life + - adjective: inducing long lasting effects on aquatic life + - id: H420 + description: harms public health and the environment by destroying ozone in the upper atmosphere + type: boolean + names: + - adjective: harming public health and the environment by destroying the ozone in the upper atmosphere + - adjective: destroying the ozone in the upper atmosphere + - adjective: harming public health and the environment identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES links: - - url: https://www.sigmaaldrich.com/DE/de - description: corresponding source for the safety datasheets + - url: https://www.sigmaaldrich.com/DE/de + description: corresponding source for the safety datasheets num_points: 6420 bibtex: [] templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H206#not &NULL}{H206__names__noun}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H208#not &NULL}{H206__names__noun}. - Assistant: Here is a molecule that is {H206#not &NULL}{H206__names__noun}: {SMILES#}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H206#not &NULL}{H206__names__noun}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - User: I need a molecule that is {H206#not &NULL}{H206__names__noun}. - Assistant: Here is a molecule that is {H206#not &NULL}{H206__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H208#not &NULL}{H208__names__noun}. 
- Assistant: Here is a molecule that is {H208#not &NULL}{H208__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H211#not &NULL}{H211__names__adjective}. - Assistant: Here is a molecule that is {H211#not &NULL}{H211__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H220#not &NULL}{H220__names__noun}. - Assistant: Here is a molecule that is {H220#not &NULL}{H220__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H221#not &NULL}{H221__names__adjective}. - Assistant: Here is a molecule that is {H221#not &NULL}{H221__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H224#not &NULL}{H224__names__noun}. - Assistant: Here is a molecule that is {H224#not &NULL}{H224__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H225#not &NULL}{H225__names__noun}. - Assistant: Here is a molecule that is {H225#not &NULL}{H225__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H226#not &NULL}{H226__names__noun}. - Assistant: Here is a molecule that is {H226#not &NULL}{H226__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H228#not &NULL}{H228__names__noun}. - Assistant: Here is a molecule that is {H228#not &NULL}{H228__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H240#not &NULL}{H240__names__noun}. - Assistant: Here is a molecule that is {H240#not &NULL}{H240__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H241#not &NULL}{H241__names__adjective}. - Assistant: Here is a molecule that is {H241#not &NULL}{H241__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H242#not &NULL}{H242__names__adjective}. - Assistant: Here is a molecule that is {H242#not &NULL}{H242__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H250#not &NULL}{H250__names__adjective}. - Assistant: Here is a molecule that is {H250#not &NULL}{H250__names__adjective}: {SMILES#}. 
- - |- - User: I need a molecule that is {H251#not &NULL}{H251__names__adjective}. - Assistant: Here is a molecule that is {H251#not &NULL}{H251__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H252#not &NULL}{H252__names__adjective}. - Assistant: Here is a molecule that is {H252#not &NULL}{H252__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H260#not &NULL}{H260__names__adjective}. - Assistant: Here is a molecule that is {H260#not &NULL}{H260__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H261#not &NULL}{H261__names__adjective}. - Assistant: Here is a molecule that is {H261#not &NULL}{H261__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H270#not &NULL}{H270__names__noun}. - Assistant: Here is a molecule that is {H270#not &NULL}{H270__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H271#not &NULL}{H271__names__noun}. - Assistant: Here is a molecule that is {H271#not &NULL}{H271__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H272#not &NULL}{H272__names__noun}. - Assistant: Here is a molecule that is {H272#not &NULL}{H272__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H280#not &NULL}{H280__names__adjective}. - Assistant: Here is a molecule that is {H280#not &NULL}{H280__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H282#not &NULL}{H282__names__adjective}. - Assistant: Here is a molecule that is {H282#not &NULL}{H282__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H284#not &NULL}{H284__names__adjective}. - Assistant: Here is a molecule that is {H284#not &NULL}{H284__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H290#not &NULL}{H290__names__adjective}. - Assistant: Here is a molecule that is {H290#not &NULL}{H290__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H300#not &NULL}{H300__names__adjective}. 
- Assistant: Here is a molecule that is {H300#not &NULL}{H300__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H301#not &NULL}{H301__names__adjective}. - Assistant: Here is a molecule that is {H301#not &NULL}{H301__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H302#not &NULL}{H302__names__adjective}. - Assistant: Here is a molecule that is {H302#not &NULL}{H302__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H304#not &NULL}{H304__names__adjective}. - Assistant: Here is a molecule that is {H304#not &NULL}{H304__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H310#not &NULL}{H310__names__adjective}. - Assistant: Here is a molecule that is {H310#not &NULL}{H310__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H311#not &NULL}{H311__names__adjective}. - Assistant: Here is a molecule that is {H311#not &NULL}{H311__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H312#not &NULL}{H312__names__adjective}. - Assistant: Here is a molecule that is {H312#not &NULL}{H312__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H314#not &NULL}{H314__names__adjective}. - Assistant: Here is a molecule that is {H314#not &NULL}{H314__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H315#not &NULL}{H315__names__adjective}. - Assistant: Here is a molecule that is {H315#not &NULL}{H315__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H317#not &NULL}{H317__names__noun}. - Assistant: Here is a molecule that is {H317#not &NULL}{H317__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H318#not &NULL}{H318__names__adjective}. - Assistant: Here is a molecule that is {H318#not &NULL}{H318__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H319#not &NULL}{H319__names__adjective}. 
- Assistant: Here is a molecule that is {H319#not &NULL}{H319__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H330#not &NULL}{H330__names__adjective}. - Assistant: Here is a molecule that is {H330#not &NULL}{H330__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H331#not &NULL}{H331__names__adjective}. - Assistant: Here is a molecule that is {H331#not &NULL}{H331__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H332#not &NULL}{H332__names__adjective}. - Assistant: Here is a molecule that is {H332#not &NULL}{H332__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H334#not &NULL}{H334__names__adjective}. - Assistant: Here is a molecule that is {H334#not &NULL}{H334__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H335#not &NULL}{H335__names__noun}. - Assistant: Here is a molecule that is {H335#not &NULL}{H335__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H336#not &NULL}{H336__names__noun}. - Assistant: Here is a molecule that is {H336#not &NULL}{H336__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H340#not &NULL}{H340__names__noun}. - Assistant: Here is a molecule that is {H340#not &NULL}{H340__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H341#not &NULL}{H341__names__adjective}. - Assistant: Here is a molecule that is {H341#not &NULL}{H341__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H350#not &NULL}{H350__names__noun}. - Assistant: Here is a molecule that is {H350#not &NULL}{H350__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H351#not &NULL}{H351__names__adjective}. - Assistant: Here is a molecule that is {H351#not &NULL}{H351__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H360#not &NULL}{H360__names__noun}. - Assistant: Here is a molecule that is {H360#not &NULL}{H360__names__noun}: {SMILES#}. 
- - |- - User: I need a molecule that is {H361#not &NULL}{H361__names__adjective}. - Assistant: Here is a molecule that is {H361#not &NULL}{H361__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H370#not &NULL}{H370__names__adjective}. - Assistant: Here is a molecule that is {H370#not &NULL}{H370__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H371#not &NULL}{H371__names__noun}. - Assistant: Here is a molecule that is {H371#not &NULL}{H371__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H371#not &NULL}{H371__names__adjective}. - Assistant: Here is a molecule that is {H371#not &NULL}{H371__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H372#not &NULL}{H372__names__adjective}. - Assistant: Here is a molecule that is {H372#not &NULL}{H372__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H373#not &NULL}{H373__names__noun}. - Assistant: Here is a molecule that is {H373#not &NULL}{H373__names__noun}: {SMILES#}. - - |- - User: I need a molecule that is {H400#not &NULL}{H400__names__adjective}. - Assistant: Here is a molecule that is {H400#not &NULL}{H400__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H410#not &NULL}{H410__names__adjective}. - Assistant: Here is a molecule that is {H410#not &NULL}{H410__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H411#not &NULL}{H411__names__adjective}. - Assistant: Here is a molecule that is {H411#not &NULL}{H411__names__adjective}: {SMILES#}. - - |- - User: I need a molecule that is {H420#not &NULL}{H420__names__adjective}. - Assistant: Here is a molecule that is {H420#not &NULL}{H420__names__adjective}: {SMILES#}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H208#not &NULL}{H208__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H211#not &NULL}{H211__names__adjective}. 
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H220#not &NULL}{H220__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H221#not &NULL}{H221__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H224#not &NULL}{H224__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H225#not &NULL}{H225__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H226#not &NULL}{H226__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H228#not &NULL}{H228__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H240#not &NULL}{H240__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H241#not &NULL}{H241__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H242#not &NULL}{H242__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H250#not &NULL}{H250__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H251#not &NULL}{H251__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H252#not &NULL}{H252__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H260#not &NULL}{H260__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H261#not &NULL}{H261__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H270#not &NULL}{H270__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H271#not &NULL}{H271__names__noun}. 
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H272#not &NULL}{H272__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H280#not &NULL}{H280__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H282#not &NULL}{H282__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H284#not &NULL}{H284__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H290#not &NULL}{H290__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H300#not &NULL}{H300__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H301#not &NULL}{H301__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H302#not &NULL}{H302__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H304#not &NULL}{H304__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H310#not &NULL}{H310__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H311#not &NULL}{H311__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H312#not &NULL}{H312__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H314#not &NULL}{H314__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H315#not &NULL}{H315__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H317#not &NULL}{H317__names__noun}. 
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H318#not &NULL}{H318__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H319#not &NULL}{H319__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H330#not &NULL}{H330__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H331#not &NULL}{H331__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H332#not &NULL}{H332__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H334#not &NULL}{H334__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H335#not &NULL}{H335__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H336#not &NULL}{H336__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H340#not &NULL}{H340__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H341#not &NULL}{H341__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H350#not &NULL}{H350__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H351#not &NULL}{H351__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H360#not &NULL}{H360__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H361#not &NULL}{H361__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H370#not &NULL}{H370__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H371#not &NULL}{H371__names__noun}. 
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H371#not &NULL}{H371__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H372#not &NULL}{H372__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H373#not &NULL}{H373__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H400#not &NULL}{H400__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H410#not &NULL}{H410__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H411#not &NULL}{H411__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H420#not &NULL}{H420__names__adjective}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H208#not &NULL}{H208__names__noun}. - Assistant: Here is a molecule that is {H208#not &NULL}{H208__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H211#not &NULL}{H211__names__adjective}. - Assistant: Here is a molecule that is {H211#not &NULL}{H211__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H220#not &NULL}{H220__names__noun}. - Assistant: Here is a molecule that is {H220#not &NULL}{H220__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H221#not &NULL}{H221__names__adjective}. - Assistant: Here is a molecule that is {H221#not &NULL}{H221__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H224#not &NULL}{H224__names__noun}. - Assistant: Here is a molecule that is {H224#not &NULL}{H224__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H225#not &NULL}{H225__names__noun}. 
- Assistant: Here is a molecule that is {H225#not &NULL}{H225__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H226#not &NULL}{H226__names__noun}. - Assistant: Here is a molecule that is {H226#not &NULL}{H226__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H228#not &NULL}{H228__names__noun}. - Assistant: Here is a molecule that is {H228#not &NULL}{H228__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H240#not &NULL}{H240__names__noun}. - Assistant: Here is a molecule that is {H240#not &NULL}{H240__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H241#not &NULL}{H241__names__adjective}. - Assistant: Here is a molecule that is {H241#not &NULL}{H241__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H242#not &NULL}{H242__names__adjective}. - Assistant: Here is a molecule that is {H242#not &NULL}{H242__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H250#not &NULL}{H250__names__adjective}. - Assistant: Here is a molecule that is {H250#not &NULL}{H250__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H251#not &NULL}{H251__names__adjective}. - Assistant: Here is a molecule that is {H251#not &NULL}{H251__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H252#not &NULL}{H252__names__adjective}. - Assistant: Here is a molecule that is {H252#not &NULL}{H252__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H260#not &NULL}{H260__names__adjective}. - Assistant: Here is a molecule that is {H260#not &NULL}{H260__names__adjective}: {SMILES#}. 
- - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H261#not &NULL}{H261__names__adjective}. - Assistant: Here is a molecule that is {H261#not &NULL}{H261__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H270#not &NULL}{H270__names__noun}. - Assistant: Here is a molecule that is {H270#not &NULL}{H270__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H271#not &NULL}{H271__names__noun}. - Assistant: Here is a molecule that is {H271#not &NULL}{H271__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H272#not &NULL}{H272__names__noun}. - Assistant: Here is a molecule that is {H272#not &NULL}{H272__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H280#not &NULL}{H280__names__adjective}. - Assistant: Here is a molecule that is {H280#not &NULL}{H280__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H282#not &NULL}{H282__names__adjective}. - Assistant: Here is a molecule that is {H282#not &NULL}{H282__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H284#not &NULL}{H284__names__adjective}. - Assistant: Here is a molecule that is {H284#not &NULL}{H284__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H290#not &NULL}{H290__names__adjective}. - Assistant: Here is a molecule that is {H290#not &NULL}{H290__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H300#not &NULL}{H300__names__adjective}. - Assistant: Here is a molecule that is {H300#not &NULL}{H300__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H301#not &NULL}{H301__names__adjective}. 
- Assistant: Here is a molecule that is {H301#not &NULL}{H301__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H302#not &NULL}{H302__names__adjective}. - Assistant: Here is a molecule that is {H302#not &NULL}{H302__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H304#not &NULL}{H304__names__adjective}. - Assistant: Here is a molecule that is {H304#not &NULL}{H304__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H310#not &NULL}{H310__names__adjective}. - Assistant: Here is a molecule that is {H310#not &NULL}{H310__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H311#not &NULL}{H311__names__adjective}. - Assistant: Here is a molecule that is {H311#not &NULL}{H311__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H312#not &NULL}{H312__names__adjective}. - Assistant: Here is a molecule that is {H312#not &NULL}{H312__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H314#not &NULL}{H314__names__adjective}. - Assistant: Here is a molecule that is {H314#not &NULL}{H314__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H315#not &NULL}{H315__names__adjective}. - Assistant: Here is a molecule that is {H315#not &NULL}{H315__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H317#not &NULL}{H317__names__noun}. - Assistant: Here is a molecule that is {H317#not &NULL}{H317__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H318#not &NULL}{H318__names__adjective}. - Assistant: Here is a molecule that is {H318#not &NULL}{H318__names__adjective}: {SMILES#}. 
- - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H319#not &NULL}{H319__names__adjective}. - Assistant: Here is a molecule that is {H319#not &NULL}{H319__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H330#not &NULL}{H330__names__adjective}. - Assistant: Here is a molecule that is {H330#not &NULL}{H330__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H331#not &NULL}{H331__names__adjective}. - Assistant: Here is a molecule that is {H331#not &NULL}{H331__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H332#not &NULL}{H332__names__adjective}. - Assistant: Here is a molecule that is {H332#not &NULL}{H332__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H334#not &NULL}{H334__names__adjective}. - Assistant: Here is a molecule that is {H334#not &NULL}{H334__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H335#not &NULL}{H335__names__noun}. - Assistant: Here is a molecule that is {H335#not &NULL}{H335__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H336#not &NULL}{H336__names__noun}. - Assistant: Here is a molecule that is {H336#not &NULL}{H336__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H340#not &NULL}{H340__names__noun}. - Assistant: Here is a molecule that is {H340#not &NULL}{H340__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H341#not &NULL}{H341__names__adjective}. - Assistant: Here is a molecule that is {H341#not &NULL}{H341__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H350#not &NULL}{H350__names__noun}. 
- Assistant: Here is a molecule that is {H350#not &NULL}{H350__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H351#not &NULL}{H351__names__adjective}. - Assistant: Here is a molecule that is {H351#not &NULL}{H351__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H360#not &NULL}{H360__names__noun}. - Assistant: Here is a molecule that is {H360#not &NULL}{H360__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H361#not &NULL}{H361__names__adjective}. - Assistant: Here is a molecule that is {H361#not &NULL}{H361__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H370#not &NULL}{H370__names__adjective}. - Assistant: Here is a molecule that is {H370#not &NULL}{H370__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H371#not &NULL}{H371__names__noun}. - Assistant: Here is a molecule that is {H371#not &NULL}{H371__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H371#not &NULL}{H371__names__adjective}. - Assistant: Here is a molecule that is {H371#not &NULL}{H371__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H372#not &NULL}{H372__names__adjective}. - Assistant: Here is a molecule that is {H372#not &NULL}{H372__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H373#not &NULL}{H373__names__noun}. - Assistant: Here is a molecule that is {H373#not &NULL}{H373__names__noun}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H400#not &NULL}{H400__names__adjective}. - Assistant: Here is a molecule that is {H400#not &NULL}{H400__names__adjective}: {SMILES#}. 
- - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H410#not &NULL}{H410__names__adjective}. - Assistant: Here is a molecule that is {H410#not &NULL}{H410__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H411#not &NULL}{H411__names__adjective}. - Assistant: Here is a molecule that is {H411#not &NULL}{H411__names__adjective}: {SMILES#}. - - |- - Chemist: {#Give me|Propose|Come up with!} a molecule that is {H420#not &NULL}{H420__names__adjective}. - Assistant: Here is a molecule that is {H420#not &NULL}{H420__names__adjective}: {SMILES#}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H206#not &NULL}{H208__names__noun}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H211#not &NULL}{H211__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H220#not &NULL}{H220__names__noun}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H221#not &NULL}{H221__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H224#not &NULL}{H224__names__noun}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H225#not &NULL}{H225__names__noun}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H226#not &NULL}{H226__names__noun}. 
- Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H228#not &NULL}{H228__names__noun}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H240#not &NULL}{H240__names__noun}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H241#not &NULL}{H241__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H242#not &NULL}{H242__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H250#not &NULL}{H250__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H251#not &NULL}{H251__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H252#not &NULL}{H252__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H260#not &NULL}{H260__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H261#not &NULL}{H261__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H270#not &NULL}{H270__names__noun}. 
- Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H271#not &NULL}{H271__names__noun}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H272#not &NULL}{H272__names__noun}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H280#not &NULL}{H280__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H282#not &NULL}{H282__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H284#not &NULL}{H284__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H290#not &NULL}{H290__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H300#not &NULL}{H300__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H301#not &NULL}{H301__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H302#not &NULL}{H302__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H304#not &NULL}{H304__names__adjective}. 
- Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H310#not &NULL}{H310__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H311#not &NULL}{H311__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H312#not &NULL}{H312__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H314#not &NULL}{H314__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H315#not &NULL}{H315__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H317#not &NULL}{H317__names__noun}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H318#not &NULL}{H318__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H319#not &NULL}{H319__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H330#not &NULL}{H330__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. 
- Description: A molecule that is {H331#not &NULL}{H331__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H332#not &NULL}{H332__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H334#not &NULL}{H334__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H335#not &NULL}{H335__names__noun}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H336#not &NULL}{H336__names__noun}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H340#not &NULL}{H340__names__noun}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H341#not &NULL}{H341__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H350#not &NULL}{H350__names__noun}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H351#not &NULL}{H351__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H360#not &NULL}{H360__names__noun}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. 
- Description: A molecule that is {H361#not &NULL}{H361__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H370#not &NULL}{H370__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H371#not &NULL}{H371__names__noun}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H371#not &NULL}{H371__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H372#not &NULL}{H372__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H373#not &NULL}{H373__names__noun}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H400#not &NULL}{H400__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H410#not &NULL}{H410__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H411#not &NULL}{H411__names__adjective}. - Result: {#Molecule |!}{SMILES__description}: {SMILES#} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {H420#not &NULL}{H420__names__adjective}. 
- Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H206#not &NULL}{H206__names__noun}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H206#not &NULL}{H206__names__noun}. + Assistant: Here is a molecule that is {H206#not &NULL}{H206__names__noun}: {SMILES#}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H206#not &NULL}{H206__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + User: I need a molecule that is {H206#not &NULL}{H206__names__noun}. + Assistant: Here is a molecule that is {H206#not &NULL}{H206__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H208#not &NULL}{H208__names__noun}. + Assistant: Here is a molecule that is {H208#not &NULL}{H208__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H211#not &NULL}{H211__names__adjective}. + Assistant: Here is a molecule that is {H211#not &NULL}{H211__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H220#not &NULL}{H220__names__noun}. + Assistant: Here is a molecule that is {H220#not &NULL}{H220__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H221#not &NULL}{H221__names__adjective}. + Assistant: Here is a molecule that is {H221#not &NULL}{H221__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H224#not &NULL}{H224__names__noun}. + Assistant: Here is a molecule that is {H224#not &NULL}{H224__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H225#not &NULL}{H225__names__noun}. + Assistant: Here is a molecule that is {H225#not &NULL}{H225__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H226#not &NULL}{H226__names__noun}. + Assistant: Here is a molecule that is {H226#not &NULL}{H226__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H228#not &NULL}{H228__names__noun}.
+ Assistant: Here is a molecule that is {H228#not &NULL}{H228__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H240#not &NULL}{H240__names__noun}. + Assistant: Here is a molecule that is {H240#not &NULL}{H240__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H241#not &NULL}{H241__names__adjective}. + Assistant: Here is a molecule that is {H241#not &NULL}{H241__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H242#not &NULL}{H242__names__adjective}. + Assistant: Here is a molecule that is {H242#not &NULL}{H242__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H250#not &NULL}{H250__names__adjective}. + Assistant: Here is a molecule that is {H250#not &NULL}{H250__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H251#not &NULL}{H251__names__adjective}. + Assistant: Here is a molecule that is {H251#not &NULL}{H251__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H252#not &NULL}{H252__names__adjective}. + Assistant: Here is a molecule that is {H252#not &NULL}{H252__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H260#not &NULL}{H260__names__adjective}. + Assistant: Here is a molecule that is {H260#not &NULL}{H260__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H261#not &NULL}{H261__names__adjective}. + Assistant: Here is a molecule that is {H261#not &NULL}{H261__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H270#not &NULL}{H270__names__noun}. + Assistant: Here is a molecule that is {H270#not &NULL}{H270__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H271#not &NULL}{H271__names__noun}. + Assistant: Here is a molecule that is {H271#not &NULL}{H271__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H272#not &NULL}{H272__names__noun}. + Assistant: Here is a molecule that is {H272#not &NULL}{H272__names__noun}: {SMILES#}. 
+ - |- + User: I need a molecule that is {H280#not &NULL}{H280__names__adjective}. + Assistant: Here is a molecule that is {H280#not &NULL}{H280__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H282#not &NULL}{H282__names__adjective}. + Assistant: Here is a molecule that is {H282#not &NULL}{H282__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H284#not &NULL}{H284__names__adjective}. + Assistant: Here is a molecule that is {H284#not &NULL}{H284__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H290#not &NULL}{H290__names__adjective}. + Assistant: Here is a molecule that is {H290#not &NULL}{H290__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H300#not &NULL}{H300__names__adjective}. + Assistant: Here is a molecule that is {H300#not &NULL}{H300__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H301#not &NULL}{H301__names__adjective}. + Assistant: Here is a molecule that is {H301#not &NULL}{H301__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H302#not &NULL}{H302__names__adjective}. + Assistant: Here is a molecule that is {H302#not &NULL}{H302__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H304#not &NULL}{H304__names__adjective}. + Assistant: Here is a molecule that is {H304#not &NULL}{H304__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H310#not &NULL}{H310__names__adjective}. + Assistant: Here is a molecule that is {H310#not &NULL}{H310__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H311#not &NULL}{H311__names__adjective}. + Assistant: Here is a molecule that is {H311#not &NULL}{H311__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H312#not &NULL}{H312__names__adjective}. + Assistant: Here is a molecule that is {H312#not &NULL}{H312__names__adjective}: {SMILES#}. 
+ - |- + User: I need a molecule that is {H314#not &NULL}{H314__names__adjective}. + Assistant: Here is a molecule that is {H314#not &NULL}{H314__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H315#not &NULL}{H315__names__adjective}. + Assistant: Here is a molecule that is {H315#not &NULL}{H315__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H317#not &NULL}{H317__names__noun}. + Assistant: Here is a molecule that is {H317#not &NULL}{H317__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H318#not &NULL}{H318__names__adjective}. + Assistant: Here is a molecule that is {H318#not &NULL}{H318__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H319#not &NULL}{H319__names__adjective}. + Assistant: Here is a molecule that is {H319#not &NULL}{H319__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H330#not &NULL}{H330__names__adjective}. + Assistant: Here is a molecule that is {H330#not &NULL}{H330__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H331#not &NULL}{H331__names__adjective}. + Assistant: Here is a molecule that is {H331#not &NULL}{H331__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H332#not &NULL}{H332__names__adjective}. + Assistant: Here is a molecule that is {H332#not &NULL}{H332__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H334#not &NULL}{H334__names__adjective}. + Assistant: Here is a molecule that is {H334#not &NULL}{H334__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H335#not &NULL}{H335__names__noun}. + Assistant: Here is a molecule that is {H335#not &NULL}{H335__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H336#not &NULL}{H336__names__noun}. + Assistant: Here is a molecule that is {H336#not &NULL}{H336__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H340#not &NULL}{H340__names__noun}. 
+ Assistant: Here is a molecule that is {H340#not &NULL}{H340__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H341#not &NULL}{H341__names__adjective}. + Assistant: Here is a molecule that is {H341#not &NULL}{H341__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H350#not &NULL}{H350__names__noun}. + Assistant: Here is a molecule that is {H350#not &NULL}{H350__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H351#not &NULL}{H351__names__adjective}. + Assistant: Here is a molecule that is {H351#not &NULL}{H351__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H360#not &NULL}{H360__names__noun}. + Assistant: Here is a molecule that is {H360#not &NULL}{H360__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H361#not &NULL}{H361__names__adjective}. + Assistant: Here is a molecule that is {H361#not &NULL}{H361__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H370#not &NULL}{H370__names__adjective}. + Assistant: Here is a molecule that is {H370#not &NULL}{H370__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H371#not &NULL}{H371__names__noun}. + Assistant: Here is a molecule that is {H371#not &NULL}{H371__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H371#not &NULL}{H371__names__adjective}. + Assistant: Here is a molecule that is {H371#not &NULL}{H371__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H372#not &NULL}{H372__names__adjective}. + Assistant: Here is a molecule that is {H372#not &NULL}{H372__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H373#not &NULL}{H373__names__noun}. + Assistant: Here is a molecule that is {H373#not &NULL}{H373__names__noun}: {SMILES#}. + - |- + User: I need a molecule that is {H400#not &NULL}{H400__names__adjective}. + Assistant: Here is a molecule that is {H400#not &NULL}{H400__names__adjective}: {SMILES#}. 
+ - |- + User: I need a molecule that is {H410#not &NULL}{H410__names__adjective}. + Assistant: Here is a molecule that is {H410#not &NULL}{H410__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H411#not &NULL}{H411__names__adjective}. + Assistant: Here is a molecule that is {H411#not &NULL}{H411__names__adjective}: {SMILES#}. + - |- + User: I need a molecule that is {H420#not &NULL}{H420__names__adjective}. + Assistant: Here is a molecule that is {H420#not &NULL}{H420__names__adjective}: {SMILES#}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H208#not &NULL}{H208__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H211#not &NULL}{H211__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H220#not &NULL}{H220__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H221#not &NULL}{H221__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H224#not &NULL}{H224__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H225#not &NULL}{H225__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H226#not &NULL}{H226__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H228#not &NULL}{H228__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H240#not &NULL}{H240__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H241#not &NULL}{H241__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H242#not &NULL}{H242__names__adjective}. 
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H250#not &NULL}{H250__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H251#not &NULL}{H251__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H252#not &NULL}{H252__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H260#not &NULL}{H260__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H261#not &NULL}{H261__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H270#not &NULL}{H270__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H271#not &NULL}{H271__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H272#not &NULL}{H272__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H280#not &NULL}{H280__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H282#not &NULL}{H282__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H284#not &NULL}{H284__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H290#not &NULL}{H290__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H300#not &NULL}{H300__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H301#not &NULL}{H301__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H302#not &NULL}{H302__names__adjective}. 
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H304#not &NULL}{H304__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H310#not &NULL}{H310__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H311#not &NULL}{H311__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H312#not &NULL}{H312__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H314#not &NULL}{H314__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H315#not &NULL}{H315__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H317#not &NULL}{H317__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H318#not &NULL}{H318__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H319#not &NULL}{H319__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H330#not &NULL}{H330__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H331#not &NULL}{H331__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H332#not &NULL}{H332__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H334#not &NULL}{H334__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H335#not &NULL}{H335__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H336#not &NULL}{H336__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H340#not &NULL}{H340__names__noun}. 
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H341#not &NULL}{H341__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H350#not &NULL}{H350__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H351#not &NULL}{H351__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H360#not &NULL}{H360__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H361#not &NULL}{H361__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H370#not &NULL}{H370__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H371#not &NULL}{H371__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H371#not &NULL}{H371__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H372#not &NULL}{H372__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H373#not &NULL}{H373__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H400#not &NULL}{H400__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H410#not &NULL}{H410__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H411#not &NULL}{H411__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {H420#not &NULL}{H420__names__adjective}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H208#not &NULL}{H208__names__noun}. + Assistant: Here is a molecule that is {H208#not &NULL}{H208__names__noun}: {SMILES#}. 
+ - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H211#not &NULL}{H211__names__adjective}. + Assistant: Here is a molecule that is {H211#not &NULL}{H211__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H220#not &NULL}{H220__names__noun}. + Assistant: Here is a molecule that is {H220#not &NULL}{H220__names__noun}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H221#not &NULL}{H221__names__adjective}. + Assistant: Here is a molecule that is {H221#not &NULL}{H221__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H224#not &NULL}{H224__names__noun}. + Assistant: Here is a molecule that is {H224#not &NULL}{H224__names__noun}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H225#not &NULL}{H225__names__noun}. + Assistant: Here is a molecule that is {H225#not &NULL}{H225__names__noun}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H226#not &NULL}{H226__names__noun}. + Assistant: Here is a molecule that is {H226#not &NULL}{H226__names__noun}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H228#not &NULL}{H228__names__noun}. + Assistant: Here is a molecule that is {H228#not &NULL}{H228__names__noun}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H240#not &NULL}{H240__names__noun}. + Assistant: Here is a molecule that is {H240#not &NULL}{H240__names__noun}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H241#not &NULL}{H241__names__adjective}. + Assistant: Here is a molecule that is {H241#not &NULL}{H241__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H242#not &NULL}{H242__names__adjective}. + Assistant: Here is a molecule that is {H242#not &NULL}{H242__names__adjective}: {SMILES#}. 
+ - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H250#not &NULL}{H250__names__adjective}. + Assistant: Here is a molecule that is {H250#not &NULL}{H250__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H251#not &NULL}{H251__names__adjective}. + Assistant: Here is a molecule that is {H251#not &NULL}{H251__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H252#not &NULL}{H252__names__adjective}. + Assistant: Here is a molecule that is {H252#not &NULL}{H252__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H260#not &NULL}{H260__names__adjective}. + Assistant: Here is a molecule that is {H260#not &NULL}{H260__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H261#not &NULL}{H261__names__adjective}. + Assistant: Here is a molecule that is {H261#not &NULL}{H261__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H270#not &NULL}{H270__names__noun}. + Assistant: Here is a molecule that is {H270#not &NULL}{H270__names__noun}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H271#not &NULL}{H271__names__noun}. + Assistant: Here is a molecule that is {H271#not &NULL}{H271__names__noun}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H272#not &NULL}{H272__names__noun}. + Assistant: Here is a molecule that is {H272#not &NULL}{H272__names__noun}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H280#not &NULL}{H280__names__adjective}. + Assistant: Here is a molecule that is {H280#not &NULL}{H280__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H282#not &NULL}{H282__names__adjective}. 
+ Assistant: Here is a molecule that is {H282#not &NULL}{H282__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H284#not &NULL}{H284__names__adjective}. + Assistant: Here is a molecule that is {H284#not &NULL}{H284__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H290#not &NULL}{H290__names__adjective}. + Assistant: Here is a molecule that is {H290#not &NULL}{H290__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H300#not &NULL}{H300__names__adjective}. + Assistant: Here is a molecule that is {H300#not &NULL}{H300__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H301#not &NULL}{H301__names__adjective}. + Assistant: Here is a molecule that is {H301#not &NULL}{H301__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H302#not &NULL}{H302__names__adjective}. + Assistant: Here is a molecule that is {H302#not &NULL}{H302__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H304#not &NULL}{H304__names__adjective}. + Assistant: Here is a molecule that is {H304#not &NULL}{H304__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H310#not &NULL}{H310__names__adjective}. + Assistant: Here is a molecule that is {H310#not &NULL}{H310__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H311#not &NULL}{H311__names__adjective}. + Assistant: Here is a molecule that is {H311#not &NULL}{H311__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H312#not &NULL}{H312__names__adjective}. + Assistant: Here is a molecule that is {H312#not &NULL}{H312__names__adjective}: {SMILES#}. 
+ - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H314#not &NULL}{H314__names__adjective}. + Assistant: Here is a molecule that is {H314#not &NULL}{H314__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H315#not &NULL}{H315__names__adjective}. + Assistant: Here is a molecule that is {H315#not &NULL}{H315__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H317#not &NULL}{H317__names__noun}. + Assistant: Here is a molecule that is {H317#not &NULL}{H317__names__noun}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H318#not &NULL}{H318__names__adjective}. + Assistant: Here is a molecule that is {H318#not &NULL}{H318__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H319#not &NULL}{H319__names__adjective}. + Assistant: Here is a molecule that is {H319#not &NULL}{H319__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H330#not &NULL}{H330__names__adjective}. + Assistant: Here is a molecule that is {H330#not &NULL}{H330__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H331#not &NULL}{H331__names__adjective}. + Assistant: Here is a molecule that is {H331#not &NULL}{H331__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H332#not &NULL}{H332__names__adjective}. + Assistant: Here is a molecule that is {H332#not &NULL}{H332__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H334#not &NULL}{H334__names__adjective}. + Assistant: Here is a molecule that is {H334#not &NULL}{H334__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H335#not &NULL}{H335__names__noun}. 
+ Assistant: Here is a molecule that is {H335#not &NULL}{H335__names__noun}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H336#not &NULL}{H336__names__noun}. + Assistant: Here is a molecule that is {H336#not &NULL}{H336__names__noun}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H340#not &NULL}{H340__names__noun}. + Assistant: Here is a molecule that is {H340#not &NULL}{H340__names__noun}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H341#not &NULL}{H341__names__adjective}. + Assistant: Here is a molecule that is {H341#not &NULL}{H341__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H350#not &NULL}{H350__names__noun}. + Assistant: Here is a molecule that is {H350#not &NULL}{H350__names__noun}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H351#not &NULL}{H351__names__adjective}. + Assistant: Here is a molecule that is {H351#not &NULL}{H351__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H360#not &NULL}{H360__names__noun}. + Assistant: Here is a molecule that is {H360#not &NULL}{H360__names__noun}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H361#not &NULL}{H361__names__adjective}. + Assistant: Here is a molecule that is {H361#not &NULL}{H361__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H370#not &NULL}{H370__names__adjective}. + Assistant: Here is a molecule that is {H370#not &NULL}{H370__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H371#not &NULL}{H371__names__noun}. + Assistant: Here is a molecule that is {H371#not &NULL}{H371__names__noun}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H371#not &NULL}{H371__names__adjective}. 
+ Assistant: Here is a molecule that is {H371#not &NULL}{H371__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H372#not &NULL}{H372__names__adjective}. + Assistant: Here is a molecule that is {H372#not &NULL}{H372__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H373#not &NULL}{H373__names__noun}. + Assistant: Here is a molecule that is {H373#not &NULL}{H373__names__noun}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H400#not &NULL}{H400__names__adjective}. + Assistant: Here is a molecule that is {H400#not &NULL}{H400__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H410#not &NULL}{H410__names__adjective}. + Assistant: Here is a molecule that is {H410#not &NULL}{H410__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H411#not &NULL}{H411__names__adjective}. + Assistant: Here is a molecule that is {H411#not &NULL}{H411__names__adjective}: {SMILES#}. + - |- + Chemist: {#Give me|Propose|Come up with!} a molecule that is {H420#not &NULL}{H420__names__adjective}. + Assistant: Here is a molecule that is {H420#not &NULL}{H420__names__adjective}: {SMILES#}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H208#not &NULL}{H208__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H211#not &NULL}{H211__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H220#not &NULL}{H220__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description.
+ Description: A molecule that is {H221#not &NULL}{H221__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H224#not &NULL}{H224__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H225#not &NULL}{H225__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H226#not &NULL}{H226__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H228#not &NULL}{H228__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H240#not &NULL}{H240__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H241#not &NULL}{H241__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H242#not &NULL}{H242__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H250#not &NULL}{H250__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H251#not &NULL}{H251__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. 
+ Description: A molecule that is {H252#not &NULL}{H252__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H260#not &NULL}{H260__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H261#not &NULL}{H261__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H270#not &NULL}{H270__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H271#not &NULL}{H271__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H272#not &NULL}{H272__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H280#not &NULL}{H280__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H282#not &NULL}{H282__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H284#not &NULL}{H284__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H290#not &NULL}{H290__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. 
+ Description: A molecule that is {H300#not &NULL}{H300__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H301#not &NULL}{H301__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H302#not &NULL}{H302__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H304#not &NULL}{H304__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H310#not &NULL}{H310__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H311#not &NULL}{H311__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H312#not &NULL}{H312__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H314#not &NULL}{H314__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H315#not &NULL}{H315__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H317#not &NULL}{H317__names__noun}. 
+ Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H318#not &NULL}{H318__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H319#not &NULL}{H319__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H330#not &NULL}{H330__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H331#not &NULL}{H331__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H332#not &NULL}{H332__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H334#not &NULL}{H334__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H335#not &NULL}{H335__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H336#not &NULL}{H336__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H340#not &NULL}{H340__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H341#not &NULL}{H341__names__adjective}. 
+ Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H350#not &NULL}{H350__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H351#not &NULL}{H351__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H360#not &NULL}{H360__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H361#not &NULL}{H361__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H370#not &NULL}{H370__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H371#not &NULL}{H371__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H371#not &NULL}{H371__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H372#not &NULL}{H372__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H373#not &NULL}{H373__names__noun}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H400#not &NULL}{H400__names__adjective}. 
+ Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H410#not &NULL}{H410__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H411#not &NULL}{H411__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {H420#not &NULL}{H420__names__adjective}. + Result: {#Molecule |!}{SMILES__description}: {SMILES#} diff --git a/data/tabular/skin_reaction/meta.yaml b/data/tabular/skin_reaction/meta.yaml index 16425ecd1..cda530fe5 100644 --- a/data/tabular/skin_reaction/meta.yaml +++ b/data/tabular/skin_reaction/meta.yaml @@ -1,144 +1,143 @@ ---- name: skin_reaction description: |- - Repetitive exposure to a chemical agent can induce an immune reaction - in inherently susceptible individuals that leads to skin sensitization. The - dataset used in this study was retrieved from the ICCVAM (Interagency Coordinating - Committee on the Validation of Alternative Methods) report on the rLLNA. + Repetitive exposure to a chemical agent can induce an immune reaction + in inherently susceptible individuals that leads to skin sensitization. The + dataset used in this study was retrieved from the ICCVAM (Interagency Coordinating + Committee on the Validation of Alternative Methods) report on the rLLNA. targets: - - id: skin_reaction - description: whether it can cause skin reaction (1) or not (0). 
- units: - type: boolean - names: - - noun: skin reaction - - noun: skin sensitization - - noun: agent induced skin reaction - - noun: drug induced skin immune reaction - - verb: causes a skin reaction - - verb: causes a skin sensitization - - verb: causes a drug induced skin immune reaction - - gerund: causing a skin reaction - uris: - - http://purl.bioontology.org/ontology/MEDDRA/10040914 + - id: skin_reaction + description: whether it can cause skin reaction (1) or not (0). + units: + type: boolean + names: + - noun: skin reaction + - noun: skin sensitization + - noun: agent induced skin reaction + - noun: drug induced skin immune reaction + - verb: causes a skin reaction + - verb: causes a skin sensitization + - verb: causes a drug induced skin immune reaction + - gerund: causing a skin reaction + uris: + - http://purl.bioontology.org/ontology/MEDDRA/10040914 benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: compound_name - type: Other - description: drug name - names: - - noun: compound - - noun: compound name + - id: SMILES + type: SMILES + description: SMILES + - id: compound_name + type: Other + description: drug name + names: + - noun: compound + - noun: compound name license: CC BY 4.0 links: - - url: https://doi.org/10.1016/j.taap.2014.12.014 - description: corresponding publication - - url: https://ntp.niehs.nih.gov/iccvam/docs/immunotox_docs/llna-ld/tmer.pdf - description: related publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#skin-reaction - description: Data source + - url: https://doi.org/10.1016/j.taap.2014.12.014 + description: corresponding publication + - url: https://ntp.niehs.nih.gov/iccvam/docs/immunotox_docs/llna-ld/tmer.pdf + description: related publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#skin-reaction + description: Data source 
num_points: 404 bibtex: - - |- - @article{Alves2015, - doi = {10.1016/j.taap.2014.12.014}, - url = {https://doi.org/10.1016/j.taap.2014.12.014}, - year = {2015}, - month = apr, - publisher = {Elsevier BV}, - volume = {284}, - number = {2}, - pages = {262--272}, - author = {Vinicius M. Alves and Eugene Muratov and Denis Fourches and Judy Strickland - and Nicole Kleinstreuer and Carolina H. Andrade and Alexander Tropsha}, - title = {Predicting chemically-induced skin reactions. Part I: QSAR models of skin sensitization - and their application to identify potentially hazardous compounds}, - journal = {Toxicology and Applied Pharmacology} + - |- + @article{Alves2015, + doi = {10.1016/j.taap.2014.12.014}, + url = {https://doi.org/10.1016/j.taap.2014.12.014}, + year = {2015}, + month = apr, + publisher = {Elsevier BV}, + volume = {284}, + number = {2}, + pages = {262--272}, + author = {Vinicius M. Alves and Eugene Muratov and Denis Fourches and Judy Strickland + and Nicole Kleinstreuer and Carolina H. Andrade and Alexander Tropsha}, + title = {Predicting chemically-induced skin reactions. Part I: QSAR models of skin sensitization + and their application to identify potentially hazardous compounds}, + journal = {Toxicology and Applied Pharmacology} templates: - - The {#molecule |!}{SMILES__description} {SMILES#} causes {skin_reaction#no &NULL}{skin_reaction__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {skin_reaction#not &NULL}{skin_reaction__names__gerund}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, there is {skin_reaction#no &NULL}{skin_reaction__names__noun}. - - The {#molecule |!}{SMILES__description} {SMILES#} does {skin_reaction#not &NULL}{skin_reaction__names__verb}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {skin_reaction__names__gerund}. 
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result: {skin_reaction#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {skin_reaction__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete|definite!} sentence. - Result: This molecule is {skin_reaction#not &NULL}{skin_reaction__names__gerund}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {skin_reaction__names__gerund}. - Result: {SMILES#} - - |- - User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {skin_reaction__names__gerund}? - Assistant: {skin_reaction#No&Yes}, this molecule is {skin_reaction#not &NULL}{skin_reaction__names__gerund}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {skin_reaction__names__gerund}? - Assistant: {skin_reaction#No&Yes}, it is {skin_reaction#not &NULL}{skin_reaction__names__gerund}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {skin_reaction#not &NULL}{skin_reaction__names__gerund}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {skin_reaction#not &NULL}{skin_reaction__names__gerund}? - Assistant: This is a molecule that is {skin_reaction#not &NULL}{skin_reaction__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. 
!}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {skin_reaction#not &NULL}be {skin_reaction__names__gerund}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {skin_reaction#not &NULL}{skin_reaction__names__gerund}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {skin_reaction#not &NULL}be {skin_reaction__names__gerund}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {skin_reaction#not &NULL}{skin_reaction__names__gerund}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {skin_reaction__names__gerund}:{skin_reaction#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {skin_reaction__names__gerund}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{skin_reaction#False&True} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {skin_reaction__names__gerund}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {skin_reaction__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {skin_reaction%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. 
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {skin_reaction__names__gerund}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {skin_reaction%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {skin_reaction#not &NULL}{skin_reaction__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%skin_reaction%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {skin_reaction#not &NULL}{skin_reaction__names__gerund}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%skin_reaction%} - Answer:{%multiple_choice_result} + - The {#molecule |!}{SMILES__description} {SMILES#} causes {skin_reaction#no &NULL}{skin_reaction__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {skin_reaction#not &NULL}{skin_reaction__names__gerund}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, there is {skin_reaction#no &NULL}{skin_reaction__names__noun}. + - The {#molecule |!}{SMILES__description} {SMILES#} does {skin_reaction#not &NULL}{skin_reaction__names__verb}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {skin_reaction__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result: {skin_reaction#False&True} + - |- + Task: Please classify a molecule based on the description. 
+ Description: A molecule that is {skin_reaction__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete|definite!} sentence. + Result: This molecule is {skin_reaction#not &NULL}{skin_reaction__names__gerund}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {skin_reaction__names__gerund}. + Result: {SMILES#} + - |- + User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {skin_reaction__names__gerund}? + Assistant: {skin_reaction#No&Yes}, this molecule is {skin_reaction#not &NULL}{skin_reaction__names__gerund}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {skin_reaction__names__gerund}? + Assistant: {skin_reaction#No&Yes}, it is {skin_reaction#not &NULL}{skin_reaction__names__gerund}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {skin_reaction#not &NULL}{skin_reaction__names__gerund}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {skin_reaction#not &NULL}{skin_reaction__names__gerund}? + Assistant: This is a molecule that is {skin_reaction#not &NULL}{skin_reaction__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {skin_reaction#not &NULL}be {skin_reaction__names__gerund}. 
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {skin_reaction#not &NULL}{skin_reaction__names__gerund}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {skin_reaction#not &NULL}be {skin_reaction__names__gerund}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {skin_reaction#not &NULL}{skin_reaction__names__gerund}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {skin_reaction__names__gerund}:{skin_reaction#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {skin_reaction__names__gerund}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{skin_reaction#False&True} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {skin_reaction__names__gerund}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {skin_reaction__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {skin_reaction%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {skin_reaction__names__gerund}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. 
+ Options: + {skin_reaction%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {skin_reaction#not &NULL}{skin_reaction__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%skin_reaction%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {skin_reaction#not &NULL}{skin_reaction__names__gerund}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%skin_reaction%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/smiles_to_3d/meta.yaml b/data/tabular/smiles_to_3d/meta.yaml index 45c62bc99..0e9a0ffe1 100644 --- a/data/tabular/smiles_to_3d/meta.yaml +++ b/data/tabular/smiles_to_3d/meta.yaml @@ -1,104 +1,103 @@ ---- name: qm9 description: |- - QM9 is a comprehensive dataset that provides geometric, energetic, - electronic and thermodynamic properties for a subset of GDB-17 - database, comprising 134 thousand stable organic molecules with up - to 9 heavy atoms. All molecules are modeled using density - functional theory (B3LYP/6-31G(2df,p) based DFT). + QM9 is a comprehensive dataset that provides geometric, energetic, + electronic and thermodynamic properties for a subset of GDB-17 + database, comprising 134 thousand stable organic molecules with up + to 9 heavy atoms. All molecules are modeled using density + functional theory (B3LYP/6-31G(2df,p) based DFT). 
targets: - - id: xyz - description: Structure of the molecule, modeled at B3LYP/6-31G(2df,p) level of theory in XYZ format - type: text - names: - - noun: geometry in XYZ format (after optimization on B3LYP/6-31G(2df,p) level of theory) - - noun: 3D-structure in XYZ format (after optimization on B3LYP/6-31G(2df,p) level of theory) - - noun: three-dimensional structure in XYZ format (after optimization on B3LYP/6-31G(2df,p) level of theory) - - noun: content of a XYZ file with the geometry (after optimization on B3LYP/6-31G(2df,p) level of theory) - - noun: molecular geometry in XYZ format (optimized with B3LYP/6-31G(2df,p) level of theory) - - noun: 3D molecular structure in XYZ format (following optimization using B3LYP/6-31G(2df,p) theory) - - noun: three-dimensional molecular structure in XYZ format (after B3LYP/6-31G(2df,p) level of theory optimization) - - noun: data from a XYZ file containing optimized geometry (using B3LYP/6-31G(2df,p) theory) - - noun: content within a XYZ file with optimized molecular geometry (following B3LYP/6-31G(2df,p) theory) - - id: mol2000 - description: Structure of the molecule, modeled at B3LYP/6-31G(2df,p) level of theory in MOL2000 format, bonds inferred using RDKit - type: text - names: - - noun: geometry in V2000 Molfile format (after optimization on B3LYP/6-31G(2df,p) level of theory) - - noun: 3D-structure in V2000 Molfile format (after optimization on B3LYP/6-31G(2df,p) level of theory) - - noun: three-dimensional structure in V2000 Molfile format (after optimization on B3LYP/6-31G(2df,p) level of theory) - - noun: content of a V2000 Molfile file with the geometry (after optimization on B3LYP/6-31G(2df,p) level of theory) - - noun: molecular geometry in V2000 Molfile format (optimized with B3LYP/6-31G(2df,p) level of theory) - - noun: 3D molecular structure in V2000 Molfile format (following optimization using B3LYP/6-31G(2df,p) theory) - - noun: three-dimensional molecular structure in MOLV2000 Molfile format (after 
B3LYP/6-31G(2df,p) level of theory optimization) - - noun: data from a V2000 Molfile format file containing optimized geometry (using B3LYP/6-31G(2df,p) theory) - - noun: content within a V2000 Molfile format file with optimized molecular geometry (following B3LYP/6-31G(2df,p) theory) - - id: mol3000 - description: Structure of the molecule, modeled at B3LYP/6-31G(2df,p) level of theory in MOL3000 format, bonds inferred using RDKit - type: text - names: - - noun: geometry in V3000 Molfile format (after optimization on B3LYP/6-31G(2df,p) level of theory) - - noun: 3D-structure in V3000 Molfile format (after optimization on B3LYP/6-31G(2df,p) level of theory) - - noun: three-dimensional structure in MOLV3000 Molfile format3000 format (after optimization on B3LYP/6-31G(2df,p) level of theory) - - noun: content of a V3000 Molfile format file with the geometry (after optimization on B3LYP/6-31G(2df,p) level of theory) - - noun: molecular geometry in V3000 Molfile format (optimized with B3LYP/6-31G(2df,p) level of theory) - - noun: 3D molecular structure in V3000 Molfile format (following optimization using B3LYP/6-31G(2df,p) theory) - - noun: three-dimensional molecular structure in MOLV3000 Molfile format (after B3LYP/6-31G(2df,p) level of theory optimization) - - noun: data from a V3000 Molfile format file containing optimized geometry (using B3LYP/6-31G(2df,p) theory) - - noun: content within a V3000 Molfile format file with optimized molecular geometry (following B3LYP/6-31G(2df,p) theory) + - id: xyz + description: Structure of the molecule, modeled at B3LYP/6-31G(2df,p) level of theory in XYZ format + type: text + names: + - noun: geometry in XYZ format (after optimization on B3LYP/6-31G(2df,p) level of theory) + - noun: 3D-structure in XYZ format (after optimization on B3LYP/6-31G(2df,p) level of theory) + - noun: three-dimensional structure in XYZ format (after optimization on B3LYP/6-31G(2df,p) level of theory) + - noun: content of a XYZ file with the geometry 
(after optimization on B3LYP/6-31G(2df,p) level of theory) + - noun: molecular geometry in XYZ format (optimized with B3LYP/6-31G(2df,p) level of theory) + - noun: 3D molecular structure in XYZ format (following optimization using B3LYP/6-31G(2df,p) theory) + - noun: three-dimensional molecular structure in XYZ format (after B3LYP/6-31G(2df,p) level of theory optimization) + - noun: data from a XYZ file containing optimized geometry (using B3LYP/6-31G(2df,p) theory) + - noun: content within a XYZ file with optimized molecular geometry (following B3LYP/6-31G(2df,p) theory) + - id: mol2000 + description: Structure of the molecule, modeled at B3LYP/6-31G(2df,p) level of theory in MOL2000 format, bonds inferred using RDKit + type: text + names: + - noun: geometry in V2000 Molfile format (after optimization on B3LYP/6-31G(2df,p) level of theory) + - noun: 3D-structure in V2000 Molfile format (after optimization on B3LYP/6-31G(2df,p) level of theory) + - noun: three-dimensional structure in V2000 Molfile format (after optimization on B3LYP/6-31G(2df,p) level of theory) + - noun: content of a V2000 Molfile file with the geometry (after optimization on B3LYP/6-31G(2df,p) level of theory) + - noun: molecular geometry in V2000 Molfile format (optimized with B3LYP/6-31G(2df,p) level of theory) + - noun: 3D molecular structure in V2000 Molfile format (following optimization using B3LYP/6-31G(2df,p) theory) + - noun: three-dimensional molecular structure in MOLV2000 Molfile format (after B3LYP/6-31G(2df,p) level of theory optimization) + - noun: data from a V2000 Molfile format file containing optimized geometry (using B3LYP/6-31G(2df,p) theory) + - noun: content within a V2000 Molfile format file with optimized molecular geometry (following B3LYP/6-31G(2df,p) theory) + - id: mol3000 + description: Structure of the molecule, modeled at B3LYP/6-31G(2df,p) level of theory in MOL3000 format, bonds inferred using RDKit + type: text + names: + - noun: geometry in V3000 Molfile 
format (after optimization on B3LYP/6-31G(2df,p) level of theory) + - noun: 3D-structure in V3000 Molfile format (after optimization on B3LYP/6-31G(2df,p) level of theory) + - noun: three-dimensional structure in MOLV3000 Molfile format3000 format (after optimization on B3LYP/6-31G(2df,p) level of theory) + - noun: content of a V3000 Molfile format file with the geometry (after optimization on B3LYP/6-31G(2df,p) level of theory) + - noun: molecular geometry in V3000 Molfile format (optimized with B3LYP/6-31G(2df,p) level of theory) + - noun: 3D molecular structure in V3000 Molfile format (following optimization using B3LYP/6-31G(2df,p) theory) + - noun: three-dimensional molecular structure in MOLV3000 Molfile format (after B3LYP/6-31G(2df,p) level of theory optimization) + - noun: data from a V3000 Molfile format file containing optimized geometry (using B3LYP/6-31G(2df,p) theory) + - noun: content within a V3000 Molfile format file with optimized molecular geometry (following B3LYP/6-31G(2df,p) theory) identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 num_points: 133885 links: - - url: https://data.dtu.dk/articles/dataset/xyz_files_of_the_QM9_molecules/19780570 - description: original data source + - url: https://data.dtu.dk/articles/dataset/xyz_files_of_the_QM9_molecules/19780570 + description: original data source bibtex: - - |- - @article{ramakrishnan2014quantum, - title={Quantum chemistry structures and properties of 134 kilo molecules}, - author={Ramakrishnan, Raghunathan and Dral, Pavlo O and Rupp, Matthias and Von Lilienfeld, O Anatole}, - journal={Scientific data}, - volume={1}, - number={1}, - pages={1--7}, - year={2014}, - publisher={Nature Publishing Group}} - - |- - @article{ruddigkeit2012enumeration, - title={Enumeration of 166 billion organic small molecules in the chemical universe database GDB-17}, - author={Ruddigkeit, Lars and Van Deursen, Ruud and Blum, Lorenz 
C and Reymond, Jean-Louis}, - journal={Journal of chemical information and modeling}, - volume={52}, - number={11}, - pages={2864--2875}, - year={2012}, - publisher={ACS Publications}} + - |- + @article{ramakrishnan2014quantum, + title={Quantum chemistry structures and properties of 134 kilo molecules}, + author={Ramakrishnan, Raghunathan and Dral, Pavlo O and Rupp, Matthias and Von Lilienfeld, O Anatole}, + journal={Scientific data}, + volume={1}, + number={1}, + pages={1--7}, + year={2014}, + publisher={Nature Publishing Group}} + - |- + @article{ruddigkeit2012enumeration, + title={Enumeration of 166 billion organic small molecules in the chemical universe database GDB-17}, + author={Ruddigkeit, Lars and Van Deursen, Ruud and Blum, Lorenz C and Reymond, Jean-Louis}, + journal={Journal of chemical information and modeling}, + volume={52}, + number={11}, + pages={2864--2875}, + year={2012}, + publisher={ACS Publications}} templates: - - |- - Question: {#What is the|Can you generate the|Can you provide me with the!} {xyz__names__noun} of the {#molecule|compound|chemical!} with the {SMILES__description} {SMILES#}? - Answer: {xyz#} - - |- - Question: {#What is the|Can you generate the|Can you provide me with the!} {mol2000__names__noun} of the {#molecule|compound|chemical!} with the {SMILES__description} {SMILES#}? - Answer: {mol2000#} - - |- - Question: {#What is the|Can you generate the|Can you provide me with the!} {mol3000__names__noun} of the {#molecule|compound|chemical!} with the {SMILES__description} {SMILES#}? - Answer: {mol3000#} - - |- - Question: {#What is the|Can you generate the|Can you provide me with the!} {SMILES__description} of the {#molecule|compound|chemical!} with {xyz__names__noun} {xyz#}? - Answer: {SMILES#} - - |- - Question: {#What is the|Can you generate the|Can you provide me with the!} {SMILES__description} of the {#molecule|compound|chemical!} with the {mol2000__names__noun} {mol2000#}? 
- Answer: {SMILES#} - - |- - Question: {#What is the|Can you generate the|Can you provide me with the!} {SMILES__description} of the {#molecule|compound|chemical!} with the {mol3000__names__noun} {mol3000#}? - Answer: {SMILES#} - - |- - User: I need to generate {#conformers|3D geometries|3D structures!} of a {#molecule|compound|chemical!}. - Assistant: {#What is|Can you provide!} the {SMILES__description} of the {#molecule|compound|chemical!}? - User: {SMILES#} - Assistant: The {xyz__names__noun} of the {#molecule|compound|chemical!} is {xyz#}. - User: {#Can you give me the|What is the|And how about the!} {mol2000__names__noun}? - Assistant: The {mol2000__names__noun} of the {#molecule|compound|chemical!} is {mol2000#}. + - |- + Question: {#What is the|Can you generate the|Can you provide me with the!} {xyz__names__noun} of the {#molecule|compound|chemical!} with the {SMILES__description} {SMILES#}? + Answer: {xyz#} + - |- + Question: {#What is the|Can you generate the|Can you provide me with the!} {mol2000__names__noun} of the {#molecule|compound|chemical!} with the {SMILES__description} {SMILES#}? + Answer: {mol2000#} + - |- + Question: {#What is the|Can you generate the|Can you provide me with the!} {mol3000__names__noun} of the {#molecule|compound|chemical!} with the {SMILES__description} {SMILES#}? + Answer: {mol3000#} + - |- + Question: {#What is the|Can you generate the|Can you provide me with the!} {SMILES__description} of the {#molecule|compound|chemical!} with {xyz__names__noun} {xyz#}? + Answer: {SMILES#} + - |- + Question: {#What is the|Can you generate the|Can you provide me with the!} {SMILES__description} of the {#molecule|compound|chemical!} with the {mol2000__names__noun} {mol2000#}? + Answer: {SMILES#} + - |- + Question: {#What is the|Can you generate the|Can you provide me with the!} {SMILES__description} of the {#molecule|compound|chemical!} with the {mol3000__names__noun} {mol3000#}? 
+ Answer: {SMILES#} + - |- + User: I need to generate {#conformers|3D geometries|3D structures!} of a {#molecule|compound|chemical!}. + Assistant: {#What is|Can you provide!} the {SMILES__description} of the {#molecule|compound|chemical!}? + User: {SMILES#} + Assistant: The {xyz__names__noun} of the {#molecule|compound|chemical!} is {xyz#}. + User: {#Can you give me the|What is the|And how about the!} {mol2000__names__noun}? + Assistant: The {mol2000__names__noun} of the {#molecule|compound|chemical!} is {mol2000#}. diff --git a/data/tabular/solubility_aqsoldb/meta.yaml b/data/tabular/solubility_aqsoldb/meta.yaml index 30804254f..327fcf110 100644 --- a/data/tabular/solubility_aqsoldb/meta.yaml +++ b/data/tabular/solubility_aqsoldb/meta.yaml @@ -1,69 +1,66 @@ ---- name: solubility_aqsoldb description: |- - Aqueous solubility measures a drug's ability to dissolve in water. - Poor water solubility could lead to slow drug absorptions, inadequate bioavailablity - and even induce toxicity. More than 40 percent of new chemical entities are - not soluble. + Aqueous solubility measures a drug's ability to dissolve in water. + Poor water solubility could lead to slow drug absorptions, inadequate bioavailablity + and even induce toxicity. More than 40 percent of new chemical entities are + not soluble. 
targets: - - id: aqeuous_solubility - description: aqueous solubility - units: log(mol/L) - type: continuous - names: - - noun: aqueous solubility (at room temperature) - - noun: solubility in water (at room temperature) - - noun: water-solubility at room temperature - uris: - - http://purl.jp/bio/4/id/200906006880450101 - - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C60821 + - id: aqeuous_solubility + description: aqueous solubility + units: log(mol/L) + type: continuous + names: + - noun: aqueous solubility (at room temperature) + - noun: solubility in water (at room temperature) + - noun: water-solubility at room temperature + uris: + - http://purl.jp/bio/4/id/200906006880450101 + - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C60821 benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: compound_name - type: Other - names: - - noun: compound name - - noun: drug name - - noun: generic drug name - description: compound name + - id: SMILES + type: SMILES + description: SMILES + - id: compound_name + type: Other + names: + - noun: compound name + - noun: drug name + - noun: generic drug name + description: compound name license: CC BY 4.0 links: - - url: https://doi.org/10.1038/s41597-019-0151-1 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#solubility-aqsoldb - description: data source + - url: https://doi.org/10.1038/s41597-019-0151-1 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#solubility-aqsoldb + description: data source num_points: 9982 bibtex: - - |- - @article{Sorkun_2019, - doi = {10.1038/s41597-019-0151-1}, - url = {https://doi.org/10.1038%2Fs41597-019-0151-1}, - year = {2019}, - month = aug, - publisher = {Springer Science and Business Media LLC}, - volume = {6}, - number = {1}, - 
author = {Murat Cihan Sorkun and Abhishek Khetan and - Suleyman Er}, - title = {AqSolDB, a curated reference set of aqueous solubility - and 2D descriptors for a diverse set of compounds}, - journal = {Scientific Data} + - |- + @article{Sorkun_2019, + doi = {10.1038/s41597-019-0151-1}, + url = {https://doi.org/10.1038%2Fs41597-019-0151-1}, + year = {2019}, + month = aug, + publisher = {Springer Science and Business Media LLC}, + volume = {6}, + number = {1}, + author = {Murat Cihan Sorkun and Abhishek Khetan and + Suleyman Er}, + title = {AqSolDB, a curated reference set of aqueous solubility + and 2D descriptors for a diverse set of compounds}, + journal = {Scientific Data} templates: - #- The name of the {#compound|drug|chemical|molecule!} with the {#SMILES|SMILES string|!} {SMILES#} is {compound_name#}. # doesn't work with class-balanced sampling - - The {aqeuous_solubility__names__noun} of the {#compound|drug|chemical|molecule!} with the {SMILES__description} {SMILES#} is {aqeuous_solubility#} - {aqeuous_solubility__units}. - - The {aqeuous_solubility__names__noun} of the {#compound|drug|chemical|molecule!} with the {compound_name__names__noun} {compound_name#} is {aqeuous_solubility#} - {aqeuous_solubility__units}. - - |- - User: I want to {#design|discover|find|identify|!} a {#compound|drug|chemical|molecule!} with a particular {aqeuous_solubility__names__noun}. - Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}I would need to know the {aqeuous_solubility__names__noun} of the {#compound|drug|chemical|molecule!} you want to {#design|discover|find|identify|!}. - User: The {aqeuous_solubility__names__noun} should be {aqeuous_solubility#} {aqeuous_solubility__units}. - Assistant: I {#recommend|suggest|propose|advise|!} the {#compound|drug|chemical|molecule!} with the {SMILES__description} {SMILES#}. 
{#Is there anything else I can do for you?|Do you need anything else?|Anything else?|!} - User: {#Yes, |!}I would like to know the {compound_name__names__noun} of the {#compound|drug|chemical|molecule!}. - Assistant: The {compound_name__names__noun} of the {#compound|drug|chemical|molecule!} with the {SMILES__description} {SMILES#} is {compound_name#}. + #- The name of the {#compound|drug|chemical|molecule!} with the {#SMILES|SMILES string|!} {SMILES#} is {compound_name#}. # doesn't work with class-balanced sampling + - The {aqeuous_solubility__names__noun} of the {#compound|drug|chemical|molecule!} with the {SMILES__description} {SMILES#} is {aqeuous_solubility#} {aqeuous_solubility__units}. + - The {aqeuous_solubility__names__noun} of the {#compound|drug|chemical|molecule!} with the {compound_name__names__noun} {compound_name#} is {aqeuous_solubility#} {aqeuous_solubility__units}. + - |- + User: I want to {#design|discover|find|identify|!} a {#compound|drug|chemical|molecule!} with a particular {aqeuous_solubility__names__noun}. + Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}I would need to know the {aqeuous_solubility__names__noun} of the {#compound|drug|chemical|molecule!} you want to {#design|discover|find|identify|!}. + User: The {aqeuous_solubility__names__noun} should be {aqeuous_solubility#} {aqeuous_solubility__units}. + Assistant: I {#recommend|suggest|propose|advise|!} the {#compound|drug|chemical|molecule!} with the {SMILES__description} {SMILES#}. {#Is there anything else I can do for you?|Do you need anything else?|Anything else?|!} + User: {#Yes, |!}I would like to know the {compound_name__names__noun} of the {#compound|drug|chemical|molecule!}. + Assistant: The {compound_name__names__noun} of the {#compound|drug|chemical|molecule!} with the {SMILES__description} {SMILES#} is {compound_name#}. 
diff --git a/data/tabular/sr_are_tox21/meta.yaml b/data/tabular/sr_are_tox21/meta.yaml index 74fb4f776..bdd94ed5c 100644 --- a/data/tabular/sr_are_tox21/meta.yaml +++ b/data/tabular/sr_are_tox21/meta.yaml @@ -1,136 +1,135 @@ ---- name: sr_are_tox21 description: |- - Tox21 is a data challenge which contains qualitative toxicity measurements - for 7,831 compounds on 12 different targets, such as nuclear receptors and stress - response pathways. + Tox21 is a data challenge which contains qualitative toxicity measurements + for 7,831 compounds on 12 different targets, such as nuclear receptors and stress + response pathways. targets: - - id: toxicity_SR-ARE - description: whether it shows activity in the SR-ARE toxicity assay (1) or not (0) - units: - type: boolean - names: - - noun: SR-ARE toxicity - - noun: SR-Antioxidant response element toxicity - - noun: Antioxidant response element toxicity - - verb: shows activity in the SR-ARE toxicity assay - - verb: shows activity in the SR-Antioxidant response element toxicity assay - - verb: shows activity in the Antioxidant response element toxicity assay - - adjective: toxic in the SR-ARE assay - - adjective: toxic in the SR-Antioxidant response element assay - - adjective: toxic in the Antioxidant response element assay - - gerund: showing activity in the SR-ARE toxicity assay - uris: + - id: toxicity_SR-ARE + description: whether it shows activity in the SR-ARE toxicity assay (1) or not (0) + units: + type: boolean + names: + - noun: SR-ARE toxicity + - noun: SR-Antioxidant response element toxicity + - noun: Antioxidant response element toxicity + - verb: shows activity in the SR-ARE toxicity assay + - verb: shows activity in the SR-Antioxidant response element toxicity assay + - verb: shows activity in the Antioxidant response element toxicity assay + - adjective: toxic in the SR-ARE assay + - adjective: toxic in the SR-Antioxidant response element assay + - adjective: toxic in the Antioxidant response element assay + 
- gerund: showing activity in the SR-ARE toxicity assay + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: http://dx.doi.org/10.3389/fenvs.2017.00003 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 - description: data source - - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 - description: assay name + - url: http://dx.doi.org/10.3389/fenvs.2017.00003 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 + description: data source + - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 + description: assay name num_points: 5832 bibtex: - - |- - @article{Huang2017, - doi = {10.3389/fenvs.2017.00003}, - url = {https://doi.org/10.3389/fenvs.2017.00003}, - year = {2017}, - month = jan, - publisher = {Frontiers Media SA}, - volume = {5}, - author = {Ruili Huang and Menghang Xia}, - title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor - and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, - journal = {Frontiers in Environmental Science} + - |- + @article{Huang2017, + doi = {10.3389/fenvs.2017.00003}, + url = {https://doi.org/10.3389/fenvs.2017.00003}, + year = {2017}, + month = jan, + publisher = {Frontiers Media SA}, + volume = {5}, + author = {Ruili Huang and Menghang Xia}, + title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor + and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, + journal = {Frontiers in Environmental Science} templates: - - The molecule with the {SMILES__description} {#representation 
of |!}{SMILES#} is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__gerund}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_SR-ARE#no &NULL}{toxicity_SR-ARE__names__noun} {#properties|characteristics|features!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_SR-ARE#not &NULL}identified as {toxicity_SR-ARE__names__adjective}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_SR-ARE__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. - Result: {toxicity_SR-ARE#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_SR-ARE__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_SR-ARE__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_SR-ARE__names__adjective}? - Assistant: {toxicity_SR-ARE#No&Yes}, this molecule is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}. 
- - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_SR-ARE__names__adjective}? - Assistant: {toxicity_SR-ARE#No&Yes}, it is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}? - Assistant: This is a molecule that is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {toxicity_SR-ARE#not &NULL}be {toxicity_SR-ARE__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {toxicity_SR-ARE#not &NULL}be {toxicity_SR-ARE__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {toxicity_SR-ARE__names__adjective}:{toxicity_SR-ARE#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_SR-ARE__names__adjective}. 
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{toxicity_SR-ARE#False&True} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_SR-ARE__names__adjective}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-ARE__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_SR-ARE%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-ARE__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_SR-ARE%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_SR-ARE%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
- Options: - {SMILES%toxicity_SR-ARE%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__gerund}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_SR-ARE#no &NULL}{toxicity_SR-ARE__names__noun} {#properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_SR-ARE#not &NULL}identified as {toxicity_SR-ARE__names__adjective}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_SR-ARE__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. + Result: {toxicity_SR-ARE#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_SR-ARE__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_SR-ARE__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_SR-ARE__names__adjective}? 
+ Assistant: {toxicity_SR-ARE#No&Yes}, this molecule is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_SR-ARE__names__adjective}? + Assistant: {toxicity_SR-ARE#No&Yes}, it is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}? + Assistant: This is a molecule that is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {toxicity_SR-ARE#not &NULL}be {toxicity_SR-ARE__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {toxicity_SR-ARE#not &NULL}be {toxicity_SR-ARE__names__adjective}. 
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {toxicity_SR-ARE__names__adjective}:{toxicity_SR-ARE#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_SR-ARE__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{toxicity_SR-ARE#False&True} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_SR-ARE__names__adjective}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-ARE__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_SR-ARE%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-ARE__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_SR-ARE%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {SMILES%toxicity_SR-ARE%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_SR-ARE#not &NULL}{toxicity_SR-ARE__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%toxicity_SR-ARE%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/sr_atad5_tox21/meta.yaml b/data/tabular/sr_atad5_tox21/meta.yaml index f44cd98b6..c236e2e04 100644 --- a/data/tabular/sr_atad5_tox21/meta.yaml +++ b/data/tabular/sr_atad5_tox21/meta.yaml @@ -1,136 +1,135 @@ ---- name: sr_atad5_tox21 description: |- - Tox21 is a data challenge which contains qualitative toxicity measurements - for 7,831 compounds on 12 different targets, such as nuclear receptors and stress - response pathways. + Tox21 is a data challenge which contains qualitative toxicity measurements + for 7,831 compounds on 12 different targets, such as nuclear receptors and stress + response pathways. 
targets: - - id: toxicity_SR-ATAD5 - description: whether it shows activitiy in the SR-ATAD5 assay (1) or not (0) - units: - type: boolean - names: - - noun: SR-Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity - - noun: Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity - - noun: Luciferase-tagged ATAD5 toxicity - - verb: shows activity in the SR-Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity assay - - verb: is active in the Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity assay - - verb: is active in the Luciferase-tagged ATAD5 toxicity assay - - adjective: toxic in the SR-Luciferase-tagged ATAD5 in human embryonic kidney cells assay - - adjective: toxic in the Luciferase-tagged ATAD5 in human embryonic kidney cells assay - - adjective: toxic in the Luciferase-tagged ATAD5 assay - - gerund: showing SR-ATAD5 toxicity - uris: + - id: toxicity_SR-ATAD5 + description: whether it shows activity in the SR-ATAD5 assay (1) or not (0) + units: + type: boolean + names: + - noun: SR-Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity + - noun: Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity + - noun: Luciferase-tagged ATAD5 toxicity + - verb: shows activity in the SR-Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity assay + - verb: is active in the Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity assay + - verb: is active in the Luciferase-tagged ATAD5 toxicity assay + - adjective: toxic in the SR-Luciferase-tagged ATAD5 in human embryonic kidney cells assay + - adjective: toxic in the Luciferase-tagged ATAD5 in human embryonic kidney cells assay + - adjective: toxic in the Luciferase-tagged ATAD5 assay + - gerund: showing SR-ATAD5 toxicity + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description:
SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: http://dx.doi.org/10.3389/fenvs.2017.00003 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 - description: data source - - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 - description: assay name + - url: http://dx.doi.org/10.3389/fenvs.2017.00003 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 + description: data source + - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 + description: assay name num_points: 7072 bibtex: - - |- - @article{Huang2017, - doi = {10.3389/fenvs.2017.00003}, - url = {https://doi.org/10.3389/fenvs.2017.00003}, - year = {2017}, - month = jan, - publisher = {Frontiers Media SA}, - volume = {5}, - author = {Ruili Huang and Menghang Xia}, - title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor - and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, - journal = {Frontiers in Environmental Science} + - |- + @article{Huang2017, + doi = {10.3389/fenvs.2017.00003}, + url = {https://doi.org/10.3389/fenvs.2017.00003}, + year = {2017}, + month = jan, + publisher = {Frontiers Media SA}, + volume = {5}, + author = {Ruili Huang and Menghang Xia}, + title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor + and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, + journal = {Frontiers in Environmental Science} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__gerund}. 
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_SR-ATAD5#no &NULL}{toxicity_SR-ATAD5__names__noun} {#properties|characteristics|features!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_SR-ATAD5#not &NULL}identified as {toxicity_SR-ATAD5__names__adjective}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_SR-ATAD5__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. - Result: {toxicity_SR-ATAD5#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_SR-ATAD5__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_SR-ATAD5__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_SR-ATAD5__names__adjective}? - Assistant: {toxicity_SR-ATAD5#No&Yes}, this molecule is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_SR-ATAD5__names__adjective}? - Assistant: {toxicity_SR-ATAD5#No&Yes}, it is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. 
- - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}? - Assistant: This is a molecule that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {toxicity_SR-ATAD5#not &NULL}be {toxicity_SR-ATAD5__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {toxicity_SR-ATAD5#not &NULL}be {toxicity_SR-ATAD5__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {toxicity_SR-ATAD5__names__adjective}:{toxicity_SR-ATAD5#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_SR-ATAD5__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. 
- Result:{toxicity_SR-ATAD5#False&True} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_SR-ATAD5__names__adjective}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-ATAD5__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_SR-ATAD5%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-ATAD5__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_SR-ATAD5%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_SR-ATAD5%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_SR-ATAD5%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. 
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__gerund}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_SR-ATAD5#no &NULL}{toxicity_SR-ATAD5__names__noun} {#properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_SR-ATAD5#not &NULL}identified as {toxicity_SR-ATAD5__names__adjective}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_SR-ATAD5__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. + Result: {toxicity_SR-ATAD5#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_SR-ATAD5__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_SR-ATAD5__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_SR-ATAD5__names__adjective}? + Assistant: {toxicity_SR-ATAD5#No&Yes}, this molecule is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_SR-ATAD5__names__adjective}? 
+ Assistant: {toxicity_SR-ATAD5#No&Yes}, it is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}? + Assistant: This is a molecule that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {toxicity_SR-ATAD5#not &NULL}be {toxicity_SR-ATAD5__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {toxicity_SR-ATAD5#not &NULL}be {toxicity_SR-ATAD5__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {toxicity_SR-ATAD5__names__adjective}:{toxicity_SR-ATAD5#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_SR-ATAD5__names__adjective}. 
+ {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{toxicity_SR-ATAD5#False&True} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_SR-ATAD5__names__adjective}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-ATAD5__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_SR-ATAD5%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-ATAD5__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_SR-ATAD5%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%toxicity_SR-ATAD5%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {SMILES%toxicity_SR-ATAD5%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/sr_hse_tox21/meta.yaml b/data/tabular/sr_hse_tox21/meta.yaml index 3f095246b..e38e9a1be 100644 --- a/data/tabular/sr_hse_tox21/meta.yaml +++ b/data/tabular/sr_hse_tox21/meta.yaml @@ -1,136 +1,135 @@ ---- name: sr_hse_tox21 description: |- - Tox21 is a data challenge which contains qualitative toxicity measurements - for 7,831 compounds on 12 different targets, such as nuclear receptors and stress - response pathways. + Tox21 is a data challenge which contains qualitative toxicity measurements + for 7,831 compounds on 12 different targets, such as nuclear receptors and stress + response pathways. targets: - - id: toxicity_SR-HSE - description: whether it shows activitiy in the SR-HSE assay (1) or not (0) - units: - type: boolean - names: - - noun: SR-HSE toxicity - - noun: SR-Heat shock response toxicity - - noun: Heat shock response toxicity - - verb: shows SR-HSE toxicity - - verb: is active in the SR-Heat shock response toxicity assay - - verb: is active in the Heat shock response toxicity assay - - adjective: toxic in the SR-HSE assay - - adjective: toxic in the SR-Heat shock response assay - - adjective: toxic in the Heat shock response assay - - gerund: showing SR-HSE toxicity - uris: + - id: toxicity_SR-HSE + description: whether it shows activity in the SR-HSE assay (1) or not (0) + units: + type: boolean + names: + - noun: SR-HSE toxicity + - noun: SR-Heat shock response toxicity + - noun: Heat shock response toxicity + - verb: shows SR-HSE toxicity + - verb: is active in the SR-Heat shock response toxicity assay + - verb: is active in the Heat shock response toxicity assay + - adjective: toxic in the SR-HSE assay + - adjective: toxic in the SR-Heat shock response assay + - adjective: toxic in the Heat shock response assay + - gerund: showing SR-HSE toxicity + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC
+ link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: http://dx.doi.org/10.3389/fenvs.2017.00003 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 - description: data source - - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 - description: assay name + - url: http://dx.doi.org/10.3389/fenvs.2017.00003 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 + description: data source + - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 + description: assay name num_points: 6467 bibtex: - - |- - @article{Huang2017, - doi = {10.3389/fenvs.2017.00003}, - url = {https://doi.org/10.3389/fenvs.2017.00003}, - year = {2017}, - month = jan, - publisher = {Frontiers Media SA}, - volume = {5}, - author = {Ruili Huang and Menghang Xia}, - title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor - and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, - journal = {Frontiers in Environmental Science} + - |- + @article{Huang2017, + doi = {10.3389/fenvs.2017.00003}, + url = {https://doi.org/10.3389/fenvs.2017.00003}, + year = {2017}, + month = jan, + publisher = {Frontiers Media SA}, + volume = {5}, + author = {Ruili Huang and Menghang Xia}, + title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor + and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, + journal = {Frontiers in Environmental Science} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}. 
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__gerund}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_SR-HSE#no &NULL}{toxicity_SR-HSE__names__noun} {#properties|characteristics|features!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_SR-HSE#not &NULL}identified as {toxicity_SR-HSE__names__adjective}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_SR-HSE__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. - Result: {toxicity_SR-HSE#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_SR-HSE__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_SR-HSE__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_SR-HSE__names__adjective}? - Assistant: {toxicity_SR-HSE#No&Yes}, this molecule is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_SR-HSE__names__adjective}? 
- Assistant: {toxicity_SR-HSE#No&Yes}, it is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}? - Assistant: This is a molecule that is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {toxicity_SR-HSE#not &NULL}be {toxicity_SR-HSE__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {toxicity_SR-HSE#not &NULL}be {toxicity_SR-HSE__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {toxicity_SR-HSE__names__adjective}:{toxicity_SR-HSE#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_SR-HSE__names__adjective}. 
- {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{toxicity_SR-HSE#False&True} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_SR-HSE__names__adjective}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-HSE__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_SR-HSE%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-HSE__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_SR-HSE%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_SR-HSE%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
- Options: - {SMILES%toxicity_SR-HSE%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__gerund}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_SR-HSE#no &NULL}{toxicity_SR-HSE__names__noun} {#properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_SR-HSE#not &NULL}identified as {toxicity_SR-HSE__names__adjective}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_SR-HSE__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. + Result: {toxicity_SR-HSE#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_SR-HSE__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_SR-HSE__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_SR-HSE__names__adjective}? 
+ Assistant: {toxicity_SR-HSE#No&Yes}, this molecule is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_SR-HSE__names__adjective}? + Assistant: {toxicity_SR-HSE#No&Yes}, it is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}? + Assistant: This is a molecule that is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {toxicity_SR-HSE#not &NULL}be {toxicity_SR-HSE__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {toxicity_SR-HSE#not &NULL}be {toxicity_SR-HSE__names__adjective}. 
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {toxicity_SR-HSE__names__adjective}:{toxicity_SR-HSE#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_SR-HSE__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{toxicity_SR-HSE#False&True} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_SR-HSE__names__adjective}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-HSE__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_SR-HSE%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-HSE__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_SR-HSE%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. 
+ Options: + {SMILES%toxicity_SR-HSE%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_SR-HSE#not &NULL}{toxicity_SR-HSE__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%toxicity_SR-HSE%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/sr_mmp_tox21/meta.yaml b/data/tabular/sr_mmp_tox21/meta.yaml index 0495e46d7..c8a6b9a1a 100644 --- a/data/tabular/sr_mmp_tox21/meta.yaml +++ b/data/tabular/sr_mmp_tox21/meta.yaml @@ -1,136 +1,135 @@ ---- name: sr_mmp_tox21 description: |- - Tox21 is a data challenge which contains qualitative toxicity measurements - for 7,831 compounds on 12 different targets, such as nuclear receptors and stress - response pathways. + Tox21 is a data challenge which contains qualitative toxicity measurements + for 7,831 compounds on 12 different targets, such as nuclear receptors and stress + response pathways. 
targets: - - id: toxicity_SR-MMP - description: whether it shows activitiy in the SR-MMP assay (1) or not (0) - units: - type: boolean - names: - - noun: SR-MMP toxicity - - noun: SR-Mitochondrial membrane potential toxicity - - noun: Mitochondrial membrane potential toxicity - - verb: is active in the SR-MMP toxicity assay - - verb: is active in the R-Mitochondrial membrane potential toxicity assay - - verb: is active in the itochondrial membrane potential toxicity assay - - adjective: toxic in the SR-MMP assay - - adjective: toxic in the SR-Mitochondrial membrane potential assay - - adjective: toxic in the Mitochondrial membrane potential assay - - gerund: showing SR-MMP toxicity - uris: + - id: toxicity_SR-MMP + description: whether it shows activity in the SR-MMP assay (1) or not (0) + units: + type: boolean + names: + - noun: SR-MMP toxicity + - noun: SR-Mitochondrial membrane potential toxicity + - noun: Mitochondrial membrane potential toxicity + - verb: is active in the SR-MMP toxicity assay + - verb: is active in the SR-Mitochondrial membrane potential toxicity assay + - verb: is active in the Mitochondrial membrane potential toxicity assay + - adjective: toxic in the SR-MMP assay + - adjective: toxic in the SR-Mitochondrial membrane potential assay + - adjective: toxic in the Mitochondrial membrane potential assay + - gerund: showing SR-MMP toxicity + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: http://dx.doi.org/10.3389/fenvs.2017.00003 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 - description: data source - - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 - description: assay name + - url:
http://dx.doi.org/10.3389/fenvs.2017.00003 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 + description: data source + - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 + description: assay name num_points: 5810 bibtex: - - |- - @article{Huang2017, - doi = {10.3389/fenvs.2017.00003}, - url = {https://doi.org/10.3389/fenvs.2017.00003}, - year = {2017}, - month = jan, - publisher = {Frontiers Media SA}, - volume = {5}, - author = {Ruili Huang and Menghang Xia}, - title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor - and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, - journal = {Frontiers in Environmental Science} + - |- + @article{Huang2017, + doi = {10.3389/fenvs.2017.00003}, + url = {https://doi.org/10.3389/fenvs.2017.00003}, + year = {2017}, + month = jan, + publisher = {Frontiers Media SA}, + volume = {5}, + author = {Ruili Huang and Menghang Xia}, + title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor + and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, + journal = {Frontiers in Environmental Science} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__gerund}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_SR-MMP#no &NULL}{toxicity_SR-MMP__names__noun} {#properties|characteristics|features!}. - - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_SR-MMP#not &NULL}identified as {toxicity_SR-MMP__names__adjective}. 
- - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_SR-MMP__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. - Result: {toxicity_SR-MMP#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_SR-MMP__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_SR-MMP__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_SR-MMP__names__adjective}? - Assistant: {toxicity_SR-MMP#No&Yes}, this molecule is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_SR-MMP__names__adjective}? - Assistant: {toxicity_SR-MMP#No&Yes}, it is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}? 
- Assistant: This is a molecule that is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {toxicity_SR-MMP#not &NULL}be {toxicity_SR-MMP__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {toxicity_SR-MMP#not &NULL}be {toxicity_SR-MMP__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {toxicity_SR-MMP__names__adjective}:{toxicity_SR-MMP#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_SR-MMP__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{toxicity_SR-MMP#False&True} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_SR-MMP__names__adjective}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-MMP__names__adjective}? 
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_SR-MMP%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-MMP__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_SR-MMP%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_SR-MMP%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_SR-MMP%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__gerund}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_SR-MMP#no &NULL}{toxicity_SR-MMP__names__noun} {#properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_SR-MMP#not &NULL}identified as {toxicity_SR-MMP__names__adjective}. 
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_SR-MMP__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. + Result: {toxicity_SR-MMP#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_SR-MMP__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_SR-MMP__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_SR-MMP__names__adjective}? + Assistant: {toxicity_SR-MMP#No&Yes}, this molecule is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_SR-MMP__names__adjective}? + Assistant: {toxicity_SR-MMP#No&Yes}, it is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}? 
+ Assistant: This is a molecule that is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {toxicity_SR-MMP#not &NULL}be {toxicity_SR-MMP__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {toxicity_SR-MMP#not &NULL}be {toxicity_SR-MMP__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {toxicity_SR-MMP__names__adjective}:{toxicity_SR-MMP#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_SR-MMP__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{toxicity_SR-MMP#False&True} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_SR-MMP__names__adjective}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-MMP__names__adjective}? 
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_SR-MMP%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-MMP__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_SR-MMP%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%toxicity_SR-MMP%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_SR-MMP#not &NULL}{toxicity_SR-MMP__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%toxicity_SR-MMP%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/sr_p53_tox21/meta.yaml b/data/tabular/sr_p53_tox21/meta.yaml index c4691b97a..9721404c5 100644 --- a/data/tabular/sr_p53_tox21/meta.yaml +++ b/data/tabular/sr_p53_tox21/meta.yaml @@ -1,137 +1,135 @@ ---- name: sr_p53_tox21 description: |- - Tox21 is a data challenge which contains qualitative toxicity measurements - for 7,831 compounds on 12 different targets, such as nuclear receptors and stress - response pathways. + Tox21 is a data challenge which contains qualitative toxicity measurements + for 7,831 compounds on 12 different targets, such as nuclear receptors and stress + response pathways. 
targets: - - id: toxicity_SR-p53 - description: whether it shows activitiy in the SR-p53 assay (1) or not (0) - units: - type: boolean - names: - - noun: SR-p53 toxicity - - noun: SR-p53 response toxicity - - noun: p53 response toxicity - - verb: is active in the SR-p53 toxicity assay - - verb: is active in the SR-p53 response toxicity assay - - verb: is active in the p53 response toxicity assay - - adjective: toxic in the SR-p53 assay - - adjective: toxic in the SR-p53 response assay - - adjective: toxic in the p53 response assay - - gerund: showing SR-p53 toxicity - uris: + - id: toxicity_SR-p53 + description: whether it shows activity in the SR-p53 assay (1) or not (0) + units: + type: boolean + names: + - noun: SR-p53 toxicity + - noun: SR-p53 response toxicity + - noun: p53 response toxicity + - verb: is active in the SR-p53 toxicity assay + - verb: is active in the SR-p53 response toxicity assay + - verb: is active in the p53 response toxicity assay + - adjective: toxic in the SR-p53 assay + - adjective: toxic in the SR-p53 response assay + - adjective: toxic in the p53 response assay + - gerund: showing SR-p53 toxicity + uris: benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: http://dx.doi.org/10.3389/fenvs.2017.00003 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 - description: data source - - url: https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 - description: assay name + - url: http://dx.doi.org/10.3389/fenvs.2017.00003 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/tox/#tox21 + description: data source + - url:
https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-018-2523-5/tables/3 + description: assay name num_points: 6774 bibtex: - - |- - @article{Huang2017, - doi = {10.3389/fenvs.2017.00003}, - url = {https://doi.org/10.3389/fenvs.2017.00003}, - year = {2017}, - month = jan, - publisher = {Frontiers Media SA}, - volume = {5}, - author = {Ruili Huang and Menghang Xia}, - title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor - and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, - journal = {Frontiers in Environmental Science} + - |- + @article{Huang2017, + doi = {10.3389/fenvs.2017.00003}, + url = {https://doi.org/10.3389/fenvs.2017.00003}, + year = {2017}, + month = jan, + publisher = {Frontiers Media SA}, + volume = {5}, + author = {Ruili Huang and Menghang Xia}, + title = {Editorial: Tox21 Challenge to Build Predictive Models of Nuclear Receptor + and Stress Response Pathways As Mediated by Exposure to Environmental Toxicants and Drugs}, + journal = {Frontiers in Environmental Science} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__gerund}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_SR-p53#no &NULL}{toxicity_SR-p53__names__noun} {#properties|characteristics|features!}. - - The {#molecule|chemical!} with the {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_SR-p53#not &NULL}identified - as {toxicity_SR-p53__names__adjective}. - - The {#molecule|chemical!} with the {SMILES__description} {SMILES#} is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}. - - |- - Task: Please classify a molecule based on the description. 
- Description: A molecule that is {toxicity_SR-p53__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. - Result: {toxicity_SR-p53#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_SR-p53__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_SR-p53__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_SR-p53__names__adjective}? - Assistant: {toxicity_SR-p53#No&Yes}, this molecule is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_SR-p53__names__adjective}? - Assistant: {toxicity_SR-p53#No&Yes}, it is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}? - Assistant: This is a molecule that is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. 
- Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {toxicity_SR-p53#not &NULL}be {toxicity_SR-p53__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {toxicity_SR-p53#not &NULL}be {toxicity_SR-p53__names__adjective}. - Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {toxicity_SR-p53__names__adjective}:{toxicity_SR-p53#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {toxicity_SR-p53__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{toxicity_SR-p53#False&True} - - |- - Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|chemical compound!} based on the {#text |!}description{# below|!}. - Description: A molecule that is {toxicity_SR-p53__names__adjective}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-p53__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. 
- Options: - {toxicity_SR-p53%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-p53__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {toxicity_SR-p53%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_SR-p53%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%toxicity_SR-p53%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__gerund}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {toxicity_SR-p53#no &NULL}{toxicity_SR-p53__names__noun} {#properties|characteristics|features!}. + - The {#molecule|chemical!} with the {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {toxicity_SR-p53#not &NULL}identified as {toxicity_SR-p53__names__adjective}. 
+ - The {#molecule|chemical!} with the {SMILES__description} {SMILES#} is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_SR-p53__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. + Result: {toxicity_SR-p53#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_SR-p53__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_SR-p53__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {toxicity_SR-p53__names__adjective}? + Assistant: {toxicity_SR-p53#No&Yes}, this molecule is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {toxicity_SR-p53__names__adjective}? + Assistant: {toxicity_SR-p53#No&Yes}, it is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}? + Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}? 
+ Assistant: This is a molecule that is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {toxicity_SR-p53#not &NULL}be {toxicity_SR-p53__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {toxicity_SR-p53#not &NULL}be {toxicity_SR-p53__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {toxicity_SR-p53__names__adjective}:{toxicity_SR-p53#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {toxicity_SR-p53__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{toxicity_SR-p53#False&True} + - |- + Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|chemical compound!} based on the {#text |!}description{# below|!}. + Description: A molecule that is {toxicity_SR-p53__names__adjective}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. 
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-p53__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_SR-p53%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {toxicity_SR-p53__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {toxicity_SR-p53%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%toxicity_SR-p53%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {toxicity_SR-p53#not &NULL}{toxicity_SR-p53__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%toxicity_SR-p53%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/suzuki_miyaura_sach/meta.yaml b/data/tabular/suzuki_miyaura_sach/meta.yaml index 73fc93925..f23ade270 100644 --- a/data/tabular/suzuki_miyaura_sach/meta.yaml +++ b/data/tabular/suzuki_miyaura_sach/meta.yaml @@ -1,102 +1,100 @@ ---- name: suzuki_miyaura_sach description: |- - High-throughput experimentation palladium-catalyzed Suzuki-Miyaura C-C - cross-coupling data set with yields measured by liquid chromatography-mass-spectrometry. 
+ High-throughput experimentation palladium-catalyzed Suzuki-Miyaura C-C + cross-coupling data set with yields measured by liquid chromatography-mass-spectrometry. targets: - - id: yield - description: Reaction yields analyzed by LCMS - units: \% - type: continuous - names: - - noun: reaction yield - - noun: yield - - noun: reaction yield (measured by LCMS) - - id: masked_rxn_smiles - type: text - description: reaction SMILES with one element masked - names: - - noun: reaction SMILES with one element masked as `MASK` - - noun: reaction SMILES with one element hidden as `MASK` - - noun: masked reaction SMILES (one component masked as `MASK`) - - noun: masked reaction SMILES string (one component masked as `MASK`) - - noun: masked RXNSMILES (one component masked as `MASK`) - - id: educt_string - type: text - description: reaction educts - names: - - noun: reaction educts - - noun: educts - - noun: starting materials - - id: product_string - type: text - description: reaction products - names: - - noun: reaction products - - noun: products + - id: yield + description: Reaction yields analyzed by LCMS + units: \% + type: continuous + names: + - noun: reaction yield + - noun: yield + - noun: reaction yield (measured by LCMS) + - id: masked_rxn_smiles + type: text + description: reaction SMILES with one element masked + names: + - noun: reaction SMILES with one element masked as `MASK` + - noun: reaction SMILES with one element hidden as `MASK` + - noun: masked reaction SMILES (one component masked as `MASK`) + - noun: masked reaction SMILES string (one component masked as `MASK`) + - noun: masked RXNSMILES (one component masked as `MASK`) + - id: educt_string + type: text + description: reaction educts + names: + - noun: reaction educts + - noun: educts + - noun: starting materials + - id: product_string + type: text + description: reaction products + names: + - noun: reaction products + - noun: products identifiers: - - id: RXNSMILES - type: RXNSMILES - description: 
RXNSMILES - names: - - noun: reaction SMILES - - noun: reaction SMILES string - - noun: RXNSMILES - - noun: reaction SMILES (RXNSMILES) - - id: missing_component - type: text - description: masked element + - id: RXNSMILES + type: RXNSMILES + description: RXNSMILES + names: + - noun: reaction SMILES + - noun: reaction SMILES string + - noun: RXNSMILES + - noun: reaction SMILES (RXNSMILES) + - id: missing_component + type: text + description: masked element license: MIT links: - - url: https://doi.org/10.1126/science.aap9112 - description: corresponding publication - - url: https://github.com/rxn4chemistry/rxn_yields/blob/master/rxn_yields/data.py - description: preprocessing - - url: https://github.com/reymond-group/drfp/tree/main/data - description: dataset + - url: https://doi.org/10.1126/science.aap9112 + description: corresponding publication + - url: https://github.com/rxn4chemistry/rxn_yields/blob/master/rxn_yields/data.py + description: preprocessing + - url: https://github.com/reymond-group/drfp/tree/main/data + description: dataset num_points: 5760 url: https://doi.org/10.1126/science.aap9112 bibtex: - - |- - @article{perera2018platform, - title={A platform for automated nanomole-scale reaction screening and micromole-scale synthesis in flow}, - author={Perera, Damith and Tucker, Joseph W and Brahmbhatt, Shalini and Helal, - Christopher J and Chong, Ashley and Farrell, William and Richardson, Paul and Sach, Neal W}, - journal={Science}, - volume={359}, - number={6374}, - pages={429--434}, - year={2018}, - publisher={American Association for the Advancement of Science}, - } + - |- + @article{perera2018platform, + title={A platform for automated nanomole-scale reaction screening and micromole-scale synthesis in flow}, + author={Perera, Damith and Tucker, Joseph W and Brahmbhatt, Shalini and Helal, + Christopher J and Chong, Ashley and Farrell, William and Richardson, Paul and Sach, Neal W}, + journal={Science}, + volume={359}, + number={6374}, + 
pages={429--434}, + year={2018}, + publisher={American Association for the Advancement of Science}, + } templates: - - The {RXNSMILES__names__noun} {RXNSMILES#} has the {educt_string__names__noun} {educt_string#} and the {product_string__names__noun} {product_string#}. - - The {RXNSMILES__names__noun} {RXNSMILES#} has the {product_string__names__noun} {product_string#} and the {educt_string__names__noun} {educt_string#}. - - The masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#} is {missing_component#}. - - The {#chemical|compound!} with SMILES {missing_component#} is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}. - - |- - Question: {#What|Which!} {educt_string__names__noun} are {#needed|required!} to {#produce|synthesize!} the {product_string__names__noun} {product_string#}? - Answer: {educt_string#}. - - |- - Question: {#What|Which!} {product_string__names__noun} are produced from the {educt_string__names__noun} {educt_string#}? - Answer: {product_string#}. - - |- - User: I {#want|would like to|must|need to!} {#synthesize|produce!} the {product_string__names__noun} {product_string#}. - Assistant: {#Cool, is|That's interesting, is|Great, is|Is!} there anything else I can do for you? - User: {#Yes, |!}I would like to know the {educt_string__names__noun} I need to produce the {product_string__names__noun} {product_string#}. - Assistant: {#I recommend|I suggest|I propose|I advise!} the following {educt_string__names__noun}: {educt_string#}. - - |- - Question: What is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}? - Answer: {missing_component#}. - - |- - Task: Predict the masked component in a {masked_rxn_smiles__names__noun}. - Description: {masked_rxn_smiles#} - {#Answer|Solution!}: {missing_component#} - - The {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#} is {yield#}{yield__units}. 
- - |- - User: {#I need|I want|I would like!} to run a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}. What is the {yield__names__noun} {#I can expect|I should expect|I should get|I can get!}? - Assistant: {#The|The expected|The predicted|The estimated!} {yield__names__noun} is {yield#}{yield__units}. - - - |- - Question: {#What is|What's|What is the|What's the!} {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}? - Answer: {yield#}{yield__units}. + - The {RXNSMILES__names__noun} {RXNSMILES#} has the {educt_string__names__noun} {educt_string#} and the {product_string__names__noun} {product_string#}. + - The {RXNSMILES__names__noun} {RXNSMILES#} has the {product_string__names__noun} {product_string#} and the {educt_string__names__noun} {educt_string#}. + - The masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#} is {missing_component#}. + - The {#chemical|compound!} with SMILES {missing_component#} is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}. + - |- + Question: {#What|Which!} {educt_string__names__noun} are {#needed|required!} to {#produce|synthesize!} the {product_string__names__noun} {product_string#}? + Answer: {educt_string#}. + - |- + Question: {#What|Which!} {product_string__names__noun} are produced from the {educt_string__names__noun} {educt_string#}? + Answer: {product_string#}. + - |- + User: I {#want|would like to|must|need to!} {#synthesize|produce!} the {product_string__names__noun} {product_string#}. + Assistant: {#Cool, is|That's interesting, is|Great, is|Is!} there anything else I can do for you? + User: {#Yes, |!}I would like to know the {educt_string__names__noun} I need to produce the {product_string__names__noun} {product_string#}. + Assistant: {#I recommend|I suggest|I propose|I advise!} the following {educt_string__names__noun}: {educt_string#}. 
+ - |- + Question: What is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}? + Answer: {missing_component#}. + - |- + Task: Predict the masked component in a {masked_rxn_smiles__names__noun}. + Description: {masked_rxn_smiles#} + {#Answer|Solution!}: {missing_component#} + - The {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#} is {yield#}{yield__units}. + - |- + User: {#I need|I want|I would like!} to run a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}. What is the {yield__names__noun} {#I can expect|I should expect|I should get|I can get!}? + Assistant: {#The|The expected|The predicted|The estimated!} {yield__names__noun} is {yield#}{yield__units}. + - |- + Question: {#What is|What's|What is the|What's the!} {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}? + Answer: {yield#}{yield__units}. diff --git a/data/tabular/thermosol/meta.yaml b/data/tabular/thermosol/meta.yaml index af2abecc9..76b6a4dab 100644 --- a/data/tabular/thermosol/meta.yaml +++ b/data/tabular/thermosol/meta.yaml @@ -1,66 +1,65 @@ ---- name: thermosol description: |- - Solubility in pH 7.4 buffer using solid starting material using the method described in J. Assoc. Lab. Autom. 2011, 16, 276-284. - Experimental range 0.10 to 1500 uM + Solubility in pH 7.4 buffer using solid starting material using the method described in J. Assoc. Lab. Autom. 2011, 16, 276-284. 
+ Experimental range 0.10 to 1500 uM targets: - - id: target - description: aqueous solubility - units: log(microM) - type: continuous - significant_digits: 3 - names: - - noun: aqueous solubility in pH 7.4 buffer at 20 deg C - - noun: solubility in aqueous pH 7.4 buffer at 20 deg C - uris: - - http://purl.jp/bio/4/id/200906006880450101 - - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C60821 + - id: target + description: aqueous solubility + units: log(microM) + type: continuous + significant_digits: 3 + names: + - noun: aqueous solubility in pH 7.4 buffer at 20 deg C + - noun: solubility in aqueous pH 7.4 buffer at 20 deg C + uris: + - http://purl.jp/bio/4/id/200906006880450101 + - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C60821 identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY-SA 3.0 DEED links: - - url: https://journals.sagepub.com/doi/10.1016/j.jala.2010.10.002 - description: corresponding publication - - url: https://www.ebi.ac.uk/chembl/assay_report_card/CHEMBL3301364/ - description: corresponding assay report card + - url: https://journals.sagepub.com/doi/10.1016/j.jala.2010.10.002 + description: corresponding publication + - url: https://www.ebi.ac.uk/chembl/assay_report_card/CHEMBL3301364/ + description: corresponding assay report card num_points: 1763 bibtex: - - |- - @article{Wenlock_2011, - doi = {10.1016/j.jala.2010.10.002}, - url = {https://doi.org/10.1016%2Fj.jala.2010.10.002}, - year = 2011, - month = {aug}, - publisher = {{SAGE} Publications}, - volume = {16}, - number = {4}, - pages = {276--284}, - author = {Mark C. Wenlock and Rupert P. 
Austin and Tim Potter and Patrick Barton}, - title = {A Highly Automated Assay for Determining the Aqueous Equilibrium Solubility of Drug Discovery Compounds}, - journal = {JALA: Journal of the Association for Laboratory Automation}: Journal of the Association for Laboratory Automation} - } - - |- - @article{Wu2018, - doi = {10.1039/c7sc02664a}, - url = {https://doi.org/10.1039/c7sc02664a}, - year = {2018}, - publisher = {Royal Society of Chemistry (RSC)}, - volume = {9}, - number = {2}, - pages = {513--530}, - author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg and Joseph Gomes - and Caleb Geniesse and Aneesh S. Pappu and Karl Leswing and Vijay Pande}, - title = {MoleculeNet: a benchmark for molecular machine learning}, - journal = {Chemical Science} + - |- + @article{Wenlock_2011, + doi = {10.1016/j.jala.2010.10.002}, + url = {https://doi.org/10.1016%2Fj.jala.2010.10.002}, + year = 2011, + month = {aug}, + publisher = {{SAGE} Publications}, + volume = {16}, + number = {4}, + pages = {276--284}, + author = {Mark C. Wenlock and Rupert P. Austin and Tim Potter and Patrick Barton}, + title = {A Highly Automated Assay for Determining the Aqueous Equilibrium Solubility of Drug Discovery Compounds}, + journal = {JALA: Journal of the Association for Laboratory Automation}: Journal of the Association for Laboratory Automation} + } + - |- + @article{Wu2018, + doi = {10.1039/c7sc02664a}, + url = {https://doi.org/10.1039/c7sc02664a}, + year = {2018}, + publisher = {Royal Society of Chemistry (RSC)}, + volume = {9}, + number = {2}, + pages = {513--530}, + author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg and Joseph Gomes + and Caleb Geniesse and Aneesh S. Pappu and Karl Leswing and Vijay Pande}, + title = {MoleculeNet: a benchmark for molecular machine learning}, + journal = {Chemical Science} templates: - - The {target__names__noun} of the {#compound|drug|chemical|molecule!} with the {SMILES__description} {SMILES#} is {target#} {target__units}. 
- - |- - Question: What is the {target__names__noun} of the {#compound|drug|chemical|molecule!} with the {SMILES__description} {SMILES#}? - Answer: {target#} {target__units}. - - |- - User: I want to {#design|discover|find|identify|!} a {#compound|drug|chemical|molecule!} with a particular {target__names__noun}. - Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}I would need to know the {target__names__noun} of the {#compound|drug|chemical|molecule!} you want to design. - User: The {target__names__noun} should be {target#} {target__units}. - Assistant: I {#recommend|suggest|propose|advise|!} the {#compound|drug|chemical|molecule!} with the {SMILES__description} {SMILES#}. + - The {target__names__noun} of the {#compound|drug|chemical|molecule!} with the {SMILES__description} {SMILES#} is {target#} {target__units}. + - |- + Question: What is the {target__names__noun} of the {#compound|drug|chemical|molecule!} with the {SMILES__description} {SMILES#}? + Answer: {target#} {target__units}. + - |- + User: I want to {#design|discover|find|identify|!} a {#compound|drug|chemical|molecule!} with a particular {target__names__noun}. + Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, |!}I would need to know the {target__names__noun} of the {#compound|drug|chemical|molecule!} you want to design. + User: The {target__names__noun} should be {target#} {target__units}. + Assistant: I {#recommend|suggest|propose|advise|!} the {#compound|drug|chemical|molecule!} with the {SMILES__description} {SMILES#}. 
diff --git a/data/tabular/tyrosyl-dna_phosphodiesterase_butkiewicz/meta.yaml b/data/tabular/tyrosyl-dna_phosphodiesterase_butkiewicz/meta.yaml index 8399f73b6..47bcb2fdc 100644 --- a/data/tabular/tyrosyl-dna_phosphodiesterase_butkiewicz/meta.yaml +++ b/data/tabular/tyrosyl-dna_phosphodiesterase_butkiewicz/meta.yaml @@ -1,171 +1,168 @@ ---- name: tyrosyl-dna_phosphodiesterase_butkiewicz description: | - Inhibition of Human tyrosyl-DNA phosphodiesterase 1 (TDP1) - potentially enhances anticancer activity of DNA topoisomerase I inhibitors. - Primary screen AID 485290. Counter screen AID 489007. - Final set contains all compounds active in the counter screen AID 489007. + Inhibition of Human tyrosyl-DNA phosphodiesterase 1 (TDP1) + potentially enhances anticancer activity of DNA topoisomerase I inhibitors. + Primary screen AID 485290. Counter screen AID 489007. + Final set contains all compounds active in the counter screen AID 489007. targets: - - id: activity_tyrosyl_dna_phosphodiesterase - description: whether it active against tyrosyl-DNA phosphodiesterase receptor 1 (1) or not (0). - units: - type: boolean - names: - - noun: a tyrosyl-DNA phosphodiesterase 1 (TDP1) inhibitor - - noun: an inhibitor of tyrosyl-DNA phosphodiesterase 1 - - gerund: inhibiting the human tyrosyl-DNA phosphodiesterase 1 (TDP1) - - gerund: inhibiting the human tyrosyl-DNA phosphodiesterase 1 - - adjective: active against the tyrosyl-DNA phosphodiesterase receptor 1 - - adjective: active against the tyrosyl-DNA phosphodiesterase receptor 1 (TDP1) - pubchem_aids: - - 485290 - - 489007 - uris: [] + - id: activity_tyrosyl_dna_phosphodiesterase + description: whether it active against tyrosyl-DNA phosphodiesterase receptor 1 (1) or not (0). 
+ units: + type: boolean + names: + - noun: a tyrosyl-DNA phosphodiesterase 1 (TDP1) inhibitor + - noun: an inhibitor of tyrosyl-DNA phosphodiesterase 1 + - gerund: inhibiting the human tyrosyl-DNA phosphodiesterase 1 (TDP1) + - gerund: inhibiting the human tyrosyl-DNA phosphodiesterase 1 + - adjective: active against the tyrosyl-DNA phosphodiesterase receptor 1 + - adjective: active against the tyrosyl-DNA phosphodiesterase receptor 1 (TDP1) + pubchem_aids: + - 485290 + - 489007 + uris: [] identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: CC BY 4.0 links: - - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al - description: original dataset - - url: https://doi.org/10.3390/molecules18010735 - description: corresponding publication - - url: https://doi.org/10.1093/nar/gky1033 - description: corresponding publication - - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ - description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al + description: original dataset + - url: https://doi.org/10.3390/molecules18010735 + description: corresponding publication + - url: https://doi.org/10.1093/nar/gky1033 + description: corresponding publication + - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/ + description: corresponding publication benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split num_points: 341365 bibtex: - - |- - @article{Butkiewicz2013, - doi = {10.3390/molecules18010735}, - url = {https://doi.org/10.3390/molecules18010735}, - year = {2013}, - month = jan, - publisher = {{MDPI} {AG}}, - volume = {18}, - number = {1}, - pages = {735--756}, - author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller - and Jeffrey Mendenhall and Pedro Teixeira and C. 
Weaver and Jens Meiler}, - title = {Benchmarking Ligand-Based Virtual High-Throughput Screening - with the {PubChem} Database}, - journal = {Molecules}} - - |- - @article{Kim2018, - doi = {10.1093/nar/gky1033}, - url = {https://doi.org/10.1093/nar/gky1033}, - year = {2018}, - month = oct, - publisher = {Oxford University Press ({OUP})}, - volume = {47}, - number = {D1}, - pages = {D1102--D1109}, - author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta Gindulyte - and Jia He and Siqian He and Qingliang Li and Benjamin A Shoemaker - and Paul A Thiessen and Bo Yu and Leonid Zaslavsky and Jian Zhang and Evan E Bolton}, - title = {{PubChem} 2019 update: improved access to chemical data}, - journal = {Nucleic Acids Research}} - - |- - @article{Butkiewicz2017, - doi = {}, - url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, - year = {2017}, - publisher = {Chem Inform}, - volume = {3}, - number = {1}, - author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. - and Lowe, E. W. and Weaver, D. C. and Meiler, J.}, - title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets - from the {P}ub{C}hem {D}atabase}}, - journal = {Chemical Science}} + - |- + @article{Butkiewicz2013, + doi = {10.3390/molecules18010735}, + url = {https://doi.org/10.3390/molecules18010735}, + year = {2013}, + month = jan, + publisher = {{MDPI} {AG}}, + volume = {18}, + number = {1}, + pages = {735--756}, + author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller + and Jeffrey Mendenhall and Pedro Teixeira and C. 
Weaver and Jens Meiler}, + title = {Benchmarking Ligand-Based Virtual High-Throughput Screening + with the {PubChem} Database}, + journal = {Molecules}} + - |- + @article{Kim2018, + doi = {10.1093/nar/gky1033}, + url = {https://doi.org/10.1093/nar/gky1033}, + year = {2018}, + month = oct, + publisher = {Oxford University Press ({OUP})}, + volume = {47}, + number = {D1}, + pages = {D1102--D1109}, + author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta Gindulyte + and Jia He and Siqian He and Qingliang Li and Benjamin A Shoemaker + and Paul A Thiessen and Bo Yu and Leonid Zaslavsky and Jian Zhang and Evan E Bolton}, + title = {{PubChem} 2019 update: improved access to chemical data}, + journal = {Nucleic Acids Research}} + - |- + @article{Butkiewicz2017, + doi = {}, + url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/}, + year = {2017}, + publisher = {Chem Inform}, + volume = {3}, + number = {1}, + author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. + and Lowe, E. W. and Weaver, D. C. and Meiler, J.}, + title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets + from the {P}ub{C}hem {D}atabase}}, + journal = {Chemical Science}} templates: - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__noun}. - - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_tyrosyl_dna_phosphodiesterase#no - &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__gerund}. - - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {activity_tyrosyl_dna_phosphodiesterase#no &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective} - {#properties|characteristics|features!}. 
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}identified as {activity_tyrosyl_dna_phosphodiesterase__names__adjective}. - - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}. - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_tyrosyl_dna_phosphodiesterase__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. - Result: {activity_tyrosyl_dna_phosphodiesterase#False&True} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_tyrosyl_dna_phosphodiesterase__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Answer the question in a {#full|complete!} sentence. - Result: This molecule is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}. - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {activity_tyrosyl_dna_phosphodiesterase__names__adjective}. - Result: {SMILES#} - - |- - User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_tyrosyl_dna_phosphodiesterase__names__adjective}? - Assistant: {activity_tyrosyl_dna_phosphodiesterase#No&Yes}, this molecule is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}. - - |- - User: Is the molecule with the {SMILES__description} {SMILES#} {activity_tyrosyl_dna_phosphodiesterase__names__adjective}? 
- Assistant: {activity_tyrosyl_dna_phosphodiesterase#No&Yes}, it is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}. - - |- - User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}? - Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} - - |- - User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}? - Assistant: This is a molecule that is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? - User: Yes, please. The molecule should {activity_tyrosyl_dna_phosphodiesterase#not &NULL}be {activity_tyrosyl_dna_phosphodiesterase__names__adjective}. - Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}: {SMILES#} - - |- - User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. - Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? - User: Yes, the molecule should {activity_tyrosyl_dna_phosphodiesterase#not &NULL}be {activity_tyrosyl_dna_phosphodiesterase__names__adjective}. 
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}: {SMILES#} - - Is the {SMILES__description} {SMILES#} {activity_tyrosyl_dna_phosphodiesterase__names__adjective}:{activity_tyrosyl_dna_phosphodiesterase#no&yes} - - |- - Task: Please classify a molecule based on the description. - Description: A molecule that is {activity_tyrosyl_dna_phosphodiesterase__names__adjective}. - {#Molecule |!}{SMILES__description}: {SMILES#} - Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. - Result:{activity_tyrosyl_dna_phosphodiesterase#False&True} - - |- - Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. - Description: A molecule that is {activity_tyrosyl_dna_phosphodiesterase__names__adjective}. - Result:{SMILES#} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_tyrosyl_dna_phosphodiesterase__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {activity_tyrosyl_dna_phosphodiesterase%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_tyrosyl_dna_phosphodiesterase__names__adjective}? - Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. - Options: - {activity_tyrosyl_dna_phosphodiesterase%} - Answer:{%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. 
- Question: Which molecules are {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%activity_tyrosyl_dna_phosphodiesterase%} - Answer: {%multiple_choice_result} - - |- - Task: Please answer the multiple choice question. - Question: Which molecules are {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}? - Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. - Options: - {SMILES%activity_tyrosyl_dna_phosphodiesterase%} - Answer:{%multiple_choice_result} + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__noun}. + - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_tyrosyl_dna_phosphodiesterase#no &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__gerund}. + - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {activity_tyrosyl_dna_phosphodiesterase#no &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective} {#properties|characteristics|features!}. + - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}identified as {activity_tyrosyl_dna_phosphodiesterase__names__adjective}. + - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}. + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_tyrosyl_dna_phosphodiesterase__names__adjective}. 
+ {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional|extra!} words. + Result: {activity_tyrosyl_dna_phosphodiesterase#False&True} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_tyrosyl_dna_phosphodiesterase__names__adjective}. + {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Answer the question in a {#full|complete!} sentence. + Result: This molecule is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}. + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {activity_tyrosyl_dna_phosphodiesterase__names__adjective}. + Result: {SMILES#} + - |- + User: Can you {#tell me|figure out|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_tyrosyl_dna_phosphodiesterase__names__adjective}? + Assistant: {activity_tyrosyl_dna_phosphodiesterase#No&Yes}, this molecule is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}. + - |- + User: Is the molecule with the {SMILES__description} {SMILES#} {activity_tyrosyl_dna_phosphodiesterase__names__adjective}? + Assistant: {activity_tyrosyl_dna_phosphodiesterase#No&Yes}, it is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}. + - |- + User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}? 
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#} + - |- + User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}? + Assistant: This is a molecule that is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: This sounds {#very exciting. |very interesting. | very curious. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}? + User: Yes, please. The molecule should {activity_tyrosyl_dna_phosphodiesterase#not &NULL}be {activity_tyrosyl_dna_phosphodiesterase__names__adjective}. + Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}: {SMILES#} + - |- + User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}. + Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}? + User: Yes, the molecule should {activity_tyrosyl_dna_phosphodiesterase#not &NULL}be {activity_tyrosyl_dna_phosphodiesterase__names__adjective}. + Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}: {SMILES#} + - Is the {SMILES__description} {SMILES#} {activity_tyrosyl_dna_phosphodiesterase__names__adjective}:{activity_tyrosyl_dna_phosphodiesterase#no&yes} + - |- + Task: Please classify a molecule based on the description. + Description: A molecule that is {activity_tyrosyl_dna_phosphodiesterase__names__adjective}. 
+ {#Molecule |!}{SMILES__description}: {SMILES#} + Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words. + Result:{activity_tyrosyl_dna_phosphodiesterase#False&True} + - |- + Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}. + Description: A molecule that is {activity_tyrosyl_dna_phosphodiesterase__names__adjective}. + Result:{SMILES#} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_tyrosyl_dna_phosphodiesterase__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {activity_tyrosyl_dna_phosphodiesterase%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_tyrosyl_dna_phosphodiesterase__names__adjective}? + Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words. + Options: + {activity_tyrosyl_dna_phosphodiesterase%} + Answer:{%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. + Question: Which molecules are {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%activity_tyrosyl_dna_phosphodiesterase%} + Answer: {%multiple_choice_result} + - |- + Task: Please answer the multiple choice question. 
+ Question: Which molecules are {activity_tyrosyl_dna_phosphodiesterase#not &NULL}{activity_tyrosyl_dna_phosphodiesterase__names__adjective}? + Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words. + Options: + {SMILES%activity_tyrosyl_dna_phosphodiesterase%} + Answer:{%multiple_choice_result} diff --git a/data/tabular/uniprot_binding_single/meta.yaml b/data/tabular/uniprot_binding_single/meta.yaml index d7a9961c5..972c0a3f3 100644 --- a/data/tabular/uniprot_binding_single/meta.yaml +++ b/data/tabular/uniprot_binding_single/meta.yaml @@ -1,68 +1,67 @@ ---- name: uniprot_binding_single description: |- - Binding sites of a molecule in protein sequences. + Binding sites of a molecule in protein sequences. targets: - - id: start_binding_site - description: index for start of the binding sites of a protein - type: text - names: - - noun: start binding site + - id: start_binding_site + description: index for start of the binding sites of a protein + type: text + names: + - noun: start binding site identifiers: - - id: sequence - type: AS_SEQUENCE - description: other - - id: SMILES - description: SMILES - type: SMILES - names: - - noun: SMILES + - id: sequence + type: AS_SEQUENCE + description: other + - id: SMILES + description: SMILES + type: SMILES + names: + - noun: SMILES license: MIT links: - - url: https://www.uniprot.org/ - description: data source + - url: https://www.uniprot.org/ + description: data source num_points: 604383 bibtex: - - |- - @article{10.1093/nar/gkac1052, - author = {The UniProt Consortium}, - title = {UniProt - the Universal Protein Knowledgebase in 2023}, - journal = {Nucleic Acids Research}, - volume = {51}, - number = {D1}, - pages = {D523-D531}, - year = {2022}, - month = {11}, - issn = {0305-1048}, - doi = {10.1093/nar/gkac1052}, - url = {https://doi.org/10.1093/nar/gkac1052}} + - |- + @article{10.1093/nar/gkac1052, + author = {The UniProt Consortium}, + 
title = {UniProt - the Universal Protein Knowledgebase in 2023}, + journal = {Nucleic Acids Research}, + volume = {51}, + number = {D1}, + pages = {D523-D531}, + year = {2022}, + month = {11}, + issn = {0305-1048}, + doi = {10.1093/nar/gkac1052}, + url = {https://doi.org/10.1093/nar/gkac1052}} templates: -# - |- -# The {#molecule|chemical|compound!} with the {SMILES__description}{# representation|!} {SMILES#} binds to the {#AA sequence|amino acid sequence|peptide sequence|protein!} {sequence#} at the {#site|binding site|position!} {start_binding_site#}{#-| to !}{end_binding_site#}. - - |- - Task: {#Find|Identify|Come up with!} a binding site for the {#molecule|chemical|compound!} in the {#AA sequence|amino acid sequence|peptide sequence|protein!}. - {#AA sequence|Amino acid sequence|Peptide sequence|Protein!}: {sequence#} - {SMILES__description}{# representation|!}: {SMILES#} - {#Output|Result!}: {start_binding_site#} - - |- - Task: {#Create|Design|Come up with!} a {#molecule|chemical|compound!} that binds to the given {#binding site|site|position|!} in the {#AA sequence|amino acid sequence|peptide sequence|protein!}. - {#AA sequence|Amino acid sequence|Peptide sequence|Protein!}: {sequence#} - Binding {#site|position!}: {start_binding_site#} - {#Output|Result!}: {SMILES#} - - |- - Question: Can you {#give me one example of a|find one!} binding site of the {#molecule|chemical|compound!} with the {SMILES__description}{# representation|!} {SMILES#} in this {#AA sequence|amino acid sequence|peptide sequence|protein!} {sequence#}? - Answer: One {#possible |!}{#binding |!}site for the {#chemical|molecule|compound!} is {start_binding_site#}. - - |- - Question: What {#molecule|chemical|compound!} can {#possibly |!}bind to the {#binding |!}site {#at |at the position !}{start_binding_site#} in the {#given |!}{#AA|amino acid|protein!} sequence{# below|!}? 
- Sequence: {sequence#} - Answer: {SMILES#} - - |- - Task: {#Find|Identify|Come up with!} a binding site in the {#AA sequence|amino acid sequence|peptide sequence|protein!} for the {#molecule|chemical|compound!}. - {#AA sequence|Amino acid sequence|Peptide sequence|Protein!}: {sequence#} - {SMILES__description}{# representation|!}: {SMILES#} - {#Output|Result!}: {start_binding_site#} - - |- - Task: {#Create|Design|Come up with!} a {#molecule|chemical|compound!} that binds to the given {#binding site|site|position|!} in the {#AA sequence|amino acid sequence|peptide sequence|protein!}. - {#AA sequence|Amino acid sequence|Peptide sequence|Protein!}: {sequence#} - Binding site{# position|!}: {start_binding_site#} - {#Output|Result!}: {SMILES#} + # - |- + # The {#molecule|chemical|compound!} with the {SMILES__description}{# representation|!} {SMILES#} binds to the {#AA sequence|amino acid sequence|peptide sequence|protein!} {sequence#} at the {#site|binding site|position!} {start_binding_site#}{#-| to !}{end_binding_site#}. + - |- + Task: {#Find|Identify|Come up with!} a binding site for the {#molecule|chemical|compound!} in the {#AA sequence|amino acid sequence|peptide sequence|protein!}. + {#AA sequence|Amino acid sequence|Peptide sequence|Protein!}: {sequence#} + {SMILES__description}{# representation|!}: {SMILES#} + {#Output|Result!}: {start_binding_site#} + - |- + Task: {#Create|Design|Come up with!} a {#molecule|chemical|compound!} that binds to the given {#binding site|site|position|!} in the {#AA sequence|amino acid sequence|peptide sequence|protein!}. + {#AA sequence|Amino acid sequence|Peptide sequence|Protein!}: {sequence#} + Binding {#site|position!}: {start_binding_site#} + {#Output|Result!}: {SMILES#} + - |- + Question: Can you {#give me one example of a|find one!} binding site of the {#molecule|chemical|compound!} with the {SMILES__description}{# representation|!} {SMILES#} in this {#AA sequence|amino acid sequence|peptide sequence|protein!} {sequence#}? 
+ Answer: One {#possible |!}{#binding |!}site for the {#chemical|molecule|compound!} is {start_binding_site#}. + - |- + Question: What {#molecule|chemical|compound!} can {#possibly |!}bind to the {#binding |!}site {#at |at the position !}{start_binding_site#} in the {#given |!}{#AA|amino acid|protein!} sequence{# below|!}? + Sequence: {sequence#} + Answer: {SMILES#} + - |- + Task: {#Find|Identify|Come up with!} a binding site in the {#AA sequence|amino acid sequence|peptide sequence|protein!} for the {#molecule|chemical|compound!}. + {#AA sequence|Amino acid sequence|Peptide sequence|Protein!}: {sequence#} + {SMILES__description}{# representation|!}: {SMILES#} + {#Output|Result!}: {start_binding_site#} + - |- + Task: {#Create|Design|Come up with!} a {#molecule|chemical|compound!} that binds to the given {#binding site|site|position|!} in the {#AA sequence|amino acid sequence|peptide sequence|protein!}. + {#AA sequence|Amino acid sequence|Peptide sequence|Protein!}: {sequence#} + Binding site{# position|!}: {start_binding_site#} + {#Output|Result!}: {SMILES#} diff --git a/data/tabular/uniprot_binding_sites_multiple/meta.yaml b/data/tabular/uniprot_binding_sites_multiple/meta.yaml index 397bfffea..224666e1a 100644 --- a/data/tabular/uniprot_binding_sites_multiple/meta.yaml +++ b/data/tabular/uniprot_binding_sites_multiple/meta.yaml @@ -1,73 +1,72 @@ ---- name: uniprot_binding_sites_multiple description: |- - Binding sites of a molecule in protein sequences. + Binding sites of a molecule in protein sequences. 
targets: - - id: start_binding_site - description: index for start of the binding sites of a protein - type: text - names: - - noun: start binding site - - id: end_binding_site - description: index for end of the binding sites of a protein - type: text - names: - - noun: end binding site + - id: start_binding_site + description: index for start of the binding sites of a protein + type: text + names: + - noun: start binding site + - id: end_binding_site + description: index for end of the binding sites of a protein + type: text + names: + - noun: end binding site identifiers: - - id: sequence - type: AS_SEQUENCE - description: other - - id: SMILES - description: SMILES - type: SMILES - names: - - noun: SMILES + - id: sequence + type: AS_SEQUENCE + description: other + - id: SMILES + description: SMILES + type: SMILES + names: + - noun: SMILES license: MIT links: - - url: https://www.uniprot.org/ - description: data source + - url: https://www.uniprot.org/ + description: data source num_points: 176066 bibtex: - - |- - @article{10.1093/nar/gkac1052, - author = {The UniProt Consortium}, - title = {UniProt - the Universal Protein Knowledgebase in 2023}, - journal = {Nucleic Acids Research}, - volume = {51}, - number = {D1}, - pages = {D523-D531}, - year = {2022}, - month = {11}, - issn = {0305-1048}, - doi = {10.1093/nar/gkac1052}, - url = {https://doi.org/10.1093/nar/gkac1052}} + - |- + @article{10.1093/nar/gkac1052, + author = {The UniProt Consortium}, + title = {UniProt - the Universal Protein Knowledgebase in 2023}, + journal = {Nucleic Acids Research}, + volume = {51}, + number = {D1}, + pages = {D523-D531}, + year = {2022}, + month = {11}, + issn = {0305-1048}, + doi = {10.1093/nar/gkac1052}, + url = {https://doi.org/10.1093/nar/gkac1052}} templates: -# - |- -# The {#molecule|chemical|compound!} with the {SMILES__description}{# representation|!} {SMILES#} binds to the {#AA sequence|amino acid sequence|peptide sequence|protein!} {sequence#} at the {#site|binding 
site|position!} {start_binding_site#}{#-| to !}{end_binding_site#}. - - |- - Task: {#Find|Identify|Come up with!} a binding site for the {#molecule|chemical|compound!} in the {#AA sequence|amino acid sequence|peptide sequence|protein!}. - {#AA sequence|Amino acid sequence|Peptide sequence|Protein!}: {sequence#} - {SMILES__description}{# representation|!}: {SMILES#} - {#Output|Result!}: {start_binding_site#}-{end_binding_site#} - - |- - Task: {#Create|Design|Come up with!} a {#molecule|chemical|compound!} that binds to the given {#binding site|site|position!} in the {#AA sequence|amino acid sequence|peptide sequence|protein!}. - {#AA sequence|Amino acid sequence|Peptide sequence|Protein!}: {sequence#} - Binding site{# position|!}: {start_binding_site#}{#-| to !}{end_binding_site#} - {#Output|Result!}: {SMILES#} - - |- - Question: Can you {#give me one example of a|find one!} binding site of the {#molecule|chemical|compound!} with the {SMILES__description}{# representation|!} {SMILES#} in this {#AA sequence|amino acid sequence|peptide sequence|protein!} {sequence#}? - Answer: One {#possible |!}{#binding |!}site for the {#chemical|molecule|compound!} is {start_binding_site#}{#-| to !}{end_binding_site#}. - - |- - Question: What {#molecule|chemical|compound!} can {#possibly |!}bind to the {#binding |!}site {#at |at the position !}{start_binding_site#}{#-| to !}{end_binding_site#} in the {#given |!}{#AA|amino acid|protein!} sequence{# below|!}? - Sequence: {sequence#} - Answer: {SMILES#} - - |- - Task: {#Find|Identify|Come up with!} a binding site in the {#AA sequence|amino acid sequence|peptide sequence|protein!} for the {#molecule|chemical|compound!}. 
- {#AA sequence|Amino acid sequence|Peptide sequence|Protein!}: {sequence#} - {SMILES__description}{# representation|!}: {SMILES#} - {#Output|Result!}: {start_binding_site#}-{end_binding_site#} - - |- - Task: {#Create|Design|Come up with!} a {#molecule|chemical|compound!} that binds to the given {#binding site|site|position|!} in the {#AA sequence|amino acid sequence|peptide sequence|protein!}. - {#AA sequence|Amino acid sequence|Peptide sequence|Protein!}: {sequence#} - Binding site{# position|!}: {start_binding_site#}{#-| to !}{end_binding_site#} - {#Output|Result!}: {SMILES#} + # - |- + # The {#molecule|chemical|compound!} with the {SMILES__description}{# representation|!} {SMILES#} binds to the {#AA sequence|amino acid sequence|peptide sequence|protein!} {sequence#} at the {#site|binding site|position!} {start_binding_site#}{#-| to !}{end_binding_site#}. + - |- + Task: {#Find|Identify|Come up with!} a binding site for the {#molecule|chemical|compound!} in the {#AA sequence|amino acid sequence|peptide sequence|protein!}. + {#AA sequence|Amino acid sequence|Peptide sequence|Protein!}: {sequence#} + {SMILES__description}{# representation|!}: {SMILES#} + {#Output|Result!}: {start_binding_site#}-{end_binding_site#} + - |- + Task: {#Create|Design|Come up with!} a {#molecule|chemical|compound!} that binds to the given {#binding site|site|position!} in the {#AA sequence|amino acid sequence|peptide sequence|protein!}. + {#AA sequence|Amino acid sequence|Peptide sequence|Protein!}: {sequence#} + Binding site{# position|!}: {start_binding_site#}{#-| to !}{end_binding_site#} + {#Output|Result!}: {SMILES#} + - |- + Question: Can you {#give me one example of a|find one!} binding site of the {#molecule|chemical|compound!} with the {SMILES__description}{# representation|!} {SMILES#} in this {#AA sequence|amino acid sequence|peptide sequence|protein!} {sequence#}? 
+ Answer: One {#possible |!}{#binding |!}site for the {#chemical|molecule|compound!} is {start_binding_site#}{#-| to !}{end_binding_site#}. + - |- + Question: What {#molecule|chemical|compound!} can {#possibly |!}bind to the {#binding |!}site {#at |at the position !}{start_binding_site#}{#-| to !}{end_binding_site#} in the {#given |!}{#AA|amino acid|protein!} sequence{# below|!}? + Sequence: {sequence#} + Answer: {SMILES#} + - |- + Task: {#Find|Identify|Come up with!} a binding site in the {#AA sequence|amino acid sequence|peptide sequence|protein!} for the {#molecule|chemical|compound!}. + {#AA sequence|Amino acid sequence|Peptide sequence|Protein!}: {sequence#} + {SMILES__description}{# representation|!}: {SMILES#} + {#Output|Result!}: {start_binding_site#}-{end_binding_site#} + - |- + Task: {#Create|Design|Come up with!} a {#molecule|chemical|compound!} that binds to the given {#binding site|site|position|!} in the {#AA sequence|amino acid sequence|peptide sequence|protein!}. + {#AA sequence|Amino acid sequence|Peptide sequence|Protein!}: {sequence#} + Binding site{# position|!}: {start_binding_site#}{#-| to !}{end_binding_site#} + {#Output|Result!}: {SMILES#} diff --git a/data/tabular/uniprot_organisms/meta.yaml b/data/tabular/uniprot_organisms/meta.yaml index 69ca63837..5c3c03451 100644 --- a/data/tabular/uniprot_organisms/meta.yaml +++ b/data/tabular/uniprot_organisms/meta.yaml @@ -1,47 +1,46 @@ ---- name: uniprot_organisms description: |- - Organisms in which a amino-acid sequence can be found. + Organisms in which an amino-acid sequence can be found.
targets: - - id: organisms - description: organisms in which a protein can be found - type: text - names: - - noun: organisms + - id: organisms + description: organisms in which a protein can be found + type: text + names: + - noun: organisms identifiers: - - id: other - type: AS_SEQUENCE - description: other + - id: other + type: AS_SEQUENCE + description: other license: MIT links: - - url: https://www.uniprot.org/ - description: data source + - url: https://www.uniprot.org/ + description: data source num_points: 559428 bibtex: - - |- - @article{10.1093/nar/gkac1052, - author = {The UniProt Consortium}, - title = {UniProt - the Universal Protein Knowledgebase in 2023}, - journal = {Nucleic Acids Research}, - volume = {51}, - number = {D1}, - pages = {D523-D531}, - year = {2022}, - month = {11}, - issn = {0305-1048}, - doi = {10.1093/nar/gkac1052}, - url = {https://doi.org/10.1093/nar/gkac1052}} + - |- + @article{10.1093/nar/gkac1052, + author = {The UniProt Consortium}, + title = {UniProt - the Universal Protein Knowledgebase in 2023}, + journal = {Nucleic Acids Research}, + volume = {51}, + number = {D1}, + pages = {D523-D531}, + year = {2022}, + month = {11}, + issn = {0305-1048}, + doi = {10.1093/nar/gkac1052}, + url = {https://doi.org/10.1093/nar/gkac1052}} templates: - - |- - The protein with the {#amino acid sequence|AA sequence!} {other#} can be found in {#the organism |!}{organisms#}. - - |- - Task: {#Predict|Identify!} the organism in which {#the below|this!} {#protein|amino acid sequence|AA sequence|polypeptide!} can be found. - {#Amino acid sequence |Sequence|AA sequence!}: {other#} - Result: {organisms#} - - |- - User: In what organism can you find the following {#protein|amino acid sequence|AA sequence|polypeptide!}:\n{other#} - Assistant: The given {#protein|amino acid sequence|AA sequence|polypeptide!} can be found in {organisms#}. 
- - |- - Task: {#Predict|Identify!} the organism in which {#the below|this!} {#protein|amino acid sequence|AA sequence|polypeptide!} can be found. - {#Amino acid sequence|Sequence|AA sequence!}: {other#} - Result: {organisms#} + - |- + The protein with the {#amino acid sequence|AA sequence!} {other#} can be found in {#the organism |!}{organisms#}. + - |- + Task: {#Predict|Identify!} the organism in which {#the below|this!} {#protein|amino acid sequence|AA sequence|polypeptide!} can be found. + {#Amino acid sequence |Sequence|AA sequence!}: {other#} + Result: {organisms#} + - |- + User: In what organism can you find the following {#protein|amino acid sequence|AA sequence|polypeptide!}:\n{other#} + Assistant: The given {#protein|amino acid sequence|AA sequence|polypeptide!} can be found in {organisms#}. + - |- + Task: {#Predict|Identify!} the organism in which {#the below|this!} {#protein|amino acid sequence|AA sequence|polypeptide!} can be found. + {#Amino acid sequence|Sequence|AA sequence!}: {other#} + Result: {organisms#} diff --git a/data/tabular/uniprot_reactions/meta.yaml b/data/tabular/uniprot_reactions/meta.yaml index 79bb5f255..7d1eca907 100644 --- a/data/tabular/uniprot_reactions/meta.yaml +++ b/data/tabular/uniprot_reactions/meta.yaml @@ -1,56 +1,55 @@ ---- name: uniprot_reactions description: |- - Protein sequences and the reactions these can catalyze. + Protein sequences and the reactions these can catalyze. 
targets: - - id: reactions - description: biochemical reactions catalyzed by a protein - type: text - names: - - noun: chemical reactions - - noun: biochemical reactions + - id: reactions + description: biochemical reactions catalyzed by a protein + type: text + names: + - noun: chemical reactions + - noun: biochemical reactions identifiers: - - id: other - type: AS_SEQUENCE - description: other + - id: other + type: AS_SEQUENCE + description: other license: MIT links: - - url: https://www.uniprot.org/ - description: data source + - url: https://www.uniprot.org/ + description: data source num_points: 253713 bibtex: - - |- - @article{10.1093/nar/gkac1052, - author = {The UniProt Consortium}, - title = {UniProt - the Universal Protein Knowledgebase in 2023}, - journal = {Nucleic Acids Research}, - volume = {51}, - number = {D1}, - pages = {D523-D531}, - year = {2022}, - month = {11}, - issn = {0305-1048}, - doi = {10.1093/nar/gkac1052}, - url = {https://doi.org/10.1093/nar/gkac1052}} + - |- + @article{10.1093/nar/gkac1052, + author = {The UniProt Consortium}, + title = {UniProt - the Universal Protein Knowledgebase in 2023}, + journal = {Nucleic Acids Research}, + volume = {51}, + number = {D1}, + pages = {D523-D531}, + year = {2022}, + month = {11}, + issn = {0305-1048}, + doi = {10.1093/nar/gkac1052}, + url = {https://doi.org/10.1093/nar/gkac1052}} templates: - - |- - The {#protein|amino acid sequence|AA sequence|polypeptide!} {#with the sequence |!}{other#} catalyzes the {#following |!}{#chemical |biochemical |!}reaction: {reactions#} - - |- - Task: {#Predict|Identify!} a {#biochemical |chemical |!}reaction that can be catalyzed by {#this|the following!} {#protein|amino acid sequence|AA sequence|polypeptide!}. 
- {#Amino acid sequence |Sequence|AA sequence!}: {other#} - Result: {reactions#} - - |- - Task: {#Generate|Create|Come up with!} a {#protein|amino acid sequence|AA sequence|polypeptide!} that can catalyze {#a|this!} specific {#biochemical |chemical |!}reaction. - Reaction: {reactions#} - {#Output|Result!}: {other#} - - |- - User: Can you {#tell me|come up with!} a {#biochemical |chemical |!}reaction that can be catalyzed by the following {#protein|amino acid sequence|AA sequence|polypeptide!}:\n{other#} - Assistant: {#Yes, the|Sure, the|Yes, sure, the|The!} {#chemical |biochemical |!}reaction that can be catalyzed by the given {#protein|amino acid sequence|AA sequence|polypeptide!} are:\n{reactions#} - - |- - Task: {#Predict|Identify!} a {#biochemical |chemical |!}reaction that can be catalyzed by {#this|the following!} {#protein|amino acid sequence|AA sequence|polypeptide!}. - {#Amino acid sequence |Sequence|AA sequence!}: {other#} - Result: {reactions#} - - |- - Task: {#Generate|Create|Come up with|Design!} a {#protein|amino acid sequence|AA sequence|polypeptide!} that can catalyze {#a|this!} specific {#biochemical |chemical |!}reaction. - Reaction: {reactions#} - {#Output|Result!}: {other#} + - |- + The {#protein|amino acid sequence|AA sequence|polypeptide!} {#with the sequence |!}{other#} catalyzes the {#following |!}{#chemical |biochemical |!}reaction: {reactions#} + - |- + Task: {#Predict|Identify!} a {#biochemical |chemical |!}reaction that can be catalyzed by {#this|the following!} {#protein|amino acid sequence|AA sequence|polypeptide!}. + {#Amino acid sequence |Sequence|AA sequence!}: {other#} + Result: {reactions#} + - |- + Task: {#Generate|Create|Come up with!} a {#protein|amino acid sequence|AA sequence|polypeptide!} that can catalyze {#a|this!} specific {#biochemical |chemical |!}reaction. 
+ Reaction: {reactions#} + {#Output|Result!}: {other#} + - |- + User: Can you {#tell me|come up with!} a {#biochemical |chemical |!}reaction that can be catalyzed by the following {#protein|amino acid sequence|AA sequence|polypeptide!}:\n{other#} + Assistant: {#Yes, the|Sure, the|Yes, sure, the|The!} {#chemical |biochemical |!}reaction that can be catalyzed by the given {#protein|amino acid sequence|AA sequence|polypeptide!} is:\n{reactions#} + - |- + Task: {#Predict|Identify!} a {#biochemical |chemical |!}reaction that can be catalyzed by {#this|the following!} {#protein|amino acid sequence|AA sequence|polypeptide!}. + {#Amino acid sequence |Sequence|AA sequence!}: {other#} + Result: {reactions#} + - |- + Task: {#Generate|Create|Come up with|Design!} a {#protein|amino acid sequence|AA sequence|polypeptide!} that can catalyze {#a|this!} specific {#biochemical |chemical |!}reaction. + Reaction: {reactions#} + {#Output|Result!}: {other#} diff --git a/data/tabular/uniprot_sentences/meta.yaml b/data/tabular/uniprot_sentences/meta.yaml index 672362de3..cfd314375 100644 --- a/data/tabular/uniprot_sentences/meta.yaml +++ b/data/tabular/uniprot_sentences/meta.yaml @@ -1,56 +1,55 @@ ---- name: uniprot_sentences description: |- - Descriptions of the function of a protein. + Descriptions of the function of a protein.
targets: - - id: sentences - description: sentences describing the function of a protein - type: text - names: - - noun: function + - id: sentences + description: sentences describing the function of a protein + type: text + names: + - noun: function identifiers: - - id: sequence - type: AS_SEQUENCE - description: other + - id: sequence + type: AS_SEQUENCE + description: other license: MIT links: - - url: https://www.uniprot.org/ - description: data source + - url: https://www.uniprot.org/ + description: data source num_points: 396241 bibtex: - - |- - @article{10.1093/nar/gkac1052, - author = {The UniProt Consortium}, - title = {UniProt - the Universal Protein Knowledgebase in 2023}, - journal = {Nucleic Acids Research}, - volume = {51}, - number = {D1}, - pages = {D523-D531}, - year = {2022}, - month = {11}, - issn = {0305-1048}, - doi = {10.1093/nar/gkac1052}, - url = {https://doi.org/10.1093/nar/gkac1052}} + - |- + @article{10.1093/nar/gkac1052, + author = {The UniProt Consortium}, + title = {UniProt - the Universal Protein Knowledgebase in 2023}, + journal = {Nucleic Acids Research}, + volume = {51}, + number = {D1}, + pages = {D523-D531}, + year = {2022}, + month = {11}, + issn = {0305-1048}, + doi = {10.1093/nar/gkac1052}, + url = {https://doi.org/10.1093/nar/gkac1052}} templates: - - |- - User: {#Please describe|Describe|Please briefly describe|Briefly describe!} the {#biological |biochemical |!}function of {#the|this!} {#protein|amino acid sequence|AA sequence|polypeptide!}: {sequence#} - Assistant: {sentences#} - - |- - User: What {#protein|amino acid sequence|AA sequence|polypeptide!} fits the {#biological |biochemical |!}description {#in the next sentences |below |!}best?\n{sentences#} - Assistant: A {#protein|amino acid sequence|AA sequence|polypeptide!} that fits the {#description|points|sentences!} is:\n{sequence#} - - |- - Task: {#Generate|Create|Come up with!} a description {#of a few sentences |!}for the {#protein|amino acid sequence|AA 
sequence|polypeptide!}{# below|!}. - {#Protein|Amino acid sequence|AA sequence|Polypeptide!}: {sequence#} - {#Output|Result!}: {sentences#} - - |- - Task: {#Generate|Create|Come up with!} a {#protein|amino acid sequence|AA sequence|polypeptide!} based on the description. - Description: {sentences#} - {#Output|Result!}: {sequence#} - - |- - Task: {#Generate|Create|Come up with!} a description {#of a few sentences |!}for the {#protein|amino acid sequence|AA sequence|polypeptide!}{# below|!}. - {#Protein|Amino acid sequence|AA sequence|Polypeptide!}: {sequence#} - {#Output|Result!}: {sentences#} - - |- - Task: {#Generate|Create|Come up with!} a {#protein|amino acid sequence|AA sequence|polypeptide!} based on the description. - Description: {sentences#} - {#Output|Result!}: {sequence#} + - |- + User: {#Please describe|Describe|Please briefly describe|Briefly describe!} the {#biological |biochemical |!}function of {#the|this!} {#protein|amino acid sequence|AA sequence|polypeptide!}: {sequence#} + Assistant: {sentences#} + - |- + User: What {#protein|amino acid sequence|AA sequence|polypeptide!} fits the {#biological |biochemical |!}description {#in the next sentences |below |!}best?\n{sentences#} + Assistant: A {#protein|amino acid sequence|AA sequence|polypeptide!} that fits the {#description|points|sentences!} is:\n{sequence#} + - |- + Task: {#Generate|Create|Come up with!} a description {#of a few sentences |!}for the {#protein|amino acid sequence|AA sequence|polypeptide!}{# below|!}. + {#Protein|Amino acid sequence|AA sequence|Polypeptide!}: {sequence#} + {#Output|Result!}: {sentences#} + - |- + Task: {#Generate|Create|Come up with!} a {#protein|amino acid sequence|AA sequence|polypeptide!} based on the description. + Description: {sentences#} + {#Output|Result!}: {sequence#} + - |- + Task: {#Generate|Create|Come up with!} a description {#of a few sentences |!}for the {#protein|amino acid sequence|AA sequence|polypeptide!}{# below|!}. 
+ {#Protein|Amino acid sequence|AA sequence|Polypeptide!}: {sequence#} + {#Output|Result!}: {sentences#} + - |- + Task: {#Generate|Create|Come up with!} a {#protein|amino acid sequence|AA sequence|polypeptide!} based on the description. + Description: {sentences#} + {#Output|Result!}: {sequence#} diff --git a/data/tabular/uspto/meta.yaml b/data/tabular/uspto/meta.yaml index bc1c9fe1b..3b09a1013 100644 --- a/data/tabular/uspto/meta.yaml +++ b/data/tabular/uspto/meta.yaml @@ -1,77 +1,76 @@ ---- name: uspto description: |- - The USPTO dataset is a collection of reaction mined from US patents. + The USPTO dataset is a collection of reactions mined from US patents. targets: - - id: masked_rxn_smiles - type: text - description: reaction SMILES with one element masked - names: - - noun: reaction SMILES with one element masked as `MASK` - - noun: reaction SMILES with one element hidden as `MASK` - - noun: masked reaction SMILES (one component masked as `MASK`) - - noun: masked reaction SMILES string (one component masked as `MASK`) - - noun: masked RXNSMILES (one component masked as `MASK`) - - id: educt_string - type: text - description: reaction educts - names: - - noun: reaction educts - - noun: educts - - noun: starting materials - - id: product_string - type: text - description: reaction products - names: - - noun: reaction products - - noun: products + - id: masked_rxn_smiles + type: text + description: reaction SMILES with one element masked + names: + - noun: reaction SMILES with one element masked as `MASK` + - noun: reaction SMILES with one element hidden as `MASK` + - noun: masked reaction SMILES (one component masked as `MASK`) + - noun: masked reaction SMILES string (one component masked as `MASK`) + - noun: masked RXNSMILES (one component masked as `MASK`) + - id: educt_string + type: text + description: reaction educts + names: + - noun: reaction educts + - noun: educts + - noun: starting materials + - id: product_string + type: text + description: reaction
products + names: + - noun: reaction products + - noun: products identifiers: - - id: RXNSMILES - type: RXNSMILES - description: RXNSMILES - names: - - noun: reaction SMILES - - noun: reaction SMILES string - - noun: RXNSMILES - - noun: reaction SMILES (RXNSMILES) - - id: missing_component - type: text - description: masked element + - id: RXNSMILES + type: RXNSMILES + description: RXNSMILES + names: + - noun: reaction SMILES + - noun: reaction SMILES string + - noun: RXNSMILES + - noun: reaction SMILES (RXNSMILES) + - id: missing_component + type: text + description: masked element license: CC0 links: - - url: https://figshare.com/articles/dataset/Chemical_reactions_from_US_patents_1976-Sep2016_/5104873 - description: original data source + - url: https://figshare.com/articles/dataset/Chemical_reactions_from_US_patents_1976-Sep2016_/5104873 + description: original data source num_points: 150774 bibtex: - - |- - @article{Lowe2017, - author = "Daniel Lowe", - title = "{Chemical reactions from US patents (1976-Sep2016)}", - year = "2017", - month = "6", - url = "https://figshare.com/articles/dataset/Chemical_reactions_from_US_patents_1976-Sep2016_/5104873", - doi = "10.6084/m9.figshare.5104873.v1" - } + - |- + @article{Lowe2017, + author = "Daniel Lowe", + title = "{Chemical reactions from US patents (1976-Sep2016)}", + year = "2017", + month = "6", + url = "https://figshare.com/articles/dataset/Chemical_reactions_from_US_patents_1976-Sep2016_/5104873", + doi = "10.6084/m9.figshare.5104873.v1" + } templates: - - The {RXNSMILES__names__noun} {RXNSMILES#} has the {educt_string__names__noun} {educt_string#} and the {product_string__names__noun} {product_string#}. - - The {RXNSMILES__names__noun} {RXNSMILES#} has the {product_string__names__noun} {product_string#} and the {educt_string__names__noun} {educt_string#}. - - The masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#} is {missing_component#}. 
- - The {#chemical|compound!} with SMILES {missing_component#} is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}. - - |- - Question: {#What|Which!} {educt_string__names__noun} are {#needed|required!} to {#produce|synthesize!} {product_string#}? - Answer: {educt_string#}. - - |- - Question: {#What|Which!} {product_string__names__noun} are produced from the {educt_string__names__noun} {educt_string#}? - Answer: {product_string#}. - - |- - User: I {#want|would like to|must|need to!} {#synthesize|produce!} the {product_string__names__noun} {product_string#}. - Assistant: {#Cool, is|That's interesting, is|Great, is|Is!} there anything else I can do for you? - User: {#Yes, |!}I would like to know the {educt_string__names__noun} I need to produce {product_string#}. - Assistant: {#I recommend|I suggest|I propose|I advise!} the following {educt_string__names__noun}: {educt_string#}. - - |- - Question: What is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}? - Answer: {missing_component#}. - - |- - Task: Predict the masked component in a {masked_rxn_smiles__names__noun}. - Description: {masked_rxn_smiles#} - {#Answer|Solution!}: {missing_component#} + - The {RXNSMILES__names__noun} {RXNSMILES#} has the {educt_string__names__noun} {educt_string#} and the {product_string__names__noun} {product_string#}. + - The {RXNSMILES__names__noun} {RXNSMILES#} has the {product_string__names__noun} {product_string#} and the {educt_string__names__noun} {educt_string#}. + - The masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#} is {missing_component#}. + - The {#chemical|compound!} with SMILES {missing_component#} is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}. + - |- + Question: {#What|Which!} {educt_string__names__noun} are {#needed|required!} to {#produce|synthesize!} {product_string#}? + Answer: {educt_string#}. 
+ - |- + Question: {#What|Which!} {product_string__names__noun} are produced from the {educt_string__names__noun} {educt_string#}? + Answer: {product_string#}. + - |- + User: I {#want|would like to|must|need to!} {#synthesize|produce!} the {product_string__names__noun} {product_string#}. + Assistant: {#Cool, is|That's interesting, is|Great, is|Is!} there anything else I can do for you? + User: {#Yes, |!}I would like to know the {educt_string__names__noun} I need to produce {product_string#}. + Assistant: {#I recommend|I suggest|I propose|I advise!} the following {educt_string__names__noun}: {educt_string#}. + - |- + Question: What is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}? + Answer: {missing_component#}. + - |- + Task: Predict the masked component in a {masked_rxn_smiles__names__noun}. + Description: {masked_rxn_smiles#} + {#Answer|Solution!}: {missing_component#} diff --git a/data/tabular/uspto_yield/meta.yaml b/data/tabular/uspto_yield/meta.yaml index 3701c1e10..76bb5244a 100644 --- a/data/tabular/uspto_yield/meta.yaml +++ b/data/tabular/uspto_yield/meta.yaml @@ -1,62 +1,61 @@ ---- name: uspto_yield description: |- - The USPTO dataset is a collection of reaction mined from US patents. + The USPTO dataset is a collection of reactions mined from US patents. 
targets: - - id: yield - type: continuous - significant_digits: 0 - description: reaction yield - units: \% - names: - - noun: yield - - noun: reaction yield + - id: yield + type: continuous + significant_digits: 0 + description: reaction yield + units: \% + names: + - noun: yield + - noun: reaction yield identifiers: - - id: RXNSMILES - type: RXNSMILES - description: reaction SMILES - names: - - noun: reaction SMILES - - noun: reaction SMILES string - - noun: RXNSMILES - - noun: reaction SMILES (RXNSMILES) - - id: educt_string - type: text - description: reaction educts - names: - - noun: reaction educts - - noun: educts - - noun: starting materials - - id: product_string - type: text - description: reaction products - names: - - noun: reaction products - - noun: products + - id: RXNSMILES + type: RXNSMILES + description: reaction SMILES + names: + - noun: reaction SMILES + - noun: reaction SMILES string + - noun: RXNSMILES + - noun: reaction SMILES (RXNSMILES) + - id: educt_string + type: text + description: reaction educts + names: + - noun: reaction educts + - noun: educts + - noun: starting materials + - id: product_string + type: text + description: reaction products + names: + - noun: reaction products + - noun: products license: CC0 links: - - url: https://figshare.com/articles/dataset/Chemical_reactions_from_US_patents_1976-Sep2016_/5104873 - description: original data source + - url: https://figshare.com/articles/dataset/Chemical_reactions_from_US_patents_1976-Sep2016_/5104873 + description: original data source num_points: 36564 bibtex: - - |- - @article{Lowe2017, - author = "Daniel Lowe", - title = "{Chemical reactions from US patents (1976-Sep2016)}", - year = "2017", - month = "6", - url = "https://figshare.com/articles/dataset/Chemical_reactions_from_US_patents_1976-Sep2016_/5104873", - doi = "10.6084/m9.figshare.5104873.v1" - } + - |- + @article{Lowe2017, + author = "Daniel Lowe", + title = "{Chemical reactions from US patents (1976-Sep2016)}", + 
year = "2017", + month = "6", + url = "https://figshare.com/articles/dataset/Chemical_reactions_from_US_patents_1976-Sep2016_/5104873", + doi = "10.6084/m9.figshare.5104873.v1" + } templates: - - The {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#} is {yield#}{yield__units}. - - |- - User: {#I need|I want|I would like!} to run a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}. What is the {yield__names__noun} {#I can expect|I should expect|I should get|I can get!}? - Assistant: {#The|The expected|The predicted|The estimated!} {yield__names__noun} is {yield#}{yield__units}. - - |- - Question: {#What is|What's|What is the|What's the!} {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}? - Answer: {yield#}{yield__units}. - - The {yield__names__noun} of a reaction of {educt_string#} to {product_string#} is {yield#}{yield__units}. - - |- - Question: {#What is|What's|What is the|What's the!} {yield__names__noun} of a reaction of {educt_string#} to {product_string#}? - Answer: {yield#}{yield__units}. + - The {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#} is {yield#}{yield__units}. + - |- + User: {#I need|I want|I would like!} to run a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}. What is the {yield__names__noun} {#I can expect|I should expect|I should get|I can get!}? + Assistant: {#The|The expected|The predicted|The estimated!} {yield__names__noun} is {yield#}{yield__units}. + - |- + Question: {#What is|What's|What is the|What's the!} {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}? + Answer: {yield#}{yield__units}. + - The {yield__names__noun} of a reaction of {educt_string#} to {product_string#} is {yield#}{yield__units}. + - |- + Question: {#What is|What's|What is the|What's the!} {yield__names__noun} of a reaction of {educt_string#} to {product_string#}? + Answer: {yield#}{yield__units}. 
diff --git a/data/tabular/volume_of_distribution_at_steady_state_lombardo_et_al/meta.yaml b/data/tabular/volume_of_distribution_at_steady_state_lombardo_et_al/meta.yaml index 6e8d022a7..c41f2167d 100644 --- a/data/tabular/volume_of_distribution_at_steady_state_lombardo_et_al/meta.yaml +++ b/data/tabular/volume_of_distribution_at_steady_state_lombardo_et_al/meta.yaml @@ -1,54 +1,53 @@ ---- name: volume_of_distribution_at_steady_state_lombardo_et_al description: |- - The volume of distribution at steady state (VDss) measures the degree - of a drug's concentration in the body tissue compared to concentration in the blood. - Higher VD indicates a higher distribution in the tissue and usually indicates - the drug with high lipid solubility, low plasma protein binidng rate. + The volume of distribution at steady state (VDss) measures the degree + of a drug's concentration in the body tissue compared to concentration in the blood. + Higher VD indicates a higher distribution in the tissue and usually indicates + the drug with high lipid solubility, low plasma protein binding rate. 
targets: - - id: VDss_Lombardo - description: volume of distribution at steady state (VDss) - units: L/kg - type: continuous - names: - - noun: volume of distribution at steady state (VDss) - - noun: VDss - uris: - - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C85538 + - id: VDss_Lombardo + description: volume of distribution at steady state (VDss) + units: L/kg + type: continuous + names: + - noun: volume of distribution at steady state (VDss) + - noun: VDss + uris: + - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C85538 benchmarks: - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - - id: SMILES - type: SMILES - description: SMILES - - id: compound_name - type: Other - names: - - noun: compound name - - noun: drug name - - noun: generic drug name - description: mix of drug name and ids + - id: SMILES + type: SMILES + description: SMILES + - id: compound_name + type: Other + names: + - noun: compound name + - noun: drug name + - noun: generic drug name + description: mix of drug name and ids license: CC BY 4.0 links: - - url: https://doi.org/10.1021/acs.jcim.6b00044 - description: corresponding publication - - url: https://tdcommons.ai/single_pred_tasks/adme/#vdss-volumn-of-distribution-at-steady-state-lombardo-et-al - description: data source + - url: https://doi.org/10.1021/acs.jcim.6b00044 + description: corresponding publication + - url: https://tdcommons.ai/single_pred_tasks/adme/#vdss-volumn-of-distribution-at-steady-state-lombardo-et-al + description: data source num_points: 1130 bibtex: - - |- - @article{Lombardo2016, - doi = {10.1021/acs.jcim.6b00044}, - url = {https://doi.org/10.1021/acs.jcim.6b00044}, - year = {2016}, - month = sep, - publisher = {merican Chemical Society (ACS)}, - volume = {56}, - number = {10}, - pages = {2042--2052}, - author = {Franco Lombardo and Yankang Jing}, - title = {In Silico Prediction of Volume of Distribution in 
Humans. Extensive Data Set and the - Exploration of Linear and Nonlinear Methods Coupled with Molecular Interaction Fields Descriptors}, - journal = {Journal of Chemical Information and Modeling} + - |- + @article{Lombardo2016, + doi = {10.1021/acs.jcim.6b00044}, + url = {https://doi.org/10.1021/acs.jcim.6b00044}, + year = {2016}, + month = sep, + publisher = {American Chemical Society (ACS)}, + volume = {56}, + number = {10}, + pages = {2042--2052}, + author = {Franco Lombardo and Yankang Jing}, + title = {In Silico Prediction of Volume of Distribution in Humans. Extensive Data Set and the + Exploration of Linear and Nonlinear Methods Coupled with Molecular Interaction Fields Descriptors}, + journal = {Journal of Chemical Information and Modeling} diff --git a/data/tabular/volume_of_distribution_at_steady_state_lombardo_et_al/transform.py b/data/tabular/volume_of_distribution_at_steady_state_lombardo_et_al/transform.py index bb65946e2..1d45078bb 100644 --- a/data/tabular/volume_of_distribution_at_steady_state_lombardo_et_al/transform.py +++ b/data/tabular/volume_of_distribution_at_steady_state_lombardo_et_al/transform.py @@ -48,7 +48,7 @@ def get_and_transform_data(): "description": """The volume of distribution at steady state (VDss) measures the degree of a drug's concentration in the body tissue compared to concentration in the blood. Higher VD indicates a higher distribution in the tissue and usually indicates -the drug with high lipid solubility, low plasma protein binidng rate.""", +the drug with high lipid solubility, low plasma protein binding rate.""", "targets": [ { "id": "VDss_Lombardo", # name of the column in a tabular dataset diff --git a/data/tabular/zinc/meta.yaml b/data/tabular/zinc/meta.yaml index 5c63e6d56..4e9d69a67 100644 --- a/data/tabular/zinc/meta.yaml +++ b/data/tabular/zinc/meta.yaml @@ -1,34 +1,33 @@ ---- name: zinc description: |- - ZINC is a free database of commercially-available compounds for virtual screening. 
- It contains over 230 million purchasable compounds in ready-to-dock, 3D formats. - TDC uses a 250,000 sampled version from the original Mol-VAE paper. + ZINC is a free database of commercially-available compounds for virtual screening. + It contains over 230 million purchasable compounds in ready-to-dock, 3D formats. + TDC uses a 250,000 sampled version from the original Mol-VAE paper. identifiers: - - id: SMILES - type: SMILES - description: SMILES + - id: SMILES + type: SMILES + description: SMILES license: |- - ZINC is free to use for everyone. - Redistribution of significant subsets requires written permission from the authors. + ZINC is free to use for everyone. + Redistribution of significant subsets requires written permission from the authors. links: - - url: https://pubs.acs.org/doi/full/10.1021/acs.jcim.5b00559 - description: Article about original dataset - - url: https://pubs.acs.org/doi/abs/10.1021/acscentsci.7b00572 - description: Exemplary related article shown in tdc's website + - url: https://pubs.acs.org/doi/full/10.1021/acs.jcim.5b00559 + description: Article about original dataset + - url: https://pubs.acs.org/doi/abs/10.1021/acscentsci.7b00572 + description: Exemplary related article shown in tdc's website num_points: 249455 bibtex: - - |- - @article{doi:10.1021/acs.jcim.5b00559, - author = {Sterling, Teague and Irwin, John J.}, - title = {ZINC 15 - Ligand Discovery for Everyone}, - journal = {Journal of Chemical Information and Modeling}, - volume = {55}, - number = {11}, - pages = {2324-2337}, - year = {2015}, - doi = {10.1021/acs.jcim.5b00559}, - note ={PMID: 26479676}, - URL = {https://doi.org/10.1021/acs.jcim.5b00559}, - eprint = {https://doi.org/10.1021/acs.jcim.5b00559}, - } + - |- + @article{doi:10.1021/acs.jcim.5b00559, + author = {Sterling, Teague and Irwin, John J.}, + title = {ZINC 15 - Ligand Discovery for Everyone}, + journal = {Journal of Chemical Information and Modeling}, + volume = {55}, + number = {11}, + pages = 
{2324-2337}, + year = {2015}, + doi = {10.1021/acs.jcim.5b00559}, + note ={PMID: 26479676}, + URL = {https://doi.org/10.1021/acs.jcim.5b00559}, + eprint = {https://doi.org/10.1021/acs.jcim.5b00559}, + } diff --git a/data/text_sampling/text_sampling.py b/data/text_sampling/text_sampling.py index fe99934db..e1b73f4e3 100644 --- a/data/text_sampling/text_sampling.py +++ b/data/text_sampling/text_sampling.py @@ -1,5 +1,4 @@ import copy -import glob import math import os.path import random @@ -566,7 +565,7 @@ def check_targets_and_identifiers(meta: dict, df: pd.DataFrame): self.templates = templates print(f"\n### templates\n{self.templates}") assert self.templates is not None - assert self.templates is not [] + assert self.templates != [] self.prompt_templates = [PromptTemplate(t) for t in self.templates] # create random variables for prompts and texts diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index fb12fc933..1d4773e96 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -2,6 +2,7 @@ There are many different ways to contribute to ChemNLP! You can get in touch via the GitHub [task board](https://github.com/orgs/OpenBioML/projects/5?query=is:open+sort:updated-desc) and [issues](https://github.com/OpenBioML/chemnlp/issues?q=is:issue+is:open+sort:updated-desc&query=is:open+sort:updated-desc) and our [Discord](https://t.co/YMzpevmkiN). ## Prerequisites + Please make a [GitHub account](https://github.com/) prior to implementing a dataset; you can follow instructions to install git [here](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git). 1. [Fork](https://docs.github.com/en/get-started/quickstart/fork-a-repo) the [ChemNLP repository](https://github.com/OpenBioML/chemnlp) @@ -21,8 +22,8 @@ To create your developer environment please follow the guidelines in the `Instal If you are contributing to an existing task which contains a `work package: ` label, please refer to the list below to find a main point of contact for that piece of work. 
If you've any questions or wish to contribute additional issues feel free to reach out to these work package leads from the core team on the [OpenBioML Discord](https://discord.gg/GgDBFP8ZEt) or message directly on GitHub issues. -| Name (discord & github) | Main Work Packages | -| ------------------------------------------------------ | ------------------------------------------------------------- | +| Name (discord & github) | Main Work Packages | +| ------------------------------------------------------ | -------------------------------------------------------------- | | Michael Pieler (MicPie#9427 & MicPie) | 💾 Structured Data, Knowledge Graph, Tokenisers, Data Sampling | | Kevin Jablonka (Kevin Jablonka#1694 & kjappelbaum) | 💾 Structured Data, Knowledge Graph, Tokenisers, Data Sampling | | Bethany Connolly (bethconnolly#3951 & bethanyconnolly) | 📊 Model Evaluation | @@ -32,12 +33,14 @@ If you are contributing to an existing task which contains a `work package: If your dataset has multiple natural splits (i.e. train, test, validation) you can create a \_meta.yaml for each. - `transform.py` Python code that transforms the original dataset (linked in `meta.yaml`) into a form that can be consumed by the loader. @@ -47,14 +50,11 @@ With "implementing" we mean the following: Even though some examples create the `meta.yaml` in `transform.py` there is no need to do so. You can also do it by hand. In most cases the data will be stored in a tabular format and should be named `data_clean.csv`. - In the `transform.py` please try to download the data from an official resource. We encourage you to upload the raw data to HuggingFace Hub, Foundry or some other repository and then retrieve the data from there with your script, if the raw data license permits it. 
- If you need additional dependencies, add them to `dev-requirements.txt` (those are needed for linting/testing/validation) or `requirements.txt` (those are the ones for running `transform.py`) - - The `meta.yaml` has the following structure: ```yaml @@ -78,9 +78,9 @@ targets: - noun: standard deviation of the aqueous solubility - noun: tandard deviation of the solubility in water benchmarks: # lists all benchmarks this dataset has been part of. split_column is a column in this dataframe with the value "train", "valid", "test" - indicating to which fold a specific entry belongs to - - name: TDC - link: https://tdcommons.ai/ - split_column: split + - name: TDC + link: https://tdcommons.ai/ + split_column: split identifiers: - id: InChI # column name type: InChI # can be "SMILES", "SELFIES", "IUPAC", "Other", "InChI", "InChiKey", "RXNSMILES", "RXNSMILESWAdd" see IdentifierEnum @@ -106,6 +106,7 @@ bibtex: # citation(s) for this dataset in BibTeX format journal = {Sci Data} }" ``` + Please do not simply copy/paste generic descriptions but try to give a concise and specific description for the dataset you are adding. For the typical material-property datasets, we will later use the `identifier` and `property` columns to create and fill prompt templates. @@ -113,38 +114,46 @@ For the typical material-property datasets, we will later use the `identifier` a ### Text templates With our text template setup for the sampling you can: -* use all the data from the `meta.yaml` file, -* recode categorical data, and -* chain together multiple data fields from the tabular and meta data. + +- use all the data from the `meta.yaml` file, +- recode categorical data, and +- chain together multiple data fields from the tabular and meta data. #### Example text template 1 (mainly used for tabular data) + ``` The molecule with the {SMILES__description} representation of {SMILES#} exhibits {mutagenic#no &NULL}{mutagenic__names__adjective} properties. 
``` -* `SMILES__description` gets you the text from the description field of the SMILES identifier. The `__` dunder (double underscore) is used to indicate the levels in the `meta.yaml` file. -* `SMILES#` gets you the data of the sampled SMILES entry (= row from the tabular data). The `#` is used to get the corresponding data. -* `mutagenic#no &NULL` gets you the data with `#` and recodes it. The recoding options are separated with a `&`. In this example the binary variable `mutagenic` that can be `0` or `1` gets recoded to `no ` and `NULL`. `NULL` is a "reserved word" an indicates [no value](https://en.wikipedia.org/wiki/Null_(SQL)). Thus, the `no ` gets added in front of the `mutagenic__names__adjective` if `mutagenic# == 0`. -* `mutagenic__names__adjective` gets from the `mutagenic` target the adjective names. + +- `SMILES__description` gets you the text from the description field of the SMILES identifier. The `__` dunder (double underscore) is used to indicate the levels in the `meta.yaml` file. +- `SMILES#` gets you the data of the sampled SMILES entry (= row from the tabular data). The `#` is used to get the corresponding data. +- `mutagenic#no &NULL` gets you the data with `#` and recodes it. The recoding options are separated with a `&`. In this example the binary variable `mutagenic` that can be `0` or `1` gets recoded to `no ` and `NULL`. `NULL` is a "reserved word" and indicates [no value](https://en.wikipedia.org/wiki/Null_(SQL)). Thus, the `no ` gets added in front of the `mutagenic__names__adjective` if `mutagenic# == 0`. +- `mutagenic__names__adjective` gets from the `mutagenic` target the adjective names. #### Example text template 2 (mainly used for KG data)` + ``` The {node1_type#} {node1_name#|node1_smiles#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_type#} {node3_name#}. ``` -* `node1_name#|node1_smiles#` chains together two data fields from the tabular data with `|` so they are jointly sampled for this position. 
This means that we sample in this case from the name and the SMILES representation. -* A similar setup can be used in a single data entry (= row from the tabular data) of the tabular data: For `node2_protein_names` the field can include several protein names separated by a `|`, e.g., `Pyruvate dehydrogenase E1 component subunit beta, mitochondrial|PDHE1-B` which then samples from `Pyruvate dehydrogenase E1 component subunit beta, mitochondrial` or `PDHE1-B`. + +- `node1_name#|node1_smiles#` chains together two data fields from the tabular data with `|` so they are jointly sampled for this position. This means that we sample in this case from the name and the SMILES representation. +- A similar setup can be used in a single data entry (= row from the tabular data) of the tabular data: For `node2_protein_names` the field can include several protein names separated by a `|`, e.g., `Pyruvate dehydrogenase E1 component subunit beta, mitochondrial|PDHE1-B` which then samples from `Pyruvate dehydrogenase E1 component subunit beta, mitochondrial` or `PDHE1-B`. #### Example text templates 3 for multiple choice setups + Multiple choice setups are also supported. For this we need three components: -* `%multiple_choice_enum%2%aA1` can be used to list the multiple choice enumerations, i.e., `1, 2, or 3`, `A or B`, etc., The second `%` starts the multiple choice number sequence. Single integers and a range consisting of two integers separated by a `-` are supported to set the lower and higher number, e.g., `2-5` will sample a value between 2 and 5, including the boundaries, for the answer options. The third `%` is used to subselect multiple choice enumerations, i.e., `a` for lower case alphabetical enumerations, `A` for upper case alphabetical, and `1` for numerical enumerations. 
-* `mutagenic%` is used to list the multiple choice enumerations with the corresponding possible answer options after the multiple choice enumerations, and -* `%multiple_choice_result` is used to get the multiple choice enumeration of the answer, i.e., `1`, `c`. -Please pay attention to the `%` symbol and its position as this is used to parse the different control elements from the text template. -The sampling procedure incorporates a range of different multiple choice enumerations that are sampled randomly: -* numerical (`1, 2, 3, ...`) and alphabetical (`a, b, c, ...` or `A, B, C, ...`) enumerations combined with -* different suffixes, i.e., ` ` (no suffix), `.`, `.)`, `)`, and `:`, to create a range of different multiple choice enumerations. -If only the choices `0` or `1` are available they will be recoded with `False` and `True`. + +- `%multiple_choice_enum%2%aA1` can be used to list the multiple choice enumerations, i.e., `1, 2, or 3`, `A or B`, etc., The second `%` starts the multiple choice number sequence. Single integers and a range consisting of two integers separated by a `-` are supported to set the lower and higher number, e.g., `2-5` will sample a value between 2 and 5, including the boundaries, for the answer options. The third `%` is used to subselect multiple choice enumerations, i.e., `a` for lower case alphabetical enumerations, `A` for upper case alphabetical, and `1` for numerical enumerations. +- `mutagenic%` is used to list the multiple choice enumerations with the corresponding possible answer options after the multiple choice enumerations, and +- `%multiple_choice_result` is used to get the multiple choice enumeration of the answer, i.e., `1`, `c`. + Please pay attention to the `%` symbol and its position as this is used to parse the different control elements from the text template. 
+ The sampling procedure incorporates a range of different multiple choice enumerations that are sampled randomly: +- numerical (`1, 2, 3, ...`) and alphabetical (`a, b, c, ...` or `A, B, C, ...`) enumerations combined with +- different suffixes, i.e., ` ` (no suffix), `.`, `.)`, `)`, and `:`, to create a range of different multiple choice enumerations. + If only the choices `0` or `1` are available they will be recoded with `False` and `True`. ##### Standard template + ``` Task: Please answer the multiple choice question below with {%multiple_choice_enum%2%aA1}. Question: Is the molecule with the {SMILES__description} representation of {SMILES#} {mutagenic__names__adjective}? @@ -152,7 +161,9 @@ Options: {mutagenic%} Answer: {%multiple_choice_result} ``` + Example output: + ``` Task: Please answer the multiple choice question below with A or B. Question: Is the molecule with the SMILES representation of CC(C)NCC(O)c1ccc2ccccc2c1 Ames mutagenic? @@ -163,6 +174,7 @@ Answer: A" ``` ##### Template for benchmarking + ``` Task: Please answer the multiple choice question below with {%multiple_choice_enum%2%aA1}. Question: Is the molecule with the {SMILES__description} representation of {SMILES#} {mutagenic__names__adjective}? @@ -170,15 +182,18 @@ Options: {mutagenic%} Answer:{%multiple_choice_result} ``` + The benchmarking setup exports additional fields for the benchmarking setup, see the example below: `{"input":"Task: Please answer the multiple choice question below with 1 or 2.\nQuestion: Is the molecule with the SMILES representation of BrCBr Ames mutagenic?\nOptions:\n1.) False\n2.) True\nAnswer:","output":" 2","output_choices":["1","2"],"correct_output_index":"1"}` Please have a look at the following section below about the general benchmarking template setup. #### Example text templates 4 for flexible multiple choice setups + More flexible multiple choice setups are also supported. 
The standard multiple choice setup from "Example text templates 3 for multiple choice setups" is intended for features of molecules as those are deduplicated during the sampling process. In contrast, this flexible multiple choice setup also lets you use the molecule identifiers, e.g., SMILES, in the multiple choice options. For this we only need to add one component to the previously outlined multiple choice format: -* In order to let the model predict which `SMILES` has or has not the boolean variable `penetrate_BBB` we simply add `SMILES%penetrate_BBB%` as an enumeration placeholder for the possible options. With that the list of the multiple choice enumerations shows the SMILES data. Note that the `penetrate_BBB#not &NULL` is needed because the sampling is based on the individual sample (= row from the tabular data) and depending on if `penetrate_BBB` is `True` or `False` we look for a different result label because in the code we compare the sampled options to the `penetrate_BBB` value of the specific sample (= entry from the specific row from the tabular data). + +- In order to let the model predict which `SMILES` has or has not the boolean variable `penetrate_BBB` we simply add `SMILES%penetrate_BBB%` as an enumeration placeholder for the possible options. With that the list of the multiple choice enumerations shows the SMILES data. Note that the `penetrate_BBB#not &NULL` is needed because the sampling is based on the individual sample (= row from the tabular data) and depending on if `penetrate_BBB` is `True` or `False` we look for a different result label because in the code we compare the sampled options to the `penetrate_BBB` value of the specific sample (= entry from the specific row from the tabular data). ``` Task: Please answer the multiple choice question. 
@@ -201,11 +216,14 @@ Answer: B, C ``` #### Benchmarking text templates + There are two versions of text templates, i.e., text templates with and without the end-of-input token `<EOI>`: + ``` The {SMILES__description} {SMILES#} is {mutagenic#no &NULL}{mutagenic__names__adjective}. Is the {SMILES__description} {SMILES#} {mutagenic__names__adjective}:<EOI>{mutagenic# yes& no} ``` + The `<EOI>` token indicates the splitting position for the benchmarking export, i.e., everything before it will be written to the `input` field and everything afterwards to the `output` field. Without `<EOI>` everything will be in the `text` field. In the current setup, you can switch with the `benchmarking_templates` flag of the [`TemplateSampler` class](https://github.com/OpenBioML/chemnlp/blob/text_sampling/text_sampling/text_sampling.py#L104) between text templates with and without `<EOI>`. @@ -254,7 +272,6 @@ You might find suitable links using the following resources: - https://bioportal.bioontology.org/search - https://goldbook.iupac.org/ - #### PubChem Assay IDs For some targets, the activity was measured using assays. In this case, please list the assays with their _numeric_ PubChem assay id in the field `pubchem_aids`. @@ -263,20 +280,16 @@ Keep in mind that we plan to look up the name and the description of the assay t #### Prompt examples - For datasets that are not in tabular form, we are still discussing the best process, but we also envision that we might perform some named-entity-recognition to also use some of the text datasets in a framework such as LIFT. Otherwise, we will simply use them in the typical GPT pretraining task. - ## Implementing structured data sampler TBD. - ## Implementing tokenizers TBD. - ## Implementing model adaptations Our first experiments will be based on [Pythia model](https://github.com/EleutherAI/pythia) suite from [EleutherAI](https://www.eleuther.ai) that is based on [GPT-NeoX](https://github.com/EleutherAI/gpt-neox).
diff --git a/docs/SUBMODULES.md b/docs/SUBMODULES.md index 391eddaa4..a5d7acb37 100644 --- a/docs/SUBMODULES.md +++ b/docs/SUBMODULES.md @@ -1,4 +1,5 @@ # Introduction + This page outlines the workflow for contributing to the ChemNLP project where changes to the Git submodules are required. The project currently has two submodules: 1. [gpt-neox](https://github.com/OpenBioML/gpt-neox) @@ -19,8 +20,8 @@ There are many excellent introductions to submodules online and we won't repeat 1. [7.11 Git Tools - Submodules](https://git-scm.com/book/en/v2/Git-Tools-Submodules) - section from Pro Git. 2. [Git submodule docs](https://git-scm.com/docs/git-submodule) - the documentation. - # Getting help + The instructions below attempt to guide you through the process of working with submodules. However, if you are still confused please reach out on GitHub or Discord to a project maintainer. # Workflow 1: making changes to a submodule only @@ -31,24 +32,23 @@ Example of making a change to the `gpt-neox` submodule for a feature called `add 2. [Clone your fork](https://docs.github.com/en/repositories/creating-and-managing-repositories/cloning-a-repository) and the submodules, see: [Cloning submodules](../README.md#cloning-submodules). 3. [Optional, if required for the issue] Install `chemnlp` in your virtual env using `pip install -e` (see installation instructions [here](../README.md#installation-and-set-up)). 4. [Make a new branch](https://git-scm.com/book/en/v2/Git-Branching-Basic-Branching-and-Merging) e.g. `feat(sub):add-peft-method` in the `gpt-neox` submodule, **not** in `chemnlp`. -6. Make changes to the `gpt-neox` submodule per the issue you are working on. -7. Commit changes in the `gpt-neox` submodule. -8. Push the submodule changes to remote and open a PR in [gpt-neox](https://github.com/OpenBioML/gpt-neox). -10. Once the changes to the submodule are approved, merge them (or a reviewer will). +5. Make changes to the `gpt-neox` submodule per the issue you are working on. 
+6. Commit changes in the `gpt-neox` submodule. +7. Push the submodule changes to remote and open a PR in [gpt-neox](https://github.com/OpenBioML/gpt-neox). +8. Once the changes to the submodule are approved, merge them (or a reviewer will). The above **only** updates the `gpt-neox` submodule on remote - it **does not** change which commit `chemnlp` is tracking. To do this: 1. On your fork of `chemnlp`, update to get the latest changes for the `gpt-neox` submodule only: `git submodule update --remote gpt-neox` 2. This will checkout the latest commit on the `main` branch of `gpt-neox`. - - Note: if you want to track a different commit of `gpt-neox` other than the latest then navigate to the `gpt-neox` directory and checkout a specific commit (e.g. your recent merge commit from the `gpt-neox` pull request above): `git checkout <commit-hash>` + - Note: if you want to track a different commit of `gpt-neox` other than the latest then navigate to the `gpt-neox` directory and checkout a specific commit (e.g. your recent merge commit from the `gpt-neox` pull request above): `git checkout <commit-hash>` 3. In `chemnlp` [make a new branch](https://git-scm.com/book/en/v2/Git-Branching-Basic-Branching-and-Merging) e.g. `feat:update-gpt-neox-submodule` 4. Commit this change, push to your fork's remote and open a PR from your fork to the [ChemNLP repository](https://github.com/OpenBioML/chemnlp) which will update the commit the `chemnlp` project tracks. - Things to note: -* The remote of `chemnlp` should be your fork. -* The remote of `gpt-neox` should be the [OpenBioML fork](https://github.com/OpenBioML/gpt-neox). +- The remote of `chemnlp` should be your fork. +- The remote of `gpt-neox` should be the [OpenBioML fork](https://github.com/OpenBioML/gpt-neox).
To see the remotes for a Git repository run: `git remote -v` @@ -71,4 +71,3 @@ When you work in the submodule directly you should create or checkout a branch b See also: [why did Git detach my HEAD?](https://stackoverflow.com/questions/3965676/why-did-my-git-repo-enter-a-detached-head-state/3965714#3965714) > Any checkout of a commit that is not the name of one of *your* branches will get you a detached HEAD. A SHA1 which represents the tip of a branch still gives a detached HEAD. Only a checkout of a local branch *name* avoids that mode. -> diff --git a/docs/api/sampler.md b/docs/api/sampler.md index a75e10a60..460b6ef0c 100644 --- a/docs/api/sampler.md +++ b/docs/api/sampler.md @@ -90,7 +90,6 @@ result = sampler.sample(df.iloc[0], template) print(result) ``` - Basic usage with identifier wrapping: ```python @@ -117,7 +116,6 @@ print(result) # Output: The molecule with SMILES [BEGIN_SMILES]CC(C)NCC(O)c1ccc(O)c(O)c1[END_SMILES] has a LogP of 1.23. ``` - Using class balancing: ```python diff --git a/docs/api/sampler_cli.md b/docs/api/sampler_cli.md index e1d92636b..0e165565d 100644 --- a/docs/api/sampler_cli.md +++ b/docs/api/sampler_cli.md @@ -30,44 +30,55 @@ python sampler_cli.py [OPTIONS] ## Detailed Option Descriptions ### `chunksize` + Specifies the number of rows from the dataset to process at once. This is useful for managing memory usage when working with large datasets. ### `class_balanced` + When enabled, the script will attempt to balance the classes in the dataset for each template. The balancing column is automatically determined based on the template and metadata. ### `benchmarking` + If set to `True`, the script will only process templates that contain the `<EOI>` tag, which are typically used for benchmarking purposes. ### `multiple_choice` + When `True`, the script will process only multiple-choice templates (those containing `%multiple_choice_` in the template).
### `additional_templates` + Allows you to specify additional templates to be used in the sampling process. These templates will be added to any existing templates in the metadata. ### `use_standard_templates` + If `True`, the script will include standard tabular text templates for applicable datasets. These templates are predefined in the `STANDARD_TABULAR_TEXT_TEMPLATES` constant. ### `wrap_identifiers` + When enabled, the script will wrap identifiers in the templates with special tags. ## Examples 1. Basic usage with default settings: + ``` python sampler_cli.py /path/to/data_dir /path/to/output_dir ``` 2. Process a dataset with class balancing and identifier wrapping: + ``` python sampler_cli.py /path/to/data_dir /path/to/output_dir --class_balanced=True --wrap_identifiers=True ``` 3. Generate benchmarking samples for multiple-choice questions: + ``` python sampler_cli.py /path/to/data_dir /path/to/output_dir --benchmarking=True --multiple_choice=True ``` 4. Process a large dataset in smaller chunks: + ``` python sampler_cli.py /path/to/data_dir /path/to/output_dir --chunksize=500000 ``` diff --git a/experiments/configs/data_configs/hf_data.yml b/experiments/configs/data_configs/hf_data.yml index f28a5a211..c3fec721e 100644 --- a/experiments/configs/data_configs/hf_data.yml +++ b/experiments/configs/data_configs/hf_data.yml @@ -1,7 +1,7 @@ -model_name: 'EleutherAI/pythia-1b' +model_name: "EleutherAI/pythia-1b" context_length: 2048 -dataset_name: 'EleutherAI/pile' -dataset_args: {'name': 'pubmed', 'split': 'train'} +dataset_name: "EleutherAI/pile" +dataset_args: {"name": "pubmed", "split": "train"} batch_size: 1 -string_key: 'text' -save_path: '/fsx/proj-chemnlp/data/example_tokenised' +string_key: "text" +save_path: "/fsx/proj-chemnlp/data/example_tokenised" diff --git a/experiments/configs/data_configs/hf_data_wiki.yml b/experiments/configs/data_configs/hf_data_wiki.yml index 903693165..118aa43b4 100644 --- a/experiments/configs/data_configs/hf_data_wiki.yml 
+++ b/experiments/configs/data_configs/hf_data_wiki.yml @@ -1,8 +1,7 @@ model_name: "EleutherAI/pythia-1b" context_length: 2048 dataset_name: "wikipedia" -dataset_args: - { "name": "20220301.en", "split": "train", "beam_runner": "DirectRunner" } +dataset_args: {"name": "20220301.en", "split": "train", "beam_runner": "DirectRunner"} batch_size: 1000 out_dir: "/fsx/proj-chemnlp/data" string_key: "text" diff --git a/experiments/configs/data_configs/prep_lm_eval_data.yml b/experiments/configs/data_configs/prep_lm_eval_data.yml index e125918ee..0810b999b 100644 --- a/experiments/configs/data_configs/prep_lm_eval_data.yml +++ b/experiments/configs/data_configs/prep_lm_eval_data.yml @@ -1,15 +1,6 @@ model_name: "EleutherAI/pythia-1b" context_length: 2048 -tasks: [ - "hendrycksTest-college_biology", - "hendrycksTest-college_chemistry", - "hendrycksTest-college_mathematics", - "hendrycksTest-college_physics", - "hendrycksTest-high_school_mathematics", - "hendrycksTest-high_school_biology", - "hendrycksTest-high_school_chemistry", - "hendrycksTest-high_school_physics", -] +tasks: ["hendrycksTest-college_biology", "hendrycksTest-college_chemistry", "hendrycksTest-college_mathematics", "hendrycksTest-college_physics", "hendrycksTest-high_school_mathematics", "hendrycksTest-high_school_biology", "hendrycksTest-high_school_chemistry", "hendrycksTest-high_school_physics"] data_split: "validation" out_dir: "/fsx/proj-chemnlp/data" save_name: "hendrycks_STEM_2" diff --git a/experiments/configs/deepspeed/deepspeed_S1.json b/experiments/configs/deepspeed/deepspeed_S1.json index b2c4b27d7..f9f81b77f 100644 --- a/experiments/configs/deepspeed/deepspeed_S1.json +++ b/experiments/configs/deepspeed/deepspeed_S1.json @@ -1,19 +1,19 @@ { - "fp16": { - "enabled": "auto" - }, - "bf16": { - "enabled": true - }, - "zero_optimization": { - "stage": 1, - "overlap_comm": true, - "contiguous_gradients": true, - "sub_group_size": 1e9, - "reduce_bucket_size": "auto" - }, - 
"gradient_accumulation_steps": "auto", - "gradient_clipping": "auto", - "train_batch_size": "auto", - "train_micro_batch_size_per_gpu": "auto" + "fp16": { + "enabled": "auto" + }, + "bf16": { + "enabled": true + }, + "zero_optimization": { + "stage": 1, + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1e9, + "reduce_bucket_size": "auto" + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto" } diff --git a/experiments/configs/deepspeed/deepspeed_S2.json b/experiments/configs/deepspeed/deepspeed_S2.json index 24ab68016..de2bcf6b5 100644 --- a/experiments/configs/deepspeed/deepspeed_S2.json +++ b/experiments/configs/deepspeed/deepspeed_S2.json @@ -1,23 +1,23 @@ { - "fp16": { - "enabled": "auto" + "fp16": { + "enabled": "auto" + }, + "bf16": { + "enabled": true + }, + "zero_optimization": { + "stage": 2, + "offload_optimizer": { + "device": "none", + "pin_memory": true }, - "bf16": { - "enabled": true - }, - "zero_optimization": { - "stage": 2, - "offload_optimizer": { - "device": "none", - "pin_memory": true - }, - "overlap_comm": true, - "contiguous_gradients": true, - "sub_group_size": 1e9, - "reduce_bucket_size": "auto" - }, - "gradient_accumulation_steps": "auto", - "gradient_clipping": "auto", - "train_batch_size": "auto", - "train_micro_batch_size_per_gpu": "auto" + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1e9, + "reduce_bucket_size": "auto" + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto" } diff --git a/experiments/configs/deepspeed/deepspeed_offload_S2.json b/experiments/configs/deepspeed/deepspeed_offload_S2.json index 1ca877135..1ab5ab2b6 100644 --- a/experiments/configs/deepspeed/deepspeed_offload_S2.json +++ b/experiments/configs/deepspeed/deepspeed_offload_S2.json @@ -1,23 +1,23 @@ { - "fp16": { - 
"enabled": "auto" + "fp16": { + "enabled": "auto" + }, + "bf16": { + "enabled": true + }, + "zero_optimization": { + "stage": 2, + "offload_optimizer": { + "device": "cpu", + "pin_memory": true }, - "bf16": { - "enabled": true - }, - "zero_optimization": { - "stage": 2, - "offload_optimizer": { - "device": "cpu", - "pin_memory": true - }, - "overlap_comm": true, - "contiguous_gradients": true, - "sub_group_size": 1e9, - "reduce_bucket_size": "auto" - }, - "gradient_accumulation_steps": "auto", - "gradient_clipping": "auto", - "train_batch_size": "auto", - "train_micro_batch_size_per_gpu": "auto" + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1e9, + "reduce_bucket_size": "auto" + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto" } diff --git a/experiments/configs/deepspeed/deepspeed_offload_S3.json b/experiments/configs/deepspeed/deepspeed_offload_S3.json index 5b950c8b3..d892abdf8 100644 --- a/experiments/configs/deepspeed/deepspeed_offload_S3.json +++ b/experiments/configs/deepspeed/deepspeed_offload_S3.json @@ -1,32 +1,32 @@ { - "fp16": { - "enabled": "auto" + "fp16": { + "enabled": "auto" + }, + "bf16": { + "enabled": true + }, + "zero_optimization": { + "stage": 3, + "offload_optimizer": { + "device": "cpu", + "pin_memory": true }, - "bf16": { - "enabled": true + "offload_param": { + "device": "cpu", + "pin_memory": true }, - "zero_optimization": { - "stage": 3, - "offload_optimizer": { - "device": "cpu", - "pin_memory": true - }, - "offload_param": { - "device": "cpu", - "pin_memory": true - }, - "overlap_comm": true, - "contiguous_gradients": true, - "sub_group_size": 1e9, - "reduce_bucket_size": "auto", - "stage3_prefetch_bucket_size": "auto", - "stage3_param_persistence_threshold": "auto", - "stage3_max_live_parameters": 1e9, - "stage3_max_reuse_distance": 1e9, - "stage3_gather_16bit_weights_on_model_save": true - }, - 
"gradient_accumulation_steps": "auto", - "gradient_clipping": "auto", - "train_batch_size": "auto", - "train_micro_batch_size_per_gpu": "auto" + "overlap_comm": true, + "contiguous_gradients": true, + "sub_group_size": 1e9, + "reduce_bucket_size": "auto", + "stage3_prefetch_bucket_size": "auto", + "stage3_param_persistence_threshold": "auto", + "stage3_max_live_parameters": 1e9, + "stage3_max_reuse_distance": 1e9, + "stage3_gather_16bit_weights_on_model_save": true + }, + "gradient_accumulation_steps": "auto", + "gradient_clipping": "auto", + "train_batch_size": "auto", + "train_micro_batch_size_per_gpu": "auto" } diff --git a/experiments/configs/eval_configs/default_eval_config.yaml b/experiments/configs/eval_configs/default_eval_config.yaml index ea4fdc58a..23e1796c3 100644 --- a/experiments/configs/eval_configs/default_eval_config.yaml +++ b/experiments/configs/eval_configs/default_eval_config.yaml @@ -3,7 +3,7 @@ model_args: "pretrained=/fsx/proj-chemnlp/experiments/checkpoints/finetuned/full # model_args: "pretrained=EleutherAI/pythia-1b" tasks: "hendrycksTest-college_biology,hendrycksTest-college_chemistry,hendrycksTest-college_mathematics,hendrycksTest-college_physics,hendrycksTest-high_school_mathematics,hendrycksTest-high_school_biology,hendrycksTest-high_school_chemistry,hendrycksTest-high_school_physics" batch_size: 12 -device: 'cuda:0' +device: "cuda:0" wandb_log: true wandb_project: LLCheM wandb_group: evaluation diff --git a/experiments/configs/eval_configs/nlp_eval_config.yaml b/experiments/configs/eval_configs/nlp_eval_config.yaml index 9d161f744..0f78e6ab7 100644 --- a/experiments/configs/eval_configs/nlp_eval_config.yaml +++ b/experiments/configs/eval_configs/nlp_eval_config.yaml @@ -3,7 +3,7 @@ model_args: "pretrained=/fsx/path/checkpoint" # update tasks: "lambada_standard" num_fewshot: 0 batch_size: 12 -device: 'cuda:0' +device: "cuda:0" wandb_log: true wandb_project: LLCheM wandb_group: evaluation # update diff --git 
a/experiments/configs/eval_configs/safety_eval_config.yaml b/experiments/configs/eval_configs/safety_eval_config.yaml index 3d665223d..7c4284391 100644 --- a/experiments/configs/eval_configs/safety_eval_config.yaml +++ b/experiments/configs/eval_configs/safety_eval_config.yaml @@ -3,7 +3,7 @@ model_args: "pretrained=/fsx/path/checkpoint" # update tasks: "crows_pairs_english_race_color,crows_pairs_english_socioeconomic,crows_pairs_english_gender,crows_pairs_english_age,crows_pairs_english_religion,crows_pairs_english_disability,crows_pairs_english_sexual_orientation,crows_pairs_english_nationality,crows_pairs_english_physical_appearance" num_fewshot: 0 batch_size: 12 -device: 'cuda:0' +device: "cuda:0" wandb_log: true wandb_project: LLCheM wandb_group: evaluation # update diff --git a/experiments/configs/eval_configs/stem_eval_config.yaml b/experiments/configs/eval_configs/stem_eval_config.yaml index 3bfcac4fc..b3b6fde11 100644 --- a/experiments/configs/eval_configs/stem_eval_config.yaml +++ b/experiments/configs/eval_configs/stem_eval_config.yaml @@ -3,7 +3,7 @@ model_args: "pretrained=/fsx/path/checkpoint" # update tasks: "pile_pubmed-abstracts,pile_pubmed-central,headqa_en,sciq,pubmedqa,is_smiles,complete_smiles,periodic_table,openbookqa,hendrycksTest-college_biology,hendrycksTest-college_chemistry,hendrycksTest-college_mathematics,hendrycksTest-college_physics,hendrycksTest-high_school_mathematics,hendrycksTest-high_school_biology,hendrycksTest-high_school_chemistry,hendrycksTest-high_school_physics" num_fewshot: 0 batch_size: 12 -device: 'cuda:0' +device: "cuda:0" wandb_log: true wandb_project: LLCheM wandb_group: evaluation # update diff --git a/experiments/configs/gpt-neox/160M.yml b/experiments/configs/gpt-neox/160M.yml index 4a729f69d..61bb8c90d 100644 --- a/experiments/configs/gpt-neox/160M.yml +++ b/experiments/configs/gpt-neox/160M.yml @@ -2,105 +2,33 @@ # Pretraining config at https://github.com/EleutherAI/pythia/blob/main/models/160M/pythia-160m.yml # 
See other examples at https://github.com/EleutherAI/gpt-neox/tree/main/configs { - # parallelism settings - # you will want to change these based on your cluster setup, - # ideally scheduling pipeline stages across the node boundaries - "pipe-parallel-size": 1, - "model-parallel-size": 1, - - # model settings - "num-layers": 12, - "hidden-size": 768, - "num-attention-heads": 12, - "seq-length": 2048, - "max-position-embeddings": 2048, - "pos-emb": "rotary", - "rotary-pct": 0.25, - "no-weight-tying": true, - "gpt-j-residual": true, - "output-layer-parallelism": "column", - - # these should provide a speedup but take time to build - "scaled-upper-triang-masked-softmax-fusion": false, - "bias-gelu-fusion": false, - - # init methods - "init_method": "small_init", - "output_layer_init_method": "wang_init", - - # optimizer settings - "optimizer": { - "type": "Adam", - "params": { - "lr": 0.0006, - "betas": [0.9, 0.95], - "eps": 1.0e-8, - } - }, - "min_lr": 0.00006, - - # for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training - "zero_optimization": { - "stage": 1, - "allgather_partitions": True, - "allgather_bucket_size": 500000000, - "overlap_comm": True, - "reduce_scatter": True, - "reduce_bucket_size": 500000000, - "contiguous_gradients": True, - "cpu_offload": False - }, - - # batch / data settings - "train_micro_batch_size_per_gpu": 4, - "data-impl": "mmap", - "gas": 1, - "num_workers": 1, - - # activation checkpointing - "checkpoint-activations": true, - "checkpoint-num-layers": 1, - "partition-activations": true, - "synchronize-each-layer": true, - - # regularization - "gradient_clipping": 1.0, - "weight-decay": 0.1, - "hidden-dropout": 0.0, - "attention-dropout": 0.0, - - # precision settings - "fp16": { - "fp16": true, - "enabled": true, - "loss_scale": 0, - "loss_scale_window": 1000, - "initial_scale_power": 12, - "hysteresis": 2, - "min_loss_scale": 1 - }, - - # misc. 
training settings - "train-iters": 320000, - "lr-decay-iters": 320000, - "distributed-backend": "nccl", - "lr-decay-style": "cosine", - "warmup": 0.01, - "checkpoint-factor": 10000, - "eval-interval": 1000, - "eval-iters": 10, - - # logging - "log-interval": 100, - "steps_per_print": 10, - "keep-last-n-checkpoints": 4, - "wall_clock_breakdown": true, - - # tokenisation - "tokenizer-type": "HFTokenizer", - "vocab-file": "/fsx/pile/20B_tokenizer.json", - - # deepspeed - "launcher": "openmpi", - "deepspeed_mpi": true, -} + # parallelism settings + # you will want to change these based on your cluster setup, + # ideally scheduling pipeline stages across the node boundaries + "pipe-parallel-size": 1, "model-parallel-size": 1, + # model settings + "num-layers": 12, "hidden-size": 768, "num-attention-heads": 12, "seq-length": 2048, "max-position-embeddings": 2048, "pos-emb": "rotary", "rotary-pct": 0.25, "no-weight-tying": true, "gpt-j-residual": true, "output-layer-parallelism": "column", + # these should provide a speedup but take time to build + "scaled-upper-triang-masked-softmax-fusion": false, "bias-gelu-fusion": false, + # init methods + "init_method": "small_init", "output_layer_init_method": "wang_init", + # optimizer settings + "optimizer": {"type": "Adam", "params": {"lr": 0.0006, "betas": [0.9, 0.95], "eps": 1.0e-8}}, "min_lr": 0.00006, + # for all zero_optimization options, see https://www.deepspeed.ai/docs/config-json/#zero-optimizations-for-fp16-training + "zero_optimization": {"stage": 1, "allgather_partitions": True, "allgather_bucket_size": 500000000, "overlap_comm": True, "reduce_scatter": True, "reduce_bucket_size": 500000000, "contiguous_gradients": True, "cpu_offload": False}, + # batch / data settings + "train_micro_batch_size_per_gpu": 4, "data-impl": "mmap", "gas": 1, "num_workers": 1, + # activation checkpointing + "checkpoint-activations": true, "checkpoint-num-layers": 1, "partition-activations": true, "synchronize-each-layer": true, + # 
regularization + "gradient_clipping": 1.0, "weight-decay": 0.1, "hidden-dropout": 0.0, "attention-dropout": 0.0, + # precision settings + "fp16": {"fp16": true, "enabled": true, "loss_scale": 0, "loss_scale_window": 1000, "initial_scale_power": 12, "hysteresis": 2, "min_loss_scale": 1}, + # misc. training settings + "train-iters": 320000, "lr-decay-iters": 320000, "distributed-backend": "nccl", "lr-decay-style": "cosine", "warmup": 0.01, "checkpoint-factor": 10000, "eval-interval": 1000, "eval-iters": 10, + # logging + "log-interval": 100, "steps_per_print": 10, "keep-last-n-checkpoints": 4, "wall_clock_breakdown": true, + # tokenisation + "tokenizer-type": "HFTokenizer", "vocab-file": "/fsx/pile/20B_tokenizer.json", + # deepspeed + "launcher": "openmpi", "deepspeed_mpi": true} diff --git a/experiments/configs/gpt-neox/cluster_setup.yml b/experiments/configs/gpt-neox/cluster_setup.yml index 9df0b00d0..7ea959c26 100644 --- a/experiments/configs/gpt-neox/cluster_setup.yml +++ b/experiments/configs/gpt-neox/cluster_setup.yml @@ -1,24 +1,4 @@ # Suggested data paths when using GPT-NeoX locally { # see example configs for sampling options - "data-path": "/fsx/proj-chemnlp/data/marianna13/chemrxiv/data_text_document", - - "save": "/fsx/proj-chemnlp/experiments/checkpoints/finetuned/pythia-160M", - "load": "/fsx/proj-chemnlp/experiments/checkpoints/pretrained/pythia-160M", - "finetune": True, - "checkpoint_validation_with_forward_pass": False, - - "log-dir": "/fsx/proj-chemnlp/experiments/logs", - "log_interval": 100, - "log_grad_pct_zeros": False, - "log_param_norm": False, - "log_grad_norm": False, - - "use_wandb": True, - "wandb_host": "https://stability.wandb.io", - "wandb_project": "LLCheM", - "wandb_group": "Test Runs", - - "hostfile": "/mock_path", - "num_gpus": 1, -} + "data-path": "/fsx/proj-chemnlp/data/marianna13/chemrxiv/data_text_document", "save": "/fsx/proj-chemnlp/experiments/checkpoints/finetuned/pythia-160M", "load": 
"/fsx/proj-chemnlp/experiments/checkpoints/pretrained/pythia-160M", "finetune": True, "checkpoint_validation_with_forward_pass": False, "log-dir": "/fsx/proj-chemnlp/experiments/logs", "log_interval": 100, "log_grad_pct_zeros": False, "log_param_norm": False, "log_grad_norm": False, "use_wandb": True, "wandb_host": "https://stability.wandb.io", "wandb_project": "LLCheM", "wandb_group": "Test Runs", "hostfile": "/mock_path", "num_gpus": 1} diff --git a/experiments/configs/gpt-neox/soft_prompt.yml b/experiments/configs/gpt-neox/soft_prompt.yml index 4985fde4d..d16f84f54 100644 --- a/experiments/configs/gpt-neox/soft_prompt.yml +++ b/experiments/configs/gpt-neox/soft_prompt.yml @@ -1,9 +1,4 @@ { # peft method settings - "soft_prompt_tuning": { - "enabled": True, # also freezes all other parameters - "n_tokens": 10, - "init_string": "", - "init_range": 0.5, - } -} + "soft_prompt_tuning": {"enabled": True, # also freezes all other parameters + "n_tokens": 10, "init_string": "", "init_range": 0.5}} diff --git a/experiments/configs/hugging-face/160M_full.yml b/experiments/configs/hugging-face/160M_full.yml index 883eb96bc..4e6c823ff 100644 --- a/experiments/configs/hugging-face/160M_full.yml +++ b/experiments/configs/hugging-face/160M_full.yml @@ -1,19 +1,15 @@ ---- # Dataset configuration (datasets.load_from_disk arguments) data: path: /fsx/proj-chemnlp/data/EleutherAI/pythia-160m/marianna13/chemrxiv - # Model configuration (model.from_pretrained arguments) model: base: GPTNeoXForCausalLM name: EleutherAI/pythia-160m revision: main # latest model #checkpoint_path: /fsx/proj-chemnlp/experiments/checkpoints/finetuned/full_160M/checkpoint-1600 - # Training strategies (PromptTuningConfig arguments) prompt_tuning: enabled: false - # Training configuration (TrainerArguments from HF) trainer: output_dir: /fsx/proj-chemnlp/experiments/checkpoints/finetuned/full_160M @@ -28,7 +24,6 @@ trainer: fp16: false per_device_train_batch_size: 4 per_device_eval_batch_size: 4 - # Logging 
configuration (WandB init arguments) wandb: enabled: true diff --git a/experiments/configs/hugging-face/160M_ptune.yml b/experiments/configs/hugging-face/160M_ptune.yml index 5598a3155..e3d528123 100644 --- a/experiments/configs/hugging-face/160M_ptune.yml +++ b/experiments/configs/hugging-face/160M_ptune.yml @@ -1,20 +1,16 @@ ---- # Dataset configuration (datasets.load_from_disk arguments) data: path: /fsx/proj-chemnlp/data/EleutherAI/pythia-160m/marianna13/chemrxiv - # Model configuration (model.from_pretrained arguments) model: base: GPTNeoXForCausalLM name: EleutherAI/pythia-160m revision: main # latest model - # Training strategies (PromptTuningConfig arguments) prompt_tuning: enabled: true num_virtual_tokens: 10 prompt_tuning_init_text: " " - # Training configuration (TrainerArguments from HF) trainer: output_dir: /fsx/proj-chemnlp/experiments/checkpoints/finetuned/160M @@ -29,7 +25,6 @@ trainer: fp16: false per_device_train_batch_size: 30 per_device_eval_batch_size: 30 - # Logging configuration (WandB init arguments) wandb: enabled: true diff --git a/experiments/configs/hugging-face/1B_fine_tune.yml b/experiments/configs/hugging-face/1B_fine_tune.yml index a04af490b..dabd89dae 100644 --- a/experiments/configs/hugging-face/1B_fine_tune.yml +++ b/experiments/configs/hugging-face/1B_fine_tune.yml @@ -1,18 +1,14 @@ ---- # Dataset configuration (datasets.load_from_disk arguments) data: path: /fsx/proj-chemnlp/data/EleutherAI/pythia-1b/marianna13/chemrxiv - # Model configuration (model.from_pretrained arguments) model: base: GPTNeoXForCausalLM name: EleutherAI/pythia-1b revision: main # latest model - # Training strategies (PromptTuningConfig arguments) prompt_tuning: enabled: false - # Training configuration (TrainerArguments from HF) trainer: output_dir: /fsx/proj-chemnlp/experiments/checkpoints/finetuned/full_1b @@ -27,7 +23,6 @@ trainer: fp16: false per_device_train_batch_size: 2 per_device_eval_batch_size: 8 - # Logging configuration (WandB init arguments) 
wandb: enabled: true diff --git a/experiments/configs/hugging-face/3B_fine_tune.yml b/experiments/configs/hugging-face/3B_fine_tune.yml index dac27bbab..2ca5161cd 100644 --- a/experiments/configs/hugging-face/3B_fine_tune.yml +++ b/experiments/configs/hugging-face/3B_fine_tune.yml @@ -1,18 +1,14 @@ ---- # Dataset configuration (datasets.load_from_disk arguments) data: path: /fsx/proj-chemnlp/data/EleutherAI/pythia-1b/marianna13/chemrxiv - # Model configuration (model.from_pretrained arguments) model: base: GPTNeoXForCausalLM name: EleutherAI/pythia-2.8b revision: main # latest model - # Training strategies (PromptTuningConfig arguments) prompt_tuning: enabled: false - # Training configuration (TrainerArguments from HF) trainer: output_dir: /fsx/proj-chemnlp/experiments/checkpoints/finetuned/full_3b @@ -29,7 +25,6 @@ trainer: per_device_eval_batch_size: 1 gradient_checkpointing: True deepspeed_config: deepspeed_S2.json - # Logging configuration (WandB init arguments) wandb: enabled: true diff --git a/experiments/configs/hugging-face/410M_fine_tune.yml b/experiments/configs/hugging-face/410M_fine_tune.yml index c889d47b8..49f378b41 100644 --- a/experiments/configs/hugging-face/410M_fine_tune.yml +++ b/experiments/configs/hugging-face/410M_fine_tune.yml @@ -1,18 +1,14 @@ ---- # Dataset configuration (datasets.load_from_disk arguments) data: path: /fsx/proj-chemnlp/data/EleutherAI/pythia-410m/marianna13/chemrxiv - # Model configuration (model.from_pretrained arguments) model: base: GPTNeoXForCausalLM name: EleutherAI/pythia-410M revision: main # latest model - # Training strategies (PromptTuningConfig arguments) prompt_tuning: enabled: false - # Training configuration (TrainerArguments from HF) trainer: output_dir: /fsx/proj-chemnlp/experiments/checkpoints/finetuned/full_410M @@ -27,7 +23,6 @@ trainer: fp16: false per_device_train_batch_size: 2 per_device_eval_batch_size: 2 - # Logging configuration (WandB init arguments) wandb: enabled: true diff --git 
a/experiments/configs/hugging-face/7B_fine_tune.yml b/experiments/configs/hugging-face/7B_fine_tune.yml index 1850cba09..33f7e4882 100644 --- a/experiments/configs/hugging-face/7B_fine_tune.yml +++ b/experiments/configs/hugging-face/7B_fine_tune.yml @@ -1,18 +1,14 @@ ---- # Dataset configuration (datasets.load_from_disk arguments) data: path: /fsx/proj-chemnlp/data/EleutherAI/pythia-1b/marianna13/chemrxiv - # Model configuration (model.from_pretrained arguments) model: base: GPTNeoXForCausalLM name: EleutherAI/pythia-6.9b revision: main # latest model - # Training strategies (PromptTuningConfig arguments) prompt_tuning: enabled: false - # Training configuration (TrainerArguments from HF) trainer: output_dir: /fsx/proj-chemnlp/experiments/checkpoints/finetuned/full_7b_test @@ -29,7 +25,6 @@ trainer: per_device_eval_batch_size: 1 gradient_checkpointing: True deepspeed_config: deepspeed_offload_S3.json - # Logging configuration (WandB init arguments) wandb: enabled: true diff --git a/pyproject.toml b/pyproject.toml index d897c28b1..48ff6660a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,56 +8,52 @@ description = "Open source chemistry dataset & LLM" readme = "README.md" requires-python = "==3.9.*" dependencies = [ - "pandas", - "pydantic", + "pandas", + "pydantic" ] - dynamic = ["version"] [project.optional-dependencies] dev = [ - "black[jupyter]>=22.1.0", - "fire", - "flake8>=3.8.3", - "isort>=5.0.0", - "pre-commit", - "pydantic_yaml<=0.11.2", - "pytest", - "pubchempy", + "black[jupyter]>=22.1.0", + "fire", + "flake8>=3.8.3", + "isort>=5.0.0", + "pre-commit", + "pydantic_yaml<=0.11.2", + "pytest", + "pubchempy" ] - dataset_creation = [ - "PyTDC", - "rdkit", - "ruamel.yaml", - "selfies", - "deepsmiles", - "pubchempy", - "bioc", - "pylatexenc", - "canonicalize_psmiles@git+https://github.com/Ramprasad-Group/canonicalize_psmiles.git", - "rxn-chem-utils", - # "safe-mol", - "backoff", - "givemeconformer", - "chembl_webresource_client", - "dask", - "pandarallel" + 
"PyTDC", + "rdkit", + "ruamel.yaml", + "selfies", + "deepsmiles", + "pubchempy", + "bioc", + "pylatexenc", + "canonicalize_psmiles@git+https://github.com/Ramprasad-Group/canonicalize_psmiles.git", + "rxn-chem-utils", + # "safe-mol", + "backoff", + "givemeconformer", + "chembl_webresource_client", + "dask", + "pandarallel" ] - training = [ - "deepspeed>=0.8.2", # https://github.com/microsoft/DeepSpeed/pull/2863 - "nvidia-ml-py3", - "protobuf<3.20", - "s3fs", - "boto3<=1.26.90", # https://github.com/boto/boto3/issues/3648 + "deepspeed>=0.8.2", # https://github.com/microsoft/DeepSpeed/pull/2863 + "nvidia-ml-py3", + "protobuf<3.20", + "s3fs", + "boto3<=1.26.90" # https://github.com/boto/boto3/issues/3648 ] - tokenisation = [ - "zstandard", - "apache_beam", - "mwparserfromhell", - "jsonlines", + "zstandard", + "apache_beam", + "mwparserfromhell", + "jsonlines" ] [tool.setuptools_scm] diff --git a/src/chemnlp/data/constants.py b/src/chemnlp/data/constants.py index 716d41275..457b00295 100644 --- a/src/chemnlp/data/constants.py +++ b/src/chemnlp/data/constants.py @@ -1,7 +1,6 @@ DEFAULT_SIGNIFICANT_DIGITS = 3 - STANDARD_TABULAR_TEXT_TEMPLATES = [ "The molecule with the {SMILES__description} {#representation of |!}{SMILES#} has a {TARGET__names__noun} of {TARGET#} {TARGET__units}.", # noqa: E501 "Based on the {SMILES__description} {#representation of |!}{SMILES#}, the molecule has a {TARGET__names__noun} of {TARGET#} {TARGET__units}.", # noqa: E501 diff --git a/src/chemnlp/data/sampler.py b/src/chemnlp/data/sampler.py index 3e1cdecc9..eff28604e 100644 --- a/src/chemnlp/data/sampler.py +++ b/src/chemnlp/data/sampler.py @@ -10,6 +10,7 @@ from functools import lru_cache from chemnlp.data_val.model import IdentifierEnum + # ToDo: handle somewhere that the meta contains multiple templates class TemplateSampler: """ @@ -39,30 +40,40 @@ class TemplateSampler: >>> print(result) The molecule with SMILES CC(=O)OC1=CC=CC=C1C(=O)O has a solubility of 3.142. 
""" + def __init__( self, df: pd.DataFrame, meta: Dict, config: Dict, - column_datafield_sampler: Optional[Callable] = None + column_datafield_sampler: Optional[Callable] = None, ): self.df_orig = df self.df = df self.meta = meta self.config = config - self.column_datafield_sampler = column_datafield_sampler or (lambda x: random.sample(x, k=1)) + self.column_datafield_sampler = column_datafield_sampler or ( + lambda x: random.sample(x, k=1) + ) self.class_balanced = False self.balance_column = None - self.wrap_identifiers = config.get('wrap_identifiers', False) + self.wrap_identifiers = config.get("wrap_identifiers", False) def _wrap_identifier(self, identifier: str, value: str) -> str: """Wrap the identifier value with tags if wrap_identifiers is enabled.""" - print('wrap_identifier', identifier, value, self.wrap_identifiers) + print("wrap_identifier", identifier, value, self.wrap_identifiers) if not self.wrap_identifiers: return value - identifier_type = next((item['type'] for item in self.meta['identifiers'] if item['id'] == identifier), None) + identifier_type = next( + ( + item["type"] + for item in self.meta["identifiers"] + if item["id"] == identifier + ), + None, + ) try: identifier_type = IdentifierEnum(identifier_type) @@ -171,22 +182,31 @@ def _get_target_from_row(self, sample: pd.Series, var: str) -> str: elif "_protein_names" in var: out = sample[var.replace("_protein_names", "_name")] - var_dict = next(x for x in self.meta["identifiers"] + self.meta["targets"] if x["id"] == var) + var_dict = next( + x for x in self.meta["identifiers"] + self.meta["targets"] if x["id"] == var + ) if var_dict["type"] == "continuous": if not isinstance(out, (float, int)): raise ValueError(f"out is not a number (int or float): {out}") - significant_digits = var_dict.get("significant_digits", self.config.get("DEFAULT_SIGNIFICANT_DIGITS", DEFAULT_SIGNIFICANT_DIGITS)) + significant_digits = var_dict.get( + "significant_digits", + self.config.get( + 
"DEFAULT_SIGNIFICANT_DIGITS", DEFAULT_SIGNIFICANT_DIGITS + ), + ) out = f"{round(out, significant_digits):.{significant_digits}f}" else: out = str(out) if "|" in out: - choices = [c for c in out.split("|") if isinstance(c, str) or not math.isnan(c)] + choices = [ + c for c in out.split("|") if isinstance(c, str) or not math.isnan(c) + ] out = self.column_datafield_sampler(choices)[0] return out - def get_sample_dict(self, sample: pd.Series, template: str) -> Dict[str, str]: + def get_sample_dict(self, sample: pd.Series, template: str) -> Dict[str, str]: """ Extract and process all target values from a sample row based on a template. """ @@ -205,18 +225,18 @@ def get_sample_dict(self, sample: pd.Series, template: str) -> Dict[str, str]: return sample_dict def _get_symbols_from_multiple_choice_enum(self, enum_str: str) -> List[str]: - _, choice_count, symbol = enum_str.split('%')[1:] - if '-' in choice_count: - min_count, max_count = map(int, choice_count.split('-')) + _, choice_count, symbol = enum_str.split("%")[1:] + if "-" in choice_count: + min_count, max_count = map(int, choice_count.split("-")) count = random.randint(min_count, max_count) else: count = int(choice_count) - if 'a' in symbol: + if "a" in symbol: return list(ascii_lowercase[:count]) - elif 'A' in symbol: + elif "A" in symbol: return list(ascii_uppercase[:count]) - elif '1' in symbol: + elif "1" in symbol: return [str(i) for i in range(1, count + 1)] def _format_enum_string(self, symbols: List[str]) -> str: @@ -246,26 +266,40 @@ def _format_enum_string(self, symbols: List[str]) -> str: else: return ", ".join(symbols[:-1]) + f", or {symbols[-1]}" - def _handle_multiple_choice(self, sample: pd.Series, input_variables: List[str]) -> Dict[str, Union[str, List[str]]]: + def _handle_multiple_choice( + self, sample: pd.Series, input_variables: List[str] + ) -> Dict[str, Union[str, List[str]]]: multiple_choice_dict = {} # get multiple_choice_enum - multiple_choice_enum_idx = [i for i, x in 
enumerate(input_variables) if x.startswith("%multiple_choice_enum")] + multiple_choice_enum_idx = [ + i + for i, x in enumerate(input_variables) + if x.startswith("%multiple_choice_enum") + ] assert len(multiple_choice_enum_idx) == 1 multiple_choice_enum_idx = multiple_choice_enum_idx[0] multiple_choice_enum = input_variables[multiple_choice_enum_idx] # get multiple_choice_var - multiple_choice_var_idx = [i for i, x in enumerate(input_variables) if x.endswith("%")] + multiple_choice_var_idx = [ + i for i, x in enumerate(input_variables) if x.endswith("%") + ] assert len(multiple_choice_var_idx) == 1 multiple_choice_var_idx = multiple_choice_var_idx[0] multiple_choice_input = input_variables[multiple_choice_var_idx] if multiple_choice_input.count("%") > 1: - multiple_choice_var, multiple_choice_indicator, _ = multiple_choice_input.split("%") + multiple_choice_var, multiple_choice_indicator, _ = ( + multiple_choice_input.split("%") + ) else: - multiple_choice_var, multiple_choice_indicator = multiple_choice_input.split("%") - multiple_choice_indicator = "" # multiple_choice_indicator is here an empty string + multiple_choice_var, multiple_choice_indicator = ( + multiple_choice_input.split("%") + ) + multiple_choice_indicator = ( + "" # multiple_choice_indicator is here an empty string + ) symbols = self._get_symbols_from_multiple_choice_enum(multiple_choice_enum) @@ -273,23 +307,39 @@ def _handle_multiple_choice(self, sample: pd.Series, input_variables: List[str]) correct_choice = self._get_target_from_row(sample, multiple_choice_var + "#") if multiple_choice_indicator == "": - multiple_choices, correct_choice_idx = self._get_choices_without_indicator(multiple_choice_var, symbols, correct_choice) + multiple_choices, correct_choice_idx = self._get_choices_without_indicator( + multiple_choice_var, symbols, correct_choice + ) else: - multiple_choices, correct_choice_idx = self._get_choices_with_indicator(sample, multiple_choice_var, multiple_choice_indicator, symbols, 
correct_choice) + multiple_choices, correct_choice_idx = self._get_choices_with_indicator( + sample, + multiple_choice_var, + multiple_choice_indicator, + symbols, + correct_choice, + ) multiple_choice_dict[multiple_choice_enum] = self._format_enum_string(symbols) - multiple_choice_dict[multiple_choice_input] = self._format_choices(symbols, multiple_choices) - multiple_choice_dict["%multiple_choice_result"] = self._format_result(symbols, correct_choice_idx) + multiple_choice_dict[multiple_choice_input] = self._format_choices( + symbols, multiple_choices + ) + multiple_choice_dict["%multiple_choice_result"] = self._format_result( + symbols, correct_choice_idx + ) multiple_choice_dict["%multiple_choice_symbols"] = symbols multiple_choice_dict["%multiple_choice_result_idx"] = correct_choice_idx return multiple_choice_dict - def _get_choices_without_indicator(self, multiple_choice_var: str, symbols: List[str], correct_choice: str) -> Tuple[List[str], int]: + def _get_choices_without_indicator( + self, multiple_choice_var: str, symbols: List[str], correct_choice: str + ) -> Tuple[List[str], int]: cutoff_full_unique = 100 all_choices = self.df[multiple_choice_var].unique() if len(all_choices) > cutoff_full_unique: - all_choices = self.df[multiple_choice_var].sample(cutoff_full_unique).unique() + all_choices = ( + self.df[multiple_choice_var].sample(cutoff_full_unique).unique() + ) all_choices = sorted([str(x) for x in all_choices]) if all_choices == ["0", "1"]: @@ -304,19 +354,41 @@ def _get_choices_without_indicator(self, multiple_choice_var: str, symbols: List correct_choice_idx = multiple_choices.index(correct_choice) return multiple_choices, correct_choice_idx - def _get_choices_with_indicator(self, sample: pd.Series, multiple_choice_var: str, multiple_choice_indicator: str, symbols: List[str], correct_choice: str) -> Tuple[List[str], List[int]]: - correct_choice_indicator = self._get_target_from_row(sample, multiple_choice_indicator + "#") - df_sample = 
self.df.sample(len(symbols) - 1)[[multiple_choice_var, multiple_choice_indicator]] - - multiple_choices = df_sample[multiple_choice_var].astype(str).tolist() + [correct_choice] - multiple_choices_indicators = df_sample[multiple_choice_indicator].astype(str).tolist() + [correct_choice_indicator] - - multiple_choices_combined = list(zip(multiple_choices, multiple_choices_indicators)) + def _get_choices_with_indicator( + self, + sample: pd.Series, + multiple_choice_var: str, + multiple_choice_indicator: str, + symbols: List[str], + correct_choice: str, + ) -> Tuple[List[str], List[int]]: + correct_choice_indicator = self._get_target_from_row( + sample, multiple_choice_indicator + "#" + ) + df_sample = self.df.sample(len(symbols) - 1)[ + [multiple_choice_var, multiple_choice_indicator] + ] + + multiple_choices = df_sample[multiple_choice_var].astype(str).tolist() + [ + correct_choice + ] + multiple_choices_indicators = df_sample[multiple_choice_indicator].astype( + str + ).tolist() + [correct_choice_indicator] + + multiple_choices_combined = list( + zip(multiple_choices, multiple_choices_indicators) + ) random.shuffle(multiple_choices_combined) multiple_choices, multiple_choices_indicators = zip(*multiple_choices_combined) - correct_choice_idx = [i for i, (choice, indicator) in enumerate(zip(multiple_choices, multiple_choices_indicators)) - if indicator == correct_choice_indicator] + correct_choice_idx = [ + i + for i, (choice, indicator) in enumerate( + zip(multiple_choices, multiple_choices_indicators) + ) + if indicator == correct_choice_indicator + ] return list(multiple_choices), correct_choice_idx @@ -324,22 +396,34 @@ def _format_choices(self, symbols: List[str], choices: List[str]) -> str: rnd_symbol = self._get_random_symbol() rnd_symbol_prefix, rnd_symbol_suffix = self._get_symbol_affixes(rnd_symbol) - return "\n".join([f"{rnd_symbol_prefix}{s}{rnd_symbol_suffix} {c}" for s, c in zip(symbols, choices)]) + return "\n".join( + [ + 
f"{rnd_symbol_prefix}{s}{rnd_symbol_suffix} {c}" + for s, c in zip(symbols, choices) + ] + ) - def _format_result(self, symbols: List[str], correct_choice_idx: Union[int, List[int]]) -> str: + def _format_result( + self, symbols: List[str], correct_choice_idx: Union[int, List[int]] + ) -> str: if isinstance(correct_choice_idx, list): return ", ".join([symbols[i] for i in correct_choice_idx]) else: return symbols[correct_choice_idx] def _get_random_symbol(self) -> str: - if self.config.get('multiple_choice_benchmarking_templates') and self.config.get('multiple_choice_benchmarking_format') is not None: - if len(self.config['multiple_choice_rnd_symbols']) > 1: - return self.config['multiple_choice_rnd_symbols'][self.config['multiple_choice_benchmarking_format']] + if ( + self.config.get("multiple_choice_benchmarking_templates") + and self.config.get("multiple_choice_benchmarking_format") is not None + ): + if len(self.config["multiple_choice_rnd_symbols"]) > 1: + return self.config["multiple_choice_rnd_symbols"][ + self.config["multiple_choice_benchmarking_format"] + ] else: - return self.config['multiple_choice_rnd_symbols'][0] + return self.config["multiple_choice_rnd_symbols"][0] else: - return random.choice(self.config['multiple_choice_rnd_symbols']) + return random.choice(self.config["multiple_choice_rnd_symbols"]) def _get_symbol_affixes(self, symbol: str) -> Tuple[str, str]: if symbol in ["()", "[]"]: @@ -348,7 +432,7 @@ def _get_symbol_affixes(self, symbol: str) -> Tuple[str, str]: return "", symbol def _get_input_variables_from_template(self, template: str) -> List[str]: - return re.findall(r'\{([^}]+)\}', template) + return re.findall(r"\{([^}]+)\}", template) @lru_cache(maxsize=None) def _get_random_text_identifiers_and_targets(self) -> dict: @@ -367,7 +451,9 @@ def _get_random_text_identifiers_and_targets(self) -> dict: rnd_texts[e["id"]]["names"][name] = rnd_text if "description" in e: - rnd_texts[e["id"]]["description"] = partial(lambda x: x, 
e["description"]) + rnd_texts[e["id"]]["description"] = partial( + lambda x: x, e["description"] + ) if "units" in e: rnd_texts[e["id"]]["units"] = partial(lambda x: x, e["units"]) @@ -407,7 +493,9 @@ def get_with_nested_keys(d: dict, keys: list) -> Union[str, Callable]: if len(keys) == 1 and keys[0] in self.meta: return self.meta[keys[0]] - elif keys[0] in [x["id"] for x in self.meta["identifiers"] + self.meta["targets"]]: + elif keys[0] in [ + x["id"] for x in self.meta["identifiers"] + self.meta["targets"] + ]: rnd_texts = self._get_random_text_identifiers_and_targets() return get_with_nested_keys(rnd_texts, keys) else: @@ -432,12 +520,14 @@ def sample(self, sample: pd.Series, template: str) -> str: sample_dict = self.get_sample_dict(sample, template) return self._fill_template(template, sample_dict) - def _fill_template(self, template: str, sample_dict: Dict[str, Union[str, List[str]]]) -> str: + def _fill_template( + self, template: str, sample_dict: Dict[str, Union[str, List[str]]] + ) -> str: for key, value in sample_dict.items(): if isinstance(value, list): - value = '\n'.join(value) - if '#' in key: # This indicates it's an identifier - identifier = key.replace('#', '') + value = "\n".join(value) + if "#" in key: # This indicates it's an identifier + identifier = key.replace("#", "") value = self._wrap_identifier(identifier, str(value)) - template = template.replace('{' + key + '}', str(value)) + template = template.replace("{" + key + "}", str(value)) return template diff --git a/src/chemnlp/data/sampler_cli.py b/src/chemnlp/data/sampler_cli.py index 5f574ee23..432f677cf 100644 --- a/src/chemnlp/data/sampler_cli.py +++ b/src/chemnlp/data/sampler_cli.py @@ -12,6 +12,7 @@ DEFAULT_SIGNIFICANT_DIGITS, ) + def determine_balance_column(meta: dict, template: str) -> Optional[str]: """ Determine which column to use for class balancing based on the template and metadata. 
@@ -27,9 +28,12 @@ def determine_balance_column(meta: dict, template: str) -> Optional[str]: return matching_targets[0] else: chosen_target = random.choice(matching_targets) - warnings.warn(f"Multiple targets found in template. Randomly chose '{chosen_target}' for balancing.") + warnings.warn( + f"Multiple targets found in template. Randomly chose '{chosen_target}' for balancing." + ) return chosen_target + def process_dataset( data_dir: str, output_dir: str, @@ -55,8 +59,8 @@ def process_dataset( use_standard_templates (bool): Whether to use standard tabular text templates wrap_identifiers (bool): Whether to wrap identifiers in templates """ - meta_path = os.path.join(data_dir, 'meta.yaml') - data_path = os.path.join(data_dir, 'data_clean.csv') + meta_path = os.path.join(data_dir, "meta.yaml") + data_path = os.path.join(data_dir, "data_clean.csv") meta = load_yaml(meta_path) @@ -85,11 +89,11 @@ def process_dataset( multiple_choice_rnd_symbols = ["", ".", ".)", ")", ":", "()", "[]"] config = { - 'DEFAULT_SIGNIFICANT_DIGITS': DEFAULT_SIGNIFICANT_DIGITS, - 'multiple_choice_rnd_symbols': multiple_choice_rnd_symbols, - 'multiple_choice_benchmarking_templates': multiple_choice, - 'multiple_choice_benchmarking_format': None, - 'wrap_identifiers': wrap_identifiers + "DEFAULT_SIGNIFICANT_DIGITS": DEFAULT_SIGNIFICANT_DIGITS, + "multiple_choice_rnd_symbols": multiple_choice_rnd_symbols, + "multiple_choice_benchmarking_templates": multiple_choice, + "multiple_choice_benchmarking_format": None, + "wrap_identifiers": wrap_identifiers, } with pd.read_csv(data_path, chunksize=chunksize, low_memory=False) as reader: @@ -111,21 +115,29 @@ def process_dataset( templates = [t for t in templates if "" not in t] for template_idx, template in enumerate(templates): - print(f"\nProcessing chunk {chunk_idx}, template {template_idx}:\n{template}") + print( + f"\nProcessing chunk {chunk_idx}, template {template_idx}:\n{template}" + ) # Determine balance column - balance_column = 
determine_balance_column(meta, template) if class_balanced else None + balance_column = ( + determine_balance_column(meta, template) if class_balanced else None + ) # Sampling step if balance_column: sampler.enable_class_balancing(balance_column) print(f"Enabled class balancing on column: {balance_column}") - sampled_data = df_chunk.apply(lambda row: sampler.sample(row, template), axis=1) + sampled_data = df_chunk.apply( + lambda row: sampler.sample(row, template), axis=1 + ) # Export step - output_path = os.path.join(output_dir, f"chunk_{chunk_idx}_template_{template_idx}.jsonl") - with open(output_path, 'w') as f: + output_path = os.path.join( + output_dir, f"chunk_{chunk_idx}_template_{template_idx}.jsonl" + ) + with open(output_path, "w") as f: for sample in sampled_data: f.write(f"{sample}\n") @@ -134,6 +146,7 @@ def process_dataset( print(f"Exported samples to {output_path}") + def main( data_dir: str, output_dir: str, @@ -171,5 +184,6 @@ def main( wrap_identifiers, ) + if __name__ == "__main__": fire.Fire(main) diff --git a/src/chemnlp/data/utils.py b/src/chemnlp/data/utils.py index 21345d6db..d417ad90b 100644 --- a/src/chemnlp/data/utils.py +++ b/src/chemnlp/data/utils.py @@ -11,6 +11,7 @@ import yaml from typing import Any + def sample_dataset(dataset, num_samples): n = len(dataset) num_samples = min(num_samples, n) @@ -174,17 +175,20 @@ def oxford_comma_join(items: List[str]) -> str: def load_yaml(file_path: str) -> Any: - with open(file_path, 'r') as file: + with open(file_path, "r") as file: return yaml.safe_load(file) + def save_yaml(data: Any, file_path: str) -> None: - with open(file_path, 'w') as file: + with open(file_path, "w") as file: yaml.dump(data, file, sort_keys=False) + def str_presenter(dumper, data): if len(data.splitlines()) > 1: # check for multiline string - return dumper.represent_scalar('tag:yaml.org,2002:str', data, style='|') - return dumper.represent_scalar('tag:yaml.org,2002:str', data) + return 
dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|") + return dumper.represent_scalar("tag:yaml.org,2002:str", data) + yaml.add_representer(str, str_presenter) yaml.representer.SafeRepresenter.add_representer(str, str_presenter) diff --git a/tests/data/test_sampler.py b/tests/data/test_sampler.py index 526234bb0..a6e9c4f4a 100644 --- a/tests/data/test_sampler.py +++ b/tests/data/test_sampler.py @@ -4,22 +4,37 @@ import numpy as np import re + @pytest.fixture def sample_df(): - return pd.DataFrame({ - 'SMILES': ['CC(C)NCC(O)c1ccc(O)c(O)c1', 'CC1=C(C(=O)NC2=C1C=CC=C2)C3=CC=CC=C3'], - 'CYP2D6_Substrate': [1, 0], - 'compound_name': ['Isoproterenol', 'Phenytoin'], - 'split': ['train', 'test'] - }) + return pd.DataFrame( + { + "SMILES": [ + "CC(C)NCC(O)c1ccc(O)c(O)c1", + "CC1=C(C(=O)NC2=C1C=CC=C2)C3=CC=CC=C3", + ], + "CYP2D6_Substrate": [1, 0], + "compound_name": ["Isoproterenol", "Phenytoin"], + "split": ["train", "test"], + } + ) + @pytest.fixture def sample_meta(): return { "identifiers": [ {"id": "SMILES", "type": "SMILES", "description": "SMILES"}, - {"id": "compound_name", "type": "Other", "description": "drug name", - "names": [{"noun": "compound name"}, {"noun": "drug name"}, {"noun": "generic drug name"}]} + { + "id": "compound_name", + "type": "Other", + "description": "drug name", + "names": [ + {"noun": "compound name"}, + {"noun": "drug name"}, + {"noun": "generic drug name"}, + ], + }, ], "targets": [ { @@ -32,54 +47,63 @@ def sample_meta(): {"noun": "substrate for CYP2D6"}, {"noun": "substrate for CYP P450 2D6"}, {"verb": "metabolized by CYP2D6"}, - {"verb": "metabolized by CYP P450 2D6"} - ] + {"verb": "metabolized by CYP P450 2D6"}, + ], } - ] + ], } + @pytest.fixture def sample_config(): return { - 'DEFAULT_SIGNIFICANT_DIGITS': 2, - 'multiple_choice_rnd_symbols': ["", ".)", ")"], - 'multiple_choice_benchmarking_templates': False, - 'multiple_choice_benchmarking_format': None + "DEFAULT_SIGNIFICANT_DIGITS": 2, + 
"multiple_choice_rnd_symbols": ["", ".)", ")"], + "multiple_choice_benchmarking_templates": False, + "multiple_choice_benchmarking_format": None, } @pytest.fixture def sample_config_with_wrapping(): return { - 'DEFAULT_SIGNIFICANT_DIGITS': 2, - 'multiple_choice_rnd_symbols': ["", ".)", ")"], - 'multiple_choice_benchmarking_templates': False, - 'multiple_choice_benchmarking_format': None, - 'wrap_identifiers': True + "DEFAULT_SIGNIFICANT_DIGITS": 2, + "multiple_choice_rnd_symbols": ["", ".)", ")"], + "multiple_choice_benchmarking_templates": False, + "multiple_choice_benchmarking_format": None, + "wrap_identifiers": True, } + # Add these to your existing fixtures or create new ones as needed @pytest.fixture def large_sample_df(): np.random.seed(42) - return pd.DataFrame({ - 'SMILES': [f'C{i}' for i in range(1000)], - 'CYP2D6_Substrate': np.random.choice([0, 1], size=1000, p=[0.7, 0.3]), - 'LogP': np.random.normal(2, 1, 1000), - 'compound_name': [f'Compound_{i}' for i in range(1000)], - 'split': np.random.choice(['train', 'test', 'valid'], size=1000, p=[0.8, 0.1, 0.1]) - }) + return pd.DataFrame( + { + "SMILES": [f"C{i}" for i in range(1000)], + "CYP2D6_Substrate": np.random.choice([0, 1], size=1000, p=[0.7, 0.3]), + "LogP": np.random.normal(2, 1, 1000), + "compound_name": [f"Compound_{i}" for i in range(1000)], + "split": np.random.choice( + ["train", "test", "valid"], size=1000, p=[0.8, 0.1, 0.1] + ), + } + ) + @pytest.fixture def large_sample_meta(sample_meta): - sample_meta['targets'].append({ - "id": "LogP", - "type": "continuous", - "description": "Logarithm of the partition coefficient", - "names": [{"noun": "LogP value"}, {"noun": "partition coefficient"}], - "units": "log units", - "significant_digits": 2 - }) + sample_meta["targets"].append( + { + "id": "LogP", + "type": "continuous", + "description": "Logarithm of the partition coefficient", + "names": [{"noun": "LogP value"}, {"noun": "partition coefficient"}], + "units": "log units", + 
"significant_digits": 2, + } + ) return sample_meta @@ -91,28 +115,47 @@ def test_basic_identifier_wrapping(sample_df, sample_meta, sample_config_with_wr assert "[BEGIN_SMILES]" in result and "[END_SMILES]" in result assert "[BEGIN_Other]" in result and "[END_Other]" in result + def test_get_target_from_row(sample_df, sample_meta, sample_config): sampler = TemplateSampler(sample_df, sample_meta, sample_config) - assert sampler._get_target_from_row(sample_df.iloc[0], "SMILES#") == "CC(C)NCC(O)c1ccc(O)c(O)c1" + assert ( + sampler._get_target_from_row(sample_df.iloc[0], "SMILES#") + == "CC(C)NCC(O)c1ccc(O)c(O)c1" + ) assert sampler._get_target_from_row(sample_df.iloc[0], "CYP2D6_Substrate#") == "1" - assert sampler._get_target_from_row(sample_df.iloc[0], "compound_name#") == "Isoproterenol" + assert ( + sampler._get_target_from_row(sample_df.iloc[0], "compound_name#") + == "Isoproterenol" + ) + def test_get_target_from_string(sample_df, sample_meta, sample_config): sampler = TemplateSampler(sample_df, sample_meta, sample_config) assert sampler._get_target_from_string("CYP2D6_Substrate__names__noun")() in [ - "CYP P450 2D6 substrate", "CYP2D6 substrate", "substrate for CYP2D6", "substrate for CYP P450 2D6" + "CYP P450 2D6 substrate", + "CYP2D6 substrate", + "substrate for CYP2D6", + "substrate for CYP P450 2D6", ] assert sampler._get_target_from_string("CYP2D6_Substrate__names__verb")() in [ - "metabolized by CYP2D6", "metabolized by CYP P450 2D6" + "metabolized by CYP2D6", + "metabolized by CYP P450 2D6", ] + def test_sample_with_template(sample_df, sample_meta, sample_config): sampler = TemplateSampler(sample_df, sample_meta, sample_config) template = "The molecule with the {SMILES__description} {SMILES#} is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}." 
result = sampler.sample(sample_df.iloc[0], template) assert "CC(C)NCC(O)c1ccc(O)c(O)c1" in result assert "is a" in result - assert "CYP P450 2D6 substrate" in result or "CYP2D6 substrate" in result or "substrate for CYP2D6" in result or "substrate for CYP P450 2D6" in result + assert ( + "CYP P450 2D6 substrate" in result + or "CYP2D6 substrate" in result + or "substrate for CYP2D6" in result + or "substrate for CYP P450 2D6" in result + ) + def test_multiple_choice_template(sample_df, sample_meta, sample_config): sampler = TemplateSampler(sample_df, sample_meta, sample_config) @@ -128,21 +171,34 @@ def test_multiple_choice_template(sample_df, sample_meta, sample_config): # Check that the answer is one of the options answer_letter = result.split("Answer: ")[1].strip() # assert that "True" in in the line starting with the answer letter - assert "True" in [line for line in result.split("\n") if line.startswith(answer_letter) and ('True' in line or 'False' in line)][0] + assert ( + "True" + in [ + line + for line in result.split("\n") + if line.startswith(answer_letter) and ("True" in line or "False" in line) + ][0] + ) + def test_class_balancing(sample_df, sample_meta, sample_config): sampler = TemplateSampler(sample_df, sample_meta, sample_config) sampler.enable_class_balancing("CYP2D6_Substrate") balanced_df = sampler.df - assert len(balanced_df[balanced_df['CYP2D6_Substrate'] == 0]) == len(balanced_df[balanced_df['CYP2D6_Substrate'] == 1]) - + assert len(balanced_df[balanced_df["CYP2D6_Substrate"] == 0]) == len( + balanced_df[balanced_df["CYP2D6_Substrate"] == 1] + ) -def test_class_balancing_large_dataset(large_sample_df, large_sample_meta, sample_config): +def test_class_balancing_large_dataset( + large_sample_df, large_sample_meta, sample_config +): sampler = TemplateSampler(large_sample_df, large_sample_meta, sample_config) sampler.enable_class_balancing("CYP2D6_Substrate") assert len(sampler.df) < len(large_sample_df) - assert 
len(sampler.df[sampler.df['CYP2D6_Substrate'] == 0]) == len(sampler.df[sampler.df['CYP2D6_Substrate'] == 1]) + assert len(sampler.df[sampler.df["CYP2D6_Substrate"] == 0]) == len( + sampler.df[sampler.df["CYP2D6_Substrate"] == 1] + ) def test_class_balancing_disable(large_sample_df, large_sample_meta, sample_config): @@ -152,7 +208,11 @@ def test_class_balancing_disable(large_sample_df, large_sample_meta, sample_conf sampler.disable_class_balancing() assert len(sampler.df) == len(large_sample_df) - assert (sampler.df['CYP2D6_Substrate'].value_counts() != sampler.df['CYP2D6_Substrate'].value_counts().iloc[0]).any() + assert ( + sampler.df["CYP2D6_Substrate"].value_counts() + != sampler.df["CYP2D6_Substrate"].value_counts().iloc[0] + ).any() + def test_continuous_value_formatting(large_sample_df, large_sample_meta, sample_config): sampler = TemplateSampler(large_sample_df, large_sample_meta, sample_config) @@ -161,7 +221,10 @@ def test_continuous_value_formatting(large_sample_df, large_sample_meta, sample_ assert "LogP value" in result or "partition coefficient" in result assert "log units" in result - assert re.search(r'\d+\.\d{2} log units', result) # Check if the value is rounded to 2 decimal places + assert re.search( + r"\d+\.\d{2} log units", result + ) # Check if the value is rounded to 2 decimal places + def test_error_handling_invalid_variable(sample_df, sample_meta, sample_config): sampler = TemplateSampler(sample_df, sample_meta, sample_config) @@ -170,13 +233,19 @@ def test_error_handling_invalid_variable(sample_df, sample_meta, sample_config): with pytest.raises(KeyError): sampler.sample(sample_df.iloc[0], template) -def test_multiple_targets_in_template(large_sample_df, large_sample_meta, sample_config): + +def test_multiple_targets_in_template( + large_sample_df, large_sample_meta, sample_config +): sampler = TemplateSampler(large_sample_df, large_sample_meta, sample_config) template = "The molecule {compound_name#} with {SMILES__description} {SMILES#} 
has a {LogP__names__noun} of {LogP#} {LogP__units} and is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}." result = sampler.sample(large_sample_df.iloc[0], template) print(result) - assert all(x in result for x in ['Compound_', 'C', 'log units', 'CYP']) - assert ('is a' in result and 'not a' not in result) or ('is not a' in result and 'is a' not in result) + assert all(x in result for x in ["Compound_", "C", "log units", "CYP"]) + assert ("is a" in result and "not a" not in result) or ( + "is not a" in result and "is a" not in result + ) + def test_random_sampling(large_sample_df, large_sample_meta, sample_config): sampler = TemplateSampler(large_sample_df, large_sample_meta, sample_config) @@ -193,10 +262,15 @@ def test_multiple_identifier_types(sample_df, sample_meta, sample_config_with_wr sampler = TemplateSampler(sample_df, sample_meta, sample_config_with_wrapping) template = "SMILES: {SMILES#}, Name: {compound_name#}" result = sampler.sample(sample_df.iloc[0], template) - assert all(tag in result for tag in ["[BEGIN_SMILES]", "[END_SMILES]", "[BEGIN_Other]", "[END_Other]"]) + assert all( + tag in result + for tag in ["[BEGIN_SMILES]", "[END_SMILES]", "[BEGIN_Other]", "[END_Other]"] + ) -def test_wrapping_with_multiple_choice(sample_df, sample_meta, sample_config_with_wrapping): +def test_wrapping_with_multiple_choice( + sample_df, sample_meta, sample_config_with_wrapping +): sampler = TemplateSampler(sample_df, sample_meta, sample_config_with_wrapping) template = """ Which compound has this SMILES: {SMILES#}? 
@@ -209,8 +283,12 @@ def test_wrapping_with_multiple_choice(sample_df, sample_meta, sample_config_wit assert "A or B" in result or "a or b" in result or "1 or 2" in result -def test_wrapping_with_continuous_value(large_sample_df, large_sample_meta, sample_config_with_wrapping): - sampler = TemplateSampler(large_sample_df, large_sample_meta, sample_config_with_wrapping) +def test_wrapping_with_continuous_value( + large_sample_df, large_sample_meta, sample_config_with_wrapping +): + sampler = TemplateSampler( + large_sample_df, large_sample_meta, sample_config_with_wrapping + ) template = "SMILES: {SMILES#}, LogP: {LogP#}" result = sampler.sample(large_sample_df.iloc[0], template) assert "[BEGIN_SMILES]" in result and "[END_SMILES]" in result