diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index ca2b7874e..000000000
Binary files a/.DS_Store and /dev/null differ
diff --git a/.github/workflows/install.yaml b/.github/workflows/install.yaml
index bc327c087..94a50480f 100644
--- a/.github/workflows/install.yaml
+++ b/.github/workflows/install.yaml
@@ -1,29 +1,27 @@
----
# GitHub action that attempts to install the conda env
# from conda.yaml
# then run black, isort, flake8
-
name: Install
on: [push, pull_request]
jobs:
- install:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v2
- - uses: conda-incubator/setup-miniconda@v2
- with:
- environment-file: conda.yaml
- activate-environment: chemnlp
- python-version: 3.9
- auto-update-conda: true
- auto-activate-base: false
- - name: Validate yaml
- shell: bash -l {0}
- run: |
- conda activate chemnlp
- python -m src.chemnlp.data_val.validate data
- - name: Tests
- shell: bash -l {0}
- run: |
- pip install pytest
- pytest tests
+ install:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v2
+ - uses: conda-incubator/setup-miniconda@v2
+ with:
+ environment-file: conda.yaml
+ activate-environment: chemnlp
+ python-version: "3.9"  # quoted so YAML keeps it a string (an unquoted 3.10 would load as 3.1)
+ auto-update-conda: true
+ auto-activate-base: false
+ - name: Validate yaml
+ shell: bash -l {0}
+ run: |
+ conda activate chemnlp
+ python -m src.chemnlp.data_val.validate data
+ - name: Tests
+ shell: bash -l {0}
+ run: |
+ pip install pytest
+ pytest tests
diff --git a/.gitignore b/.gitignore
index edff3a531..4c2a71c41 100644
--- a/.gitignore
+++ b/.gitignore
@@ -144,3 +144,5 @@ scratch/
# vim
*~
*.swp
+
+.DS_Store
diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index 67b00b7f6..000000000
--- a/.gitmodules
+++ /dev/null
@@ -1,12 +0,0 @@
-[submodule "gpt-neox"]
- path = gpt-neox
- url = git@github.com:OpenBioML/gpt-neox.git
- branch = main
-[submodule "lm-eval2"]
- path = lm-eval2
- url = git@github.com:OpenBioML/lm-eval2.git
- branch = main
-[submodule "lm-eval1"]
- path = lm-evaluation-harness
- url = git@github.com:OpenBioML/lm-evaluation-harness.git
- branch = master
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c382fca0a..a794fa2f9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,41 +1,90 @@
----
-ci:
- autoupdate_schedule: quarterly
- autofix_prs: false
- submodules: false
-
+default_language_version:
+ python: python3
repos:
- - repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v4.5.0
- hooks:
- - id: check-json
- - id: check-yaml
- - id: end-of-file-fixer
- - id: trailing-whitespace
- exclude: miscellaneous/structures/SiO2.xyz
-
- - repo: https://github.com/jumanjihouse/pre-commit-hook-yamlfmt
- rev: 0.2.3
- hooks:
- - id: yamlfmt
- exclude: ^experiments/configs
-
- - repo: https://github.com/psf/black
- rev: 24.3.0
- hooks:
- - id: black
- language_version: python3 # Should be a command that runs python3.6+
-
- - repo: https://github.com/PyCQA/flake8
- rev: 7.0.0
- hooks:
- - id: flake8
- args: [--count, --show-source, --statistics]
- additional_dependencies:
- - flake8-bugbear==22.7.1
-
- - repo: https://github.com/pycqa/isort
- rev: 5.13.2
- hooks:
- - id: isort
- args: [--profile, black, --filter-files]
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v4.6.0
+ hooks:
+ - id: check-added-large-files
+ - id: fix-byte-order-marker  # replaces the deprecated check-byte-order-marker hook
+ - id: check-case-conflict
+ - id: check-merge-conflict
+ - id: check-shebang-scripts-are-executable
+ - id: check-symlinks
+ - id: check-toml
+ - id: check-yaml
+ exclude: "kubernetes.yaml$" # This line excludes kubernetes.yaml from being checked as it is WIP
+ - id: debug-statements
+ - id: detect-private-key
+ - id: end-of-file-fixer
+ - id: mixed-line-ending
+ exclude: .gitignore
+ - id: trailing-whitespace
+ exclude: .gitignore
+ - repo: https://github.com/psf/black-pre-commit-mirror
+ rev: 24.4.2
+ hooks:
+ - id: black-jupyter
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.5.1
+ hooks:
+ - id: ruff
+ args: [--fix, --exit-non-zero-on-fix]
+ - repo: https://github.com/rbubley/mirrors-prettier
+ rev: v3.3.2
+ hooks:
+ - id: prettier
+ - repo: https://github.com/google/yamlfmt
+ rev: v0.13.0
+ hooks:
+ - id: yamlfmt
+ - repo: https://github.com/Yelp/detect-secrets
+ rev: v1.5.0
+ hooks:
+ - id: detect-secrets
+ - repo: https://github.com/pappasam/toml-sort
+ rev: v0.23.1
+ hooks:
+ - id: toml-sort-fix
+ exclude: poetry.lock
+ - repo: https://github.com/codespell-project/codespell
+ rev: v2.3.0
+ hooks:
+ - id: codespell
+ additional_dependencies: [".[toml]"]
+ - repo: https://github.com/sqlfluff/sqlfluff
+ rev: 3.1.0
+ hooks:
+ - id: sqlfluff-fix
+ - repo: https://github.com/hadolint/hadolint
+ rev: v2.13.0-beta
+ hooks:
+ - id: hadolint-docker
+ - repo: https://github.com/jsh9/markdown-toc-creator
+ rev: 0.0.6
+ hooks:
+ - id: markdown-toc-creator
+ - repo: https://github.com/jumanjihouse/pre-commit-hooks
+ rev: 3.0.0
+ hooks:
+ - id: check-mailmap
+ - repo: https://github.com/python-poetry/poetry
+ rev: 1.8.0
+ hooks:
+ - id: poetry-check
+ - repo: https://github.com/pre-commit/mirrors-mypy
+ rev: v1.10.1
+ hooks:
+ - id: mypy
+ additional_dependencies:
+ - types-requests
+ - repo: https://github.com/srstevenson/nb-clean
+ rev: 3.3.0
+ hooks:
+ - id: nb-clean
+ args: [--preserve-cell-outputs, --remove-empty-cells]
+ - repo: https://github.com/abravalheri/validate-pyproject
+ rev: v0.18
+ hooks:
+ - id: validate-pyproject
+ additional_dependencies:
+ - "validate-pyproject-schema-store[all]>=2024.06.24" # Pin for Ruff's FURB154
diff --git a/README.md b/README.md
index a69442e27..cba9a42a2 100644
--- a/README.md
+++ b/README.md
@@ -32,14 +32,12 @@ ChemNLP is an open-source project - your involvement is warmly welcome! If you'r
- Looking for ideas? See our [task board](https://github.com/orgs/OpenBioML/projects/5/views/1) to see what we may need help with.
- Have an idea? Create an [issue](https://github.com/OpenBioML/chemnlp/issues)!
-
Over the past months ChemNLP has received many contributions and a lot of feedback. We appreciate all contributions from community to make ChemNLP thrive.
-
# Note on the "ChemNLP" name
Our OpenBioML ChemNLP project is not affiliated to the [ChemNLP library from NIST](https://arxiv.org/abs/2209.08203) and we use "ChemNLP" as a general term to highlight our project focus. The datasets and models we create through our project will have a unique and recognizable name when we release them.
diff --git a/code_of_conduct.md b/code_of_conduct.md
index 45d257b29..8b4fcfd34 100644
--- a/code_of_conduct.md
+++ b/code_of_conduct.md
@@ -1,4 +1,3 @@
-
# Contributor Covenant Code of Conduct
## Our Pledge
@@ -18,23 +17,23 @@ diverse, inclusive, and healthy community.
Examples of behavior that contributes to a positive environment for our
community include:
-* Demonstrating empathy and kindness toward other people
-* Being respectful of differing opinions, viewpoints, and experiences
-* Giving and gracefully accepting constructive feedback
-* Accepting responsibility and apologizing to those affected by our mistakes,
+- Demonstrating empathy and kindness toward other people
+- Being respectful of differing opinions, viewpoints, and experiences
+- Giving and gracefully accepting constructive feedback
+- Accepting responsibility and apologizing to those affected by our mistakes,
and learning from the experience
-* Focusing on what is best not just for us as individuals, but for the overall
+- Focusing on what is best not just for us as individuals, but for the overall
community
Examples of unacceptable behavior include:
-* The use of sexualized language or imagery, and sexual attention or advances of
+- The use of sexualized language or imagery, and sexual attention or advances of
any kind
-* Trolling, insulting or derogatory comments, and personal or political attacks
-* Public or private harassment
-* Publishing others' private information, such as a physical or email address,
+- Trolling, insulting or derogatory comments, and personal or political attacks
+- Public or private harassment
+- Publishing others' private information, such as a physical or email address,
without their explicit permission
-* Other conduct which could reasonably be considered inappropriate in a
+- Other conduct which could reasonably be considered inappropriate in a
professional setting
## Enforcement Responsibilities
diff --git a/conda.yaml b/conda.yaml
index b6badfc78..92628149a 100644
--- a/conda.yaml
+++ b/conda.yaml
@@ -1,9 +1,8 @@
----
name: dummy
dependencies:
- - python==3.9.*
- - pip
- - pip:
- - .
- - .[dev]
- - .[dataset_creation]
+ - python==3.9.*
+ - pip
+ - pip:
+ - .
+ - .[dev]
+ - .[dataset_creation]
diff --git a/data/kg/chebi_chebi/meta.yaml b/data/kg/chebi_chebi/meta.yaml
index 49f1d325b..73e0fae00 100644
--- a/data/kg/chebi_chebi/meta.yaml
+++ b/data/kg/chebi_chebi/meta.yaml
@@ -1,48 +1,43 @@
----
name: chebi_chebi
description: Knowledgegraph data samples.
targets:
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
identifiers:
- - id: node1_type
- description: node1_type
- type: Other
- - id: node1_name
- description: node1_name
- type: Other
- - id: node1_id
- description: node1_id
- type: Other
- - id: rel1_type
- description: rel1_type
- type: Other
+ - id: node1_type
+ description: node1_type
+ type: Other
+ - id: node1_name
+ description: node1_name
+ type: Other
+ - id: node1_id
+ description: node1_id
+ type: Other
+ - id: rel1_type
+ description: rel1_type
+ type: Other
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 638182
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_name#} {rel1_type#} {node2_name#}.
+ - The {node1_name#} {rel1_type#} {node2_name#}.
diff --git a/data/kg/chembl33_preprocessed_filtered_bioactivity_dataset_w_fullprotnames_smiles/meta.yaml b/data/kg/chembl33_preprocessed_filtered_bioactivity_dataset_w_fullprotnames_smiles/meta.yaml
index 50b55545d..450197858 100644
--- a/data/kg/chembl33_preprocessed_filtered_bioactivity_dataset_w_fullprotnames_smiles/meta.yaml
+++ b/data/kg/chembl33_preprocessed_filtered_bioactivity_dataset_w_fullprotnames_smiles/meta.yaml
@@ -1,107 +1,101 @@
----
name: chembl33_preprocessed_filtered_bioactivity_dataset_w_fullprotnames_smiles
description: Knowledgegraph data samples.
targets:
- - id: protein_name
- description: protein_name
- type: Other
- units: protein_name
- names:
- - noun: protein_name
- - id: pchembl_value
- description: pchembl_value
- type: Other
- units: pchembl_value
- names:
- - noun: pchembl_value
- - id: standard_type
- description: standard_type
- type: Other
- units: standard_type
- names:
- - noun: standard_type
- - id: standard_value
- description: standard_value
- type: Other
- units: standard_value
- names:
- - noun: standard_value
- - id: standard_units
- description: standard_units
- type: Other
- units: standard_units
- names:
- - noun: standard_units
- - id: description
- description: description
- type: Other
- units: description
- names:
- - noun: description
+ - id: protein_name
+ description: protein_name
+ type: Other
+ units: protein_name
+ names:
+ - noun: protein_name
+ - id: pchembl_value
+ description: pchembl_value
+ type: Other
+ units: pchembl_value
+ names:
+ - noun: pchembl_value
+ - id: standard_type
+ description: standard_type
+ type: Other
+ units: standard_type
+ names:
+ - noun: standard_type
+ - id: standard_value
+ description: standard_value
+ type: Other
+ units: standard_value
+ names:
+ - noun: standard_value
+ - id: standard_units
+ description: standard_units
+ type: Other
+ units: standard_units
+ names:
+ - noun: standard_units
+ - id: description
+ description: description
+ type: Other
+ units: description
+ names:
+ - noun: description
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 1059070
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {#molecule with the |!}{SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} a {#bioaffinity|affinity!} for {#the
- protein |!}{protein_name#} with a {standard_type#} {#value |!}of {standard_value#} {standard_units#}.
- - |-
- Task: Please {#derive|estimate|approximate!} {#the bioaffinity|the affinity!} of a {#molecule to a protein|protein to a molecule!}.
- Protein{# name|!}: {protein_name#}
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint{#s|!}: The {#resulting|derived|calculated!} {standard_type#} {#value |!}should be in {standard_units#}. Even if you are {#uncertain|not sure!}, you must {#derive|estimate|come up with!} a {standard_type#} {#value |!}without using any {#other|additional!} words.
- Result: {standard_value#} {standard_units#}
- - |-
- Task: Please {#create|generate!} {#a molecule |a !}{SMILES__description} that has a {#bioaffinity|affinity!} to {#the protein |!}{protein_name#} with a {standard_type#} {#value |!}of {standard_value#} {standard_units#}.
- Result: {SMILES#}
- - |-
- User: Can you {#give me|come up with!} {#one|an!} example of a protein that has a {#bioaffinity|affinity!} to the {SMILES__description} {SMILES#}?
- Assistant: {#For example, the protein |For example, |!}{protein_name#} has a {#bioaffinity|affinity!} to the {SMILES__description} {SMILES#}.
- User: Can you {#derive|estimate|approximate!} the {standard_type#} {#of this molecule|of this molecule for me|for me!}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, the {standard_type#} {#value |!}is {standard_value#} {standard_units#}.
- - |-
- User: Can you {#give me|come up with!} {#one|an!} example of a protein that has a {#bioaffinity|affinity!} to the {SMILES__description} {SMILES#}?
- Assistant: {#The protein |!}{protein_name#} has for example a {#bioaffinity|affinity!} to the {SMILES__description} {SMILES#}.
- User: Can you {#derive|estimate|approximate!} the {standard_type#} {#of this molecule|of this molecule for me|for me!}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, the {standard_type#} {#value |!}is {standard_value#} {standard_units#}.
- User: Can you give {#me |!}{#additional|more!} {#information|details!} {#on|about!} the assay{# used| used for this estimation!}?
- Assistant: {#Sure|Yes|Of course!}, here you go:
- {description#}
- - |-
- Task: Please {#derive|estimate|approximate!} {#the bioaffinity|the affinity!} of a {#molecule to a protein|protein to a molecule!}.
- Protein{# name|!}: {protein_name#}
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint{#s|!}: The {#resulting|derived|calculated!} {standard_type#} {#value |!}should be in {standard_units#}. Even if you are {#uncertain|not sure!}, you must {#derive|estimate|come up with!} a {standard_type#} {#value |!}without using any {#other|additional!} words.
- Result: {standard_value#} {standard_units#}
- - |-
- Task: Please {#create|generate!} a {#molecule |!}{SMILES__description} that has a {#bioaffinity|affinity!} to {#the protein |!}{protein_name#} with a {standard_type#} {#value |!}of {standard_value#} {standard_units#}.
- Result: {SMILES#}
- - |-
- Task: Please answer the multiple choice question.
- Question: What is the {#the bioaffinity|the affinity!} of a {#molecule to a protein|protein to a molecule!}?
- Protein{# name|!}: {protein_name#}
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: The {#shown|listed!} {standard_type#} values {#below |!}are in {standard_units#}. Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%3-5%aA1} without using any other words.
- Options:
- {standard_value%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: What is the {#the bioaffinity|the affinity!} of a {#molecule to a protein|protein to a molecule!}?
- Protein{# name|!}: {protein_name#}
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: The {#shown|listed!} {standard_type#} values {#below |!}are in {standard_units#}. Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%3-5%aA1} without using any other words.
- Options:
- {standard_value%}
- Answer: {%multiple_choice_result}
+ - The {#molecule with the |!}{SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} a {#bioaffinity|affinity!} for {#the protein |!}{protein_name#} with a {standard_type#} {#value |!}of {standard_value#} {standard_units#}.
+ - |-
+ Task: Please {#derive|estimate|approximate!} {#the bioaffinity|the affinity!} of a {#molecule to a protein|protein to a molecule!}.
+ Protein{# name|!}: {protein_name#}
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint{#s|!}: The {#resulting|derived|calculated!} {standard_type#} {#value |!}should be in {standard_units#}. Even if you are {#uncertain|not sure!}, you must {#derive|estimate|come up with!} a {standard_type#} {#value |!}without using any {#other|additional!} words.
+ Result: {standard_value#} {standard_units#}
+ - |-
+ Task: Please {#create|generate!} {#a molecule |a !}{SMILES__description} that has a {#bioaffinity|affinity!} to {#the protein |!}{protein_name#} with a {standard_type#} {#value |!}of {standard_value#} {standard_units#}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#give me|come up with!} {#one|an!} example of a protein that has a {#bioaffinity|affinity!} to the {SMILES__description} {SMILES#}?
+ Assistant: {#For example, the protein |For example, |!}{protein_name#} has a {#bioaffinity|affinity!} to the {SMILES__description} {SMILES#}.
+ User: Can you {#derive|estimate|approximate!} the {standard_type#} {#of this molecule|of this molecule for me|for me!}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, the {standard_type#} {#value |!}is {standard_value#} {standard_units#}.
+ - |-
+ User: Can you {#give me|come up with!} {#one|an!} example of a protein that has a {#bioaffinity|affinity!} to the {SMILES__description} {SMILES#}?
+ Assistant: {#The protein |!}{protein_name#} has for example a {#bioaffinity|affinity!} to the {SMILES__description} {SMILES#}.
+ User: Can you {#derive|estimate|approximate!} the {standard_type#} {#of this molecule|of this molecule for me|for me!}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, the {standard_type#} {#value |!}is {standard_value#} {standard_units#}.
+ User: Can you give {#me |!}{#additional|more!} {#information|details!} {#on|about!} the assay{# used| used for this estimation!}?
+ Assistant: {#Sure|Yes|Of course!}, here you go:
+ {description#}
+ - |-
+ Task: Please {#derive|estimate|approximate!} {#the bioaffinity|the affinity!} of a {#molecule to a protein|protein to a molecule!}.
+ Protein{# name|!}: {protein_name#}
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint{#s|!}: The {#resulting|derived|calculated!} {standard_type#} {#value |!}should be in {standard_units#}. Even if you are {#uncertain|not sure!}, you must {#derive|estimate|come up with!} a {standard_type#} {#value |!}without using any {#other|additional!} words.
+ Result: {standard_value#} {standard_units#}
+ - |-
+ Task: Please {#create|generate!} a {#molecule |!}{SMILES__description} that has a {#bioaffinity|affinity!} to {#the protein |!}{protein_name#} with a {standard_type#} {#value |!}of {standard_value#} {standard_units#}.
+ Result: {SMILES#}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: What is {#the bioaffinity|the affinity!} of a {#molecule to a protein|protein to a molecule!}?
+ Protein{# name|!}: {protein_name#}
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: The {#shown|listed!} {standard_type#} values {#below |!}are in {standard_units#}. Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%3-5%aA1} without using any other words.
+ Options:
+ {standard_value%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: What is {#the bioaffinity|the affinity!} of a {#molecule to a protein|protein to a molecule!}?
+ Protein{# name|!}: {protein_name#}
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: The {#shown|listed!} {standard_type#} values {#below |!}are in {standard_units#}. Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%3-5%aA1} without using any other words.
+ Options:
+ {standard_value%}
+ Answer: {%multiple_choice_result}
diff --git a/data/kg/compound_chebi/meta.yaml b/data/kg/compound_chebi/meta.yaml
index 0215a4eac..a3784a4f0 100644
--- a/data/kg/compound_chebi/meta.yaml
+++ b/data/kg/compound_chebi/meta.yaml
@@ -1,69 +1,64 @@
----
name: compound_chebi
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 6754
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} {node2_name#}.
- - |-
- Task: Please {#create|generate!} {#a compound |a !}{SMILES__description} that {rel1_type#} {node2_name#}.
- Result: {SMILES#}
- - |-
- Task: Please {#create|generate!} {#a compound |a !}{SMILES__description} that {rel1_type#} {node2_name#}.
- Result: {SMILES#}
+ - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} {node2_name#}.
+ - |-
+ Task: Please {#create|generate!} {#a compound |a !}{SMILES__description} that {rel1_type#} {node2_name#}.
+ Result: {SMILES#}
+ - |-
+ Task: Please {#create|generate!} {#a compound |a !}{SMILES__description} that {rel1_type#} {node2_name#}.
+ Result: {SMILES#}
diff --git a/data/kg/compound_chebi_chebi/meta.yaml b/data/kg/compound_chebi_chebi/meta.yaml
index 91e13ec0c..9a36f991a 100644
--- a/data/kg/compound_chebi_chebi/meta.yaml
+++ b/data/kg/compound_chebi_chebi/meta.yaml
@@ -1,87 +1,82 @@
----
name: compound_chebi_chebi
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 26991
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} {node2_name#} and {rel2_type#} {node3_name#}.
+ - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} {node2_name#} and {rel2_type#} {node3_name#}.
diff --git a/data/kg/compound_chebi_chebi_chebi_1/meta.yaml b/data/kg/compound_chebi_chebi_chebi_1/meta.yaml
index b1153cc31..40437de34 100644
--- a/data/kg/compound_chebi_chebi_chebi_1/meta.yaml
+++ b/data/kg/compound_chebi_chebi_chebi_1/meta.yaml
@@ -1,110 +1,105 @@
----
name: compound_chebi_chebi_chebi_1
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
- - id: rel3_type
- description: rel3_type
- type: Other
- units: rel3_type
- names:
- - noun: rel3_type
- - id: node4_type
- description: node4_type
- type: Other
- units: node4_type
- names:
- - noun: node4_type
- - id: node4_name
- description: node4_name
- type: Other
- units: node4_name
- names:
- - noun: node4_name
- - id: node4_id
- description: node4_id
- type: Other
- units: node4_id
- names:
- - noun: node4_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
+ - id: rel3_type
+ description: rel3_type
+ type: Other
+ units: rel3_type
+ names:
+ - noun: rel3_type
+ - id: node4_type
+ description: node4_type
+ type: Other
+ units: node4_type
+ names:
+ - noun: node4_type
+ - id: node4_name
+ description: node4_name
+ type: Other
+ units: node4_name
+ names:
+ - noun: node4_name
+ - id: node4_id
+ description: node4_id
+ type: Other
+ units: node4_id
+ names:
+ - noun: node4_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 9936872
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates: []
diff --git a/data/kg/compound_chebi_chebi_chebi_2/meta.yaml b/data/kg/compound_chebi_chebi_chebi_2/meta.yaml
index 0a0bd567b..944890527 100644
--- a/data/kg/compound_chebi_chebi_chebi_2/meta.yaml
+++ b/data/kg/compound_chebi_chebi_chebi_2/meta.yaml
@@ -1,110 +1,105 @@
----
name: compound_chebi_chebi_chebi_2
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
- - id: rel3_type
- description: rel3_type
- type: Other
- units: rel3_type
- names:
- - noun: rel3_type
- - id: node4_type
- description: node4_type
- type: Other
- units: node4_type
- names:
- - noun: node4_type
- - id: node4_name
- description: node4_name
- type: Other
- units: node4_name
- names:
- - noun: node4_name
- - id: node4_id
- description: node4_id
- type: Other
- units: node4_id
- names:
- - noun: node4_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
+ - id: rel3_type
+ description: rel3_type
+ type: Other
+ units: rel3_type
+ names:
+ - noun: rel3_type
+ - id: node4_type
+ description: node4_type
+ type: Other
+ units: node4_type
+ names:
+ - noun: node4_type
+ - id: node4_name
+ description: node4_name
+ type: Other
+ units: node4_name
+ names:
+ - noun: node4_name
+ - id: node4_id
+ description: node4_id
+ type: Other
+ units: node4_id
+ names:
+ - noun: node4_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 1480272
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates: []
diff --git a/data/kg/compound_protein/meta.yaml b/data/kg/compound_protein/meta.yaml
index c4e2a093f..31905c97b 100644
--- a/data/kg/compound_protein/meta.yaml
+++ b/data/kg/compound_protein/meta.yaml
@@ -1,73 +1,68 @@
----
name: compound_protein
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 619840
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}.
- - The {node2_type#} {node2_protein_names#} is targeted by the drug with the {SMILES__description} {SMILES#}.
- - |-
- User: Can you {#give me|come up with!} {#one|an!} example for a {node1_type#} {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}.
+ - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}.
+ - The {node2_type#} {node2_protein_names#} is targeted by the drug with the {SMILES__description} {SMILES#}.
+ - |-
+ User: Can you {#give me|come up with!} {#one|an!} example for a {node1_type#} {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
+ Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}.
diff --git a/data/kg/compound_protein_compound_1/meta.yaml b/data/kg/compound_protein_compound_1/meta.yaml
index 6fc86bdc9..c5659cca0 100644
--- a/data/kg/compound_protein_compound_1/meta.yaml
+++ b/data/kg/compound_protein_compound_1/meta.yaml
@@ -1,106 +1,100 @@
----
name: compound_protein_compound_1
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_smiles
- description: node3_smiles
- type: Other
- units: node3_smiles
- names:
- - noun: node3_smiles
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_smiles
+ description: node3_smiles
+ type: Other
+ units: node3_smiles
+ names:
+ - noun: node3_smiles
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 9851748
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} the {node3_type#}
- {node3_smiles#}.
- - The {node2_type#} {node2_protein_names#} is targeted by the compound with the {SMILES__description} {SMILES#} and {node3_smiles#}.
- - |-
- User: Can you {#give me|come up with!} {#one|an!} example for a {node1_type#} with the {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}.
- User: Can you {#tell me|create|generate!} {#another|a!} {node1_type#} {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
- Assistant: {#Sure|Yes|Of course|Yes, of course!}, the SMILES{# representation|!} {node3_smiles#} {#also |!}{rel1_type#} the {node2_type#} {node2_protein_names#}.
+ - The {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} the {node3_type#} {node3_smiles#}.
+ - The {node2_type#} {node2_protein_names#} is targeted by the compound with the {SMILES__description} {SMILES#} and {node3_smiles#}.
+ - |-
+ User: Can you {#give me|come up with!} {#one|an!} example for a {node1_type#} with the {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
+ Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}.
+ User: Can you {#tell me|create|generate!} {#another|a!} {node1_type#} {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
+ Assistant: {#Sure|Yes|Of course|Yes, of course!}, the SMILES{# representation|!} {node3_smiles#} {#also |!}{rel1_type#} the {node2_type#} {node2_protein_names#}.
diff --git a/data/kg/compound_protein_compound_2/meta.yaml b/data/kg/compound_protein_compound_2/meta.yaml
index 76ede634e..29d92b5af 100644
--- a/data/kg/compound_protein_compound_2/meta.yaml
+++ b/data/kg/compound_protein_compound_2/meta.yaml
@@ -1,106 +1,100 @@
----
name: compound_protein_compound_2
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_smiles
- description: node3_smiles
- type: Other
- units: node3_smiles
- names:
- - noun: node3_smiles
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_smiles
+ description: node3_smiles
+ type: Other
+ units: node3_smiles
+ names:
+ - noun: node3_smiles
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 9906551
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} the {node3_type#}
- {node3_smiles#}.
- - The {node2_type#} {node2_protein_names#} is targeted by the compound with the {SMILES__description} {SMILES#} and {node3_smiles#}.
- - |-
- User: Can you {#give me|come up with!} {#one|an!} example for a {node1_type#} with the {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}.
- User: Can you {#tell me|create|generate!} {#another|a!} {node1_type#} {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
- Assistant: {#Sure|Yes|Of course|Yes, of course!}, the SMILES{# representation|!} {node3_smiles#} {#also |!}{rel1_type#} the {node2_type#} {node2_protein_names#}.
+ - The {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} the {node3_type#} {node3_smiles#}.
+ - The {node2_type#} {node2_protein_names#} is targeted by the compound with the {SMILES__description} {SMILES#} and {node3_smiles#}.
+ - |-
+ User: Can you {#give me|come up with!} {#one|an!} example for a {node1_type#} with the {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
+ Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}.
+ User: Can you {#tell me|create|generate!} {#another|a!} {node1_type#} {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
+ Assistant: {#Sure|Yes|Of course|Yes, of course!}, the SMILES{# representation|!} {node3_smiles#} {#also |!}{rel1_type#} the {node2_type#} {node2_protein_names#}.
diff --git a/data/kg/compound_protein_compound_3/meta.yaml b/data/kg/compound_protein_compound_3/meta.yaml
index efaa033e1..94c455871 100644
--- a/data/kg/compound_protein_compound_3/meta.yaml
+++ b/data/kg/compound_protein_compound_3/meta.yaml
@@ -1,106 +1,100 @@
----
name: compound_protein_compound_3
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_smiles
- description: node3_smiles
- type: Other
- units: node3_smiles
- names:
- - noun: node3_smiles
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_smiles
+ description: node3_smiles
+ type: Other
+ units: node3_smiles
+ names:
+ - noun: node3_smiles
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 9764124
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} the {node3_type#}
- {node3_smiles#}.
- - The {node2_type#} {node2_protein_names#} is targeted by the compound with the {SMILES__description} {SMILES#} and {node3_smiles#}.
- - |-
- User: Can you {#give me|come up with!} {#one|an!} example for a {node1_type#} with the {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}.
- User: Can you {#tell me|create|generate!} {#another|a!} {node1_type#} {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
- Assistant: {#Sure|Yes|Of course|Yes, of course!}, the SMILES{# representation|!} {node3_smiles#} {#also |!}{rel1_type#} the {node2_type#} {node2_protein_names#}.
+ - The {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} the {node3_type#} {node3_smiles#}.
+ - The {node2_type#} {node2_protein_names#} is targeted by the compound with the {SMILES__description} {SMILES#} and {node3_smiles#}.
+ - |-
+ User: Can you {#give me|come up with!} {#one|an!} example for a {node1_type#} with the {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
+ Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} with the {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}.
+ User: Can you {#tell me|create|generate!} {#another|a!} {node1_type#} {SMILES__description} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
+ Assistant: {#Sure|Yes|Of course|Yes, of course!}, the SMILES{# representation|!} {node3_smiles#} {#also |!}{rel1_type#} the {node2_type#} {node2_protein_names#}.
diff --git a/data/kg/compound_protein_disease/meta.yaml b/data/kg/compound_protein_disease/meta.yaml
index 54de1a39b..e33e843fa 100644
--- a/data/kg/compound_protein_disease/meta.yaml
+++ b/data/kg/compound_protein_disease/meta.yaml
@@ -1,100 +1,94 @@
----
name: compound_protein_disease
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 1424348
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_type#} {node3_name#}.
- - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_type#}
- {node3_name#}.
- - |-
- User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that is targeted by the {node1_type#} {SMILES#}?
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_protein_names#} is targeted by the {node1_type#} {SMILES#}.
- User: Can you tell me which disease the {node2_type#} {node2_protein_names#} {rel2_type#}?
- Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#} {node3_type#}.
+ - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_type#} {node3_name#}.
+ - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_type#} {node3_name#}.
+ - |-
+ User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that is targeted by the {node1_type#} {SMILES#}?
+ Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_protein_names#} is targeted by the {node1_type#} {SMILES#}.
+ User: Can you tell me which disease the {node2_type#} {node2_protein_names#} {rel2_type#}?
+ Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#} {node3_type#}.
diff --git a/data/kg/compound_protein_domain/meta.yaml b/data/kg/compound_protein_domain/meta.yaml
index 15184bf4c..f1a86b94a 100644
--- a/data/kg/compound_protein_domain/meta.yaml
+++ b/data/kg/compound_protein_domain/meta.yaml
@@ -1,99 +1,94 @@
----
name: compound_protein_domain
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 1589285
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - '{SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} a {node3_name#}.'
- - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} a {node3_name#}.
- - |-
- User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#}?
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES#} {rel1_type#} for example the {node2_type#} {node2_protein_names#}.
- User: Can you tell me a domain of the {node2_type#} {node2_protein_names#}?
- Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} a {node3_name#}.
+ - "{SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} a {node3_name#}."
+ - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} a {node3_name#}.
+ - |-
+ User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#}?
+ Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES#} {rel1_type#} for example the {node2_type#} {node2_protein_names#}.
+ User: Can you tell me a domain of the {node2_type#} {node2_protein_names#}?
+ Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} a {node3_name#}.
diff --git a/data/kg/compound_protein_ec_number/meta.yaml b/data/kg/compound_protein_ec_number/meta.yaml
index aecaa2ceb..58d8288b8 100644
--- a/data/kg/compound_protein_ec_number/meta.yaml
+++ b/data/kg/compound_protein_ec_number/meta.yaml
@@ -1,100 +1,94 @@
----
name: compound_protein_ec_number
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 405980
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} the {node3_name#} (EC {node3_id#}).
- - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. Furthermore, the {node1_type#} {SMILES#} {rel2_type#} the {node3_name#}
- (EC {node3_id#}).
- - |-
- User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#}?
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}.
- User: Can you tell me which enzyme the {node2_type#} {node2_protein_names#} {rel2_type#}?
- Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} a {node3_name#} (EC {node3_id#}).
+ - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} the {node3_name#} (EC {node3_id#}).
+ - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. Furthermore, the {node1_type#} {SMILES#} {rel2_type#} the {node3_name#} (EC {node3_id#}).
+ - |-
+ User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#}?
+ Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}.
+ User: Can you tell me which enzyme the {node2_type#} {node2_protein_names#} {rel2_type#}?
+ Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} a {node3_name#} (EC {node3_id#}).
diff --git a/data/kg/compound_protein_go_term_1/meta.yaml b/data/kg/compound_protein_go_term_1/meta.yaml
index e49613b7a..f95ecac30 100644
--- a/data/kg/compound_protein_go_term_1/meta.yaml
+++ b/data/kg/compound_protein_go_term_1/meta.yaml
@@ -1,95 +1,89 @@
----
name: compound_protein_go_term_1
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 9820893
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}.
- - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#}
- {rel2_type#} the {node3_name#}.
+ - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}.
+ - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}.
diff --git a/data/kg/compound_protein_go_term_2/meta.yaml b/data/kg/compound_protein_go_term_2/meta.yaml
index 75b61831e..1577ea82d 100644
--- a/data/kg/compound_protein_go_term_2/meta.yaml
+++ b/data/kg/compound_protein_go_term_2/meta.yaml
@@ -1,95 +1,89 @@
----
name: compound_protein_go_term_2
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 9781374
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}.
- - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#}
- {rel2_type#} the {node3_name#}.
+ - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}.
+ - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}.
diff --git a/data/kg/compound_protein_go_term_3/meta.yaml b/data/kg/compound_protein_go_term_3/meta.yaml
index ea6bda6fb..f1df53242 100644
--- a/data/kg/compound_protein_go_term_3/meta.yaml
+++ b/data/kg/compound_protein_go_term_3/meta.yaml
@@ -1,95 +1,89 @@
----
name: compound_protein_go_term_3
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 9798619
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}.
- - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#}
- {rel2_type#} the {node3_name#}.
+ - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}.
+ - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}.
diff --git a/data/kg/compound_protein_go_term_4/meta.yaml b/data/kg/compound_protein_go_term_4/meta.yaml
index bebb00ce5..f23fb3d1f 100644
--- a/data/kg/compound_protein_go_term_4/meta.yaml
+++ b/data/kg/compound_protein_go_term_4/meta.yaml
@@ -1,95 +1,89 @@
----
name: compound_protein_go_term_4
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 1767147
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}.
- - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#}
- {rel2_type#} the {node3_name#}.
+ - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}.
+ - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}.
diff --git a/data/kg/compound_protein_hpo/meta.yaml b/data/kg/compound_protein_hpo/meta.yaml
index 4aea5ce91..8f68f9802 100644
--- a/data/kg/compound_protein_hpo/meta.yaml
+++ b/data/kg/compound_protein_hpo/meta.yaml
@@ -1,97 +1,91 @@
----
name: compound_protein_hpo
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 2971239
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} {node3_name#}.
- - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}.
- - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the human
- phenotype represented by {node3_name#}.
- - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {rel2_type#} the {node3_name#}.
+ - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} {node3_name#}.
+ - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}.
+ - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the human phenotype represented by {node3_name#}.
+ - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {rel2_type#} the {node3_name#}.
diff --git a/data/kg/compound_protein_hpo_disease_1/meta.yaml b/data/kg/compound_protein_hpo_disease_1/meta.yaml
index a8d5bdd83..035583ee3 100644
--- a/data/kg/compound_protein_hpo_disease_1/meta.yaml
+++ b/data/kg/compound_protein_hpo_disease_1/meta.yaml
@@ -1,118 +1,112 @@
----
name: compound_protein_hpo_disease_1
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
- - id: rel3_type
- description: rel3_type
- type: Other
- units: rel3_type
- names:
- - noun: rel3_type
- - id: node4_type
- description: node4_type
- type: Other
- units: node4_type
- names:
- - noun: node4_type
- - id: node4_name
- description: node4_name
- type: Other
- units: node4_name
- names:
- - noun: node4_name
- - id: node4_id
- description: node4_id
- type: Other
- units: node4_id
- names:
- - noun: node4_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
+ - id: rel3_type
+ description: rel3_type
+ type: Other
+ units: rel3_type
+ names:
+ - noun: rel3_type
+ - id: node4_type
+ description: node4_type
+ type: Other
+ units: node4_type
+ names:
+ - noun: node4_type
+ - id: node4_name
+ description: node4_name
+ type: Other
+ units: node4_name
+ names:
+ - noun: node4_name
+ - id: node4_id
+ description: node4_id
+ type: Other
+ units: node4_id
+ names:
+ - noun: node4_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 9815355
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}.
- The {node3_name#} {rel3_type#} the {node4_type#} {node4_name#}.
+ - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. The {node3_name#} {rel3_type#} the {node4_type#} {node4_name#}.
diff --git a/data/kg/compound_protein_hpo_disease_2/meta.yaml b/data/kg/compound_protein_hpo_disease_2/meta.yaml
index c5b744322..5ce2d0ef0 100644
--- a/data/kg/compound_protein_hpo_disease_2/meta.yaml
+++ b/data/kg/compound_protein_hpo_disease_2/meta.yaml
@@ -1,118 +1,112 @@
----
name: compound_protein_hpo_disease_2
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
- - id: rel3_type
- description: rel3_type
- type: Other
- units: rel3_type
- names:
- - noun: rel3_type
- - id: node4_type
- description: node4_type
- type: Other
- units: node4_type
- names:
- - noun: node4_type
- - id: node4_name
- description: node4_name
- type: Other
- units: node4_name
- names:
- - noun: node4_name
- - id: node4_id
- description: node4_id
- type: Other
- units: node4_id
- names:
- - noun: node4_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
+ - id: rel3_type
+ description: rel3_type
+ type: Other
+ units: rel3_type
+ names:
+ - noun: rel3_type
+ - id: node4_type
+ description: node4_type
+ type: Other
+ units: node4_type
+ names:
+ - noun: node4_type
+ - id: node4_name
+ description: node4_name
+ type: Other
+ units: node4_name
+ names:
+ - noun: node4_name
+ - id: node4_id
+ description: node4_id
+ type: Other
+ units: node4_id
+ names:
+ - noun: node4_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 2786883
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}.
- The {node3_name#} {rel3_type#} the {node4_type#} {node4_name#}.
+ - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. The {node3_name#} {rel3_type#} the {node4_type#} {node4_name#}.
diff --git a/data/kg/compound_protein_pathway/meta.yaml b/data/kg/compound_protein_pathway/meta.yaml
index 8059ada61..e38ce798f 100644
--- a/data/kg/compound_protein_pathway/meta.yaml
+++ b/data/kg/compound_protein_pathway/meta.yaml
@@ -1,94 +1,89 @@
----
name: compound_protein_pathway
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 5872197
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}.
- - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}.
+ - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}.
+ - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}.
diff --git a/data/kg/compound_protein_pathway_disease_1/meta.yaml b/data/kg/compound_protein_pathway_disease_1/meta.yaml
index 623bf8483..ddbbdd0b9 100644
--- a/data/kg/compound_protein_pathway_disease_1/meta.yaml
+++ b/data/kg/compound_protein_pathway_disease_1/meta.yaml
@@ -1,118 +1,112 @@
----
name: compound_protein_pathway_disease_1
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
- - id: rel3_type
- description: rel3_type
- type: Other
- units: rel3_type
- names:
- - noun: rel3_type
- - id: node4_type
- description: node4_type
- type: Other
- units: node4_type
- names:
- - noun: node4_type
- - id: node4_name
- description: node4_name
- type: Other
- units: node4_name
- names:
- - noun: node4_name
- - id: node4_id
- description: node4_id
- type: Other
- units: node4_id
- names:
- - noun: node4_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
+ - id: rel3_type
+ description: rel3_type
+ type: Other
+ units: rel3_type
+ names:
+ - noun: rel3_type
+ - id: node4_type
+ description: node4_type
+ type: Other
+ units: node4_type
+ names:
+ - noun: node4_type
+ - id: node4_name
+ description: node4_name
+ type: Other
+ units: node4_name
+ names:
+ - noun: node4_name
+ - id: node4_id
+ description: node4_id
+ type: Other
+ units: node4_id
+ names:
+ - noun: node4_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 9797638
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}.
- The {node3_name#} is {rel3_type#} the {node4_type#} {node4_name#}.
+ - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. The {node3_name#} is {rel3_type#} the {node4_type#} {node4_name#}.
diff --git a/data/kg/compound_protein_pathway_disease_2/meta.yaml b/data/kg/compound_protein_pathway_disease_2/meta.yaml
index 51c8f8492..257a1b22b 100644
--- a/data/kg/compound_protein_pathway_disease_2/meta.yaml
+++ b/data/kg/compound_protein_pathway_disease_2/meta.yaml
@@ -1,118 +1,112 @@
----
name: compound_protein_pathway_disease_2
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
- - id: rel3_type
- description: rel3_type
- type: Other
- units: rel3_type
- names:
- - noun: rel3_type
- - id: node4_type
- description: node4_type
- type: Other
- units: node4_type
- names:
- - noun: node4_type
- - id: node4_name
- description: node4_name
- type: Other
- units: node4_name
- names:
- - noun: node4_name
- - id: node4_id
- description: node4_id
- type: Other
- units: node4_id
- names:
- - noun: node4_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
+ - id: rel3_type
+ description: rel3_type
+ type: Other
+ units: rel3_type
+ names:
+ - noun: rel3_type
+ - id: node4_type
+ description: node4_type
+ type: Other
+ units: node4_type
+ names:
+ - noun: node4_type
+ - id: node4_name
+ description: node4_name
+ type: Other
+ units: node4_name
+ names:
+ - noun: node4_name
+ - id: node4_id
+ description: node4_id
+ type: Other
+ units: node4_id
+ names:
+ - noun: node4_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 9780116
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#}
- {rel2_type#} {node3_name#}. The {node3_name#} is {rel3_type#} the {node4_type#} {node4_name#}.
+ - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. The {node3_name#} is {rel3_type#} the {node4_type#} {node4_name#}.
diff --git a/data/kg/compound_protein_pathway_disease_3/meta.yaml b/data/kg/compound_protein_pathway_disease_3/meta.yaml
index a39472f08..be4c6c291 100644
--- a/data/kg/compound_protein_pathway_disease_3/meta.yaml
+++ b/data/kg/compound_protein_pathway_disease_3/meta.yaml
@@ -1,118 +1,112 @@
----
name: compound_protein_pathway_disease_3
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
- - id: rel3_type
- description: rel3_type
- type: Other
- units: rel3_type
- names:
- - noun: rel3_type
- - id: node4_type
- description: node4_type
- type: Other
- units: node4_type
- names:
- - noun: node4_type
- - id: node4_name
- description: node4_name
- type: Other
- units: node4_name
- names:
- - noun: node4_name
- - id: node4_id
- description: node4_id
- type: Other
- units: node4_id
- names:
- - noun: node4_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
+ - id: rel3_type
+ description: rel3_type
+ type: Other
+ units: rel3_type
+ names:
+ - noun: rel3_type
+ - id: node4_type
+ description: node4_type
+ type: Other
+ units: node4_type
+ names:
+ - noun: node4_type
+ - id: node4_name
+ description: node4_name
+ type: Other
+ units: node4_name
+ names:
+ - noun: node4_name
+ - id: node4_id
+ description: node4_id
+ type: Other
+ units: node4_id
+ names:
+ - noun: node4_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 8349447
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}.
- The {node3_name#} is {rel3_type#} the {node4_type#} {node4_name#}.
+ - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. The {node3_name#} is {rel3_type#} the {node4_type#} {node4_name#}.
diff --git a/data/kg/compound_protein_protein/meta.yaml b/data/kg/compound_protein_protein/meta.yaml
index 53428585a..e16ea7605 100644
--- a/data/kg/compound_protein_protein/meta.yaml
+++ b/data/kg/compound_protein_protein/meta.yaml
@@ -1,106 +1,100 @@
----
name: compound_protein_protein
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_protein_names
- description: node3_protein_names
- type: Other
- units: node3_protein_names
- names:
- - noun: node3_protein_names
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_protein_names
+ description: node3_protein_names
+ type: Other
+ units: node3_protein_names
+ names:
+ - noun: node3_protein_names
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 10139561
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_type#} {node3_protein_names#}.
- - The {node2_type#} {node2_protein_names#} is targeted by the {SMILES__description} {SMILES#}. The {node2_type#} {node2_protein_names#} {rel2_type#}
- {node3_protein_names#}.
- - |-
- User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} with the {SMILES__description} {SMILES#}?
- Assistant: The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} for example the {node2_type#} {node2_protein_names#}.
- User: Can you tell me a {node3_type#} that {rel2_type#} {node2_type#} {node2_protein_names#}?
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_protein_names#} {rel2_type#} {node3_protein_names#}.
+ - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_type#} {node3_protein_names#}.
+ - The {node2_type#} {node2_protein_names#} is targeted by the {SMILES__description} {SMILES#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_protein_names#}.
+ - |-
+ User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} with the {SMILES__description} {SMILES#}?
+ Assistant: The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} for example the {node2_type#} {node2_protein_names#}.
+ User: Can you tell me a {node3_type#} that {rel2_type#} {node2_type#} {node2_protein_names#}?
+ Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_protein_names#} {rel2_type#} {node3_protein_names#}.
diff --git a/data/kg/drug_chebi/meta.yaml b/data/kg/drug_chebi/meta.yaml
index 354090f45..b1dfb8922 100644
--- a/data/kg/drug_chebi/meta.yaml
+++ b/data/kg/drug_chebi/meta.yaml
@@ -1,63 +1,58 @@
----
name: drug_chebi
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 3033
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#} {rel1_type#} {node2_name#}.
+ - The {node1_type#} {SMILES#} {rel1_type#} {node2_name#}.
diff --git a/data/kg/drug_chebi_chebi/meta.yaml b/data/kg/drug_chebi_chebi/meta.yaml
index 9f8a26513..689c985d4 100644
--- a/data/kg/drug_chebi_chebi/meta.yaml
+++ b/data/kg/drug_chebi_chebi/meta.yaml
@@ -1,87 +1,82 @@
----
name: drug_chebi_chebi
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 5710
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#} {rel1_type#} {node2_name#} and {rel2_type#} {node3_name#}.
+ - The {node1_type#} {SMILES#} {rel1_type#} {node2_name#} and {rel2_type#} {node3_name#}.
diff --git a/data/kg/drug_chebi_chebi_chebi/meta.yaml b/data/kg/drug_chebi_chebi_chebi/meta.yaml
index 5cdc7ec62..a446f2552 100644
--- a/data/kg/drug_chebi_chebi_chebi/meta.yaml
+++ b/data/kg/drug_chebi_chebi_chebi/meta.yaml
@@ -1,110 +1,105 @@
----
name: drug_chebi_chebi_chebi
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
- - id: rel3_type
- description: rel3_type
- type: Other
- units: rel3_type
- names:
- - noun: rel3_type
- - id: node4_type
- description: node4_type
- type: Other
- units: node4_type
- names:
- - noun: node4_type
- - id: node4_name
- description: node4_name
- type: Other
- units: node4_name
- names:
- - noun: node4_name
- - id: node4_id
- description: node4_id
- type: Other
- units: node4_id
- names:
- - noun: node4_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
+ - id: rel3_type
+ description: rel3_type
+ type: Other
+ units: rel3_type
+ names:
+ - noun: rel3_type
+ - id: node4_type
+ description: node4_type
+ type: Other
+ units: node4_type
+ names:
+ - noun: node4_type
+ - id: node4_name
+ description: node4_name
+ type: Other
+ units: node4_name
+ names:
+ - noun: node4_name
+ - id: node4_id
+ description: node4_id
+ type: Other
+ units: node4_id
+ names:
+ - noun: node4_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 1538960
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates: []
diff --git a/data/kg/drug_disease_pathway/meta.yaml b/data/kg/drug_disease_pathway/meta.yaml
index f4f863858..c11801a80 100644
--- a/data/kg/drug_disease_pathway/meta.yaml
+++ b/data/kg/drug_disease_pathway/meta.yaml
@@ -1,87 +1,82 @@
----
name: drug_disease_pathway
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 276
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#|node1_name#} is indicated for the {node2_name#} {node2_type#} and {rel2_type#} the {node3_name#} {node3_type#}.
+ - The {node1_type#} {SMILES#|node1_name#} is indicated for the {node2_name#} {node2_type#} and {rel2_type#} the {node3_name#} {node3_type#}.
diff --git a/data/kg/drug_disease_pathway_protein/meta.yaml b/data/kg/drug_disease_pathway_protein/meta.yaml
index 14a7ace63..9b81a5e58 100644
--- a/data/kg/drug_disease_pathway_protein/meta.yaml
+++ b/data/kg/drug_disease_pathway_protein/meta.yaml
@@ -1,118 +1,112 @@
----
name: drug_disease_pathway_protein
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
- - id: rel3_type
- description: rel3_type
- type: Other
- units: rel3_type
- names:
- - noun: rel3_type
- - id: node4_type
- description: node4_type
- type: Other
- units: node4_type
- names:
- - noun: node4_type
- - id: node4_name
- description: node4_name
- type: Other
- units: node4_name
- names:
- - noun: node4_name
- - id: node4_protein_names
- description: node4_protein_names
- type: Other
- units: node4_protein_names
- names:
- - noun: node4_protein_names
- - id: node4_id
- description: node4_id
- type: Other
- units: node4_id
- names:
- - noun: node4_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
+ - id: rel3_type
+ description: rel3_type
+ type: Other
+ units: rel3_type
+ names:
+ - noun: rel3_type
+ - id: node4_type
+ description: node4_type
+ type: Other
+ units: node4_type
+ names:
+ - noun: node4_type
+ - id: node4_name
+ description: node4_name
+ type: Other
+ units: node4_name
+ names:
+ - noun: node4_name
+ - id: node4_protein_names
+ description: node4_protein_names
+ type: Other
+ units: node4_protein_names
+ names:
+ - noun: node4_protein_names
+ - id: node4_id
+ description: node4_id
+ type: Other
+ units: node4_id
+ names:
+ - noun: node4_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 33215
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#|node1_name#} is indicated for the {node2_name#} {node2_type#} and {rel2_type#} the {node3_name#} {node3_type#}. The {node3_type#}
- {node3_name#} {rel3_type#} the {node4_type#} {node4_protein_names#}.
+ - The {node1_type#} {SMILES#|node1_name#} is indicated for the {node2_name#} {node2_type#} and {rel2_type#} the {node3_name#} {node3_type#}. The {node3_type#} {node3_name#} {rel3_type#} the {node4_type#} {node4_protein_names#}.
diff --git a/data/kg/drug_protein/meta.yaml b/data/kg/drug_protein/meta.yaml
index 9f491fe08..244b659b9 100644
--- a/data/kg/drug_protein/meta.yaml
+++ b/data/kg/drug_protein/meta.yaml
@@ -1,72 +1,67 @@
----
name: drug_protein
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 15303
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}.
- - |-
- User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that is targeted by the {node1_type#} {SMILES#|node1_name#}?
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_protein_names#} is targeted by {#this|the above!} {node1_type#}.
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}.
+ - |-
+ User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that is targeted by the {node1_type#} {SMILES#|node1_name#}?
+ Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_protein_names#} is targeted by {#this|the above!} {node1_type#}.
diff --git a/data/kg/drug_protein_disease/meta.yaml b/data/kg/drug_protein_disease/meta.yaml
index 5797f8635..1c5fda780 100644
--- a/data/kg/drug_protein_disease/meta.yaml
+++ b/data/kg/drug_protein_disease/meta.yaml
@@ -1,100 +1,94 @@
----
name: drug_protein_disease
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 28774
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_type#} {node3_name#}.
- - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#}
- the {node3_type#} {node3_name#}.
- - |-
- User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that is targeted by the {node1_type#} {SMILES#|node1_name#}?
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_protein_names#} is targeted by {#this|the above!} {node1_type#}.
- User: Can you tell me which disease the {node2_type#} {node2_protein_names#} {rel2_type#}?
- Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#} {node3_type#}.
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_type#} {node3_name#}.
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_type#} {node3_name#}.
+ - |-
+ User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that is targeted by the {node1_type#} {SMILES#|node1_name#}?
+ Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_protein_names#} is targeted by {#this|the above!} {node1_type#}.
+ User: Can you tell me which disease the {node2_type#} {node2_protein_names#} {rel2_type#}?
+ Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#} {node3_type#}.
diff --git a/data/kg/drug_protein_domain/meta.yaml b/data/kg/drug_protein_domain/meta.yaml
index 8a62099e6..3e7394470 100644
--- a/data/kg/drug_protein_domain/meta.yaml
+++ b/data/kg/drug_protein_domain/meta.yaml
@@ -1,99 +1,94 @@
----
name: drug_protein_domain
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 33850
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - '{SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} a {node3_name#}.'
- - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} a {node3_name#}.
- - |-
- User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}?
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {rel1_type#} for example the {node2_type#} {node2_protein_names#}.
- User: Can you tell me a domain of the {node2_type#} {node2_protein_names#}?
- Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} a {node3_name#}.
+ - "{SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} a {node3_name#}."
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} a {node3_name#}.
+ - |-
+ User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}?
+ Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {rel1_type#} for example the {node2_type#} {node2_protein_names#}.
+ User: Can you tell me a domain of the {node2_type#} {node2_protein_names#}?
+ Assistant: The {node2_type#} {node2_protein_names#} {rel2_type#} a {node3_name#}.
diff --git a/data/kg/drug_protein_drug/meta.yaml b/data/kg/drug_protein_drug/meta.yaml
index 9d6ffe828..5e23e2455 100644
--- a/data/kg/drug_protein_drug/meta.yaml
+++ b/data/kg/drug_protein_drug/meta.yaml
@@ -1,105 +1,100 @@
----
name: drug_protein_drug
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_smiles
- description: node3_smiles
- type: Other
- units: node3_smiles
- names:
- - noun: node3_smiles
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_smiles
+ description: node3_smiles
+ type: Other
+ units: node3_smiles
+ names:
+ - noun: node3_smiles
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 451843
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_type#} {node3_name#|node3_smiles#}.
- - The {node2_type#} {node2_protein_names#} is targeted by the drugs {SMILES#|node1_name#} and {node3_name#|node3_smiles#}.
- - |-
- User: {#Can you give me|Can you come up with!} {#an|one!} example for a {node1_type#} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
- Assistant: {#Sure|Yes|Of course|Yes, of course!}, the {node1_type#} {rel1_type#} the {node2_type#} {node2_protein_names#}.
- User: Can you tell me another {node1_type#} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node3_type#} {node3_name#|node3_smiles#} also {rel1_type#} the {node2_type#} {node2_protein_names#}.
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_type#} {node3_name#|node3_smiles#}.
+ - The {node2_type#} {node2_protein_names#} is targeted by the drugs {SMILES#|node1_name#} and {node3_name#|node3_smiles#}.
+ - |-
+ User: {#Can you give me|Can you come up with!} {#an|one!} example for a {node1_type#} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
+ Assistant: {#Sure|Yes|Of course|Yes, of course!}, the {node1_type#} {rel1_type#} the {node2_type#} {node2_protein_names#}.
+ User: Can you tell me another {node1_type#} that {rel1_type#} the {node2_type#} {node2_protein_names#}?
+ Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node3_type#} {node3_name#|node3_smiles#} also {rel1_type#} the {node2_type#} {node2_protein_names#}.
diff --git a/data/kg/drug_protein_ec_number/meta.yaml b/data/kg/drug_protein_ec_number/meta.yaml
index 2762b5203..02054ee5f 100644
--- a/data/kg/drug_protein_ec_number/meta.yaml
+++ b/data/kg/drug_protein_ec_number/meta.yaml
@@ -1,100 +1,94 @@
----
name: drug_protein_ec_number
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 7636
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#} which {rel2_type#} the {node3_name#} (EC {node3_id#}) reaction.
- - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}. Furthermore, the {node2_type#} {node2_name#} {rel2_type#} the
- {node3_name#} (EC {node3_id#}) reaction.
- - |-
- User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}?
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}.
- User: Can you tell me which reaction the {node2_type#} {node2_name#} {rel2_type#}?
- Assistant: The {node2_type#} {node2_name#} {rel2_type#} a {node3_name#} (EC {node3_id#}) reaction.
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#} which {rel2_type#} the {node3_name#} (EC {node3_id#}) reaction.
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}. Furthermore, the {node2_type#} {node2_name#} {rel2_type#} the {node3_name#} (EC {node3_id#}) reaction.
+ - |-
+ User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}?
+ Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}.
+ User: Can you tell me which reaction the {node2_type#} {node2_name#} {rel2_type#}?
+ Assistant: The {node2_type#} {node2_name#} {rel2_type#} a {node3_name#} (EC {node3_id#}) reaction.
diff --git a/data/kg/drug_protein_go_term/meta.yaml b/data/kg/drug_protein_go_term/meta.yaml
index 81f97935e..5670302d4 100644
--- a/data/kg/drug_protein_go_term/meta.yaml
+++ b/data/kg/drug_protein_go_term/meta.yaml
@@ -1,99 +1,94 @@
----
name: drug_protein_go_term
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 656202
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#} which {rel2_type#} the {node3_name#}.
- - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}. The {node2_type#} {node2_name#} {rel2_type#} the {node3_name#}.
- - |-
- User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}?
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}.
- User: Can you tell me more {#details |!}about {node2_type#} {node2_name#}?
- Assistant: {#Sure|Yes|Of course|Yes, of course!}, the {node2_type#} {node2_name#} {rel2_type#} the {node3_name#}.
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#} which {rel2_type#} the {node3_name#}.
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}. The {node2_type#} {node2_name#} {rel2_type#} the {node3_name#}.
+ - |-
+ User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}?
+ Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}.
+ User: Can you tell me more {#details |!}about {node2_type#} {node2_name#}?
+ Assistant: {#Sure|Yes|Of course|Yes, of course!}, the {node2_type#} {node2_name#} {rel2_type#} the {node3_name#}.
diff --git a/data/kg/drug_protein_hpo/meta.yaml b/data/kg/drug_protein_hpo/meta.yaml
index 6bbb7dc01..6e3975e2b 100644
--- a/data/kg/drug_protein_hpo/meta.yaml
+++ b/data/kg/drug_protein_hpo/meta.yaml
@@ -1,98 +1,91 @@
----
name: drug_protein_hpo
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 71321
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} {node3_name#}.
- - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#}
- {node3_name#}.
- - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#}
- the human phenotype represented by {node3_name#}.
- - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. This {node2_type#} {rel2_type#} the {node3_name#}.
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} and {rel2_type#} {node3_name#}.
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}.
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the human phenotype represented by {node3_name#}.
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. This {node2_type#} {rel2_type#} the {node3_name#}.
diff --git a/data/kg/drug_protein_hpo_disease/meta.yaml b/data/kg/drug_protein_hpo_disease/meta.yaml
index 865744a14..9e8d68cfd 100644
--- a/data/kg/drug_protein_hpo_disease/meta.yaml
+++ b/data/kg/drug_protein_hpo_disease/meta.yaml
@@ -1,118 +1,112 @@
----
name: drug_protein_hpo_disease
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
- - id: rel3_type
- description: rel3_type
- type: Other
- units: rel3_type
- names:
- - noun: rel3_type
- - id: node4_type
- description: node4_type
- type: Other
- units: node4_type
- names:
- - noun: node4_type
- - id: node4_name
- description: node4_name
- type: Other
- units: node4_name
- names:
- - noun: node4_name
- - id: node4_id
- description: node4_id
- type: Other
- units: node4_id
- names:
- - noun: node4_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
+ - id: rel3_type
+ description: rel3_type
+ type: Other
+ units: rel3_type
+ names:
+ - noun: rel3_type
+ - id: node4_type
+ description: node4_type
+ type: Other
+ units: node4_type
+ names:
+ - noun: node4_type
+ - id: node4_name
+ description: node4_name
+ type: Other
+ units: node4_name
+ names:
+ - noun: node4_name
+ - id: node4_id
+ description: node4_id
+ type: Other
+ units: node4_id
+ names:
+ - noun: node4_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 293872
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#}
- {rel2_type#} {node3_name#}. The {node3_name#} {rel3_type#} the {node4_type#} {node4_name#}.
+ - The {node1_type#} {SMILES__description} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}. The {node3_name#} {rel3_type#} the {node4_type#} {node4_name#}.
diff --git a/data/kg/drug_protein_pathway/meta.yaml b/data/kg/drug_protein_pathway/meta.yaml
index a8e7a8180..9d5d42e8e 100644
--- a/data/kg/drug_protein_pathway/meta.yaml
+++ b/data/kg/drug_protein_pathway/meta.yaml
@@ -1,100 +1,94 @@
----
name: drug_protein_pathway
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 124609
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}.
- - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#}
- the {node3_name#}.
- - |-
- User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}?
- Assistant: {#Sure|Yes|Of course|Yes, of course!}, the {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}.
- User: Can you tell me more {#details |!}about {node2_type#} {node2_name#}?
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_name#} {rel2_type#} the {node3_name#}.
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_name#}.
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}.
+ - |-
+ User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}?
+ Assistant: {#Sure|Yes|Of course|Yes, of course!}, the {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_name#}.
+ User: Can you tell me more {#details |!}about {node2_type#} {node2_name#}?
+ Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_name#} {rel2_type#} the {node3_name#}.
diff --git a/data/kg/drug_protein_pathway_disease/meta.yaml b/data/kg/drug_protein_pathway_disease/meta.yaml
index 1d5a20c11..d0538ca19 100644
--- a/data/kg/drug_protein_pathway_disease/meta.yaml
+++ b/data/kg/drug_protein_pathway_disease/meta.yaml
@@ -1,118 +1,112 @@
----
name: drug_protein_pathway_disease
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
- - id: rel3_type
- description: rel3_type
- type: Other
- units: rel3_type
- names:
- - noun: rel3_type
- - id: node4_type
- description: node4_type
- type: Other
- units: node4_type
- names:
- - noun: node4_type
- - id: node4_name
- description: node4_name
- type: Other
- units: node4_name
- names:
- - noun: node4_name
- - id: node4_id
- description: node4_id
- type: Other
- units: node4_id
- names:
- - noun: node4_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
+ - id: rel3_type
+ description: rel3_type
+ type: Other
+ units: rel3_type
+ names:
+ - noun: rel3_type
+ - id: node4_type
+ description: node4_type
+ type: Other
+ units: node4_type
+ names:
+ - noun: node4_type
+ - id: node4_name
+ description: node4_name
+ type: Other
+ units: node4_name
+ names:
+ - noun: node4_name
+ - id: node4_id
+ description: node4_id
+ type: Other
+ units: node4_id
+ names:
+ - noun: node4_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 617318
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}.
- The {node3_name#} is {rel3_type#} the {node4_type#} {node4_name#}.
+ - The {node1_type#} {SMILES#} {rel1_type#} the {node2_type#} {node2_protein_names#}. The {node2_type#} {node2_protein_names#} {rel2_type#} the {node3_name#}. The {node3_name#} is {rel3_type#} the {node4_type#} {node4_name#}.
diff --git a/data/kg/drug_protein_protein/meta.yaml b/data/kg/drug_protein_protein/meta.yaml
index 849021429..22683f7b7 100644
--- a/data/kg/drug_protein_protein/meta.yaml
+++ b/data/kg/drug_protein_protein/meta.yaml
@@ -1,105 +1,100 @@
----
name: drug_protein_protein
description: Knowledgegraph data samples.
targets:
- - id: node1_type
- description: node1_type
- type: Other
- units: node1_type
- names:
- - noun: node1_type
- - id: node1_name
- description: node1_name
- type: Other
- units: node1_name
- names:
- - noun: node1_name
- - id: node1_id
- description: node1_id
- type: Other
- units: node1_id
- names:
- - noun: node1_id
- - id: rel1_type
- description: rel1_type
- type: Other
- units: rel1_type
- names:
- - noun: rel1_type
- - id: node2_type
- description: node2_type
- type: Other
- units: node2_type
- names:
- - noun: node2_type
- - id: node2_protein_names
- description: node2_protein_names
- type: Other
- units: node2_protein_names
- names:
- - noun: node2_protein_names
- - id: node2_name
- description: node2_name
- type: Other
- units: node2_name
- names:
- - noun: node2_name
- - id: node2_id
- description: node2_id
- type: Other
- units: node2_id
- names:
- - noun: node2_id
- - id: rel2_type
- description: rel2_type
- type: Other
- units: rel2_type
- names:
- - noun: rel2_type
- - id: node3_type
- description: node3_type
- type: Other
- units: node3_type
- names:
- - noun: node3_type
- - id: node3_protein_names
- description: node3_protein_names
- type: Other
- units: node3_protein_names
- names:
- - noun: node3_protein_names
- - id: node3_name
- description: node3_name
- type: Other
- units: node3_name
- names:
- - noun: node3_name
- - id: node3_id
- description: node3_id
- type: Other
- units: node3_id
- names:
- - noun: node3_id
+ - id: node1_type
+ description: node1_type
+ type: Other
+ units: node1_type
+ names:
+ - noun: node1_type
+ - id: node1_name
+ description: node1_name
+ type: Other
+ units: node1_name
+ names:
+ - noun: node1_name
+ - id: node1_id
+ description: node1_id
+ type: Other
+ units: node1_id
+ names:
+ - noun: node1_id
+ - id: rel1_type
+ description: rel1_type
+ type: Other
+ units: rel1_type
+ names:
+ - noun: rel1_type
+ - id: node2_type
+ description: node2_type
+ type: Other
+ units: node2_type
+ names:
+ - noun: node2_type
+ - id: node2_protein_names
+ description: node2_protein_names
+ type: Other
+ units: node2_protein_names
+ names:
+ - noun: node2_protein_names
+ - id: node2_name
+ description: node2_name
+ type: Other
+ units: node2_name
+ names:
+ - noun: node2_name
+ - id: node2_id
+ description: node2_id
+ type: Other
+ units: node2_id
+ names:
+ - noun: node2_id
+ - id: rel2_type
+ description: rel2_type
+ type: Other
+ units: rel2_type
+ names:
+ - noun: rel2_type
+ - id: node3_type
+ description: node3_type
+ type: Other
+ units: node3_type
+ names:
+ - noun: node3_type
+ - id: node3_protein_names
+ description: node3_protein_names
+ type: Other
+ units: node3_protein_names
+ names:
+ - noun: node3_protein_names
+ - id: node3_name
+ description: node3_name
+ type: Other
+ units: node3_name
+ names:
+ - noun: node3_name
+ - id: node3_id
+ description: node3_id
+ type: Other
+ units: node3_id
+ names:
+ - noun: node3_id
identifiers:
- - id: SMILES
- description: SMILES
- type: SMILES
+ - id: SMILES
+ description: SMILES
+ type: SMILES
license: CC BY 4.0
links:
- - url: https://crossbar.kansil.org
- description: original knowledge graph web GUI link
+ - url: https://crossbar.kansil.org
+ description: original knowledge graph web GUI link
num_points: 245582
bibtex:
- - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat,\
- \ Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay,\
- \ Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids\
- \ Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\n\
- url = {https://doi.org/10.1093/nar/gkab543},\n}"
+ - "@article{10.1093/nar/gkab543,\nauthor = {Doğan, Tunca and Atas, Heval and Joshi, Vishal and Atakan, Ahmet and Rifaioglu, Ahmet Sureyya and Nalbat, Esra and Nightingale, Andrew and Saidi, Rabie and Volynkin, Vladimir and Zellner, Hermann and Cetin-Atalay, Rengul and Martin, Maria and Atalay, Volkan},\ntitle = \"{CROssBAR: comprehensive resource of biomedical relations with knowledge graph representations}\",\njournal = {Nucleic Acids Research},\nvolume = {49},\nnumber = {16},\npages = {e96-e96},\nyear = {2021},\nmonth = {06},\nissn = {0305-1048},\ndoi = {10.1093/nar/gkab543},\nurl = {https://doi.org/10.1093/nar/gkab543},\n}"
templates:
- - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_type#} {node3_name#}.
- - The {node2_type#} {node2_protein_names#} is targeted by {SMILES#|node1_name#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}.
- - |-
- User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}?
- Assistant: The {node1_type#} {SMILES#|node1_name#} {rel1_type#} for example the {node2_type#} {node2_protein_names#}.
- User: Can you tell me a {node3_type#} that {rel2_type#} {node2_type#} {node2_protein_names#}?
- Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_name#} {rel2_type#} {node3_type#} {node3_name#}.
+ - The {node1_type#} {SMILES#|node1_name#} {rel1_type#} the {node2_type#} {node2_protein_names#} which {rel2_type#} the {node3_type#} {node3_name#}.
+ - The {node2_type#} {node2_protein_names#} is targeted by {SMILES#|node1_name#}. The {node2_type#} {node2_protein_names#} {rel2_type#} {node3_name#}.
+ - |-
+ User: {#Can you give me|Can you come up with!} {#an|one!} example for a protein that binds the {node1_type#} {SMILES#|node1_name#}?
+ Assistant: The {node1_type#} {SMILES#|node1_name#} {rel1_type#} for example the {node2_type#} {node2_protein_names#}.
+ User: Can you tell me a {node3_type#} that {rel2_type#} {node2_type#} {node2_protein_names#}?
+ Assistant: {#Yes|Of course|Yes, of course|Sure!}, the {node2_type#} {node2_name#} {rel2_type#} {node3_type#} {node3_name#}.
diff --git a/data/natural/preprocess_msds.py b/data/natural/preprocess_msds.py
index c28f2d7d8..9669f0069 100644
--- a/data/natural/preprocess_msds.py
+++ b/data/natural/preprocess_msds.py
@@ -11,7 +11,7 @@
def get_text(d, text="", level=1, linebreaks=2):
for k in d:
if k in [
- "SECTION 6: Acidental release measures", # always empty
+ "SECTION 6: Accidental release measures", # always empty
"SECTION 1: Toxicological information", # always empty
"SECTION 16: Other information", # always the same information
]:
diff --git a/data/tabular/BACE/meta.yaml b/data/tabular/BACE/meta.yaml
index b25927a1f..2149a1a61 100644
--- a/data/tabular/BACE/meta.yaml
+++ b/data/tabular/BACE/meta.yaml
@@ -1,70 +1,69 @@
----
name: BACE
description: |-
- The BACE dataset provides quantitative pIC50 and qualitative (binary label) binding results for
- a set of inhibitors of human beta-secretase 1 (BACE-1). All data are experimental values reported
- in scientific literature over the past decade, some with detailed crystal structures available.
+ The BACE dataset provides quantitative pIC50 and qualitative (binary label) binding results for
+ a set of inhibitors of human beta-secretase 1 (BACE-1). All data are experimental values reported
+ in scientific literature over the past decade, some with detailed crystal structures available.
targets:
- - id: BACE_inhibition
- description: binary labels for inhibition of the human beta-secretase 1 (BACE-1)
- type: boolean
- names:
- - noun: inhibition of the human beta-secretase 1 (BACE-1)
- - adjective: inhibitory of the human beta-secretase 1
- - adjective: inhibitory of BACE-1
- - id: pIC50
- description: pIC50 values for inhibition of human beta-secretase 1 (BACE-1)
- units: M
- type: continuous
- names:
- - noun: pIC50 of the human beta-secretase 1 (BACE-1)
- - noun: negative log10 of the 50% inhibitory concentration of BACE-1
+ - id: BACE_inhibition
+ description: binary labels for inhibition of the human beta-secretase 1 (BACE-1)
+ type: boolean
+ names:
+ - noun: inhibition of the human beta-secretase 1 (BACE-1)
+ - adjective: inhibitory of the human beta-secretase 1
+ - adjective: inhibitory of BACE-1
+ - id: pIC50
+ description: pIC50 values for inhibition of human beta-secretase 1 (BACE-1)
+ units: M
+ type: continuous
+ names:
+ - noun: pIC50 of the human beta-secretase 1 (BACE-1)
+ - noun: negative log10 of the 50% inhibitory concentration of BACE-1
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://pubs.rsc.org/en/content/articlehtml/2018/sc/c7sc02664a
- description: corresponding publication
- - url:
- - https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/bace.csv
- description: data source
+ - url: https://pubs.rsc.org/en/content/articlehtml/2018/sc/c7sc02664a
+ description: corresponding publication
+ - url:
+ - https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/bace.csv
+ description: data source
num_points: 1513
bibtex:
- - |-
- @article{Wu2018,
- doi = {10.1039/c7sc02664a},
- url = {https://doi.org/10.1039/c7sc02664a},
- year = {2018},
- publisher = {Royal Society of Chemistry (RSC)},
- volume = {9},
- number = {2},
- pages = {513--530},
- author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg and Joseph Gomes
- and Caleb Geniesse and Aneesh S. Pappu and Karl Leswing and Vijay Pande},
- title = {MoleculeNet: a benchmark for molecular machine learning},
- journal = {Chemical Science}
+ - |-
+ @article{Wu2018,
+ doi = {10.1039/c7sc02664a},
+ url = {https://doi.org/10.1039/c7sc02664a},
+ year = {2018},
+ publisher = {Royal Society of Chemistry (RSC)},
+ volume = {9},
+ number = {2},
+ pages = {513--530},
+ author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg and Joseph Gomes
+ and Caleb Geniesse and Aneesh S. Pappu and Karl Leswing and Vijay Pande},
+ title = {MoleculeNet: a benchmark for molecular machine learning},
+ journal = {Chemical Science}
templates:
- - The {#compound|chemical!} with the {SMILES__description} of {SMILES#} {#shows|exhibits|displays!} {BACE_inhibition#no &NULL}{BACE_inhibition__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {BACE_inhibition#not &NULL}{BACE_inhibition__names__adjective}.
- - The {SMILES__description} {SMILES#} represents a molecule that is {BACE_inhibition#not&NULL}identified as {BACE_inhibition__names__adjective}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {BACE_inhibition__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|extra!} words.
- Result: {BACE_inhibition#False&True}
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {BACE_inhibition#not &NULL}{BACE_inhibition__names__adjective}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {BACE_inhibition__names__adjective}?
- Assistant: {BACE_inhibition#No&Yes}, this molecule is {BACE_inhibition#not &NULL}{BACE_inhibition__names__adjective}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {BACE_inhibition__names__adjective}?
- Assistant: {BACE_inhibition#No&Yes}, it is {BACE_inhibition#not &NULL}{BACE_inhibition__names__adjective}.
- - The compound with the {SMILES__description} {SMILES#} has a {pIC50__names__noun} of {pIC50#} {pIC50__units}.
- - Based on the {SMILES__description} {SMILES#}, the molecule has a {pIC50__names__noun} of {pIC50#} {pIC50__units}.
- - The {SMILES__description}{SMILES#} represents a molecule that has a {pIC50__names__noun} of {pIC50#} {pIC50__units}.
+ - The {#compound|chemical!} with the {SMILES__description} of {SMILES#} {#shows|exhibits|displays!} {BACE_inhibition#no &NULL}{BACE_inhibition__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {BACE_inhibition#not &NULL}{BACE_inhibition__names__adjective}.
+ - The {SMILES__description} {SMILES#} represents a molecule that is {BACE_inhibition#not &NULL}identified as {BACE_inhibition__names__adjective}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {BACE_inhibition__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|extra!} words.
+ Result: {BACE_inhibition#False&True}
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {BACE_inhibition#not &NULL}{BACE_inhibition__names__adjective}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {BACE_inhibition__names__adjective}?
+ Assistant: {BACE_inhibition#No&Yes}, this molecule is {BACE_inhibition#not &NULL}{BACE_inhibition__names__adjective}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {BACE_inhibition__names__adjective}?
+ Assistant: {BACE_inhibition#No&Yes}, it is {BACE_inhibition#not &NULL}{BACE_inhibition__names__adjective}.
+ - The compound with the {SMILES__description} {SMILES#} has a {pIC50__names__noun} of {pIC50#} {pIC50__units}.
+ - Based on the {SMILES__description} {SMILES#}, the molecule has a {pIC50__names__noun} of {pIC50#} {pIC50__units}.
+ - The {SMILES__description} {SMILES#} represents a molecule that has a {pIC50__names__noun} of {pIC50#} {pIC50__units}.
diff --git a/data/tabular/BBBP/meta.yaml b/data/tabular/BBBP/meta.yaml
index b589d65a9..5237a25ee 100644
--- a/data/tabular/BBBP/meta.yaml
+++ b/data/tabular/BBBP/meta.yaml
@@ -1,65 +1,64 @@
----
name: BBBP
description: |-
- The blood-brain barrier penetration (BBBP) dataset is designed for the
- modeling and prediction of barrier permeability. As a membrane separating
- circulating blood and brain extracellular fluid, the blood-brain barrier
- blocks most drugs, hormones, and neurotransmitters. Thus penetration of the
- barrier forms a long-standing issue in the development of drugs targeting
- the central nervous system. This dataset includes binary labels for over 2000
- compounds on their permeability properties.
+ The blood-brain barrier penetration (BBBP) dataset is designed for the
+ modeling and prediction of barrier permeability. As a membrane separating
+ circulating blood and brain extracellular fluid, the blood-brain barrier
+ blocks most drugs, hormones, and neurotransmitters. Thus penetration of the
+ barrier forms a long-standing issue in the development of drugs targeting
+ the central nervous system. This dataset includes binary labels for over 2000
+ compounds on their permeability properties.
targets:
- - id: p_np
- description: Binary labels for penetration/non-penetration of the blood-brain barrier
- type: boolean
- names:
- - noun: blood-brain barrier permeability
- - noun: permeability through the blood-brain barrier
- - noun: permeability through the membrane separating circulating blood and extracellular brain fluid
- - adjective: permeable through the blood-brain barrier
- - adjective: permeable through the membrane separating circulating blood and extracellular brain fluid
+ - id: p_np
+ description: Binary labels for penetration/non-penetration of the blood-brain barrier
+ type: boolean
+ names:
+ - noun: blood-brain barrier permeability
+ - noun: permeability through the blood-brain barrier
+ - noun: permeability through the membrane separating circulating blood and extracellular brain fluid
+ - adjective: permeable through the blood-brain barrier
+ - adjective: permeable through the membrane separating circulating blood and extracellular brain fluid
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://pubs.acs.org/doi/10.1021/ci300124c
- description: corresponding publication
- - url:
- - https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/BBBP.csv
- description: data source
+ - url: https://pubs.acs.org/doi/10.1021/ci300124c
+ description: corresponding publication
+ - url:
+ - https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/BBBP.csv
+ description: data source
num_points: 2050
bibtex:
- - |-
- @article{doi:10.1021/ci300124c,
- author = {Martins, Ines Filipa and Teixeira, Ana L. and Pinheiro, Luis and Falcao, Andre O.},
- title = {A Bayesian Approach to in Silico Blood-Brain Barrier Penetration Modeling},
- journal = {Journal of Chemical Information and Modeling},
- volume = {52},
- number = {6},
- pages = {1686-1697},
- year = {2012},
- doi = {10.1021/ci300124c},
- URL = {https://doi.org/10.1021/ci300124c},
- eprint = {https://doi.org/10.1021/ci300124c}}
+ - |-
+ @article{doi:10.1021/ci300124c,
+ author = {Martins, Ines Filipa and Teixeira, Ana L. and Pinheiro, Luis and Falcao, Andre O.},
+ title = {A Bayesian Approach to in Silico Blood-Brain Barrier Penetration Modeling},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {52},
+ number = {6},
+ pages = {1686-1697},
+ year = {2012},
+ doi = {10.1021/ci300124c},
+ URL = {https://doi.org/10.1021/ci300124c},
+ eprint = {https://doi.org/10.1021/ci300124c}}
templates:
- - The {#compound|chemical!} with the {SMILES__description} of {SMILES#} {#shows|exhibits|displays!} {p_np#no &NULL}{p_np__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {p_np#not &NULL}{p_np__names__adjective}.
- - The {SMILES__description} {SMILES#} represents a molecule that is {p_np#not&NULL}identified as {p_np__names__adjective}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {p_np__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|extra!} words.
- Result: {p_np#False&True}
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {p_np#not &NULL}{p_np__names__adjective}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {p_np__names__adjective}?
- Assistant: {p_np#No&Yes}, this molecule is {p_np#not &NULL}{p_np__names__adjective}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {p_np__names__adjective}?
- Assistant: {p_np#No&Yes}, it is {p_np#not &NULL}{p_np__names__adjective}.
+ - The {#compound|chemical!} with the {SMILES__description} of {SMILES#} {#shows|exhibits|displays!} {p_np#no &NULL}{p_np__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {p_np#not &NULL}{p_np__names__adjective}.
+ - The {SMILES__description} {SMILES#} represents a molecule that is {p_np#not &NULL}identified as {p_np__names__adjective}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {p_np__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|extra!} words.
+ Result: {p_np#False&True}
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {p_np#not &NULL}{p_np__names__adjective}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {p_np__names__adjective}?
+ Assistant: {p_np#No&Yes}, this molecule is {p_np#not &NULL}{p_np__names__adjective}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {p_np__names__adjective}?
+ Assistant: {p_np#No&Yes}, it is {p_np#not &NULL}{p_np__names__adjective}.
diff --git a/data/tabular/MUV_466/meta.yaml b/data/tabular/MUV_466/meta.yaml
index 60ef927e4..c3d47e042 100644
--- a/data/tabular/MUV_466/meta.yaml
+++ b/data/tabular/MUV_466/meta.yaml
@@ -1,35 +1,33 @@
----
name: MUV_466
description: Activity in the MUV_466 assay
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: MUV-466
- type: boolean
- description: MUV-466
- names:
- - noun: an agonist of the S1P1 receptor
+ - id: MUV-466
+ type: boolean
+ description: MUV-466
+ names:
+ - noun: an agonist of the S1P1 receptor
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
+ description: Data source
num_points: 14841
bibtex:
- - |
- @article{doi:10.1021/ci8002649,
- author = {Rohrer, Sebastian G. and Baumann, Knut},
- title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
- journal = {Journal of Chemical Information and Modeling},
- volume = {49},
- number = {2},
- pages = {169-184},
- year = {2009},
- doi = {10.1021/ci8002649},
- URL = {https://doi.org/10.1021/ci8002649}}
+ - |
+ @article{doi:10.1021/ci8002649,
+ author = {Rohrer, Sebastian G. and Baumann, Knut},
+ title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {2},
+ pages = {169-184},
+ year = {2009},
+ doi = {10.1021/ci8002649},
+ URL = {https://doi.org/10.1021/ci8002649}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-466#not
- &NULL}{MUV-466__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-466#not &NULL}{MUV-466__names__noun}.
diff --git a/data/tabular/MUV_548/meta.yaml b/data/tabular/MUV_548/meta.yaml
index 8a1ad7371..6092be5a3 100644
--- a/data/tabular/MUV_548/meta.yaml
+++ b/data/tabular/MUV_548/meta.yaml
@@ -1,37 +1,35 @@
----
name: MUV_548
description: Activity in the MUV_548 assay
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: MUV-548
- type: boolean
- description: MUV-548
- names:
- - noun: an inhibitor of the protein kinase A (PKA)
- - noun: an inhibitor of the protein kinase A
- - noun: an inhibitor of PKA
+ - id: MUV-548
+ type: boolean
+ description: MUV-548
+ names:
+ - noun: an inhibitor of the protein kinase A (PKA)
+ - noun: an inhibitor of the protein kinase A
+ - noun: an inhibitor of PKA
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
+ description: Data source
num_points: 14734
bibtex:
- - |
- @article{doi:10.1021/ci8002649,
- author = {Rohrer, Sebastian G. and Baumann, Knut},
- title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
- journal = {Journal of Chemical Information and Modeling},
- volume = {49},
- number = {2},
- pages = {169-184},
- year = {2009},
- doi = {10.1021/ci8002649},
- URL = {https://doi.org/10.1021/ci8002649}}
+ - |
+ @article{doi:10.1021/ci8002649,
+ author = {Rohrer, Sebastian G. and Baumann, Knut},
+ title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {2},
+ pages = {169-184},
+ year = {2009},
+ doi = {10.1021/ci8002649},
+ URL = {https://doi.org/10.1021/ci8002649}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-548#not
- &NULL}{MUV-548__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-548#not &NULL}{MUV-548__names__noun}.
diff --git a/data/tabular/MUV_600/meta.yaml b/data/tabular/MUV_600/meta.yaml
index 58f2cdbc4..cebb65352 100644
--- a/data/tabular/MUV_600/meta.yaml
+++ b/data/tabular/MUV_600/meta.yaml
@@ -1,36 +1,34 @@
----
name: MUV_600
description: Activity in the MUV_600 assay
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: MUV-600
- type: boolean
- description: MUV-600
- names:
- - noun: an inhibitor of the steroidogenic factor 1 (SF-1)
- - noun: an inhibitor of SF-1
+ - id: MUV-600
+ type: boolean
+ description: MUV-600
+ names:
+ - noun: an inhibitor of the steroidogenic factor 1 (SF-1)
+ - noun: an inhibitor of SF-1
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
+ description: Data source
num_points: 14728
bibtex:
- - |
- @article{doi:10.1021/ci8002649,
- author = {Rohrer, Sebastian G. and Baumann, Knut},
- title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
- journal = {Journal of Chemical Information and Modeling},
- volume = {49},
- number = {2},
- pages = {169-184},
- year = {2009},
- doi = {10.1021/ci8002649},
- URL = {https://doi.org/10.1021/ci8002649}}
+ - |
+ @article{doi:10.1021/ci8002649,
+ author = {Rohrer, Sebastian G. and Baumann, Knut},
+ title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {2},
+ pages = {169-184},
+ year = {2009},
+ doi = {10.1021/ci8002649},
+ URL = {https://doi.org/10.1021/ci8002649}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-600#not
- &NULL}{MUV-600__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-600#not &NULL}{MUV-600__names__noun}.
diff --git a/data/tabular/MUV_644/meta.yaml b/data/tabular/MUV_644/meta.yaml
index 3b97372f9..43a60930d 100644
--- a/data/tabular/MUV_644/meta.yaml
+++ b/data/tabular/MUV_644/meta.yaml
@@ -1,36 +1,34 @@
----
name: MUV_644
description: Activity in the MUV_644 assay
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: MUV-644
- type: boolean
- description: MUV-644
- names:
- - noun: an inhibitor of Rho-kinase 2 (ROCK-2)
- - noun: an inhibitor of ROCK-2
+ - id: MUV-644
+ type: boolean
+ description: MUV-644
+ names:
+ - noun: an inhibitor of Rho-kinase 2 (ROCK-2)
+ - noun: an inhibitor of ROCK-2
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
+ description: Data source
num_points: 14623
bibtex:
- - |
- @article{doi:10.1021/ci8002649,
- author = {Rohrer, Sebastian G. and Baumann, Knut},
- title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
- journal = {Journal of Chemical Information and Modeling},
- volume = {49},
- number = {2},
- pages = {169-184},
- year = {2009},
- doi = {10.1021/ci8002649},
- URL = {https://doi.org/10.1021/ci8002649}}
+ - |
+ @article{doi:10.1021/ci8002649,
+ author = {Rohrer, Sebastian G. and Baumann, Knut},
+ title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {2},
+ pages = {169-184},
+ year = {2009},
+ doi = {10.1021/ci8002649},
+ URL = {https://doi.org/10.1021/ci8002649}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-644#not
- &NULL}{MUV-644__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-644#not &NULL}{MUV-644__names__noun}.
diff --git a/data/tabular/MUV_652/meta.yaml b/data/tabular/MUV_652/meta.yaml
index 8f559d38c..da14f996c 100644
--- a/data/tabular/MUV_652/meta.yaml
+++ b/data/tabular/MUV_652/meta.yaml
@@ -1,35 +1,33 @@
----
name: MUV_652
description: Activity in the MUV_652 assay
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: MUV-652
- type: boolean
- description: MUV-652
- names:
- - noun: an inhibitor of HIV RT-RNase
+ - id: MUV-652
+ type: boolean
+ description: MUV-652
+ names:
+ - noun: an inhibitor of HIV RT-RNase
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
+ description: Data source
num_points: 14902
bibtex:
- - |
- @article{doi:10.1021/ci8002649,
- author = {Rohrer, Sebastian G. and Baumann, Knut},
- title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
- journal = {Journal of Chemical Information and Modeling},
- volume = {49},
- number = {2},
- pages = {169-184},
- year = {2009},
- doi = {10.1021/ci8002649},
- URL = {https://doi.org/10.1021/ci8002649}}
+ - |
+ @article{doi:10.1021/ci8002649,
+ author = {Rohrer, Sebastian G. and Baumann, Knut},
+ title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {2},
+ pages = {169-184},
+ year = {2009},
+ doi = {10.1021/ci8002649},
+ URL = {https://doi.org/10.1021/ci8002649}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-652#not
- &NULL}{MUV-652__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-652#not &NULL}{MUV-652__names__noun}.
diff --git a/data/tabular/MUV_689/meta.yaml b/data/tabular/MUV_689/meta.yaml
index 6e1ad3423..780715e39 100644
--- a/data/tabular/MUV_689/meta.yaml
+++ b/data/tabular/MUV_689/meta.yaml
@@ -1,35 +1,33 @@
----
name: MUV_689
description: Activity in the MUV_689 assay
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: MUV-689
- type: boolean
- description: MUV-689
- names:
- - noun: an inhibitor of the EPH receptor A4
+ - id: MUV-689
+ type: boolean
+ description: MUV-689
+ names:
+ - noun: an inhibitor of the EPH receptor A4
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
+ description: Data source
num_points: 14601
bibtex:
- - |
- @article{doi:10.1021/ci8002649,
- author = {Rohrer, Sebastian G. and Baumann, Knut},
- title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
- journal = {Journal of Chemical Information and Modeling},
- volume = {49},
- number = {2},
- pages = {169-184},
- year = {2009},
- doi = {10.1021/ci8002649},
- URL = {https://doi.org/10.1021/ci8002649}}
+ - |
+ @article{doi:10.1021/ci8002649,
+ author = {Rohrer, Sebastian G. and Baumann, Knut},
+ title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {2},
+ pages = {169-184},
+ year = {2009},
+ doi = {10.1021/ci8002649},
+ URL = {https://doi.org/10.1021/ci8002649}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-689#not
- &NULL}{MUV-689__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-689#not &NULL}{MUV-689__names__noun}.
diff --git a/data/tabular/MUV_692/meta.yaml b/data/tabular/MUV_692/meta.yaml
index d8988d3d3..62400e402 100644
--- a/data/tabular/MUV_692/meta.yaml
+++ b/data/tabular/MUV_692/meta.yaml
@@ -1,36 +1,34 @@
----
name: MUV_692
description: Activity in the MUV_692 assay
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: MUV-692
- type: boolean
- description: MUV-692
- names:
- - noun: an agonist of the steroidogenic factor 1 (SF-1)
- - noun: an agonist of SF-1
+ - id: MUV-692
+ type: boolean
+ description: MUV-692
+ names:
+ - noun: an agonist of the steroidogenic factor 1 (SF-1)
+ - noun: an agonist of SF-1
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
+ description: Data source
num_points: 14644
bibtex:
- - |
- @article{doi:10.1021/ci8002649,
- author = {Rohrer, Sebastian G. and Baumann, Knut},
- title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
- journal = {Journal of Chemical Information and Modeling},
- volume = {49},
- number = {2},
- pages = {169-184},
- year = {2009},
- doi = {10.1021/ci8002649},
- URL = {https://doi.org/10.1021/ci8002649}}
+ - |
+ @article{doi:10.1021/ci8002649,
+ author = {Rohrer, Sebastian G. and Baumann, Knut},
+ title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {2},
+ pages = {169-184},
+ year = {2009},
+ doi = {10.1021/ci8002649},
+ URL = {https://doi.org/10.1021/ci8002649}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-692#not
- &NULL}{MUV-692__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-692#not &NULL}{MUV-692__names__noun}.
diff --git a/data/tabular/MUV_712/meta.yaml b/data/tabular/MUV_712/meta.yaml
index 15ee8d493..977d3feb5 100644
--- a/data/tabular/MUV_712/meta.yaml
+++ b/data/tabular/MUV_712/meta.yaml
@@ -1,36 +1,34 @@
----
name: MUV_712
description: Activity in the MUV_712 assay
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: MUV-712
- type: boolean
- description: MUV-712
- names:
- - noun: an inhibitor of the heat shock protein 90
- - noun: an inhibitor of HSP90
+ - id: MUV-712
+ type: boolean
+ description: MUV-712
+ names:
+ - noun: an inhibitor of the heat shock protein 90
+ - noun: an inhibitor of HSP90
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
+ description: Data source
num_points: 14411
bibtex:
- - |
- @article{doi:10.1021/ci8002649,
- author = {Rohrer, Sebastian G. and Baumann, Knut},
- title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
- journal = {Journal of Chemical Information and Modeling},
- volume = {49},
- number = {2},
- pages = {169-184},
- year = {2009},
- doi = {10.1021/ci8002649},
- URL = {https://doi.org/10.1021/ci8002649}}
+ - |
+ @article{doi:10.1021/ci8002649,
+ author = {Rohrer, Sebastian G. and Baumann, Knut},
+ title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {2},
+ pages = {169-184},
+ year = {2009},
+ doi = {10.1021/ci8002649},
+ URL = {https://doi.org/10.1021/ci8002649}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-712#not
- &NULL}{MUV-712__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-712#not &NULL}{MUV-712__names__noun}.
diff --git a/data/tabular/MUV_713/meta.yaml b/data/tabular/MUV_713/meta.yaml
index b26d3da21..a5623ea17 100644
--- a/data/tabular/MUV_713/meta.yaml
+++ b/data/tabular/MUV_713/meta.yaml
@@ -1,36 +1,34 @@
----
name: MUV_713
description: Activity in the MUV_713 assay
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: MUV-713
- type: boolean
- description: MUV-713
- names:
- - noun: an inhibitor of the estrogen receptor-alpha-coactivator binding
- - noun: an inhibitor of the ER-alpha-coact. binding
+ - id: MUV-713
+ type: boolean
+ description: MUV-713
+ names:
+ - noun: an inhibitor of the estrogen receptor-alpha-coactivator binding
+ - noun: an inhibitor of the ER-alpha-coact. binding
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
+ description: Data source
num_points: 14836
bibtex:
- - |
- @article{doi:10.1021/ci8002649,
- author = {Rohrer, Sebastian G. and Baumann, Knut},
- title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
- journal = {Journal of Chemical Information and Modeling},
- volume = {49},
- number = {2},
- pages = {169-184},
- year = {2009},
- doi = {10.1021/ci8002649},
- URL = {https://doi.org/10.1021/ci8002649}}
+ - |
+ @article{doi:10.1021/ci8002649,
+ author = {Rohrer, Sebastian G. and Baumann, Knut},
+ title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {2},
+ pages = {169-184},
+ year = {2009},
+ doi = {10.1021/ci8002649},
+ URL = {https://doi.org/10.1021/ci8002649}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-713#not
- &NULL}{MUV-713__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-713#not &NULL}{MUV-713__names__noun}.
diff --git a/data/tabular/MUV_733/meta.yaml b/data/tabular/MUV_733/meta.yaml
index c29903e09..7acc2925d 100644
--- a/data/tabular/MUV_733/meta.yaml
+++ b/data/tabular/MUV_733/meta.yaml
@@ -1,35 +1,33 @@
----
name: MUV_733
description: Activity in the MUV_733 assay
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: MUV-733
- type: boolean
- description: MUV-733
- names:
- - noun: an inhibitor of the estrogen receptor-alpha-coactivator binding
+ - id: MUV-733
+ type: boolean
+ description: MUV-733
+ names:
+ - noun: an inhibitor of the estrogen receptor-beta-coactivator binding
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
+ description: Data source
num_points: 14682
bibtex:
- - |
- @article{doi:10.1021/ci8002649,
- author = {Rohrer, Sebastian G. and Baumann, Knut},
- title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
- journal = {Journal of Chemical Information and Modeling},
- volume = {49},
- number = {2},
- pages = {169-184},
- year = {2009},
- doi = {10.1021/ci8002649},
- URL = {https://doi.org/10.1021/ci8002649}}
+ - |
+ @article{doi:10.1021/ci8002649,
+ author = {Rohrer, Sebastian G. and Baumann, Knut},
+ title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {2},
+ pages = {169-184},
+ year = {2009},
+ doi = {10.1021/ci8002649},
+ URL = {https://doi.org/10.1021/ci8002649}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-733#not
- &NULL}{MUV-733__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-733#not &NULL}{MUV-733__names__noun}.
diff --git a/data/tabular/MUV_737/meta.yaml b/data/tabular/MUV_737/meta.yaml
index cf0b3deb3..69c56e3ed 100644
--- a/data/tabular/MUV_737/meta.yaml
+++ b/data/tabular/MUV_737/meta.yaml
@@ -1,36 +1,34 @@
----
name: MUV_737
description: Activity in the MUV_737 assay
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: MUV-737
- type: boolean
- description: MUV-737
- names:
- - noun: a potentiator of the estrogen receptor-alpha-coactivator binding
- - noun: a potentiator of the ER-alpha-coact. binding
+ - id: MUV-737
+ type: boolean
+ description: MUV-737
+ names:
+ - noun: a potentiator of the estrogen receptor-alpha-coactivator binding
+ - noun: a potentiator of the ER-alpha-coact. binding
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
+ description: Data source
num_points: 14691
bibtex:
- - |
- @article{doi:10.1021/ci8002649,
- author = {Rohrer, Sebastian G. and Baumann, Knut},
- title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
- journal = {Journal of Chemical Information and Modeling},
- volume = {49},
- number = {2},
- pages = {169-184},
- year = {2009},
- doi = {10.1021/ci8002649},
- URL = {https://doi.org/10.1021/ci8002649}}
+ - |
+ @article{doi:10.1021/ci8002649,
+ author = {Rohrer, Sebastian G. and Baumann, Knut},
+ title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {2},
+ pages = {169-184},
+ year = {2009},
+ doi = {10.1021/ci8002649},
+ URL = {https://doi.org/10.1021/ci8002649}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-737#not
- &NULL}{MUV-737__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-737#not &NULL}{MUV-737__names__noun}.
diff --git a/data/tabular/MUV_810/meta.yaml b/data/tabular/MUV_810/meta.yaml
index e285dd8ad..74c9f1907 100644
--- a/data/tabular/MUV_810/meta.yaml
+++ b/data/tabular/MUV_810/meta.yaml
@@ -1,36 +1,34 @@
----
name: MUV_810
description: Activity in the MUV_810 assay
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: MUV-810
- type: boolean
- description: MUV-810
- names:
- - noun: an inhibitor of the focal adhesion kinase
- - noun: an inhibitor of FAK
+ - id: MUV-810
+ type: boolean
+ description: MUV-810
+ names:
+ - noun: an inhibitor of the focal adhesion kinase
+ - noun: an inhibitor of FAK
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
+ description: Data source
num_points: 14644
bibtex:
- - |
- @article{doi:10.1021/ci8002649,
- author = {Rohrer, Sebastian G. and Baumann, Knut},
- title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
- journal = {Journal of Chemical Information and Modeling},
- volume = {49},
- number = {2},
- pages = {169-184},
- year = {2009},
- doi = {10.1021/ci8002649},
- URL = {https://doi.org/10.1021/ci8002649}}
+ - |
+ @article{doi:10.1021/ci8002649,
+ author = {Rohrer, Sebastian G. and Baumann, Knut},
+ title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {2},
+ pages = {169-184},
+ year = {2009},
+ doi = {10.1021/ci8002649},
+ URL = {https://doi.org/10.1021/ci8002649}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-810#not
- &NULL}{MUV-810__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-810#not &NULL}{MUV-810__names__noun}.
diff --git a/data/tabular/MUV_832/meta.yaml b/data/tabular/MUV_832/meta.yaml
index 1c1e74835..16ad3978a 100644
--- a/data/tabular/MUV_832/meta.yaml
+++ b/data/tabular/MUV_832/meta.yaml
@@ -1,35 +1,33 @@
----
name: MUV_832
description: Activity in the MUV_832 assay
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: MUV-832
- type: boolean
- description: MUV-832
- names:
- - noun: an inhibitor of the Cathepsin G protease
+ - id: MUV-832
+ type: boolean
+ description: MUV-832
+ names:
+ - noun: an inhibitor of the Cathepsin G protease
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
+ description: Data source
num_points: 14667
bibtex:
- - |
- @article{doi:10.1021/ci8002649,
- author = {Rohrer, Sebastian G. and Baumann, Knut},
- title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
- journal = {Journal of Chemical Information and Modeling},
- volume = {49},
- number = {2},
- pages = {169-184},
- year = {2009},
- doi = {10.1021/ci8002649},
- URL = {https://doi.org/10.1021/ci8002649}}
+ - |
+ @article{doi:10.1021/ci8002649,
+ author = {Rohrer, Sebastian G. and Baumann, Knut},
+ title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {2},
+ pages = {169-184},
+ year = {2009},
+ doi = {10.1021/ci8002649},
+ URL = {https://doi.org/10.1021/ci8002649}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-832#not
- &NULL}{MUV-832__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-832#not &NULL}{MUV-832__names__noun}.
diff --git a/data/tabular/MUV_846/meta.yaml b/data/tabular/MUV_846/meta.yaml
index d9fc0362d..fc3621147 100644
--- a/data/tabular/MUV_846/meta.yaml
+++ b/data/tabular/MUV_846/meta.yaml
@@ -1,35 +1,33 @@
----
name: MUV_846
description: Activity in the MUV_846 assay
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: MUV-846
- type: boolean
- description: MUV-846
- names:
- - noun: an inhibitor of factor XIa (FXIa)
+ - id: MUV-846
+ type: boolean
+ description: MUV-846
+ names:
+ - noun: an inhibitor of factor XIa (FXIa)
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
+ description: Data source
num_points: 14711
bibtex:
- - |
- @article{doi:10.1021/ci8002649,
- author = {Rohrer, Sebastian G. and Baumann, Knut},
- title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
- journal = {Journal of Chemical Information and Modeling},
- volume = {49},
- number = {2},
- pages = {169-184},
- year = {2009},
- doi = {10.1021/ci8002649},
- URL = {https://doi.org/10.1021/ci8002649}}
+ - |
+ @article{doi:10.1021/ci8002649,
+ author = {Rohrer, Sebastian G. and Baumann, Knut},
+ title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {2},
+ pages = {169-184},
+ year = {2009},
+ doi = {10.1021/ci8002649},
+ URL = {https://doi.org/10.1021/ci8002649}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-846#not
- &NULL}{MUV-846__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-846#not &NULL}{MUV-846__names__noun}.
diff --git a/data/tabular/MUV_852/meta.yaml b/data/tabular/MUV_852/meta.yaml
index 86d8874fc..a77ec1be0 100644
--- a/data/tabular/MUV_852/meta.yaml
+++ b/data/tabular/MUV_852/meta.yaml
@@ -1,35 +1,33 @@
----
name: MUV_852
description: Activity in the MUV_852 assay
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: MUV-852
- type: boolean
- description: MUV-852
- names:
- - noun: an inhibitor of factor XIIa (FXIIa)
+ - id: MUV-852
+ type: boolean
+ description: MUV-852
+ names:
+ - noun: an inhibitor of factor XIIa (FXIIa)
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
+ description: Data source
num_points: 14651
bibtex:
- - |
- @article{doi:10.1021/ci8002649,
- author = {Rohrer, Sebastian G. and Baumann, Knut},
- title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
- journal = {Journal of Chemical Information and Modeling},
- volume = {49},
- number = {2},
- pages = {169-184},
- year = {2009},
- doi = {10.1021/ci8002649},
- URL = {https://doi.org/10.1021/ci8002649}}
+ - |
+ @article{doi:10.1021/ci8002649,
+ author = {Rohrer, Sebastian G. and Baumann, Knut},
+ title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {2},
+ pages = {169-184},
+ year = {2009},
+ doi = {10.1021/ci8002649},
+ URL = {https://doi.org/10.1021/ci8002649}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-852#not
- &NULL}{MUV-852__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-852#not &NULL}{MUV-852__names__noun}.
diff --git a/data/tabular/MUV_858/meta.yaml b/data/tabular/MUV_858/meta.yaml
index bb110dc4e..0690988e8 100644
--- a/data/tabular/MUV_858/meta.yaml
+++ b/data/tabular/MUV_858/meta.yaml
@@ -1,36 +1,34 @@
----
name: MUV_858
description: Activity in the MUV_858 assay
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: MUV-858
- type: boolean
- description: MUV-858
- names:
- - noun: an allosteric modulator of the dopamine receptor D1
- - noun: an allosteric modulator of the D1 receptor
+ - id: MUV-858
+ type: boolean
+ description: MUV-858
+ names:
+ - noun: an allosteric modulator of the dopamine receptor D1
+ - noun: an allosteric modulator of the D1 receptor
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
+ description: Data source
num_points: 14774
bibtex:
- - |
- @article{doi:10.1021/ci8002649,
- author = {Rohrer, Sebastian G. and Baumann, Knut},
- title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
- journal = {Journal of Chemical Information and Modeling},
- volume = {49},
- number = {2},
- pages = {169-184},
- year = {2009},
- doi = {10.1021/ci8002649},
- URL = {https://doi.org/10.1021/ci8002649}}
+ - |
+ @article{doi:10.1021/ci8002649,
+ author = {Rohrer, Sebastian G. and Baumann, Knut},
+ title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {2},
+ pages = {169-184},
+ year = {2009},
+ doi = {10.1021/ci8002649},
+ URL = {https://doi.org/10.1021/ci8002649}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-858#not
- &NULL}{MUV-858__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-858#not &NULL}{MUV-858__names__noun}.
diff --git a/data/tabular/MUV_859/meta.yaml b/data/tabular/MUV_859/meta.yaml
index e2f43b5a3..2bbecfc47 100644
--- a/data/tabular/MUV_859/meta.yaml
+++ b/data/tabular/MUV_859/meta.yaml
@@ -1,36 +1,34 @@
----
name: MUV_859
description: Activity in the MUV_859 assay
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: MUV-859
- type: boolean
- description: MUV-859
- names:
- - noun: an allosteric inhibitor of the muscarinic acetylcholine receptor M1
- - noun: an allosteric inhibitor of the M1 receptor
+ - id: MUV-859
+ type: boolean
+ description: MUV-859
+ names:
+ - noun: an allosteric inhibitor of the muscarinic acetylcholine receptor M1
+ - noun: an allosteric inhibitor of the M1 receptor
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/muv.csv.gz
+ description: Data source
num_points: 14746
bibtex:
- - |
- @article{doi:10.1021/ci8002649,
- author = {Rohrer, Sebastian G. and Baumann, Knut},
- title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
- journal = {Journal of Chemical Information and Modeling},
- volume = {49},
- number = {2},
- pages = {169-184},
- year = {2009},
- doi = {10.1021/ci8002649},
- URL = {https://doi.org/10.1021/ci8002649}}
+ - |
+ @article{doi:10.1021/ci8002649,
+ author = {Rohrer, Sebastian G. and Baumann, Knut},
+ title = {Maximum Unbiased Validation (MUV) Data Sets for Virtual Screening Based on PubChem Bioactivity Data},
+ journal = {Journal of Chemical Information and Modeling},
+ volume = {49},
+ number = {2},
+ pages = {169-184},
+ year = {2009},
+ doi = {10.1021/ci8002649},
+ URL = {https://doi.org/10.1021/ci8002649}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-859#not
- &NULL}{MUV-859__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {MUV-859#not &NULL}{MUV-859__names__noun}.
diff --git a/data/tabular/RedDB/meta.yaml b/data/tabular/RedDB/meta.yaml
index 91ba5989e..94df55d18 100644
--- a/data/tabular/RedDB/meta.yaml
+++ b/data/tabular/RedDB/meta.yaml
@@ -1,168 +1,155 @@
----
name: RedDB
description: |-
- RedDB: a computational database that contains 30861 molecules
- from two prominent classes of organic electroactive compounds, quinones and aza-aromatics,
- has been presented. RedDB incorporates miscellaneous physicochemical property information
- of the compounds that can potentially be employed as battery performance descriptors.
- RedDBs development steps, including:
- (i) chemical library generation,
- (ii) molecular property prediction based on quantum chemical calculations,
- (iii) aqueous solubility prediction using machine learning,
- (iv) data processing and database creation, have been described.
+ RedDB: a computational database that contains 30861 molecules
+ from two prominent classes of organic electroactive compounds, quinones and aza-aromatics,
+ has been presented. RedDB incorporates miscellaneous physicochemical property information
+ of the compounds that can potentially be employed as battery performance descriptors.
+ RedDB's development steps, including:
+ (i) chemical library generation,
+ (ii) molecular property prediction based on quantum chemical calculations,
+ (iii) aqueous solubility prediction using machine learning,
+ (iv) data processing and database creation, have been described.
targets:
- - id: molecularSurface
- description: Total surface area of a molecule
- units: \AA^2
- type: continuous
- names:
- - noun: molecular surface area
- - id: reactionFieldEnergy
- description: Energy associated with the interaction during a chemical reaction
- units: kT
- type: continuous
- significant_digits: 5
- names:
- - noun: chemical reaction field energy
- - id: solventAccessSurface
- description: Surface area of a molecule accessible to a solvent
- units: \AA^2
- type: continuous
- names:
- - noun: solvent-accessible surface area
- - id: cavityEnergy
- description: Energy associated with the formation of cavities in a molecular structure
- units: kT
- type: continuous
- names:
- - noun: cavity formation energy at the PBE level of theory
- - id: gasEnergy
- description: Total energy of a molecule in the gas phase
- units: Hartree
- significant_digits: 5
- type: continuous
- names:
- - noun: gas-phase molecular energy at the PBE level of theory
- - id: gasHomo
- description: Highest Occupied Molecular Orbital (HOMO) energy of a gas-phase molecule
- units: Hartree
- type: continuous
- significant_digits: 5
- names:
- - noun: gaseous phase HOMO energy at the PBE level of theory
- - noun: gaseous phase highest occupied molecular orbital energy at the PBE level of theory
- - noun: gaseous phase highest occupied molecular orbital (HOMO) energy at the PBE level of theory
- - id: gasLumo
- description: Lowest Unoccupied Molecular Orbital (LUMO) energy of a gas-phase molecule
- units: Hartree
- type: continuous
- significant_digits: 5
- names:
- - noun: gaseous phase LUMO energy at the PBE level of theory
- - noun: gaseous phase lowest unoccupied molecular orbital energy at the PBE level of theory
- - noun: gaseous phase lowest unoccupied molecular orbital energy (LUMO) at the PBE level of theory
- - id: solutionEnergy
- description: Total energy of a molecule in a solution
- units: Hartree
- type: continuous
- significant_digits: 5
- names:
- - noun: aqueous phase molecular energy at the PBE level of theory
- - id: solutionHomo
- description: Highest Occupied Molecular Orbital (HOMO) energy in a solution
- units: Hartree
- type: continuous
- significant_digits: 5
- names:
- - noun: aqueous phase HOMO energy at the PBE level of theory
- - noun: aqueous phase energy of the highest occupied molecular orbital at the PBE level of theory
- - noun: aqueous phase energy of the highest occupied molecular orbital (HOMO) at the PBE level of theory
- - id: solutionLumo
- description: Lowest Unoccupied Molecular Orbital (LUMO) energy in a solution
- units: Hartree
- type: continuous
- significant_digits: 5
- names:
- - noun: aqueous phase LUMO energy at the PBE level of theory
- - noun: aqueous phase energy of the lowest unoccupied molecular orbital at the PBE level of theory
- - noun: aqueous phase energy of the lowest unoccupied molecular orbital (LUMO) at the PBE level of theory
- - id: nuclearRepulsionEnergy
- description: Electrostatic repulsion energy between atomic nuclei in a molecule
- units: Hartree
- type: continuous
- significant_digits: 5
- names:
- - noun: nuclear repulsion energy at the PBE level of theory
- - id: optGasEnergy
- description: Total energy of an optimized gas-phase molecule
- units: Hartree
- type: continuous
- significant_digits: 5
- names:
- - noun: optimized gas-phase molecular energy at the PBE level of theory
- - id: optGasHomo
- description: Highest Occupied Molecular Orbital (HOMO) energy of an optimized gas-phase molecule
- units: Hartree
- type: continuous
- significant_digits: 5
- names:
- - noun: optimized gas-phase HOMO energy at the PBE level of theory
- - id: optGasLumo
- description: Lowest Unoccupied Molecular Orbital (LUMO) energy of an optimized gas-phase molecule
- units: Hartree
- significant_digits: 5
- type: continuous
- names:
- - noun: optimized gas-phase LUMO energy calculated at the PBE level of theory
- - noun: optimized gas-phase LUMO energy calculated with DFT at the PBE level of theory
+ - id: molecularSurface
+ description: Total surface area of a molecule
+ units: \AA^2
+ type: continuous
+ names:
+ - noun: molecular surface area
+ - id: reactionFieldEnergy
+ description: Energy associated with the interaction during a chemical reaction
+ units: kT
+ type: continuous
+ significant_digits: 5
+ names:
+ - noun: chemical reaction field energy
+ - id: solventAccessSurface
+ description: Surface area of a molecule accessible to a solvent
+ units: \AA^2
+ type: continuous
+ names:
+ - noun: solvent-accessible surface area
+ - id: cavityEnergy
+ description: Energy associated with the formation of cavities in a molecular structure
+ units: kT
+ type: continuous
+ names:
+ - noun: cavity formation energy at the PBE level of theory
+ - id: gasEnergy
+ description: Total energy of a molecule in the gas phase
+ units: Hartree
+ significant_digits: 5
+ type: continuous
+ names:
+ - noun: gas-phase molecular energy at the PBE level of theory
+ - id: gasHomo
+ description: Highest Occupied Molecular Orbital (HOMO) energy of a gas-phase molecule
+ units: Hartree
+ type: continuous
+ significant_digits: 5
+ names:
+ - noun: gaseous phase HOMO energy at the PBE level of theory
+ - noun: gaseous phase highest occupied molecular orbital energy at the PBE level of theory
+ - noun: gaseous phase highest occupied molecular orbital (HOMO) energy at the PBE level of theory
+ - id: gasLumo
+ description: Lowest Unoccupied Molecular Orbital (LUMO) energy of a gas-phase molecule
+ units: Hartree
+ type: continuous
+ significant_digits: 5
+ names:
+ - noun: gaseous phase LUMO energy at the PBE level of theory
+ - noun: gaseous phase lowest unoccupied molecular orbital energy at the PBE level of theory
+ - noun: gaseous phase lowest unoccupied molecular orbital (LUMO) energy at the PBE level of theory
+ - id: solutionEnergy
+ description: Total energy of a molecule in a solution
+ units: Hartree
+ type: continuous
+ significant_digits: 5
+ names:
+ - noun: aqueous phase molecular energy at the PBE level of theory
+ - id: solutionHomo
+ description: Highest Occupied Molecular Orbital (HOMO) energy in a solution
+ units: Hartree
+ type: continuous
+ significant_digits: 5
+ names:
+ - noun: aqueous phase HOMO energy at the PBE level of theory
+ - noun: aqueous phase energy of the highest occupied molecular orbital at the PBE level of theory
+ - noun: aqueous phase energy of the highest occupied molecular orbital (HOMO) at the PBE level of theory
+ - id: solutionLumo
+ description: Lowest Unoccupied Molecular Orbital (LUMO) energy in a solution
+ units: Hartree
+ type: continuous
+ significant_digits: 5
+ names:
+ - noun: aqueous phase LUMO energy at the PBE level of theory
+ - noun: aqueous phase energy of the lowest unoccupied molecular orbital at the PBE level of theory
+ - noun: aqueous phase energy of the lowest unoccupied molecular orbital (LUMO) at the PBE level of theory
+ - id: nuclearRepulsionEnergy
+ description: Electrostatic repulsion energy between atomic nuclei in a molecule
+ units: Hartree
+ type: continuous
+ significant_digits: 5
+ names:
+ - noun: nuclear repulsion energy at the PBE level of theory
+ - id: optGasEnergy
+ description: Total energy of an optimized gas-phase molecule
+ units: Hartree
+ type: continuous
+ significant_digits: 5
+ names:
+ - noun: optimized gas-phase molecular energy at the PBE level of theory
+ - id: optGasHomo
+ description: Highest Occupied Molecular Orbital (HOMO) energy of an optimized gas-phase molecule
+ units: Hartree
+ type: continuous
+ significant_digits: 5
+ names:
+ - noun: optimized gas-phase HOMO energy at the PBE level of theory
+ - id: optGasLumo
+ description: Lowest Unoccupied Molecular Orbital (LUMO) energy of an optimized gas-phase molecule
+ units: Hartree
+ significant_digits: 5
+ type: continuous
+ names:
+ - noun: optimized gas-phase LUMO energy calculated at the PBE level of theory
+ - noun: optimized gas-phase LUMO energy calculated with DFT at the PBE level of theory
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://doi.org/10.1038/s41597-022-01832-2
- description: corresponding publication
- - url: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/F3QFSQ
- description: Data source
+ - url: https://doi.org/10.1038/s41597-022-01832-2
+ description: corresponding publication
+ - url: https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/F3QFSQ
+ description: Data source
num_points: 30861
bibtex:
- - |-
- @article{Elif2022,
- doi = {10.1021/ci300400a},
- url = {https://doi.org/10.1038/s41597-022-01832-2},
- year = {2022},
- volume = {9},
- number = {1},
- author = {Elif Sorkun and Qi Zhang and Abhishek Khetan and Murat Cihan Sorkun and
- Suleyman Er},
- journal = {Nature Scientific Data}
+ - |-
+ @article{Elif2022,
+ doi = {10.1038/s41597-022-01832-2},
+ url = {https://doi.org/10.1038/s41597-022-01832-2},
+ year = {2022},
+ volume = {9},
+ number = {1},
+ author = {Elif Sorkun and Qi Zhang and Abhishek Khetan and Murat Cihan Sorkun and
+ Suleyman Er},
+ journal = {Nature Scientific Data}}
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {molecularSurface__names__noun}
- of {molecularSurface#} {molecularSurface__units}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {reactionFieldEnergy__names__noun}
- of {reactionFieldEnergy#} {reactionFieldEnergy__units}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {solventAccessSurface__names__noun}
- of {solventAccessSurface#} {solventAccessSurface__units}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {cavityEnergy__names__noun}
- of {cavityEnergy#} {cavityEnergy__units}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {gasEnergy__names__noun}
- of {gasEnergy#} {gasEnergy__units}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {gasHomo__names__noun}
- of {gasHomo#} {gasHomo__units}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {gasLumo__names__noun}
- of {gasLumo#} {gasLumo__units}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has an {solutionEnergy__names__noun}
- of {solutionEnergy#} {solutionEnergy__units}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {solutionLumo__names__noun}
- of {solutionLumo#} {solutionLumo__units}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {nuclearRepulsionEnergy__names__noun}
- of {nuclearRepulsionEnergy#} {nuclearRepulsionEnergy__units}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {optGasEnergy__names__noun}
- of {optGasEnergy#} {optGasEnergy__units}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {optGasHomo__names__noun}
- of {optGasHomo#} {optGasHomo__units}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} based on the {#text |!}description{# below|!}.
- Description: It has an {solutionLumo__names__noun} {solutionLumo#} {solutionLumo__units} and an {solutionHomo__names__noun} of {solutionHomo#} {solutionHomo__units}.
- Result: {SMILES#}
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {molecularSurface__names__noun} of {molecularSurface#} {molecularSurface__units}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {reactionFieldEnergy__names__noun} of {reactionFieldEnergy#} {reactionFieldEnergy__units}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {solventAccessSurface__names__noun} of {solventAccessSurface#} {solventAccessSurface__units}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {cavityEnergy__names__noun} of {cavityEnergy#} {cavityEnergy__units}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {gasEnergy__names__noun} of {gasEnergy#} {gasEnergy__units}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {gasHomo__names__noun} of {gasHomo#} {gasHomo__units}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {gasLumo__names__noun} of {gasLumo#} {gasLumo__units}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has an {solutionEnergy__names__noun} of {solutionEnergy#} {solutionEnergy__units}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has an {solutionLumo__names__noun} of {solutionLumo#} {solutionLumo__units}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has a {nuclearRepulsionEnergy__names__noun} of {nuclearRepulsionEnergy#} {nuclearRepulsionEnergy__units}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has an {optGasEnergy__names__noun} of {optGasEnergy#} {optGasEnergy__units}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} has an {optGasHomo__names__noun} of {optGasHomo#} {optGasHomo__units}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: It has an {solutionLumo__names__noun} of {solutionLumo#} {solutionLumo__units} and an {solutionHomo__names__noun} of {solutionHomo#} {solutionHomo__units}.
+ Result: {SMILES#}
diff --git a/data/tabular/SIDER/meta.yaml b/data/tabular/SIDER/meta.yaml
index 1472f3bfb..0fb68d2c4 100644
--- a/data/tabular/SIDER/meta.yaml
+++ b/data/tabular/SIDER/meta.yaml
@@ -1,215 +1,191 @@
----
name: SIDER
description: Database of marketed drugs and adverse drug reactions (ADR), grouped into 23 system organ classes.
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
targets:
- - id: hepatobiliary_disorders
- description: hepatobiliary disorders
- type: boolean
- names:
- - noun: hepatobiliary disorders
- - noun: liver and gallbladder disorders
- - id: metabolism_and_nutrition_disorders
- description: metabolism and nutrition disorders
- type: boolean
- names:
- - noun: metabolism and nutrition disorders
- - noun: metabolic and nutritional disorders
- - id: eye_disorders
- description: eye disorders
- type: boolean
- names:
- - noun: eye disorders
- - noun: ophthalmic disorders
- - id: musculoskeletal_and_connective_tissue_disorders
- description: musculoskeletal and connective tissue disorders
- type: boolean
- names:
- - noun: musculoskeletal and connective tissue disorders
- - noun: muscle and joint disorders
- - id: gastrointestinal_disorders
- description: gastrointestinal disorders
- type: boolean
- names:
- - noun: gastrointestinal disorders
- - noun: digestive system disorders
- - id: immune_system_disorders
- description: immune system disorders
- type: boolean
- names:
- - noun: immune system disorders
- - noun: disorders of the immune system
- - id: reproductive_system_and_breast_disorders
- description: reproductive system and breast disorders
- type: boolean
- names:
- - noun: reproductive system and breast disorders
- - noun: disorders of the breasts and the reproductive system
- - id: neoplasms_benign_malignant_and_unspecified_(incl_cysts_and_polyps)
- description: neoplasms benign, malignant and unspecified (incl cysts and polyps)
- type: boolean
- names:
- - noun: neoplasms benign, malignant and unspecified (incl cysts and polyps)
- - noun: benign and malignant tumors (including cysts and polyps)
- - id: general_disorders_and_administration_site_conditions
- description: general disorders and administration site conditions
- type: boolean
- names:
- - noun: general disorders and administration site conditions
- - noun: general health and administration site conditions
- - id: endocrine_disorders
- description: endocrine disorders
- type: boolean
- names:
- - noun: endocrine disorders
- - noun: endocrine system disorders
- - id: surgical_and_medical_procedures
- description: surgical and medical procedures
- type: boolean
- names:
- - noun: surgical and medical procedures
- - noun: medical and surgical procedures
- - id: vascular_disorders
- description: vascular disorders
- type: boolean
- names:
- - noun: vascular disorders
- - noun: vascular system disorders
- - id: blood_and_lymphatic_system_disorders
- description: blood and lymphatic system disorders
- type: boolean
- names:
- - noun: blood and lymphatic system disorders
- - noun: disorders of the blood and lymphatic system
- - id: skin_and_subcutaneous_tissue_disorders
- description: skin and subcutaneous tissue disorders
- type: boolean
- names:
- - noun: skin and subcutaneous tissue disorders
- - noun: disorders of the skin and subcutaneous tissue
- - id: congenital_familial_and_genetic_disorders
- description: congenital, familial and genetic disorders
- type: boolean
- names:
- - noun: congenital, familial and genetic disorders
- - noun: familial, congenital and genetic disorders
- - id: infections_and_infestations
- description: infections and infestations
- type: boolean
- names:
- - noun: infections and infestations
- - noun: infestations and infections
- - id: respiratory_thoracic_and_mediastinal_disorders
- description: respiratory, thoracic and mediastinal disorders
- type: boolean
- names:
- - noun: respiratory, thoracic and mediastinal disorders
- - noun: respiratory and thoracic disorders
- - id: psychiatric_disorders
- description: psychiatric disorders
- type: boolean
- names:
- - noun: psychiatric disorders
- - noun: mental health and psychiatric disorders
- - id: renal_and_urinary_disorders
- description: renal and urinary disorders
- type: boolean
- names:
- - noun: renal and urinary disorders
- - noun: kidney and urinary tract disorders
- - id: pregnancy_puerperium_and_perinatal_conditions
- description: pregnancy, puerperium and perinatal conditions
- type: boolean
- names:
- - noun: pregnancy, puerperium and perinatal conditions
- - noun: pregnancy, childbirth, and newborn conditions
- - id: ear_and_labyrinth_disorders
- description: ear and labyrinth disorders
- type: boolean
- names:
- - noun: ear and labyrinth disorders
- - noun: ear and inner ear disorders
- - id: cardiac_disorders
- description: cardiac disorders
- type: boolean
- names:
- - noun: cardiac disorders
- - noun: cardiovascular disorders
- - id: nervous_system_disorders
- description: nervous system disorders
- type: boolean
- names:
- - noun: nervous system disorders
- - noun: disorders of the nervous system
+ - id: hepatobiliary_disorders
+ description: hepatobiliary disorders
+ type: boolean
+ names:
+ - noun: hepatobiliary disorders
+ - noun: liver and gallbladder disorders
+ - id: metabolism_and_nutrition_disorders
+ description: metabolism and nutrition disorders
+ type: boolean
+ names:
+ - noun: metabolism and nutrition disorders
+ - noun: metabolic and nutritional disorders
+ - id: eye_disorders
+ description: eye disorders
+ type: boolean
+ names:
+ - noun: eye disorders
+ - noun: ophthalmic disorders
+ - id: musculoskeletal_and_connective_tissue_disorders
+ description: musculoskeletal and connective tissue disorders
+ type: boolean
+ names:
+ - noun: musculoskeletal and connective tissue disorders
+ - noun: muscle and joint disorders
+ - id: gastrointestinal_disorders
+ description: gastrointestinal disorders
+ type: boolean
+ names:
+ - noun: gastrointestinal disorders
+ - noun: digestive system disorders
+ - id: immune_system_disorders
+ description: immune system disorders
+ type: boolean
+ names:
+ - noun: immune system disorders
+ - noun: disorders of the immune system
+ - id: reproductive_system_and_breast_disorders
+ description: reproductive system and breast disorders
+ type: boolean
+ names:
+ - noun: reproductive system and breast disorders
+ - noun: disorders of the breasts and the reproductive system
+ - id: neoplasms_benign_malignant_and_unspecified_(incl_cysts_and_polyps)
+ description: neoplasms benign, malignant and unspecified (incl cysts and polyps)
+ type: boolean
+ names:
+ - noun: neoplasms benign, malignant and unspecified (incl cysts and polyps)
+ - noun: benign and malignant tumors (including cysts and polyps)
+ - id: general_disorders_and_administration_site_conditions
+ description: general disorders and administration site conditions
+ type: boolean
+ names:
+ - noun: general disorders and administration site conditions
+ - noun: general health and administration site conditions
+ - id: endocrine_disorders
+ description: endocrine disorders
+ type: boolean
+ names:
+ - noun: endocrine disorders
+ - noun: endocrine system disorders
+ - id: surgical_and_medical_procedures
+ description: surgical and medical procedures
+ type: boolean
+ names:
+ - noun: surgical and medical procedures
+ - noun: medical and surgical procedures
+ - id: vascular_disorders
+ description: vascular disorders
+ type: boolean
+ names:
+ - noun: vascular disorders
+ - noun: vascular system disorders
+ - id: blood_and_lymphatic_system_disorders
+ description: blood and lymphatic system disorders
+ type: boolean
+ names:
+ - noun: blood and lymphatic system disorders
+ - noun: disorders of the blood and lymphatic system
+ - id: skin_and_subcutaneous_tissue_disorders
+ description: skin and subcutaneous tissue disorders
+ type: boolean
+ names:
+ - noun: skin and subcutaneous tissue disorders
+ - noun: disorders of the skin and subcutaneous tissue
+ - id: congenital_familial_and_genetic_disorders
+ description: congenital, familial and genetic disorders
+ type: boolean
+ names:
+ - noun: congenital, familial and genetic disorders
+ - noun: familial, congenital and genetic disorders
+ - id: infections_and_infestations
+ description: infections and infestations
+ type: boolean
+ names:
+ - noun: infections and infestations
+ - noun: infestations and infections
+ - id: respiratory_thoracic_and_mediastinal_disorders
+ description: respiratory, thoracic and mediastinal disorders
+ type: boolean
+ names:
+ - noun: respiratory, thoracic and mediastinal disorders
+ - noun: respiratory and thoracic disorders
+ - id: psychiatric_disorders
+ description: psychiatric disorders
+ type: boolean
+ names:
+ - noun: psychiatric disorders
+ - noun: mental health and psychiatric disorders
+ - id: renal_and_urinary_disorders
+ description: renal and urinary disorders
+ type: boolean
+ names:
+ - noun: renal and urinary disorders
+ - noun: kidney and urinary tract disorders
+ - id: pregnancy_puerperium_and_perinatal_conditions
+ description: pregnancy, puerperium and perinatal conditions
+ type: boolean
+ names:
+ - noun: pregnancy, puerperium and perinatal conditions
+ - noun: pregnancy, childbirth, and newborn conditions
+ - id: ear_and_labyrinth_disorders
+ description: ear and labyrinth disorders
+ type: boolean
+ names:
+ - noun: ear and labyrinth disorders
+ - noun: ear and inner ear disorders
+ - id: cardiac_disorders
+ description: cardiac disorders
+ type: boolean
+ names:
+ - noun: cardiac disorders
+ - noun: cardiovascular disorders
+ - id: nervous_system_disorders
+ description: nervous system disorders
+ type: boolean
+ names:
+ - noun: nervous system disorders
+ - noun: disorders of the nervous system
license: CC BY 4.0
links:
- - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
- description: corresponding publication
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/sider.csv.gz
- description: Data source
+ - url: https://academic.oup.com/nar/article/44/D1/D1075/2502602?login=false
+ description: corresponding publication
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/sider.csv.gz
+ description: Data source
num_points: 1427
bibtex:
- - |-
- @article{10.1093/nar/gkv1075,
- author = {Kuhn, Michael and Letunic, Ivica and Jensen, Lars Juhl and Bork, Peer},
- title = "{The SIDER database of drugs and side effects}",
- journal = {Nucleic Acids Research},
- volume = {44},
- number = {D1},
- pages = {D1075-D1079},
- year = {2015},
- month = {10},
- issn = {0305-1048},
- doi = {10.1093/nar/gkv1075},
- url = {https://doi.org/10.1093/nar/gkv1075},
- }
+ - |-
+ @article{10.1093/nar/gkv1075,
+ author = {Kuhn, Michael and Letunic, Ivica and Jensen, Lars Juhl and Bork, Peer},
+ title = "{The SIDER database of drugs and side effects}",
+ journal = {Nucleic Acids Research},
+ volume = {44},
+ number = {D1},
+ pages = {D1075-D1079},
+ year = {2015},
+ month = {10},
+ issn = {0305-1048},
+ doi = {10.1093/nar/gkv1075},
+ url = {https://doi.org/10.1093/nar/gkv1075},
+ }
templates:
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {hepatobiliary_disorders#not
- a &a }{#potential cause|potential reason!} for {hepatobiliary_disorders__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {metabolism_and_nutrition_disorders#not
- a &a }{#potential cause|potential reason!} for {metabolism_and_nutrition_disorders__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {eye_disorders#not
- a &a }{#potential cause|potential reason!} for {eye_disorders__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {musculoskeletal_and_connective_tissue_disorders#not
- a &a }{#potential cause|potential reason!} for {musculoskeletal_and_connective_tissue_disorders__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {gastrointestinal_disorders#not
- a &a }{#potential cause|potential reason!} for {gastrointestinal_disorders__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {immune_system_disorders#not
- a &a }{#potential cause|potential reason!} for {immune_system_disorders__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {reproductive_system_and_breast_disorders#not
- a &a }{#potential cause|potential reason!} for {reproductive_system_and_breast_disorders__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {neoplasms_benign_malignant_and_unspecified_(incl_cysts_and_polyps)#not
- a &a }{#potential cause|potential reason!} for {neoplasms_benign_malignant_and_unspecified_(incl_cysts_and_polyps)__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {general_disorders_and_administration_site_conditions#not
- a &a }{#potential cause|potential reason!} for {general_disorders_and_administration_site_conditions__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {endocrine_disorders#not
- a &a }{#potential cause|potential reason!} for {endocrine_disorders__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {surgical_and_medical_procedures#not
- a &a }{#potential cause|potential reason!} for {surgical_and_medical_procedures__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {vascular_disorders#not
- a &a }{#potential cause|potential reason!} for {vascular_disorders__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {blood_and_lymphatic_system_disorders#not
- a &a }{#potential cause|potential reason!} for {blood_and_lymphatic_system_disorders__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {skin_and_subcutaneous_tissue_disorders#not
- a &a }{#potential cause|potential reason!} for {skin_and_subcutaneous_tissue_disorders__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {congenital_familial_and_genetic_disorders#not
- a &a }{#potential cause|potential reason!} for {congenital_familial_and_genetic_disorders__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {infections_and_infestations#not
- a &a }{#potential cause|potential reason!} for {infections_and_infestations__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {respiratory_thoracic_and_mediastinal_disorders#not
- a &a }{#potential cause|potential reason!} for {respiratory_thoracic_and_mediastinal_disorders__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {psychiatric_disorders#not
- a &a }{#potential cause|potential reason!} for {psychiatric_disorders__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {renal_and_urinary_disorders#not
- a &a }{#potential cause|potential reason!} for {renal_and_urinary_disorders__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {pregnancy_puerperium_and_perinatal_conditions#not
- a &a }{#potential cause|potential reason!} for {pregnancy_puerperium_and_perinatal_conditions__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {ear_and_labyrinth_disorders#not
- a &a }{#potential cause|potential reason!} for {ear_and_labyrinth_disorders__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {cardiac_disorders#not
- a &a }{#potential cause|potential reason!} for {cardiac_disorders__names__noun}.
- - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {nervous_system_disorders#not
- a &a }{#potential cause|potential reason!} for {nervous_system_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {hepatobiliary_disorders#not a &a }{#potential cause|potential reason!} for {hepatobiliary_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {metabolism_and_nutrition_disorders#not a &a }{#potential cause|potential reason!} for {metabolism_and_nutrition_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {eye_disorders#not a &a }{#potential cause|potential reason!} for {eye_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {musculoskeletal_and_connective_tissue_disorders#not a &a }{#potential cause|potential reason!} for {musculoskeletal_and_connective_tissue_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {gastrointestinal_disorders#not a &a }{#potential cause|potential reason!} for {gastrointestinal_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {immune_system_disorders#not a &a }{#potential cause|potential reason!} for {immune_system_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {reproductive_system_and_breast_disorders#not a &a }{#potential cause|potential reason!} for {reproductive_system_and_breast_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {neoplasms_benign_malignant_and_unspecified_(incl_cysts_and_polyps)#not a &a }{#potential cause|potential reason!} for {neoplasms_benign_malignant_and_unspecified_(incl_cysts_and_polyps)__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {general_disorders_and_administration_site_conditions#not a &a }{#potential cause|potential reason!} for {general_disorders_and_administration_site_conditions__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {endocrine_disorders#not a &a }{#potential cause|potential reason!} for {endocrine_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {surgical_and_medical_procedures#not a &a }{#potential cause|potential reason!} for {surgical_and_medical_procedures__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {vascular_disorders#not a &a }{#potential cause|potential reason!} for {vascular_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {blood_and_lymphatic_system_disorders#not a &a }{#potential cause|potential reason!} for {blood_and_lymphatic_system_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {skin_and_subcutaneous_tissue_disorders#not a &a }{#potential cause|potential reason!} for {skin_and_subcutaneous_tissue_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {congenital_familial_and_genetic_disorders#not a &a }{#potential cause|potential reason!} for {congenital_familial_and_genetic_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {infections_and_infestations#not a &a }{#potential cause|potential reason!} for {infections_and_infestations__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {respiratory_thoracic_and_mediastinal_disorders#not a &a }{#potential cause|potential reason!} for {respiratory_thoracic_and_mediastinal_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {psychiatric_disorders#not a &a }{#potential cause|potential reason!} for {psychiatric_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {renal_and_urinary_disorders#not a &a }{#potential cause|potential reason!} for {renal_and_urinary_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {pregnancy_puerperium_and_perinatal_conditions#not a &a }{#potential cause|potential reason!} for {pregnancy_puerperium_and_perinatal_conditions__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {ear_and_labyrinth_disorders#not a &a }{#potential cause|potential reason!} for {ear_and_labyrinth_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {cardiac_disorders#not a &a }{#potential cause|potential reason!} for {cardiac_disorders__names__noun}.
+ - The {#molecule|compound|chemical|molecular species|chemical compound!} with the {SMILES__description} {#representation of |!}{SMILES#} is {nervous_system_disorders#not a &a }{#potential cause|potential reason!} for {nervous_system_disorders__names__noun}.
diff --git a/data/tabular/ames_mutagenicity/meta.yaml b/data/tabular/ames_mutagenicity/meta.yaml
index fd841fc07..654103ef2 100644
--- a/data/tabular/ames_mutagenicity/meta.yaml
+++ b/data/tabular/ames_mutagenicity/meta.yaml
@@ -1,132 +1,130 @@
----
name: ames_mutagenicity
description: |-
- Mutagenicity means the ability of a drug to induce genetic alterations.
- Drugs that can cause damage to the DNA can result in cell death or other severe
- adverse effects. Nowadays, the most widely used assay for testing the mutagenicity
- of compounds is the Ames experiment which was invented by a professor named
- Ames. The Ames test is a short term bacterial reverse mutation assay detecting
- a large number of compounds which can induce genetic damage and frameshift mutations.
- The dataset is aggregated from four papers.
+ Mutagenicity means the ability of a drug to induce genetic alterations.
+ Drugs that can cause damage to the DNA can result in cell death or other severe
+ adverse effects. Nowadays, the most widely used assay for testing the mutagenicity
+ of compounds is the Ames experiment which was invented by a professor named
+ Ames. The Ames test is a short term bacterial reverse mutation assay detecting
+ a large number of compounds which can induce genetic damage and frameshift mutations.
+ The dataset is aggregated from four papers.
targets:
- - id: mutagenic
- description: whether it is mutagenic (1) or not mutagenic (0)
- units:
- type: boolean
- names:
- - noun: mutagenicity
- - noun: Ames mutagenicity
- - adjective: mutagenic
- - adjective: Ames mutagenic
- - verb: has the ability to induce genetic alterations
- - gerund: having the potential to cause mutations
- - gerund: having the potential to induce genetic alterations
+ - id: mutagenic
+ description: whether it is mutagenic (1) or not mutagenic (0)
+ units:
+ type: boolean
+ names:
+ - noun: mutagenicity
+ - noun: Ames mutagenicity
+ - adjective: mutagenic
+ - adjective: Ames mutagenic
+ - verb: has the ability to induce genetic alterations
+ - gerund: having the potential to cause mutations
+ - gerund: having the potential to induce genetic alterations
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://doi.org/10.1021/ci300400a
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/tox/#ames-mutagenicity
- description: Data source
+ - url: https://doi.org/10.1021/ci300400a
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/tox/#ames-mutagenicity
+ description: Data source
num_points: 7278
bibtex:
- - |-
- @article{Xu2012,
- doi = {10.1021/ci300400a},
- url = {https://doi.org/10.1021/ci300400a},
- year = {2012},
- month = oct,
- publisher = {American Chemical Society (ACS)},
- volume = {52},
- number = {11},
- pages = {2840--2847},
- author = {Congying Xu and Feixiong Cheng and Lei Chen and
- Zheng Du and Weihua Li and Guixia Liu and Philip W. Lee and Yun Tang},
- title = {In silico Prediction of Chemical Ames Mutagenicity},
- journal = {Journal of Chemical Information and Modeling}
+ - |-
+ @article{Xu2012,
+ doi = {10.1021/ci300400a},
+ url = {https://doi.org/10.1021/ci300400a},
+ year = {2012},
+ month = oct,
+ publisher = {American Chemical Society (ACS)},
+ volume = {52},
+ number = {11},
+ pages = {2840--2847},
+ author = {Congying Xu and Feixiong Cheng and Lei Chen and
+ Zheng Du and Weihua Li and Guixia Liu and Philip W. Lee and Yun Tang},
+ title = {In silico Prediction of Chemical Ames Mutagenicity},
+ journal = {Journal of Chemical Information and Modeling}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {mutagenic#no &NULL}{mutagenic__names__adjective}
- properties.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {mutagenic#no &NULL}{mutagenic__names__adjective} {#properties|characteristics|features!}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {mutagenic#not &NULL}identified as {mutagenic__names__adjective}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {mutagenic#not &NULL}{mutagenic__names__adjective}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {mutagenic__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {mutagenic#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {mutagenic__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {mutagenic#not &NULL}{mutagenic__names__adjective}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {mutagenic#not &NULL}{mutagenic__names__adjective}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {mutagenic__names__adjective}?
- Assistant: {mutagenic#No&Yes}, this molecule is {mutagenic#not &NULL}{mutagenic__names__adjective}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {mutagenic__names__adjective}?
- Assistant: {mutagenic#No&Yes}, it is {mutagenic#not &NULL}{mutagenic__names__adjective}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {mutagenic#not &NULL}{mutagenic__names__adjective}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {mutagenic#not &NULL}{mutagenic__names__adjective}?
- Assistant: This is a molecule that is {mutagenic#not &NULL}{mutagenic__names__adjective}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {mutagenic#not &NULL}be {mutagenic__names__adjective}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {mutagenic#not &NULL}{mutagenic__names__adjective}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {mutagenic#not &NULL}be {mutagenic__names__adjective}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {mutagenic#not &NULL}{mutagenic__names__adjective}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} {mutagenic__names__adjective}:{mutagenic#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {mutagenic__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{mutagenic#False&True}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {mutagenic__names__adjective}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {mutagenic%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {mutagenic__names__adjective}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {mutagenic%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {mutagenic#not &NULL}{mutagenic__names__adjective}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%mutagenic%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {mutagenic#not &NULL}{mutagenic__names__adjective}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%mutagenic%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {mutagenic#no &NULL}{mutagenic__names__adjective} properties.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {mutagenic#no &NULL}{mutagenic__names__adjective} {#properties|characteristics|features!}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {mutagenic#not &NULL}identified as {mutagenic__names__adjective}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {mutagenic#not &NULL}{mutagenic__names__adjective}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {mutagenic__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {mutagenic#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {mutagenic__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {mutagenic#not &NULL}{mutagenic__names__adjective}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {mutagenic#not &NULL}{mutagenic__names__adjective}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {mutagenic__names__adjective}?
+ Assistant: {mutagenic#No&Yes}, this molecule is {mutagenic#not &NULL}{mutagenic__names__adjective}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {mutagenic__names__adjective}?
+ Assistant: {mutagenic#No&Yes}, it is {mutagenic#not &NULL}{mutagenic__names__adjective}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {mutagenic#not &NULL}{mutagenic__names__adjective}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {mutagenic#not &NULL}{mutagenic__names__adjective}?
+ Assistant: This is a molecule that is {mutagenic#not &NULL}{mutagenic__names__adjective}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {mutagenic#not &NULL}be {mutagenic__names__adjective}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {mutagenic#not &NULL}{mutagenic__names__adjective}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {mutagenic#not &NULL}be {mutagenic__names__adjective}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {mutagenic#not &NULL}{mutagenic__names__adjective}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} {mutagenic__names__adjective}:{mutagenic#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {mutagenic__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{mutagenic#False&True}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {mutagenic__names__adjective}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {mutagenic%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {mutagenic__names__adjective}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {mutagenic%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {mutagenic#not &NULL}{mutagenic__names__adjective}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%mutagenic%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {mutagenic#not &NULL}{mutagenic__names__adjective}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%mutagenic%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/aminoacids/meta.yaml b/data/tabular/aminoacids/meta.yaml
index a44611c2b..90af3904a 100644
--- a/data/tabular/aminoacids/meta.yaml
+++ b/data/tabular/aminoacids/meta.yaml
@@ -1,41 +1,38 @@
----
name: aminoacids
description: |-
- The list of the 20 essential aminoacids, their SMILES, one letter and three letter codes.
+ The list of the 20 essential aminoacids, their SMILES, one letter and three letter codes.
targets:
- - id: three_letter_code
- description: three-letter code
- type: text
- - id: one_letter_code
- description: one-letter code
- type: text
- - id: aminoacid_name
- description: name
- type: text
- - id: type
- description: type of aminoacid
- type: text
+ - id: three_letter_code
+ description: three-letter code
+ type: text
+ - id: one_letter_code
+ description: one-letter code
+ type: text
+ - id: aminoacid_name
+ description: name
+ type: text
+ - id: type
+ description: type of aminoacid
+ type: text
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://chemistry.stackexchange.com/questions/138614/why-are-tyrosine-and-tryptophan-considered-hydrophobic
- description: reference for amino acid type
+ - url: https://chemistry.stackexchange.com/questions/138614/why-are-tyrosine-and-tryptophan-considered-hydrophobic
+ description: reference for amino acid type
num_points: 20
templates:
- - The {#essential amino acid|amino acid|amino acid (AA)|AA!} with the {SMILES__description} {SMILES#} has the one-letter code {one_letter_code#} and
- the three-letter code {three_letter_code#}.
- - The {#essential amino acid|amino acid|amino acid (AA)|AA!} {aminoacid_name#} has the one-letter code {one_letter_code#} and the three-letter code
- {three_letter_code#}.
- - |-
- Question: What is the one-letter code of the {#essential amino acid|amino acid|amino acid (AA)|AA!} with the {SMILES__description} {SMILES#}?
- Answer: {one_letter_code#}.
- - |-
- Question: What is the three-letter code of the {#essential amino acid|amino acid|amino acid (AA)|AA!} with the {SMILES__description} {SMILES#}?
- Answer: {three_letter_code#}.
- - |-
- Question: What is the type of the amino acid with the one-letter code {one_letter_code#} and {SMILES__description} {SMILES#}?
- Constraint: The possible types are: polar, non-polar, positively charged, negatively charged.
- Answer: From the provided amino acid types (polar, non-polar, positively charged, negatively charged), the amino acid with the one-letter code {one_letter_code#} is {type#}.
+ - The {#essential amino acid|amino acid|amino acid (AA)|AA!} with the {SMILES__description} {SMILES#} has the one-letter code {one_letter_code#} and the three-letter code {three_letter_code#}.
+ - The {#essential amino acid|amino acid|amino acid (AA)|AA!} {aminoacid_name#} has the one-letter code {one_letter_code#} and the three-letter code {three_letter_code#}.
+ - |-
+ Question: What is the one-letter code of the {#essential amino acid|amino acid|amino acid (AA)|AA!} with the {SMILES__description} {SMILES#}?
+ Answer: {one_letter_code#}.
+ - |-
+ Question: What is the three-letter code of the {#essential amino acid|amino acid|amino acid (AA)|AA!} with the {SMILES__description} {SMILES#}?
+ Answer: {three_letter_code#}.
+ - |-
+ Question: What is the type of the amino acid with the one-letter code {one_letter_code#} and {SMILES__description} {SMILES#}?
+ Constraint: The possible types are: polar, non-polar, positively charged, negatively charged.
+ Answer: From the provided amino acid types (polar, non-polar, positively charged, negatively charged), the amino acid with the one-letter code {one_letter_code#} is {type#}.
diff --git a/data/tabular/bc5chem/meta.yaml b/data/tabular/bc5chem/meta.yaml
index db7f684dd..cd7b301ba 100644
--- a/data/tabular/bc5chem/meta.yaml
+++ b/data/tabular/bc5chem/meta.yaml
@@ -1,55 +1,54 @@
----
name: bc5chem
description: |-
- BC5CHEM is a named entity recognition dataset for chemical mentions.
+ BC5CHEM is a named entity recognition dataset for chemical mentions.
targets:
- - id: matched_words
- description: matched words
- type: text
- names:
- - noun: entity
- - noun: matched entity
+ - id: matched_words
+ description: matched words
+ type: text
+ names:
+ - noun: entity
+ - noun: matched entity
identifiers:
- - id: sentence
- description: Sentence
- type: text
- names:
- - noun: sentence
- - noun: text
+ - id: sentence
+ description: Sentence
+ type: text
+ names:
+ - noun: sentence
+ - noun: text
license: https://huggingface.co/datasets/bigbio/blurb/blob/main/LICENSE
links:
- - url: https://huggingface.co/datasets/bigbio/blurb
- description: original dataset
+ - url: https://huggingface.co/datasets/bigbio/blurb
+ description: original dataset
benchmarks:
- - name: bc5chem
- link: hhttps://huggingface.co/datasets/bigbio/blurb
- split_column: split
+ - name: bc5chem
+ link: https://huggingface.co/datasets/bigbio/blurb
+ split_column: split
num_points: 13755
bibtex:
- - |-
- @article{gu2021domain,
- title = {
- Domain-specific language model pretraining for biomedical natural
- language processing
- },
- author = {
- Gu, Yu and Tinn, Robert and Cheng, Hao and Lucas, Michael and
- Usuyama, Naoto and Liu, Xiaodong and Naumann, Tristan and Gao,
- Jianfeng and Poon, Hoifung
- },
- year = 2021,
- journal = {ACM Transactions on Computing for Healthcare (HEALTH)},
- publisher = {ACM New York, NY},
- volume = 3,
- number = 1,
- pages = {1--23}
- }
+ - |-
+ @article{gu2021domain,
+ title = {
+ Domain-specific language model pretraining for biomedical natural
+ language processing
+ },
+ author = {
+ Gu, Yu and Tinn, Robert and Cheng, Hao and Lucas, Michael and
+ Usuyama, Naoto and Liu, Xiaodong and Naumann, Tristan and Gao,
+ Jianfeng and Poon, Hoifung
+ },
+ year = 2021,
+ journal = {ACM Transactions on Computing for Healthcare (HEALTH)},
+ publisher = {ACM New York, NY},
+ volume = 3,
+ number = 1,
+ pages = {1--23}
+ }
templates:
- - |-
- Task: Find all the mentions of {#chemicals|chemical compounds|chemical substances!} in the {#following|subsequent!} {#text|sentence!}. Return the matching {#words|entities!}. If there is no {#match|mention of a chemical|matching entity!}, return `no match`.
- {#Sentence|Description!}: {sentence#}
- Answer: {matched_words#}
- - |-
- User: Does the following text contain mentions of {#chemicals|chemical compounds|chemical substances!}?{# Can you return matches?| Can you output matches?| Please return matches.!}
- {#Text: |!}{sentence#}
- Assistant: {#I found|There is!} {matched_words#}.
+ - |-
+ Task: Find all the mentions of {#chemicals|chemical compounds|chemical substances!} in the {#following|subsequent!} {#text|sentence!}. Return the matching {#words|entities!}. If there is no {#match|mention of a chemical|matching entity!}, return `no match`.
+ {#Sentence|Description!}: {sentence#}
+ Answer: {matched_words#}
+ - |-
+ User: Does the following text contain mentions of {#chemicals|chemical compounds|chemical substances!}?{# Can you return matches?| Can you output matches?| Please return matches.!}
+ {#Text: |!}{sentence#}
+ Assistant: {#I found|There is!} {matched_words#}.
diff --git a/data/tabular/bc5disease/meta.yaml b/data/tabular/bc5disease/meta.yaml
index 02491b3ab..cbe390570 100644
--- a/data/tabular/bc5disease/meta.yaml
+++ b/data/tabular/bc5disease/meta.yaml
@@ -1,55 +1,54 @@
----
name: bc5disease
description: |-
- BC5Disease is a named entity recognition dataset for disease mentions.
+ BC5Disease is a named entity recognition dataset for disease mentions.
targets:
- - id: matched_words
- description: matched words
- type: text
- names:
- - noun: entity
- - noun: matched entity
+ - id: matched_words
+ description: matched words
+ type: text
+ names:
+ - noun: entity
+ - noun: matched entity
identifiers:
- - id: sentence
- description: Sentence
- type: text
- names:
- - noun: sentence
- - noun: text
+ - id: sentence
+ description: Sentence
+ type: text
+ names:
+ - noun: sentence
+ - noun: text
license: https://huggingface.co/datasets/bigbio/blurb/blob/main/LICENSE
links:
- - url: https://huggingface.co/datasets/bigbio/blurb
- description: original dataset
+ - url: https://huggingface.co/datasets/bigbio/blurb
+ description: original dataset
benchmarks:
- - name: bc5chem
- link: hhttps://huggingface.co/datasets/bigbio/blurb
- split_column: split
+ - name: bc5chem
+ link: https://huggingface.co/datasets/bigbio/blurb
+ split_column: split
num_points: 13755
bibtex:
- - |-
- @article{gu2021domain,
- title = {
- Domain-specific language model pretraining for biomedical natural
- language processing
- },
- author = {
- Gu, Yu and Tinn, Robert and Cheng, Hao and Lucas, Michael and
- Usuyama, Naoto and Liu, Xiaodong and Naumann, Tristan and Gao,
- Jianfeng and Poon, Hoifung
- },
- year = 2021,
- journal = {ACM Transactions on Computing for Healthcare (HEALTH)},
- publisher = {ACM New York, NY},
- volume = 3,
- number = 1,
- pages = {1--23}
- }
+ - |-
+ @article{gu2021domain,
+ title = {
+ Domain-specific language model pretraining for biomedical natural
+ language processing
+ },
+ author = {
+ Gu, Yu and Tinn, Robert and Cheng, Hao and Lucas, Michael and
+ Usuyama, Naoto and Liu, Xiaodong and Naumann, Tristan and Gao,
+ Jianfeng and Poon, Hoifung
+ },
+ year = 2021,
+ journal = {ACM Transactions on Computing for Healthcare (HEALTH)},
+ publisher = {ACM New York, NY},
+ volume = 3,
+ number = 1,
+ pages = {1--23}
+ }
templates:
- - |-
- Task: Find all the mentions of diseases in the {#following|subsequent!} {#text|sentence!}. Return the matching {#words|entities!}. If there is no {#match|mention of a disease|matching entity!}, return `no match`.
- {#Sentence|Description!}: {sentence#}
- Answer: {matched_words#}
- - |-
- User: Does the following text contain mentions of diseases?{# Can you return matches?| Can you output matches?|Please return matches!}
- {#Text: |!}{sentence#}
- Assistant: {#I found|There is!} {matched_words#}.
+ - |-
+ Task: Find all the mentions of diseases in the {#following|subsequent!} {#text|sentence!}. Return the matching {#words|entities!}. If there is no {#match|mention of a disease|matching entity!}, return `no match`.
+ {#Sentence|Description!}: {sentence#}
+ Answer: {matched_words#}
+ - |-
+ User: Does the following text contain mentions of diseases?{# Can you return matches?| Can you output matches?| Please return matches.!}
+ {#Text: |!}{sentence#}
+ Assistant: {#I found|There is!} {matched_words#}.
diff --git a/data/tabular/bicerano_dataset/meta.yaml b/data/tabular/bicerano_dataset/meta.yaml
index 382c63d3a..305bb1b93 100644
--- a/data/tabular/bicerano_dataset/meta.yaml
+++ b/data/tabular/bicerano_dataset/meta.yaml
@@ -1,66 +1,65 @@
----
name: bicerano_dataset
description: |-
- This paper outlines a MD simulation workflow based on GPU MD simulation and the
- refined optimized potentials for liquid simulation (OPLS) OPLS3e force field to
- calculate glass transition temperatures (Tgs) of 315 polymers for which Bicerano
- reported experimental values.
+ This paper outlines a MD simulation workflow based on GPU MD simulation and the
+ refined optimized potentials for liquid simulation (OPLS) OPLS3e force field to
+ calculate glass transition temperatures (Tgs) of 315 polymers for which Bicerano
+ reported experimental values.
targets:
- - id: Tg_exp
- description: experimental glass transition temperature
- units: K
- type: float
- names:
- - noun: experimental glass transition temperature
- uris:
- - id: Tg_calc
- description: calculated glass transition T
- units: K
- type: float
- names:
- - noun: computed glass transition temperature
- - id: rho_300K_calc
- description: computed density at 300K
- units: g/cm^3
- type: float
- names:
- - noun: computed polymer density at 300K
+ - id: Tg_exp
+ description: experimental glass transition temperature
+ units: K
+ type: float
+ names:
+ - noun: experimental glass transition temperature
+ uris:
+ - id: Tg_calc
+ description: calculated glass transition T
+ units: K
+ type: float
+ names:
+ - noun: computed glass transition temperature
+ - id: rho_300K_calc
+ description: computed density at 300K
+ units: g/cm^3
+ type: float
+ names:
+ - noun: computed polymer density at 300K
identifiers:
- - id: PSMILES
- type: PSMILES
- description: PSMILES
- - id: compound_name
- type: Other
- names:
- - noun: compound name
- description: polymer name
+ - id: PSMILES
+ type: PSMILES
+ description: PSMILES
+ - id: compound_name
+ type: Other
+ names:
+ - noun: compound name
+ description: polymer name
license: CC BY 4.0
links:
- - url: https://pubs.acs.org/doi/10.1021/acsapm.0c00524#
- description: corresponding publication
- - url:
- - https://raw.githubusercontent.com/AdrianM0/chemnlp/main/data/tabular/bicerano_dataset/HT_MD_polymer_properties.csv
- description: data source
+ - url: https://pubs.acs.org/doi/10.1021/acsapm.0c00524#
+ description: corresponding publication
+ - url:
+ - https://raw.githubusercontent.com/AdrianM0/chemnlp/main/data/tabular/bicerano_dataset/HT_MD_polymer_properties.csv
+ description: data source
num_points: 315
bibtex:
- - |-
- @article{afzal2021,
- author = {Afzal, Mohammad Atif Faiz and Browning, Andrea R. and Goldberg, Alexander and Halls, Mathew D. and Gavartin, Jacob L. and Morisato,
- Tsuguo and Hughes, Thomas F. and Giesen, David J. and Goose, Joseph E.},
- title = {High-Throughput Molecular Dynamics Simulations and Validation of Thermophysical Properties of Polymers for Various Applications},
- journal = {ACS Applied Polymer Materials},
- volume = {3},
- number = {2},
- pages = {620-630},
- year = {2021},
- doi = {10.1021/acsapm.0c00524}}
+ - |-
+ @article{afzal2021,
+ author = {Afzal, Mohammad Atif Faiz and Browning, Andrea R. and Goldberg, Alexander and Halls, Mathew D. and Gavartin, Jacob L. and Morisato,
+ Tsuguo and Hughes, Thomas F. and Giesen, David J. and Goose, Joseph E.},
+ title = {High-Throughput Molecular Dynamics Simulations and Validation of Thermophysical Properties of Polymers for Various Applications},
+ journal = {ACS Applied Polymer Materials},
+ volume = {3},
+ number = {2},
+ pages = {620-630},
+ year = {2021},
+ doi = {10.1021/acsapm.0c00524}}
templates:
- - The polymer with the {PSMILES__description} of {PSMILES#} has an experimental glass transition temperature of {Tg_exp#} K.
- - The polymer with the {PSMILES__description} of {PSMILES#} has a computed glass transition temperature of {Tg_calc#} K.
- - The polymer with the {PSMILES__description} of {PSMILES#} has a computed density at 300 K of {rho_300K_calc#} g/cc.
- - The polymer with the {compound_name__names__noun} of {compound_name#} has an experimental glass transition temperature of {Tg_exp#} K.
- - The polymer with the {compound_name__names__noun} of {compound_name#} has a computed glass transition temperature of {Tg_calc#} K.
- - The polymer with the {compound_name__names__noun} of {compound_name#} has a computed density at 300 K of {rho_300K_calc#} g/cc.
- - |-
- Question: What is a polymer with a computed glass transition temperature of {Tg_calc#} K and a computed density at 300 K of {rho_300K_calc#} g/cc.
- Answer: A polymer with {PSMILES__description} {PSMILES#}
+ - The polymer with the {PSMILES__description} of {PSMILES#} has an experimental glass transition temperature of {Tg_exp#} K.
+ - The polymer with the {PSMILES__description} of {PSMILES#} has a computed glass transition temperature of {Tg_calc#} K.
+ - The polymer with the {PSMILES__description} of {PSMILES#} has a computed density at 300 K of {rho_300K_calc#} g/cc.
+ - The polymer with the {compound_name__names__noun} of {compound_name#} has an experimental glass transition temperature of {Tg_exp#} K.
+ - The polymer with the {compound_name__names__noun} of {compound_name#} has a computed glass transition temperature of {Tg_calc#} K.
+ - The polymer with the {compound_name__names__noun} of {compound_name#} has a computed density at 300 K of {rho_300K_calc#} g/cc.
+ - |-
+ Question: What is a polymer with a computed glass transition temperature of {Tg_calc#} K and a computed density at 300 K of {rho_300K_calc#} g/cc.
+ Answer: A polymer with {PSMILES__description} {PSMILES#}
diff --git a/data/tabular/bio_ner/meta.yaml b/data/tabular/bio_ner/meta.yaml
index a0cb30f83..8566a1816 100644
--- a/data/tabular/bio_ner/meta.yaml
+++ b/data/tabular/bio_ner/meta.yaml
@@ -1,37 +1,36 @@
----
name: bio_ner
description: NER task on bio-related text.
identifiers:
- - id: Sentence
- description: Sentence
- type: Other
+ - id: Sentence
+ description: Sentence
+ type: Other
targets:
- - id: entity_1
- description: entity_1
- type: Other
- units: entity_1
- names:
- - noun: entity_1
- - id: json
- description: json
- type: Other
- units:
- names:
- - noun: JSON output
+ - id: entity_1
+ description: entity_1
+ type: Other
+ units: entity_1
+ names:
+ - noun: entity_1
+ - id: json
+ description: json
+ type: Other
+ units:
+ names:
+ - noun: JSON output
benchmarks:
- - name: bio_ner
- link: https://github.com/ML4LitS/bio-datasets
- split_column: split
+ - name: bio_ner
+ link: https://github.com/ML4LitS/bio-datasets
+ split_column: split
license: unknown
links:
- - url: https://github.com/ML4LitS/bio-datasets
- description: ???
+ - url: https://github.com/ML4LitS/bio-datasets
+ description: ???
num_points: 123509
bibtex:
- - ???
+ - ???
templates:
- - |-
- Task: Please carry out the {#named entity recognition (NER)|named entity recognition|NER!} task for the the text below.
- Text: {Sentence#}.
- Constrain: Please, {#only |!}list the entities in the form NER entity, span start, span end, and type {#in separate lines |!}with a high probability of being in the text.
- Result: {entity_1#}
+ - |-
+ Task: Please carry out the {#named entity recognition (NER)|named entity recognition|NER!} task for the text below.
+ Text: {Sentence#}.
+ Constraint: Please, {#only |!}list the entities in the form NER entity, span start, span end, and type {#in separate lines |!}with a high probability of being in the text.
+ Result: {entity_1#}
diff --git a/data/tabular/bioavailability_ma_et_al/meta.yaml b/data/tabular/bioavailability_ma_et_al/meta.yaml
index ccfb73dae..e5d7f417a 100644
--- a/data/tabular/bioavailability_ma_et_al/meta.yaml
+++ b/data/tabular/bioavailability_ma_et_al/meta.yaml
@@ -1,132 +1,131 @@
----
name: bioavailability_ma_et_al
description: |-
- Oral bioavailability is defined as the rate and extent to which the
- active ingredient or active moiety is absorbed from a drug product and becomes
- available at the site of action.
+ Oral bioavailability is defined as the rate and extent to which the
+ active ingredient or active moiety is absorbed from a drug product and becomes
+ available at the site of action.
targets:
- - id: bioavailable
- description: whether it is bioavailable (1) or not (0)
- units:
- type: boolean
- names:
- - noun: oral bioavailability
- - adjective: orally bioavailable
- uris:
- - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C70913
+ - id: bioavailable
+ description: whether it is bioavailable (1) or not (0)
+ units:
+ type: boolean
+ names:
+ - noun: oral bioavailability
+ - adjective: orally bioavailable
+ uris:
+ - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C70913
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
- - id: compound_name
- type: Other
- names:
- - noun: compound name
- - noun: drug name
- - noun: generic drug name
- description: drug name
+ - id: SMILES
+ type: SMILES
+ description: SMILES
+ - id: compound_name
+ type: Other
+ names:
+ - noun: compound name
+ - noun: drug name
+ - noun: generic drug name
+ description: drug name
license: CC BY 4.0
links:
- - url: https://doi.org/10.1016/j.jpba.2008.03.023
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/adme/#bioavailability-ma-et-al
- description: data source
+ - url: https://doi.org/10.1016/j.jpba.2008.03.023
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/adme/#bioavailability-ma-et-al
+ description: data source
num_points: 640
bibtex:
- - |-
- @article{Ma2008,
- doi = {10.1016/j.jpba.2008.03.023},
- url = {https://doi.org/10.1016/j.jpba.2008.03.023},
- year = {2008},
- month = aug,
- publisher = {Elsevier BV},
- volume = {47},
- number = {4-5},
- author = {Chang-Ying Ma and Sheng-Yong Yang and Hui Zhang
- and Ming-Li Xiang and Qi Huang and Yu-Quan Wei},
- title = {Prediction models of human plasma protein binding rate and
- oral bioavailability derived by using GA-CG-SVM method},
- journal = {Journal of Pharmaceutical and Biomedical Analysis}
+ - |-
+ @article{Ma2008,
+ doi = {10.1016/j.jpba.2008.03.023},
+ url = {https://doi.org/10.1016/j.jpba.2008.03.023},
+ year = {2008},
+ month = aug,
+ publisher = {Elsevier BV},
+ volume = {47},
+ number = {4-5},
+ author = {Chang-Ying Ma and Sheng-Yong Yang and Hui Zhang
+ and Ming-Li Xiang and Qi Huang and Yu-Quan Wei},
+ title = {Prediction models of human plasma protein binding rate and
+ oral bioavailability derived by using GA-CG-SVM method},
+ journal = {Journal of Pharmaceutical and Biomedical Analysis}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} has a {bioavailable#low&high} {bioavailable__names__noun}.
- - Based on the {SMILES__description} {#representation of |!}{SMILES#}, the molecule has a {bioavailable#low&high} {bioavailable__names__noun}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that has a {bioavailable#low&high} {bioavailable__names__noun}.
- - The {SMILES__description} {SMILES#} has a {bioavailable#low&high} {bioavailable__names__noun}.
- - The molecule with the {SMILES__description} {SMILES#} has a {bioavailable#low&high} {bioavailable__names__noun}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: Predict if the molecule has a low or high {bioavailable__names__noun}?
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "low" or "high" without using any {#other|additional!} words.
- Result: {bioavailable#low&high}
- - |-
- Task: Please classify a molecule based on the description.
- Description: Predict if the molecule has a low or high {bioavailable__names__noun}?
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule has a {bioavailable#low&high} {bioavailable__names__noun}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that has a {bioavailable#low&high} {bioavailable__names__noun}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} has a low or high {bioavailable__names__noun}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, this molecule has a {bioavailable#low&high} {bioavailable__names__noun}.
- - |-
- User: Has the molecule with the {SMILES__description} {SMILES#} a low or high {bioavailable__names__noun}?
- Assistant: It has a {bioavailable#low&high} {bioavailable__names__noun}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that has a {bioavailable#low&high} {bioavailable__names__noun}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that has a {bioavailable#low&high} {bioavailable__names__noun}?
- Assistant: {#Ok, this|This!} is a molecule that has a {bioavailable#low&high} {bioavailable__names__noun}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should have a {bioavailable#low&high} {bioavailable__names__noun}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} has a {bioavailable#low&high} {bioavailable__names__noun}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should have a {bioavailable#low&high} {bioavailable__names__noun}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} has a {bioavailable#low&high} {bioavailable__names__noun}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} {bioavailable__names__adjective}?{bioavailable#yes&no}
- - |-
- Task: Please classify a molecule based on the description.
- Description: Predict if the molecule has a low or high {bioavailable__names__noun}?
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "low" or "high" without using any {#other|additional!} words.
- Result:{bioavailable#low&high}
- - |-
- Task: Please answer the multiple choice question.
- Question: Has the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a high {bioavailable__names__noun}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {bioavailable%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Has the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a high {bioavailable__names__noun}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {bioavailable%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules have a high {bioavailable__names__noun}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%bioavailable%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules have a high {bioavailable__names__noun}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%bioavailable%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} has a {bioavailable#low&high} {bioavailable__names__noun}.
+ - Based on the {SMILES__description} {#representation of |!}{SMILES#}, the molecule has a {bioavailable#low&high} {bioavailable__names__noun}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that has a {bioavailable#low&high} {bioavailable__names__noun}.
+ - The {SMILES__description} {SMILES#} has a {bioavailable#low&high} {bioavailable__names__noun}.
+ - The molecule with the {SMILES__description} {SMILES#} has a {bioavailable#low&high} {bioavailable__names__noun}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: Predict if the molecule has a low or high {bioavailable__names__noun}?
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "low" or "high" without using any {#other|additional!} words.
+ Result: {bioavailable#low&high}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: Predict if the molecule has a low or high {bioavailable__names__noun}?
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule has a {bioavailable#low&high} {bioavailable__names__noun}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that has a {bioavailable#low&high} {bioavailable__names__noun}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} has a low or high {bioavailable__names__noun}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, this molecule has a {bioavailable#low&high} {bioavailable__names__noun}.
+ - |-
+ User: Has the molecule with the {SMILES__description} {SMILES#} a low or high {bioavailable__names__noun}?
+ Assistant: It has a {bioavailable#low&high} {bioavailable__names__noun}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that has a {bioavailable#low&high} {bioavailable__names__noun}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that has a {bioavailable#low&high} {bioavailable__names__noun}?
+ Assistant: {#Ok, this|This!} is a molecule that has a {bioavailable#low&high} {bioavailable__names__noun}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should have a {bioavailable#low&high} {bioavailable__names__noun}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} has a {bioavailable#low&high} {bioavailable__names__noun}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should have a {bioavailable#low&high} {bioavailable__names__noun}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} has a {bioavailable#low&high} {bioavailable__names__noun}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} {bioavailable__names__adjective}?{bioavailable#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: Predict if the molecule has a low or high {bioavailable__names__noun}?
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "low" or "high" without using any {#other|additional!} words.
+ Result:{bioavailable#low&high}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Has the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a high {bioavailable__names__noun}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {bioavailable%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Has the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a high {bioavailable__names__noun}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {bioavailable%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules have a high {bioavailable__names__noun}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%bioavailable%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules have a high {bioavailable__names__noun}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%bioavailable%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/block_polymers_morphology/meta.yaml b/data/tabular/block_polymers_morphology/meta.yaml
index 874438bde..4c3644bf2 100644
--- a/data/tabular/block_polymers_morphology/meta.yaml
+++ b/data/tabular/block_polymers_morphology/meta.yaml
@@ -1,68 +1,66 @@
----
name: block_polymers_morphology
description: |-
- Results of experimental phase measurements of di-block copolymers.
+ Results of experimental phase measurements of di-block copolymers.
targets:
- - id: phase1
- description: experimentally observed phase
- type: text
- names:
- - noun: phase
- - noun: experimentally observed phase
- - id: T
- description: temperature of measurement
- type: continuous
- significant_digits: 0
- units: K
- - id: Mn
- description: number-average molar mass
- type: continuous
- units: g/mol
- significant_digits: 0
- names:
- - noun: number-average molar mass
- - noun: Mn
- - noun: number-average molar mass (Mn)
- - id: f1
- description: volume fraction of block type 1
- type: continuous
- significant_digits: 2
- names:
- - noun: volume fraction of block type 1
- - id: Mw
- description: mass-average molar mass
- type: text
- names:
- - noun: mass-average molar mass
- - noun: mass-average molar mass (Mw)
- - id: D
- description: dispersity
- type: text
- names:
- - noun: dispersity
- - noun: dispersity (D)
+ - id: phase1
+ description: experimentally observed phase
+ type: text
+ names:
+ - noun: phase
+ - noun: experimentally observed phase
+ - id: T
+ description: temperature of measurement
+ type: continuous
+ significant_digits: 0
+ units: K
+ - id: Mn
+ description: number-average molar mass
+ type: continuous
+ units: g/mol
+ significant_digits: 0
+ names:
+ - noun: number-average molar mass
+ - noun: Mn
+ - noun: number-average molar mass (Mn)
+ - id: f1
+ description: volume fraction of block type 1
+ type: continuous
+ significant_digits: 2
+ names:
+ - noun: volume fraction of block type 1
+ - id: Mw
+ description: mass-average molar mass
+ type: text
+ names:
+ - noun: mass-average molar mass
+ - noun: mass-average molar mass (Mw)
+ - id: D
+ description: dispersity
+ type: text
+ names:
+ - noun: dispersity
+ - noun: dispersity (D)
identifiers:
- - id: BigSMILES
- type: string
- description: BigSMILES
+ - id: BigSMILES
+ type: string
+ description: BigSMILES
license: CC BY 4.0
links:
- - url: https://github.com/olsenlabmit/BCDB/tree/main
- description: original data source
+ - url: https://github.com/olsenlabmit/BCDB/tree/main
+ description: original data source
num_points: 4438
templates:
- - The {#polymer|di-block copolymer|copolymer!} with BigSMILES {BigSMILES#}, {Mn__names__noun} of {Mn#} {Mn__units}, {f1__names__noun} of {f1#}{Mw#}{D#}
- was {#measured|analyzed|studied!} at {T#} {T__units} and found to be in the {phase1#} phase.
- - |-
- Question: If I have a {#polymer|di-block copolymer|copolymer!} with {Mn__names__noun} of {Mn#} {Mn__units}, {f1__names__noun} of {f1#}{Mw#}{D#}, what phase will it be in at {T#} {T__units}?
- Answer: The polymer will be in the {phase1#} phase.
- - |-
- User: I want to design a {#polymer|di-block copolymer|copolymer!} with a particular {Mn__names__noun}, {f1__names__noun}, and {phase1__names__noun}.
- Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, !}I would need to know the {Mn__names__noun}, {f1__names__noun}, and {phase1__names__noun} of the polymer you want to design.
- User: The {Mn__names__noun} should be {Mn#} {Mn__units}, the {f1__names__noun} should be {f1#}, and the {phase1__names__noun} should be {phase1#}.
- Assistant: I {#recommend|suggest|propose|advise!} the {#polymer|di-block copolymer|copolymer!} with BigSMILES {BigSMILES#}{Mw#}{D#}.
- - |-
- User: I want to design a {#polymer|di-block copolymer|copolymer!} that is in the {phase1#} phase.
- Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, !}{#do you have any other constraints?|do you have other requirements?|what else should I take into account?!}
- User: The {Mn__names__noun} should be {Mn#} {Mn__units}, the {f1__names__noun} should be {f1#}.
- Assistant: I {#recommend|suggest|propose|advise!} the {#polymer|di-block copolymer|copolymer!} with BigSMILES {BigSMILES#}{Mw#}{D#}.
+ - The {#polymer|di-block copolymer|copolymer!} with BigSMILES {BigSMILES#}, {Mn__names__noun} of {Mn#} {Mn__units}, {f1__names__noun} of {f1#}{Mw#}{D#} was {#measured|analyzed|studied!} at {T#} {T__units} and found to be in the {phase1#} phase.
+ - |-
+ Question: If I have a {#polymer|di-block copolymer|copolymer!} with {Mn__names__noun} of {Mn#} {Mn__units}, {f1__names__noun} of {f1#}{Mw#}{D#}, what phase will it be in at {T#} {T__units}?
+ Answer: The polymer will be in the {phase1#} phase.
+ - |-
+ User: I want to design a {#polymer|di-block copolymer|copolymer!} with a particular {Mn__names__noun}, {f1__names__noun}, and {phase1__names__noun}.
+ Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, !}I would need to know the {Mn__names__noun}, {f1__names__noun}, and {phase1__names__noun} of the polymer you want to design.
+ User: The {Mn__names__noun} should be {Mn#} {Mn__units}, the {f1__names__noun} should be {f1#}, and the {phase1__names__noun} should be {phase1#}.
+ Assistant: I {#recommend|suggest|propose|advise!} the {#polymer|di-block copolymer|copolymer!} with BigSMILES {BigSMILES#}{Mw#}{D#}.
+ - |-
+ User: I want to design a {#polymer|di-block copolymer|copolymer!} that is in the {phase1#} phase.
+ Assistant: {#Cool, |Awesome, |Great, |That sounds interesting, !}{#do you have any other constraints?|do you have other requirements?|what else should I take into account?!}
+ User: The {Mn__names__noun} should be {Mn#} {Mn__units}, the {f1__names__noun} should be {f1#}.
+ Assistant: I {#recommend|suggest|propose|advise!} the {#polymer|di-block copolymer|copolymer!} with BigSMILES {BigSMILES#}{Mw#}{D#}.
diff --git a/data/tabular/blood_brain_barrier_martins_et_al/meta.yaml b/data/tabular/blood_brain_barrier_martins_et_al/meta.yaml
index d8e213bb9..db2f840c9 100644
--- a/data/tabular/blood_brain_barrier_martins_et_al/meta.yaml
+++ b/data/tabular/blood_brain_barrier_martins_et_al/meta.yaml
@@ -1,155 +1,154 @@
----
name: blood_brain_barrier_martins_et_al
description: |-
- As a membrane separating circulating blood and brain extracellular
- fluid, the blood-brain barrier (BBB) is the protection layer that blocks most
- foreign drugs. Thus the ability of a drug to penetrate the barrier to deliver
- to the site of action forms a crucial challenge in development of drugs for the
- central nervous system.
+ As a membrane separating circulating blood and brain extracellular
+ fluid, the blood-brain barrier (BBB) is the protection layer that blocks most
+ foreign drugs. Thus the ability of a drug to penetrate the barrier to deliver
+ to the site of action forms a crucial challenge in development of drugs for the
+ central nervous system.
targets:
- - id: penetrate_BBB
- description: The ability of a drug to penetrate the blood brain barrier (1) or not (0)
- units:
- type: boolean
- names:
- - noun: blood brain barrier penetration
- - noun: ADME blood-brain barrier penetration
- - verb: penetrates the blood brain barrier to reach the brain
- - verb: penetrates the blood brain barrier
- - adjective: penetrating the blood brain barrier
- - adjective: penetrating the blood brain barrier to reach the brain
- uris:
+ - id: penetrate_BBB
+ description: The ability of a drug to penetrate the blood brain barrier (1) or not (0)
+ units:
+ type: boolean
+ names:
+ - noun: blood brain barrier penetration
+ - noun: ADME blood-brain barrier penetration
+ - verb: penetrates the blood brain barrier to reach the brain
+ - verb: penetrates the blood brain barrier
+ - adjective: penetrating the blood brain barrier
+ - adjective: penetrating the blood brain barrier to reach the brain
+ uris:
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
- - id: compound_name
- type: Other
- names:
- - noun: compound name
- - noun: drug name
- - noun: generic drug name
- description: compound name
+ - id: SMILES
+ type: SMILES
+ description: SMILES
+ - id: compound_name
+ type: Other
+ names:
+ - noun: compound name
+ - noun: drug name
+ - noun: generic drug name
+ description: compound name
license: CC BY 4.0
links:
- - url: https://doi.org/10.1021/ci300124c
- description: corresponding publication
- - url: https://rb.gy/0xx91v
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/adme/#bbb-blood-brain-barrier-martins-et-al
- description: data source
+ - url: https://doi.org/10.1021/ci300124c
+ description: corresponding publication
+ - url: https://rb.gy/0xx91v
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/adme/#bbb-blood-brain-barrier-martins-et-al
+ description: data source
num_points: 2030
bibtex:
- - |-
- @article{Martins2012,
- doi = {10.1021/ci300124c},
- url = {https://doi.org/10.1021/ci300124c},
- year = {2012},
- month = jun,
- publisher = {American Chemical Society (ACS)},
- volume = {52},
- number = {6},
- pages = {1686--1697},
- author = {Ines Filipa Martins and Ana L. Teixeira and Luis Pinheiro
- and Andre O. Falcao},
- title = {A Bayesian Approach to in Silico Blood-Brain Barrier Penetration Modeling},
- journal = {Journal of Chemical Information and Modeling}
- - |-
- @article{Wu2018,
- doi = {10.1039/c7sc02664a},
- url = {https://doi.org/10.1039/c7sc02664a},
- year = {2018},
- publisher = {Royal Society of Chemistry (RSC)},
- volume = {9},
- number = {2},
- pages = {513--530},
- author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg and Joseph
- Gomes and Caleb Geniesse and Aneesh S. Pappu and Karl Leswing and Vijay Pande},
- title = {MoleculeNet: a benchmark for molecular machine learning},
- journal = {Chemical Science}
+ - |-
+ @article{Martins2012,
+ doi = {10.1021/ci300124c},
+ url = {https://doi.org/10.1021/ci300124c},
+ year = {2012},
+ month = jun,
+ publisher = {American Chemical Society (ACS)},
+ volume = {52},
+ number = {6},
+ pages = {1686--1697},
+ author = {Ines Filipa Martins and Ana L. Teixeira and Luis Pinheiro
+ and Andre O. Falcao},
+ title = {A Bayesian Approach to in Silico Blood-Brain Barrier Penetration Modeling},
+ journal = {Journal of Chemical Information and Modeling}
+ - |-
+ @article{Wu2018,
+ doi = {10.1039/c7sc02664a},
+ url = {https://doi.org/10.1039/c7sc02664a},
+ year = {2018},
+ publisher = {Royal Society of Chemistry (RSC)},
+ volume = {9},
+ number = {2},
+ pages = {513--530},
+ author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg and Joseph
+ Gomes and Caleb Geniesse and Aneesh S. Pappu and Karl Leswing and Vijay Pande},
+ title = {MoleculeNet: a benchmark for molecular machine learning},
+ journal = {Chemical Science}
templates:
- - The molecule with the {SMILES__description} {SMILES#} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {penetrate_BBB#not &NULL}identified as {penetrate_BBB__names__adjective}.
- - The molecule represented with the {SMILES__description} {SMILES#} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.
- - '{SMILES#} represents a molecule that is {penetrate_BBB#not &NULL}identified as {penetrate_BBB__names__adjective}.'
- - '{SMILES#} represents a molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.'
- - '{SMILES#} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.'
- - The {#molecule |!}{SMILES__description} {SMILES#} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {penetrate_BBB__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {penetrate_BBB#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {penetrate_BBB__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.
- - |-
- Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|chemical compound!} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {penetrate_BBB__names__adjective}?
- Assistant: {penetrate_BBB#No&Yes}, this molecule is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {penetrate_BBB__names__adjective}?
- Assistant: {penetrate_BBB#No&Yes}, it is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}?
- Assistant: This is a molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} the {SMILES__description} of a {#molecule|chemical!}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {penetrate_BBB#not &NULL}be {penetrate_BBB__names__adjective}.
- Assistant: Got it, this {SMILES__description} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} {penetrate_BBB__names__adjective}:{penetrate_BBB#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {penetrate_BBB__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{penetrate_BBB#False&True}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} of {SMILES#} {penetrate_BBB__names__adjective}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {penetrate_BBB%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%penetrate_BBB%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} of {SMILES#} {penetrate_BBB__names__adjective}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {penetrate_BBB%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%penetrate_BBB%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {SMILES#} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {penetrate_BBB#not &NULL}identified as {penetrate_BBB__names__adjective}.
+ - The molecule represented with the {SMILES__description} {SMILES#} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.
+ - "{SMILES#} represents a molecule that is {penetrate_BBB#not &NULL}identified as {penetrate_BBB__names__adjective}."
+ - "{SMILES#} represents a molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}."
+ - "{SMILES#} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}."
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {penetrate_BBB__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {penetrate_BBB#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {penetrate_BBB__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.
+ - |-
+ Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|chemical compound!} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {penetrate_BBB__names__adjective}?
+ Assistant: {penetrate_BBB#No&Yes}, this molecule is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {penetrate_BBB__names__adjective}?
+ Assistant: {penetrate_BBB#No&Yes}, it is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}?
+ Assistant: This is a molecule that is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} the {SMILES__description} of a {#molecule|chemical!}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {penetrate_BBB#not &NULL}be {penetrate_BBB__names__adjective}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {penetrate_BBB#not &NULL}be {penetrate_BBB__names__adjective}.
+ Assistant: Got it, this {SMILES__description} is {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} {penetrate_BBB__names__adjective}:{penetrate_BBB#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {penetrate_BBB__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{penetrate_BBB#False&True}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} of {SMILES#} {penetrate_BBB__names__adjective}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {penetrate_BBB%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%penetrate_BBB%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} of {SMILES#} {penetrate_BBB__names__adjective}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {penetrate_BBB%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {penetrate_BBB#not &NULL}{penetrate_BBB__names__adjective}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%penetrate_BBB%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/buchwald_hartwig/meta.yaml b/data/tabular/buchwald_hartwig/meta.yaml
index d777d975a..4ab71993f 100644
--- a/data/tabular/buchwald_hartwig/meta.yaml
+++ b/data/tabular/buchwald_hartwig/meta.yaml
@@ -1,103 +1,101 @@
----
name: buchwald_hartwig_doyle
description: |-
- High-throughput experimentation palladium-catalyzed Buchwald Hardwig
- C-N cross-coupling data set with yields.
+ High-throughput experimentation palladium-catalyzed Buchwald-Hartwig
+ C-N cross-coupling data set with yields.
targets:
- - id: yield
- description: Reaction yields analyzed by LCMS
- units: \%
- type: continuous
- names:
- - noun: reaction yield
- - noun: yield
- - noun: reaction yield (measured by LCMS)
- - id: masked_rxn_smiles
- type: text
- description: reaction SMILES with one element masked
- names:
- - noun: reaction SMILES with one element masked as `MASK`
- - noun: reaction SMILES with one element hidden as `MASK`
- - noun: masked reaction SMILES (one component masked as `MASK`)
- - noun: masked reaction SMILES string (one component masked as `MASK`)
- - noun: masked RXNSMILES (one component masked as `MASK`)
- - id: educt_string
- type: text
- description: reaction educts
- names:
- - noun: reaction educts
- - noun: educts
- - noun: starting materials
- - id: product_string
- type: text
- description: reaction products
- names:
- - noun: reaction products
- - noun: products
+ - id: yield
+ description: Reaction yields analyzed by LCMS
+ units: \%
+ type: continuous
+ names:
+ - noun: reaction yield
+ - noun: yield
+ - noun: reaction yield (measured by LCMS)
+ - id: masked_rxn_smiles
+ type: text
+ description: reaction SMILES with one element masked
+ names:
+ - noun: reaction SMILES with one element masked as `MASK`
+ - noun: reaction SMILES with one element hidden as `MASK`
+ - noun: masked reaction SMILES (one component masked as `MASK`)
+ - noun: masked reaction SMILES string (one component masked as `MASK`)
+ - noun: masked RXNSMILES (one component masked as `MASK`)
+ - id: educt_string
+ type: text
+ description: reaction educts
+ names:
+ - noun: reaction educts
+ - noun: educts
+ - noun: starting materials
+ - id: product_string
+ type: text
+ description: reaction products
+ names:
+ - noun: reaction products
+ - noun: products
identifiers:
- - id: RXNSMILES
- type: RXNSMILES
- description: RXNSMILES
- names:
- - noun: reaction SMILES
- - noun: reaction SMILES string
- - noun: RXNSMILES
- - noun: reaction SMILES (RXNSMILES)
- - id: missing_component
- type: text
- description: masked element
+ - id: RXNSMILES
+ type: RXNSMILES
+ description: RXNSMILES
+ names:
+ - noun: reaction SMILES
+ - noun: reaction SMILES string
+ - noun: RXNSMILES
+ - noun: reaction SMILES (RXNSMILES)
+ - id: missing_component
+ type: text
+ description: masked element
license: MIT
links:
- - url: https://doi.org/10.1126/science.aar5169
- description: corresponding publication
- - url: https://www.sciencedirect.com/science/article/pii/S2451929420300851
- description: publication with data processing
- - url: https://github.com/rxn4chemistry/rxn_yields/blob/master/rxn_yields/data.py
- description: preprocessing
- - url: https://github.com/reymond-group/drfp/tree/main/data
- description: dataset
+ - url: https://doi.org/10.1126/science.aar5169
+ description: corresponding publication
+ - url: https://www.sciencedirect.com/science/article/pii/S2451929420300851
+ description: publication with data processing
+ - url: https://github.com/rxn4chemistry/rxn_yields/blob/master/rxn_yields/data.py
+ description: preprocessing
+ - url: https://github.com/reymond-group/drfp/tree/main/data
+ description: dataset
num_points: 3955
url: https://doi.org/10.1126/science.aar5169
bibtex:
- - |-
- @article{ahneman2018predicting,
- title={Predicting reaction performance in C--N cross-coupling using machine learning},
- author={Ahneman, Derek T and Estrada, Jes{'u}s G and Lin, Shishi and Dreher, Spencer D and Doyle, Abigail G},
- journal={Science},
- volume={360},
- number={6385},
- pages={186--190},
- year={2018},
- publisher={American Association for the Advancement of Science},
- }
+ - |-
+ @article{ahneman2018predicting,
+ title={Predicting reaction performance in C--N cross-coupling using machine learning},
+ author={Ahneman, Derek T and Estrada, Jes{\'u}s G and Lin, Shishi and Dreher, Spencer D and Doyle, Abigail G},
+ journal={Science},
+ volume={360},
+ number={6385},
+ pages={186--190},
+ year={2018},
+ publisher={American Association for the Advancement of Science},
+ }
templates:
- - The {RXNSMILES__names__noun} {RXNSMILES#} has the {educt_string__names__noun} {educt_string#} and the {product_string__names__noun} {product_string#}.
- - The {RXNSMILES__names__noun} {RXNSMILES#} has the {product_string__names__noun} {product_string#} and the {educt_string__names__noun} {educt_string#}.
- - The masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#} is {missing_component#}.
- - The {#chemical|compound!} with SMILES {missing_component#} is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}.
- - |-
- Question: {#What|Which!} {educt_string__names__noun} are {#needed|required!} to {#produce|synthesize!} the {product_string__names__noun} {product_string#}?
- Answer: {educt_string#}.
- - |-
- Question: {#What|Which!} {product_string__names__noun} are produced from the {educt_string__names__noun} {educt_string#}?
- Answer: {product_string#}.
- - |-
- User: I {#want|would like to|must|need to!} {#synthesize|produce!} the {product_string__names__noun} {product_string#}.
- Assistant: {#Cool, is|That's interesting, is|Great, is|Is!} there anything else I can do for you?
- User: {#Yes, |!}I would like to know the {educt_string__names__noun} I need to produce the {product_string__names__noun} {product_string#}.
- Assistant: {#I recommend|I suggest|I propose|I advise!} the following {educt_string__names__noun}: {educt_string#}.
- - |-
- Question: What is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}?
- Answer: {missing_component#}.
- - |-
- Task: Predict the masked component in a {masked_rxn_smiles__names__noun}.
- Description: {masked_rxn_smiles#}
- {#Answer|Solution!}: {missing_component#}
- - The {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#} is {yield#}{yield__units}.
- - |-
- User: {#I need|I want|I would like!} to run a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}. What is the {yield__names__noun} {#I can expect|I should expect|I should get|I can get!}?
- Assistant: {#The|The expected|The predicted|The estimated!} {yield__names__noun} is {yield#}{yield__units}.
-
- - |-
- Question: {#What is|What's|What is the|What's the!} {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}?
- Answer: {yield#}{yield__units}.
+ - The {RXNSMILES__names__noun} {RXNSMILES#} has the {educt_string__names__noun} {educt_string#} and the {product_string__names__noun} {product_string#}.
+ - The {RXNSMILES__names__noun} {RXNSMILES#} has the {product_string__names__noun} {product_string#} and the {educt_string__names__noun} {educt_string#}.
+ - The masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#} is {missing_component#}.
+ - The {#chemical|compound!} with SMILES {missing_component#} is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}.
+ - |-
+ Question: {#What|Which!} {educt_string__names__noun} are {#needed|required!} to {#produce|synthesize!} the {product_string__names__noun} {product_string#}?
+ Answer: {educt_string#}.
+ - |-
+ Question: {#What|Which!} {product_string__names__noun} are produced from the {educt_string__names__noun} {educt_string#}?
+ Answer: {product_string#}.
+ - |-
+ User: I {#want|would like to|must|need to!} {#synthesize|produce!} the {product_string__names__noun} {product_string#}.
+ Assistant: {#Cool, is|That's interesting, is|Great, is|Is!} there anything else I can do for you?
+ User: {#Yes, |!}I would like to know the {educt_string__names__noun} I need to produce the {product_string__names__noun} {product_string#}.
+ Assistant: {#I recommend|I suggest|I propose|I advise!} the following {educt_string__names__noun}: {educt_string#}.
+ - |-
+ Question: What is the masked component in the {masked_rxn_smiles__names__noun} {masked_rxn_smiles#}?
+ Answer: {missing_component#}.
+ - |-
+ Task: Predict the masked component in a {masked_rxn_smiles__names__noun}.
+ Description: {masked_rxn_smiles#}
+ {#Answer|Solution!}: {missing_component#}
+ - The {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#} is {yield#}{yield__units}.
+ - |-
+ User: {#I need|I want|I would like!} to run a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}. What is the {yield__names__noun} {#I can expect|I should expect|I should get|I can get!}?
+ Assistant: {#The|The expected|The predicted|The estimated!} {yield__names__noun} is {yield#}{yield__units}.
+ - |-
+ Question: {#What is|What's|What is the|What's the!} {yield__names__noun} of a reaction with the {RXNSMILES__names__noun} {RXNSMILES#}?
+ Answer: {yield#}{yield__units}.
diff --git a/data/tabular/caco2_wang/meta.yaml b/data/tabular/caco2_wang/meta.yaml
index e336c6c47..5d4fb41da 100644
--- a/data/tabular/caco2_wang/meta.yaml
+++ b/data/tabular/caco2_wang/meta.yaml
@@ -1,57 +1,56 @@
----
name: caco2_wang
description: |-
- The human colon epithelial cancer cell line, Caco-2,
- is used as an in vitro model to simulate the human intestinal tissue.
- The experimental result on the rate of drug passing through
- the Caco-2 cells can approximate the rate at which the drug permeates
- through the human intestinal tissue.
+ The human colon epithelial cancer cell line, Caco-2,
+ is used as an in vitro model to simulate the human intestinal tissue.
+ The experimental result on the rate of drug passing through
+ the Caco-2 cells can approximate the rate at which the drug permeates
+ through the human intestinal tissue.
targets:
- - id: permeability
- description: Caco-2 cell effective permeability.
- units: cm/s
- type: continuous
- names:
- - noun: Caco-2 cell effective permeability
- - noun: Caco-2 cell permeability
- - noun: Caco-2 permeability
- pubchem_aids:
- - 678378
- uris:
- - http://www.bioassayontology.org/bao#BAO_0010008
- - http://purl.obolibrary.org/obo/MI_2162
+ - id: permeability
+ description: Caco-2 cell effective permeability.
+ units: cm/s
+ type: continuous
+ names:
+ - noun: Caco-2 cell effective permeability
+ - noun: Caco-2 cell permeability
+ - noun: Caco-2 permeability
+ pubchem_aids:
+ - 678378
+ uris:
+ - http://www.bioassayontology.org/bao#BAO_0010008
+ - http://purl.obolibrary.org/obo/MI_2162
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
- - id: compound_name
- type: Other
- description: compound name
- names:
- - noun: compound
- - noun: compound name
+ - id: SMILES
+ type: SMILES
+ description: SMILES
+ - id: compound_name
+ type: Other
+ description: compound name
+ names:
+ - noun: compound
+ - noun: compound name
license: CC BY 4.0
links:
- - url: https://tdcommons.ai/single_pred_tasks/adme/#caco-2-cell-effective-permeability-wang-et-al
- description: original data set link
- - url: https://pubs.acs.org/doi/10.1021/acs.jcim.5b00642
- description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/adme/#caco-2-cell-effective-permeability-wang-et-al
+ description: original data set link
+ - url: https://pubs.acs.org/doi/10.1021/acs.jcim.5b00642
+ description: corresponding publication
num_points: 910
bibtex:
- - |-
- @article{wang2016adme,
- title={ADME properties evaluation in drug discovery: prediction of Caco-2 cell permeability
- using a combination of NSGA-II and boosting},
- author={Wang, Ning-Ning and Dong, Jie and Deng, Yin-Hua and Zhu, Min-Feng and Wen, Ming and Yao,
- Zhi-Jiang and Lu, Ai-Ping and Wang, Jian-Bing and Cao, Dong-Sheng},
- journal={Journal of Chemical Information and Modeling},
- volume={56},
- number={4},
- pages={763--773},
- year={2016},
- publisher={ACS Publications}
- }
+ - |-
+ @article{wang2016adme,
+ title={ADME properties evaluation in drug discovery: prediction of Caco-2 cell permeability
+ using a combination of NSGA-II and boosting},
+ author={Wang, Ning-Ning and Dong, Jie and Deng, Yin-Hua and Zhu, Min-Feng and Wen, Ming and Yao,
+ Zhi-Jiang and Lu, Ai-Ping and Wang, Jian-Bing and Cao, Dong-Sheng},
+ journal={Journal of Chemical Information and Modeling},
+ volume={56},
+ number={4},
+ pages={763--773},
+ year={2016},
+ publisher={ACS Publications}
+ }
diff --git a/data/tabular/carcinogens/meta.yaml b/data/tabular/carcinogens/meta.yaml
index 7e4fde301..abcf07603 100644
--- a/data/tabular/carcinogens/meta.yaml
+++ b/data/tabular/carcinogens/meta.yaml
@@ -1,144 +1,142 @@
----
name: carcinogens
description: |-
- A carcinogen is any substance, radionuclide, or radiation that promotes
- carcinogenesis, the formation of cancer. This may be due to the ability to damage
- the genome or to the disruption of cellular metabolic processes.
+ A carcinogen is any substance, radionuclide, or radiation that promotes
+ carcinogenesis, the formation of cancer. This may be due to the ability to damage
+ the genome or to the disruption of cellular metabolic processes.
targets:
- - id: carcinogen
- description: whether it is carcinogenic (1) or not (0).
- units:
- type: boolean
- names:
- - noun: carcinogen
- - adjective: carcinogenic
- - gerund: having the potential to cause cancer
- uris:
- - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C347
- - http://purl.bioontology.org/ontology/SNOMEDCT/88376000
+ - id: carcinogen
+ description: whether it is carcinogenic (1) or not (0).
+ units:
+ type: boolean
+ names:
+ - noun: carcinogen
+ - adjective: carcinogenic
+ - gerund: having the potential to cause cancer
+ uris:
+ - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C347
+ - http://purl.bioontology.org/ontology/SNOMEDCT/88376000
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://doi.org/10.1002/qsar.200860192
- description: corresponding publication
- - url: https://doi.org/10.1021/ci300367a
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/tox/#carcinogens
- description: Data source
+ - url: https://doi.org/10.1002/qsar.200860192
+ description: corresponding publication
+ - url: https://doi.org/10.1021/ci300367a
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/tox/#carcinogens
+ description: Data source
num_points: 280
bibtex:
- - |-
- @article{Lagunin2009,
- doi = {10.1002/qsar.200860192},
- url = {https://doi.org/10.1002/qsar.200860192},
- year = {2009},
- month = jun,
- publisher = {Wiley},
- volume = {28},
- number = {8},
- pages = {806--810},
- author = {Alexey Lagunin and Dmitrii Filimonov and Alexey Zakharov and Wei Xie
- and Ying Huang and Fucheng Zhu and Tianxiang Shen and Jianhua Yao and Vladimir Poroikov},
- title = {Computer-Aided Prediction of Rodent Carcinogenicity by PASS and CISOC PSCT},
- journal = {QSAR & Combinatorial Science}
- - |-
- @article{Cheng2012,
- doi = {10.1021/ci300367a},
- url = {https://doi.org/10.1021/ci300367a},
- year = {2012},
- month = nov,
- publisher = {American Chemical Society (ACS)},
- volume = {52},
- number = {11},
- pages = {3099--3105},
- author = {Feixiong Cheng and Weihua Li and Yadi Zhou and Jie Shen and Zengrui Wu
- and Guixia Liu and Philip W. Lee and Yun Tang},
- title = {admetSAR: A Comprehensive Source and Free Tool for Assessment of Chemical ADMET Properties},
- journal = {Journal of Chemical Information and Modeling}
+ - |-
+ @article{Lagunin2009,
+ doi = {10.1002/qsar.200860192},
+ url = {https://doi.org/10.1002/qsar.200860192},
+ year = {2009},
+ month = jun,
+ publisher = {Wiley},
+ volume = {28},
+ number = {8},
+ pages = {806--810},
+ author = {Alexey Lagunin and Dmitrii Filimonov and Alexey Zakharov and Wei Xie
+ and Ying Huang and Fucheng Zhu and Tianxiang Shen and Jianhua Yao and Vladimir Poroikov},
+ title = {Computer-Aided Prediction of Rodent Carcinogenicity by PASS and CISOC PSCT},
+ journal = {QSAR \& Combinatorial Science}}
+ - |-
+ @article{Cheng2012,
+ doi = {10.1021/ci300367a},
+ url = {https://doi.org/10.1021/ci300367a},
+ year = {2012},
+ month = nov,
+ publisher = {American Chemical Society (ACS)},
+ volume = {52},
+ number = {11},
+ pages = {3099--3105},
+ author = {Feixiong Cheng and Weihua Li and Yadi Zhou and Jie Shen and Zengrui Wu
+ and Guixia Liu and Philip W. Lee and Yun Tang},
+ title = {admetSAR: A Comprehensive Source and Free Tool for Assessment of Chemical ADMET Properties},
+ journal = {Journal of Chemical Information and Modeling}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {carcinogen#no &NULL}{carcinogen__names__adjective}
- {#properties|effects!}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {carcinogen#no &NULL}{carcinogen__names__adjective} {#effects|properties|characteristics|features!}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {carcinogen#not &NULL}identified as {carcinogen__names__adjective}.
- - The {SMILES__description} {SMILES#} is {carcinogen#not &NULL}{carcinogen__names__adjective}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {carcinogen#not &NULL}{carcinogen__names__adjective}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {carcinogen__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {carcinogen#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {carcinogen__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {carcinogen#not &NULL}{carcinogen__names__adjective}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {carcinogen#not &NULL}{carcinogen__names__adjective}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {carcinogen__names__adjective}?
- Assistant: {carcinogen#No&Yes}, this molecule is {carcinogen#not &NULL}{carcinogen__names__adjective}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {carcinogen__names__adjective}?
- Assistant: {carcinogen#No&Yes}, it is {carcinogen#not &NULL}{carcinogen__names__adjective}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {carcinogen#not &NULL}{carcinogen__names__adjective}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {carcinogen#not &NULL}{carcinogen__names__adjective}?
- Assistant: This is a molecule that is {carcinogen#not &NULL}{carcinogen__names__adjective}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {carcinogen#not &NULL}be {carcinogen__names__adjective}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {carcinogen#not &NULL}{carcinogen__names__adjective}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {carcinogen#not &NULL}be {carcinogen__names__adjective}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {carcinogen#not &NULL}{carcinogen__names__adjective}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} {carcinogen__names__adjective}:{carcinogen#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {carcinogen__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{carcinogen#False&True}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {carcinogen__names__adjective}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {carcinogen%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {carcinogen__names__adjective}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {carcinogen%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {carcinogen#not &NULL}{carcinogen__names__adjective}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%carcinogen%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {carcinogen#not &NULL}{carcinogen__names__adjective}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%carcinogen%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {carcinogen#no &NULL}{carcinogen__names__adjective} {#properties|effects!}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {carcinogen#no &NULL}{carcinogen__names__adjective} {#effects|properties|characteristics|features!}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {carcinogen#not &NULL}identified as {carcinogen__names__adjective}.
+ - The {SMILES__description} {SMILES#} is {carcinogen#not &NULL}{carcinogen__names__adjective}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {carcinogen#not &NULL}{carcinogen__names__adjective}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {carcinogen__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {carcinogen#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {carcinogen__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {carcinogen#not &NULL}{carcinogen__names__adjective}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {carcinogen#not &NULL}{carcinogen__names__adjective}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {carcinogen__names__adjective}?
+ Assistant: {carcinogen#No&Yes}, this molecule is {carcinogen#not &NULL}{carcinogen__names__adjective}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {carcinogen__names__adjective}?
+ Assistant: {carcinogen#No&Yes}, it is {carcinogen#not &NULL}{carcinogen__names__adjective}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {carcinogen#not &NULL}{carcinogen__names__adjective}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {carcinogen#not &NULL}{carcinogen__names__adjective}?
+ Assistant: This is a molecule that is {carcinogen#not &NULL}{carcinogen__names__adjective}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {carcinogen#not &NULL}be {carcinogen__names__adjective}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {carcinogen#not &NULL}{carcinogen__names__adjective}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {carcinogen#not &NULL}be {carcinogen__names__adjective}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {carcinogen#not &NULL}{carcinogen__names__adjective}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} {carcinogen__names__adjective}:{carcinogen#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {carcinogen__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{carcinogen#False&True}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {carcinogen__names__adjective}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {carcinogen%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {carcinogen__names__adjective}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {carcinogen%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {carcinogen#not &NULL}{carcinogen__names__adjective}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%carcinogen%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {carcinogen#not &NULL}{carcinogen__names__adjective}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%carcinogen%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/cav3_t-type_calcium_channels_butkiewicz/meta.yaml b/data/tabular/cav3_t-type_calcium_channels_butkiewicz/meta.yaml
index 7157d491e..03c4c5ea1 100644
--- a/data/tabular/cav3_t-type_calcium_channels_butkiewicz/meta.yaml
+++ b/data/tabular/cav3_t-type_calcium_channels_butkiewicz/meta.yaml
@@ -1,169 +1,166 @@
----
name: cav3_t-type_calcium_channels_butkiewicz
description: |-
- This dataset was initially curated from HTS data at the PubChem database.
- The curation process is documented in Butkiewicz et al.
- Primary screening with AID 449739 identified inhibitors of Cav3 T-type calcium channels.
- Four follow-up screens were performed to confirm inhibitory effects on smaller sets of compounds
- involving AID 493021, AID 493022, AID 493023, and AID 493041.
- AID 489005 was performed as counter screen validating active compounds of the primary screen.
+ This dataset was initially curated from HTS data at the PubChem database.
+ The curation process is documented in Butkiewicz et al.
+ Primary screening with AID 449739 identified inhibitors of Cav3 T-type calcium channels.
+ Four follow-up screens were performed to confirm inhibitory effects on smaller sets of compounds
+ involving AID 493021, AID 493022, AID 493023, and AID 493041.
+ AID 489005 was performed as counter screen validating active compounds of the primary screen.
targets:
- - id: activity_cav3_t_type_calcium_channels
- description: whether it active against cav3 t-type calcium channels receptor (1) or not (0)
- units:
- type: boolean
- names:
- - noun: inhibition of the cav3 t-type calcium channel activity
- - adjective: cav3 t-type calcium channel inhibition
- - gerund: inhibiting the activity of cav3 t-type calcium channels
- - verb: blocks t-type calcium channels
- - verb: inhibits cav3 t-type calcium channels
- pubchem_aids:
- - 1053190
- - 489005
- - 493021
- - 493022
- - 493023
- - 493041
- uris:
- - http://purl.obolibrary.org/obo/CHEBI_194338
+ - id: activity_cav3_t_type_calcium_channels
+ description: whether it is active against cav3 t-type calcium channels receptor (1) or not (0)
+ units:
+ type: boolean
+ names:
+ - noun: inhibition of the cav3 t-type calcium channel activity
+ - adjective: cav3 t-type calcium channel inhibition
+ - gerund: inhibiting the activity of cav3 t-type calcium channels
+ - verb: blocks t-type calcium channels
+ - verb: inhibits cav3 t-type calcium channels
+ pubchem_aids:
+ - 1053190
+ - 489005
+ - 493021
+ - 493022
+ - 493023
+ - 493041
+ uris:
+ - http://purl.obolibrary.org/obo/CHEBI_194338
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al
- description: original dataset
- - url: https://doi.org/10.3390/molecules18010735
- description: corresponding publication
- - url: https://doi.org/10.1093/nar/gky1033
- description: corresponding publication
- - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/
- description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al
+ description: original dataset
+ - url: https://doi.org/10.3390/molecules18010735
+ description: corresponding publication
+ - url: https://doi.org/10.1093/nar/gky1033
+ description: corresponding publication
+ - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/
+ description: corresponding publication
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
num_points: 100875
bibtex:
- - |-
- @article{Butkiewicz2013,
- doi = {10.3390/molecules18010735},
- url = {https://doi.org/10.3390/molecules18010735},
- year = {2013},
- month = jan,
- publisher = {{MDPI} {AG}},
- volume = {18},
- number = {1},
- pages = {735--756},
- author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and Jeffrey Mendenhall
- and Pedro Teixeira and C. Weaver and Jens Meiler},
- title = {Benchmarking Ligand-Based Virtual High-Throughput Screening with the {PubChem} Database},
- journal = {Molecules}}
- - |-
- @article{Kim2018,
- doi = {10.1093/nar/gky1033},
- url = {https://doi.org/10.1093/nar/gky1033},
- year = {2018},
- month = oct,
- publisher = {Oxford University Press ({OUP})},
- volume = {47},
- number = {D1},
- pages = {D1102--D1109},
- author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta Gindulyte and Jia He and Siqian He
- and Qingliang Li and Benjamin A Shoemaker and Paul A Thiessen and Bo Yu and Leonid Zaslavsky
- and Jian Zhang and Evan E Bolton},
- title = {{PubChem} 2019 update: improved access to chemical data},
- journal = {Nucleic Acids Research}}
- - |-
- @article{Butkiewicz2017,
- doi = {},
- url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/},
- year = {2017},
- publisher = {Chem Inform},
- volume = {3},
- number = {1},
- author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe, E. W. and Weaver, D. C.
- and Meiler, J.},
- title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from the {P}ub{C}hem {D}atabase}},
- journal = {Chemical Science}}
+ - |-
+ @article{Butkiewicz2013,
+ doi = {10.3390/molecules18010735},
+ url = {https://doi.org/10.3390/molecules18010735},
+ year = {2013},
+ month = jan,
+ publisher = {{MDPI} {AG}},
+ volume = {18},
+ number = {1},
+ pages = {735--756},
+ author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and Jeffrey Mendenhall
+ and Pedro Teixeira and C. Weaver and Jens Meiler},
+ title = {Benchmarking Ligand-Based Virtual High-Throughput Screening with the {PubChem} Database},
+ journal = {Molecules}}
+ - |-
+ @article{Kim2018,
+ doi = {10.1093/nar/gky1033},
+ url = {https://doi.org/10.1093/nar/gky1033},
+ year = {2018},
+ month = oct,
+ publisher = {Oxford University Press ({OUP})},
+ volume = {47},
+ number = {D1},
+ pages = {D1102--D1109},
+ author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta Gindulyte and Jia He and Siqian He
+ and Qingliang Li and Benjamin A Shoemaker and Paul A Thiessen and Bo Yu and Leonid Zaslavsky
+ and Jian Zhang and Evan E Bolton},
+ title = {{PubChem} 2019 update: improved access to chemical data},
+ journal = {Nucleic Acids Research}}
+ - |-
+ @article{Butkiewicz2017,
+ doi = {},
+ url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/},
+ year = {2017},
+ publisher = {Chem Inform},
+ volume = {3},
+ number = {1},
+ author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe, E. W. and Weaver, D. C.
+ and Meiler, J.},
+ title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from the {P}ub{C}hem {D}atabase},
+ journal = {Chemical Science}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_cav3_t_type_calcium_channels#no
- &NULL}{activity_cav3_t_type_calcium_channels__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {activity_cav3_t_type_calcium_channels#no
- &NULL}{activity_cav3_t_type_calcium_channels__names__noun}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_cav3_t_type_calcium_channels#no &NULL}{activity_cav3_t_type_calcium_channels__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_cav3_t_type_calcium_channels__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {activity_cav3_t_type_calcium_channels#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_cav3_t_type_calcium_channels__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_cav3_t_type_calcium_channels__names__gerund}?
- Assistant: {activity_cav3_t_type_calcium_channels#No&Yes}, this molecule is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {activity_cav3_t_type_calcium_channels__names__gerund}?
- Assistant: {activity_cav3_t_type_calcium_channels#No&Yes}, it is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}?
- Assistant: This is a molecule that is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {activity_cav3_t_type_calcium_channels#not &NULL}be {activity_cav3_t_type_calcium_channels__names__gerund}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {activity_cav3_t_type_calcium_channels#not &NULL}be {activity_cav3_t_type_calcium_channels__names__gerund}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} {activity_cav3_t_type_calcium_channels__names__gerund}:{activity_cav3_t_type_calcium_channels#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_cav3_t_type_calcium_channels__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{activity_cav3_t_type_calcium_channels#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_cav3_t_type_calcium_channels__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result:This molecule is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}.
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_cav3_t_type_calcium_channels__names__gerund}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {activity_cav3_t_type_calcium_channels%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%activity_cav3_t_type_calcium_channels%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%activity_cav3_t_type_calcium_channels%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_cav3_t_type_calcium_channels#no &NULL}{activity_cav3_t_type_calcium_channels__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {activity_cav3_t_type_calcium_channels#no &NULL}{activity_cav3_t_type_calcium_channels__names__noun}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_cav3_t_type_calcium_channels#no &NULL}{activity_cav3_t_type_calcium_channels__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {activity_cav3_t_type_calcium_channels__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {activity_cav3_t_type_calcium_channels#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {activity_cav3_t_type_calcium_channels__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_cav3_t_type_calcium_channels__names__gerund}?
+ Assistant: {activity_cav3_t_type_calcium_channels#No&Yes}, this molecule is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {activity_cav3_t_type_calcium_channels__names__gerund}?
+ Assistant: {activity_cav3_t_type_calcium_channels#No&Yes}, it is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}?
+ Assistant: This is a molecule that is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {activity_cav3_t_type_calcium_channels#not &NULL}be {activity_cav3_t_type_calcium_channels__names__gerund}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {activity_cav3_t_type_calcium_channels#not &NULL}be {activity_cav3_t_type_calcium_channels__names__gerund}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} {activity_cav3_t_type_calcium_channels__names__gerund}:{activity_cav3_t_type_calcium_channels#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {activity_cav3_t_type_calcium_channels__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{activity_cav3_t_type_calcium_channels#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {activity_cav3_t_type_calcium_channels__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result:This molecule is {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}.
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_cav3_t_type_calcium_channels__names__gerund}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {activity_cav3_t_type_calcium_channels%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%activity_cav3_t_type_calcium_channels%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {activity_cav3_t_type_calcium_channels#not &NULL}{activity_cav3_t_type_calcium_channels__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%activity_cav3_t_type_calcium_channels%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/chebi_20/meta.yaml b/data/tabular/chebi_20/meta.yaml
index 75a6bd7e4..53a853540 100644
--- a/data/tabular/chebi_20/meta.yaml
+++ b/data/tabular/chebi_20/meta.yaml
@@ -1,108 +1,107 @@
----
name: chebi_20
description: A dataset of pairs of natural language descriptions and SMILEs.
targets:
- - id: description
- description: a natural language description of the molecule SMILE
- units:
- type: string
- names:
- - noun: natural language description
- pubchem_aids: []
- uris: []
+ - id: description
+ description: a natural language description of the molecule SMILES
+ units:
+ type: string
+ names:
+ - noun: natural language description
+ pubchem_aids: []
+ uris: []
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
- - id: CID
- type: Other
- names:
- - noun: compound id
- sample: false
- description: This is the PubChem CID to identify a given molecule.
+ - id: SMILES
+ type: SMILES
+ description: SMILES
+ - id: CID
+ type: Other
+ names:
+ - noun: compound id
+ sample: false
+ description: This is the PubChem CID to identify a given molecule.
license: CC BY 4.0
links:
- - name: Research Paper
- url: https://aclanthology.org/2021.emnlp-main.47/
- description: Original Text2Mol paper which introduced the chebi_20 dataset.
- - name: Dataset
- url: https://github.com/cnedwards/text2mol
- description: Text2Mol original data repository on GitHub.
- - name: Hugging Face dataset upload
- url: https://huggingface.co/datasets/OpenBioML/chebi_20
- description: Hugging Face dataset uploaded to the OpenBioML organisation.
+ - name: Research Paper
+ url: https://aclanthology.org/2021.emnlp-main.47/
+ description: Original Text2Mol paper which introduced the chebi_20 dataset.
+ - name: Dataset
+ url: https://github.com/cnedwards/text2mol
+ description: Text2Mol original data repository on GitHub.
+ - name: Hugging Face dataset upload
+ url: https://huggingface.co/datasets/OpenBioML/chebi_20
+ description: Hugging Face dataset uploaded to the OpenBioML organisation.
benchmarks: []
num_points: 33008
bibtex:
- - |-
- @inproceedings{edwards2021text2mol,
- title={Text2Mol: Cross-Modal Molecule Retrieval with Natural Language Queries},
- author={Edwards, Carl and Zhai, ChengXiang and Ji, Heng},
- booktitle={Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
- pages={595--607},
- year={2021},
- url = {https://aclanthology.org/2021.emnlp-main.47/}
- }
- - |-
- @inproceedings{edwards-etal-2022-translation,
- title = "Translation between Molecules and Natural Language",
- author = "Edwards, Carl and
- Lai, Tuan and
- Ros, Kevin and
- Honke, Garrett and
- Cho, Kyunghyun and
- Ji, Heng",
- booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
- month = dec,
- year = "2022",
- address = "Abu Dhabi, United Arab Emirates",
- publisher = "Association for Computational Linguistics",
- url = "https://aclanthology.org/2022.emnlp-main.26",
- pages = "375--413",
- abstract = "We present MolT5 - a self-supervised learning framework for pretraining models on a vast amount of unlabeled natural language text and molecule strings. MolT5 allows for new, useful, and challenging analogs of traditional vision-language tasks, such as molecule captioning and text-based de novo molecule generation (altogether: translation between molecules and language), which we explore for the first time. Since MolT5 pretrains models on single-modal data, it helps overcome the chemistry domain shortcoming of data scarcity. Furthermore, we consider several metrics, including a new cross-modal embedding-based metric, to evaluate the tasks of molecule captioning and text-based molecule generation. Our results show that MolT5-based models are able to generate outputs, both molecules and captions, which in many cases are high quality.",
- }
+ - |-
+ @inproceedings{edwards2021text2mol,
+ title={Text2Mol: Cross-Modal Molecule Retrieval with Natural Language Queries},
+ author={Edwards, Carl and Zhai, ChengXiang and Ji, Heng},
+ booktitle={Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
+ pages={595--607},
+ year={2021},
+ url = {https://aclanthology.org/2021.emnlp-main.47/}
+ }
+ - |-
+ @inproceedings{edwards-etal-2022-translation,
+ title = "Translation between Molecules and Natural Language",
+ author = "Edwards, Carl and
+ Lai, Tuan and
+ Ros, Kevin and
+ Honke, Garrett and
+ Cho, Kyunghyun and
+ Ji, Heng",
+ booktitle = "Proceedings of the 2022 Conference on Empirical Methods in Natural Language Processing",
+ month = dec,
+ year = "2022",
+ address = "Abu Dhabi, United Arab Emirates",
+ publisher = "Association for Computational Linguistics",
+ url = "https://aclanthology.org/2022.emnlp-main.26",
+ pages = "375--413",
+ abstract = "We present MolT5 - a self-supervised learning framework for pretraining models on a vast amount of unlabeled natural language text and molecule strings. MolT5 allows for new, useful, and challenging analogs of traditional vision-language tasks, such as molecule captioning and text-based de novo molecule generation (altogether: translation between molecules and language), which we explore for the first time. Since MolT5 pretrains models on single-modal data, it helps overcome the chemistry domain shortcoming of data scarcity. Furthermore, we consider several metrics, including a new cross-modal embedding-based metric, to evaluate the tasks of molecule captioning and text-based molecule generation. Our results show that MolT5-based models are able to generate outputs, both molecules and captions, which in many cases are high quality.",
+ }
templates:
- - |-
- The molecule with the {SMILES__description} {#representation of |!}{SMILES#} can be described {#by|as!}:
- {description#}
- - |-
- Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule can be described {#by|as!}:
- {description#}
- - |-
- Task: Please create a {#text |!}description for a molecule{# based on its representation|!}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question with {#full|complete!} sentences.
- Result: {description#}
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: {description#}
- Result: {SMILES#}
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule based in this description:
- {description#}
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that can be described {#by|as!}:
- {description#}
- Assistant: This is a molecule that fits {#your|this!} description: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule can be described {#by|as!}:
- {description#}
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} fits {#your|this!} description: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule can be described {#by|as!}:
- {description#}
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} fits {#your|this!} description: {SMILES#}
- - |-
- Task: Please create a {#text |!}description for a molecule{# based on its representation|!}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question with {#full|complete!} sentences.
- Result:{description#}
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: {description#}
- Result:{SMILES#}
+ - |-
+ The molecule with the {SMILES__description} {#representation of |!}{SMILES#} can be described {#by|as!}:
+ {description#}
+ - |-
+ Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule can be described {#by|as!}:
+ {description#}
+ - |-
+ Task: Please create a {#text |!}description for a molecule{# based on its representation|!}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question with {#full|complete!} sentences.
+ Result: {description#}
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: {description#}
+ Result: {SMILES#}
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule based on this description:
+ {description#}
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that can be described {#by|as!}:
+ {description#}
+ Assistant: This is a molecule that fits {#your|this!} description: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule can be described {#by|as!}:
+ {description#}
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} fits {#your|this!} description: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule can be described {#by|as!}:
+ {description#}
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} fits {#your|this!} description: {SMILES#}
+ - |-
+ Task: Please create a {#text |!}description for a molecule{# based on its representation|!}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question with {#full|complete!} sentences.
+ Result:{description#}
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: {description#}
+ Result:{SMILES#}
diff --git a/data/tabular/chem_caption_smarts/meta.yaml b/data/tabular/chem_caption_smarts/meta.yaml
index 57f8ecfbe..7e8982ccf 100644
--- a/data/tabular/chem_caption_smarts/meta.yaml
+++ b/data/tabular/chem_caption_smarts/meta.yaml
@@ -1,41 +1,40 @@
----
name: chem_caption_smarts
description: |-
- This dataset contains the count of substructures in molecules
+ This dataset contains the count of substructures in molecules
targets:
- - id: smarts
- type: text
- description: substructure smarts
- names:
- - noun: SMARTS
- - noun: SMiles ARbitrary Target Specification (SMARTS)
- - id: completion
- type: categorical
- description: number of matches
- - id: completion_labels
- type: text
- description: name of the substructure
+ - id: smarts
+ type: text
+ description: substructure smarts
+ names:
+ - noun: SMARTS
+ - noun: SMiles ARbitrary Target Specification (SMARTS)
+ - id: completion
+ type: categorical
+ description: number of matches
+ - id: completion_labels
+ type: text
+ description: name of the substructure
identifiers:
- - id: representation
- type: text
- description: representation
- - id: representation_type
- type: text
- description: representation type
+ - id: representation
+ type: text
+ description: representation
+ - id: representation_type
+ type: text
+ description: representation type
license: CC BY 4.0
links:
- - url: https://github.com/lamalab-org/chem-caption
- description: Original codebase used to generate this dataset
+ - url: https://github.com/lamalab-org/chem-caption
+ description: Original codebase used to generate this dataset
templates:
- - |-
- Question: {#How many times|How often!} does the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contain the substructure with the {smarts__names__noun} {#smarts#}?
- Answer: {completion#}
- - |-
- Question: {#How many times|How often!} does the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contain a {completion#} substructure?
- Answer: {smarts__names__noun} {#smarts#}
- - |-
- User: {#I want to|I have to|I must|I would like to!} know {#how many times|how often!} the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contains the substructure with the {smarts__names__noun} {#smarts#}.
- Assistant: The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contains the substructure with the {smarts__names__noun} {#smarts#} {completion#} times.
- - |-
- User: {#I want to|I have to|I must|I would like to!} know how many times the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contains a {completion#} substructure.
- Assistant: The {#molecule|chemical|compound|chemical structure!} contains the substructure with the {smarts__names__noun} {#smarts#} {completion#} times.
+ - |-
+ Question: {#How many times|How often!} does the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contain the substructure with the {smarts__names__noun} {#smarts#}?
+ Answer: {completion#}
+ - |-
+ Question: {#How many times|How often!} does the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contain a {completion#} substructure?
+ Answer: {smarts__names__noun} {#smarts#}
+ - |-
+ User: {#I want to|I have to|I must|I would like to!} know {#how many times|how often!} the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contains the substructure with the {smarts__names__noun} {#smarts#}.
+ Assistant: The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contains the substructure with the {smarts__names__noun} {#smarts#} {completion#} times.
+ - |-
+ User: {#I want to|I have to|I must|I would like to!} know how many times the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} contains a {completion#} substructure.
+ Assistant: The {#molecule|chemical|compound|chemical structure!} contains the substructure with the {smarts__names__noun} {#smarts#} {completion#} times.
diff --git a/data/tabular/chembl_v29/meta.yaml b/data/tabular/chembl_v29/meta.yaml
index d729bd357..c6f09128e 100644
--- a/data/tabular/chembl_v29/meta.yaml
+++ b/data/tabular/chembl_v29/meta.yaml
@@ -1,48 +1,47 @@
----
name: chembl_v29
description: |-
- ChEMBL is a manually curated database of bioactive molecules with drug-like properties.
- It brings together chemical, bioactivity and genomic data
- to aid the translation of genomic information into effective new drugs.
+ ChEMBL is a manually curated database of bioactive molecules with drug-like properties.
+ It brings together chemical, bioactivity and genomic data
+ to aid the translation of genomic information into effective new drugs.
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY-SA 3.0
links:
- - url: https://academic.oup.com/nar/article/47/D1/D930/5162468
- description: Article about original dataset
- - url: https://academic.oup.com/nar/article/43/W1/W612/2467881
- description: Exemplary related article shown in tdc's website
+ - url: https://academic.oup.com/nar/article/47/D1/D930/5162468
+ description: Article about original dataset
+ - url: https://academic.oup.com/nar/article/43/W1/W612/2467881
+ description: Example related article shown on TDC's website
num_points: 2084637
bibtex:
- - |-
- @article{10.1093/nar/gky1075,
- author = {Mendez, David and Gaulton, Anna and Bento, A Patricia and Chambers, Jon and De Veij,
- Marleen and Felix, Eloy and Magarinos, Maria Paula and Mosquera,
- Juan F and Mutowo, Prudence and Nowotka, Michal and Gordillo-Maranon,
- Maria and Hunter, Fiona and Junco, Laura and Mugumbate, Grace and Rodriguez-Lopez, Milagros and Atkinson,
- Francis and Bosc, Nicolas and Radoux, Chris J and Segura-Cabrera, Aldo and Hersey, Anne and Leach, Andrew R},
- title = {ChEMBL: towards direct deposition of bioassay data},
- journal = {Nucleic Acids Research},
- volume = {47},
- number = {D1},
- pages = {D930-D940},
- year = {2018},
- month = {11},
- abstract = "{ChEMBL is a large, open-access bioactivity database
- (https://www.ebi.ac.uk/chembl), previously described in the 2012,
- 2014 and 2017 Nucleic Acids Research Database Issues.
- In the last two years, several important improvements have been made to the database and are described here.
- These include more robust capture and representation of assay details;
- a new data deposition system, allowing updating of data sets and deposition of supplementary data;
- and a completely redesigned web interface, with enhanced search and filtering capabilities.}",
- issn = {0305-1048},
- doi = {10.1093/nar/gky1075},
- url = {https://doi.org/10.1093/nar/gky1075},
- eprint = {https://academic.oup.com/nar/article-pdf/47/D1/D930/27437436/gky1075.pdf},
- }
+ - |-
+ @article{10.1093/nar/gky1075,
+ author = {Mendez, David and Gaulton, Anna and Bento, A Patricia and Chambers, Jon and De Veij,
+ Marleen and Felix, Eloy and Magarinos, Maria Paula and Mosquera,
+ Juan F and Mutowo, Prudence and Nowotka, Michal and Gordillo-Maranon,
+ Maria and Hunter, Fiona and Junco, Laura and Mugumbate, Grace and Rodriguez-Lopez, Milagros and Atkinson,
+ Francis and Bosc, Nicolas and Radoux, Chris J and Segura-Cabrera, Aldo and Hersey, Anne and Leach, Andrew R},
+ title = {ChEMBL: towards direct deposition of bioassay data},
+ journal = {Nucleic Acids Research},
+ volume = {47},
+ number = {D1},
+ pages = {D930-D940},
+ year = {2018},
+ month = {11},
+ abstract = "{ChEMBL is a large, open-access bioactivity database
+ (https://www.ebi.ac.uk/chembl), previously described in the 2012,
+ 2014 and 2017 Nucleic Acids Research Database Issues.
+ In the last two years, several important improvements have been made to the database and are described here.
+ These include more robust capture and representation of assay details;
+ a new data deposition system, allowing updating of data sets and deposition of supplementary data;
+ and a completely redesigned web interface, with enhanced search and filtering capabilities.}",
+ issn = {0305-1048},
+ doi = {10.1093/nar/gky1075},
+ url = {https://doi.org/10.1093/nar/gky1075},
+ eprint = {https://academic.oup.com/nar/article-pdf/47/D1/D930/27437436/gky1075.pdf},
+ }
diff --git a/data/tabular/chemcaption_fragments/meta.yaml b/data/tabular/chemcaption_fragments/meta.yaml
index d10c228c1..a6dc95c16 100644
--- a/data/tabular/chemcaption_fragments/meta.yaml
+++ b/data/tabular/chemcaption_fragments/meta.yaml
@@ -1,51 +1,50 @@
----
name: chemcaption_fragments
description: |-
- Checks if a given fragment is present in a molecule.
+ Checks if a given fragment is present in a molecule.
targets:
- - id: presence
- description: flag indicating whether the fragment is present in the molecule
- type: boolean
+ - id: presence
+ description: flag indicating whether the fragment is present in the molecule
+ type: boolean
identifiers:
- - id: molecule
- type: text
- description: identifier of the molecule
- - id: fragment
- type: text
- description: identifier of the fragment
- - id: smarts
- type: text
- description: SMARTS of the fragment
- - id: representation_type
- type: text
- description: representation type of the molecule
+ - id: molecule
+ type: text
+ description: identifier of the molecule
+ - id: fragment
+ type: text
+ description: identifier of the fragment
+ - id: smarts
+ type: text
+ description: SMARTS of the fragment
+ - id: representation_type
+ type: text
+ description: representation type of the molecule
license: MIT
links:
- - url: https://github.com/lamalab-org/chem-caption
- description: software used to generate the data
+ - url: https://github.com/lamalab-org/chem-caption
+ description: software used to generate the data
num_points: 812177
templates:
- - |-
- {#Question: |Q: !}Is the fragment with SMARTs {smarts#} present in the molecule with {representation_type#} {molecule#}?
- {#Answer: |A: |!}{presence#No&Yes}
- - |-
- {#Question: |Q: !}Is a {fragment#} fragment present in the molecule with {representation_type#} {molecule#}?
- {#Answer: |A: |!}{presence#No&Yes}
- - A {fragment#} fragment is {presence#present&absent} in the molecule with {representation_type#} {molecule#}.
- - |-
- Task: {#Answer a question about substructures|Answer a question about fragments!}
- {#Question: |Q: !}Is the fragment with SMARTS {smarts#} {#present in|part of!} the molecule with {representation_type#} {molecule#}?
- {#Answer: |A: |!}{presence#No&Yes}
- - |-
- User: Is the fragment {fragment#} {#present in|part of!} the molecule with {representation_type#} {molecule#}?
- Assistant: {presence#No&Yes}
- - |-
- User: I have a question about the molecule with {representation_type#} {molecule#}.
- Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!}
- User: Is a {fragment#} fragment {#present in|part of!} the molecule?
- Assistant: {presence#No&Yes}
- - |-
- User: I want to know more about the molecule with {representation_type#} {molecule#}.
- Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!}
- User: Is a {fragment#} fragment {#present in|part of of|substructure of!} the molecule?
- Assistant: {presence#No&Yes}
+ - |-
+ {#Question: |Q: !}Is the fragment with SMARTS {smarts#} present in the molecule with {representation_type#} {molecule#}?
+ {#Answer: |A: |!}{presence#No&Yes}
+ - |-
+ {#Question: |Q: !}Is a {fragment#} fragment present in the molecule with {representation_type#} {molecule#}?
+ {#Answer: |A: |!}{presence#No&Yes}
+ - A {fragment#} fragment is {presence#absent&present} in the molecule with {representation_type#} {molecule#}.
+ - |-
+ Task: {#Answer a question about substructures|Answer a question about fragments!}
+ {#Question: |Q: !}Is the fragment with SMARTS {smarts#} {#present in|part of!} the molecule with {representation_type#} {molecule#}?
+ {#Answer: |A: |!}{presence#No&Yes}
+ - |-
+ User: Is the fragment {fragment#} {#present in|part of!} the molecule with {representation_type#} {molecule#}?
+ Assistant: {presence#No&Yes}
+ - |-
+ User: I have a question about the molecule with {representation_type#} {molecule#}.
+ Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!}
+ User: Is a {fragment#} fragment {#present in|part of!} the molecule?
+ Assistant: {presence#No&Yes}
+ - |-
+ User: I want to know more about the molecule with {representation_type#} {molecule#}.
+ Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!}
+ User: Is a {fragment#} fragment {#present in|part of|a substructure of!} the molecule?
+ Assistant: {presence#No&Yes}
diff --git a/data/tabular/chemcaption_rdkit/meta.yaml b/data/tabular/chemcaption_rdkit/meta.yaml
index 99424eed9..69057f470 100644
--- a/data/tabular/chemcaption_rdkit/meta.yaml
+++ b/data/tabular/chemcaption_rdkit/meta.yaml
@@ -1,342 +1,332 @@
----
name: chemcaption_rdkit
description: |-
- This dataset contains molecular descriptors, mostly derived using RDKit.
+ This dataset contains molecular descriptors, mostly derived using RDKit.
targets:
- - id: num_valence_electrons
- type: categorical
- description: number of valence electrons
- names:
- - noun: number of valence electrons
- - noun: valence electron count
- - only_name: valence electrons
- - id: rotable_proportion
- type: continuous
- significant_digits: 3
- description: proportion of rotatable bonds
- names:
- - noun: proportion of rotatable bonds
- - noun: rotatable bond proportion
- - id: non_rotable_proportion
- type: continuous
- significant_digits: 3
- description: proportion of non-rotatable bonds
- names:
- - noun: proportion of non-rotatable bonds
- - noun: non-rotatable bond proportion
- - id: num_single_bonds
- type: categorical
- description: number of single bonds
- names:
- - noun: number of single bonds
- - only_name: single bonds
- - id: num_double_bonds
- type: categorical
- description: number of double bonds
- names:
- - noun: number of double bonds
- - only_name: double bonds
- - id: num_triple_bonds
- type: categorical
- description: number of triple bonds
- names:
- - noun: number of triple bonds
- - only_name: triple bonds
- - id: num_aromatic_bonds
- type: categorical
- description: number of aromatic bonds
- names:
- - noun: number of aromatic bonds
- - only_name: aromatic bonds
- - id: num_bonds
- type: categorical
- description: number of bonds
- names:
- - noun: number of bonds
- - noun: bond count
- - only_name: bonds
- - id: num_carbon_atoms
- type: categorical
- description: number of carbon atoms
- names:
- - noun: number of carbon atoms
- - noun: carbon atom count
- - only_name: carbon atoms
- - id: num_hydrogen_atoms
- type: categorical
- description: number of hydrogen atoms
- names:
- - noun: number of hydrogen atoms
- - noun: hydrogen atom count
- - only_name: hydrogen atoms
- - id: num_nitrogen_atoms
- type: categorical
- description: number of nitrogen atoms
- names:
- - noun: number of nitrogen atoms
- - noun: nitrogen atom count
- - only_name: nitrogen atoms
- - id: num_oxygen_atoms
- type: categorical
- description: number of oxygen atoms
- names:
- - noun: number of oxygen atoms
- - noun: oxygen atom count
- - only_name: oxygen atoms
- - id: num_hydrogen_bond_acceptors
- type: categorical
- description: number of hydrogen bond acceptors
- names:
- - noun: number of hydrogen bond acceptors
- - noun: hydrogen bond acceptor count
- - only_name: hydrogen bond acceptors
- - id: num_hydrogen_bond_donors
- type: categorical
- description: number of hydrogen bond donors
- names:
- - noun: number of hydrogen bond donors
- - noun: hydrogen bond donor count
- - only_name: hydrogen bond donors
- - id: num_lipinski_violations
- type: categorical
- description: number of Lipinski violations
- names:
- - noun: number of violations of Lipinski's rule of five
- - noun: number of violations of Lipinski's rule of 5
- - only_name: violations of Lipinski's rule of five
- - only_name: violations of Lipinski's rule of 5
- - id: monoisotopic_molecular_mass
- type: continuous
- significant_digits: 3
- description: monoisotopic molecular mass
- names:
- - noun: monoisotopic molecular mass
- - noun: monoisotopic mass
- units: Da
- - id: carbon_mass
- type: continuous
- significant_digits: 3
- description: carbon mass
- names:
- - noun: carbon mass fraction
- - noun: carbon mass proportion
- - id: hydrogen_mass
- type: continuous
- significant_digits: 3
- description: hydrogen mass
- names:
- - noun: hydrogen mass fraction
- - noun: hydrogen mass proportion
- - id: nitrogen_mass
- type: continuous
- significant_digits: 3
- description: nitrogen mass
- names:
- - noun: nitrogen mass fraction
- - noun: nitrogen mass proportion
- - id: oxygen_mass
- type: continuous
- significant_digits: 3
- description: oxygen mass
- names:
- - noun: oxygen mass fraction
- - noun: oxygen mass proportion
- - id: num_chiral_centers
- type: categorical
- description: number of chiral centers
- names:
- - noun: number of chiral centers
- - noun: chiral center count
- - only_name: chiral centers
- - id: inertial_shape_factor
- type: continuous
- significant_digits: 3
- description: inertial shape factor
- names:
- - noun: inertial shape factor
- - id: eccentricity
- type: continuous
- significant_digits: 3
- description: eccentricity
- names:
- - noun: eccentricity
- - id: asphericity
- type: continuous
- significant_digits: 3
- description: asphericity
- names:
- - noun: asphericity
- - id: npr1_value
- type: continuous
- significant_digits: 3
- description: NPR1 value
- names:
- - noun: NPR1 value
- - noun: normalized principal moment of inertia ratio 1 value
- - noun: normalized principal moment of inertia ratio 1 (NPR1) value
- - id: npr2_value
- type: continuous
- significant_digits: 3
- description: NPR2 value
- names:
- - noun: NPR2 value
- - noun: normalized principal moment of inertia ratio 2 value
- - noun: normalized principal moment of inertia ratio 2 (NPR2) value
- - id: pmi1_value
- type: continuous
- significant_digits: 3
- description: PMI1 value
- names:
- - noun: PMI1 value
- - noun: principal moment of inertia 1 value
- - noun: principal moment of inertia 1 (PMI1) value
- - id: pmi2_value
- type: continuous
- significant_digits: 3
- description: PMI2 value
- names:
- - noun: PMI2 value
- - noun: principal moment of inertia 2 value
- - noun: principal moment of inertia 2 (PMI2) value
- - id: molecular_formula
- type: text
- description: molecular formula
- names:
- - noun: molecular formula
- - noun: chemical formula
+ - id: num_valence_electrons
+ type: categorical
+ description: number of valence electrons
+ names:
+ - noun: number of valence electrons
+ - noun: valence electron count
+ - only_name: valence electrons
+ - id: rotable_proportion
+ type: continuous
+ significant_digits: 3
+ description: proportion of rotatable bonds
+ names:
+ - noun: proportion of rotatable bonds
+ - noun: rotatable bond proportion
+ - id: non_rotable_proportion
+ type: continuous
+ significant_digits: 3
+ description: proportion of non-rotatable bonds
+ names:
+ - noun: proportion of non-rotatable bonds
+ - noun: non-rotatable bond proportion
+ - id: num_single_bonds
+ type: categorical
+ description: number of single bonds
+ names:
+ - noun: number of single bonds
+ - only_name: single bonds
+ - id: num_double_bonds
+ type: categorical
+ description: number of double bonds
+ names:
+ - noun: number of double bonds
+ - only_name: double bonds
+ - id: num_triple_bonds
+ type: categorical
+ description: number of triple bonds
+ names:
+ - noun: number of triple bonds
+ - only_name: triple bonds
+ - id: num_aromatic_bonds
+ type: categorical
+ description: number of aromatic bonds
+ names:
+ - noun: number of aromatic bonds
+ - only_name: aromatic bonds
+ - id: num_bonds
+ type: categorical
+ description: number of bonds
+ names:
+ - noun: number of bonds
+ - noun: bond count
+ - only_name: bonds
+ - id: num_carbon_atoms
+ type: categorical
+ description: number of carbon atoms
+ names:
+ - noun: number of carbon atoms
+ - noun: carbon atom count
+ - only_name: carbon atoms
+ - id: num_hydrogen_atoms
+ type: categorical
+ description: number of hydrogen atoms
+ names:
+ - noun: number of hydrogen atoms
+ - noun: hydrogen atom count
+ - only_name: hydrogen atoms
+ - id: num_nitrogen_atoms
+ type: categorical
+ description: number of nitrogen atoms
+ names:
+ - noun: number of nitrogen atoms
+ - noun: nitrogen atom count
+ - only_name: nitrogen atoms
+ - id: num_oxygen_atoms
+ type: categorical
+ description: number of oxygen atoms
+ names:
+ - noun: number of oxygen atoms
+ - noun: oxygen atom count
+ - only_name: oxygen atoms
+ - id: num_hydrogen_bond_acceptors
+ type: categorical
+ description: number of hydrogen bond acceptors
+ names:
+ - noun: number of hydrogen bond acceptors
+ - noun: hydrogen bond acceptor count
+ - only_name: hydrogen bond acceptors
+ - id: num_hydrogen_bond_donors
+ type: categorical
+ description: number of hydrogen bond donors
+ names:
+ - noun: number of hydrogen bond donors
+ - noun: hydrogen bond donor count
+ - only_name: hydrogen bond donors
+ - id: num_lipinski_violations
+ type: categorical
+ description: number of Lipinski violations
+ names:
+ - noun: number of violations of Lipinski's rule of five
+ - noun: number of violations of Lipinski's rule of 5
+ - only_name: violations of Lipinski's rule of five
+ - only_name: violations of Lipinski's rule of 5
+ - id: monoisotopic_molecular_mass
+ type: continuous
+ significant_digits: 3
+ description: monoisotopic molecular mass
+ names:
+ - noun: monoisotopic molecular mass
+ - noun: monoisotopic mass
+ units: Da
+ - id: carbon_mass
+ type: continuous
+ significant_digits: 3
+ description: carbon mass
+ names:
+ - noun: carbon mass fraction
+ - noun: carbon mass proportion
+ - id: hydrogen_mass
+ type: continuous
+ significant_digits: 3
+ description: hydrogen mass
+ names:
+ - noun: hydrogen mass fraction
+ - noun: hydrogen mass proportion
+ - id: nitrogen_mass
+ type: continuous
+ significant_digits: 3
+ description: nitrogen mass
+ names:
+ - noun: nitrogen mass fraction
+ - noun: nitrogen mass proportion
+ - id: oxygen_mass
+ type: continuous
+ significant_digits: 3
+ description: oxygen mass
+ names:
+ - noun: oxygen mass fraction
+ - noun: oxygen mass proportion
+ - id: num_chiral_centers
+ type: categorical
+ description: number of chiral centers
+ names:
+ - noun: number of chiral centers
+ - noun: chiral center count
+ - only_name: chiral centers
+ - id: inertial_shape_factor
+ type: continuous
+ significant_digits: 3
+ description: inertial shape factor
+ names:
+ - noun: inertial shape factor
+ - id: eccentricity
+ type: continuous
+ significant_digits: 3
+ description: eccentricity
+ names:
+ - noun: eccentricity
+ - id: asphericity
+ type: continuous
+ significant_digits: 3
+ description: asphericity
+ names:
+ - noun: asphericity
+ - id: npr1_value
+ type: continuous
+ significant_digits: 3
+ description: NPR1 value
+ names:
+ - noun: NPR1 value
+ - noun: normalized principal moment of inertia ratio 1 value
+ - noun: normalized principal moment of inertia ratio 1 (NPR1) value
+ - id: npr2_value
+ type: continuous
+ significant_digits: 3
+ description: NPR2 value
+ names:
+ - noun: NPR2 value
+ - noun: normalized principal moment of inertia ratio 2 value
+ - noun: normalized principal moment of inertia ratio 2 (NPR2) value
+ - id: pmi1_value
+ type: continuous
+ significant_digits: 3
+ description: PMI1 value
+ names:
+ - noun: PMI1 value
+ - noun: principal moment of inertia 1 value
+ - noun: principal moment of inertia 1 (PMI1) value
+ - id: pmi2_value
+ type: continuous
+ significant_digits: 3
+ description: PMI2 value
+ names:
+ - noun: PMI2 value
+ - noun: principal moment of inertia 2 value
+ - noun: principal moment of inertia 2 (PMI2) value
+ - id: molecular_formula
+ type: text
+ description: molecular formula
+ names:
+ - noun: molecular formula
+ - noun: chemical formula
identifiers:
- - id: representation
- type: text
- description: representation
- - id: representation_type
- type: text
- description: representation type
+ - id: representation
+ type: text
+ description: representation
+ - id: representation_type
+ type: text
+ description: representation type
license: CC BY 4.0
num_points: 79811
links:
- - url: https://github.com/lamalab-org/chem-caption
- description: Original codebase used to generate this dataset
+ - url: https://github.com/lamalab-org/chem-caption
+ description: Original codebase used to generate this dataset
templates:
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_valence_electrons#} {num_valence_electrons__names__only_name}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {rotable_proportion__names__noun} of {rotable_proportion#}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {non_rotable_proportion__names__noun} of
- {non_rotable_proportion#}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_single_bonds#} {num_single_bonds__names__only_name}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_double_bonds#} {num_double_bonds__names__only_name}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_triple_bonds#} {num_triple_bonds__names__only_name}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_aromatic_bonds#} {num_aromatic_bonds__names__only_name}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_bonds#} {num_bonds__names__only_name}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_carbon_atoms#} {num_carbon_atoms__names__only_name}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_hydrogen_atoms#} {num_hydrogen_atoms__names__only_name}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_nitrogen_atoms#} {num_nitrogen_atoms__names__only_name}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_oxygen_atoms#} {num_oxygen_atoms__names__only_name}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__only_name}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__only_name}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_lipinski_violations#} {num_lipinski_violations__names__only_name}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {monoisotopic_molecular_mass__names__noun}
- of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {carbon_mass__names__noun} of {carbon_mass#}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {hydrogen_mass__names__noun} of {hydrogen_mass#}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {nitrogen_mass__names__noun} of {nitrogen_mass#}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {oxygen_mass__names__noun} of {oxygen_mass#}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_chiral_centers#} {num_chiral_centers__names__only_name}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {inertial_shape_factor__names__noun} of
- {inertial_shape_factor#}.
- - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {eccentricity__names__noun}
- of {eccentricity#}.
- - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {asphericity__names__noun}
- of {asphericity#}.
- - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {npr1_value__names__noun}
- of {npr1_value#}.
- - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {npr2_value__names__noun}
- of {npr2_value#}.
- - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {pmi1_value__names__noun}
- of {pmi1_value#}.
- - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {pmi2_value__names__noun}
- of {pmi2_value#}.
- - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has the {molecular_formula__names__noun} {molecular_formula#}.
- - |-
- Question: What is the {molecular_formula__names__noun} and {monoisotopic_molecular_mass__names__noun} of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}?
- Constraint: Answer by only returning the values separated by a comma.
- Answer: {molecular_formula#}, {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}
- - |-
- Question: What is the {molecular_formula__names__noun} and {num_valence_electrons__names__noun} of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}?
- Constraint: Answer by only returning the values separated by a comma.
- Answer: {molecular_formula#}, {num_valence_electrons#}
- - |-
- Question: What is the {molecular_formula__names__noun}, {rotable_proportion__names__noun}, and {num_chiral_centers__names__noun} of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}?
- Constraint: Answer by only returning the values separated by a comma.
- Answer: {molecular_formula#}, {rotable_proportion#}, {num_chiral_centers#}
- - |-
- Question: What is the {carbon_mass__names__noun}, {hydrogen_mass__names__noun}, {nitrogen_mass__names__noun}, and {oxygen_mass__names__noun} of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}?
- Constraint: Answer by only returning the values separated by a comma.
- Answer: {carbon_mass#}, {hydrogen_mass#}, {nitrogen_mass#}, {oxygen_mass#}
- - |-
- User: I {#want|need|have!} to design a {#molecule|chemical|compound|chemical structure!} with {molecular_formula__names__noun} {molecular_formula#}.
- Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
- User: I {#want|would like!} the {#molecule|chemical|compound|chemical structure!} to have {num_valence_electrons#} {num_valence_electrons__names__noun}, {num_chiral_centers#} {num_chiral_centers__names__only_name}, and {num_lipinski_violations#} {num_lipinski_violations__names__only_name}.
- Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}.
- - |-
- User: I {#want|need|have!} to design a {#molecule|chemical|compound|chemical structure!} with {num_lipinski_violations#} {num_lipinski_violations__names__only_name}.
- Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
- User: I {#want|would like!} the {#molecule|chemical|compound|chemical structure!} to have {num_bonds#} {num_bonds__names__only_name}, {num_chiral_centers#} {num_chiral_centers__names__only_name}, and a {carbon_mass__names__noun} of {carbon_mass#}.
- Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}.
- - |-
- User: I {#want|need|have!} to design a {#molecule|chemical|compound|drug|chemical structure!} with {num_lipinski_violations#} {num_lipinski_violations__names__only_name}.
- Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
- User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_bonds#} {num_bonds__names__only_name}.
- Assistant: {#OK, that already helps constraining my search. |Thanks, that already helps constraining my search. |Thanks, that already helps. |OK, that already helps. |!}It would {#help|be great|be useful!} if you could tell me more about the {#molecule|chemical|compound|drug|chemical structure!} you are looking for.
- User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_chiral_centers#} {num_chiral_centers__names__only_name}. {#In addition,|Additionally,|Moreover,!} I want the {#molecule|chemical|compound|drug|chemical structure!} to have a {carbon_mass__names__noun} of {carbon_mass#}.
- Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
- - |-
- User: I {#want|need|have!} to design a {#molecule|chemical|compound|drug|chemical structure!} with {num_lipinski_violations#} {num_lipinski_violations__names__only_name}.
- Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
- User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_bonds#} {num_bonds__names__only_name}.
- Assistant: {#OK, that already helps constraining my search. |Thanks, that already helps constraining my search. |Thanks, that already helps. |OK, that already helps. |!}It would {#help|be great|be useful!} if you could tell me more about the {#molecule|chemical|compound|drug|chemical structure!} you {#want to design|are looking for|are interested in!}.
- User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_chiral_centers#} {num_chiral_centers__names__only_name}. {#In addition,|Additionally,|Moreover,!} I want the {#molecule|chemical|compound|drug|chemical structure!} to have a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}.
- Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
- - |-
- User: I {#want|need|have!} to design a {#molecule|chemical|compound|drug|chemical structure!} with a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}.
- Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
- User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_bonds#} {num_bonds__names__only_name}.
- Assistant: {#OK, that already helps constraining my search. |Thanks, that already helps constraining my search. |Thanks, that already helps. |OK, that already helps. |!}It would {#help|be great|be useful!} if you could tell me more about the {#molecule|chemical|compound|drug|chemical structure!} you {#want to design|are looking for|are interested in!}.
- User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_chiral_centers#} {num_chiral_centers__names__only_name}.
- Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
- - |-
- User: I {#have some questions|want to ask you!} about the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
- Assistant: {#How can I help?|What can I do for you?|How can I be of assistance?!}
- User: {#What is|I want to know|I need to know!} the {molecular_formula__names__noun} and {monoisotopic_molecular_mass__names__noun} of this {#molecule|chemical|compound|drug|chemical structure!}.
- Assistant: The {#molecule|chemical|compound|drug|chemical structure!} has the {molecular_formula__names__noun} {molecular_formula#} and a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}.
- - |-
- User: I {#have some questions|want to ask you!} about the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
- Assistant: {#How can I help?|What can I do for you?|How can I be of assistance?!}
- User: {#What is|I want to know|I need to know!} the {asphericity__names__noun} of this {#molecule|chemical|compound|drug|chemical structure!}.
- Assistant: The {#molecule|chemical|compound|drug|chemical structure!} has an {asphericity__names__noun} of {asphericity#}.
- - |-
- User: I {#want to|must|would like to|need to!} {#design|create|synthesize|make!} a {#molecule|chemical|compound|drug|chemical structure!} with {asphericity__names__noun} of {asphericity#}.
- Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
- User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {eccentricity__names__noun} of {eccentricity#}.
- Assistant: {#Is there anything else I should know?|Is there anything else I should be aware of?|Is there anything else I should take into account?|Is there anything else I should consider?|Is there anything else I should take into consideration?|Is there anything else I should take into account?|Is there anything else I should take into consideration?|Is there anything else I should consider?!}
- User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {npr1_value__names__noun} of {npr1_value#} and a {molecular_formula__names__noun} of {molecular_formula#}.
- Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
- - |-
- User: I {#want to|must|would like to|need to!} {#design|create|synthesize|make!} a {#molecule|chemical|compound|drug|chemical structure!} with {eccentricity__names__noun} of {eccentricity#}.
- Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
- User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {asphericity__names__noun} of {asphericity#}.
- Assistant: {#Is there anything else I should know?|Is there anything else I should be aware of?|Is there anything else I should take into account?|Is there anything else I should consider?|Is there anything else I should take into consideration?|Is there anything else I should take into account?|Is there anything else I should take into consideration?|Is there anything else I should consider?!}
- User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {num_lipinski_violations#} {num_lipinski_violations__names__only_name} and a {molecular_formula__names__noun} of {molecular_formula#}.
- Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
- - |-
- User: I {#want to|must|would like to|need to!} {#design|create|synthesize|make!} a {#molecule|chemical|compound|drug|chemical structure!} with {npr1_value__names__noun} of {npr1_value#}.
- Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
- User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {npr2_value__names__noun} of {npr2_value#}.
- Assistant: {#Is there anything else I should know?|Is there anything else I should be aware of?|Is there anything else I should take into account?|Is there anything else I should consider?|Is there anything else I should take into consideration?|Is there anything else I should take into account?|Is there anything else I should take into consideration?|Is there anything else I should consider?!}
- User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {molecular_formula__names__noun} of {molecular_formula#}.
- Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
- - |-
- User: I {#want to|must|would like to|need to!} {#design|create|synthesize|make!} a {#molecule|chemical|compound|drug|chemical structure!} with {pmi1_value__names__noun} of {pmi1_value#}.
- Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
- User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {pmi2_value__names__noun} of {pmi2_value#}.
- Assistant: {#Is there anything else I should know?|Is there anything else I should be aware of?|Is there anything else I should take into account?|Is there anything else I should consider?|Is there anything else I should take into consideration?|Is there anything else I should take into account?|Is there anything else I should take into consideration?|Is there anything else I should consider?!}
- User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {molecular_formula__names__noun} of {molecular_formula#}.
- Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_valence_electrons#} {num_valence_electrons__names__only_name}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {rotable_proportion__names__noun} of {rotable_proportion#}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {non_rotable_proportion__names__noun} of {non_rotable_proportion#}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_single_bonds#} {num_single_bonds__names__only_name}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_double_bonds#} {num_double_bonds__names__only_name}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_triple_bonds#} {num_triple_bonds__names__only_name}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_aromatic_bonds#} {num_aromatic_bonds__names__only_name}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_bonds#} {num_bonds__names__only_name}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_carbon_atoms#} {num_carbon_atoms__names__only_name}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_hydrogen_atoms#} {num_hydrogen_atoms__names__only_name}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_nitrogen_atoms#} {num_nitrogen_atoms__names__only_name}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_oxygen_atoms#} {num_oxygen_atoms__names__only_name}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__only_name}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__only_name}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_lipinski_violations#} {num_lipinski_violations__names__only_name}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {carbon_mass__names__noun} of {carbon_mass#}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {hydrogen_mass__names__noun} of {hydrogen_mass#}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {nitrogen_mass__names__noun} of {nitrogen_mass#}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {oxygen_mass__names__noun} of {oxygen_mass#}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has {num_chiral_centers#} {num_chiral_centers__names__only_name}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {inertial_shape_factor__names__noun} of {inertial_shape_factor#}.
+ - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {eccentricity__names__noun} of {eccentricity#}.
+ - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {asphericity__names__noun} of {asphericity#}.
+ - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {npr1_value__names__noun} of {npr1_value#}.
+ - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has an {npr2_value__names__noun} of {npr2_value#}.
+ - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {pmi1_value__names__noun} of {pmi1_value#}.
+ - A conformer of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has a {pmi2_value__names__noun} of {pmi2_value#}.
+ - The {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#} has the {molecular_formula__names__noun} {molecular_formula#}.
+ - |-
+ Question: What is the {molecular_formula__names__noun} and {monoisotopic_molecular_mass__names__noun} of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}?
+ Constraint: Answer by only returning the values separated by a comma.
+ Answer: {molecular_formula#}, {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}
+ - |-
+ Question: What is the {molecular_formula__names__noun} and {num_valence_electrons__names__noun} of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}?
+ Constraint: Answer by only returning the values separated by a comma.
+ Answer: {molecular_formula#}, {num_valence_electrons#}
+ - |-
+ Question: What is the {molecular_formula__names__noun}, {rotable_proportion__names__noun}, and {num_chiral_centers__names__noun} of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}?
+ Constraint: Answer by only returning the values separated by a comma.
+ Answer: {molecular_formula#}, {rotable_proportion#}, {num_chiral_centers#}
+ - |-
+ Question: What is the {carbon_mass__names__noun}, {hydrogen_mass__names__noun}, {nitrogen_mass__names__noun}, and {oxygen_mass__names__noun} of the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}?
+ Constraint: Answer by only returning the values separated by a comma.
+ Answer: {carbon_mass#}, {hydrogen_mass#}, {nitrogen_mass#}, {oxygen_mass#}
+ - |-
+ User: I {#want|need|have!} to design a {#molecule|chemical|compound|chemical structure!} with {molecular_formula__names__noun} {molecular_formula#}.
+ Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
+ User: I {#want|would like!} the {#molecule|chemical|compound|chemical structure!} to have {num_valence_electrons#} {num_valence_electrons__names__noun}, {num_chiral_centers#} {num_chiral_centers__names__only_name}, and {num_lipinski_violations#} {num_lipinski_violations__names__only_name}.
+ Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}.
+ - |-
+ User: I {#want|need|have!} to design a {#molecule|chemical|compound|chemical structure!} with {num_lipinski_violations#} {num_lipinski_violations__names__only_name}.
+ Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
+ User: I {#want|would like!} the {#molecule|chemical|compound|chemical structure!} to have {num_bonds#} {num_bonds__names__only_name}, {num_chiral_centers#} {num_chiral_centers__names__only_name}, and a {carbon_mass__names__noun} of {carbon_mass#}.
+ Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|chemical structure!} with {representation_type#} {representation#}.
+ - |-
+ User: I {#want|need|have!} to design a {#molecule|chemical|compound|drug|chemical structure!} with {num_lipinski_violations#} {num_lipinski_violations__names__only_name}.
+ Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
+ User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_bonds#} {num_bonds__names__only_name}.
+ Assistant: {#OK, that already helps constrain my search. |Thanks, that already helps constrain my search. |Thanks, that already helps. |OK, that already helps. |!}It would {#help|be great|be useful!} if you could tell me more about the {#molecule|chemical|compound|drug|chemical structure!} you are looking for.
+ User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_chiral_centers#} {num_chiral_centers__names__only_name}. {#In addition,|Additionally,|Moreover,!} I want the {#molecule|chemical|compound|drug|chemical structure!} to have a {carbon_mass__names__noun} of {carbon_mass#}.
+ Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
+ - |-
+ User: I {#want|need|have!} to design a {#molecule|chemical|compound|drug|chemical structure!} with {num_lipinski_violations#} {num_lipinski_violations__names__only_name}.
+ Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
+ User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_bonds#} {num_bonds__names__only_name}.
+ Assistant: {#OK, that already helps constrain my search. |Thanks, that already helps constrain my search. |Thanks, that already helps. |OK, that already helps. |!}It would {#help|be great|be useful!} if you could tell me more about the {#molecule|chemical|compound|drug|chemical structure!} you {#want to design|are looking for|are interested in!}.
+ User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_chiral_centers#} {num_chiral_centers__names__only_name}. {#In addition,|Additionally,|Moreover,!} I want the {#molecule|chemical|compound|drug|chemical structure!} to have a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}.
+ Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
+ - |-
+ User: I {#want|need|have!} to design a {#molecule|chemical|compound|drug|chemical structure!} with a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}.
+ Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
+ User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_bonds#} {num_bonds__names__only_name}.
+ Assistant: {#OK, that already helps constrain my search. |Thanks, that already helps constrain my search. |Thanks, that already helps. |OK, that already helps. |!}It would {#help|be great|be useful!} if you could tell me more about the {#molecule|chemical|compound|drug|chemical structure!} you {#want to design|are looking for|are interested in!}.
+ User: I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_chiral_centers#} {num_chiral_centers__names__only_name}.
+ Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
+ - |-
+ User: I {#have some questions|want to ask you!} about the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
+ Assistant: {#How can I help?|What can I do for you?|How can I be of assistance?!}
+ User: {#What is|I want to know|I need to know!} the {molecular_formula__names__noun} and {monoisotopic_molecular_mass__names__noun} of this {#molecule|chemical|compound|drug|chemical structure!}.
+ Assistant: The {#molecule|chemical|compound|drug|chemical structure!} has the {molecular_formula__names__noun} {molecular_formula#} and a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}.
+ - |-
+ User: I {#have some questions|want to ask you!} about the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
+ Assistant: {#How can I help?|What can I do for you?|How can I be of assistance?!}
+ User: {#What is|I want to know|I need to know!} the {asphericity__names__noun} of this {#molecule|chemical|compound|drug|chemical structure!}.
+ Assistant: The {#molecule|chemical|compound|drug|chemical structure!} has an {asphericity__names__noun} of {asphericity#}.
+ - |-
+ User: I {#want to|must|would like to|need to!} {#design|create|synthesize|make!} a {#molecule|chemical|compound|drug|chemical structure!} with an {asphericity__names__noun} of {asphericity#}.
+ Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
+ User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have an {eccentricity__names__noun} of {eccentricity#}.
+ Assistant: {#Is there anything else I should know?|Is there anything else I should be aware of?|Is there anything else I should take into account?|Is there anything else I should consider?|Is there anything else I should take into consideration?|Is there anything else I should take into account?|Is there anything else I should take into consideration?|Is there anything else I should consider?!}
+ User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {npr1_value__names__noun} of {npr1_value#} and a {molecular_formula__names__noun} of {molecular_formula#}.
+ Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
+ - |-
+ User: I {#want to|must|would like to|need to!} {#design|create|synthesize|make!} a {#molecule|chemical|compound|drug|chemical structure!} with {eccentricity__names__noun} of {eccentricity#}.
+ Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
+ User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have an {asphericity__names__noun} of {asphericity#}.
+ Assistant: {#Is there anything else I should know?|Is there anything else I should be aware of?|Is there anything else I should take into account?|Is there anything else I should consider?|Is there anything else I should take into consideration?|Is there anything else I should take into account?|Is there anything else I should take into consideration?|Is there anything else I should consider?!}
+ User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have {num_lipinski_violations#} {num_lipinski_violations__names__only_name} and a {molecular_formula__names__noun} of {molecular_formula#}.
+ Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
+ - |-
+ User: I {#want to|must|would like to|need to!} {#design|create|synthesize|make!} a {#molecule|chemical|compound|drug|chemical structure!} with {npr1_value__names__noun} of {npr1_value#}.
+ Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
+ User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {npr2_value__names__noun} of {npr2_value#}.
+ Assistant: {#Is there anything else I should know?|Is there anything else I should be aware of?|Is there anything else I should take into account?|Is there anything else I should consider?|Is there anything else I should take into consideration?|Is there anything else I should take into account?|Is there anything else I should take into consideration?|Is there anything else I should consider?!}
+ User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {molecular_formula__names__noun} of {molecular_formula#}.
+ Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
+ - |-
+ User: I {#want to|must|would like to|need to!} {#design|create|synthesize|make!} a {#molecule|chemical|compound|drug|chemical structure!} with {pmi1_value__names__noun} of {pmi1_value#}.
+ Assistant: {#That's interesting, do you have|Do you have|Cool, do you have|Awesome, do you have!} any other {#constraints|requirements|conditions|limitations!}?
+ User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {pmi2_value__names__noun} of {pmi2_value#}.
+ Assistant: {#Is there anything else I should know?|Is there anything else I should be aware of?|Is there anything else I should take into account?|Is there anything else I should consider?|Is there anything else I should take into consideration?|Is there anything else I should take into account?|Is there anything else I should take into consideration?|Is there anything else I should consider?!}
+ User: {#In addition,|Additionally,|Moreover,!} I {#want|would like!} the {#molecule|chemical|compound|drug|chemical structure!} to have a {molecular_formula__names__noun} of {molecular_formula#}.
+ Assistant: {#Given those requirements, |In that case, |!}I recommend the {#molecule|chemical|compound|drug|chemical structure!} with {representation_type#} {representation#}.
diff --git a/data/tabular/chemdner/meta.yaml b/data/tabular/chemdner/meta.yaml
index 4a207ba67..dad41aa94 100644
--- a/data/tabular/chemdner/meta.yaml
+++ b/data/tabular/chemdner/meta.yaml
@@ -1,97 +1,96 @@
----
name: chemdner
description: |-
- The CHEMDNER corpus comprises 10,000 PubMed abstracts, which have been meticulously annotated by expert chemistry literature curators according to task-specific guidelines, identifying a total of 84,355 mentions of chemical entities. The CHEMDNER corpus is a collection of 10,000 PubMed abstracts that contain a total of 84,355 chemical entity mentions labeled manually by expert chemistry literature curators, following annotation guidelines specifically defined for this task.
+ The CHEMDNER corpus comprises 10,000 PubMed abstracts, which have been meticulously annotated by expert chemistry literature curators according to task-specific guidelines, identifying a total of 84,355 mentions of chemical entities. The CHEMDNER corpus is a collection of 10,000 PubMed abstracts that contain a total of 84,355 chemical entity mentions labeled manually by expert chemistry literature curators, following annotation guidelines specifically defined for this task.
targets:
- - id: matched_words
- description: matched words
- type: text
- names:
- - noun: entity
- - noun: matched entity
+ - id: matched_words
+ description: matched words
+ type: text
+ names:
+ - noun: entity
+ - noun: matched entity
identifiers:
- - id: sentence
- description: Sentence
- type: text
- names:
- - noun: sentence
- - noun: text
+ - id: sentence
+ description: Sentence
+ type: text
+ names:
+ - noun: sentence
+ - noun: text
license: unknown
links:
- - url: https://huggingface.co/datasets/bigbio/chemdner
- description: original dataset
+ - url: https://huggingface.co/datasets/bigbio/chemdner
+ description: original dataset
benchmarks:
- - name: chemdner
- link: hhttps://huggingface.co/datasets/bigbio/blurb
- split_column: split
+ - name: chemdner
+ link: https://huggingface.co/datasets/bigbio/blurb
+ split_column: split
num_points: 19440
bibtex:
- - |-
- @article{Krallinger2015,
- title = {The CHEMDNER corpus of chemicals and drugs and its annotation principles},
- author = {
- Krallinger, Martin and Rabal, Obdulia and Leitner, Florian and Vazquez,
- Miguel and Salgado, David and Lu, Zhiyong and Leaman, Robert and Lu, Yanan
- and Ji, Donghong and Lowe, Daniel M. and Sayle, Roger A. and
- Batista-Navarro, Riza Theresa and Rak, Rafal and Huber, Torsten and
- Rockt{"a}schel, Tim and Matos, S{'e}rgio and Campos, David and Tang,
- Buzhou and Xu, Hua and Munkhdalai, Tsendsuren and Ryu, Keun Ho and Ramanan,
- S. V. and Nathan, Senthil and {{Z}}itnik, Slavko and Bajec, Marko and
- Weber, Lutz and Irmer, Matthias and Akhondi, Saber A. and Kors, Jan A. and
- Xu, Shuo and An, Xin and Sikdar, Utpal Kumar and Ekbal, Asif and Yoshioka,
- Masaharu and Dieb, Thaer M. and Choi, Miji and Verspoor, Karin and Khabsa,
- Madian and Giles, C. Lee and Liu, Hongfang and Ravikumar, Komandur
- Elayavilli and Lamurias, Andre and Couto, Francisco M. and Dai, Hong-Jie
- and Tsai, Richard Tzong-Han and Ata, Caglar and Can, Tolga and Usi{'e},
- Anabel and Alves, Rui and Segura-Bedmar, Isabel and Mart{'i}nez, Paloma
- and Oyarzabal, Julen and Valencia, Alfonso
- },
- year = 2015,
- month = {Jan},
- day = 19,
- journal = {Journal of Cheminformatics},
- volume = 7,
- number = 1,
- pages = {S2},
- doi = {10.1186/1758-2946-7-S1-S2},
- issn = {1758-2946},
- url = {https://doi.org/10.1186/1758-2946-7-S1-S2},
- abstract = {
- The automatic extraction of chemical information from text requires the
- recognition of chemical entity mentions as one of its key steps. When
- developing supervised named entity recognition (NER) systems, the
- availability of a large, manually annotated text corpus is desirable.
- Furthermore, large corpora permit the robust evaluation and comparison of
- different approaches that detect chemicals in documents. We present the
- CHEMDNER corpus, a collection of 10,000 PubMed abstracts that contain a
- total of 84,355 chemical entity mentions labeled manually by expert
- chemistry literature curators, following annotation guidelines specifically
- defined for this task. The abstracts of the CHEMDNER corpus were selected
- to be representative for all major chemical disciplines. Each of the
- chemical entity mentions was manually labeled according to its
- structure-associated chemical entity mention (SACEM) class: abbreviation,
- family, formula, identifier, multiple, systematic and trivial. The
- difficulty and consistency of tagging chemicals in text was measured using
- an agreement study between annotators, obtaining a percentage agreement of
- 91. For a subset of the CHEMDNER corpus (the test set of 3,000 abstracts)
- we provide not only the Gold Standard manual annotations, but also mentions
- automatically detected by the 26 teams that participated in the BioCreative
- IV CHEMDNER chemical mention recognition task. In addition, we release the
- CHEMDNER silver standard corpus of automatically extracted mentions from
- 17,000 randomly selected PubMed abstracts. A version of the CHEMDNER corpus
- in the BioC format has been generated as well. We propose a standard for
- required minimum information about entity annotations for the construction
- of domain specific corpora on chemical and drug entities. The CHEMDNER
- corpus and annotation guidelines are available at:
- ttp://www.biocreative.org/resources/biocreative-iv/chemdner-corpus/
- }
- }
+ - |-
+ @article{Krallinger2015,
+ title = {The CHEMDNER corpus of chemicals and drugs and its annotation principles},
+ author = {
+ Krallinger, Martin and Rabal, Obdulia and Leitner, Florian and Vazquez,
+ Miguel and Salgado, David and Lu, Zhiyong and Leaman, Robert and Lu, Yanan
+ and Ji, Donghong and Lowe, Daniel M. and Sayle, Roger A. and
+ Batista-Navarro, Riza Theresa and Rak, Rafal and Huber, Torsten and
+ Rockt{"a}schel, Tim and Matos, S{'e}rgio and Campos, David and Tang,
+ Buzhou and Xu, Hua and Munkhdalai, Tsendsuren and Ryu, Keun Ho and Ramanan,
+ S. V. and Nathan, Senthil and {{Z}}itnik, Slavko and Bajec, Marko and
+ Weber, Lutz and Irmer, Matthias and Akhondi, Saber A. and Kors, Jan A. and
+ Xu, Shuo and An, Xin and Sikdar, Utpal Kumar and Ekbal, Asif and Yoshioka,
+ Masaharu and Dieb, Thaer M. and Choi, Miji and Verspoor, Karin and Khabsa,
+ Madian and Giles, C. Lee and Liu, Hongfang and Ravikumar, Komandur
+ Elayavilli and Lamurias, Andre and Couto, Francisco M. and Dai, Hong-Jie
+ and Tsai, Richard Tzong-Han and Ata, Caglar and Can, Tolga and Usi{'e},
+ Anabel and Alves, Rui and Segura-Bedmar, Isabel and Mart{'i}nez, Paloma
+ and Oyarzabal, Julen and Valencia, Alfonso
+ },
+ year = 2015,
+ month = {Jan},
+ day = 19,
+ journal = {Journal of Cheminformatics},
+ volume = 7,
+ number = 1,
+ pages = {S2},
+ doi = {10.1186/1758-2946-7-S1-S2},
+ issn = {1758-2946},
+ url = {https://doi.org/10.1186/1758-2946-7-S1-S2},
+ abstract = {
+ The automatic extraction of chemical information from text requires the
+ recognition of chemical entity mentions as one of its key steps. When
+ developing supervised named entity recognition (NER) systems, the
+ availability of a large, manually annotated text corpus is desirable.
+ Furthermore, large corpora permit the robust evaluation and comparison of
+ different approaches that detect chemicals in documents. We present the
+ CHEMDNER corpus, a collection of 10,000 PubMed abstracts that contain a
+ total of 84,355 chemical entity mentions labeled manually by expert
+ chemistry literature curators, following annotation guidelines specifically
+ defined for this task. The abstracts of the CHEMDNER corpus were selected
+ to be representative for all major chemical disciplines. Each of the
+ chemical entity mentions was manually labeled according to its
+ structure-associated chemical entity mention (SACEM) class: abbreviation,
+ family, formula, identifier, multiple, systematic and trivial. The
+ difficulty and consistency of tagging chemicals in text was measured using
+ an agreement study between annotators, obtaining a percentage agreement of
+ 91. For a subset of the CHEMDNER corpus (the test set of 3,000 abstracts)
+ we provide not only the Gold Standard manual annotations, but also mentions
+ automatically detected by the 26 teams that participated in the BioCreative
+ IV CHEMDNER chemical mention recognition task. In addition, we release the
+ CHEMDNER silver standard corpus of automatically extracted mentions from
+ 17,000 randomly selected PubMed abstracts. A version of the CHEMDNER corpus
+ in the BioC format has been generated as well. We propose a standard for
+ required minimum information about entity annotations for the construction
+ of domain specific corpora on chemical and drug entities. The CHEMDNER
+ corpus and annotation guidelines are available at:
+ http://www.biocreative.org/resources/biocreative-iv/chemdner-corpus/
+ }
+ }
templates:
- - |-
- Task: Find all the mentions of {#chemicals|chemical compounds|chemical substances!} in the {#following|subsequent!} {#text|sentence!}. Return the matching {#words|entities!}. If there is no {#match|mention of a chemical|matching entity!}, return `no match`.
- {#Sentence|Description!}: {sentence#}
- Answer: {matched_words#}
- - |-
- User: Does the following text contain mentions of {#chemicals|chemical compounds|chemical substances!}? {#Can you return matches?|Can you output matches?|Please return matches.!}
- {#Text: |!}{sentence#}
- Assistant: {#I found|There is!} {matched_words#}.
+ - |-
+ Task: Find all the mentions of {#chemicals|chemical compounds|chemical substances!} in the {#following|subsequent!} {#text|sentence!}. Return the matching {#words|entities!}. If there is no {#match|mention of a chemical|matching entity!}, return `no match`.
+ {#Sentence|Description!}: {sentence#}
+ Answer: {matched_words#}
+ - |-
+ User: Does the following text contain mentions of {#chemicals|chemical compounds|chemical substances!}? {#Can you return matches?|Can you output matches?|Please return matches.!}
+ {#Text: |!}{sentence#}
+ Assistant: {#I found|There is!} {matched_words#}.
diff --git a/data/tabular/chemistry_stackexchange/meta.yaml b/data/tabular/chemistry_stackexchange/meta.yaml
index 62c581c20..33e973401 100644
--- a/data/tabular/chemistry_stackexchange/meta.yaml
+++ b/data/tabular/chemistry_stackexchange/meta.yaml
@@ -1,31 +1,30 @@
----
name: chemistry_stackexchange
description: |-
- Questions and answers mined from chemistry.stackexchange.com.
+ Questions and answers mined from chemistry.stackexchange.com.
targets:
- - id: a
- description: answer to the question
- type: string
- - id: title
- description: title of the question
- type: string
+ - id: a
+ description: answer to the question
+ type: string
+ - id: title
+ description: title of the question
+ type: string
identifiers:
- - id: q
- type: string
- description: question asked on chemistry.stackexchange.com
+ - id: q
+ type: string
+ description: question asked on chemistry.stackexchange.com
license: CC BY-SA
links:
- - url: chemistry.stackexchange.com
- description: original data source
- - url: https://stackoverflow.com/help/licensing
- description: information about the license
+ - url: https://chemistry.stackexchange.com
+ description: original data source
+ - url: https://stackoverflow.com/help/licensing
+ description: information about the license
num_points: 4582
templates:
- - |-
- {#Task: Please answer the question of the user.|Task: Provide a detailed response to the user's question.|Task: Address the user's query with a well-structured answer.|Task: Your role is to respond to the user's question with clarity.|Task: Offer a concise and informative answer to the user's question.|Task: Provide a clear and concise reply to the user's inquiry.!}
- {#User: |Question: |Inquiry: |\n!}{#q}
- {#Assistant: |Answer: !}{#a}
- - |-
- {#Task: Generate a title for this question.|Task: Create a meaningful title for this question.|Task: Summarize the question in a title.!}
- {#Question: |Inquiry: |\n!}{#q}
- {#Assistant: |Title: |Answer: |!}{#title}
+ - |-
+ {#Task: Please answer the question of the user.|Task: Provide a detailed response to the user's question.|Task: Address the user's query with a well-structured answer.|Task: Your role is to respond to the user's question with clarity.|Task: Offer a concise and informative answer to the user's question.|Task: Provide a clear and concise reply to the user's inquiry.!}
+ {#User: |Question: |Inquiry: |\n!}{#q}
+ {#Assistant: |Answer: !}{#a}
+ - |-
+ {#Task: Generate a title for this question.|Task: Create a meaningful title for this question.|Task: Summarize the question in a title.!}
+ {#Question: |Inquiry: |\n!}{#q}
+ {#Assistant: |Title: |Answer: |!}{#title}
diff --git a/data/tabular/choline_transporter_butkiewicz/meta.yaml b/data/tabular/choline_transporter_butkiewicz/meta.yaml
index ba6dcd688..cc7e170e6 100644
--- a/data/tabular/choline_transporter_butkiewicz/meta.yaml
+++ b/data/tabular/choline_transporter_butkiewicz/meta.yaml
@@ -1,174 +1,173 @@
----
name: choline_transporter_butkiewicz
description: |-
- This dataset was originally curated from HTS data at
- the PubChem database. The primary screen AID 488975 identified
- inhibitors of CHT. The counter screen AID 493221 was used as a
- validation screen to confirm the active compounds that inhibit CHT.
- AID504840 and AID588401 experiments were used as additional validation
- experiments. The screen AID 493222 evaluated remaining active compounds
- for non-specific activity in parental HEK293 cells. AID602208 tested a
- selected set of compounds for 3H choline uptake. The final set of 254
- active compounds was determined by the overlap of active compounds in
- screens AID 493221, AID504840, and AID588401 subtracting any
- non-specific hits from AID 49322 and all inactive compounds in the
- re-confirmation screen AID602208.
+ This dataset was originally curated from HTS data at
+ the PubChem database. The primary screen AID 488975 identified
+ inhibitors of CHT. The counter screen AID 493221 was used as a
+ validation screen to confirm the active compounds that inhibit CHT.
+ AID504840 and AID588401 experiments were used as additional validation
+ experiments. The screen AID 493222 evaluated remaining active compounds
+ for non-specific activity in parental HEK293 cells. AID602208 tested a
+ selected set of compounds for 3H choline uptake. The final set of 254
+ active compounds was determined by the overlap of active compounds in
+ screens AID 493221, AID504840, and AID588401 subtracting any
+ non-specific hits from AID 493222 and all inactive compounds in the
+ re-confirmation screen AID602208.
targets:
- - id: activity_choline_transporter
- description: inhibition of choline transporter receptor (1) or not (0).
- units:
- type: boolean
- names:
- - noun: inhibition of choline transporter activity
- - adjective: choline transporter activity inhibition
- - gerund: inhibiting the choline transporter activity
- - verb: inhibits choline transporter activity
- pubchem_aids:
- - 488975
- - 493221
- - 504840
- - 588401
- - 493222
- - 602208
+ - id: activity_choline_transporter
+ description: inhibition of choline transporter receptor (1) or not (0).
+ units:
+ type: boolean
+ names:
+ - noun: inhibition of choline transporter activity
+ - adjective: choline transporter activity inhibition
+ - gerund: inhibiting the choline transporter activity
+ - verb: inhibits choline transporter activity
+ pubchem_aids:
+ - 488975
+ - 493221
+ - 504840
+ - 588401
+ - 493222
+ - 602208
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al
- description: original dataset
- - url: https://doi.org/10.3390/molecules18010735
- description: corresponding publication
- - url: https://doi.org/10.1093/nar/gky1033
- description: corresponding publication
- - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/
- description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al
+ description: original dataset
+ - url: https://doi.org/10.3390/molecules18010735
+ description: corresponding publication
+ - url: https://doi.org/10.1093/nar/gky1033
+ description: corresponding publication
+ - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/
+ description: corresponding publication
num_points: 302306
bibtex:
- - |-
- @article{Butkiewicz2013,
- doi = {10.3390/molecules18010735},
- url = {https://doi.org/10.3390/molecules18010735},
- year = {2013},
- month = jan,
- publisher = {{MDPI} {AG}},
- volume = {18},
- number = {1},
- pages = {735--756},
- author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and
- Jeffrey Mendenhall and Pedro Teixeira and C. Weaver and Jens
- Meiler},
- title = {Benchmarking Ligand-Based Virtual High-Throughput
- Screening with the {PubChem} Database},
- journal = {Molecules}}
- - |-
- @article{Kim2018,
- doi = {10.1093/nar/gky1033},
- url = {https://doi.org/10.1093/nar/gky1033},
- year = {2018},
- month = oct,
- publisher = {Oxford University Press ({OUP})},
- volume = {47},
- number = {D1},
- pages = {D1102--D1109},
- author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and
- Asta Gindulyte and Jia He and Siqian He and Qingliang Li and
- Benjamin A Shoemaker and Paul A Thiessen and Bo Yu and Leonid
- Zaslavsky and Jian Zhang and Evan E Bolton},
- title = {{PubChem} 2019 update: improved access to chemical data},
- journal = {Nucleic Acids Research}}
- - |-
- @article{Butkiewicz2017,
- doi = {},
- url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/},
- year = {2017},
- publisher = {Chem Inform},
- volume = {3},
- number = {1},
- author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe,
- E. W. and Weaver, D. C. and Meiler, J.},
- title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from
- the {P}ub{C}hem {D}atabase}},
- journal = {Chemical Science}}
+ - |-
+ @article{Butkiewicz2013,
+ doi = {10.3390/molecules18010735},
+ url = {https://doi.org/10.3390/molecules18010735},
+ year = {2013},
+ month = jan,
+ publisher = {{MDPI} {AG}},
+ volume = {18},
+ number = {1},
+ pages = {735--756},
+ author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and
+ Jeffrey Mendenhall and Pedro Teixeira and C. Weaver and Jens
+ Meiler},
+ title = {Benchmarking Ligand-Based Virtual High-Throughput
+ Screening with the {PubChem} Database},
+ journal = {Molecules}}
+ - |-
+ @article{Kim2018,
+ doi = {10.1093/nar/gky1033},
+ url = {https://doi.org/10.1093/nar/gky1033},
+ year = {2018},
+ month = oct,
+ publisher = {Oxford University Press ({OUP})},
+ volume = {47},
+ number = {D1},
+ pages = {D1102--D1109},
+ author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and
+ Asta Gindulyte and Jia He and Siqian He and Qingliang Li and
+ Benjamin A Shoemaker and Paul A Thiessen and Bo Yu and Leonid
+ Zaslavsky and Jian Zhang and Evan E Bolton},
+ title = {{PubChem} 2019 update: improved access to chemical data},
+ journal = {Nucleic Acids Research}}
+ - |-
+ @article{Butkiewicz2017,
+ doi = {},
+ url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/},
+ year = {2017},
+ publisher = {Chem Inform},
+ volume = {3},
+ number = {1},
+ author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe,
+ E. W. and Weaver, D. C. and Meiler, J.},
+ title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from
+ the {P}ub{C}hem {D}atabase},
+ journal = {Chemical Science}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_choline_transporter#no &NULL}{activity_choline_transporter__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {activity_choline_transporter#no &NULL}{activity_choline_transporter__names__noun}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_choline_transporter#no &NULL}{activity_choline_transporter__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_choline_transporter__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {activity_choline_transporter#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_choline_transporter__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {activity_choline_transporter__names__gerund}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_choline_transporter__names__gerund}?
- Assistant: {activity_choline_transporter#No&Yes}, this molecule is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {activity_choline_transporter__names__gerund}?
- Assistant: {activity_choline_transporter#No&Yes}, it is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}?
- Assistant: This is a molecule that is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {activity_choline_transporter#not &NULL}be {activity_choline_transporter__names__gerund}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {activity_choline_transporter#not &NULL}be {activity_choline_transporter__names__gerund}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} {activity_choline_transporter__names__gerund}:{activity_choline_transporter#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_choline_transporter__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{activity_choline_transporter#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_choline_transporter__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result:This molecule is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}.
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_choline_transporter__names__gerund}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {activity_choline_transporter%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%activity_choline_transporter%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%activity_choline_transporter%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_choline_transporter#no &NULL}{activity_choline_transporter__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {activity_choline_transporter#no &NULL}{activity_choline_transporter__names__noun}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_choline_transporter#no &NULL}{activity_choline_transporter__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {activity_choline_transporter__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {activity_choline_transporter#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {activity_choline_transporter__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {activity_choline_transporter__names__gerund}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_choline_transporter__names__gerund}?
+ Assistant: {activity_choline_transporter#No&Yes}, this molecule is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {activity_choline_transporter__names__gerund}?
+ Assistant: {activity_choline_transporter#No&Yes}, it is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}?
+ Assistant: This is a molecule that is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {activity_choline_transporter#not &NULL}be {activity_choline_transporter__names__gerund}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {activity_choline_transporter#not &NULL}be {activity_choline_transporter__names__gerund}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} {activity_choline_transporter__names__gerund}:{activity_choline_transporter#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {activity_choline_transporter__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{activity_choline_transporter#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {activity_choline_transporter__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result:This molecule is {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}.
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_choline_transporter__names__gerund}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {activity_choline_transporter%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%activity_choline_transporter%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%activity_choline_transporter%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/clearance_astrazeneca/meta.yaml b/data/tabular/clearance_astrazeneca/meta.yaml
index 5a9ec0ae6..81eb58889 100644
--- a/data/tabular/clearance_astrazeneca/meta.yaml
+++ b/data/tabular/clearance_astrazeneca/meta.yaml
@@ -1,67 +1,66 @@
----
name: clearance_astrazeneca
description: |-
- Drug clearance is defined as the volume of plasma cleared of a drug
- over a specified time period and it measures the rate at which the active drug
- is removed from the body. This is a dataset curated from ChEMBL database containing
- experimental results on intrinsic clearance, deposited from AstraZeneca. It
- contains clearance measures from two experiments types, hepatocyte and microsomes.
+ Drug clearance is defined as the volume of plasma cleared of a drug
+ over a specified time period and it measures the rate at which the active drug
+ is removed from the body. This is a dataset curated from the ChEMBL database containing
+ experimental results on intrinsic clearance, deposited by AstraZeneca. It
+ contains clearance measures from two experiment types, hepatocyte and microsome.
targets:
- - id: drug_clearance
- description: the volume of plasma cleared of a drug over a specified time period
- units: mL / (min g)
- type: continuous
- names:
- - noun: drug clearance
- - noun: volume of plasma cleared of a drug over a specified time period
- uris:
- - http://purl.bioontology.org/ontology/MEDDRA/10077254
+ - id: drug_clearance
+ description: the volume of plasma cleared of a drug over a specified time period
+ units: mL / (min g)
+ type: continuous
+ names:
+ - noun: drug clearance
+ - noun: volume of plasma cleared of a drug over a specified time period
+ uris:
+ - http://purl.bioontology.org/ontology/MEDDRA/10077254
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
- - id: chembl_id
- type: Other
- names:
- - noun: ChEMBL id
- - noun: ChEMBL identifier number
- description: ChEMBL ids
- sample: false
+ - id: SMILES
+ type: SMILES
+ description: SMILES
+ - id: chembl_id
+ type: Other
+ names:
+ - noun: ChEMBL id
+ - noun: ChEMBL identifier number
+ description: ChEMBL ids
+ sample: false
license: CC BY 4.0
links:
- - url: http://dx.doi.org/10.6019/CHEMBL3301361
- description: corresponding publication
- - url: https://doi.org/10.1016/j.ejmech.2012.06.043
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/adme/#clearance-astrazeneca
- description: data source
+ - url: http://dx.doi.org/10.6019/CHEMBL3301361
+ description: corresponding publication
+ - url: https://doi.org/10.1016/j.ejmech.2012.06.043
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/adme/#clearance-astrazeneca
+ description: data source
num_points: 1213
bibtex:
- - |-
- @techreport{Hersey2015,
- doi = {10.6019/chembl3301361},
- url = {https://doi.org/10.6019/chembl3301361},
- year = {2015},
- month = feb,
- publisher = {{EMBL}-{EBI}},
- author = {Anne Hersey},
- title = {{ChEMBL} Deposited Data Set - {AZ dataset}}
- - |-
- @article{Di2012,
- doi = {10.1016/j.ejmech.2012.06.043},
- url = {https://doi.org/10.1016/j.ejmech.2012.06.043},
- year = {2012},
- month = nov,
- publisher = {Elsevier BV},
- volume = {57},
- pages = {441--448},
- author = {Li Di and Christopher Keefer and Dennis O. Scott and Timothy J. Strelevitz
- and George Chang and Yi-An Bi and Yurong Lai and Jonathon Duckworth and
- Katherine Fenner and Matthew D. Troutman and R. Scott Obach},
- title = {Mechanistic insights from comparing intrinsic clearance values between
- human liver microsomes and hepatocytes to guide drug design},
- journal = {European Journal of Medicinal Chemistry}
+ - |-
+ @techreport{Hersey2015,
+ doi = {10.6019/chembl3301361},
+ url = {https://doi.org/10.6019/chembl3301361},
+ year = {2015},
+ month = feb,
+ publisher = {{EMBL}-{EBI}},
+ author = {Anne Hersey},
+ title = {{ChEMBL} Deposited Data Set - {AZ dataset}}}
+ - |-
+ @article{Di2012,
+ doi = {10.1016/j.ejmech.2012.06.043},
+ url = {https://doi.org/10.1016/j.ejmech.2012.06.043},
+ year = {2012},
+ month = nov,
+ publisher = {Elsevier BV},
+ volume = {57},
+ pages = {441--448},
+ author = {Li Di and Christopher Keefer and Dennis O. Scott and Timothy J. Strelevitz
+ and George Chang and Yi-An Bi and Yurong Lai and Jonathon Duckworth and
+ Katherine Fenner and Matthew D. Troutman and R. Scott Obach},
+ title = {Mechanistic insights from comparing intrinsic clearance values between
+ human liver microsomes and hepatocytes to guide drug design},
+ journal = {European Journal of Medicinal Chemistry}}
diff --git a/data/tabular/clintox/meta.yaml b/data/tabular/clintox/meta.yaml
index fcda1a556..282d3342c 100644
--- a/data/tabular/clintox/meta.yaml
+++ b/data/tabular/clintox/meta.yaml
@@ -1,129 +1,127 @@
----
name: clintox
description: |-
- The ClinTox dataset includes drugs that have failed
- clinical trials for toxicity reasons and also drugs that are associated
- with successful trials.
+ The ClinTox dataset includes drugs that have failed
+ clinical trials for toxicity reasons and also drugs that are associated
+ with successful trials.
targets:
- - id: clinical_toxicity
- description: whether it can cause clinical toxicity (1) or not (0).
- units:
- type: boolean
- names:
- - noun: toxicity
- - noun: clinical toxicity
- - adjective: toxic
- - adjective: clinically toxic
- - gerund: displaying clinical toxicity
- uris:
- - http://purl.bioontology.org/ontology/MESH/Q000633
- - https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&ns=ncit&code=C27990
- - https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&ns=ncit&code=C27955
+ - id: clinical_toxicity
+ description: whether it can cause clinical toxicity (1) or not (0).
+ units:
+ type: boolean
+ names:
+ - noun: toxicity
+ - noun: clinical toxicity
+ - adjective: toxic
+ - adjective: clinically toxic
+ - gerund: displaying clinical toxicity
+ uris:
+ - http://purl.bioontology.org/ontology/MESH/Q000633
+ - https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&ns=ncit&code=C27990
+ - https://ncit.nci.nih.gov/ncitbrowser/ConceptReport.jsp?dictionary=NCI_Thesaurus&ns=ncit&code=C27955
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://tdcommons.ai/single_pred_tasks/tox/#clintox
- description: original dataset
- - url: https://doi.org/10.1016/j.chembiol.2016.07.023
- description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/tox/#clintox
+ description: original dataset
+ - url: https://doi.org/10.1016/j.chembiol.2016.07.023
+ description: corresponding publication
num_points: 1478
bibtex:
- - |-
- @article{Gayvert2016,
- doi = {10.1016/j.chembiol.2016.07.023},
- url = {https://doi.org/10.1016/j.chembiol.2016.07.023},
- year = {2016},
- month = oct,
- publisher = {Elsevier {BV}},
- volume = {23},
- number = {10},
- pages = {1294--1301},
- author = {Kaitlyn~M. Gayvert and Neel~S. Madhukar and Olivier Elemento},
- title = {A Data-Driven Approach to Predicting Successes and Failures of Clinical Trials},
- journal = {Cell Chemical Biology}}
+ - |-
+ @article{Gayvert2016,
+ doi = {10.1016/j.chembiol.2016.07.023},
+ url = {https://doi.org/10.1016/j.chembiol.2016.07.023},
+ year = {2016},
+ month = oct,
+ publisher = {Elsevier {BV}},
+ volume = {23},
+ number = {10},
+ pages = {1294--1301},
+ author = {Kaitlyn~M. Gayvert and Neel~S. Madhukar and Olivier Elemento},
+ title = {A Data-Driven Approach to Predicting Successes and Failures of Clinical Trials},
+ journal = {Cell Chemical Biology}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {clinical_toxicity#no &NULL}{clinical_toxicity__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {clinical_toxicity#no &NULL}{clinical_toxicity__names__adjective}
- {#properties|characteristics|features|traits!}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {clinical_toxicity#not &NULL}identified as {clinical_toxicity__names__adjective}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {clinical_toxicity__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {clinical_toxicity#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {clinical_toxicity__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {clinical_toxicity__names__adjective}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {clinical_toxicity__names__adjective}?
- Assistant: {clinical_toxicity#No&Yes}, this molecule is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {clinical_toxicity__names__adjective}?
- Assistant: {clinical_toxicity#No&Yes}, it is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}?
- Assistant: This is a molecule that is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {clinical_toxicity#not &NULL}be {clinical_toxicity__names__adjective}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {clinical_toxicity#not &NULL}be {clinical_toxicity__names__adjective}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} {clinical_toxicity__names__adjective}:{clinical_toxicity#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {clinical_toxicity__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{clinical_toxicity#False&True}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {clinical_toxicity__names__adjective}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {clinical_toxicity%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {clinical_toxicity__names__adjective}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {clinical_toxicity%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%clinical_toxicity%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%clinical_toxicity%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {clinical_toxicity#no &NULL}{clinical_toxicity__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {clinical_toxicity#no &NULL}{clinical_toxicity__names__adjective} {#properties|characteristics|features|traits!}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {clinical_toxicity#not &NULL}identified as {clinical_toxicity__names__adjective}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {clinical_toxicity__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {clinical_toxicity#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {clinical_toxicity__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {clinical_toxicity__names__adjective}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {clinical_toxicity__names__adjective}?
+ Assistant: {clinical_toxicity#No&Yes}, this molecule is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {clinical_toxicity__names__adjective}?
+ Assistant: {clinical_toxicity#No&Yes}, it is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}?
+ Assistant: This is a molecule that is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {clinical_toxicity#not &NULL}be {clinical_toxicity__names__adjective}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {clinical_toxicity#not &NULL}be {clinical_toxicity__names__adjective}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} {clinical_toxicity__names__adjective}:{clinical_toxicity#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {clinical_toxicity__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{clinical_toxicity#False&True}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {clinical_toxicity__names__adjective}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {clinical_toxicity%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {clinical_toxicity__names__adjective}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {clinical_toxicity%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%clinical_toxicity%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {clinical_toxicity#not &NULL}{clinical_toxicity__names__adjective}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%clinical_toxicity%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/core_mof_no_topo/meta.yaml b/data/tabular/core_mof_no_topo/meta.yaml
index ee15f93c6..460f840aa 100644
--- a/data/tabular/core_mof_no_topo/meta.yaml
+++ b/data/tabular/core_mof_no_topo/meta.yaml
@@ -1,250 +1,239 @@
----
name: core_mof_no_topo
description: |-
- CoRE MOF is a database of "computationally ready" crystal structures of metal-organic frameworks
+ CoRE MOF is a database of "computationally ready" crystal structures of metal-organic frameworks
targets:
- - id: cif
- description: Crystal structure, in CIF format
- type: text
- names:
- - noun: crystal structure in CIF format
- - noun: content of a CIF file with the crystal structure
- - noun: data from a CIF file with the crystal structure
- - noun: content within a CIF file of the crystal structure
- - noun: crystal structure represented in CIF format
- - id: outputs.pure_CO2_widomHOA
- description: heat of adsorption of CO2, simulated using Widom insertion
- units: kJ/mol
- type: continuous
- significant_digits: 2
- names:
- - noun: heat of adsorption of CO2 (computed using the Widom insertion technique)
- - noun: simulated heat of adsorption of CO2 (obtained using Widom insertions)
- - noun: heat of adsorption of carbon dioxide (computed using the Widom insertion technique)
- - noun: simulated heat of adsorption of carbon dioxide (obtained using Widom insertions)
- - id: outputs.pure_methane_widomHOA
- description: heat of adsorption for methane, simulated using Widom insertion
- units: kJ/mol
- type: continuous
- significant_digits: 2
- names:
- - noun: heat of adsorption of methane (computed using the Widom insertion technique)
- - noun: simulated heat of adsorption of methane (obtained using Widom insertions)
- - noun: heat of adsorption of CH4 (computed using the Widom insertion technique)
- - noun: simulated heat of adsorption of CH4 (obtained using Widom insertions)
- - id: outputs.pure_uptake_CO2_298.00_15000
- description: CO2 uptake at 298 K and 15000 Pa
- units: mol/kg
- type: continuous
- significant_digits: 1
- names:
- - noun: CO2 uptake at 298 K and 15000 Pa as obtained from GCMC simulations
- - noun: simulated CO2 uptake at 298 K and 15000 Pa
- - noun: CO2 uptake at 298 K and 15000 Pa as obtained from grand canonical Monte Carlo simulations
- - noun: carbon dioxide uptake at 298 K and 15000 Pa as obtained from GCMC simulations
- - noun: simulated carbon dioxide uptake at 298 K and 15000 Pa
- - noun: carbon dioxide uptake at 298 K and 15000 Pa as obtained from grand canonical Monte Carlo simulations
- - noun: CO2 uptake at 298 K and 0.15 bar as obtained from GCMC simulations
- - noun: simulated CO2 uptake at 298 K and 0.15 bar
- - id: outputs.pure_uptake_CO2_298.00_1600000
- description: CO2 uptake at 298 K and 1600000 Pa
- units: mol/kg
- type: continuous
- significant_digits: 1
- names:
- - noun: CO2 uptake at 298 K and 1600000 Pa as obtained from GCMC simulations
- - noun: simulated CO2 uptake at 298 K and 1600000 Pa
- - noun: CO2 uptake at 298 K and 1600000 Pa as obtained from grand canonical Monte Carlo simulations
- - noun: carbon dioxide uptake at 298 K and 1600000 Pa as obtained from GCMC simulations
- - noun: simulated carbon dioxide uptake at 298 K and 1600000 Pa
- - noun: carbon dioxide uptake at 298 K and 1600000 Pa as obtained from grand canonical Monte Carlo simulations
- - noun: CO2 uptake at 298 K and 16 bar as obtained from GCMC simulations
- - noun: simulated CO2 uptake at 298 K and 16 bar
- - id: outputs.pure_uptake_methane_298.00_580000
- description: methane uptake at 298 K and 580000 Pa
- units: mol/kg
- type: continuous
- significant_digits: 1
- names:
- - noun: methane uptake at 298 K and 580000 Pa as obtained from GCMC simulations
- - noun: simulated methane uptake at 298 K and 580000 Pa
- - noun: methane uptake at 298 K and 580000 Pa as obtained from grand canonical Monte Carlo simulations
- - noun: CH4 uptake at 298 K and 580000 Pa as obtained from GCMC simulations
- - noun: simulated CH4 uptake at 298 K and 580000 Pa
- - noun: CH4 uptake at 298 K and 580000 Pa as obtained from grand canonical Monte Carlo simulations
- - noun: methane uptake at 298 K and 5.8 bar as obtained from GCMC simulations
- - noun: simulated methane uptake at 298 K and 5.8 bar
- - id: outputs.pure_uptake_methane_298.00_6500000
- description: methane uptake at 298 K and 6500000 Pa
- units: mol/kg
- type: continuous
- significant_digits: 1
- names:
- - noun: methane uptake at 298 K and 6500000 Pa as obtained from GCMC simulations
- - noun: simulated methane uptake at 298 K and 6500000 Pa
- - noun: methane uptake at 298 K and 6500000 Pa as obtained from grand canonical Monte Carlo simulations
- - noun: CH4 uptake at 298 K and 6500000 Pa as obtained from GCMC simulations
- - noun: simulated CH4 uptake at 298 K and 6500000 Pa
- - noun: CH4 uptake at 298 K and 6500000 Pa as obtained from grand canonical Monte Carlo simulations
- - noun: methane uptake at 298 K and 65 bar as obtained from GCMC simulations
- - noun: simulated methane uptake at 298 K and 65 bar
- - id: outputs.logKH_CO2
- description: logarithm of Henry's constant for CO2
- units: log(mol/kg/Pa)
- type: continuous
- significant_digits: 2
- names:
- - noun: logarithm of Henry's constant for CO2 obtained from Widom insertion simulations
- - noun: logarithm of Henry's constant for carbon dioxide obtained from Widom insertion simulations
- - noun: logarithm of Henry's constant for CO2 obtained from Widom insertion simulations
- - noun: logarithm of Henry's constant for carbon dioxide obtained from Widom insertion simulations
- - id: outputs.logKH_CH4
- description: logarithm of Henry's constant for methane
- units: log(mol/kg/Pa)
- type: continuous
- significant_digits: 2
- names:
- - noun: logarithm of Henry's constant for methane obtained from Widom insertion simulations
- - noun: logarithm of Henry's constant for CH4 obtained from Widom insertion simulations
- - noun: logarithm of Henry's constant for methane obtained from Widom insertion simulations
- - noun: logarithm of Henry's constant for CH4 obtained from Widom insertion simulations
- - id: outputs.CH4DC
- description: deliverable capacity of methane
- type: continuous
- units: vSTP/v
- names:
- - noun: deliverable capacity (between 5.8 bar and 65 bar at 298 K) of methane obtained from GCMC simulations
- - noun: deliverable capacity of CH4 obtained from GCMC simulations between 5.8 bar and 65 bar at 298 K
- - noun: deliverable capacity of methane (between 5.8 bar and 65 bar at 298 K) obtained from GCMC simulations
- - noun: deliverable capacity of CH4 (between 5.8 bar and 65 bar at 298 K) obtained from GCMC simulations
+ - id: cif
+ description: Crystal structure, in CIF format
+ type: text
+ names:
+ - noun: crystal structure in CIF format
+ - noun: content of a CIF file with the crystal structure
+ - noun: data from a CIF file with the crystal structure
+ - noun: content within a CIF file of the crystal structure
+ - noun: crystal structure represented in CIF format
+ - id: outputs.pure_CO2_widomHOA
+ description: heat of adsorption of CO2, simulated using Widom insertion
+ units: kJ/mol
+ type: continuous
+ significant_digits: 2
+ names:
+ - noun: heat of adsorption of CO2 (computed using the Widom insertion technique)
+ - noun: simulated heat of adsorption of CO2 (obtained using Widom insertions)
+ - noun: heat of adsorption of carbon dioxide (computed using the Widom insertion technique)
+ - noun: simulated heat of adsorption of carbon dioxide (obtained using Widom insertions)
+ - id: outputs.pure_methane_widomHOA
+ description: heat of adsorption for methane, simulated using Widom insertion
+ units: kJ/mol
+ type: continuous
+ significant_digits: 2
+ names:
+ - noun: heat of adsorption of methane (computed using the Widom insertion technique)
+ - noun: simulated heat of adsorption of methane (obtained using Widom insertions)
+ - noun: heat of adsorption of CH4 (computed using the Widom insertion technique)
+ - noun: simulated heat of adsorption of CH4 (obtained using Widom insertions)
+ - id: outputs.pure_uptake_CO2_298.00_15000
+ description: CO2 uptake at 298 K and 15000 Pa
+ units: mol/kg
+ type: continuous
+ significant_digits: 1
+ names:
+ - noun: CO2 uptake at 298 K and 15000 Pa as obtained from GCMC simulations
+ - noun: simulated CO2 uptake at 298 K and 15000 Pa
+ - noun: CO2 uptake at 298 K and 15000 Pa as obtained from grand canonical Monte Carlo simulations
+ - noun: carbon dioxide uptake at 298 K and 15000 Pa as obtained from GCMC simulations
+ - noun: simulated carbon dioxide uptake at 298 K and 15000 Pa
+ - noun: carbon dioxide uptake at 298 K and 15000 Pa as obtained from grand canonical Monte Carlo simulations
+ - noun: CO2 uptake at 298 K and 0.15 bar as obtained from GCMC simulations
+ - noun: simulated CO2 uptake at 298 K and 0.15 bar
+ - id: outputs.pure_uptake_CO2_298.00_1600000
+ description: CO2 uptake at 298 K and 1600000 Pa
+ units: mol/kg
+ type: continuous
+ significant_digits: 1
+ names:
+ - noun: CO2 uptake at 298 K and 1600000 Pa as obtained from GCMC simulations
+ - noun: simulated CO2 uptake at 298 K and 1600000 Pa
+ - noun: CO2 uptake at 298 K and 1600000 Pa as obtained from grand canonical Monte Carlo simulations
+ - noun: carbon dioxide uptake at 298 K and 1600000 Pa as obtained from GCMC simulations
+ - noun: simulated carbon dioxide uptake at 298 K and 1600000 Pa
+ - noun: carbon dioxide uptake at 298 K and 1600000 Pa as obtained from grand canonical Monte Carlo simulations
+ - noun: CO2 uptake at 298 K and 16 bar as obtained from GCMC simulations
+ - noun: simulated CO2 uptake at 298 K and 16 bar
+ - id: outputs.pure_uptake_methane_298.00_580000
+ description: methane uptake at 298 K and 580000 Pa
+ units: mol/kg
+ type: continuous
+ significant_digits: 1
+ names:
+ - noun: methane uptake at 298 K and 580000 Pa as obtained from GCMC simulations
+ - noun: simulated methane uptake at 298 K and 580000 Pa
+ - noun: methane uptake at 298 K and 580000 Pa as obtained from grand canonical Monte Carlo simulations
+ - noun: CH4 uptake at 298 K and 580000 Pa as obtained from GCMC simulations
+ - noun: simulated CH4 uptake at 298 K and 580000 Pa
+ - noun: CH4 uptake at 298 K and 580000 Pa as obtained from grand canonical Monte Carlo simulations
+ - noun: methane uptake at 298 K and 5.8 bar as obtained from GCMC simulations
+ - noun: simulated methane uptake at 298 K and 5.8 bar
+ - id: outputs.pure_uptake_methane_298.00_6500000
+ description: methane uptake at 298 K and 6500000 Pa
+ units: mol/kg
+ type: continuous
+ significant_digits: 1
+ names:
+ - noun: methane uptake at 298 K and 6500000 Pa as obtained from GCMC simulations
+ - noun: simulated methane uptake at 298 K and 6500000 Pa
+ - noun: methane uptake at 298 K and 6500000 Pa as obtained from grand canonical Monte Carlo simulations
+ - noun: CH4 uptake at 298 K and 6500000 Pa as obtained from GCMC simulations
+ - noun: simulated CH4 uptake at 298 K and 6500000 Pa
+ - noun: CH4 uptake at 298 K and 6500000 Pa as obtained from grand canonical Monte Carlo simulations
+ - noun: methane uptake at 298 K and 65 bar as obtained from GCMC simulations
+ - noun: simulated methane uptake at 298 K and 65 bar
+ - id: outputs.logKH_CO2
+ description: logarithm of Henry's constant for CO2
+ units: log(mol/kg/Pa)
+ type: continuous
+ significant_digits: 2
+ names:
+ - noun: logarithm of Henry's constant for CO2 obtained from Widom insertion simulations
+ - noun: logarithm of Henry's constant for carbon dioxide obtained from Widom insertion simulations
+ - noun: logarithm of Henry's constant for CO2 obtained from Widom insertion simulations
+ - noun: logarithm of Henry's constant for carbon dioxide obtained from Widom insertion simulations
+ - id: outputs.logKH_CH4
+ description: logarithm of Henry's constant for methane
+ units: log(mol/kg/Pa)
+ type: continuous
+ significant_digits: 2
+ names:
+ - noun: logarithm of Henry's constant for methane obtained from Widom insertion simulations
+ - noun: logarithm of Henry's constant for CH4 obtained from Widom insertion simulations
+ - noun: logarithm of Henry's constant for methane obtained from Widom insertion simulations
+ - noun: logarithm of Henry's constant for CH4 obtained from Widom insertion simulations
+ - id: outputs.CH4DC
+ description: deliverable capacity of methane
+ type: continuous
+ units: vSTP/v
+ names:
+ - noun: deliverable capacity (between 5.8 bar and 65 bar at 298 K) of methane obtained from GCMC simulations
+ - noun: deliverable capacity of CH4 obtained from GCMC simulations between 5.8 bar and 65 bar at 298 K
+ - noun: deliverable capacity of methane (between 5.8 bar and 65 bar at 298 K) obtained from GCMC simulations
+ - noun: deliverable capacity of CH4 (between 5.8 bar and 65 bar at 298 K) obtained from GCMC simulations
identifiers:
- - id: smiles_linkers
- description: SMILES representation of the linker
- type: text
- - id: smiles_nodes
- description: SMILES representation of the nodes
- type: text
+ - id: smiles_linkers
+ description: SMILES representation of the linker
+ type: text
+ - id: smiles_nodes
+ description: SMILES representation of the nodes
+ type: text
license: CC BY 4.0
num_points: 142
links:
- - url: https://huggingface.co/datasets/kjappelbaum/chemnlp-core-mof/tree/main
- description: original data source
+ - url: https://huggingface.co/datasets/kjappelbaum/chemnlp-core-mof/tree/main
+ description: original data source
bibtex:
- - |-
- @article{Jablonka_2023,
- doi = {10.1021/acscentsci.2c01177},
- url = {https://doi.org/10.1021%2Facscentsci.2c01177},
- year = 2023,
- month = {mar},
+ - |-
+ @article{Jablonka_2023,
+ doi = {10.1021/acscentsci.2c01177},
+ url = {https://doi.org/10.1021%2Facscentsci.2c01177},
+ year = 2023,
+ month = {mar},
+ publisher = {American Chemical Society ({ACS})},
+ volume = {9},
+ number = {4},
+ pages = {563--581},
+ author = {Kevin Maik Jablonka and Andrew S. Rosen and Aditi S. Krishnapriyan and Berend Smit},
+ title = {An Ecosystem for Digital Reticular Chemistry},
+ journal = {ACS Cent. Sci.}
+ }
+ - |-
+ @article{Chung_2014,
+ doi = {10.1021/cm502594j},
+ url = {https://doi.org/10.1021%2Fcm502594j},
+ year = 2014,
+ month = {oct},
publisher = {American Chemical Society ({ACS})},
- volume = {9},
- number = {4},
- pages = {563--581},
- author = {Kevin Maik Jablonka and Andrew S. Rosen and Aditi S. Krishnapriyan and Berend Smit},
- title = {An Ecosystem for Digital Reticular Chemistry},
- journal = {ACS Cent. Sci.}
- }
- - |-
- @article{Chung_2014,
- doi = {10.1021/cm502594j},
- url = {https://doi.org/10.1021%2Fcm502594j},
- year = 2014,
- month = {oct},
- publisher = {American Chemical Society ({ACS})},
- volume = {26},
- number = {21},
- pages = {6185--6192},
- author = {Yongchul G. Chung and Jeffrey Camp and Maciej Haranczyk and Benjamin J. Sikora and Wojciech Bury and Vaiva Krungleviciute and Taner Yildirim and Omar K. Farha and David S. Sholl and Randall Q. Snurr},
- title = {Computation-Ready, Experimental Metal{\textendash}Organic Frameworks: A Tool To Enable High-Throughput Screening of Nanoporous Crystals},
- journal = {Chem. Mater.}
- }
- - |-
- @article{Chung_2019,
- doi = {10.1021/acs.jced.9b00835},
- url = {https://doi.org/10.1021%2Facs.jced.9b00835},
- year = 2019,
- month = {nov},
- publisher = {American Chemical Society ({ACS})},
- volume = {64},
- number = {12},
- pages = {5985--5998},
- author = {Yongchul G. Chung and Emmanuel Haldoupis and Benjamin J. Bucior and Maciej Haranczyk and Seulchan Lee and Hongda Zhang and Konstantinos D. Vogiatzis and Marija Milisavljevic and Sanliang Ling and Jeffrey S. Camp and Ben Slater and J. Ilja Siepmann and David S. Sholl and Randall Q. Snurr},
- title = {Advances, Updates, and Analytics for the Computation-Ready, Experimental Metal{\textendash}Organic Framework Database: {CoRE} {MOF} 2019},
- journal = {J. Chem. Eng. Data}amp$\mathsemicolon$ Engineering Data}
- }
+ volume = {26},
+ number = {21},
+ pages = {6185--6192},
+ author = {Yongchul G. Chung and Jeffrey Camp and Maciej Haranczyk and Benjamin J. Sikora and Wojciech Bury and Vaiva Krungleviciute and Taner Yildirim and Omar K. Farha and David S. Sholl and Randall Q. Snurr},
+ title = {Computation-Ready, Experimental Metal{\textendash}Organic Frameworks: A Tool To Enable High-Throughput Screening of Nanoporous Crystals},
+ journal = {Chem. Mater.}
+ }
+ - |-
+ @article{Chung_2019,
+ doi = {10.1021/acs.jced.9b00835},
+ url = {https://doi.org/10.1021%2Facs.jced.9b00835},
+ year = 2019,
+ month = {nov},
+ publisher = {American Chemical Society ({ACS})},
+ volume = {64},
+ number = {12},
+ pages = {5985--5998},
+ author = {Yongchul G. Chung and Emmanuel Haldoupis and Benjamin J. Bucior and Maciej Haranczyk and Seulchan Lee and Hongda Zhang and Konstantinos D. Vogiatzis and Marija Milisavljevic and Sanliang Ling and Jeffrey S. Camp and Ben Slater and J. Ilja Siepmann and David S. Sholl and Randall Q. Snurr},
+ title = {Advances, Updates, and Analytics for the Computation-Ready, Experimental Metal{\textendash}Organic Framework Database: {CoRE} {MOF} 2019},
+ journal = {J. Chem. Eng. Data}
+ }
templates:
- - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!}
- {cif#} is build from linker molecules with the SMILES {smiles_linkers#} and nodes with the SMILES {smiles_nodes#}.
- - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!}
- {cif#} has a {outputs.pure_CO2_widomHOA__names__noun} of {outputs.pure_CO2_widomHOA#} {outputs.pure_CO2_widomHOA__units}.
- - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!}
- {cif#} has a {outputs.pure_methane_widomHOA__names__noun} of {outputs.pure_methane_widomHOA#} {outputs.pure_methane_widomHOA__units}.
- - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!}
- {cif#} has a {outputs.pure_uptake_CO2_298.00_15000__names__noun} of {outputs.pure_uptake_CO2_298.00_15000#} {outputs.pure_uptake_CO2_298.00_15000__units}.
- - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!}
- {cif#} has a {outputs.pure_uptake_CO2_298.00_1600000__names__noun} of {outputs.pure_uptake_CO2_298.00_1600000#} {outputs.pure_uptake_CO2_298.00_1600000__units}.
- - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!}
- {cif#} has a {outputs.pure_uptake_methane_298.00_580000__names__noun} of {outputs.pure_uptake_methane_298.00_580000#} {outputs.pure_uptake_methane_298.00_580000__units}.
- - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!}
- {cif#} has a {outputs.pure_uptake_methane_298.00_6500000__names__noun} of {outputs.pure_uptake_methane_298.00_6500000#} {outputs.pure_uptake_methane_298.00_6500000__units}.
- - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!}
- {cif#} has a {outputs.logKH_CO2__names__noun} of {outputs.logKH_CO2#} {outputs.logKH_CO2__units}.
- - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!}
- {cif#} has a {outputs.logKH_CH4__names__noun} of {outputs.logKH_CH4#} {outputs.logKH_CH4__units}.
- - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!}
- {cif#} has a {outputs.CH4DC__names__noun} of {outputs.CH4DC#} {outputs.CH4DC__units}.
- - |-
- Question: What linker molecules are {#used|present!} in the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
- Answer: The SMILES of the linker molecules are {smiles_linkers#}.
- - |-
- Question: What nodes are {#used|present!} in the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
- Answer: The SMILES of the nodes are {smiles_nodes#}.
- - |-
- Question: What is the {outputs.pure_CO2_widomHOA__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
- Answer: The {outputs.pure_CO2_widomHOA__names__noun} is {outputs.pure_CO2_widomHOA#} {outputs.pure_CO2_widomHOA__units}.
- - |-
- Question: What is the {outputs.pure_methane_widomHOA__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
- Answer: The {outputs.pure_methane_widomHOA__names__noun} is {outputs.pure_methane_widomHOA#} {outputs.pure_methane_widomHOA__units}.
- - |-
- Question: What is the {outputs.pure_uptake_CO2_298.00_15000__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
- Answer: The {outputs.pure_uptake_CO2_298.00_15000__names__noun} is {outputs.pure_uptake_CO2_298.00_15000#} {outputs.pure_uptake_CO2_298.00_15000__units}.
- - |-
- Question: What is the {outputs.pure_uptake_CO2_298.00_1600000__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
- Answer: The {outputs.pure_uptake_CO2_298.00_1600000__names__noun} is {outputs.pure_uptake_CO2_298.00_1600000#} {outputs.pure_uptake_CO2_298.00_1600000__units}.
- - |-
- Question: What is the {outputs.pure_uptake_methane_298.00_580000__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
- Answer: The {outputs.pure_uptake_methane_298.00_580000__names__noun} is {outputs.pure_uptake_methane_298.00_580000#} {outputs.pure_uptake_methane_298.00_580000__units}.
- - |-
- Question: What is the {outputs.pure_uptake_methane_298.00_6500000__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
- Answer: The {outputs.pure_uptake_methane_298.00_6500000__names__noun} is {outputs.pure_uptake_methane_298.00_6500000#} {outputs.pure_uptake_methane_298.00_6500000__units}.
- - |-
- Question: What is the {outputs.logKH_CO2__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
- Answer: The {outputs.logKH_CO2__names__noun} is {outputs.logKH_CO2#} {outputs.logKH_CO2__units}.
- - |-
- Question: What is the {outputs.logKH_CH4__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
- Answer: The {outputs.logKH_CH4__names__noun} is {outputs.logKH_CH4#} {outputs.logKH_CH4__units}.
- - |-
- Question: What is the {outputs.CH4DC__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
- Answer: The {outputs.CH4DC__names__noun} is {outputs.CH4DC#} {outputs.CH4DC__units}.
- - |-
- User: {#I have|I am working with|I'm interested in!} a {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}. What is the {outputs.pure_CO2_widomHOA__names__noun}?
- Assistant: The {outputs.pure_CO2_widomHOA__names__noun} is {outputs.pure_CO2_widomHOA#} {outputs.pure_CO2_widomHOA__units}.
- - |-
- User: I just {#synthesized|made|created!} a {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}. What is the {outputs.pure_methane_widomHOA__names__noun}?
- Assistant: The {outputs.pure_methane_widomHOA__names__noun} is {outputs.pure_methane_widomHOA#} {outputs.pure_methane_widomHOA__units}. {#Is there anything else you want to know?|Is there anything else I can do for you?!}
- User: {#Yes, |Yeah, |Indeed, |!}I would like to know the {outputs.pure_uptake_CO2_298.00_15000__names__noun}.
- Assistant: The {outputs.pure_uptake_CO2_298.00_15000__names__noun} is {outputs.pure_uptake_CO2_298.00_15000#} {outputs.pure_uptake_CO2_298.00_15000__units}.
- - |-
- User: {#I am interested in|I want to make|I want to synthesize!} a {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}.
- Assistant: {#That's interesting.|Cool.|!} {#How can I help?|How can I be of assistance?|Is there anything I can help you with?!}
- User: What building blocks do I need to combine to make this {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!}?
- Assistant: The SMILES of the linker molecules are {smiles_linkers#} and the SMILES of the nodes are {smiles_nodes#}.
- - |-
- User: {#I want to|I need to|I must|I have to!} {#synthesize|create|make!} a {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with a {outputs.CH4DC__names__noun} of {outputs.CH4DC#} {outputs.CH4DC__units}. What {#building blocks|linkers and nodes!} do I need to combine?
- Assistant: {#I'd go for|I'd recommend!} linkers with SMILES {smiles_linkers#} and nodes with SMILES {smiles_nodes#}. {#Is there anything else you want to know?|Is there anything else I can do for you?!}
- User: {#Yes, |Yeah, |Indeed, |!}I would like to know the {outputs.pure_uptake_CO2_298.00_15000__names__noun}.
- Assistant: The {outputs.pure_uptake_CO2_298.00_15000__names__noun} is {outputs.pure_uptake_CO2_298.00_15000#} {outputs.pure_uptake_CO2_298.00_15000__units}. {#Is there anything else you want to know?|Is there anything else I can do for you?!}
- User: {#Yes, |Yeah, |Indeed, |!}I would like to know the {outputs.pure_uptake_methane_298.00_580000__names__noun}.
- Assistant: The {outputs.pure_uptake_methane_298.00_580000__names__noun} is {outputs.pure_uptake_methane_298.00_580000#} {outputs.pure_uptake_methane_298.00_580000__units}. {#Is there anything else you want to know?|Is there anything else I can do for you?!}
- User: {#No, |Nope, |No thank you, |!}that's all I need to know.
- Assistant: {#You're welcome.||Anytime.|Happy to help.!}
+ - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} is built from linker molecules with the SMILES {smiles_linkers#} and nodes with the SMILES {smiles_nodes#}.
+ - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.pure_CO2_widomHOA__names__noun} of {outputs.pure_CO2_widomHOA#} {outputs.pure_CO2_widomHOA__units}.
+ - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.pure_methane_widomHOA__names__noun} of {outputs.pure_methane_widomHOA#} {outputs.pure_methane_widomHOA__units}.
+ - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.pure_uptake_CO2_298.00_15000__names__noun} of {outputs.pure_uptake_CO2_298.00_15000#} {outputs.pure_uptake_CO2_298.00_15000__units}.
+ - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.pure_uptake_CO2_298.00_1600000__names__noun} of {outputs.pure_uptake_CO2_298.00_1600000#} {outputs.pure_uptake_CO2_298.00_1600000__units}.
+ - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.pure_uptake_methane_298.00_580000__names__noun} of {outputs.pure_uptake_methane_298.00_580000#} {outputs.pure_uptake_methane_298.00_580000__units}.
+ - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.pure_uptake_methane_298.00_6500000__names__noun} of {outputs.pure_uptake_methane_298.00_6500000#} {outputs.pure_uptake_methane_298.00_6500000__units}.
+ - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.logKH_CO2__names__noun} of {outputs.logKH_CO2#} {outputs.logKH_CO2__units}.
+ - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.logKH_CH4__names__noun} of {outputs.logKH_CH4#} {outputs.logKH_CH4__units}.
+ - The {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#} has a {outputs.CH4DC__names__noun} of {outputs.CH4DC#} {outputs.CH4DC__units}.
+ - |-
+ Question: What linker molecules are {#used|present!} in the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
+ Answer: The SMILES of the linker molecules are {smiles_linkers#}.
+ - |-
+ Question: What nodes are {#used|present!} in the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
+ Answer: The SMILES of the nodes are {smiles_nodes#}.
+ - |-
+ Question: What is the {outputs.pure_CO2_widomHOA__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
+ Answer: The {outputs.pure_CO2_widomHOA__names__noun} is {outputs.pure_CO2_widomHOA#} {outputs.pure_CO2_widomHOA__units}.
+ - |-
+ Question: What is the {outputs.pure_methane_widomHOA__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
+ Answer: The {outputs.pure_methane_widomHOA__names__noun} is {outputs.pure_methane_widomHOA#} {outputs.pure_methane_widomHOA__units}.
+ - |-
+ Question: What is the {outputs.pure_uptake_CO2_298.00_15000__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
+ Answer: The {outputs.pure_uptake_CO2_298.00_15000__names__noun} is {outputs.pure_uptake_CO2_298.00_15000#} {outputs.pure_uptake_CO2_298.00_15000__units}.
+ - |-
+ Question: What is the {outputs.pure_uptake_CO2_298.00_1600000__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
+ Answer: The {outputs.pure_uptake_CO2_298.00_1600000__names__noun} is {outputs.pure_uptake_CO2_298.00_1600000#} {outputs.pure_uptake_CO2_298.00_1600000__units}.
+ - |-
+ Question: What is the {outputs.pure_uptake_methane_298.00_580000__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
+ Answer: The {outputs.pure_uptake_methane_298.00_580000__names__noun} is {outputs.pure_uptake_methane_298.00_580000#} {outputs.pure_uptake_methane_298.00_580000__units}.
+ - |-
+ Question: What is the {outputs.pure_uptake_methane_298.00_6500000__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
+ Answer: The {outputs.pure_uptake_methane_298.00_6500000__names__noun} is {outputs.pure_uptake_methane_298.00_6500000#} {outputs.pure_uptake_methane_298.00_6500000__units}.
+ - |-
+ Question: What is the {outputs.logKH_CO2__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
+ Answer: The {outputs.logKH_CO2__names__noun} is {outputs.logKH_CO2#} {outputs.logKH_CO2__units}.
+ - |-
+ Question: What is the {outputs.logKH_CH4__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
+ Answer: The {outputs.logKH_CH4__names__noun} is {outputs.logKH_CH4#} {outputs.logKH_CH4__units}.
+ - |-
+ Question: What is the {outputs.CH4DC__names__noun} of the {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}?
+ Answer: The {outputs.CH4DC__names__noun} is {outputs.CH4DC#} {outputs.CH4DC__units}.
+ - |-
+ User: {#I have|I am working with|I'm interested in!} a {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}. What is the {outputs.pure_CO2_widomHOA__names__noun}?
+ Assistant: The {outputs.pure_CO2_widomHOA__names__noun} is {outputs.pure_CO2_widomHOA#} {outputs.pure_CO2_widomHOA__units}.
+ - |-
+ User: I just {#synthesized|made|created!} a {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}. What is the {outputs.pure_methane_widomHOA__names__noun}?
+ Assistant: The {outputs.pure_methane_widomHOA__names__noun} is {outputs.pure_methane_widomHOA#} {outputs.pure_methane_widomHOA__units}. {#Is there anything else you want to know?|Is there anything else I can do for you?!}
+ User: {#Yes, |Yeah, |Indeed, |!}I would like to know the {outputs.pure_uptake_CO2_298.00_15000__names__noun}.
+ Assistant: The {outputs.pure_uptake_CO2_298.00_15000__names__noun} is {outputs.pure_uptake_CO2_298.00_15000#} {outputs.pure_uptake_CO2_298.00_15000__units}.
+ - |-
+ User: {#I am interested in|I want to make|I want to synthesize!} a {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with the following {#CIF file|crystal structure in CIF format!} {cif#}.
+ Assistant: {#That's interesting.|Cool.|!} {#How can I help?|How can I be of assistance?|Is there anything I can help you with?!}
+ User: What building blocks do I need to combine to make this {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!}?
+ Assistant: The SMILES of the linker molecules are {smiles_linkers#} and the SMILES of the nodes are {smiles_nodes#}.
+ - |-
+ User: {#I want to|I need to|I must|I have to!} {#synthesize|create|make!} a {#metal-organic framework|MOF|reticular material|metal-organic framework (MOF)!} with a {outputs.CH4DC__names__noun} of {outputs.CH4DC#} {outputs.CH4DC__units}. What {#building blocks|linkers and nodes!} do I need to combine?
+ Assistant: {#I'd go for|I'd recommend!} linkers with SMILES {smiles_linkers#} and nodes with SMILES {smiles_nodes#}. {#Is there anything else you want to know?|Is there anything else I can do for you?!}
+ User: {#Yes, |Yeah, |Indeed, |!}I would like to know the {outputs.pure_uptake_CO2_298.00_15000__names__noun}.
+ Assistant: The {outputs.pure_uptake_CO2_298.00_15000__names__noun} is {outputs.pure_uptake_CO2_298.00_15000#} {outputs.pure_uptake_CO2_298.00_15000__units}. {#Is there anything else you want to know?|Is there anything else I can do for you?!}
+ User: {#Yes, |Yeah, |Indeed, |!}I would like to know the {outputs.pure_uptake_methane_298.00_580000__names__noun}.
+ Assistant: The {outputs.pure_uptake_methane_298.00_580000__names__noun} is {outputs.pure_uptake_methane_298.00_580000#} {outputs.pure_uptake_methane_298.00_580000__units}. {#Is there anything else you want to know?|Is there anything else I can do for you?!}
+ User: {#No, |Nope, |No thank you, |!}that's all I need to know.
+ Assistant: {#You're welcome.||Anytime.|Happy to help.!}
diff --git a/data/tabular/cyp2c9_substrate_carbonmangels/meta.yaml b/data/tabular/cyp2c9_substrate_carbonmangels/meta.yaml
index 875fd4ce3..4e950d80f 100644
--- a/data/tabular/cyp2c9_substrate_carbonmangels/meta.yaml
+++ b/data/tabular/cyp2c9_substrate_carbonmangels/meta.yaml
@@ -1,151 +1,150 @@
----
name: cyp2c9_substrate_carbonmangels
description: |-
- CYP P450 2C9 plays a major role in the oxidation of both xenobiotic
- and endogenous compounds. Substrates are drugs that are metabolized by the enzyme.
- TDC used a dataset from Carbon Mangels et al, which merged information on substrates
- and nonsubstrates from six publications.
+ CYP P450 2C9 plays a major role in the oxidation of both xenobiotic
+ and endogenous compounds. Substrates are drugs that are metabolized by the enzyme.
+ TDC used a dataset from Carbon Mangels et al, which merged information on substrates
+ and nonsubstrates from six publications.
targets:
- - id: CYP2C9_Substrate
- description: drugs that are metabolized by CYP P450 2C9 (1) or not (0)
- units:
- type: boolean
- names:
- - noun: CYP P450 2C9 substrate
- - noun: CYP2C9 substrate
- - noun: substrate for CYP2C9
- - noun: substrate for CYP P450 2C9
- - verb: metabolized by CYP2C9
- - verb: metabolized by CYP P450 2C9
- uris:
+ - id: CYP2C9_Substrate
+ description: drugs that are metabolized by CYP P450 2C9 (1) or not (0)
+ units:
+ type: boolean
+ names:
+ - noun: CYP P450 2C9 substrate
+ - noun: CYP2C9 substrate
+ - noun: substrate for CYP2C9
+ - noun: substrate for CYP P450 2C9
+ - verb: metabolized by CYP2C9
+ - verb: metabolized by CYP P450 2C9
+ uris:
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
- - id: compound_name
- type: Other
- names:
- - noun: compound name
- - noun: drug name
- - noun: generic drug name
- description: drug name
+ - id: SMILES
+ type: SMILES
+ description: SMILES
+ - id: compound_name
+ type: Other
+ names:
+ - noun: compound name
+ - noun: drug name
+ - noun: generic drug name
+ description: drug name
license: CC BY 4.0
links:
- - url: https://doi.org/10.1002/minf.201100069
- description: corresponding publication
- - url: https://doi.org/10.1021/ci300367a
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp2c9-substrate-carbon-mangels-et-al
- description: data source
+ - url: https://doi.org/10.1002/minf.201100069
+ description: corresponding publication
+ - url: https://doi.org/10.1021/ci300367a
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp2c9-substrate-carbon-mangels-et-al
+ description: data source
num_points: 669
bibtex:
- - |-
- @article{CarbonMangels2011,
- doi = {10.1002/minf.201100069},
- url = {https://doi.org/10.1002/minf.201100069},
- year = {2011},
- month = sep,
- publisher = {Wiley},
- volume = {30},
- number = {10},
- pages = {885--895},
- author = {Miriam Carbon-Mangels and Michael C. Hutter},
- title = {Selecting Relevant Descriptors for Classification by Bayesian Estimates:
- A Comparison with Decision Trees and Support Vector Machines Approaches for Disparate Data Sets},
- journal = {Molecular Informatics}
- - |-
- @article{Cheng2012,
- doi = {10.1021/ci300367a},
- url = {https://doi.org/10.1021/ci300367a},
- year = {2012},
- month = nov,
- publisher = {American Chemical Society (ACS)},
- volume = {52},
- number = {11},
- number = {3099--3105},
- author = {Feixiong Cheng and Weihua Li and Yadi Zhou and Jie Shen and Zengrui Wu
- and Guixia Liu and Philip W. Lee and Yun Tang},
- title = {admetSAR: A Comprehensive Source and Free Tool for Assessment of Chemical ADMET Properties},
- journal = {Journal of Chemical Information and Modeling}
+ - |-
+ @article{CarbonMangels2011,
+ doi = {10.1002/minf.201100069},
+ url = {https://doi.org/10.1002/minf.201100069},
+ year = {2011},
+ month = sep,
+ publisher = {Wiley},
+ volume = {30},
+ number = {10},
+ pages = {885--895},
+ author = {Miriam Carbon-Mangels and Michael C. Hutter},
+ title = {Selecting Relevant Descriptors for Classification by Bayesian Estimates:
+ A Comparison with Decision Trees and Support Vector Machines Approaches for Disparate Data Sets},
+ journal = {Molecular Informatics}}
+ - |-
+ @article{Cheng2012,
+ doi = {10.1021/ci300367a},
+ url = {https://doi.org/10.1021/ci300367a},
+ year = {2012},
+ month = nov,
+ publisher = {American Chemical Society (ACS)},
+ volume = {52},
+ number = {11},
+ pages = {3099--3105},
+ author = {Feixiong Cheng and Weihua Li and Yadi Zhou and Jie Shen and Zengrui Wu
+ and Guixia Liu and Philip W. Lee and Yun Tang},
+ title = {admetSAR: A Comprehensive Source and Free Tool for Assessment of Chemical ADMET Properties},
+ journal = {Journal of Chemical Information and Modeling}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {CYP2C9_Substrate#not &NULL}identified as a {CYP2C9_Substrate__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is a {CYP2C9_Substrate__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- esult: {CYP2C9_Substrate#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP2C9_Substrate__names__verb}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is a {CYP2C9_Substrate__names__noun}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {CYP2C9_Substrate__names__noun}?
- Assistant: {CYP2C9_Substrate#No&Yes}, this molecule is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2C9_Substrate__names__verb}?
- Assistant: {CYP2C9_Substrate#No&Yes}, it is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is a {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__noun}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}?
- Assistant: This is a molecule that is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {CYP2C9_Substrate#not &NULL}be {CYP2C9_Substrate__names__verb}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {CYP2C9_Substrate#not &NULL}be a {CYP2C9_Substrate__names__noun}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} a {CYP2C9_Substrate__names__noun}:{CYP2C9_Substrate#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is a {CYP2C9_Substrate__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{CYP2C9_Substrate#False&True}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C9_Substrate__names__verb}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {CYP2C9_Substrate%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {CYP2C9_Substrate__names__noun}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {CYP2C9_Substrate%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%CYP2C9_Substrate%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%CYP2C9_Substrate%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {CYP2C9_Substrate#not &NULL}identified as a {CYP2C9_Substrate__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is a {CYP2C9_Substrate__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {CYP2C9_Substrate#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP2C9_Substrate__names__verb}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is a {CYP2C9_Substrate__names__noun}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {CYP2C9_Substrate__names__noun}?
+ Assistant: {CYP2C9_Substrate#No&Yes}, this molecule is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2C9_Substrate__names__verb}?
+ Assistant: {CYP2C9_Substrate#No&Yes}, it is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}.
+ Assistant: This is a molecule that is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {CYP2C9_Substrate#not &NULL}be {CYP2C9_Substrate__names__verb}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {CYP2C9_Substrate#not &NULL}be a {CYP2C9_Substrate__names__noun}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} a {CYP2C9_Substrate__names__noun}:{CYP2C9_Substrate#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is a {CYP2C9_Substrate__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{CYP2C9_Substrate#False&True}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C9_Substrate__names__verb}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {CYP2C9_Substrate%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {CYP2C9_Substrate__names__noun}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {CYP2C9_Substrate%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {CYP2C9_Substrate#not &NULL}a {CYP2C9_Substrate__names__noun}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%CYP2C9_Substrate%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {CYP2C9_Substrate#not &NULL}{CYP2C9_Substrate__names__verb}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%CYP2C9_Substrate%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/cyp2d6_substrate_carbonmangels/meta.yaml b/data/tabular/cyp2d6_substrate_carbonmangels/meta.yaml
index 383b9de22..f7446332e 100644
--- a/data/tabular/cyp2d6_substrate_carbonmangels/meta.yaml
+++ b/data/tabular/cyp2d6_substrate_carbonmangels/meta.yaml
@@ -1,152 +1,151 @@
----
name: cyp2d6_substrate_carbonmangels
description: |-
- CYP2D6 is primarily expressed in the liver. It is also highly expressed
- in areas of the central nervous system, including the substantia nigra. TDC
- used a dataset from Carbon Mangels et al, which merged information on substrates
- and nonsubstrates from six publications.
+ CYP2D6 is primarily expressed in the liver. It is also highly expressed
+ in areas of the central nervous system, including the substantia nigra. TDC
+ used a dataset from Carbon Mangels et al, which merged information on substrates
+ and nonsubstrates from six publications.
targets:
- - id: CYP2D6_Substrate
- description: drugs that are metabolized by the CYP P450 2D6 (1) or not (0)
- units:
- type: boolean
- names:
- - noun: CYP P450 2D6 substrate
- - noun: CYP2D6 substrate
- - noun: substrate for CYP2D6
- - noun: substrate for CYP P450 2D6
- - verb: metabolized by CYP2D6
- - verb: metabolized by CYP P450 2D6
- uris:
+ - id: CYP2D6_Substrate
+ description: drugs that are metabolized by the CYP P450 2D6 (1) or not (0)
+ units:
+ type: boolean
+ names:
+ - noun: CYP P450 2D6 substrate
+ - noun: CYP2D6 substrate
+ - noun: substrate for CYP2D6
+ - noun: substrate for CYP P450 2D6
+ - verb: metabolized by CYP2D6
+ - verb: metabolized by CYP P450 2D6
+ uris:
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
- - id: compound_name
- type: Other
- names:
- - noun: compound name
- - noun: drug name
- - noun: generic drug name
- description: drug name
+ - id: SMILES
+ type: SMILES
+ description: SMILES
+ - id: compound_name
+ type: Other
+ names:
+ - noun: compound name
+ - noun: drug name
+ - noun: generic drug name
+ description: drug name
license: CC BY 4.0
links:
- - url: https://doi.org/10.1002/minf.201100069
- description: corresponding publication
- - url: https://doi.org/10.1021/ci300367a
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp2d6-substrate-carbon-mangels-et-al
- description: data source
+ - url: https://doi.org/10.1002/minf.201100069
+ description: corresponding publication
+ - url: https://doi.org/10.1021/ci300367a
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp2d6-substrate-carbon-mangels-et-al
+ description: data source
num_points: 667
bibtex:
- - |-
- @article{CarbonMangels2011,
- doi = {10.1002/minf.201100069},
- url = {https://doi.org/10.1002/minf.201100069},
- year = {2011},
- month = sep,
- publisher = {Wiley},
- volume = {30},
- number = {10},
- pages = {885--895},
- author = {Miriam Carbon-Mangels and Michael C. Hutter},
- title = {Selecting Relevant Descriptors for Classification by Bayesian Estimates:
- A Comparison with Decision Trees and Support Vector Machines Approaches for Disparate Data Sets},
- journal = {Molecular Informatics}
- - |-
- @article{Cheng2012,
- doi = {10.1021/ci300367a},
- url = {https://doi.org/10.1021/ci300367a},
- year = {2012},
- month = nov,
- publisher = {American Chemical Society (ACS)},
- volume = {52},
- number = {11},
- pages = {3099--3105},
- author = {Feixiong Cheng and Weihua Li and Yadi Zhou and Jie Shen
- and Zengrui Wu and Guixia Liu and Philip W. Lee and Yun Tang},
- title = {admetSAR: A Comprehensive Source and Free Tool for
- Assessment of Chemical ADMET Properties},
- journal = {Journal of Chemical Information and Modeling}
+ - |-
+ @article{CarbonMangels2011,
+ doi = {10.1002/minf.201100069},
+ url = {https://doi.org/10.1002/minf.201100069},
+ year = {2011},
+ month = sep,
+ publisher = {Wiley},
+ volume = {30},
+ number = {10},
+ pages = {885--895},
+ author = {Miriam Carbon-Mangels and Michael C. Hutter},
+ title = {Selecting Relevant Descriptors for Classification by Bayesian Estimates:
+ A Comparison with Decision Trees and Support Vector Machines Approaches for Disparate Data Sets},
+ journal = {Molecular Informatics}}
+ - |-
+ @article{Cheng2012,
+ doi = {10.1021/ci300367a},
+ url = {https://doi.org/10.1021/ci300367a},
+ year = {2012},
+ month = nov,
+ publisher = {American Chemical Society (ACS)},
+ volume = {52},
+ number = {11},
+ pages = {3099--3105},
+ author = {Feixiong Cheng and Weihua Li and Yadi Zhou and Jie Shen
+ and Zengrui Wu and Guixia Liu and Philip W. Lee and Yun Tang},
+ title = {admetSAR: A Comprehensive Source and Free Tool for
+ Assessment of Chemical ADMET Properties},
+ journal = {Journal of Chemical Information and Modeling}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {CYP2D6_Substrate#not &NULL}identified as a {CYP2D6_Substrate__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is a {CYP2D6_Substrate__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- esult: {CYP2D6_Substrate#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP2D6_Substrate__names__verb}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is a {CYP2D6_Substrate__names__noun}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {CYP2D6_Substrate__names__noun}?
- Assistant: {CYP2D6_Substrate#No&Yes}, this molecule is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2D6_Substrate__names__verb}?
- Assistant: {CYP2D6_Substrate#No&Yes}, it is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is a {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__noun}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}?
- Assistant: This is a molecule that is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {CYP2D6_Substrate#not &NULL}be {CYP2D6_Substrate__names__verb}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {CYP2D6_Substrate#not &NULL}be a {CYP2D6_Substrate__names__noun}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} a {CYP2D6_Substrate__names__noun}:{CYP2D6_Substrate#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is a {CYP2D6_Substrate__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{CYP2D6_Substrate#False&True}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2D6_Substrate__names__verb}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {CYP2D6_Substrate%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {CYP2D6_Substrate__names__noun}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {CYP2D6_Substrate%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%CYP2D6_Substrate%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%CYP2D6_Substrate%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {CYP2D6_Substrate#not &NULL}identified as a {CYP2D6_Substrate__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is a {CYP2D6_Substrate__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {CYP2D6_Substrate#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP2D6_Substrate__names__verb}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is a {CYP2D6_Substrate__names__noun}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {CYP2D6_Substrate__names__noun}?
+ Assistant: {CYP2D6_Substrate#No&Yes}, this molecule is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2D6_Substrate__names__verb}?
+ Assistant: {CYP2D6_Substrate#No&Yes}, it is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}.
+ Assistant: This is a molecule that is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {CYP2D6_Substrate#not &NULL}be {CYP2D6_Substrate__names__verb}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {CYP2D6_Substrate#not &NULL}be a {CYP2D6_Substrate__names__noun}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} a {CYP2D6_Substrate__names__noun}:{CYP2D6_Substrate#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is a {CYP2D6_Substrate__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{CYP2D6_Substrate#False&True}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2D6_Substrate__names__verb}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {CYP2D6_Substrate%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {CYP2D6_Substrate__names__noun}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {CYP2D6_Substrate%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {CYP2D6_Substrate#not &NULL}a {CYP2D6_Substrate__names__noun}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%CYP2D6_Substrate%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {CYP2D6_Substrate#not &NULL}{CYP2D6_Substrate__names__verb}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%CYP2D6_Substrate%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/cyp3a4_substrate_carbonmangels/meta.yaml b/data/tabular/cyp3a4_substrate_carbonmangels/meta.yaml
index 42cc5b3b3..020c1e096 100644
--- a/data/tabular/cyp3a4_substrate_carbonmangels/meta.yaml
+++ b/data/tabular/cyp3a4_substrate_carbonmangels/meta.yaml
@@ -1,153 +1,152 @@
----
name: cyp3a4_substrate_carbonmangels
description: |-
- CYP3A4 is an important enzyme in the body, mainly found in the liver
- and in the intestine. It oxidizes small foreign organic molecules (xenobiotics),
- such as toxins or drugs, so that they can be removed from the body. TDC used
- a dataset from Carbon Mangels et al, which merged information on substrates
- and nonsubstrates from six publications.
+ CYP3A4 is an important enzyme in the body, mainly found in the liver
+ and in the intestine. It oxidizes small foreign organic molecules (xenobiotics),
+ such as toxins or drugs, so that they can be removed from the body. TDC used
+ a dataset from Carbon Mangels et al, which merged information on substrates
+ and nonsubstrates from six publications.
targets:
- - id: CYP3A4_Substrate
- description: drugs that are metabolized by the CYP P450 3A4 (1) or not (0)
- units:
- type: boolean
- names:
- - noun: CYP P450 3A4 substrate
- - noun: CYP3A4 substrate
- - noun: substrate for CYP3A4
- - noun: substrate for CYP P450 3A4
- - verb: metabolized by CYP3A4
- - verb: metabolized by CYP P450 3A4
- uris:
+ - id: CYP3A4_Substrate
+ description: drugs that are metabolized by the CYP P450 3A4 (1) or not (0)
+ units:
+ type: boolean
+ names:
+ - noun: CYP P450 3A4 substrate
+ - noun: CYP3A4 substrate
+ - noun: substrate for CYP3A4
+ - noun: substrate for CYP P450 3A4
+ - verb: metabolized by CYP3A4
+ - verb: metabolized by CYP P450 3A4
+ uris:
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
- - id: compound_name
- type: Other
- names:
- - noun: compound name
- - noun: drug name
- - noun: generic drug name
- description: drug name
+ - id: SMILES
+ type: SMILES
+ description: SMILES
+ - id: compound_name
+ type: Other
+ names:
+ - noun: compound name
+ - noun: drug name
+ - noun: generic drug name
+ description: drug name
license: CC BY 4.0
links:
- - url: https://doi.org/10.1002/minf.201100069
- description: corresponding publication
- - url: https://doi.org/10.1021/ci300367a
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp3a4-substrate-carbon-mangels-et-al
- description: data source
+ - url: https://doi.org/10.1002/minf.201100069
+ description: corresponding publication
+ - url: https://doi.org/10.1021/ci300367a
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp3a4-substrate-carbon-mangels-et-al
+ description: data source
num_points: 670
bibtex:
- - |-
- @article{CarbonMangels2011,
- doi = {10.1002/minf.201100069},
- url = {https://doi.org/10.1002/minf.201100069},
- year = {2011},
- month = sep,
- publisher = {Wiley},
- volume = {30},
- number = {10},
- pages = {885--895},
- author = {Miriam Carbon-Mangels and Michael C. Hutter},
- title = {Selecting Relevant Descriptors for Classification by Bayesian Estimates:
- A Comparison with Decision Trees and Support Vector Machines Approaches for Disparate Data Sets},
- journal = {Molecular Informatics}
- - |-
- @article{Cheng2012,
- doi = {10.1021/ci300367a},
- url = {https://doi.org/10.1021/ci300367a},
- year = {2012},
- month = nov,
- publisher = {American Chemical Society (ACS)},
- volume = {52},
- number = {11},
- pages = {3099--3105},
- author = {Feixiong Cheng and Weihua Li and Yadi Zhou and Jie Shen
- and Zengrui Wu and Guixia Liu and Philip W. Lee and Yun Tang},
- title = {admetSAR: A Comprehensive Source and Free Tool for
- Assessment of Chemical ADMET Properties},
- journal = {Journal of Chemical Information and Modeling}
+ - |-
+ @article{CarbonMangels2011,
+ doi = {10.1002/minf.201100069},
+ url = {https://doi.org/10.1002/minf.201100069},
+ year = {2011},
+ month = sep,
+ publisher = {Wiley},
+ volume = {30},
+ number = {10},
+ pages = {885--895},
+ author = {Miriam Carbon-Mangels and Michael C. Hutter},
+ title = {Selecting Relevant Descriptors for Classification by Bayesian Estimates:
+ A Comparison with Decision Trees and Support Vector Machines Approaches for Disparate Data Sets},
+ journal = {Molecular Informatics}}
+ - |-
+ @article{Cheng2012,
+ doi = {10.1021/ci300367a},
+ url = {https://doi.org/10.1021/ci300367a},
+ year = {2012},
+ month = nov,
+ publisher = {American Chemical Society (ACS)},
+ volume = {52},
+ number = {11},
+ pages = {3099--3105},
+ author = {Feixiong Cheng and Weihua Li and Yadi Zhou and Jie Shen
+ and Zengrui Wu and Guixia Liu and Philip W. Lee and Yun Tang},
+ title = {admetSAR: A Comprehensive Source and Free Tool for
+ Assessment of Chemical ADMET Properties},
+ journal = {Journal of Chemical Information and Modeling}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {CYP3A4_Substrate#not &NULL}identified as a {CYP3A4_Substrate__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is a {CYP3A4_Substrate__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- esult: {CYP3A4_Substrate#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP3A4_Substrate__names__verb}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is a {CYP3A4_Substrate__names__noun}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {CYP3A4_Substrate__names__noun}?
- Assistant: {CYP3A4_Substrate#No&Yes}, this molecule is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {CYP3A4_Substrate__names__verb}?
- Assistant: {CYP3A4_Substrate#No&Yes}, it is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is a {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__noun}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}?
- Assistant: This is a molecule that is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {CYP3A4_Substrate#not &NULL}be {CYP3A4_Substrate__names__verb}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {CYP3A4_Substrate#not &NULL}be a {CYP3A4_Substrate__names__noun}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} a {CYP3A4_Substrate__names__noun}:{CYP3A4_Substrate#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is a {CYP3A4_Substrate__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{CYP3A4_Substrate#False&True}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP3A4_Substrate__names__verb}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {CYP3A4_Substrate%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {CYP3A4_Substrate__names__noun}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {CYP3A4_Substrate%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%CYP3A4_Substrate%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%CYP3A4_Substrate%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {CYP3A4_Substrate#not &NULL}identified as a {CYP3A4_Substrate__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is a {CYP3A4_Substrate__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {CYP3A4_Substrate#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP3A4_Substrate__names__verb}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is a {CYP3A4_Substrate__names__noun}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {CYP3A4_Substrate__names__noun}?
+ Assistant: {CYP3A4_Substrate#No&Yes}, this molecule is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {CYP3A4_Substrate__names__verb}?
+ Assistant: {CYP3A4_Substrate#No&Yes}, it is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}.
+ Assistant: This is a molecule that is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {CYP3A4_Substrate#not &NULL}be {CYP3A4_Substrate__names__verb}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {CYP3A4_Substrate#not &NULL}be a {CYP3A4_Substrate__names__noun}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} a {CYP3A4_Substrate__names__noun}:{CYP3A4_Substrate#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is a {CYP3A4_Substrate__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{CYP3A4_Substrate#False&True}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP3A4_Substrate__names__verb}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {CYP3A4_Substrate%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {CYP3A4_Substrate__names__noun}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {CYP3A4_Substrate%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {CYP3A4_Substrate#not &NULL}a {CYP3A4_Substrate__names__noun}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%CYP3A4_Substrate%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {CYP3A4_Substrate#not &NULL}{CYP3A4_Substrate__names__verb}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%CYP3A4_Substrate%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/cyp_p450_1a2_inhibition_veith_et_al/meta.yaml b/data/tabular/cyp_p450_1a2_inhibition_veith_et_al/meta.yaml
index 1fb61165c..ae5056c6c 100644
--- a/data/tabular/cyp_p450_1a2_inhibition_veith_et_al/meta.yaml
+++ b/data/tabular/cyp_p450_1a2_inhibition_veith_et_al/meta.yaml
@@ -1,140 +1,139 @@
----
name: cyp_p450_1a2_inhibition_veith_et_al
description: |-
- The CYP P450 genes are involved in the formation and breakdown (metabolism)
- of various molecules and chemicals within cells. Specifically, CYP1A2 localizes
- to the endoplasmic reticulum and its expression is induced by some polycyclic
- aromatic hydrocarbons (PAHs), some of which are found in cigarette smoke. It
- is able to metabolize some PAHs to carcinogenic intermediates. Other xenobiotic
- substrates for this enzyme include caffeine, aflatoxin B1, and acetaminophen.
+ The CYP P450 genes are involved in the formation and breakdown (metabolism)
+ of various molecules and chemicals within cells. Specifically, CYP1A2 localizes
+ to the endoplasmic reticulum and its expression is induced by some polycyclic
+ aromatic hydrocarbons (PAHs), some of which are found in cigarette smoke. It
+ is able to metabolize some PAHs to carcinogenic intermediates. Other xenobiotic
+ substrates for this enzyme include caffeine, aflatoxin B1, and acetaminophen.
targets:
- - id: CYP1A2_inhibition
- description: ability of the drug to inhibit CYP P450 1A2 (1) or not (0)
- units:
- type: boolean
- names:
- - noun: inhibition of CYP1A2
- - noun: inhibition of CYP P450 1A2
- - adjective: CYP1A2 inhibition
- - adjective: CYP P450 1A2 inhibition
- - verb: inhibits CYP P450 1A2
- - verb: inhibits CYP1A2
- - gerund: inhibiting CYP P450 1A2
- - gerund: inhibiting CYP1A2
- uris:
+ - id: CYP1A2_inhibition
+ description: ability of the drug to inhibit CYP P450 1A2 (1) or not (0)
+ units:
+ type: boolean
+ names:
+ - noun: inhibition of CYP1A2
+ - noun: inhibition of CYP P450 1A2
+ - adjective: CYP1A2 inhibition
+ - adjective: CYP P450 1A2 inhibition
+ - verb: inhibits CYP P450 1A2
+ - verb: inhibits CYP1A2
+ - gerund: inhibiting CYP P450 1A2
+ - gerund: inhibiting CYP1A2
+ uris:
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://doi.org/10.1038/nbt.1581
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-1a2-inhibition-veith-et-al
- description: data source
+ - url: https://doi.org/10.1038/nbt.1581
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-1a2-inhibition-veith-et-al
+ description: data source
num_points: 12579
bibtex:
- - |-
- @article{Veith2009,
- doi = {10.1038/nbt.1581},
- url = {https://doi.org/10.1038/nbt.1581},
- year = {2009},
- month = oct,
- publisher = {Springer Science and Business Media LLC},
- volume = {27},
- number = {11},
- pages = {1050--1055},
- author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James
- and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese
- and Christopher P Austin and David G Lloyd and Douglas S Auld},
- title = {Comprehensive characterization of cytochrome P450 isozyme selectivity
- across chemical libraries},
- journal = {Nature Biotechnology}
+ - |-
+ @article{Veith2009,
+ doi = {10.1038/nbt.1581},
+ url = {https://doi.org/10.1038/nbt.1581},
+ year = {2009},
+ month = oct,
+ publisher = {Springer Science and Business Media LLC},
+ volume = {27},
+ number = {11},
+ pages = {1050--1055},
+ author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James
+ and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese
+ and Christopher P Austin and David G Lloyd and Douglas S Auld},
+ title = {Comprehensive characterization of cytochrome P450 isozyme selectivity
+ across chemical libraries},
+ journal = {Nature Biotechnology}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP1A2_inhibition#no &NULL}{CYP1A2_inhibition__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP1A2_inhibition#no &NULL}{CYP1A2_inhibition__names__noun}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP1A2_inhibition#no &NULL}{CYP1A2_inhibition__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP1A2_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {CYP1A2_inhibition#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP1A2_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {CYP1A2_inhibition__names__gerund}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP1A2_inhibition__names__gerund}?
- Assistant: {CYP1A2_inhibition#No&Yes}, this molecule is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {CYP1A2_inhibition__names__gerund}?
- Assistant: {CYP1A2_inhibition#No&Yes}, it is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}?
- Assistant: This is a molecule that is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {CYP1A2_inhibition#not &NULL}be {CYP1A2_inhibition__names__gerund}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {CYP1A2_inhibition#not &NULL}be {CYP1A2_inhibition__names__gerund}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} {CYP1A2_inhibition__names__gerund}:{CYP1A2_inhibition#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP1A2_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{CYP1A2_inhibition#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP1A2_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result:This molecule is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}.
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP1A2_inhibition__names__gerund}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {CYP1A2_inhibition%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP1A2_inhibition__names__gerund}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {CYP1A2_inhibition%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%CYP1A2_inhibition%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%CYP1A2_inhibition%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP1A2_inhibition#no &NULL}{CYP1A2_inhibition__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP1A2_inhibition#no &NULL}{CYP1A2_inhibition__names__noun}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP1A2_inhibition#no &NULL}{CYP1A2_inhibition__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP1A2_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {CYP1A2_inhibition#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP1A2_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {CYP1A2_inhibition__names__gerund}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP1A2_inhibition__names__gerund}?
+ Assistant: {CYP1A2_inhibition#No&Yes}, this molecule is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {CYP1A2_inhibition__names__gerund}?
+ Assistant: {CYP1A2_inhibition#No&Yes}, it is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}.
+ Assistant: This is a molecule that is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {CYP1A2_inhibition#not &NULL}be {CYP1A2_inhibition__names__gerund}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {CYP1A2_inhibition#not &NULL}be {CYP1A2_inhibition__names__gerund}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} {CYP1A2_inhibition__names__gerund}:{CYP1A2_inhibition#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP1A2_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{CYP1A2_inhibition#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP1A2_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result:This molecule is {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}.
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP1A2_inhibition__names__gerund}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {CYP1A2_inhibition%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP1A2_inhibition__names__gerund}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {CYP1A2_inhibition%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%CYP1A2_inhibition%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {CYP1A2_inhibition#not &NULL}{CYP1A2_inhibition__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%CYP1A2_inhibition%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/cyp_p450_2c19_inhibition_veith_et_al/meta.yaml b/data/tabular/cyp_p450_2c19_inhibition_veith_et_al/meta.yaml
index 787c4de0b..239aa54ed 100644
--- a/data/tabular/cyp_p450_2c19_inhibition_veith_et_al/meta.yaml
+++ b/data/tabular/cyp_p450_2c19_inhibition_veith_et_al/meta.yaml
@@ -1,140 +1,139 @@
----
name: cyp_p450_2c19_inhibition_veith_et_al
description: |-
- The CYP P450 genes are essential in the breakdown (metabolism) of
- various molecules and chemicals within cells. A drug that can inhibit these
- enzymes would mean poor metabolism to this drug and other drugs, which could lead
- to drug-drug interactions and adverse effects. Specifically, the CYP2C19 gene
- provides instructions for making an enzyme of the endoplasmic reticulum, which
- is involved in protein processing and transport.
+ The CYP P450 genes are essential in the breakdown (metabolism) of
+ various molecules and chemicals within cells. A drug that can inhibit these
+ enzymes would cause poor metabolism of this drug and other drugs, which could lead
+ to drug-drug interactions and adverse effects. Specifically, the CYP2C19 gene
+ provides instructions for making an enzyme of the endoplasmic reticulum, which
+ is involved in protein processing and transport.
targets:
- - id: CYP2C19_inhibition
- description: ability of the drug to inhibit CYP 2C19 (1) or not (0)
- units:
- type: boolean
- names:
- - noun: inhibition of CYP2C19
- - noun: inhibition of CYP P450 2C19
- - adjective: CYP2C19 inhibition
- - adjective: CYP P450 2C19 inhibition
- - verb: inhibits CYP P450 2C19
- - verb: inhibits CYP2C19
- - gerund: inhibiting CYP P450 2C19
- - gerund: inhibiting CYP2C19
- uris:
+ - id: CYP2C19_inhibition
+ description: ability of the drug to inhibit CYP 2C19 (1) or not (0)
+ units:
+ type: boolean
+ names:
+ - noun: inhibition of CYP2C19
+ - noun: inhibition of CYP P450 2C19
+ - adjective: CYP2C19 inhibition
+ - adjective: CYP P450 2C19 inhibition
+ - verb: inhibits CYP P450 2C19
+ - verb: inhibits CYP2C19
+ - gerund: inhibiting CYP P450 2C19
+ - gerund: inhibiting CYP2C19
+ uris:
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://doi.org/10.1038/nbt.1581
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-2c19-inhibition-veith-et-al
- description: data source
+ - url: https://doi.org/10.1038/nbt.1581
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-2c19-inhibition-veith-et-al
+ description: data source
num_points: 12665
bibtex:
- - |-
- @article{Veith2009,
- doi = {10.1038/nbt.1581},
- url = {https://doi.org/10.1038/nbt.1581},
- year = {2009},
- month = oct,
- publisher = {Springer Science and Business Media LLC},
- volume = {27},
- number = {11},
- pages = {1050--1055},
- author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James
- and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese
- and Christopher P Austin and David G Lloyd and Douglas S Auld},
- title = {Comprehensive characterization of cytochrome P450 isozyme selectivity
- across chemical libraries},
- journal = {Nature Biotechnology}
+ - |-
+ @article{Veith2009,
+ doi = {10.1038/nbt.1581},
+ url = {https://doi.org/10.1038/nbt.1581},
+ year = {2009},
+ month = oct,
+ publisher = {Springer Science and Business Media LLC},
+ volume = {27},
+ number = {11},
+ pages = {1050--1055},
+ author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James
+ and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese
+ and Christopher P Austin and David G Lloyd and Douglas S Auld},
+ title = {Comprehensive characterization of cytochrome P450 isozyme selectivity
+ across chemical libraries},
+ journal = {Nature Biotechnology}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP2C19_inhibition#no &NULL}{CYP2C19_inhibition__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP2C19_inhibition#no &NULL}{CYP2C19_inhibition__names__noun}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP2C19_inhibition#no &NULL}{CYP2C19_inhibition__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP2C19_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {CYP2C19_inhibition#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP2C19_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {CYP2C19_inhibition__names__gerund}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP2C19_inhibition__names__gerund}?
- Assistant: {CYP2C19_inhibition#No&Yes}, this molecule is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2C19_inhibition__names__gerund}?
- Assistant: {CYP2C19_inhibition#No&Yes}, it is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}?
- Assistant: This is a molecule that is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {CYP2C19_inhibition#not &NULL}be {CYP2C19_inhibition__names__gerund}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {CYP2C19_inhibition#not &NULL}be {CYP2C19_inhibition__names__gerund}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} {CYP2C19_inhibition__names__gerund}:{CYP2C19_inhibition#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP2C19_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{CYP2C19_inhibition#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP2C19_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result:This molecule is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}.
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C19_inhibition__names__gerund}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {CYP2C19_inhibition%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C19_inhibition__names__gerund}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {CYP2C19_inhibition%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%CYP2C19_inhibition%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%CYP2C19_inhibition%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP2C19_inhibition#no &NULL}{CYP2C19_inhibition__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP2C19_inhibition#no &NULL}{CYP2C19_inhibition__names__noun}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP2C19_inhibition#no &NULL}{CYP2C19_inhibition__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP2C19_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {CYP2C19_inhibition#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP2C19_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {CYP2C19_inhibition__names__gerund}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP2C19_inhibition__names__gerund}?
+ Assistant: {CYP2C19_inhibition#No&Yes}, this molecule is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2C19_inhibition__names__gerund}?
+ Assistant: {CYP2C19_inhibition#No&Yes}, it is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}.
+ Assistant: This is a molecule that is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {CYP2C19_inhibition#not &NULL}be {CYP2C19_inhibition__names__gerund}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {CYP2C19_inhibition#not &NULL}be {CYP2C19_inhibition__names__gerund}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} {CYP2C19_inhibition__names__gerund}:{CYP2C19_inhibition#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP2C19_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{CYP2C19_inhibition#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP2C19_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result:This molecule is {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}.
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C19_inhibition__names__gerund}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {CYP2C19_inhibition%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C19_inhibition__names__gerund}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {CYP2C19_inhibition%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%CYP2C19_inhibition%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {CYP2C19_inhibition#not &NULL}{CYP2C19_inhibition__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%CYP2C19_inhibition%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/cyp_p450_2c9_inhibition_veith_et_al/meta.yaml b/data/tabular/cyp_p450_2c9_inhibition_veith_et_al/meta.yaml
index ff8245e88..56889d1e7 100644
--- a/data/tabular/cyp_p450_2c9_inhibition_veith_et_al/meta.yaml
+++ b/data/tabular/cyp_p450_2c9_inhibition_veith_et_al/meta.yaml
@@ -1,137 +1,136 @@
----
name: cyp_p450_2c9_inhibition_veith_et_al
description: |-
- The CYP P450 genes are involved in the formation and breakdown (metabolism)
- of various molecules and chemicals within cells. Specifically, the CYP P450
- 2C9 plays a major role in the oxidation of both xenobiotic and endogenous compounds.
+ The CYP P450 genes are involved in the formation and breakdown (metabolism)
+ of various molecules and chemicals within cells. Specifically, the CYP P450
+ 2C9 plays a major role in the oxidation of both xenobiotic and endogenous compounds.
targets:
- - id: CYP2C9_inhibition
- description: ability of the drug to inhibit CYP P450 2C9 (1) or not (0)
- units:
- type: boolean
- names:
- - noun: inhibition of CYP2C9
- - noun: inhibition of CYP P450 2C9
- - adjective: CYP2C9 inhibition
- - adjective: CYP P450 2C9 inhibition
- - verb: inhibits CYP P450 2C9
- - verb: inhibits CYP2C9
- - gerund: inhibiting CYP P450 2C9
- - gerund: inhibiting CYP2C9
- uris:
+ - id: CYP2C9_inhibition
+ description: ability of the drug to inhibit CYP P450 2C9 (1) or not (0)
+ units:
+ type: boolean
+ names:
+ - noun: inhibition of CYP2C9
+ - noun: inhibition of CYP P450 2C9
+ - adjective: CYP2C9 inhibition
+ - adjective: CYP P450 2C9 inhibition
+ - verb: inhibits CYP P450 2C9
+ - verb: inhibits CYP2C9
+ - gerund: inhibiting CYP P450 2C9
+ - gerund: inhibiting CYP2C9
+ uris:
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://doi.org/10.1038/nbt.1581
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-2c9-inhibition-veith-et-al
- description: data source
+ - url: https://doi.org/10.1038/nbt.1581
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-2c9-inhibition-veith-et-al
+ description: data source
num_points: 12092
bibtex:
- - |-
- @article{Veith2009,
- doi = {10.1038/nbt.1581},
- url = {https://doi.org/10.1038/nbt.1581},
- year = {2009},
- month = oct,
- publisher = {Springer Science and Business Media LLC},
- volume = {27},
- number = {11},
- pages = {1050--1055},
- author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James
- and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese
- and Christopher P Austin and David G Lloyd and Douglas S Auld},
- title = {Comprehensive characterization of cytochrome P450 isozyme selectivity
- across chemical libraries},
- journal = {Nature Biotechnology}
+ - |-
+ @article{Veith2009,
+ doi = {10.1038/nbt.1581},
+ url = {https://doi.org/10.1038/nbt.1581},
+ year = {2009},
+ month = oct,
+ publisher = {Springer Science and Business Media LLC},
+ volume = {27},
+ number = {11},
+ pages = {1050--1055},
+ author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James
+ and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese
+ and Christopher P Austin and David G Lloyd and Douglas S Auld},
+ title = {Comprehensive characterization of cytochrome P450 isozyme selectivity
+ across chemical libraries},
+ journal = {Nature Biotechnology}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP2C9_inhibition#no &NULL}{CYP2C9_inhibition__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP2C9_inhibition#no &NULL}{CYP2C9_inhibition__names__noun}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP2C9_inhibition#no &NULL}{CYP2C9_inhibition__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP2C9_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {CYP2C9_inhibition#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP2C9_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {CYP2C9_inhibition__names__gerund}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP2C9_inhibition__names__gerund}?
- Assistant: {CYP2C9_inhibition#No&Yes}, this molecule is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2C9_inhibition__names__gerund}?
- Assistant: {CYP2C9_inhibition#No&Yes}, it is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}?
- Assistant: This is a molecule that is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {CYP2C9_inhibition#not &NULL}be {CYP2C9_inhibition__names__gerund}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {CYP2C9_inhibition#not &NULL}be {CYP2C9_inhibition__names__gerund}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} {CYP2C9_inhibition__names__gerund}:{CYP2C9_inhibition#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP2C9_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{CYP2C9_inhibition#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP2C9_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result:This molecule is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}.
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C9_inhibition__names__gerund}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {CYP2C9_inhibition%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C9_inhibition__names__gerund}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {CYP2C9_inhibition%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%CYP2C9_inhibition%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%CYP2C9_inhibition%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP2C9_inhibition#no &NULL}{CYP2C9_inhibition__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP2C9_inhibition#no &NULL}{CYP2C9_inhibition__names__noun}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP2C9_inhibition#no &NULL}{CYP2C9_inhibition__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP2C9_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {CYP2C9_inhibition#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP2C9_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {CYP2C9_inhibition__names__gerund}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP2C9_inhibition__names__gerund}?
+ Assistant: {CYP2C9_inhibition#No&Yes}, this molecule is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2C9_inhibition__names__gerund}?
+ Assistant: {CYP2C9_inhibition#No&Yes}, it is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}?
+ Assistant: This is a molecule that is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {CYP2C9_inhibition#not &NULL}be {CYP2C9_inhibition__names__gerund}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {CYP2C9_inhibition#not &NULL}be {CYP2C9_inhibition__names__gerund}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} {CYP2C9_inhibition__names__gerund}:{CYP2C9_inhibition#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP2C9_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{CYP2C9_inhibition#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP2C9_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result:This molecule is {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}.
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C9_inhibition__names__gerund}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {CYP2C9_inhibition%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2C9_inhibition__names__gerund}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {CYP2C9_inhibition%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%CYP2C9_inhibition%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {CYP2C9_inhibition#not &NULL}{CYP2C9_inhibition__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%CYP2C9_inhibition%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/cyp_p450_2d6_inhibition_veith_et_al/meta.yaml b/data/tabular/cyp_p450_2d6_inhibition_veith_et_al/meta.yaml
index 734bb7587..c1c5cb9a7 100644
--- a/data/tabular/cyp_p450_2d6_inhibition_veith_et_al/meta.yaml
+++ b/data/tabular/cyp_p450_2d6_inhibition_veith_et_al/meta.yaml
@@ -1,138 +1,137 @@
----
name: cyp_p450_2d6_inhibition_veith_et_al
description: |-
- The CYP P450 genes are involved in the formation and breakdown (metabolism)
- of various molecules and chemicals within cells. Specifically, CYP2D6 is primarily
- expressed in the liver. It is also highly expressed in areas of the central
- nervous system, including the substantia nigra.
+ The CYP P450 genes are involved in the formation and breakdown (metabolism)
+ of various molecules and chemicals within cells. Specifically, CYP2D6 is primarily
+ expressed in the liver. It is also highly expressed in areas of the central
+ nervous system, including the substantia nigra.
targets:
- - id: CYP2D6_inhibition
- description: ability of the drug to inhibit CYP P450 2D6 (1) or not (0)
- units:
- type: boolean
- names:
- - noun: inhibition of CYP2D6
- - noun: inhibition of CYP P450 2D6
- - adjective: CYP2D6 inhibition
- - adjective: CYP P450 2D6 inhibition
- - verb: inhibits CYP P450 2D6
- - verb: inhibits CYP2D6
- - gerund: inhibiting CYP P450 2D6
- - gerund: inhibiting CYP2D6
- uris:
+ - id: CYP2D6_inhibition
+ description: ability of the drug to inhibit CYP P450 2D6 (1) or not (0)
+ units:
+ type: boolean
+ names:
+ - noun: inhibition of CYP2D6
+ - noun: inhibition of CYP P450 2D6
+ - adjective: CYP2D6 inhibition
+ - adjective: CYP P450 2D6 inhibition
+ - verb: inhibits CYP P450 2D6
+ - verb: inhibits CYP2D6
+ - gerund: inhibiting CYP P450 2D6
+ - gerund: inhibiting CYP2D6
+ uris:
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://doi.org/10.1038/nbt.1581
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-2d6-inhibition-veith-et-al
- description: data source
+ - url: https://doi.org/10.1038/nbt.1581
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-2d6-inhibition-veith-et-al
+ description: data source
num_points: 13130
bibtex:
- - |-
- @article{Veith2009,
- doi = {10.1038/nbt.1581},
- url = {https://doi.org/10.1038/nbt.1581},
- year = {2009},
- month = oct,
- publisher = {Springer Science and Business Media LLC},
- volume = {27},
- number = {11},
- pages = {1050--1055},
- author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James
- and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese
- and Christopher P Austin and David G Lloyd and Douglas S Auld},
- title = {Comprehensive characterization of cytochrome P450 isozyme selectivity
- across chemical libraries},
- journal = {Nature Biotechnology}
+ - |-
+ @article{Veith2009,
+ doi = {10.1038/nbt.1581},
+ url = {https://doi.org/10.1038/nbt.1581},
+ year = {2009},
+ month = oct,
+ publisher = {Springer Science and Business Media LLC},
+ volume = {27},
+ number = {11},
+ pages = {1050--1055},
+ author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James
+ and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese
+ and Christopher P Austin and David G Lloyd and Douglas S Auld},
+ title = {Comprehensive characterization of cytochrome P450 isozyme selectivity
+ across chemical libraries},
+ journal = {Nature Biotechnology}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP2D6_inhibition#no &NULL}{CYP2D6_inhibition__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP2D6_inhibition#no &NULL}{CYP2D6_inhibition__names__noun}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP2D6_inhibition#no &NULL}{CYP2D6_inhibition__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP2D6_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {CYP2D6_inhibition#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP2D6_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {CYP2D6_inhibition__names__gerund}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP2D6_inhibition__names__gerund}?
- Assistant: {CYP2D6_inhibition#No&Yes}, this molecule is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2D6_inhibition__names__gerund}?
- Assistant: {CYP2D6_inhibition#No&Yes}, it is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}?
- Assistant: This is a molecule that is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {CYP2D6_inhibition#not &NULL}be {CYP2D6_inhibition__names__gerund}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {CYP2D6_inhibition#not &NULL}be {CYP2D6_inhibition__names__gerund}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} {CYP2D6_inhibition__names__gerund}:{CYP2D6_inhibition#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP2D6_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{CYP2D6_inhibition#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP2D6_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result:This molecule is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}.
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2D6_inhibition__names__gerund}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {CYP2D6_inhibition%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2D6_inhibition__names__gerund}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {CYP2D6_inhibition%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%CYP2D6_inhibition%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%CYP2D6_inhibition%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP2D6_inhibition#no &NULL}{CYP2D6_inhibition__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP2D6_inhibition#no &NULL}{CYP2D6_inhibition__names__noun}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP2D6_inhibition#no &NULL}{CYP2D6_inhibition__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP2D6_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {CYP2D6_inhibition#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP2D6_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {CYP2D6_inhibition__names__gerund}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP2D6_inhibition__names__gerund}?
+ Assistant: {CYP2D6_inhibition#No&Yes}, this molecule is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {CYP2D6_inhibition__names__gerund}?
+ Assistant: {CYP2D6_inhibition#No&Yes}, it is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}?
+ Assistant: This is a molecule that is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {CYP2D6_inhibition#not &NULL}be {CYP2D6_inhibition__names__gerund}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {CYP2D6_inhibition#not &NULL}be {CYP2D6_inhibition__names__gerund}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} {CYP2D6_inhibition__names__gerund}:{CYP2D6_inhibition#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP2D6_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{CYP2D6_inhibition#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP2D6_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result:This molecule is {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}.
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2D6_inhibition__names__gerund}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {CYP2D6_inhibition%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP2D6_inhibition__names__gerund}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {CYP2D6_inhibition%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%CYP2D6_inhibition%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {CYP2D6_inhibition#not &NULL}{CYP2D6_inhibition__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%CYP2D6_inhibition%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/cyp_p450_3a4_inhibition_veith_et_al/meta.yaml b/data/tabular/cyp_p450_3a4_inhibition_veith_et_al/meta.yaml
index 3fdb25903..b17d5272d 100644
--- a/data/tabular/cyp_p450_3a4_inhibition_veith_et_al/meta.yaml
+++ b/data/tabular/cyp_p450_3a4_inhibition_veith_et_al/meta.yaml
@@ -1,139 +1,138 @@
----
name: cyp_p450_3a4_inhibition_veith_et_al
description: |-
- The CYP P450 genes are involved in the formation and breakdown (metabolism)
- of various molecules and chemicals within cells. Specifically, CYP3A4 is an
- important enzyme in the body, mainly found in the liver and in the intestine.
- It oxidizes small foreign organic molecules (xenobiotics), such as toxins or
- drugs, so that they can be removed from the body.
+ The CYP P450 genes are involved in the formation and breakdown (metabolism)
+ of various molecules and chemicals within cells. Specifically, CYP3A4 is an
+ important enzyme in the body, mainly found in the liver and in the intestine.
+ It oxidizes small foreign organic molecules (xenobiotics), such as toxins or
+ drugs, so that they can be removed from the body.
targets:
- - id: CYP3A4_inhibition
- description: ability of the drug to inhibit CYP P450 3A4 (1) or not (0)
- units:
- type: boolean
- names:
- - noun: inhibition of CYP3A4
- - noun: inhibition of CYP P450 3A4
- - adjective: CYP3A4 inhibition
- - adjective: CYP P450 3A4 inhibition
- - verb: inhibits CYP P450 3A4
- - verb: inhibits CYP3A4
- - gerund: inhibiting CYP P450 3A4
- - gerund: inhibiting CYP3A4
- uris:
+ - id: CYP3A4_inhibition
+ description: ability of the drug to inhibit CYP P450 3A4 (1) or not (0)
+ units:
+ type: boolean
+ names:
+ - noun: inhibition of CYP3A4
+ - noun: inhibition of CYP P450 3A4
+ - adjective: CYP3A4 inhibition
+ - adjective: CYP P450 3A4 inhibition
+ - verb: inhibits CYP P450 3A4
+ - verb: inhibits CYP3A4
+ - gerund: inhibiting CYP P450 3A4
+ - gerund: inhibiting CYP3A4
+ uris:
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://doi.org/10.1038/nbt.1581
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-3a4-inhibition-veith-et-al
- description: data source
+ - url: https://doi.org/10.1038/nbt.1581
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/adme/#cyp-p450-3a4-inhibition-veith-et-al
+ description: data source
num_points: 12328
bibtex:
- - |-
- @article{Veith2009,
- doi = {10.1038/nbt.1581},
- url = {https://doi.org/10.1038/nbt.1581},
- year = {2009},
- month = oct,
- publisher = {Springer Science and Business Media LLC},
- volume = {27},
- number = {11},
- pages = {1050--1055},
- author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James
- and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese
- and Christopher P Austin and David G Lloyd and Douglas S Auld},
- title = {Comprehensive characterization of cytochrome P450 isozyme selectivity
- across chemical libraries},
- journal = {Nature Biotechnology}
+ - |-
+ @article{Veith2009,
+ doi = {10.1038/nbt.1581},
+ url = {https://doi.org/10.1038/nbt.1581},
+ year = {2009},
+ month = oct,
+ publisher = {Springer Science and Business Media LLC},
+ volume = {27},
+ number = {11},
+ pages = {1050--1055},
+ author = {Henrike Veith and Noel Southall and Ruili Huang and Tim James
+ and Darren Fayne and Natalia Artemenko and Min Shen and James Inglese
+ and Christopher P Austin and David G Lloyd and Douglas S Auld},
+ title = {Comprehensive characterization of cytochrome P450 isozyme selectivity
+ across chemical libraries},
+ journal = {Nature Biotechnology}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP3A4_inhibition#no &NULL}{CYP3A4_inhibition__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP3A4_inhibition#no &NULL}{CYP3A4_inhibition__names__noun}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP3A4_inhibition#no &NULL}{CYP3A4_inhibition__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP3A4_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {CYP3A4_inhibition#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP3A4_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {CYP3A4_inhibition__names__gerund}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP3A4_inhibition__names__gerund}?
- Assistant: {CYP3A4_inhibition#No&Yes}, this molecule is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {CYP3A4_inhibition__names__gerund}?
- Assistant: {CYP3A4_inhibition#No&Yes}, it is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}?
- Assistant: This is a molecule that is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {CYP3A4_inhibition#not &NULL}be {CYP3A4_inhibition__names__gerund}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {CYP3A4_inhibition#not &NULL}be {CYP3A4_inhibition__names__gerund}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} {CYP3A4_inhibition__names__gerund}:{CYP3A4_inhibition#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP3A4_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{CYP3A4_inhibition#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {CYP3A4_inhibition__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result:This molecule is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}.
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP3A4_inhibition__names__gerund}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {CYP3A4_inhibition%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP3A4_inhibition__names__gerund}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {CYP3A4_inhibition%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%CYP3A4_inhibition%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%CYP3A4_inhibition%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {CYP3A4_inhibition#no &NULL}{CYP3A4_inhibition__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {CYP3A4_inhibition#no &NULL}{CYP3A4_inhibition__names__noun}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {CYP3A4_inhibition#no &NULL}{CYP3A4_inhibition__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP3A4_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {CYP3A4_inhibition#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP3A4_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {CYP3A4_inhibition__names__gerund}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {CYP3A4_inhibition__names__gerund}?
+ Assistant: {CYP3A4_inhibition#No&Yes}, this molecule is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {CYP3A4_inhibition__names__gerund}?
+ Assistant: {CYP3A4_inhibition#No&Yes}, it is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}?
+ Assistant: This is a molecule that is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {CYP3A4_inhibition#not &NULL}be {CYP3A4_inhibition__names__gerund}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {CYP3A4_inhibition#not &NULL}be {CYP3A4_inhibition__names__gerund}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} {CYP3A4_inhibition__names__gerund}:{CYP3A4_inhibition#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP3A4_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{CYP3A4_inhibition#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {CYP3A4_inhibition__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result:This molecule is {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}.
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP3A4_inhibition__names__gerund}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {CYP3A4_inhibition%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {CYP3A4_inhibition__names__gerund}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {CYP3A4_inhibition%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%CYP3A4_inhibition%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {CYP3A4_inhibition#not &NULL}{CYP3A4_inhibition__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%CYP3A4_inhibition%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/drug_induced_liver_injury/meta.yaml b/data/tabular/drug_induced_liver_injury/meta.yaml
index 7e536091b..9338f7266 100644
--- a/data/tabular/drug_induced_liver_injury/meta.yaml
+++ b/data/tabular/drug_induced_liver_injury/meta.yaml
@@ -1,129 +1,128 @@
----
name: drug_induced_liver_injury
description: |-
- Drug-induced liver injury (DILI) is fatal liver disease caused by drugs
- and it has been the single most frequent cause of safety-related drug marketing
- withdrawals for the past 50 years (e.g. iproniazid, ticrynafen, benoxaprofen).
- This dataset is aggregated from U.S. FDA 2019s National Center for Toxicological
- Research.
+ Drug-induced liver injury (DILI) is a fatal liver disease caused by drugs
+ and it has been the single most frequent cause of safety-related drug marketing
+ withdrawals for the past 50 years (e.g. iproniazid, ticrynafen, benoxaprofen).
+ This dataset is aggregated from U.S. FDA's National Center for Toxicological
+ Research.
targets:
- - id: liver_injury
- description: whether it can cause liver injury (1) or not (0).
- units:
- type: boolean
- names:
- - noun: drug-induced liver injury
- - noun: drug-induced liver injury (DILI)
- - noun: fatal liver disease caused by drugs
- - verb: causes drug-induced liver injury
- uris:
- - http://purl.bioontology.org/ontology/MEDDRA/10072268
- - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C84427
+ - id: liver_injury
+ description: whether it can cause liver injury (1) or not (0).
+ units:
+ type: boolean
+ names:
+ - noun: drug-induced liver injury
+ - noun: drug-induced liver injury (DILI)
+ - noun: fatal liver disease caused by drugs
+ - verb: causes drug-induced liver injury
+ uris:
+ - http://purl.bioontology.org/ontology/MEDDRA/10072268
+ - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C84427
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://doi.org/10.1021/acs.jcim.5b00238
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/tox/#dili-drug-induced-liver-injury
- description: Data source
+ - url: https://doi.org/10.1021/acs.jcim.5b00238
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/tox/#dili-drug-induced-liver-injury
+ description: Data source
num_points: 475
bibtex:
- - |-
- @article{Xu2015,
- doi = {10.1021/acs.jcim.5b00238},
- url = {https://doi.org/10.1021/acs.jcim.5b00238},
- year = {2015},
- month = oct,
- publisher = {American Chemical Society (ACS)},
- volume = {55},
- number = {10},
- pages = {2085-2093},
- author = {Youjun Xu and Ziwei Dai and Fangjin Chen
- and Shuaishi Gao and Jianfeng Pei and Luhua Lai},
- title = {Deep Learning for Drug-Induced Liver Injury},
- journal = {Journal of Chemical Information and Modeling}
+ - |-
+ @article{Xu2015,
+ doi = {10.1021/acs.jcim.5b00238},
+ url = {https://doi.org/10.1021/acs.jcim.5b00238},
+ year = {2015},
+ month = oct,
+ publisher = {American Chemical Society (ACS)},
+ volume = {55},
+ number = {10},
+ pages = {2085-2093},
+ author = {Youjun Xu and Ziwei Dai and Fangjin Chen
+ and Shuaishi Gao and Jianfeng Pei and Luhua Lai},
+ title = {Deep Learning for Drug-Induced Liver Injury},
+ journal = {Journal of Chemical Information and Modeling}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|causes|displays!} {liver_injury#no &NULL}{liver_injury__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule causes {liver_injury#no &NULL}{liver_injury__names__noun}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {liver_injury#not &NULL}identified as causing a {liver_injury__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is causing {liver_injury#no &NULL}{liver_injury__names__noun}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that {#shows|causes!} {liver_injury__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {liver_injury#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that {#shows|causes!} {liver_injury__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {liver_injury#not &NULL}causing {liver_injury__names__noun}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that {#shows|causes!} {liver_injury__names__noun}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} {#shows|causes!} a {liver_injury__names__noun}?
- Assistant: {liver_injury#No&Yes}, this molecule is {liver_injury#not &NULL}causing a {liver_injury__names__noun}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} causing a {liver_injury__names__noun}?
- Assistant: {liver_injury#No&Yes}, it is {liver_injury#not &NULL}causing a {liver_injury__names__noun}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {liver_injury#not &NULL}{#showing|causing!} a {liver_injury__names__noun}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {liver_injury#not &NULL}{#showing|causing!} a {liver_injury__names__noun}.
- Assistant: This is a molecule that is {liver_injury#not &NULL}causing a {liver_injury__names__noun}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {liver_injury#not &NULL}be causing a {liver_injury__names__noun}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {liver_injury#not &NULL}causing a {liver_injury__names__noun}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {liver_injury#not &NULL}be causing a {liver_injury__names__noun}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {liver_injury#not &NULL}causing a {liver_injury__names__noun}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} causing a {liver_injury__names__noun}:{liver_injury#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that {#shows|causes!} a {liver_injury__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{liver_injury#False&True}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} causing a {liver_injury__names__noun}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {liver_injury%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} causing a {liver_injury__names__noun}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {liver_injury%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {liver_injury#not &NULL} causing a {liver_injury__names__noun}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%liver_injury%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {liver_injury#not &NULL} causing a {liver_injury__names__noun}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%liver_injury%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|causes|displays!} {liver_injury#no &NULL}{liver_injury__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule causes {liver_injury#no &NULL}{liver_injury__names__noun}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {liver_injury#not &NULL}identified as causing a {liver_injury__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is causing {liver_injury#no &NULL}{liver_injury__names__noun}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that {#shows|causes!} {liver_injury__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {liver_injury#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that {#shows|causes!} {liver_injury__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {liver_injury#not &NULL}causing {liver_injury__names__noun}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that {#shows|causes!} {liver_injury__names__noun}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} {#shows|causes!} a {liver_injury__names__noun}?
+ Assistant: {liver_injury#No&Yes}, this molecule is {liver_injury#not &NULL}causing a {liver_injury__names__noun}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} causing a {liver_injury__names__noun}?
+ Assistant: {liver_injury#No&Yes}, it is {liver_injury#not &NULL}causing a {liver_injury__names__noun}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {liver_injury#not &NULL}{#showing|causing!} a {liver_injury__names__noun}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {liver_injury#not &NULL}{#showing|causing!} a {liver_injury__names__noun}.
+ Assistant: This is a molecule that is {liver_injury#not &NULL}causing a {liver_injury__names__noun}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {liver_injury#not &NULL}be causing a {liver_injury__names__noun}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {liver_injury#not &NULL}causing a {liver_injury__names__noun}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {liver_injury#not &NULL}be causing a {liver_injury__names__noun}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {liver_injury#not &NULL}causing a {liver_injury__names__noun}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} causing a {liver_injury__names__noun}:{liver_injury#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that {#shows|causes!} a {liver_injury__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{liver_injury#False&True}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} causing a {liver_injury__names__noun}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {liver_injury%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} causing a {liver_injury__names__noun}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {liver_injury%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {liver_injury#not &NULL}causing a {liver_injury__names__noun}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%liver_injury%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {liver_injury#not &NULL}causing a {liver_injury__names__noun}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%liver_injury%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/drugchat_liang_zhang_et_al/meta.yaml b/data/tabular/drugchat_liang_zhang_et_al/meta.yaml
index da9520a12..8584e8186 100644
--- a/data/tabular/drugchat_liang_zhang_et_al/meta.yaml
+++ b/data/tabular/drugchat_liang_zhang_et_al/meta.yaml
@@ -1,57 +1,56 @@
----
name: drugchat_liang_zhang_et_al
description: |-
- Instruction tuning dataset used for the LLM component of DrugChat.
- 10,834 compounds (3,8962 from ChEMBL and 6,942 from PubChem) containing
- descriptive drug information were collected. 143,517 questions were generated
- using the molecules' classification, properties and descriptions from ChEBI, LOTUS & YMDB.
+ Instruction tuning dataset used for the LLM component of DrugChat.
+ 10,834 compounds (3,892 from ChEMBL and 6,942 from PubChem) containing
+ descriptive drug information were collected. 143,517 questions were generated
+ using the molecules' classification, properties and descriptions from ChEBI, LOTUS & YMDB.
targets:
- - id: answ
- description: answer to the question about the SMILES
- type: string
+ - id: answ
+ description: answer to the question about the SMILES
+ type: string
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
- - id: quest
- type: string
- description: Question about SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
+ - id: quest
+ type: string
+ description: Question about SMILES
license: BSD-3-Clause
links:
- - url: https://www.techrxiv.org/articles/preprint/DrugChat_Towards_Enabling_ChatGPT-Like_Capabilities_on_Drug_Molecule_Graphs/22945922
- description: corresponding publication
- - url: https://github.com/UCSD-AI4H/drugchat
- description: rep & data source
+ - url: https://www.techrxiv.org/articles/preprint/DrugChat_Towards_Enabling_ChatGPT-Like_Capabilities_on_Drug_Molecule_Graphs/22945922
+ description: corresponding publication
+ - url: https://github.com/UCSD-AI4H/drugchat
+ description: rep & data source
num_points: 143,517
bibtex:
- - |-
- @article{Liang2023,
- author = "Youwei Liang and Ruiyi Zhang and Li Zhang and Pengtao Xie",
- title = "{DrugChat: Towards Enabling ChatGPT-Like Capabilities on Drug Molecule Graphs}",
- year = "2023",
- month = "5",
- url = "https://www.techrxiv.org/articles/preprint/DrugChat_Towards_Enabling_ChatGPT-Like_Capabilities_on_Drug_Molecule_Graphs/22945922",
- doi = "10.36227/techrxiv.22945922.v1"}
+ - |-
+ @article{Liang2023,
+ author = "Youwei Liang and Ruiyi Zhang and Li Zhang and Pengtao Xie",
+ title = "{DrugChat: Towards Enabling ChatGPT-Like Capabilities on Drug Molecule Graphs}",
+ year = "2023",
+ month = "5",
+ url = "https://www.techrxiv.org/articles/preprint/DrugChat_Towards_Enabling_ChatGPT-Like_Capabilities_on_Drug_Molecule_Graphs/22945922",
+ doi = "10.36227/techrxiv.22945922.v1"}
templates:
- - |-
- Task: Please answer the following question about the molecule with {SMILES__description} {SMILES#}.
- {#Description|Question|Request!}: {quest#}
- {#Result|Answer|Completion!}: {answ#}
- - |-
- {#Question|Q!}: {quest#}
- {#Constraint:|Description:|!} The {#Molecule|Compound|Chemical!} {#has the|can be represented with the!} {SMILES__description} {SMILES#}.
- {#Answer|Result|Answer!}: {answ#}
- - |-
- User: I have a question about the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#}.
- Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!}
- User: {quest#}
- Assistant: {#The answer is |!}{answ#}
- - |-
- User: I want to know more about the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#}.
- Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!}
- User: {quest#}
- Assistant: {#The answer is |!}{answ#}
- - |-
- Task: Answer the following question about the molecule with {SMILES__description} {SMILES#}.
- {#Description|Question|Request!}: {quest#}
- {#Result|Answer|Completion!}: {answ#}
+ - |-
+ Task: Please answer the following question about the molecule with {SMILES__description} {SMILES#}.
+ {#Description|Question|Request!}: {quest#}
+ {#Result|Answer|Completion!}: {answ#}
+ - |-
+ {#Question|Q!}: {quest#}
+ {#Constraint:|Description:|!} The {#Molecule|Compound|Chemical!} {#has the|can be represented with the!} {SMILES__description} {SMILES#}.
+ {#Answer|Result|Answer!}: {answ#}
+ - |-
+ User: I have a question about the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#}.
+ Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!}
+ User: {quest#}
+ Assistant: {#The answer is |!}{answ#}
+ - |-
+ User: I want to know more about the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#}.
+ Assistant: {#Sure, what is your question?|How can I help?|That sounds interesting, how can I help?|Interesting, how can I help?!}
+ User: {quest#}
+ Assistant: {#The answer is |!}{answ#}
+ - |-
+ Task: Answer the following question about the molecule with {SMILES__description} {SMILES#}.
+ {#Description|Question|Request!}: {quest#}
+ {#Result|Answer|Completion!}: {answ#}
diff --git a/data/tabular/fda_adverse_reactions/meta.yaml b/data/tabular/fda_adverse_reactions/meta.yaml
index 0c5e97d06..d45659aec 100644
--- a/data/tabular/fda_adverse_reactions/meta.yaml
+++ b/data/tabular/fda_adverse_reactions/meta.yaml
@@ -1,32 +1,31 @@
----
name: fda_adverse_reactions
description: A dataset of adverse reaction statistics for drugs and reaction events.
targets:
- - id: count
- description: A count of how many reaction events occurred for this chembl id.
- units:
- type: ordinal
- names:
- - noun: adverse reaction frequency
- pubchem_aids: []
- uris: []
- - id: event
- description: The type of event that occurred for this molecule interaction.
- units:
- type: string
- names:
- - noun: adverse event reaction
- pubchem_aids: []
- uris: []
+ - id: count
+ description: A count of how many reaction events occurred for this chembl id.
+ units:
+ type: ordinal
+ names:
+ - noun: adverse reaction frequency
+ pubchem_aids: []
+ uris: []
+ - id: event
+ description: The type of event that occurred for this molecule interaction.
+ units:
+ type: string
+ names:
+ - noun: adverse event reaction
+ pubchem_aids: []
+ uris: []
identifiers:
- - id: SMILES
- type: SMILES
- description: This is the SMILES identifier for a given molecule.
+ - id: SMILES
+ type: SMILES
+ description: This is the SMILES identifier for a given molecule.
license: CC BY-SA 3.0
links:
- - name: Dataset
- url: https://platform.opentargets.org/downloads
- description: The website which we download the dataset from during the transformation script.
+ - name: Dataset
+ url: https://platform.opentargets.org/downloads
+ description: The website which we download the dataset from during the transformation script.
benchmarks: []
num_points: 94910
bibtex: []
diff --git a/data/tabular/flashpoint/meta.yaml b/data/tabular/flashpoint/meta.yaml
index 379a9e8e9..184fe3ba7 100644
--- a/data/tabular/flashpoint/meta.yaml
+++ b/data/tabular/flashpoint/meta.yaml
@@ -1,38 +1,37 @@
----
name: flashpoint
description: |
- Curation of experimentally determined flash point values measured with open cup and closed cup methods.
- The values are from academic papers, the Hazardous Chemicals Handbook, and the PubChem chemical database.
- Differences from the stated sources in the paper are:
- * Values from the DIPPR database are not included in their dataset as they are proprietary.
- * There are appear to be no values from Lange's handbook of chemistry in their dataset.
- * We did our own processing to resolve duplicate SMILES.
+ Curation of experimentally determined flash point values measured with open cup and closed cup methods.
+ The values are from academic papers, the Hazardous Chemicals Handbook, and the PubChem chemical database.
+ Differences from the stated sources in the paper are:
+ * Values from the DIPPR database are not included in their dataset as they are proprietary.
+ * There appear to be no values from Lange's handbook of chemistry in their dataset.
+ * We did our own processing to resolve duplicate SMILES.
targets:
- - id: flashpoint
- description: Experimental flash point value (K)
- units: K
- type: continuous
- names:
- - noun: flash point
- uris:
- - http://semanticscience.org/resource/CHEMINF_000417
+ - id: flashpoint
+ description: Experimental flash point value (K)
+ units: K
+ type: continuous
+ names:
+ - noun: flash point
+ uris:
+ - http://semanticscience.org/resource/CHEMINF_000417
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
num_points: 9878 # downloaded dataset has 14696 datapoints, but there are duplicate smiles
links:
- - url: https://figshare.com/ndownloader/files/18509711
- description: Original figshare dataset
+ - url: https://figshare.com/ndownloader/files/18509711
+ description: Original figshare dataset
bibtex:
- - |
- "@article{sun2020assessing,
- title={Assessing Graph-based Deep Learning Models for Predicting Flash Point},
- author={Sun, Xiaoyu and Krakauer, Nathaniel J and Politowicz, Alexander and Chen, Wei-Ting and Li, Qiying and Li, Zuoyi and Shao, Xianjia and Sunaryo, Alfred and Shen, Mingren and Wang, James and others},
- journal={Molecular informatics},
- volume={39},
- number={6},
- pages={e1900101},
- year={2020}
- }"
+ - |
+ "@article{sun2020assessing,
+ title={Assessing Graph-based Deep Learning Models for Predicting Flash Point},
+ author={Sun, Xiaoyu and Krakauer, Nathaniel J and Politowicz, Alexander and Chen, Wei-Ting and Li, Qiying and Li, Zuoyi and Shao, Xianjia and Sunaryo, Alfred and Shen, Mingren and Wang, James and others},
+ journal={Molecular informatics},
+ volume={39},
+ number={6},
+ pages={e1900101},
+ year={2020}
+ }"
diff --git a/data/tabular/formation_energies/meta.yaml b/data/tabular/formation_energies/meta.yaml
index ce2b19ef9..9490c1df9 100644
--- a/data/tabular/formation_energies/meta.yaml
+++ b/data/tabular/formation_energies/meta.yaml
@@ -1,86 +1,84 @@
----
name: formation_energies
description: |-
- Formation and decomposition energies of inorganic solids mined from the Materials Project database.
+ Formation and decomposition energies of inorganic solids mined from the Materials Project database.
targets:
- - id: rxn
- description: decomposition reaction
- type: text
- - id: Ed
- description: decomposition enthalpy
- units: eV/atom
- type: continuous
- significant_digits: 3
- - id: Ef
- description: formation enthalpy
- units: eV/atom
- type: continuous
- significant_digits: 3
- - id: stability
- description: thermodynamic stability of material
- type: boolean
+ - id: rxn
+ description: decomposition reaction
+ type: text
+ - id: Ed
+ description: decomposition enthalpy
+ units: eV/atom
+ type: continuous
+ significant_digits: 3
+ - id: Ef
+ description: formation enthalpy
+ units: eV/atom
+ type: continuous
+ significant_digits: 3
+ - id: stability
+ description: thermodynamic stability of material
+ type: boolean
benchmarks: []
identifiers:
- - id: composition
- type: COMPOSITION
- description: chemical formula
+ - id: composition
+ type: COMPOSITION
+ description: chemical formula
license: CC BY 4.0
links:
- - url: https://github.com/CJBartel/TestStabilityML/tree/master
- description: original data source
+ - url: https://github.com/CJBartel/TestStabilityML/tree/master
+ description: original data source
num_points: 85014
bibtex:
- - |-
- @article{Bartel_2020,
- doi = {10.1038/s41524-020-00362-y},
- url = {https://doi.org/10.1038%2Fs41524-020-00362-y},
- year = 2020,
- month = {jul},
- publisher = {Springer Science and Business Media {LLC}},
- volume = {6},
- number = {1},
- author = {Christopher J. Bartel and Amalie Trewartha and Qi Wang and Alexander Dunn and Anubhav Jain and Gerbrand Ceder},
- title = {A critical examination of compound stability predictions from machine-learned formation energies},
- journal = {npj Comput Mater}
- }
+ - |-
+ @article{Bartel_2020,
+ doi = {10.1038/s41524-020-00362-y},
+ url = {https://doi.org/10.1038%2Fs41524-020-00362-y},
+ year = 2020,
+ month = {jul},
+ publisher = {Springer Science and Business Media {LLC}},
+ volume = {6},
+ number = {1},
+ author = {Christopher J. Bartel and Amalie Trewartha and Qi Wang and Alexander Dunn and Anubhav Jain and Gerbrand Ceder},
+ title = {A critical examination of compound stability predictions from machine-learned formation energies},
+ journal = {npj Comput Mater}
+ }
templates:
- - The decomposition enthalpy of {composition#} is {Ed#} {Ed__units}.
- - The formation enthalpy of {composition#} is {Ef#} {Ef__units}.
- - The decomposition enthalpy of {composition#} is {Ed#} {Ed__units}.
- - The decomposition reaction of {composition#} is {rxn#}.
- - The {#material|compound|crystal|solid|inorganic material!} with composition {composition#} is {stability#not &NULL}thermodynamically stable because
- its decomposition enthalpy is {Ed#} {Ed__units}.
- - |-
- User: What is the decomposition enthalpy of {composition#}?
- Assistant: {#The decomposition enthalpy of is |!}{Ed#} {Ed__units}.
- - |-
- User: What is the formation enthalpy of {composition#}?
- Assistant: {#The formation enthalpy of is |!}{Ef#} {Ef__units}.
- - |-
- User: What is the decomposition enthalpy of {composition#}?
- Assistant: {#The decomposition enthalpy is|!}is {Ed#} {Ed__units}.
- - |-
- User: {#I have a question about |I want to know something about| I need help with!}{composition#}.
- Assistant: {#Sure.|How can I help?|How can I be of help?|How can I assist?|Happy to help.!} {#What is your question?|What do you want to know?|!}
- User: {composition#} is {stability#not &NULL}thermodynamically stable because its decomposition enthalpy is {Ed#} {Ed__units}.
- - |-
- User: I want to design a {stability#not &NULL}thermodynamically stable {#material|structure|compound!} What {#chemical formula|composition!} should I use?
- Assistant: {#I recommend using |I suggest using |I would use |I would recommend using !}{composition#}.
- - |-
- User: I want to design a {#material|structure|compound!} that is {stability#not &NULL}thermodynamically stable. What {#chemical formula|composition!} should I use?
- Assistant: {#Do you have|Are there!} any other {#requirements|constraints|preferences!}?
- User: The {#material|structure|compound!} should have a decomposition enthalpy of {Ed#} {Ed__units}.
- Assistant: {#I recommend using |I suggest using |I would use |I would recommend using !}{composition#}.
- - |-
- User: I want to design a {#material|structure|compound!} that is {stability#not &NULL}thermodynamically stable. What {#chemical formula|composition!} should I use?
- Assistant: {#Do you have|Are there!} any other {#requirements|constraints|preferences!}?
- User: The {#material|structure|compound!} should have a formation enthalpy of {Ef#} {Ef__units}.
- Assistant: {#I recommend using |I suggest using |I would use |I would recommend using !}{composition#}.
- - |-
- Task: Classify the stability of {composition#}.
- Constraint: Give a reason for your answer.
- Answer: {#The material is |The compound is |The crystal is |The solid is !}{stability#not &NULL}thermodynamically stable because its decomposition enthalpy is {Ed#} {Ed__units}.
- - |-
- Question: What is a compound with the following decomposition reaction?
- Description: {rxn#}
- Answer: {composition#}
+ - The decomposition enthalpy of {composition#} is {Ed#} {Ed__units}.
+ - The formation enthalpy of {composition#} is {Ef#} {Ef__units}.
+ - The decomposition enthalpy of {composition#} is {Ed#} {Ed__units}.
+ - The decomposition reaction of {composition#} is {rxn#}.
+ - The {#material|compound|crystal|solid|inorganic material!} with composition {composition#} is {stability#not &NULL}thermodynamically stable because its decomposition enthalpy is {Ed#} {Ed__units}.
+ - |-
+ User: What is the decomposition enthalpy of {composition#}?
+ Assistant: {#The decomposition enthalpy is |!}{Ed#} {Ed__units}.
+ - |-
+ User: What is the formation enthalpy of {composition#}?
+ Assistant: {#The formation enthalpy is |!}{Ef#} {Ef__units}.
+ - |-
+ User: What is the decomposition enthalpy of {composition#}?
+ Assistant: {#The decomposition enthalpy is |!}{Ed#} {Ed__units}.
+ - |-
+ User: {#I have a question about |I want to know something about |I need help with !}{composition#}.
+ Assistant: {#Sure.|How can I help?|How can I be of help?|How can I assist?|Happy to help.!} {#What is your question?|What do you want to know?|!}
+ User: {composition#} is {stability#not &NULL}thermodynamically stable because its decomposition enthalpy is {Ed#} {Ed__units}.
+ - |-
+ User: I want to design a {stability#not &NULL}thermodynamically stable {#material|structure|compound!}. What {#chemical formula|composition!} should I use?
+ Assistant: {#I recommend using |I suggest using |I would use |I would recommend using !}{composition#}.
+ - |-
+ User: I want to design a {#material|structure|compound!} that is {stability#not &NULL}thermodynamically stable. What {#chemical formula|composition!} should I use?
+ Assistant: {#Do you have|Are there!} any other {#requirements|constraints|preferences!}?
+ User: The {#material|structure|compound!} should have a decomposition enthalpy of {Ed#} {Ed__units}.
+ Assistant: {#I recommend using |I suggest using |I would use |I would recommend using !}{composition#}.
+ - |-
+ User: I want to design a {#material|structure|compound!} that is {stability#not &NULL}thermodynamically stable. What {#chemical formula|composition!} should I use?
+ Assistant: {#Do you have|Are there!} any other {#requirements|constraints|preferences!}?
+ User: The {#material|structure|compound!} should have a formation enthalpy of {Ef#} {Ef__units}.
+ Assistant: {#I recommend using |I suggest using |I would use |I would recommend using !}{composition#}.
+ - |-
+ Task: Classify the stability of {composition#}.
+ Constraint: Give a reason for your answer.
+ Answer: {#The material is |The compound is |The crystal is |The solid is !}{stability#not &NULL}thermodynamically stable because its decomposition enthalpy is {Ed#} {Ed__units}.
+ - |-
+ Question: What is a compound with the following decomposition reaction?
+ Description: {rxn#}
+ Answer: {composition#}
diff --git a/data/tabular/freesolv/meta.yaml b/data/tabular/freesolv/meta.yaml
index ffd29a3f1..04758b6d9 100644
--- a/data/tabular/freesolv/meta.yaml
+++ b/data/tabular/freesolv/meta.yaml
@@ -1,106 +1,76 @@
----
name: freesolv
description: Experimental and calculated small molecule hydration free energies
targets:
- - id: exp_value
- description: experimental hydration free energy value
- units: kcal/mol
- type: continuous
- names:
- - noun: hydration free energy
- - id: exp_uncertainty
- description: experimental hydration free energy uncertainty
- units: kcal/mol
- type: continuous
- names:
- - noun: hydration free energy uncertainty
- - id: GAFF
- description: mobley group calculated value
- units: kcal/mol
- type: continuous
- names:
- - noun: hydration free energy computed using the GAFF force field
- - id: calc_uncertainty
- description: mobley group calculated value calculated uncertainty
- units: kcal/mol
- type: continuous
- names:
- - noun: uncertainty in hydration free energy computed using the GAFF force field
+ - id: exp_value
+ description: experimental hydration free energy value
+ units: kcal/mol
+ type: continuous
+ names:
+ - noun: hydration free energy
+ - id: exp_uncertainty
+ description: experimental hydration free energy uncertainty
+ units: kcal/mol
+ type: continuous
+ names:
+ - noun: hydration free energy uncertainty
+ - id: GAFF
+ description: mobley group calculated value
+ units: kcal/mol
+ type: continuous
+ names:
+ - noun: hydration free energy computed using the GAFF force field
+ - id: calc_uncertainty
+ description: mobley group calculated value calculated uncertainty
+ units: kcal/mol
+ type: continuous
+ names:
+ - noun: uncertainty in hydration free energy computed using the GAFF force field
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
- - id: iupac_name
- type: IUPAC
- description: IUPAC
+ - id: SMILES
+ type: SMILES
+ description: SMILES
+ - id: iupac_name
+ type: IUPAC
+ description: IUPAC
license: CC BY-NC-SA 4.0
links:
- - url: https://github.com/MobleyLab/FreeSolv
- description: issue tracker and source data
- - url: https://escholarship.org/uc/item/6sd403pz
- description: repository with data
+ - url: https://github.com/MobleyLab/FreeSolv
+ description: issue tracker and source data
+ - url: https://escholarship.org/uc/item/6sd403pz
+ description: repository with data
num_points: 642
bibtex:
- - "@article{mobley2013experimental,\ntitle={Experimental and calculated small molecule hydration free energies},\nauthor={Mobley, David L},\nyear={2013}"
+ - "@article{mobley2013experimental,\ntitle={Experimental and calculated small molecule hydration free energies},\nauthor={Mobley, David L},\nyear={2013}\n}"
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} has a {exp_value__names__noun} of {exp_value#} {exp_value__units}.
- - Based on the {SMILES__description} {#representation of |!}{SMILES#}, the molecule has a {exp_value__names__noun} of {exp_value#} {exp_value__units}.
- - The {SMILES__description} {SMILES#} {#represents|is representing!} a molecule {#that has a|with a!} {exp_value__names__noun} of {exp_value#} {exp_value__units}.
- - The molecule with the {SMILES__description} {SMILES#} has a {exp_value__names__noun} of {exp_value#} {exp_value__units}.
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} has a {GAFF__names__noun} of {GAFF#} {GAFF__units}.
- - Based on the {SMILES__description} {#representation of |!}{SMILES#}, the molecule has a {GAFF__names__noun} of {GAFF#} {GAFF__units}.
- - The {SMILES__description} {SMILES#} {#represents|is representing!} a molecule {#that has a|with a!} {GAFF__names__noun} of {GAFF#} {GAFF__units}.
- - The molecule with the {SMILES__description} {SMILES#} has a {GAFF__names__noun} of {GAFF#} {GAFF__units}.
- - "Task: Please predict a molecule feature based on the description.\nDescription: Predict the {exp_value__names__noun} in {exp_value__units}.\n{#Molecule\
- \ |!}{SMILES__description}: {SMILES#}\nConstraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {exp_value__units}\
- \ without using any {#other|additional!} words.\nResult: {exp_value#} {exp_value__units}"
- - "Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.\nDescription: A molecule\
- \ that has {exp_value__names__noun} of {exp_value#} {exp_value__units}.\nResult: {SMILES#}"
- - "Task: Please predict a molecule feature based on the description.\nDescription: Predict the {GAFF__names__noun} in {GAFF__units}.\n{#Molecule |!}{SMILES__description}:\
- \ {SMILES#}\nConstraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {GAFF__units} without using any {#other|additional!}\
- \ words.\nResult: {GAFF#} {GAFF__units}"
- - "Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.\nDescription: A molecule\
- \ that has {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nResult: {SMILES#}"
- - "User: Can you {#tell me|derive|estimate!} the {exp_value__names__noun} in {exp_value__units} of the molecule with the {SMILES__description} {SMILES#}?\n\
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, this molecule has a {exp_value__names__noun} of {exp_value#} {exp_value__units}."
- - "User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}?\n\
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}"
- - "User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}.\n\
- Assistant: This is a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}: {SMILES#}"
- - "User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.\nAssistant: {#This sounds very exciting. |This sounds very\
- \ interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?\nUser: Yes, please. The molecule should have\
- \ a {exp_value__names__noun} of {exp_value#} {exp_value__units}.\nAssistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} represents\
- \ a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}: {SMILES#}"
- - "User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.\nAssistant: {#This sounds very exciting. |This sounds very\
- \ interesting. !}Should it be a special {#molecule|one!}?\nUser: Yes, the molecule should have a {exp_value__names__noun} of {exp_value#} {exp_value__units}.\n\
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} represents a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}:\
- \ {SMILES#}"
- - "User: Can you {#tell me|derive|estimate!} the {GAFF__names__noun} in {GAFF__units} of the molecule with the {SMILES__description} {SMILES#}?\nAssistant:\
- \ {#Yes|Of course|Sure|Yes, I'm happy to help!}, this molecule has a {GAFF__names__noun} of {GAFF#} {GAFF__units}."
- - "User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}?\nAssistant:\
- \ {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}"
- - "User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nAssistant:\
- \ This is a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}: {SMILES#}"
- - "User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.\nAssistant: {#This sounds very exciting. |This sounds very\
- \ interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?\nUser: Yes, please. The molecule should have\
- \ a {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nAssistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} represents a molecule that\
- \ has a {GAFF__names__noun} of {GAFF#} {GAFF__units}: {SMILES#}"
- - "User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.\nAssistant: {#This sounds very exciting. |This sounds very\
- \ interesting. !}Should it be a special {#molecule|one!}?\nUser: Yes, the molecule should have a {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nAssistant:\
- \ {#Understood|Got it|Ok!}, this {SMILES__description} represents a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}: {SMILES#}"
- - The {exp_value__names__noun} of the molecule with the {SMILES__description} {SMILES#} is:{exp_value#} {exp_value__units}
- - The {exp_value__names__noun} of the {SMILES__description} {SMILES#} is:{exp_value#} {exp_value__units}
- - The {exp_value__names__noun} of the molecule {SMILES__description} {SMILES#} is:{exp_value#} {exp_value__units}
- - "Task: Please predict a molecule feature based on the description.\nDescription: Predict the {exp_value__names__noun} in {exp_value__units} of a molecule.\n\
- {#Molecule |!}{SMILES__description}: {SMILES#}\nConstraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {exp_value__units}\
- \ without using any {#other|additional!} words.\nResult:{exp_value#} {exp_value__units}"
- - "Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.\nDescription: A molecule\
- \ that has {exp_value__names__noun} of {exp_value#} {exp_value__units}.\nResult:{SMILES#}"
- - The {GAFF__names__noun} of the molecule with the {SMILES__description} {SMILES#} is:{GAFF#} {GAFF__units}
- - The {GAFF__names__noun} of the {SMILES__description} {SMILES#} is:{GAFF#} {GAFF__units}
- - The {GAFF__names__noun} of the molecule {SMILES__description} {SMILES#} is:{GAFF#} {GAFF__units}
- - "Task: Please predict a molecule feature based on the description.\nDescription: Predict the {GAFF__names__noun} in {GAFF__units} of a molecule.\n\
- {#Molecule |!}{SMILES__description}: {SMILES#}\nConstraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {GAFF__units}\
- \ without using any {#other|additional!} words.\nResult:{GAFF#} {GAFF__units}"
- - "Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.\nDescription: A molecule\
- \ that has {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nResult:{SMILES#}"
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} has a {exp_value__names__noun} of {exp_value#} {exp_value__units}.
+ - Based on the {SMILES__description} {#representation of |!}{SMILES#}, the molecule has a {exp_value__names__noun} of {exp_value#} {exp_value__units}.
+ - The {SMILES__description} {SMILES#} {#represents|is representing!} a molecule {#that has a|with a!} {exp_value__names__noun} of {exp_value#} {exp_value__units}.
+ - The molecule with the {SMILES__description} {SMILES#} has a {exp_value__names__noun} of {exp_value#} {exp_value__units}.
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} has a {GAFF__names__noun} of {GAFF#} {GAFF__units}.
+ - Based on the {SMILES__description} {#representation of |!}{SMILES#}, the molecule has a {GAFF__names__noun} of {GAFF#} {GAFF__units}.
+ - The {SMILES__description} {SMILES#} {#represents|is representing!} a molecule {#that has a|with a!} {GAFF__names__noun} of {GAFF#} {GAFF__units}.
+ - The molecule with the {SMILES__description} {SMILES#} has a {GAFF__names__noun} of {GAFF#} {GAFF__units}.
+ - "Task: Please predict a molecule feature based on the description.\nDescription: Predict the {exp_value__names__noun} in {exp_value__units}.\n{#Molecule |!}{SMILES__description}: {SMILES#}\nConstraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {exp_value__units} without using any {#other|additional!} words.\nResult: {exp_value#} {exp_value__units}"
+ - "Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.\nDescription: A molecule that has {exp_value__names__noun} of {exp_value#} {exp_value__units}.\nResult: {SMILES#}"
+ - "Task: Please predict a molecule feature based on the description.\nDescription: Predict the {GAFF__names__noun} in {GAFF__units}.\n{#Molecule |!}{SMILES__description}: {SMILES#}\nConstraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {GAFF__units} without using any {#other|additional!} words.\nResult: {GAFF#} {GAFF__units}"
+ - "Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.\nDescription: A molecule that has {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nResult: {SMILES#}"
+ - "User: Can you {#tell me|derive|estimate!} the {exp_value__names__noun} in {exp_value__units} of the molecule with the {SMILES__description} {SMILES#}?\nAssistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, this molecule has a {exp_value__names__noun} of {exp_value#} {exp_value__units}."
+ - "User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}?\nAssistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}"
+ - "User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}.\nAssistant: This is a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}: {SMILES#}"
+ - "User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.\nAssistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?\nUser: Yes, please. The molecule should have a {exp_value__names__noun} of {exp_value#} {exp_value__units}.\nAssistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} represents a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}: {SMILES#}"
+ - "User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.\nAssistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?\nUser: Yes, the molecule should have a {exp_value__names__noun} of {exp_value#} {exp_value__units}.\nAssistant: {#Understood|Got it|Ok!}, this {SMILES__description} represents a molecule that has a {exp_value__names__noun} of {exp_value#} {exp_value__units}: {SMILES#}"
+ - "User: Can you {#tell me|derive|estimate!} the {GAFF__names__noun} in {GAFF__units} of the molecule with the {SMILES__description} {SMILES#}?\nAssistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, this molecule has a {GAFF__names__noun} of {GAFF#} {GAFF__units}."
+ - "User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}?\nAssistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}"
+ - "User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nAssistant: This is a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}: {SMILES#}"
+ - "User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.\nAssistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?\nUser: Yes, please. The molecule should have a {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nAssistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} represents a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}: {SMILES#}"
+ - "User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.\nAssistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?\nUser: Yes, the molecule should have a {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nAssistant: {#Understood|Got it|Ok!}, this {SMILES__description} represents a molecule that has a {GAFF__names__noun} of {GAFF#} {GAFF__units}: {SMILES#}"
+ - The {exp_value__names__noun} of the molecule with the {SMILES__description} {SMILES#} is:{exp_value#} {exp_value__units}
+ - The {exp_value__names__noun} of the {SMILES__description} {SMILES#} is:{exp_value#} {exp_value__units}
+ - The {exp_value__names__noun} of the molecule {SMILES__description} {SMILES#} is:{exp_value#} {exp_value__units}
+ - "Task: Please predict a molecule feature based on the description.\nDescription: Predict the {exp_value__names__noun} in {exp_value__units} of a molecule.\n{#Molecule |!}{SMILES__description}: {SMILES#}\nConstraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {exp_value__units} without using any {#other|additional!} words.\nResult:{exp_value#} {exp_value__units}"
+ - "Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.\nDescription: A molecule that has {exp_value__names__noun} of {exp_value#} {exp_value__units}.\nResult:{SMILES#}"
+ - The {GAFF__names__noun} of the molecule with the {SMILES__description} {SMILES#} is:{GAFF#} {GAFF__units}
+ - The {GAFF__names__noun} of the {SMILES__description} {SMILES#} is:{GAFF#} {GAFF__units}
+ - The {GAFF__names__noun} of the molecule {SMILES__description} {SMILES#} is:{GAFF#} {GAFF__units}
+ - "Task: Please predict a molecule feature based on the description.\nDescription: Predict the {GAFF__names__noun} in {GAFF__units} of a molecule.\n{#Molecule |!}{SMILES__description}: {SMILES#}\nConstraint: Even if you are {#uncertain|not sure!}, you must answer with a numeric value in {GAFF__units} without using any {#other|additional!} words.\nResult:{GAFF#} {GAFF__units}"
+ - "Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.\nDescription: A molecule that has {GAFF__names__noun} of {GAFF#} {GAFF__units}.\nResult:{SMILES#}"
diff --git a/data/tabular/h2_storage_materials/meta.yaml b/data/tabular/h2_storage_materials/meta.yaml
index 6a464062b..b06e215a4 100644
--- a/data/tabular/h2_storage_materials/meta.yaml
+++ b/data/tabular/h2_storage_materials/meta.yaml
@@ -1,39 +1,38 @@
----
name: h2_storage_reversible_hydrides
description: synthetic procedures, experimental and theoretical h2 capacities of hydrides
targets:
- - id: h_weight_density_theory
- description: theoretical hydrogen storage capacity
- units: wt%
- type: continuous
- names:
- - noun: theoretical hydrogen storage weight density
- - id: h_weight_density_experiment
- description: experimental hydrogen storage capacity
- units: wt%
- type: continuous
- names:
- - noun: experimental hydrogen storage capacity
+ - id: h_weight_density_theory
+ description: theoretical hydrogen storage capacity
+ units: wt%
+ type: continuous
+ names:
+ - noun: theoretical hydrogen storage weight density
+ - id: h_weight_density_experiment
+ description: experimental hydrogen storage capacity
+ units: wt%
+ type: continuous
+ names:
+ - noun: experimental hydrogen storage capacity
identifiers:
- - id: material_name
- type: IUPAC
- description: chemical name
- - id: chemical_formula
- type: COMPOSITION
- names:
- - noun: chemical formula
- description: chemical formula
- - id: synthetic_information
- names:
- - noun: synthesis procedure summary
- description: brief description of synthetic procedure
- type: Other
+ - id: material_name
+ type: IUPAC
+ description: chemical name
+ - id: chemical_formula
+ type: COMPOSITION
+ names:
+ - noun: chemical formula
+ description: chemical formula
+ - id: synthetic_information
+ names:
+ - noun: synthesis procedure summary
+ description: brief description of synthetic procedure
+ type: Other
license: File
links:
- - url: https://datahub.hymarc.org/dataset/hydrogen-storage-materials-db/resource/4ef1c494-366e-43a3-bed4-a3985de5c374
- description: website with source data
- - url: https://datahub.hymarc.org/dataset/ad580d95-e7e2-4ef4-a7f6-3b2f91a96eba/resource/4ef1c494-366e-43a3-bed4-a3985de5c374/download/hydstormatdb-reversible_hydrides.csv
- description: original_dataset
+ - url: https://datahub.hymarc.org/dataset/hydrogen-storage-materials-db/resource/4ef1c494-366e-43a3-bed4-a3985de5c374
+ description: website with source data
+ - url: https://datahub.hymarc.org/dataset/ad580d95-e7e2-4ef4-a7f6-3b2f91a96eba/resource/4ef1c494-366e-43a3-bed4-a3985de5c374/download/hydstormatdb-reversible_hydrides.csv
+ description: original_dataset
num_points: 30
bibtex:
- - "@online{hymarcReversibleHydrides,\ntitle={Hydrogen Storage Materials Database Reversible Hydrides},\nauthor={HyMARC},\nyear={2019}"
+ - "@online{hymarcReversibleHydrides,\ntitle={Hydrogen Storage Materials Database Reversible Hydrides},\nauthor={HyMARC},\nyear={2019}\n}"
diff --git a/data/tabular/h2_storage_materials/processing.ipynb b/data/tabular/h2_storage_materials/processing.ipynb
index b9d6d8515..3095b0d51 100644
--- a/data/tabular/h2_storage_materials/processing.ipynb
+++ b/data/tabular/h2_storage_materials/processing.ipynb
@@ -35,7 +35,7 @@
"metadata": {},
"outputs": [],
"source": [
- "data_path = 'https://datahub.hymarc.org/dataset/ad580d95-e7e2-4ef4-a7f6-3b2f91a96eba/resource/4ef1c494-366e-43a3-bed4-a3985de5c374/download/hydstormatdb-reversible_hydrides.csv' "
+ "data_path = \"https://datahub.hymarc.org/dataset/ad580d95-e7e2-4ef4-a7f6-3b2f91a96eba/resource/4ef1c494-366e-43a3-bed4-a3985de5c374/download/hydstormatdb-reversible_hydrides.csv\""
]
},
{
@@ -44,10 +44,11 @@
"metadata": {},
"outputs": [],
"source": [
- "data_path = ('https://datahub.hymarc.org/dataset/'\n",
- " 'ad580d95-e7e2-4ef4-a7f6-3b2f91a96eba/resource/'\n",
- " '4ef1c494-366e-43a3-bed4-a3985de5c374/download/hydstormatdb-reversible_hydrides.csv'\n",
- " ) "
+ "data_path = (\n",
+ " \"https://datahub.hymarc.org/dataset/\"\n",
+ " \"ad580d95-e7e2-4ef4-a7f6-3b2f91a96eba/resource/\"\n",
+ " \"4ef1c494-366e-43a3-bed4-a3985de5c374/download/hydstormatdb-reversible_hydrides.csv\"\n",
+ ")"
]
},
{
@@ -76,7 +77,7 @@
"metadata": {},
"outputs": [],
"source": [
- "df = pd.read_csv(fn_data_original, sep=',')"
+ "df = pd.read_csv(fn_data_original, sep=\",\")"
]
},
{
@@ -112,18 +113,18 @@
"outputs": [],
"source": [
"remove_columns = [\n",
- " 'keywords',\n",
- " 'activation',\n",
- " 'principal_investigator',\n",
- " 'institution',\n",
- " 'reversible_capacity',\n",
- " 'h_volume_density_theory',\n",
- " 'h_volume_density_experiment',\n",
- " 'h_volume_density_reference',\n",
- " 'temperature_release_reference',\n",
- " 'h_volume_density_reference',\n",
- " 'entry_date',\n",
- " 'precursors'\n",
+ " \"keywords\",\n",
+ " \"activation\",\n",
+ " \"principal_investigator\",\n",
+ " \"institution\",\n",
+ " \"reversible_capacity\",\n",
+ " \"h_volume_density_theory\",\n",
+ " \"h_volume_density_experiment\",\n",
+ " \"h_volume_density_reference\",\n",
+ " \"temperature_release_reference\",\n",
+ " \"h_volume_density_reference\",\n",
+ " \"entry_date\",\n",
+ " \"precursors\",\n",
"]"
]
},
@@ -152,7 +153,7 @@
"metadata": {},
"outputs": [],
"source": [
- "df['synthesis_information'] = df['synthesis_method'] + ': ' + df['synthesis_conditions']"
+ "df[\"synthesis_information\"] = df[\"synthesis_method\"] + \": \" + df[\"synthesis_conditions\"]"
]
},
{
@@ -161,7 +162,7 @@
"metadata": {},
"outputs": [],
"source": [
- "df = df.drop(['synthesis_method', 'synthesis_conditions'], axis=1)"
+ "df = df.drop([\"synthesis_method\", \"synthesis_conditions\"], axis=1)"
]
},
{
@@ -170,7 +171,7 @@
"metadata": {},
"outputs": [],
"source": [
- "string_columns = list(df.select_dtypes(include=['object']).columns)"
+ "string_columns = list(df.select_dtypes(include=[\"object\"]).columns)"
]
},
{
@@ -310,7 +311,7 @@
" \"license\": \"No License Provided\", # license under which the original dataset was published\n",
" \"links\": [ # list of relevant links (original dataset, other uses, etc.)\n",
" {\n",
- " \"url\": 'https://datahub.hymarc.org/dataset/hydrogen-storage-materials-db/resource/4ef1c494-366e-43a3-bed4-a3985de5c374',\n",
+ " \"url\": \"https://datahub.hymarc.org/dataset/hydrogen-storage-materials-db/resource/4ef1c494-366e-43a3-bed4-a3985de5c374\",\n",
" \"description\": \"website with source data\",\n",
" },\n",
" {\n",
@@ -356,7 +357,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -365,7 +366,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -540,7 +541,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -575,13 +576,6 @@
"source": [
"!python3 transform.py"
]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
}
],
"metadata": {
@@ -599,8 +593,7 @@
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.15"
+ "pygments_lexer": "ipython3"
}
},
"nbformat": 4,
diff --git a/data/tabular/half_life_obach/meta.yaml b/data/tabular/half_life_obach/meta.yaml
index a9b9bcac8..161da87c4 100644
--- a/data/tabular/half_life_obach/meta.yaml
+++ b/data/tabular/half_life_obach/meta.yaml
@@ -1,55 +1,54 @@
----
name: half_life_obach
description: |-
- Half life of a drug is the duration for the concentration of the drug
- in the body to be reduced by half. It measures the duration of actions of a drug.
- This dataset deposited version under CHEMBL assay 1614674.
+ Half life of a drug is the duration for the concentration of the drug
+ in the body to be reduced by half. It measures the duration of actions of a drug.
+ This dataset is the deposited version under CHEMBL assay 1614674.
targets:
- - id: half_life_duration
- description: the time it takes for the plasma concentration of a drug in the body to be reduced by half
- units: hours
- type: continuous
- significant_digits: 2
- names:
- - noun: half life in humans after IV administration
- - noun: half life time in humans after IV administration
- - noun: drug half life time in humans after IV administration
- uris:
- - http://purl.bioontology.org/ontology/MESH/D006207
+ - id: half_life_duration
+ description: the time it takes for the plasma concentration of a drug in the body to be reduced by half
+ units: hours
+ type: continuous
+ significant_digits: 2
+ names:
+ - noun: half life in humans after IV administration
+ - noun: half life time in humans after IV administration
+ - noun: drug half life time in humans after IV administration
+ uris:
+ - http://purl.bioontology.org/ontology/MESH/D006207
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
- - id: chembl_id
- type: Other
- names:
- - noun: ChEMBL database id
- - noun: ChEMBL identifier number
- description: ChEMBL ids
- sample: false
+ - id: SMILES
+ type: SMILES
+ description: SMILES
+ - id: chembl_id
+ type: Other
+ names:
+ - noun: ChEMBL database id
+ - noun: ChEMBL identifier number
+ description: ChEMBL ids
+ sample: false
license: CC BY 4.0
links:
- - url: https://doi.org/10.1124/dmd.108.020479
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/adme/#half-life-obach-et-al
- description: data source
+ - url: https://doi.org/10.1124/dmd.108.020479
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/adme/#half-life-obach-et-al
+ description: data source
num_points: 667
bibtex:
- - |-
- @article{Obach2008,
- doi = {10.1124/dmd.108.020479},
- url = {https://doi.org/10.1124/dmd.108.020479},
- year = {2008},
- month = apr,
- publisher = {American Society for Pharmacology and Experimental Therapeutics (ASPET)},
- volume = {36},
- number = {7},
- pages = {1385--1405},
- author = {R. Scott Obach and Franco Lombardo and Nigel J. Waters},
- title = {Trend Analysis of a Database of Intravenous Pharmacokinetic
- Parameters in Humans for 670 Drug Compounds},
- journal = {Drug Metabolism and Disposition}
+ - |-
+ @article{Obach2008,
+ doi = {10.1124/dmd.108.020479},
+ url = {https://doi.org/10.1124/dmd.108.020479},
+ year = {2008},
+ month = apr,
+ publisher = {American Society for Pharmacology and Experimental Therapeutics (ASPET)},
+ volume = {36},
+ number = {7},
+ pages = {1385--1405},
+ author = {R. Scott Obach and Franco Lombardo and Nigel J. Waters},
+ title = {Trend Analysis of a Database of Intravenous Pharmacokinetic
+ Parameters in Humans for 670 Drug Compounds},
+ journal = {Drug Metabolism and Disposition}}
diff --git a/data/tabular/herg_blockers/meta.yaml b/data/tabular/herg_blockers/meta.yaml
index bf89b3e65..3d998c759 100644
--- a/data/tabular/herg_blockers/meta.yaml
+++ b/data/tabular/herg_blockers/meta.yaml
@@ -1,137 +1,134 @@
----
name: herg_blockers
-description: "Human ether-à-go-go related gene (hERG) is crucial for the coordination\nof the heart's beating. Thus, if a drug blocks the hERG, it could\
- \ lead to severe\nadverse effects. Therefore, reliable prediction of hERG liability in the early\nstages of drug design is quite important to reduce\
- \ the risk of cardiotoxicity\nrelated attritions in the later development stages."
+description: "Human ether-à-go-go related gene (hERG) is crucial for the coordination\nof the heart's beating. Thus, if a drug blocks the hERG, it could lead to severe\nadverse effects. Therefore, reliable prediction of hERG liability in the early\nstages of drug design is quite important to reduce the risk of cardiotoxicity\nrelated attritions in the later development stages."
targets:
- - id: herg_blocker
- description: whether it blocks hERG (1) or not (0)
- units:
- type: boolean
- names:
- - noun: hERG blocker
- - noun: hERG blocking compound
- - noun: hERG blocking compound (<10uM)
- - noun: human ether-à-go-go related gene (hERG) blocker
- - noun: human ether-à-go-go related gene (hERG) blocking compound
- - verb: blocks hERG
- - verb: blocks the human ether-à-go-go related gene (hERG)
- - verb: is active against hERG (<10uM)
- - verb: is active against the human ether-à-go-go related gene (hERG)
- uris:
- - http://purl.obolibrary.org/obo/MI_2136
+ - id: herg_blocker
+ description: whether it blocks hERG (1) or not (0)
+ units:
+ type: boolean
+ names:
+ - noun: hERG blocker
+ - noun: hERG blocking compound
+ - noun: hERG blocking compound (<10uM)
+ - noun: human ether-à-go-go related gene (hERG) blocker
+ - noun: human ether-à-go-go related gene (hERG) blocking compound
+ - verb: blocks hERG
+ - verb: blocks the human ether-à-go-go related gene (hERG)
+ - verb: is active against hERG (<10uM)
+ - verb: is active against the human ether-à-go-go related gene (hERG)
+ uris:
+ - http://purl.obolibrary.org/obo/MI_2136
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
- - id: compound_name
- type: Other
- description: compound name
- names:
- - noun: compound
- - noun: compound name
- - noun: drug
+ - id: SMILES
+ type: SMILES
+ description: SMILES
+ - id: compound_name
+ type: Other
+ description: compound name
+ names:
+ - noun: compound
+ - noun: compound name
+ - noun: drug
license: CC BY 4.0
links:
- - url: https://doi.org/10.1021/acs.molpharmaceut.6b00471
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-blockers
- description: Data source
+ - url: https://doi.org/10.1021/acs.molpharmaceut.6b00471
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-blockers
+ description: Data source
num_points: 655
bibtex:
- - |-
- @article{Wang2016,
- doi = {10.1021/acs.molpharmaceut.6b00471},
- url = {https://doi.org/10.1021/acs.molpharmaceut.6b00471},
- year = {2016},
- month = jul,
- publisher = {American Chemical Society (ACS)},
- volume = {13},
- number = {8},
- pages = {2855--2866},
- author = {Shuangquan Wang and Huiyong Sun and Hui Liu and Dan Li and
- Youyong Li and Tingjun Hou},
- title = {ADMET Evaluation in Drug Discovery. 16. Predicting hERG Blockers
- by Combining Multiple Pharmacophores and Machine Learning Approaches},
- journal = {Molecular Pharmaceutics}
+ - |-
+ @article{Wang2016,
+ doi = {10.1021/acs.molpharmaceut.6b00471},
+ url = {https://doi.org/10.1021/acs.molpharmaceut.6b00471},
+ year = {2016},
+ month = jul,
+ publisher = {American Chemical Society (ACS)},
+ volume = {13},
+ number = {8},
+ pages = {2855--2866},
+ author = {Shuangquan Wang and Huiyong Sun and Hui Liu and Dan Li and
+ Youyong Li and Tingjun Hou},
+ title = {ADMET Evaluation in Drug Discovery. 16. Predicting hERG Blockers
+ by Combining Multiple Pharmacophores and Machine Learning Approaches},
+ journal = {Molecular Pharmaceutics}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that {herg_blocker__names__verb}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {herg_blocker#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is a {herg_blocker__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {herg_blocker#no &NULL}{herg_blocker__names__noun}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is a {herg_blocker__names__noun}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {herg_blocker__names__noun}?
- Assistant: {herg_blocker#No&Yes}, this molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} a {herg_blocker__names__noun}?
- Assistant: {herg_blocker#No&Yes}, it is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that {herg_blocker#not &NULL}{herg_blocker__names__verb}?
- Assistant: This is a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {herg_blocker#not &NULL}be a {herg_blocker__names__noun}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {herg_blocker#not &NULL}be a {herg_blocker__names__noun}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} a {herg_blocker__names__noun}:{herg_blocker#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is a {herg_blocker__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{herg_blocker#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is a {herg_blocker__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result:This molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {herg_blocker__names__noun}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {herg_blocker%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {herg_blocker#not &NULL}a {herg_blocker__names__noun}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%herg_blocker%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {herg_blocker#not &NULL}a {herg_blocker__names__noun}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%herg_blocker%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that {herg_blocker__names__verb}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {herg_blocker#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is a {herg_blocker__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is a {herg_blocker__names__noun}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {herg_blocker__names__noun}?
+ Assistant: {herg_blocker#No&Yes}, this molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} a {herg_blocker__names__noun}?
+ Assistant: {herg_blocker#No&Yes}, it is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that does {herg_blocker#not &NULL}{herg_blocker__names__verb}?
+ Assistant: This is a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {herg_blocker#not &NULL}be a {herg_blocker__names__noun}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {herg_blocker#not &NULL}be a {herg_blocker__names__noun}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} a {herg_blocker__names__noun}:{herg_blocker#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is a {herg_blocker__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{herg_blocker#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is a {herg_blocker__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result:This molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {herg_blocker__names__noun}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {herg_blocker%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {herg_blocker#not &NULL}a {herg_blocker__names__noun}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%herg_blocker%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {herg_blocker#not &NULL}a {herg_blocker__names__noun}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%herg_blocker%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/herg_central_at_10uM/meta.yaml b/data/tabular/herg_central_at_10uM/meta.yaml
index 763653fc5..3a72f4ad6 100644
--- a/data/tabular/herg_central_at_10uM/meta.yaml
+++ b/data/tabular/herg_central_at_10uM/meta.yaml
@@ -1,37 +1,31 @@
----
name: herg_central_at_10uM
-description: "Human ether-à-go-go related gene (hERG) is crucial for the coordination\nof the heart's beating. Thus, if a drug blocks the hERG, it could\
- \ lead to severe\nadverse effects. Therefore, reliable prediction of hERG liability in the early\nstages of drug design is quite important to reduce\
- \ the risk of cardiotoxicity-related\nattritions in the later development stages. There are three targets: hERG_at_1microM,\nhERG_at_10microM, and herg_inhib."
+description: "Human ether-à-go-go related gene (hERG) is crucial for the coordination\nof the heart's beating. Thus, if a drug blocks the hERG, it could lead to severe\nadverse effects. Therefore, reliable prediction of hERG liability in the early\nstages of drug design is quite important to reduce the risk of cardiotoxicity-related\nattritions in the later development stages. There are three targets: hERG_at_1microM,\nhERG_at_10microM, and herg_inhib."
targets:
- - id: herg_central_at_10uM
- description: the percent inhibition of hERG at a 10uM concentration
- units: '%'
- type: continuous
- names:
- - noun: hERG inhibition at a concentration of 10uM
- - noun: hERG inhibition at a concentration of 10uM
- - noun: hERG inhibition at 10uM
- - noun: human ether-à-go-go related gene (hERG) inhibition at a concentration of 10uM
- - noun: human ether-à-go-go related gene (hERG) inhibition at 10uM
- - noun: human ether-à-go-go related gene (hERG) inhibition at 10uM
- uris:
- - http://purl.obolibrary.org/obo/MI_2136
+ - id: herg_central_at_10uM
+ description: the percent inhibition of hERG at a 10uM concentration
+ units: "%"
+ type: continuous
+ names:
+ - noun: hERG inhibition at a concentration of 10uM
+ - noun: hERG inhibition at a concentration of 10uM
+ - noun: hERG inhibition at 10uM
+ - noun: human ether-à-go-go related gene (hERG) inhibition at a concentration of 10uM
+ - noun: human ether-à-go-go related gene (hERG) inhibition at 10uM
+ - noun: human ether-à-go-go related gene (hERG) inhibition at 10uM
+ uris:
+ - http://purl.obolibrary.org/obo/MI_2136
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://doi.org/10.1089/adt.2011.0425
- description: corresponding publication
- - url: https://bbirnbaum.com/
- description: TDC Contributer
- - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-central
- description: Data source
+ - url: https://doi.org/10.1089/adt.2011.0425
+ description: corresponding publication
+ - url: https://bbirnbaum.com/
+ description: TDC Contributor
+ - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-central
+ description: Data source
num_points: 306893
bibtex:
- - "@article{Du2011,\ndoi = {10.1089/adt.2011.0425},\nurl = {https://doi.org/10.1089/adt.2011.0425},\nyear = {2011},\nmonth = dec,\npublisher = {Mary\
- \ Ann Liebert Inc},\nvolume = {9},\nnumber = {6},\npages = {580--588},\nauthor = {Fang Du and Haibo Yu and Beiyan Zou and Joseph Babcock\nand Shunyou\
- \ Long and Min Li},\ntitle = {hERGCentral: A Large Database to Store, Retrieve, and Analyze Compound Human\nEther-à-go-go Related Gene Channel Interactions\
- \ to Facilitate Cardiotoxicity Assessment in Drug Development},\njournal = {ASSAY and Drug Development Technologies}"
+ - "@article{Du2011,\ndoi = {10.1089/adt.2011.0425},\nurl = {https://doi.org/10.1089/adt.2011.0425},\nyear = {2011},\nmonth = dec,\npublisher = {Mary Ann Liebert Inc},\nvolume = {9},\nnumber = {6},\npages = {580--588},\nauthor = {Fang Du and Haibo Yu and Beiyan Zou and Joseph Babcock\nand Shunyou Long and Min Li},\ntitle = {hERGCentral: A Large Database to Store, Retrieve, and Analyze Compound Human\nEther-à-go-go Related Gene Channel Interactions to Facilitate Cardiotoxicity Assessment in Drug Development},\njournal = {ASSAY and Drug Development Technologies}"
diff --git a/data/tabular/herg_central_at_10uM/transform.py b/data/tabular/herg_central_at_10uM/transform.py
index 39c9080a1..d2870a332 100644
--- a/data/tabular/herg_central_at_10uM/transform.py
+++ b/data/tabular/herg_central_at_10uM/transform.py
@@ -96,7 +96,7 @@ def get_and_transform_data():
},
{
"url": "https://bbirnbaum.com/",
- "description": "TDC Contributer",
+ "description": "TDC Contributor",
},
{
"url": "https://tdcommons.ai/single_pred_tasks/tox/#herg-central",
diff --git a/data/tabular/herg_central_at_1uM/meta.yaml b/data/tabular/herg_central_at_1uM/meta.yaml
index 0c47d92d2..2817fd315 100644
--- a/data/tabular/herg_central_at_1uM/meta.yaml
+++ b/data/tabular/herg_central_at_1uM/meta.yaml
@@ -1,37 +1,31 @@
----
name: herg_central_at_1uM
-description: "Human ether-à-go-go related gene (hERG) is crucial for the coordination\nof the heart's beating. Thus, if a drug blocks the hERG, it could\
- \ lead to severe\nadverse effects. Therefore, reliable prediction of hERG liability in the early\nstages of drug design is quite important to reduce\
- \ the risk of cardiotoxicity-related\nattritions in the later development stages. There are three targets: hERG_at_1microM,\nhERG_at_10microM, and herg_inhib."
+description: "Human ether-à-go-go related gene (hERG) is crucial for the coordination\nof the heart's beating. Thus, if a drug blocks the hERG, it could lead to severe\nadverse effects. Therefore, reliable prediction of hERG liability in the early\nstages of drug design is quite important to reduce the risk of cardiotoxicity-related\nattritions in the later development stages. There are three targets: hERG_at_1microM,\nhERG_at_10microM, and herg_inhib."
targets:
- - id: herg_central_at_1uM
- description: the percent inhibition of hERG at a 1uM concentration
- units: '%'
- type: continuous
- names:
- - noun: hERG inhibition at a concentration of 1uM
- - noun: hERG inhibition at a concentration of 1uM
- - noun: hERG inhibition at 1uM
- - noun: human ether-à-go-go related gene (hERG) inhibition at a concentration of 1uM
- - noun: human ether-à-go-go related gene (hERG) inhibition at 1uM
- - noun: human ether-à-go-go related gene (hERG) inhibition at 1uM
- uris:
- - http://purl.obolibrary.org/obo/MI_2136
+ - id: herg_central_at_1uM
+ description: the percent inhibition of hERG at a 1uM concentration
+ units: "%"
+ type: continuous
+ names:
+ - noun: hERG inhibition at a concentration of 1uM
+ - noun: hERG inhibition at a concentration of 1uM
+ - noun: hERG inhibition at 1uM
+ - noun: human ether-à-go-go related gene (hERG) inhibition at a concentration of 1uM
+ - noun: human ether-à-go-go related gene (hERG) inhibition at 1uM
+ - noun: human ether-à-go-go related gene (hERG) inhibition at 1uM
+ uris:
+ - http://purl.obolibrary.org/obo/MI_2136
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://doi.org/10.1089/adt.2011.0425
- description: corresponding publication
- - url: https://bbirnbaum.com/
- description: TDC Contributer
- - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-central
- description: Data source
+ - url: https://doi.org/10.1089/adt.2011.0425
+ description: corresponding publication
+ - url: https://bbirnbaum.com/
+ description: TDC Contributor
+ - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-central
+ description: Data source
num_points: 306893
bibtex:
- - "@article{Du2011,\ndoi = {10.1089/adt.2011.0425},\nurl = {https://doi.org/10.1089/adt.2011.0425},\nyear = {2011},\nmonth = dec,\npublisher = {Mary\
- \ Ann Liebert Inc},\nvolume = {9},\nnumber = {6},\npages = {580--588},\nauthor = {Fang Du and Haibo Yu and Beiyan Zou and Joseph Babcock\nand Shunyou\
- \ Long and Min Li},\ntitle = {hERGCentral: A Large Database to Store, Retrieve, and Analyze Compound Human\nEther-à-go-go Related Gene Channel Interactions\
- \ to Facilitate Cardiotoxicity Assessment in Drug Development},\njournal = {ASSAY and Drug Development Technologies}"
+ - "@article{Du2011,\ndoi = {10.1089/adt.2011.0425},\nurl = {https://doi.org/10.1089/adt.2011.0425},\nyear = {2011},\nmonth = dec,\npublisher = {Mary Ann Liebert Inc},\nvolume = {9},\nnumber = {6},\npages = {580--588},\nauthor = {Fang Du and Haibo Yu and Beiyan Zou and Joseph Babcock\nand Shunyou Long and Min Li},\ntitle = {hERGCentral: A Large Database to Store, Retrieve, and Analyze Compound Human\nEther-à-go-go Related Gene Channel Interactions to Facilitate Cardiotoxicity Assessment in Drug Development},\njournal = {ASSAY and Drug Development Technologies}"
diff --git a/data/tabular/herg_central_inhib/meta.yaml b/data/tabular/herg_central_inhib/meta.yaml
index 5c62d550b..8c4514045 100644
--- a/data/tabular/herg_central_inhib/meta.yaml
+++ b/data/tabular/herg_central_inhib/meta.yaml
@@ -1,121 +1,115 @@
----
name: herg_central_inhib
-description: "Human ether-à-go-go related gene (hERG) is crucial for the coordination\nof the heart's beating. Thus, if a drug blocks the hERG, it could\
- \ lead to severe\nadverse effects. Therefore, reliable prediction of hERG liability in the early\nstages of drug design is quite important to reduce\
- \ the risk of cardiotoxicity-related\nattritions in the later development stages. There are three targets: hERG_at_1microM,\nhERG_at_10microM, and herg_inhib."
+description: "Human ether-à-go-go related gene (hERG) is crucial for the coordination\nof the heart's beating. Thus, if a drug blocks the hERG, it could lead to severe\nadverse effects. Therefore, reliable prediction of hERG liability in the early\nstages of drug design is quite important to reduce the risk of cardiotoxicity-related\nattritions in the later development stages. There are three targets: hERG_at_1microM,\nhERG_at_10microM, and herg_inhib."
targets:
- - id: herg_inhib
- description: |-
- whether it blocks (1) or not blocks (0) hERG
- (This is equivalent to whether hERG_at_10microM < -50, i.e.,
- whether the compound has an IC50 of less than 10microM.)
- units:
- type: boolean
- names:
- - noun: hERG blocker
- - noun: hERG blocking compound
- - noun: hERG blocking compound (IC50 < 10uM)
- - noun: hERG blocking compound (IC50 less than 10uM)
- - noun: human ether-à-go-go related gene (hERG) blocker
- - noun: human ether-à-go-go related gene (hERG) blocking compound
- - verb: block hERG
- - verb: block hERG (IC50 < 10uM)
- - verb: block hERG (IC50 less than 10uM)
- - verb: block the human ether-à-go-go related gene (hERG)
- uris:
- - http://purl.obolibrary.org/obo/MI_2136
+ - id: herg_inhib
+ description: |-
+ whether it blocks (1) or not blocks (0) hERG
+ (This is equivalent to whether hERG_at_10microM < -50, i.e.,
+ whether the compound has an IC50 of less than 10microM.)
+ units:
+ type: boolean
+ names:
+ - noun: hERG blocker
+ - noun: hERG blocking compound
+ - noun: hERG blocking compound (IC50 < 10uM)
+ - noun: hERG blocking compound (IC50 less than 10uM)
+ - noun: human ether-à-go-go related gene (hERG) blocker
+ - noun: human ether-à-go-go related gene (hERG) blocking compound
+ - verb: block hERG
+ - verb: block hERG (IC50 < 10uM)
+ - verb: block hERG (IC50 less than 10uM)
+ - verb: block the human ether-à-go-go related gene (hERG)
+ uris:
+ - http://purl.obolibrary.org/obo/MI_2136
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://doi.org/10.1089/adt.2011.0425
- description: corresponding publication
- - url: https://bbirnbaum.com/
- description: TDC Contributer
- - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-central
- description: Data source
+ - url: https://doi.org/10.1089/adt.2011.0425
+ description: corresponding publication
+ - url: https://bbirnbaum.com/
+ description: TDC Contributor
+ - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-central
+ description: Data source
num_points: 306893
bibtex:
- - "@article{Du2011,\ndoi = {10.1089/adt.2011.0425},\nurl = {https://doi.org/10.1089/adt.2011.0425},\nyear = {2011},\nmonth = dec,\npublisher = {Mary\
- \ Ann Liebert Inc},\nvolume = {9},\nnumber = {6},\npages = {580--588},\nauthor = {Fang Du and Haibo Yu and Beiyan Zou and Joseph Babcock\nand Shunyou\
- \ Long and Min Li},\ntitle = {hERGCentral: A Large Database to Store, Retrieve, and Analyze Compound Human\nEther-à-go-go Related Gene Channel Interactions\
- \ to Facilitate Cardiotoxicity Assessment in Drug Development},\njournal = {ASSAY and Drug Development Technologies}"
+ - "@article{Du2011,\ndoi = {10.1089/adt.2011.0425},\nurl = {https://doi.org/10.1089/adt.2011.0425},\nyear = {2011},\nmonth = dec,\npublisher = {Mary Ann Liebert Inc},\nvolume = {9},\nnumber = {6},\npages = {580--588},\nauthor = {Fang Du and Haibo Yu and Beiyan Zou and Joseph Babcock\nand Shunyou Long and Min Li},\ntitle = {hERGCentral: A Large Database to Store, Retrieve, and Analyze Compound Human\nEther-à-go-go Related Gene Channel Interactions to Facilitate Cardiotoxicity Assessment in Drug Development},\njournal = {ASSAY and Drug Development Technologies}"
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {herg_inhib#not &NULL}a {herg_inhib__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {herg_inhib#not &NULL}a {herg_inhib__names__noun}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {herg_inhib#not &NULL}a {herg_inhib__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {herg_inhib#not &NULL}a {herg_inhib__names__noun}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that {herg_inhib__names__verb}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {herg_inhib#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is a {herg_inhib__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {herg_inhib#no &NULL}{herg_inhib__names__noun}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is a {herg_inhib__names__noun}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {herg_inhib__names__noun}?
- Assistant: {herg_inhib#No&Yes}, this molecule is {herg_inhib#not &NULL}a {herg_inhib__names__noun}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} a {herg_inhib__names__noun}?
- Assistant: {herg_inhib#No&Yes}, it is {herg_inhib#not &NULL}a {herg_inhib__names__noun}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {herg_inhib#not &NULL}a {herg_inhib__names__noun}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that does {herg_inhib#not &NULL}{herg_inhib__names__verb}?
- Assistant: This is a molecule that is {herg_inhib#not &NULL}a {herg_inhib__names__noun}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {herg_inhib#not &NULL}be a {herg_inhib__names__noun}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {herg_inhib#not &NULL}a {herg_inhib__names__noun}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {herg_inhib#not &NULL}be a {herg_inhib__names__noun}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {herg_inhib#not &NULL}a {herg_inhib__names__noun}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} a {herg_inhib__names__noun}:{herg_inhib#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is a {herg_inhib__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{herg_inhib#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is a {herg_inhib__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result:This molecule is {herg_inhib#not &NULL}a {herg_inhib__names__noun}.
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {herg_inhib__names__noun}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {herg_inhib%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {herg_inhib#not &NULL}a {herg_inhib__names__noun}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%herg_inhib%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {herg_inhib#not &NULL}a {herg_inhib__names__noun}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%herg_inhib%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {herg_inhib#not &NULL}a {herg_inhib__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {herg_inhib#not &NULL}a {herg_inhib__names__noun}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {herg_inhib#not &NULL}a {herg_inhib__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {herg_inhib#not &NULL}a {herg_inhib__names__noun}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that {herg_inhib__names__verb}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {herg_inhib#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is a {herg_inhib__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {herg_inhib#not &NULL}a {herg_inhib__names__noun}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is a {herg_inhib__names__noun}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {herg_inhib__names__noun}?
+ Assistant: {herg_inhib#No&Yes}, this molecule is {herg_inhib#not &NULL}a {herg_inhib__names__noun}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} a {herg_inhib__names__noun}?
+ Assistant: {herg_inhib#No&Yes}, it is {herg_inhib#not &NULL}a {herg_inhib__names__noun}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {herg_inhib#not &NULL}a {herg_inhib__names__noun}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that does {herg_inhib#not &NULL}{herg_inhib__names__verb}?
+ Assistant: This is a molecule that is {herg_inhib#not &NULL}a {herg_inhib__names__noun}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {herg_inhib#not &NULL}be a {herg_inhib__names__noun}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {herg_inhib#not &NULL}a {herg_inhib__names__noun}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {herg_inhib#not &NULL}be a {herg_inhib__names__noun}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {herg_inhib#not &NULL}a {herg_inhib__names__noun}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} a {herg_inhib__names__noun}:{herg_inhib#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is a {herg_inhib__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{herg_inhib#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is a {herg_inhib__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result:This molecule is {herg_inhib#not &NULL}a {herg_inhib__names__noun}.
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {herg_inhib__names__noun}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {herg_inhib%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {herg_inhib#not &NULL}a {herg_inhib__names__noun}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%herg_inhib%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {herg_inhib#not &NULL}a {herg_inhib__names__noun}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%herg_inhib%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/herg_karim_et_al/meta.yaml b/data/tabular/herg_karim_et_al/meta.yaml
index a419981bb..0220393be 100644
--- a/data/tabular/herg_karim_et_al/meta.yaml
+++ b/data/tabular/herg_karim_et_al/meta.yaml
@@ -1,126 +1,122 @@
----
name: herg_karim_et_al
-description: "A integrated Ether-à-go-go-related gene (hERG) dataset consisting\nof molecular structures labelled as hERG (<10uM) and non-hERG (>=10uM)\
- \ blockers in\nthe form of SMILES strings was obtained from the DeepHIT, the BindingDB database,\nChEMBL bioactivity database, and other literature."
+description: "An integrated Ether-à-go-go-related gene (hERG) dataset consisting\nof molecular structures labelled as hERG (<10uM) and non-hERG (>=10uM) blockers in\nthe form of SMILES strings was obtained from the DeepHIT, the BindingDB database,\nChEMBL bioactivity database, and other literature."
targets:
- - id: herg_blocker
- description: whether it blocks hERG (1, <10uM) or not (0, >=10uM)
- units:
- type: boolean
- names:
- - noun: hERG blocker (<10uM)
- - noun: hERG blocking compound (<10uM)
- - noun: human ether-à-go-go related gene (hERG) blocker (<10uM)
- - noun: human ether-à-go-go related gene (hERG) blocking compound (<10uM)
- - verb: block hERG (<10uM)
- - verb: block the human ether-à-go-go related gene (hERG) (<10uM)
- uris:
- - http://purl.obolibrary.org/obo/MI_2136
+ - id: herg_blocker
+ description: whether it blocks hERG (1, <10uM) or not (0, >=10uM)
+ units:
+ type: boolean
+ names:
+ - noun: hERG blocker (<10uM)
+ - noun: hERG blocking compound (<10uM)
+ - noun: human ether-à-go-go related gene (hERG) blocker (<10uM)
+ - noun: human ether-à-go-go related gene (hERG) blocking compound (<10uM)
+ - verb: block hERG (<10uM)
+ - verb: block the human ether-à-go-go related gene (hERG) (<10uM)
+ uris:
+ - http://purl.obolibrary.org/obo/MI_2136
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://doi.org/10.1186/s13321-021-00541-z
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-karim-et-al
- description: Data source
+ - url: https://doi.org/10.1186/s13321-021-00541-z
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/tox/#herg-karim-et-al
+ description: Data source
num_points: 13445
bibtex:
- - |-
- @article{Karim2021,
- doi = {10.1186/s13321-021-00541-z},
- url = {https://doi.org/10.1186/s13321-021-00541-z},
- year = {2021},
- month = aug,
- publisher = {Springer Science and Business Media LLC},
- volume = {13},
- number = {1},
- author = {Abdul Karim and Matthew Lee and Thomas Balle and Abdul Sattar},
- title = {CardioTox net: a robust predictor for hERG channel blockade
- based on deep learning meta-feature ensembles},
- journal = {Journal of Cheminformatics}
+ - |-
+ @article{Karim2021,
+ doi = {10.1186/s13321-021-00541-z},
+ url = {https://doi.org/10.1186/s13321-021-00541-z},
+ year = {2021},
+ month = aug,
+ publisher = {Springer Science and Business Media LLC},
+ volume = {13},
+ number = {1},
+ author = {Abdul Karim and Matthew Lee and Thomas Balle and Abdul Sattar},
+ title = {CardioTox net: a robust predictor for hERG channel blockade
+ based on deep learning meta-feature ensembles},
+ journal = {Journal of Cheminformatics}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {herg_blocker#not a hERG blocker (>= 10uM)&a hERG blocker (<10uM)}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {herg_blocker#not a human ether-à-go-go related gene (hERG) blocker
- (>= 10uM)&a human ether-à-go-go related gene (hERG) blocker (<10uM)}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {herg_blocker#not a hERG blocker (>= 10uM)&a hERG blocker (<10uM)}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {herg_blocker#not a human ether-à-go-go related gene (hERG) blocker (>= 10uM)&a human ether-à-go-go
- related gene (hERG) blocker (<10uM)}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that does {herg_blocker__names__verb}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {herg_blocker#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is a {herg_blocker__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {herg_blocker#no &NULL}{herg_blocker__names__noun}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is a {herg_blocker__names__noun}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {herg_blocker__names__noun}?
- Assistant: {herg_blocker#No&Yes}, this molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} a {herg_blocker__names__noun}?
- Assistant: {herg_blocker#No&Yes}, it is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that does {herg_blocker#not &NULL}{herg_blocker__names__verb}?
- Assistant: This is a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {herg_blocker#not &NULL}be a {herg_blocker__names__noun}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {herg_blocker#not &NULL}be a {herg_blocker__names__noun}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} a {herg_blocker__names__noun}:{herg_blocker#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is a {herg_blocker__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{herg_blocker#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is a {herg_blocker__names__noun}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result:This molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {herg_blocker__names__noun}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {herg_blocker%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {herg_blocker#not &NULL}a {herg_blocker__names__noun}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%herg_blocker%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {herg_blocker#not &NULL}a {herg_blocker__names__noun}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%herg_blocker%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} is {herg_blocker#not a hERG blocker (>= 10uM)&a hERG blocker (<10uM)}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule is {herg_blocker#not a human ether-à-go-go related gene (hERG) blocker (>= 10uM)&a human ether-à-go-go related gene (hERG) blocker (<10uM)}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that is {herg_blocker#not a hERG blocker (>= 10uM)&a hERG blocker (<10uM)}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {herg_blocker#not a human ether-à-go-go related gene (hERG) blocker (>= 10uM)&a human ether-à-go-go related gene (hERG) blocker (<10uM)}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that does {herg_blocker__names__verb}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {herg_blocker#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is a {herg_blocker__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is a {herg_blocker__names__noun}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is a {herg_blocker__names__noun}?
+ Assistant: {herg_blocker#No&Yes}, this molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} a {herg_blocker__names__noun}?
+ Assistant: {herg_blocker#No&Yes}, it is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that does {herg_blocker#not &NULL}{herg_blocker__names__verb}.
+ Assistant: This is a molecule that is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {herg_blocker#not &NULL}be a {herg_blocker__names__noun}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {herg_blocker#not &NULL}be a {herg_blocker__names__noun}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {herg_blocker#not &NULL}a {herg_blocker__names__noun}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} a {herg_blocker__names__noun}:{herg_blocker#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is a {herg_blocker__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{herg_blocker#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is a {herg_blocker__names__noun}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result:This molecule is {herg_blocker#not &NULL}a {herg_blocker__names__noun}.
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} a {herg_blocker__names__noun}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {herg_blocker%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {herg_blocker#not &NULL}a {herg_blocker__names__noun}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%herg_blocker%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {herg_blocker#not &NULL}a {herg_blocker__names__noun}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%herg_blocker%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/hiv/meta.yaml b/data/tabular/hiv/meta.yaml
index 595cd53f5..a94119ea7 100644
--- a/data/tabular/hiv/meta.yaml
+++ b/data/tabular/hiv/meta.yaml
@@ -1,124 +1,123 @@
----
name: hiv
description: |-
- The HIV dataset was introduced by the Drug Therapeutics Program (DTP)
- AIDS Antiviral Screen, which tested the ability to inhibit HIV replication for
- over 40,000 compounds.
+ The HIV dataset was introduced by the Drug Therapeutics Program (DTP)
+ AIDS Antiviral Screen, which tested the ability to inhibit HIV replication for
+ over 40,000 compounds.
targets:
- - id: activity_HIV
- description: whether it is active against HIV virus (1) or not (0).
- units:
- type: boolean
- names:
- - noun: activity against the human immunodeficiency virus
- - noun: activity against HIV
- - adjective: active against the human immunodeficiency virus
- - adjective: active against HIV
+ - id: activity_HIV
+ description: whether it is active against HIV virus (1) or not (0).
+ units:
+ type: boolean
+ names:
+ - noun: activity against the human immunodeficiency virus
+ - noun: activity against HIV
+ - adjective: active against the human immunodeficiency virus
+ - adjective: active against HIV
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://rb.gy/wphpqg
- description: data source
- - url: https://rb.gy/0xx91v
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/hts/#hiv
- description: data source
+ - url: https://rb.gy/wphpqg
+ description: data source
+ - url: https://rb.gy/0xx91v
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/hts/#hiv
+ description: data source
num_points: 41127
bibtex:
- - |-
- @article{Wu2018,
- doi = {10.1039/c7sc02664a},
- url = {https://doi.org/10.1039/c7sc02664a},
- year = {2018},
- publisher = {Royal Society of Chemistry (RSC)},
- volume = {9},
- number = {2},
- pages = {513--530},
- author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg and Joseph Gomes
- and Caleb Geniesse and Aneesh S. Pappu and Karl Leswing and Vijay Pande},
- title = {MoleculeNet: a benchmark for molecular machine learning},
- journal = {Chemical Science}
+ - |-
+ @article{Wu2018,
+ doi = {10.1039/c7sc02664a},
+ url = {https://doi.org/10.1039/c7sc02664a},
+ year = {2018},
+ publisher = {Royal Society of Chemistry (RSC)},
+ volume = {9},
+ number = {2},
+ pages = {513--530},
+ author = {Zhenqin Wu and Bharath Ramsundar and Evan~N. Feinberg and Joseph Gomes
+ and Caleb Geniesse and Aneesh S. Pappu and Karl Leswing and Vijay Pande},
+ title = {MoleculeNet: a benchmark for molecular machine learning},
+ journal = {Chemical Science}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_HIV#no &NULL}{activity_HIV__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {activity_HIV#no &NULL}{activity_HIV__names__noun}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_HIV#no &NULL}{activity_HIV__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_HIV#not &NULL}{activity_HIV__names__adjective}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_HIV__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {activity_HIV#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_HIV__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {activity_HIV#not &NULL}{activity_HIV__names__adjective}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {activity_HIV__names__adjective}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_HIV__names__adjective}?
- Assistant: Yes, this molecule is {activity_HIV#not &NULL}{activity_HIV__names__adjective}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {activity_HIV__names__adjective}?
- Assistant: Yes, it is {activity_HIV#not &NULL}{activity_HIV__names__adjective}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_HIV#not &NULL}{activity_HIV__names__adjective}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_HIV#not &NULL}{activity_HIV__names__adjective}?
- Assistant: This is a molecule that is {activity_HIV#not &NULL}{activity_HIV__names__adjective}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {activity_HIV#not &NULL}be {activity_HIV__names__adjective}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_HIV#not &NULL}{activity_HIV__names__adjective}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {activity_HIV#not &NULL}be {activity_HIV__names__adjective}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_HIV#not &NULL}{activity_HIV__names__adjective}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} {activity_HIV__names__adjective}:{activity_HIV#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_HIV__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{activity_HIV#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_HIV__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result:This molecule is {activity_HIV#not &NULL}{activity_HIV__names__adjective}.
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_HIV__names__adjective}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {activity_HIV%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {activity_HIV#not &NULL}{activity_HIV__names__adjective}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%activity_HIV%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {activity_HIV#not &NULL}{activity_HIV__names__adjective}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%activity_HIV%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_HIV#no &NULL}{activity_HIV__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {activity_HIV#no &NULL}{activity_HIV__names__noun}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_HIV#no &NULL}{activity_HIV__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_HIV#not &NULL}{activity_HIV__names__adjective}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {activity_HIV__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {activity_HIV#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {activity_HIV__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {activity_HIV#not &NULL}{activity_HIV__names__adjective}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {activity_HIV__names__adjective}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_HIV__names__adjective}?
+ Assistant: {activity_HIV#No&Yes}, this molecule is {activity_HIV#not &NULL}{activity_HIV__names__adjective}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {activity_HIV__names__adjective}?
+ Assistant: {activity_HIV#No&Yes}, it is {activity_HIV#not &NULL}{activity_HIV__names__adjective}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_HIV#not &NULL}{activity_HIV__names__adjective}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_HIV#not &NULL}{activity_HIV__names__adjective}.
+ Assistant: This is a molecule that is {activity_HIV#not &NULL}{activity_HIV__names__adjective}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {activity_HIV#not &NULL}be {activity_HIV__names__adjective}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_HIV#not &NULL}{activity_HIV__names__adjective}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {activity_HIV#not &NULL}be {activity_HIV__names__adjective}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_HIV#not &NULL}{activity_HIV__names__adjective}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} {activity_HIV__names__adjective}:{activity_HIV#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {activity_HIV__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{activity_HIV#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {activity_HIV__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result:This molecule is {activity_HIV#not &NULL}{activity_HIV__names__adjective}.
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_HIV__names__adjective}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {activity_HIV%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {activity_HIV#not &NULL}{activity_HIV__names__adjective}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%activity_HIV%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {activity_HIV#not &NULL}{activity_HIV__names__adjective}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%activity_HIV%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/human_intestinal_absorption/meta.yaml b/data/tabular/human_intestinal_absorption/meta.yaml
index b26e11d4a..99e2a9234 100644
--- a/data/tabular/human_intestinal_absorption/meta.yaml
+++ b/data/tabular/human_intestinal_absorption/meta.yaml
@@ -1,136 +1,133 @@
----
name: human_intestinal_absorption
description: |-
- When a drug is orally administered, it needs to be absorbed from the
- human gastrointestinal system into the bloodstream of the human body. This ability
- of absorption is called human intestinal absorption (HIA) and it is crucial
- for a drug to be delivered to the target.
+ When a drug is orally administered, it needs to be absorbed from the
+ human gastrointestinal system into the bloodstream of the human body. This ability
+ of absorption is called human intestinal absorption (HIA) and it is crucial
+ for a drug to be delivered to the target.
targets:
- - id: absorption_HIA_Hou
- description: whether it is absorbed from the human gastrointestinal system (1) or not (0)
- units:
- type: boolean
- names:
- - noun: human intestinal absorption
- - noun: human intestinal absorption (HIA)
- - noun: HIA
- - adjective: absorbed from the human gastrointestinal system
- uris:
- - http://purl.bioontology.org/ontology/MESH/D007408
+ - id: absorption_HIA_Hou
+ description: whether it is absorbed from the human gastrointestinal system (1) or not (0)
+ units:
+ type: boolean
+ names:
+ - noun: human intestinal absorption
+ - noun: human intestinal absorption (HIA)
+ - noun: HIA
+ - adjective: absorbed from the human gastrointestinal system
+ uris:
+ - http://purl.bioontology.org/ontology/MESH/D007408
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
- - id: compound_name
- type: Other
- names:
- - noun: compound name
- - noun: drug name
- - noun: generic drug name
- description: drug name
+ - id: SMILES
+ type: SMILES
+ description: SMILES
+ - id: compound_name
+ type: Other
+ names:
+ - noun: compound name
+ - noun: drug name
+ - noun: generic drug name
+ description: drug name
license: CC BY 4.0
links:
- - url: https://doi.org/10.1021/ci600343x
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/adme/#hia-human-intestinal-absorption-hou-et-al
- description: data source
+ - url: https://doi.org/10.1021/ci600343x
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/adme/#hia-human-intestinal-absorption-hou-et-al
+ description: data source
num_points: 578
bibtex:
- - |-
- @article{Hou2006,
- doi = {10.1021/ci600343x},
- url = {https://doi.org/10.1021/ci600343x},
- year = {2006},
- month = nov,
- publisher = {American Chemical Society (ACS)},
- volume = {47},
- number = {1},
- pages = {208--218},
- author = {Tingjun Hou and Junmei Wang and Wei Zhang and Xiaojie Xu},
- title = {ADME Evaluation in Drug Discovery. 7. Prediction of Oral Absorption
- by Correlation and Classification},
- journal = {Journal of Chemical Information and Modeling}
+ - |-
+ @article{Hou2006,
+ doi = {10.1021/ci600343x},
+ url = {https://doi.org/10.1021/ci600343x},
+ year = {2006},
+ month = nov,
+ publisher = {American Chemical Society (ACS)},
+ volume = {47},
+ number = {1},
+ pages = {208--218},
+ author = {Tingjun Hou and Junmei Wang and Wei Zhang and Xiaojie Xu},
+ title = {ADME Evaluation in Drug Discovery. 7. Prediction of Oral Absorption
+ by Correlation and Classification},
+ journal = {Journal of Chemical Information and Modeling}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {absorption_HIA_Hou#no &NULL}{absorption_HIA_Hou__names__noun}
- properties.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {absorption_HIA_Hou#no &NULL}{absorption_HIA_Hou__names__noun}
- {#properties|characteristics|features!}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {absorption_HIA_Hou__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {absorption_HIA_Hou#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {absorption_HIA_Hou__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {absorption_HIA_Hou__names__adjective}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {absorption_HIA_Hou__names__adjective}?
- Assistant: {absorption_HIA_Hou#No&Yes}, this molecule is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {absorption_HIA_Hou__names__adjective}?
- Assistant: {absorption_HIA_Hou#No&Yes}, it is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}?
- Assistant: This is a molecule that is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {absorption_HIA_Hou#not &NULL}be {absorption_HIA_Hou__names__adjective}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {absorption_HIA_Hou#not &NULL}be {absorption_HIA_Hou__names__adjective}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} {absorption_HIA_Hou__names__adjective}:{absorption_HIA_Hou#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {absorption_HIA_Hou__names__adjective}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{absorption_HIA_Hou#False&True}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {absorption_HIA_Hou__names__adjective}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {absorption_HIA_Hou%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {absorption_HIA_Hou__names__adjective}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {absorption_HIA_Hou%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%absorption_HIA_Hou%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%absorption_HIA_Hou%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {absorption_HIA_Hou#no &NULL}{absorption_HIA_Hou__names__noun} properties.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule has {absorption_HIA_Hou#no &NULL}{absorption_HIA_Hou__names__noun} {#properties|characteristics|features!}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {absorption_HIA_Hou__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {absorption_HIA_Hou#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {absorption_HIA_Hou__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {absorption_HIA_Hou__names__adjective}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {absorption_HIA_Hou__names__adjective}?
+ Assistant: {absorption_HIA_Hou#No&Yes}, this molecule is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {absorption_HIA_Hou__names__adjective}?
+ Assistant: {absorption_HIA_Hou#No&Yes}, it is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}?
+ Assistant: This is a molecule that is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {absorption_HIA_Hou#not &NULL}be {absorption_HIA_Hou__names__adjective}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {absorption_HIA_Hou#not &NULL}be {absorption_HIA_Hou__names__adjective}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} {absorption_HIA_Hou__names__adjective}:{absorption_HIA_Hou#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {absorption_HIA_Hou__names__adjective}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{absorption_HIA_Hou#False&True}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {absorption_HIA_Hou__names__adjective}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {absorption_HIA_Hou%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {absorption_HIA_Hou__names__adjective}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {absorption_HIA_Hou%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%absorption_HIA_Hou%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {absorption_HIA_Hou#not &NULL}{absorption_HIA_Hou__names__adjective}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%absorption_HIA_Hou%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/inverse_1/meta.yaml b/data/tabular/inverse_1/meta.yaml
index 44014874d..6dc0057b7 100644
--- a/data/tabular/inverse_1/meta.yaml
+++ b/data/tabular/inverse_1/meta.yaml
@@ -1,223 +1,222 @@
----
name: inverse_1
description: |-
- Inverse design task constructed by merging solubility_aqsoldb and
- nr_ar_tox21 and augmenting it with molecular descriptors.
+ Inverse design task constructed by merging solubility_aqsoldb and
+ nr_ar_tox21 and augmenting it with molecular descriptors.
targets:
- - id: aqeuous_solubility
- description: aqueous solubility
- units: log(mol/L)
- type: continuous
- names:
- - noun: aqueous solubility (logarithmic)
- - noun: water solubility (measured in log(mol/L))
- - noun: water solubility (logarithmic)
- - adjective: dissolves in a water
- uris:
- - http://purl.jp/bio/4/id/200906006880450101
- - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C60821
- - id: toxicity_NR-AR
- description: whether it toxic in a androgen receptor toxicity assay (1) or not (0)
- units:
- type: boolean
- names:
- - noun: NR-AR toxicity
- - noun: NR-androgen receptor toxicity
- - verb: is toxic in a androgen receptor toxicity assay
- - adjective: toxic in the NR-AR assay
- - adjective: toxic in the NR-androgen receptor assay
- - gerund: displaying toxicity in the NR-AR assay
- - gerund: exhibiting toxicity in the NR-androgen assay
- - gerund: demonstrating toxicity in the NR-androgen assay
- uris:
- - id: carboxyl_count
- description: number of carboxyl groups
- type: ordinal
- names:
- - noun: carboxyl groups
- - id: carbonyl_count
- description: number of carbonyl groups
- type: ordinal
- names:
- - noun: carbonyl groups
- - id: ether_count
- description: number of ether groups
- type: ordinal
- names:
- - noun: ether groups
- - id: alkanol_count
- description: number of alkanol groups
- type: ordinal
- names:
- - noun: alkanol groups
- - id: thiol_count
- description: number of thiol groups
- type: ordinal
- names:
- - noun: thiol groups
- - id: halogen_count
- description: number of halogen groups
- type: ordinal
- names:
- - noun: halogen groups
- - id: amine_count
- description: number of amine groups
- type: ordinal
- names:
- - noun: amine groups
- - id: amide_count
- description: number of amide groups
- type: ordinal
- names:
- - noun: amide groups
- - id: ketone_count
- description: number of ketone groups
- type: ordinal
- names:
- - noun: ketone group count
- - id: num_valence_electrons
- description: number of valence electrons
- type: ordinal
- names:
- - noun: valence electrons
- - id: molecular_formula
- description: molecular formula
- type: text
- names:
- - noun: molecular formula
- - id: monoisotopic_molecular_mass
- description: monoisotopic molecular mass
- type: continuous
- units: g/mol
- names:
- - noun: monoisotopic molecular mass
- - id: carbon_mass
- description: carbon mass
- type: continuous
- units: g/mol
- names:
- - noun: carbon mass
- - id: hydrogen_mass
- description: hydrogen mass
- type: continuous
- units: g/mol
- names:
- - noun: hydrogen mass
- - id: nitrogen_mass
- description: nitrogen mass
- type: continuous
- units: g/mol
- names:
- - noun: nitrogen mass
- - id: oxygen_mass
- description: oxygen mass
- units: g/mol
- type: continuous
- names:
- - noun: oxygen mass
- - id: num_carbon_atoms
- description: number of carbon atoms
- type: ordinal
- names:
- - noun: carbon atoms
- - id: num_hydrogen_atoms
- type: ordinal
- description: number of hydrogen atoms
- names:
- - noun: hydrogen atoms
- - id: num_nitrogen_atoms
- description: number of nitrogen atoms
- type: ordinal
- names:
- - noun: nitrogen atoms
- - id: num_oxygen_atoms
- description: number of oxygen atoms
- type: ordinal
- names:
- - noun: oxygen atoms
- - id: num_hydrogen_bond_acceptors
- description: number of hydrogen bond acceptors
- type: ordinal
- names:
- - noun: hydrogen bond acceptors
- - id: num_hydrogen_bond_donors
- description: number of hydrogen bond donors
- type: ordinal
- names:
- - noun: hydrogen bond donors
- - id: num_lipinski_violations
- description: number of Lipinski violations
- type: ordinal
- names:
- - noun: Lipinski violations
- - noun: Lipinski rule of five violations
- - id: num_chiral_centers
- description: number of chiral centers
- type: ordinal
- names:
- - noun: chiral center count
+ - id: aqeuous_solubility
+ description: aqueous solubility
+ units: log(mol/L)
+ type: continuous
+ names:
+ - noun: aqueous solubility (logarithmic)
+ - noun: water solubility (measured in log(mol/L))
+ - noun: water solubility (logarithmic)
+ - adjective: dissolves in water
+ uris:
+ - http://purl.jp/bio/4/id/200906006880450101
+ - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C60821
+ - id: toxicity_NR-AR
+ description: whether it is toxic in an androgen receptor toxicity assay (1) or not (0)
+ units:
+ type: boolean
+ names:
+ - noun: NR-AR toxicity
+ - noun: NR-androgen receptor toxicity
+ - verb: is toxic in an androgen receptor toxicity assay
+ - adjective: toxic in the NR-AR assay
+ - adjective: toxic in the NR-androgen receptor assay
+ - gerund: displaying toxicity in the NR-AR assay
+ - gerund: exhibiting toxicity in the NR-androgen assay
+ - gerund: demonstrating toxicity in the NR-androgen assay
+ uris:
+ - id: carboxyl_count
+ description: number of carboxyl groups
+ type: ordinal
+ names:
+ - noun: carboxyl groups
+ - id: carbonyl_count
+ description: number of carbonyl groups
+ type: ordinal
+ names:
+ - noun: carbonyl groups
+ - id: ether_count
+ description: number of ether groups
+ type: ordinal
+ names:
+ - noun: ether groups
+ - id: alkanol_count
+ description: number of alkanol groups
+ type: ordinal
+ names:
+ - noun: alkanol groups
+ - id: thiol_count
+ description: number of thiol groups
+ type: ordinal
+ names:
+ - noun: thiol groups
+ - id: halogen_count
+ description: number of halogen groups
+ type: ordinal
+ names:
+ - noun: halogen groups
+ - id: amine_count
+ description: number of amine groups
+ type: ordinal
+ names:
+ - noun: amine groups
+ - id: amide_count
+ description: number of amide groups
+ type: ordinal
+ names:
+ - noun: amide groups
+ - id: ketone_count
+ description: number of ketone groups
+ type: ordinal
+ names:
+ - noun: ketone group count
+ - id: num_valence_electrons
+ description: number of valence electrons
+ type: ordinal
+ names:
+ - noun: valence electrons
+ - id: molecular_formula
+ description: molecular formula
+ type: text
+ names:
+ - noun: molecular formula
+ - id: monoisotopic_molecular_mass
+ description: monoisotopic molecular mass
+ type: continuous
+ units: g/mol
+ names:
+ - noun: monoisotopic molecular mass
+ - id: carbon_mass
+ description: carbon mass
+ type: continuous
+ units: g/mol
+ names:
+ - noun: carbon mass
+ - id: hydrogen_mass
+ description: hydrogen mass
+ type: continuous
+ units: g/mol
+ names:
+ - noun: hydrogen mass
+ - id: nitrogen_mass
+ description: nitrogen mass
+ type: continuous
+ units: g/mol
+ names:
+ - noun: nitrogen mass
+ - id: oxygen_mass
+ description: oxygen mass
+ units: g/mol
+ type: continuous
+ names:
+ - noun: oxygen mass
+ - id: num_carbon_atoms
+ description: number of carbon atoms
+ type: ordinal
+ names:
+ - noun: carbon atoms
+ - id: num_hydrogen_atoms
+ type: ordinal
+ description: number of hydrogen atoms
+ names:
+ - noun: hydrogen atoms
+ - id: num_nitrogen_atoms
+ description: number of nitrogen atoms
+ type: ordinal
+ names:
+ - noun: nitrogen atoms
+ - id: num_oxygen_atoms
+ description: number of oxygen atoms
+ type: ordinal
+ names:
+ - noun: oxygen atoms
+ - id: num_hydrogen_bond_acceptors
+ description: number of hydrogen bond acceptors
+ type: ordinal
+ names:
+ - noun: hydrogen bond acceptors
+ - id: num_hydrogen_bond_donors
+ description: number of hydrogen bond donors
+ type: ordinal
+ names:
+ - noun: hydrogen bond donors
+ - id: num_lipinski_violations
+ description: number of Lipinski violations
+ type: ordinal
+ names:
+ - noun: Lipinski violations
+ - noun: Lipinski rule of five violations
+ - id: num_chiral_centers
+ description: number of chiral centers
+ type: ordinal
+ names:
+ - noun: chiral center count
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://doi.org/10.1038/s41597-019-0151-1
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/adme/#aqeuous_solubility-aqsoldb
- description: data source
- - url: https://github.com/lamalab-org/chem-caption
- description: software used to generate features
+ - url: https://doi.org/10.1038/s41597-019-0151-1
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/adme/#solubility-aqsoldb
+ description: data source
+ - url: https://github.com/lamalab-org/chem-caption
+ description: software used to generate features
num_points: 2525
bibtex:
- - |-
- @article{Sorkun_2019,
- doi = {10.1038/s41597-019-0151-1},
- url = {https://doi.org/10.1038%2Fs41597-019-0151-1},
- year = {2019},
- month = aug,
- publisher = {Springer Science and Business Media LLC},
- volume = {6},
- number = {1},
- author = {Murat Cihan Sorkun and Abhishek Khetan and
- Suleyman Er},
- title = {AqSolDB, a curated reference set of aqueous aqeuous_solubility
- and 2D descriptors for a diverse set of compounds},
- journal = {Scientific Data}
+ - |-
+ @article{Sorkun_2019,
+ doi = {10.1038/s41597-019-0151-1},
+ url = {https://doi.org/10.1038%2Fs41597-019-0151-1},
+ year = {2019},
+ month = aug,
+ publisher = {Springer Science and Business Media LLC},
+ volume = {6},
+ number = {1},
+ author = {Murat Cihan Sorkun and Abhishek Khetan and
+ Suleyman Er},
+ title = {AqSolDB, a curated reference set of aqueous solubility
+ and 2D descriptors for a diverse set of compounds},
+ journal = {Scientific Data}
templates:
- - |-
- User: {#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units} and is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}.
- Assistant: {#There might be multiple compounds that match these criteria. Do you have additional constraints?|Do you have additional constraints?|Is there anything else I should consider?|Is there anything else I should know?!}
- User: {#No|No, there are no additional constraints.|No, there are no other constraints.|No, there are no other criteria.|No, there are no other requirements.!}
- Assistant: {#In this case, |OK, |Alright, |Understood, |Got it, |I see, !}the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#should|will|is expected to!} fit your criteria.
- - |-
- User: {#I am a medicinal chemist. |I work in drug-discovery. |!}{#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units} and is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. {#Additionally, |Moreover, |On top of that, |In addition, |Furthermore, !}{#I|we!} {#want|need|require|would like|would prefer!} the {#molecule|compound|chemical structure!} to have a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. {#I|We!} {#want|would like to|need to!} ensure that {#there are|the molecule contains|the compound contains!} {carboxyl_count#} {carbonyl_count__names__noun}.
- Assistant: {#Thanks for the detailed description. |Thanks. |!}{#I suggest|I recommend|I propose|I would suggest|I would recommend|I would propose!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}.{# This compound should fulfill your needs.| This chemical should satisfy your constrains|!}
- - |-
- User: {#I am a medicinal chemist. |I work in drug-discovery. |!}{#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} and has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units}. {#Additionally, |Moreover, |On top of that, |In addition, |Furthermore, !}{#I|we!} {#want|need|require|would like|would prefer!} the {#molecule|compound|chemical structure!} to have a {carbon_mass__names__noun} of {carbon_mass#} {carbon_mass__units}. {#I|We!} {#want|would like to|need to!} ensure that {#there are|the molecule contains|the compound contains!} {carboxyl_count#} {carbonyl_count__names__noun} and {ether_count#} {ether_count__names__noun} as well as {alkanol_count#} {alkanol_count__names__noun} and {thiol_count#} {thiol_count__names__noun}.
- Assistant: {#Thanks for the detailed description. |Thanks. |!}{#I suggest|I recommend|I propose|I would suggest|I would recommend|I would propose!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}.{# This compound should fulfill your needs.| This chemical should satisfy your constrains|!}
- - |-
- User: {#I am researching|I am investigating|I am studying!} {#pharmaceuticals|medicinal compounds|drug molecules!} and {#need|require|am looking for!} a {#molecule|compound|chemical structure!} with a {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units}. It should also be {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. {#Additionally,|Moreover,|Furthermore,!} it {#must|should|needs to!} have {amine_count#} {amine_count__names__noun}.
- Assistant: {#To meet these requirements, |Considering your specifications, |Taking into account your needs, !}I {#recommend|suggest|propose!} a {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}. {#This should match your criteria.|This fits your described parameters.|This aligns with your requirements.!}
- - |-
- User: {#As a chemist|Being a chemical researcher,|In my chemical research,!} I {#am looking for|require|need!} a {#molecule|compound|chemical structure!} with {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun} and {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun}. It {#also needs|should also!} to be {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} and have a {oxygen_mass__names__noun} of {oxygen_mass#} {oxygen_mass__units}.
- Assistant: {#I've got the ideal compound for you.|I have a compound that fits these specifications.|I suggest a molecule that meets your needs.!} The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#should work perfectly.|is exactly what you're looking for.|matches your requirements.!}
- - |-
- User: {#In my pharmaceutical research,|For my current drug discovery project,|In my medicinal chemistry studies,!} I {#require|need|am looking for!} a {#molecule|compound|chemical structure!} with {num_chiral_centers#} {num_chiral_centers__names__noun} and {num_lipinski_violations#} {num_lipinski_violations__names__noun}. It should {#also have|also possess|also contain!} a {nitrogen_mass__names__noun} of {nitrogen_mass#} {nitrogen_mass__units} and be {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}.
- Assistant: {#After considering your needs,|Based on your requirements,|Taking your specifications into account,!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would be a great fit.|seems to be a perfect match.|should meet all your criteria.!}
- - |-
- User: {#In my research on|For my study of|While investigating!} {#non-toxic chemicals|safe compounds|environment-friendly substances!}, I {#need|require|am looking for!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} with {num_carbon_atoms#} {num_carbon_atoms__names__noun}. {#Additionally,|Furthermore,|Moreover,!} it {#should have|must have|needs to have!} {num_nitrogen_atoms#} {num_nitrogen_atoms__names__noun}.
- Assistant: {#I have a compound in mind|I can suggest a molecule|I've identified a chemical structure!} that {#fits|meets|aligns with!} these requirements. The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#is suitable for your needs.|meets your specified criteria.|should work well for your research.!}
- - |-
- User: {#As a pharmacologist,|In my pharmacological studies,|For my drug development work,!} I {#require|need|am in need of!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} and has {num_oxygen_atoms#} {num_oxygen_atoms__names__noun}. {#Also,|In addition,|Moreover,!} it {#must|should!} have a {molecular_formula__names__noun} of {molecular_formula#}.
- Assistant: {#I recommend|I suggest|I propose!} a {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} that {#satisfies these conditions.|meets these criteria.|is aligned with your requirements.!}
- - |-
- User: {#In my environmental chemistry work,|For my eco-friendly compound research,|As part of my sustainable chemical studies,!} I {#am looking for|require|need!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} with a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. {#Also,|Additionally,|Moreover,!} it {#should possess|must contain|needs to have!} {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun}.
- Assistant: {#Considering your needs,|Based on your specifications,|With your requirements in mind,!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would be ideal.|seems perfect.|is a great match.!}
- - |-
- User: {#For my bioactive molecule research,|In my study of pharmacologically active substances,|As I explore biologically active compounds,!} I {#need|require|am searching for!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. It {#should also have|must also feature|also needs to have!} {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun} and {halogen_count#} {halogen_count__names__noun}.
- Assistant: {#I've found a compound|I have a molecule|I suggest a chemical structure!} that {#fulfills|meets|matches!} these criteria. The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would suit your research.|is in line with your needs.|fits your specifications perfectly.!}
+ - |-
+ User: {#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units} and is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}.
+ Assistant: {#There might be multiple compounds that match these criteria. Do you have additional constraints?|Do you have additional constraints?|Is there anything else I should consider?|Is there anything else I should know?!}
+ User: {#No|No, there are no additional constraints.|No, there are no other constraints.|No, there are no other criteria.|No, there are no other requirements.!}
+ Assistant: {#In this case, |OK, |Alright, |Understood, |Got it, |I see, !}the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#should|will|is expected to!} fit your criteria.
+ - |-
+ User: {#I am a medicinal chemist. |I work in drug-discovery. |!}{#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units} and is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. {#Additionally, |Moreover, |On top of that, |In addition, |Furthermore, !}{#I|we!} {#want|need|require|would like|would prefer!} the {#molecule|compound|chemical structure!} to have a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. {#I|We!} {#want|would like to|need to!} ensure that {#there are|the molecule contains|the compound contains!} {carboxyl_count#} {carboxyl_count__names__noun}.
+ Assistant: {#Thanks for the detailed description. |Thanks. |!}{#I suggest|I recommend|I propose|I would suggest|I would recommend|I would propose!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}.{# This compound should fulfill your needs.| This chemical should satisfy your constraints.|!}
+ - |-
+ User: {#I am a medicinal chemist. |I work in drug-discovery. |!}{#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} and has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units}. {#Additionally, |Moreover, |On top of that, |In addition, |Furthermore, !}{#I|we!} {#want|need|require|would like|would prefer!} the {#molecule|compound|chemical structure!} to have a {carbon_mass__names__noun} of {carbon_mass#} {carbon_mass__units}. {#I|We!} {#want to|would like to|need to!} ensure that {#there are|the molecule contains|the compound contains!} {carboxyl_count#} {carboxyl_count__names__noun} and {ether_count#} {ether_count__names__noun} as well as {alkanol_count#} {alkanol_count__names__noun} and {thiol_count#} {thiol_count__names__noun}.
+ Assistant: {#Thanks for the detailed description. |Thanks. |!}{#I suggest|I recommend|I propose|I would suggest|I would recommend|I would propose!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}.{# This compound should fulfill your needs.| This chemical should satisfy your constraints.|!}
+ - |-
+ User: {#I am researching|I am investigating|I am studying!} {#pharmaceuticals|medicinal compounds|drug molecules!} and {#need|require|am looking for!} a {#molecule|compound|chemical structure!} with a {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units}. It should also be {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. {#Additionally,|Moreover,|Furthermore,!} it {#must|should|needs to!} have {amine_count#} {amine_count__names__noun}.
+ Assistant: {#To meet these requirements, |Considering your specifications, |Taking into account your needs, !}I {#recommend|suggest|propose!} a {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}. {#This should match your criteria.|This fits your described parameters.|This aligns with your requirements.!}
+ - |-
+ User: {#As a chemist,|Being a chemical researcher,|In my chemical research,!} I {#am looking for|require|need!} a {#molecule|compound|chemical structure!} with {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun} and {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun}. It {#also needs to|should also!} be {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} and have a {oxygen_mass__names__noun} of {oxygen_mass#} {oxygen_mass__units}.
+ Assistant: {#I've got the ideal compound for you.|I have a compound that fits these specifications.|I suggest a molecule that meets your needs.!} The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#should work perfectly.|is exactly what you're looking for.|matches your requirements.!}
+ - |-
+ User: {#In my pharmaceutical research,|For my current drug discovery project,|In my medicinal chemistry studies,!} I {#require|need|am looking for!} a {#molecule|compound|chemical structure!} with {num_chiral_centers#} {num_chiral_centers__names__noun} and {num_lipinski_violations#} {num_lipinski_violations__names__noun}. It should {#also have|also possess|also contain!} a {nitrogen_mass__names__noun} of {nitrogen_mass#} {nitrogen_mass__units} and be {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}.
+ Assistant: {#After considering your needs,|Based on your requirements,|Taking your specifications into account,!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would be a great fit.|seems to be a perfect match.|should meet all your criteria.!}
+ - |-
+ User: {#In my research on|For my study of|While investigating!} {#non-toxic chemicals|safe compounds|environment-friendly substances!}, I {#need|require|am looking for!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} with {num_carbon_atoms#} {num_carbon_atoms__names__noun}. {#Additionally,|Furthermore,|Moreover,!} it {#should have|must have|needs to have!} {num_nitrogen_atoms#} {num_nitrogen_atoms__names__noun}.
+ Assistant: {#I have a compound in mind|I can suggest a molecule|I've identified a chemical structure!} that {#fits|meets|aligns with!} these requirements. The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#is suitable for your needs.|meets your specified criteria.|should work well for your research.!}
+ - |-
+ User: {#As a pharmacologist,|In my pharmacological studies,|For my drug development work,!} I {#require|need|am in need of!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} and has {num_oxygen_atoms#} {num_oxygen_atoms__names__noun}. {#Also,|In addition,|Moreover,!} it {#must|should!} have a {molecular_formula__names__noun} of {molecular_formula#}.
+ Assistant: {#I recommend|I suggest|I propose!} a {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} that {#satisfies these conditions.|meets these criteria.|is aligned with your requirements.!}
+ - |-
+ User: {#In my environmental chemistry work,|For my eco-friendly compound research,|As part of my sustainable chemical studies,!} I {#am looking for|require|need!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective} with a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. {#Also,|Additionally,|Moreover,!} it {#should possess|must contain|needs to have!} {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun}.
+ Assistant: {#Considering your needs,|Based on your specifications,|With your requirements in mind,!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would be ideal.|seems perfect.|is a great match.!}
+ - |-
+ User: {#For my bioactive molecule research,|In my study of pharmacologically active substances,|As I explore biologically active compounds,!} I {#need|require|am searching for!} a {#molecule|compound|chemical structure!} that is {toxicity_NR-AR#not &NULL}{toxicity_NR-AR__names__adjective}. It {#should also have|must also feature|also needs to have!} {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun} and {halogen_count#} {halogen_count__names__noun}.
+ Assistant: {#I've found a compound|I have a molecule|I suggest a chemical structure!} that {#fulfills|meets|matches!} these criteria. The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would suit your research.|is in line with your needs.|fits your specifications perfectly.!}
diff --git a/data/tabular/inverse_2/meta.yaml b/data/tabular/inverse_2/meta.yaml
index 169beaa22..945040aac 100644
--- a/data/tabular/inverse_2/meta.yaml
+++ b/data/tabular/inverse_2/meta.yaml
@@ -1,225 +1,224 @@
----
name: inverse_2
description: |-
- Inverse design task constructed by merging solubility_aqsoldb and
- sr_atad5_tox21 and augmenting it with molecular descriptors.
+ Inverse design task constructed by merging solubility_aqsoldb and
+ sr_atad5_tox21 and augmenting it with molecular descriptors.
targets:
- - id: aqeuous_solubility
- description: aqueous solubility
- units: log(mol/L)
- type: continuous
- names:
- - noun: aqueous solubility (logarithmic)
- - noun: water solubility (measured in log(mol/L))
- - noun: water solubility (logarithmic)
- - adjective: dissolves in a water
- uris:
- - http://purl.jp/bio/4/id/200906006880450101
- - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C60821
- - id: toxicity_SR-ATAD5
- description: whether it shows activitiy in the SR-ATAD5 assay (1) or not (0)
- units:
- type: boolean
- names:
- - noun: SR-Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity
- - noun: Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity
- - noun: Luciferase-tagged ATAD5 toxicity
- - verb: shows activity in the SR-Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity assay
- - verb: is active in the Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity assay
- - verb: is active in the Luciferase-tagged ATAD5 toxicity assay
- - adjective: toxic in the SR-Luciferase-tagged ATAD5 in human embryonic kidney cells assay
- - adjective: toxic in the Luciferase-tagged ATAD5 in human embryonic kidney cells assay
- - adjective: toxic in the Luciferase-tagged ATAD5 assay
- - gerund: showing SR-ATAD5 toxicity
- uris:
- - id: carboxyl_count
- description: number of carboxyl groups
- type: ordinal
- names:
- - noun: carboxyl groups
- - id: carbonyl_count
- description: number of carbonyl groups
- type: ordinal
- names:
- - noun: carbonyl groups
- - id: ether_count
- description: number of ether groups
- type: ordinal
- names:
- - noun: ether groups
- - id: alkanol_count
- description: number of alkanol groups
- type: ordinal
- names:
- - noun: alkanol groups
- - id: thiol_count
- description: number of thiol groups
- type: ordinal
- names:
- - noun: thiol groups
- - id: halogen_count
- description: number of halogen groups
- type: ordinal
- names:
- - noun: halogen groups
- - id: amine_count
- description: number of amine groups
- type: ordinal
- names:
- - noun: amine groups
- - id: amide_count
- description: number of amide groups
- type: ordinal
- names:
- - noun: amide groups
- - id: ketone_count
- description: number of ketone groups
- type: ordinal
- names:
- - noun: ketone group count
- - id: num_valence_electrons
- description: number of valence electrons
- type: ordinal
- names:
- - noun: valence electrons
- - id: molecular_formula
- description: molecular formula
- type: text
- names:
- - noun: molecular formula
- - id: monoisotopic_molecular_mass
- description: monoisotopic molecular mass
- type: continuous
- units: g/mol
- names:
- - noun: monoisotopic molecular mass
- - id: carbon_mass
- description: carbon mass
- type: continuous
- units: g/mol
- names:
- - noun: carbon mass
- - id: hydrogen_mass
- description: hydrogen mass
- type: continuous
- units: g/mol
- names:
- - noun: hydrogen mass
- - id: nitrogen_mass
- description: nitrogen mass
- type: continuous
- units: g/mol
- names:
- - noun: nitrogen mass
- - id: oxygen_mass
- description: oxygen mass
- units: g/mol
- type: continuous
- names:
- - noun: oxygen mass
- - id: num_carbon_atoms
- description: number of carbon atoms
- type: ordinal
- names:
- - noun: carbon atoms
- - id: num_hydrogen_atoms
- type: ordinal
- description: number of hydrogen atoms
- names:
- - noun: hydrogen atoms
- - id: num_nitrogen_atoms
- description: number of nitrogen atoms
- type: ordinal
- names:
- - noun: nitrogen atoms
- - id: num_oxygen_atoms
- description: number of oxygen atoms
- type: ordinal
- names:
- - noun: oxygen atoms
- - id: num_hydrogen_bond_acceptors
- description: number of hydrogen bond acceptors
- type: ordinal
- names:
- - noun: hydrogen bond acceptors
- - id: num_hydrogen_bond_donors
- description: number of hydrogen bond donors
- type: ordinal
- names:
- - noun: hydrogen bond donors
- - id: num_lipinski_violations
- description: number of Lipinski violations
- type: ordinal
- names:
- - noun: Lipinski violations
- - noun: Lipinski rule of five violations
- - id: num_chiral_centers
- description: number of chiral centers
- type: ordinal
- names:
- - noun: chiral center count
+ - id: aqeuous_solubility
+ description: aqueous solubility
+ units: log(mol/L)
+ type: continuous
+ names:
+ - noun: aqueous solubility (logarithmic)
+ - noun: water solubility (measured in log(mol/L))
+ - noun: water solubility (logarithmic)
+ - adjective: dissolves in water
+ uris:
+ - http://purl.jp/bio/4/id/200906006880450101
+ - http://ncicb.nci.nih.gov/xml/owl/EVS/Thesaurus.owl#C60821
+ - id: toxicity_SR-ATAD5
+ description: whether it shows activity in the SR-ATAD5 assay (1) or not (0)
+ units:
+ type: boolean
+ names:
+ - noun: SR-Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity
+ - noun: Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity
+ - noun: Luciferase-tagged ATAD5 toxicity
+ - verb: shows activity in the SR-Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity assay
+ - verb: is active in the Luciferase-tagged ATAD5 in human embryonic kidney cells toxicity assay
+ - verb: is active in the Luciferase-tagged ATAD5 toxicity assay
+ - adjective: toxic in the SR-Luciferase-tagged ATAD5 in human embryonic kidney cells assay
+ - adjective: toxic in the Luciferase-tagged ATAD5 in human embryonic kidney cells assay
+ - adjective: toxic in the Luciferase-tagged ATAD5 assay
+ - gerund: showing SR-ATAD5 toxicity
+ uris:
+ - id: carboxyl_count
+ description: number of carboxyl groups
+ type: ordinal
+ names:
+ - noun: carboxyl groups
+ - id: carbonyl_count
+ description: number of carbonyl groups
+ type: ordinal
+ names:
+ - noun: carbonyl groups
+ - id: ether_count
+ description: number of ether groups
+ type: ordinal
+ names:
+ - noun: ether groups
+ - id: alkanol_count
+ description: number of alkanol groups
+ type: ordinal
+ names:
+ - noun: alkanol groups
+ - id: thiol_count
+ description: number of thiol groups
+ type: ordinal
+ names:
+ - noun: thiol groups
+ - id: halogen_count
+ description: number of halogen groups
+ type: ordinal
+ names:
+ - noun: halogen groups
+ - id: amine_count
+ description: number of amine groups
+ type: ordinal
+ names:
+ - noun: amine groups
+ - id: amide_count
+ description: number of amide groups
+ type: ordinal
+ names:
+ - noun: amide groups
+ - id: ketone_count
+ description: number of ketone groups
+ type: ordinal
+ names:
+ - noun: ketone group count
+ - id: num_valence_electrons
+ description: number of valence electrons
+ type: ordinal
+ names:
+ - noun: valence electrons
+ - id: molecular_formula
+ description: molecular formula
+ type: text
+ names:
+ - noun: molecular formula
+ - id: monoisotopic_molecular_mass
+ description: monoisotopic molecular mass
+ type: continuous
+ units: g/mol
+ names:
+ - noun: monoisotopic molecular mass
+ - id: carbon_mass
+ description: carbon mass
+ type: continuous
+ units: g/mol
+ names:
+ - noun: carbon mass
+ - id: hydrogen_mass
+ description: hydrogen mass
+ type: continuous
+ units: g/mol
+ names:
+ - noun: hydrogen mass
+ - id: nitrogen_mass
+ description: nitrogen mass
+ type: continuous
+ units: g/mol
+ names:
+ - noun: nitrogen mass
+ - id: oxygen_mass
+ description: oxygen mass
+ units: g/mol
+ type: continuous
+ names:
+ - noun: oxygen mass
+ - id: num_carbon_atoms
+ description: number of carbon atoms
+ type: ordinal
+ names:
+ - noun: carbon atoms
+ - id: num_hydrogen_atoms
+ type: ordinal
+ description: number of hydrogen atoms
+ names:
+ - noun: hydrogen atoms
+ - id: num_nitrogen_atoms
+ description: number of nitrogen atoms
+ type: ordinal
+ names:
+ - noun: nitrogen atoms
+ - id: num_oxygen_atoms
+ description: number of oxygen atoms
+ type: ordinal
+ names:
+ - noun: oxygen atoms
+ - id: num_hydrogen_bond_acceptors
+ description: number of hydrogen bond acceptors
+ type: ordinal
+ names:
+ - noun: hydrogen bond acceptors
+ - id: num_hydrogen_bond_donors
+ description: number of hydrogen bond donors
+ type: ordinal
+ names:
+ - noun: hydrogen bond donors
+ - id: num_lipinski_violations
+ description: number of Lipinski violations
+ type: ordinal
+ names:
+ - noun: Lipinski violations
+ - noun: Lipinski rule of five violations
+ - id: num_chiral_centers
+ description: number of chiral centers
+ type: ordinal
+ names:
+ - noun: chiral center count
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://doi.org/10.1038/s41597-019-0151-1
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/adme/#aqeuous_solubility-aqsoldb
- description: data source
- - url: https://github.com/lamalab-org/chem-caption
- description: software used to generate features
+ - url: https://doi.org/10.1038/s41597-019-0151-1
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/adme/#solubility-aqsoldb
+ description: data source
+ - url: https://github.com/lamalab-org/chem-caption
+ description: software used to generate features
num_points: 2517
bibtex:
- - |-
- @article{Sorkun_2019,
- doi = {10.1038/s41597-019-0151-1},
- url = {https://doi.org/10.1038%2Fs41597-019-0151-1},
- year = {2019},
- month = aug,
- publisher = {Springer Science and Business Media LLC},
- volume = {6},
- number = {1},
- author = {Murat Cihan Sorkun and Abhishek Khetan and
- Suleyman Er},
- title = {AqSolDB, a curated reference set of aqueous aqeuous_solubility
- and 2D descriptors for a diverse set of compounds},
- journal = {Scientific Data}
+ - |-
+ @article{Sorkun_2019,
+ doi = {10.1038/s41597-019-0151-1},
+ url = {https://doi.org/10.1038%2Fs41597-019-0151-1},
+ year = {2019},
+ month = aug,
+ publisher = {Springer Science and Business Media LLC},
+ volume = {6},
+ number = {1},
+ author = {Murat Cihan Sorkun and Abhishek Khetan and
+ Suleyman Er},
+ title = {AqSolDB, a curated reference set of aqueous solubility
+ and 2D descriptors for a diverse set of compounds},
+ journal = {Scientific Data}
templates:
- - |-
- User: {#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units} and is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}.
- Assistant: {#There might be multiple compounds that match these criteria. Do you have additional constraints?|Do you have additional constraints?|Is there anything else I should consider?|Is there anything else I should know?!}
- User: {#No|No, there are no additional constraints.|No, there are no other constraints.|No, there are no other criteria.|No, there are no other requirements.!}
- Assistant: {#In this case, |OK, |Alright, |Understood, |Got it, |I see, !}the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#should|will|is expected to!} fit your criteria.
- - |-
- User: {#I am a medicinal chemist. |I work in drug-discovery. |!}{#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units} and is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. {#Additionally, |Moreover, |On top of that, |In addition, |Furthermore, !}{#I|we!} {#want|need|require|would like|would prefer!} the {#molecule|compound|chemical structure!} to have a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. {#I|We!} {#want|would like to|need to!} ensure that {#there are|the molecule contains|the compound contains!} {carboxyl_count#} {carbonyl_count__names__noun}.
- Assistant: {#Thanks for the detailed description. |Thanks. |!}{#I suggest|I recommend|I propose|I would suggest|I would recommend|I would propose!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}.{# This compound should fulfill your needs.| This chemical should satisfy your constrains|!}
- - |-
- User: {#I am a medicinal chemist. |I work in drug-discovery. |!}{#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} and has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units}. {#Additionally, |Moreover, |On top of that, |In addition, |Furthermore, !}{#I|we!} {#want|need|require|would like|would prefer!} the {#molecule|compound|chemical structure!} to have a {carbon_mass__names__noun} of {carbon_mass#} {carbon_mass__units}. {#I|We!} {#want|would like to|need to!} ensure that {#there are|the molecule contains|the compound contains!} {carboxyl_count#} {carbonyl_count__names__noun} and {ether_count#} {ether_count__names__noun} as well as {alkanol_count#} {alkanol_count__names__noun} and {thiol_count#} {thiol_count__names__noun}.
- Assistant: {#Thanks for the detailed description. |Thanks. |!}{#I suggest|I recommend|I propose|I would suggest|I would recommend|I would propose!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}.{# This compound should fulfill your needs.| This chemical should satisfy your constrains|!}
- - |-
- User: {#I am researching|I am investigating|I am studying!} {#pharmaceuticals|medicinal compounds|drug molecules!} and {#need|require|am looking for!} a {#molecule|compound|chemical structure!} with a {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units}. It should also be {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. {#Additionally,|Moreover,|Furthermore,!} it {#must|should|needs to!} have {amine_count#} {amine_count__names__noun}.
- Assistant: {#To meet these requirements, |Considering your specifications, |Taking into account your needs, !}I {#recommend|suggest|propose!} a {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}. {#This should match your criteria.|This fits your described parameters.|This aligns with your requirements.!}
- - |-
- User: {#As a chemist|Being a chemical researcher,|In my chemical research,!} I {#am looking for|require|need!} a {#molecule|compound|chemical structure!} with {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun} and {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun}. It {#also needs|should also!} to be {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} and have a {oxygen_mass__names__noun} of {oxygen_mass#} {oxygen_mass__units}.
- Assistant: {#I've got the ideal compound for you.|I have a compound that fits these specifications.|I suggest a molecule that meets your needs.!} The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#should work perfectly.|is exactly what you're looking for.|matches your requirements.!}
- - |-
- User: {#In my pharmaceutical research,|For my current drug discovery project,|In my medicinal chemistry studies,!} I {#require|need|am looking for!} a {#molecule|compound|chemical structure!} with {num_chiral_centers#} {num_chiral_centers__names__noun} and {num_lipinski_violations#} {num_lipinski_violations__names__noun}. It should {#also have|also possess|also contain!} a {nitrogen_mass__names__noun} of {nitrogen_mass#} {nitrogen_mass__units} and be {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}.
- Assistant: {#After considering your needs,|Based on your requirements,|Taking your specifications into account,!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would be a great fit.|seems to be a perfect match.|should meet all your criteria.!}
- - |-
- User: {#In my research on|For my study of|While investigating!} {#non-toxic chemicals|safe compounds|environment-friendly substances!}, I {#need|require|am looking for!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} with {num_carbon_atoms#} {num_carbon_atoms__names__noun}. {#Additionally,|Furthermore,|Moreover,!} it {#should have|must have|needs to have!} {num_nitrogen_atoms#} {num_nitrogen_atoms__names__noun}.
- Assistant: {#I have a compound in mind|I can suggest a molecule|I've identified a chemical structure!} that {#fits|meets|aligns with!} these requirements. The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#is suitable for your needs.|meets your specified criteria.|should work well for your research.!}
- - |-
- User: {#As a pharmacologist,|In my pharmacological studies,|For my drug development work,!} I {#require|need|am in need of!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} and has {num_oxygen_atoms#} {num_oxygen_atoms__names__noun}. {#Also,|In addition,|Moreover,!} it {#must|should!} have a {molecular_formula__names__noun} of {molecular_formula#}.
- Assistant: {#I recommend|I suggest|I propose!} a {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} that {#satisfies these conditions.|meets these criteria.|is aligned with your requirements.!}
- - |-
- User: {#In my environmental chemistry work,|For my eco-friendly compound research,|As part of my sustainable chemical studies,!} I {#am looking for|require|need!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} with a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. {#Also,|Additionally,|Moreover,!} it {#should possess|must contain|needs to have!} {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun}.
- Assistant: {#Considering your needs,|Based on your specifications,|With your requirements in mind,!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would be ideal.|seems perfect.|is a great match.!}
- - |-
- User: {#For my bioactive molecule research,|In my study of pharmacologically active substances,|As I explore biologically active compounds,!} I {#need|require|am searching for!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. It {#should also have|must also feature|also needs to have!} {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun} and {halogen_count#} {halogen_count__names__noun}.
- Assistant: {#I've found a compound|I have a molecule|I suggest a chemical structure!} that {#fulfills|meets|matches!} these criteria. The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would suit your research.|is in line with your needs.|fits your specifications perfectly.!}
+ - |-
+ User: {#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units} and is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}.
+ Assistant: {#There might be multiple compounds that match these criteria. Do you have additional constraints?|Do you have additional constraints?|Is there anything else I should consider?|Is there anything else I should know?!}
+ User: {#No|No, there are no additional constraints.|No, there are no other constraints.|No, there are no other criteria.|No, there are no other requirements.!}
+ Assistant: {#In this case, |OK, |Alright, |Understood, |Got it, |I see, !}the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#should|will|is expected to!} fit your criteria.
+ - |-
+ User: {#I am a medicinal chemist. |I work in drug-discovery. |!}{#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units} and is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. {#Additionally, |Moreover, |On top of that, |In addition, |Furthermore, !}{#I|we!} {#want|need|require|would like|would prefer!} the {#molecule|compound|chemical structure!} to have a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. {#I|We!} {#want to|would like to|need to!} ensure that {#there are|the molecule contains|the compound contains!} {carboxyl_count#} {carboxyl_count__names__noun}.
+ Assistant: {#Thanks for the detailed description. |Thanks. |!}{#I suggest|I recommend|I propose|I would suggest|I would recommend|I would propose!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}.{# This compound should fulfill your needs.| This chemical should satisfy your constraints.|!}
+ - |-
+ User: {#I am a medicinal chemist. |I work in drug-discovery. |!}{#I want to|I must|I have to|I need to!} {#design|synthesize|create|make|generate!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} and has {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units}. {#Additionally, |Moreover, |On top of that, |In addition, |Furthermore, !}{#I|we!} {#want|need|require|would like|would prefer!} the {#molecule|compound|chemical structure!} to have a {carbon_mass__names__noun} of {carbon_mass#} {carbon_mass__units}. {#I|We!} {#want to|would like to|need to!} ensure that {#there are|the molecule contains|the compound contains!} {carboxyl_count#} {carboxyl_count__names__noun} and {ether_count#} {ether_count__names__noun} as well as {alkanol_count#} {alkanol_count__names__noun} and {thiol_count#} {thiol_count__names__noun}.
+ Assistant: {#Thanks for the detailed description. |Thanks. |!}{#I suggest|I recommend|I propose|I would suggest|I would recommend|I would propose!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}.{# This compound should fulfill your needs.| This chemical should satisfy your constraints.|!}
+ - |-
+ User: {#I am researching|I am investigating|I am studying!} {#pharmaceuticals|medicinal compounds|drug molecules!} and {#need|require|am looking for!} a {#molecule|compound|chemical structure!} with a {aqeuous_solubility__names__noun} of {aqeuous_solubility#} {aqeuous_solubility__units}. It should also be {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. {#Additionally,|Moreover,|Furthermore,!} it {#must|should|needs to!} have {amine_count#} {amine_count__names__noun}.
+ Assistant: {#To meet these requirements, |Considering your specifications, |Taking into account your needs, !}I {#recommend|suggest|propose!} a {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#}. {#This should match your criteria.|This fits your described parameters.|This aligns with your requirements.!}
+ - |-
+ User: {#As a chemist,|Being a chemical researcher,|In my chemical research,!} I {#am looking for|require|need!} a {#molecule|compound|chemical structure!} with {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun} and {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun}. It {#also needs to|should also!} be {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} and have a {oxygen_mass__names__noun} of {oxygen_mass#} {oxygen_mass__units}.
+ Assistant: {#I've got the ideal compound for you.|I have a compound that fits these specifications.|I suggest a molecule that meets your needs.!} The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#should work perfectly.|is exactly what you're looking for.|matches your requirements.!}
+ - |-
+ User: {#In my pharmaceutical research,|For my current drug discovery project,|In my medicinal chemistry studies,!} I {#require|need|am looking for!} a {#molecule|compound|chemical structure!} with {num_chiral_centers#} {num_chiral_centers__names__noun} and {num_lipinski_violations#} {num_lipinski_violations__names__noun}. It should {#also have|also possess|also contain!} a {nitrogen_mass__names__noun} of {nitrogen_mass#} {nitrogen_mass__units} and be {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}.
+ Assistant: {#After considering your needs,|Based on your requirements,|Taking your specifications into account,!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would be a great fit.|seems to be a perfect match.|should meet all your criteria.!}
+ - |-
+ User: {#In my research on|For my study of|While investigating!} {#non-toxic chemicals|safe compounds|environment-friendly substances!}, I {#need|require|am looking for!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} with {num_carbon_atoms#} {num_carbon_atoms__names__noun}. {#Additionally,|Furthermore,|Moreover,!} it {#should have|must have|needs to have!} {num_nitrogen_atoms#} {num_nitrogen_atoms__names__noun}.
+ Assistant: {#I have a compound in mind|I can suggest a molecule|I've identified a chemical structure!} that {#fits|meets|aligns with!} these requirements. The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#is suitable for your needs.|meets your specified criteria.|should work well for your research.!}
+ - |-
+ User: {#As a pharmacologist,|In my pharmacological studies,|For my drug development work,!} I {#require|need|am in need of!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} and has {num_oxygen_atoms#} {num_oxygen_atoms__names__noun}. {#Also,|In addition,|Moreover,!} it {#must|should!} have a {molecular_formula__names__noun} of {molecular_formula#}.
+ Assistant: {#I recommend|I suggest|I propose!} a {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} that {#satisfies these conditions.|meets these criteria.|is aligned with your requirements.!}
+ - |-
+ User: {#In my environmental chemistry work,|For my eco-friendly compound research,|As part of my sustainable chemical studies,!} I {#am looking for|require|need!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective} with a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}. {#Also,|Additionally,|Moreover,!} it {#should possess|must contain|needs to have!} {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun}.
+ Assistant: {#Considering your needs,|Based on your specifications,|With your requirements in mind,!} the {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would be ideal.|seems perfect.|is a great match.!}
+ - |-
+ User: {#For my bioactive molecule research,|In my study of pharmacologically active substances,|As I explore biologically active compounds,!} I {#need|require|am searching for!} a {#molecule|compound|chemical structure!} that is {toxicity_SR-ATAD5#not &NULL}{toxicity_SR-ATAD5__names__adjective}. It {#should also have|must also feature|also needs to have!} {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun} and {halogen_count#} {halogen_count__names__noun}.
+ Assistant: {#I've found a compound|I have a molecule|I suggest a chemical structure!} that {#fulfills|meets|matches!} these criteria. The {#molecule|compound|chemical structure!} with {SMILES__description} {SMILES#} {#would suit your research.|is in line with your needs.|fits your specifications perfectly.!}
diff --git a/data/tabular/inverse_3/meta.yaml b/data/tabular/inverse_3/meta.yaml
index 9d3d20a5d..27775ce4f 100644
--- a/data/tabular/inverse_3/meta.yaml
+++ b/data/tabular/inverse_3/meta.yaml
@@ -1,240 +1,239 @@
----
name: inverse_3
description: |-
- Inverse design task constructed by merging kcnq2_potassium_channel_butkiewicz and
- choline_transporter_butkiewicz and augmenting it with molecular descriptors.
+ Inverse design task constructed by merging kcnq2_potassium_channel_butkiewicz and
+ choline_transporter_butkiewicz and augmenting it with molecular descriptors.
targets:
- - id: activity_kcnq2_potassium_channel
- description: whether it is active against kcnq2 potassium channel receptor (1) or not (0).
- units:
- type: boolean
- names:
- - adjective: kcnq2 potassium channel inhibiting
- pubchem_aids:
- - 2239
- - 2287
- - 2282
- - 2283
- - 2558
- uris: []
- - id: activity_choline_transporter
- description: inhibition of choline transporter receptor (1) or not (0).
- units:
- type: boolean
- names:
- - adjective: choline transporter activity inhibiting
- pubchem_aids:
- - 488975
- - 493221
- - 504840
- - 588401
- - 493222
- - 602208
- - id: carboxyl_count
- description: number of carboxyl groups
- type: ordinal
- significant_digits: 0
- names:
- - noun: carboxyl groups
- - id: carbonyl_count
- description: number of carbonyl groups
- type: ordinal
- significant_digits: 0
- names:
- - noun: carbonyl groups
- - id: ether_count
- description: number of ether groups
- type: ordinal
- names:
- - noun: ether groups
- - id: alkanol_count
- description: number of alkanol groups
- significant_digits: 0
- type: ordinal
- names:
- - noun: alkanol groups
- - id: thiol_count
- description: number of thiol groups
- type: ordinal
- significant_digits: 0
- names:
- - noun: thiol groups
- - id: halogen_count
- description: number of halogen groups
- type: ordinal
- significant_digits: 0
- names:
- - noun: halogen groups
- - id: amine_count
- description: number of amine groups
- type: ordinal
- significant_digits: 0
- names:
- - noun: amine groups
- - id: amide_count
- description: number of amide groups
- type: ordinal
- significant_digits: 0
- names:
- - noun: amide groups
- - id: ketone_count
- description: number of ketone groups
- significant_digits: 0
- type: ordinal
- names:
- - noun: ketone group count
- - id: num_valence_electrons
- description: number of valence electrons
- significant_digits: 0
- type: ordinal
- names:
- - noun: valence electrons
- - id: molecular_formula
- description: molecular formula
- type: text
- names:
- - noun: molecular formula
- - id: monoisotopic_molecular_mass
- description: monoisotopic molecular mass
- type: continuous
- units: g/mol
- names:
- - noun: monoisotopic molecular mass
- - id: carbon_mass
- description: carbon mass
- type: continuous
- units: g/mol
- names:
- - noun: carbon mass
- - id: hydrogen_mass
- description: hydrogen mass
- type: continuous
- units: g/mol
- names:
- - noun: hydrogen mass
- - id: nitrogen_mass
- description: nitrogen mass
- type: continuous
- units: g/mol
- names:
- - noun: nitrogen mass
- - id: oxygen_mass
- description: oxygen mass
- units: g/mol
- type: continuous
- names:
- - noun: oxygen mass
- - id: num_carbon_atoms
- description: number of carbon atoms
- type: ordinal
- significant_digits: 0
- names:
- - noun: carbon atoms
- - id: num_hydrogen_atoms
- type: ordinal
- significant_digits: 0
- description: number of hydrogen atoms
- names:
- - noun: hydrogen atoms
- - id: num_nitrogen_atoms
- significant_digits: 0
- description: number of nitrogen atoms
- type: ordinal
- names:
- - noun: nitrogen atoms
- - id: num_oxygen_atoms
- significant_digits: 0
- description: number of oxygen atoms
- type: ordinal
- names:
- - noun: oxygen atoms
- - id: num_hydrogen_bond_acceptors
- significant_digits: 0
- description: number of hydrogen bond acceptors
- type: ordinal
- names:
- - noun: hydrogen bond acceptors
- - id: num_hydrogen_bond_donors
- description: number of hydrogen bond donors
- type: ordinal
- significant_digits: 0
- names:
- - noun: hydrogen bond donors
- - id: num_lipinski_violations
- description: number of Lipinski violations
- type: ordinal
- significant_digits: 0
- names:
- - noun: Lipinski violations
- - noun: Lipinski rule of five violations
- - id: num_chiral_centers
- description: number of chiral centers
- significant_digits: 0
- type: ordinal
- names:
- - noun: chiral center count
+ - id: activity_kcnq2_potassium_channel
+ description: whether it is active against kcnq2 potassium channel receptor (1) or not (0).
+ units:
+ type: boolean
+ names:
+ - adjective: kcnq2 potassium channel inhibiting
+ pubchem_aids:
+ - 2239
+ - 2287
+ - 2282
+ - 2283
+ - 2558
+ uris: []
+ - id: activity_choline_transporter
+ description: inhibition of choline transporter receptor (1) or not (0).
+ units:
+ type: boolean
+ names:
+ - adjective: choline transporter activity inhibiting
+ pubchem_aids:
+ - 488975
+ - 493221
+ - 504840
+ - 588401
+ - 493222
+ - 602208
+ - id: carboxyl_count
+ description: number of carboxyl groups
+ type: ordinal
+ significant_digits: 0
+ names:
+ - noun: carboxyl groups
+ - id: carbonyl_count
+ description: number of carbonyl groups
+ type: ordinal
+ significant_digits: 0
+ names:
+ - noun: carbonyl groups
+ - id: ether_count
+ description: number of ether groups
+ type: ordinal
+ names:
+ - noun: ether groups
+ - id: alkanol_count
+ description: number of alkanol groups
+ significant_digits: 0
+ type: ordinal
+ names:
+ - noun: alkanol groups
+ - id: thiol_count
+ description: number of thiol groups
+ type: ordinal
+ significant_digits: 0
+ names:
+ - noun: thiol groups
+ - id: halogen_count
+ description: number of halogen groups
+ type: ordinal
+ significant_digits: 0
+ names:
+ - noun: halogen groups
+ - id: amine_count
+ description: number of amine groups
+ type: ordinal
+ significant_digits: 0
+ names:
+ - noun: amine groups
+ - id: amide_count
+ description: number of amide groups
+ type: ordinal
+ significant_digits: 0
+ names:
+ - noun: amide groups
+ - id: ketone_count
+ description: number of ketone groups
+ significant_digits: 0
+ type: ordinal
+ names:
+ - noun: ketone group count
+ - id: num_valence_electrons
+ description: number of valence electrons
+ significant_digits: 0
+ type: ordinal
+ names:
+ - noun: valence electrons
+ - id: molecular_formula
+ description: molecular formula
+ type: text
+ names:
+ - noun: molecular formula
+ - id: monoisotopic_molecular_mass
+ description: monoisotopic molecular mass
+ type: continuous
+ units: g/mol
+ names:
+ - noun: monoisotopic molecular mass
+ - id: carbon_mass
+ description: carbon mass
+ type: continuous
+ units: g/mol
+ names:
+ - noun: carbon mass
+ - id: hydrogen_mass
+ description: hydrogen mass
+ type: continuous
+ units: g/mol
+ names:
+ - noun: hydrogen mass
+ - id: nitrogen_mass
+ description: nitrogen mass
+ type: continuous
+ units: g/mol
+ names:
+ - noun: nitrogen mass
+ - id: oxygen_mass
+ description: oxygen mass
+ units: g/mol
+ type: continuous
+ names:
+ - noun: oxygen mass
+ - id: num_carbon_atoms
+ description: number of carbon atoms
+ type: ordinal
+ significant_digits: 0
+ names:
+ - noun: carbon atoms
+ - id: num_hydrogen_atoms
+ type: ordinal
+ significant_digits: 0
+ description: number of hydrogen atoms
+ names:
+ - noun: hydrogen atoms
+ - id: num_nitrogen_atoms
+ significant_digits: 0
+ description: number of nitrogen atoms
+ type: ordinal
+ names:
+ - noun: nitrogen atoms
+ - id: num_oxygen_atoms
+ significant_digits: 0
+ description: number of oxygen atoms
+ type: ordinal
+ names:
+ - noun: oxygen atoms
+ - id: num_hydrogen_bond_acceptors
+ significant_digits: 0
+ description: number of hydrogen bond acceptors
+ type: ordinal
+ names:
+ - noun: hydrogen bond acceptors
+ - id: num_hydrogen_bond_donors
+ description: number of hydrogen bond donors
+ type: ordinal
+ significant_digits: 0
+ names:
+ - noun: hydrogen bond donors
+ - id: num_lipinski_violations
+ description: number of Lipinski violations
+ type: ordinal
+ significant_digits: 0
+ names:
+ - noun: Lipinski violations
+ - noun: Lipinski rule of five violations
+ - id: num_chiral_centers
+ description: number of chiral centers
+ significant_digits: 0
+ type: ordinal
+ names:
+ - noun: chiral center count
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://doi.org/10.1038/s41597-019-0151-1
- description: corresponding publication
- - url: https://tdcommons.ai/single_pred_tasks/adme/#aqeuous_solubility-aqsoldb
- description: data source
- - url: https://github.com/lamalab-org/chem-caption
- description: software used to generate features
+ - url: https://doi.org/10.1038/s41597-019-0151-1
+ description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/adme/#solubility-aqsoldb
+ description: data source
+ - url: https://github.com/lamalab-org/chem-caption
+ description: software used to generate features
num_points: 299452
bibtex:
- - |-
- @article{Sorkun_2019,
- doi = {10.1038/s41597-019-0151-1},
- url = {https://doi.org/10.1038%2Fs41597-019-0151-1},
- year = {2019},
- month = aug,
- publisher = {Springer Science and Business Media LLC},
- volume = {6},
- number = {1},
- author = {Murat Cihan Sorkun and Abhishek Khetan and
- Suleyman Er},
- title = {AqSolDB, a curated reference set of aqueous aqeuous_solubility
- and 2D descriptors for a diverse set of compounds},
- journal = {Scientific Data}
+ - |-
+ @article{Sorkun_2019,
+ doi = {10.1038/s41597-019-0151-1},
+ url = {https://doi.org/10.1038%2Fs41597-019-0151-1},
+ year = {2019},
+ month = aug,
+ publisher = {Springer Science and Business Media LLC},
+ volume = {6},
+ number = {1},
+ author = {Murat Cihan Sorkun and Abhishek Khetan and
+ Suleyman Er},
+ title = {AqSolDB, a curated reference set of aqueous solubility
+ and 2D descriptors for a diverse set of compounds},
+ journal = {Scientific Data}
templates:
- - |-
- User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a specific number of carbon atoms?
- Assistant: What is the desired {#count for|number of!} {num_carbon_atoms__names__noun} in the molecule?
- User: {#Preferably |!}{num_carbon_atoms#}.
- Assistant: For a count of {num_carbon_atoms#} {num_carbon_atoms__names__noun}, a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#} {#would be suitable|would satisfy the constraints!}.
- - |-
- User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a specific number of hydrogen atoms?
- Assistant: What is the desired {#count for|number of!} {num_hydrogen_atoms__names__noun} in the molecule?
- User: {#Preferably |!}{num_hydrogen_atoms#}.
- Assistant: For a count of {num_hydrogen_atoms#} {num_hydrogen_atoms__names__noun}, a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#} {#would be suitable|would satisfy the constraints!}.
- - |-
- User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a specific number of nitrogen atoms and molecular formula?
- Assistant: What is the desired {molecular_formula__names__noun}?
- User: {#Preferably |!}{molecular_formula#}.
- Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}.
- - |-
- User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with {num_chiral_centers__names__noun} and a specific molecular formula?
- Assistant: What is the desired {molecular_formula__names__noun}?
- User: {#Preferably |Ideally |!}{molecular_formula#}.
- Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}.
- - |-
- User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun} and a {molecular_formula__names__noun} of {molecular_formula#}?
- Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}.
- - |-
- User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun} and a {molecular_formula__names__noun} of {molecular_formula#}?
- Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}.
- - |-
- User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a {num_lipinski_violations#} {num_lipinski_violations__names__noun} and a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}?
- Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}.
- - |-
- User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a {num_carbon_atoms#} {num_carbon_atoms__names__noun}, {num_hydrogen_atoms#} {num_hydrogen_atoms__names__noun} and {num_nitrogen_atoms#} {num_nitrogen_atoms__names__noun}?
- Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}.
+ - |-
+ User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a specific number of carbon atoms?
+ Assistant: What is the desired {#count for|number of!} {num_carbon_atoms__names__noun} in the molecule?
+ User: {#Preferably |!}{num_carbon_atoms#}.
+ Assistant: For a count of {num_carbon_atoms#} {num_carbon_atoms__names__noun}, a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#} {#would be suitable|would satisfy the constraints!}.
+ - |-
+ User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a specific number of hydrogen atoms?
+ Assistant: What is the desired {#count for|number of!} {num_hydrogen_atoms__names__noun} in the molecule?
+ User: {#Preferably |!}{num_hydrogen_atoms#}.
+ Assistant: For a count of {num_hydrogen_atoms#} {num_hydrogen_atoms__names__noun}, a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#} {#would be suitable|would satisfy the constraints!}.
+ - |-
+ User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with a specific number of nitrogen atoms and molecular formula?
+ Assistant: What is the desired {molecular_formula__names__noun}?
+ User: {#Preferably |!}{molecular_formula#}.
+ Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}.
+ - |-
+ User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with {num_chiral_centers__names__noun} and a specific molecular formula?
+ Assistant: What is the desired {molecular_formula__names__noun}?
+ User: {#Preferably |Ideally |!}{molecular_formula#}.
+ Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}.
+ - |-
+ User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with {num_hydrogen_bond_acceptors#} {num_hydrogen_bond_acceptors__names__noun} and a {molecular_formula__names__noun} of {molecular_formula#}?
+ Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}.
+ - |-
+ User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with {num_hydrogen_bond_donors#} {num_hydrogen_bond_donors__names__noun} and a {molecular_formula__names__noun} of {molecular_formula#}?
+ Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}.
+ - |-
+ User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with {num_lipinski_violations#} {num_lipinski_violations__names__noun} and a {monoisotopic_molecular_mass__names__noun} of {monoisotopic_molecular_mass#} {monoisotopic_molecular_mass__units}?
+ Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}.
+ - |-
+ User: I {#need|want!} a {#molecule|chemical|drug|chemical structure!} that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__adjective} and {activity_choline_transporter#not &NULL}{activity_choline_transporter__names__adjective}. {#Can you|Could you!} suggest something with {num_carbon_atoms#} {num_carbon_atoms__names__noun}, {num_hydrogen_atoms#} {num_hydrogen_atoms__names__noun} and {num_nitrogen_atoms#} {num_nitrogen_atoms__names__noun}?
+ Assistant: {#In this case, |!}I {#recommend|suggest|propose!} a {#molecule|chemical|drug|chemical structure!} with {SMILES__description} {SMILES#}.
diff --git a/data/tabular/iupac_goldbook/meta.yaml b/data/tabular/iupac_goldbook/meta.yaml
index 32fbd8e2a..423db0011 100644
--- a/data/tabular/iupac_goldbook/meta.yaml
+++ b/data/tabular/iupac_goldbook/meta.yaml
@@ -1,95 +1,94 @@
----
name: IUPAC Compendium of Chemical Terminology
description: |-
- The Compendium is popularly referred to as the Gold
- Book, in recognition of the contribution of the late Victor Gold, who
- initiated work on the first edition. It is one of the series of IUPAC
- Colour Books on chemical nomenclature, terminology, symbols and units
- (see the list of source documents), and collects together terminology
- definitions from IUPAC recommendations already published in Pure and
- Applied Chemistry and in the other Colour Books. Terminology
- definitions published by IUPAC are drafted by international committees
- of experts in the appropriate chemistry sub-disciplines, and ratified
- by IUPAC's Interdivisional Committee on Terminology, Nomenclature and
- Symbols (ICTNS). In this edition of the Compendium these IUPAC-approved
- definitions are supplemented with some definitions from ISO and from
- the International Vocabulary of Basic and General Terms in Metrology,
- both these sources are recognised by IUPAC as authoritative. The result
- is a collection of nearly 7000 terms, with authoritative definitions,
- spanning the whole range of chemistry.
+ The Compendium is popularly referred to as the Gold
+ Book, in recognition of the contribution of the late Victor Gold, who
+ initiated work on the first edition. It is one of the series of IUPAC
+ Colour Books on chemical nomenclature, terminology, symbols and units
+ (see the list of source documents), and collects together terminology
+ definitions from IUPAC recommendations already published in Pure and
+ Applied Chemistry and in the other Colour Books. Terminology
+ definitions published by IUPAC are drafted by international committees
+ of experts in the appropriate chemistry sub-disciplines, and ratified
+ by IUPAC's Interdivisional Committee on Terminology, Nomenclature and
+ Symbols (ICTNS). In this edition of the Compendium these IUPAC-approved
+ definitions are supplemented with some definitions from ISO and from
+ the International Vocabulary of Basic and General Terms in Metrology,
+ both these sources are recognised by IUPAC as authoritative. The result
+ is a collection of nearly 7000 terms, with authoritative definitions,
+ spanning the whole range of chemistry.
targets:
- - id: definition
- description: definition of a chemistry term
- units:
- type: string
- names:
- - noun: definition
- - noun: text definition
+ - id: definition
+ description: definition of a chemistry term
+ units:
+ type: string
+ names:
+ - noun: definition
+ - noun: text definition
identifiers:
- - id: term
- type: Other
- description: chemistry term
- names:
- - noun: chemistry term
+ - id: term
+ type: Other
+ description: chemistry term
+ names:
+ - noun: chemistry term
license: CC BY-NC-ND 4.0
links:
- - url: https://goldbook.iupac.org
- description: home page
- - url: https://creativecommons.org/licenses/by-nc-nd/4.0/
- description: license description
+ - url: https://goldbook.iupac.org
+ description: home page
+ - url: https://creativecommons.org/licenses/by-nc-nd/4.0/
+ description: license description
num_points: 5551
bibtex:
- - |-
- @article{iupac2023,title={IUPAC Compendium of Chemical Terminology},
- publisher={International Union of Pure and Applied Chemistry},
- isbn={978-0865426849},
- doi={10.1351/goldbook},
- accessdate={2023-01-13T17:08:12+00:00},
- }
+ - |-
+ @article{iupac2023,title={IUPAC Compendium of Chemical Terminology},
+ publisher={International Union of Pure and Applied Chemistry},
+ isbn={978-0865426849},
+ doi={10.1351/goldbook},
+ accessdate={2023-01-13T17:08:12+00:00},
+ }
templates:
- - |-
- The {term__names__noun} "{term#}" can be {#described|defined!} {#by|as!}:
- {#definition}
- - |-
- Task: Please {#give me|create|generate!} a {definition__names__noun} of a {term__names__noun}.
- Term: {term#}
- Constraint: Answer the question with {#full|complete!} sentences.
- Result: {definition#}
- - |-
- Task: Please {#give me|create|generate!} a {term__names__noun} for the {#following |!}{definition__names__noun}:
- Definition: {definition#}
- Result: {term#}
- - |-
- User: Can you {#give me|create|generate!} a {term__names__noun} {#described|defined!} by:
- {#definition}
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {term#}
- - |-
- User: Can you {#give me|create|generate!} the {definition__names__noun} for the following {term__names__noun}:
- {#term}
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go:
- {#definition}
- - |-
- User: I'm {#searching|looking!} for the {term__names__noun} that can be described {#by|as!}:
- {#definition}
- Assistant: This {term__names__noun} fits {#your|this!} definition: {term#}
- - |-
- User: I want to {#come up with|create|generate!} a {definition__names__noun}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The {term__names__noun} can be described {#by|as!}:
- {#term}
- Assistant: {#Ok|Got it!},{# here you go,|!} this {definition__names__noun} fits {#your|this!} description: {definition#}
- - |-
- User: I want to {#come up with|create|generate!} a {term__names__noun}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. |!}How is the {term__names__noun} described?
- User: The {term__names__noun} can be described {#by|as!}:
- {#definition}
- Assistant: {#Ok|Got it!},{# here you go,|!} this {term__names__noun} fits {#your|this!} description: {term#}
- - |-
- Task: Please {#give me|create|generate!} a {definition__names__noun} of a {term__names__noun}.
- Term: {term#}
- Constraint: Answer the question with {#full|complete!} sentences.
- Result:{definition#}
- - |-
- Task: Please {#give me|create|generate!} a {term__names__noun} for the {#following |!}{definition__names__noun}:
- Definition: {definition#}
- Result:{term#}
+ - |-
+ The {term__names__noun} "{term#}" can be {#described|defined!} {#by|as!}:
+ {#definition}
+ - |-
+ Task: Please {#give me|create|generate!} a {definition__names__noun} of a {term__names__noun}.
+ Term: {term#}
+ Constraint: Answer the question with {#full|complete!} sentences.
+ Result: {definition#}
+ - |-
+ Task: Please {#give me|create|generate!} a {term__names__noun} for the {#following |!}{definition__names__noun}:
+ Definition: {definition#}
+ Result: {term#}
+ - |-
+ User: Can you {#give me|create|generate!} a {term__names__noun} {#described|defined!} by:
+ {#definition}
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {term#}
+ - |-
+ User: Can you {#give me|create|generate!} the {definition__names__noun} for the following {term__names__noun}:
+ {#term}
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go:
+ {#definition}
+ - |-
+ User: I'm {#searching|looking!} for the {term__names__noun} that can be described {#by|as!}:
+ {#definition}
+ Assistant: This {term__names__noun} fits {#your|this!} definition: {term#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {definition__names__noun}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The {term__names__noun} can be described {#by|as!}:
+ {#term}
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {definition__names__noun} fits {#your|this!} description: {definition#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {term__names__noun}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. |!}How is the {term__names__noun} described?
+ User: The {term__names__noun} can be described {#by|as!}:
+ {#definition}
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {term__names__noun} fits {#your|this!} description: {term#}
+ - |-
+ Task: Please {#give me|create|generate!} a {definition__names__noun} of a {term__names__noun}.
+ Term: {term#}
+ Constraint: Answer the question with {#full|complete!} sentences.
+ Result:{definition#}
+ - |-
+ Task: Please {#give me|create|generate!} a {term__names__noun} for the {#following |!}{definition__names__noun}:
+ Definition: {definition#}
+ Result:{term#}
diff --git a/data/tabular/iupac_smiles/meta.yaml b/data/tabular/iupac_smiles/meta.yaml
index fe6f3267d..e61c42cb2 100644
--- a/data/tabular/iupac_smiles/meta.yaml
+++ b/data/tabular/iupac_smiles/meta.yaml
@@ -1,73 +1,72 @@
----
name: iupac_to_smiles
description: |-
- PubChem is an open chemistry database at the National Institutes of Health (NIH).
- This dataset contains the SMILES and different versions of the IUPAC names
+ PubChem is an open chemistry database at the National Institutes of Health (NIH).
+ This dataset contains the SMILES and different versions of the IUPAC names
targets:
- - id: Traditional
- description: traditional IUPAC name
- type: string
- names:
- - noun: traditional IUPAC name
- - id: Systematic
- description: systematic IUPAC name
- type: string
- names:
- - noun: systematic IUPAC name
- - id: CAS_like_Style
- description: CAS-like name
- type: string
- names:
- - noun: CAS-like IUPAC name
- - noun: IUAPC name in CAS-like style
- - id: Preferred
- description: preferred IUPAC name
- type: string
- names:
- - noun: preferred IUPAC name
- - noun: IUPAC name
+ - id: Traditional
+ description: traditional IUPAC name
+ type: string
+ names:
+ - noun: traditional IUPAC name
+ - id: Systematic
+ description: systematic IUPAC name
+ type: string
+ names:
+ - noun: systematic IUPAC name
+ - id: CAS_like_Style
+ description: CAS-like name
+ type: string
+ names:
+ - noun: CAS-like IUPAC name
+ - noun: IUPAC name in CAS-like style
+ - id: Preferred
+ description: preferred IUPAC name
+ type: string
+ names:
+ - noun: preferred IUPAC name
+ - noun: IUPAC name
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
- names:
- - noun: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
+ names:
+ - noun: SMILES
license: CC0 (Public Domain)
links:
- - url: https://pubchem.ncbi.nlm.nih.gov/
- description: original data source
+ - url: https://pubchem.ncbi.nlm.nih.gov/
+ description: original data source
num_points: 27224618
bibtex:
- - |-
- @article{Kim_2022, title={PubChem 2023 update},
- volume={51}, ISSN={1362-4962},
- url={http://dx.doi.org/10.1093/nar/gkac956},
- DOI={10.1093/nar/gkac956}, number={D1},
- journal={Nucleic Acids Research},
- publisher={Oxford University Press (OUP)},
- author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun
- and Gindulyte, Asta and He, Jia and He, Siqian
- and Li, Qingliang and Shoemaker, Benjamin A
- and Thiessen, Paul A and Yu, Bo and Zaslavsky, Leonid
- and Zhang, Jian and Bolton, Evan E},
- year={2022}, month=oct, pages={D1373–D1380} }
+ - |-
+ @article{Kim_2022, title={PubChem 2023 update},
+ volume={51}, ISSN={1362-4962},
+ url={http://dx.doi.org/10.1093/nar/gkac956},
+ DOI={10.1093/nar/gkac956}, number={D1},
+ journal={Nucleic Acids Research},
+ publisher={Oxford University Press (OUP)},
+ author={Kim, Sunghwan and Chen, Jie and Cheng, Tiejun
+ and Gindulyte, Asta and He, Jia and He, Siqian
+ and Li, Qingliang and Shoemaker, Benjamin A
+ and Thiessen, Paul A and Yu, Bo and Zaslavsky, Leonid
+ and Zhang, Jian and Bolton, Evan E},
+ year={2022}, month=oct, pages={D1373–D1380} }
templates:
- - The {Traditional__names__noun} of the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#} is {Traditional#}.
- - The {CAS_like_Style__names__noun} of the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#} is {CAS_like_Style#}.
- - The {Preferred__names__noun} of the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#} is {Preferred#}.
- - The {SMILES__description} of the {#molecule|chemical|compound!} with {Traditional__names__noun} {Traditional#} is {SMILES#}.
- - The {SMILES__description} of the {#molecule|chemical|compound!} with {Systematic__names__noun} {Systematic#} is {SMILES#}.
- - The {SMILES__description} of the {#molecule|chemical|compound!} with {CAS_like_Style__names__noun} {CAS_like_Style#} is {SMILES#}.
- - The {SMILES__description} of the {#molecule|chemical|compound!} with {Preferred__names__noun} {Preferred#} is {SMILES#}.
- - |-
- Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|compound!} {#given the|based on the!} {Traditional__names__noun}.
- IUPAC name: {Traditional#}
- Result: {SMILES#}
- - |-
- Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|compound!} {#given the|based on the!} {Systematic__names__noun}.
- IUPAC name: {Systematic#}
- Result: {SMILES#}
- - |-
- Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|compound!} {#given the|based on the!} {CAS_like_Style__names__noun}.
- IUPAC name: {CAS_like_Style#}
- Result: {SMILES#}
+ - The {Traditional__names__noun} of the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#} is {Traditional#}.
+ - The {CAS_like_Style__names__noun} of the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#} is {CAS_like_Style#}.
+ - The {Preferred__names__noun} of the {#molecule|chemical|compound!} with {SMILES__description} {SMILES#} is {Preferred#}.
+ - The {SMILES__description} of the {#molecule|chemical|compound!} with {Traditional__names__noun} {Traditional#} is {SMILES#}.
+ - The {SMILES__description} of the {#molecule|chemical|compound!} with {Systematic__names__noun} {Systematic#} is {SMILES#}.
+ - The {SMILES__description} of the {#molecule|chemical|compound!} with {CAS_like_Style__names__noun} {CAS_like_Style#} is {SMILES#}.
+ - The {SMILES__description} of the {#molecule|chemical|compound!} with {Preferred__names__noun} {Preferred#} is {SMILES#}.
+ - |-
+ Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|compound!} {#given the|based on the!} {Traditional__names__noun}.
+ IUPAC name: {Traditional#}
+ Result: {SMILES#}
+ - |-
+ Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|compound!} {#given the|based on the!} {Systematic__names__noun}.
+ IUPAC name: {Systematic#}
+ Result: {SMILES#}
+ - |-
+ Task: Please {#give me|create|generate!} the {SMILES__description} of a {#molecule|chemical|compound!} {#given the|based on the!} {CAS_like_Style__names__noun}.
+ IUPAC name: {CAS_like_Style#}
+ Result: {SMILES#}
diff --git a/data/tabular/kcnq2_potassium_channel_butkiewicz/meta.yaml b/data/tabular/kcnq2_potassium_channel_butkiewicz/meta.yaml
index 04a2b86c8..d5e61b49a 100644
--- a/data/tabular/kcnq2_potassium_channel_butkiewicz/meta.yaml
+++ b/data/tabular/kcnq2_potassium_channel_butkiewicz/meta.yaml
@@ -1,170 +1,169 @@
----
name: kcnq2_potassium_channel_butkiewicz
description: |-
- This dataset was initially curated from HTS data at
- the PubChem database. Details are reported by Butkiewicz et al. (2013).
- Primary screen AID 2239, AID 2287 validated active compounds to be
- potentiators. Counter screens are AID 2282, AID 2283, and AID 2558.
- Final set of 213 active compounds was acquired by removing the active
- compounds of AID 2282, AID 2283 and AID 2558 from the confirmatory
- screen active set of compounds (AID 2287).
+ This dataset was initially curated from HTS data at
+ the PubChem database. Details are reported by Butkiewicz et al. (2013).
+ Primary screen AID 2239, AID 2287 validated active compounds to be
+ potentiators. Counter screens are AID 2282, AID 2283, and AID 2558.
+ Final set of 213 active compounds was acquired by removing the active
+ compounds of AID 2282, AID 2283 and AID 2558 from the confirmatory
+ screen active set of compounds (AID 2287).
targets:
- - id: activity_kcnq2_potassium_channel
- description: whether it is active against kcnq2 potassium channel receptor (1) or not (0).
- units:
- type: boolean
- names:
- - noun: inhibition of the kcnq2 potassium channel activity
- - adjective: kcnq2 potassium channel inhibition
- - gerund: inhibiting the activity of kcnq2 potassium channels
- - verb: blocks kcnq2 potassium channels
- - verb: inhibits kcnq2 potassium channels
- pubchem_aids:
- - 2239
- - 2287
- - 2282
- - 2283
- - 2558
- uris: []
+ - id: activity_kcnq2_potassium_channel
+ description: whether it is active against kcnq2 potassium channel receptor (1) or not (0).
+ units:
+ type: boolean
+ names:
+ - noun: inhibition of the kcnq2 potassium channel activity
+ - adjective: kcnq2 potassium channel inhibition
+ - gerund: inhibiting the activity of kcnq2 potassium channels
+ - verb: blocks kcnq2 potassium channels
+ - verb: inhibits kcnq2 potassium channels
+ pubchem_aids:
+ - 2239
+ - 2287
+ - 2282
+ - 2283
+ - 2558
+ uris: []
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al
- description: original dataset
- - url: https://doi.org/10.3390/molecules18010735
- description: corresponding publication
- - url: https://doi.org/10.1093/nar/gky1033
- description: corresponding publication
- - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/
- description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al
+ description: original dataset
+ - url: https://doi.org/10.3390/molecules18010735
+ description: corresponding publication
+ - url: https://doi.org/10.1093/nar/gky1033
+ description: corresponding publication
+ - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/
+ description: corresponding publication
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
num_points: 302405
bibtex:
- - |-
- @article{Butkiewicz2013,
- doi = {10.3390/molecules18010735},
- url = {https://doi.org/10.3390/molecules18010735},
- year = {2013},
- month = jan,
- publisher = {{MDPI} {AG}},
- volume = {18},
- number = {1},
- pages = {735--756},
- author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and
- Jeffrey Mendenhall and Pedro Teixeira and C. Weaver and Jens
- Meiler},
- title = {Benchmarking Ligand-Based Virtual High-Throughput
- Screening with the {PubChem} Database},
- journal = {Molecules}}
- - |-
- @article{Kim2018,
- doi = {10.1093/nar/gky1033},
- url = {https://doi.org/10.1093/nar/gky1033},
- year = {2018},
- month = oct,
- publisher = {Oxford University Press ({OUP})},
- volume = {47},
- number = {D1},
- pages = {D1102--D1109},
- author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta
- Gindulyte and Jia He and Siqian He and Qingliang Li and Benjamin
- A Shoemaker and Paul A Thiessen and Bo Yu and Leonid Zaslavsky
- and Jian Zhang and Evan E Bolton},
- title = {{PubChem} 2019 update: improved access to chemical data},
- journal = {Nucleic Acids Research}}
- - |-
- @article{Butkiewicz2017,
- doi = {},
- url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/},
- year = {2017},
- publisher = {Chem Inform},
- volume = {3},
- number = {1},
- author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe,
- E. W. and Weaver, D. C. and Meiler, J.},
- title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from
- the {P}ub{C}hem {D}atabase}},
- journal = {Chemical Science}}
+ - |-
+ @article{Butkiewicz2013,
+ doi = {10.3390/molecules18010735},
+ url = {https://doi.org/10.3390/molecules18010735},
+ year = {2013},
+ month = jan,
+ publisher = {{MDPI} {AG}},
+ volume = {18},
+ number = {1},
+ pages = {735--756},
+ author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and
+ Jeffrey Mendenhall and Pedro Teixeira and C. Weaver and Jens
+ Meiler},
+ title = {Benchmarking Ligand-Based Virtual High-Throughput
+ Screening with the {PubChem} Database},
+ journal = {Molecules}}
+ - |-
+ @article{Kim2018,
+ doi = {10.1093/nar/gky1033},
+ url = {https://doi.org/10.1093/nar/gky1033},
+ year = {2018},
+ month = oct,
+ publisher = {Oxford University Press ({OUP})},
+ volume = {47},
+ number = {D1},
+ pages = {D1102--D1109},
+ author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta
+ Gindulyte and Jia He and Siqian He and Qingliang Li and Benjamin
+ A Shoemaker and Paul A Thiessen and Bo Yu and Leonid Zaslavsky
+ and Jian Zhang and Evan E Bolton},
+ title = {{PubChem} 2019 update: improved access to chemical data},
+ journal = {Nucleic Acids Research}}
+ - |-
+ @article{Butkiewicz2017,
+ doi = {},
+ url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/},
+ year = {2017},
+ publisher = {Chem Inform},
+ volume = {3},
+ number = {1},
+ author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe,
+ E. W. and Weaver, D. C. and Meiler, J.},
+ title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from
+ the {P}ub{C}hem {D}atabase},
+ journal = {Chemical Science}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_kcnq2_potassium_channel#no &NULL}{activity_kcnq2_potassium_channel__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {activity_kcnq2_potassium_channel#no &NULL}{activity_kcnq2_potassium_channel__names__noun}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_kcnq2_potassium_channel#no &NULL}{activity_kcnq2_potassium_channel__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {activity_kcnq2_potassium_channel#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_kcnq2_potassium_channel__names__gerund}?
- Assistant: {activity_kcnq2_potassium_channel#No&Yes}, this molecule is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {activity_kcnq2_potassium_channel__names__gerund}?
- Assistant: {activity_kcnq2_potassium_channel#No&Yes}, it is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}?
- Assistant: This is a molecule that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {activity_kcnq2_potassium_channel#not &NULL}be {activity_kcnq2_potassium_channel__names__gerund}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {activity_kcnq2_potassium_channel#not &NULL}be {activity_kcnq2_potassium_channel__names__gerund}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} {activity_kcnq2_potassium_channel__names__gerund}:{activity_kcnq2_potassium_channel#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{activity_kcnq2_potassium_channel#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result:This molecule is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}.
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_kcnq2_potassium_channel__names__gerund}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {activity_kcnq2_potassium_channel%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%activity_kcnq2_potassium_channel%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%activity_kcnq2_potassium_channel%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {activity_kcnq2_potassium_channel#no &NULL}{activity_kcnq2_potassium_channel__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {activity_kcnq2_potassium_channel#no &NULL}{activity_kcnq2_potassium_channel__names__noun}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {activity_kcnq2_potassium_channel#no &NULL}{activity_kcnq2_potassium_channel__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {activity_kcnq2_potassium_channel#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {activity_kcnq2_potassium_channel__names__gerund}?
+ Assistant: {activity_kcnq2_potassium_channel#No&Yes}, this molecule is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {activity_kcnq2_potassium_channel__names__gerund}?
+ Assistant: {activity_kcnq2_potassium_channel#No&Yes}, it is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}?
+ Assistant: This is a molecule that is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {activity_kcnq2_potassium_channel#not &NULL}be {activity_kcnq2_potassium_channel__names__gerund}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {activity_kcnq2_potassium_channel#not &NULL}be {activity_kcnq2_potassium_channel__names__gerund}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} {activity_kcnq2_potassium_channel__names__gerund}:{activity_kcnq2_potassium_channel#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{activity_kcnq2_potassium_channel#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {activity_kcnq2_potassium_channel__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result:This molecule is {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}.
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {activity_kcnq2_potassium_channel__names__gerund}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {activity_kcnq2_potassium_channel%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%activity_kcnq2_potassium_channel%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {activity_kcnq2_potassium_channel#not &NULL}{activity_kcnq2_potassium_channel__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%activity_kcnq2_potassium_channel%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/ld50_zhu/example_processing_and_templates.ipynb b/data/tabular/ld50_zhu/example_processing_and_templates.ipynb
index 5f2f66c1b..c2a09dcac 100644
--- a/data/tabular/ld50_zhu/example_processing_and_templates.ipynb
+++ b/data/tabular/ld50_zhu/example_processing_and_templates.ipynb
@@ -26,11 +26,9 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"id": "cf59e3e9-8061-4022-9eae-e978311b4155",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
@@ -56,11 +54,9 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"id": "7bb8eb5e-f513-40d2-a68c-7cda1a51ad31",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"fn_data_original = \"data_original.csv\""
@@ -68,11 +64,9 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"id": "b39a142e-ccbc-49d2-98b0-a5f9bde9fd27",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [
{
"name": "stderr",
@@ -86,16 +80,14 @@
}
],
"source": [
- "data = Tox(name = 'LD50_Zhu')"
+ "data = Tox(name=\"LD50_Zhu\")"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"id": "26d9f62a-07f5-4113-8161-d5dfcf0bfb71",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"data.get_data().to_csv(fn_data_original, index=False)"
@@ -103,11 +95,9 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"id": "43873fc3-20a8-487d-a7c5-33bd58414159",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -136,11 +126,9 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"id": "77f614e7-b133-40bc-8759-2d930e4c120e",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -160,11 +148,9 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"id": "8f5a0387-f9e3-4e1a-8d14-5df618195f70",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(fn_data_original, delimiter=\",\")"
@@ -172,12 +158,9 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"id": "55b0bd63-62a0-469e-9d8a-e9ada3fe01c4",
- "metadata": {
- "scrolled": true,
- "tags": []
- },
+ "metadata": {},
"outputs": [
{
"data": {
@@ -256,7 +239,7 @@
"4 S=C=Nc1ccc(Br)cc1 2.729 "
]
},
- "execution_count": 8,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -276,11 +259,9 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"id": "ec2458e5-455f-4f03-8ce9-c0d12e9ed371",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [
{
"data": {
@@ -288,7 +269,7 @@
"['Drug_ID', 'Drug', 'Y']"
]
},
- "execution_count": 9,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -300,21 +281,19 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"id": "28c9b695",
"metadata": {},
"outputs": [],
"source": [
- "assert fields_orig == ['Drug_ID', 'Drug', 'Y']"
+ "assert fields_orig == [\"Drug_ID\", \"Drug\", \"Y\"]"
]
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"id": "a46dd8ff-37b3-4894-8226-3bf98226dd09",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"fields_clean = [\n",
@@ -326,11 +305,9 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"id": "785d37cb-1fb4-4a91-a923-d5a78a37f36a",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"df.columns = fields_clean"
@@ -338,7 +315,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"id": "aaad8f07",
"metadata": {},
"outputs": [],
@@ -348,11 +325,9 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"id": "1bf212cb-1653-457b-9f5d-416d4dd14b53",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [
{
"data": {
@@ -431,7 +406,7 @@
"4 S=C=Nc1ccc(Br)cc1 2.729 "
]
},
- "execution_count": 14,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -450,11 +425,9 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"id": "7e746003-cb1f-434f-bba6-00f0c439c4ac",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"df.compound_name = (\n",
@@ -464,11 +437,9 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"id": "d544fa60-343e-40e1-bd0c-4750f07a7145",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"assert not df.duplicated().sum()"
@@ -484,11 +455,9 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"id": "d6d5efa5-b4b4-4a25-8626-e10f3d691e83",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"fn_data_csv = \"data_clean.csv\""
@@ -496,11 +465,9 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": null,
"id": "727f8d7b-cbb6-43c7-9eab-9d4d65be6b3f",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"df.to_csv(fn_data_csv, index=False)"
@@ -508,11 +475,9 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": null,
"id": "63c8d4a4-906e-418d-be39-879365b4dfa0",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -528,11 +493,9 @@
},
{
"cell_type": "code",
- "execution_count": 20,
+ "execution_count": null,
"id": "a51b9001-25d7-4e0e-a607-477cfc4a9f1c",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -552,11 +515,9 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": null,
"id": "1a512943-4909-4d56-867d-50c151d8d607",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [
{
"data": {
@@ -635,7 +596,7 @@
"4 S=C=Nc1ccc(Br)cc1 2.729 "
]
},
- "execution_count": 21,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -654,11 +615,9 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": null,
"id": "077b0c5f-8772-4879-9317-3fa28799689b",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"fn_data_csv = \"data_clean.csv\""
@@ -666,11 +625,9 @@
},
{
"cell_type": "code",
- "execution_count": 23,
+ "execution_count": null,
"id": "6eaef0e6-2115-4793-ac43-a196b25d47a0",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(fn_data_csv)"
@@ -678,11 +635,9 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": null,
"id": "43619e7c-9c82-4ff0-ae25-403861304635",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [
{
"data": {
@@ -761,7 +716,7 @@
"4 S=C=Nc1ccc(Br)cc1 2.729 "
]
},
- "execution_count": 24,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -780,11 +735,9 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": null,
"id": "49771077-471d-4d71-a9a7-d6b094bbc4f3",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [
{
"data": {
@@ -863,7 +816,7 @@
"4 S=C=Nc1ccc(Br)cc1 2.729 "
]
},
- "execution_count": 25,
+ "execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
@@ -874,11 +827,9 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": null,
"id": "d3890961-444e-4a26-b8fc-ed8c4e959af9",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"meta = {\n",
@@ -945,11 +896,9 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": null,
"id": "ec455cf0-962a-4c0d-bb3e-066e415ffd9b",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"def str_presenter(dumper, data):\n",
@@ -969,11 +918,9 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": null,
"id": "580bbd79-4845-4515-be94-3e4a9815d048",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"fn_meta = \"meta.yaml\""
@@ -981,11 +928,9 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": null,
"id": "873fa5dd-9b60-40f5-b537-4d7a206414ea",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"with open(fn_meta, \"w\") as f:\n",
@@ -994,11 +939,9 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": null,
"id": "d01686c0-6746-4fc4-b019-350270dfc26f",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -1014,11 +957,9 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": null,
"id": "ef6063c5-7a8b-4344-bccf-a073443feebf",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -1087,11 +1028,9 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": null,
"id": "9aab00fd-58a8-40b0-be30-1e269e0d323b",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [],
"source": [
"path_file = \"transform.py\""
@@ -1099,11 +1038,9 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": null,
"id": "8368bb20-8e1c-4b7d-b0e2-b39da36b5972",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -1245,11 +1182,9 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": null,
"id": "d0474f26-70f3-4655-b81a-df4ada90e7a6",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -1268,11 +1203,9 @@
},
{
"cell_type": "code",
- "execution_count": 35,
+ "execution_count": null,
"id": "953e7bee-bd5e-41d0-a2be-506e0bc97727",
- "metadata": {
- "tags": []
- },
+ "metadata": {},
"outputs": [
{
"name": "stdout",
@@ -1316,8 +1249,7 @@
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.9.13"
+ "pygments_lexer": "ipython3"
}
},
"nbformat": 4,
diff --git a/data/tabular/ld50_zhu/meta.yaml b/data/tabular/ld50_zhu/meta.yaml
index bf89726c0..7f77efa10 100644
--- a/data/tabular/ld50_zhu/meta.yaml
+++ b/data/tabular/ld50_zhu/meta.yaml
@@ -1,54 +1,53 @@
----
name: ld50_zhu
description: |-
- Acute toxicity LD50 measures
- the most conservative dose that can lead to lethal adverse effects.
- The higher the dose, the more lethal of a drug.
+ Acute toxicity LD50 measures
+ the most conservative dose that can lead to lethal adverse effects.
+ The higher the dose, the more lethal of a drug.
targets:
- - id: acute_toxicity
- description: Acute Toxicity LD50.
- units: log10(1/(mol/kg))
- type: continuous
- names:
- - noun: acute oral toxicity rat LD50
- - noun: acute oral toxicity (LD50 in rats)
- - noun: LD50 in rats (oral exposure)
- - noun: rat LD50 (oral exposure)
- uris:
- - http://www.bioassayontology.org/bao#BAO_0002117
+ - id: acute_toxicity
+ description: Acute Toxicity LD50.
+ units: log10(1/(mol/kg))
+ type: continuous
+ names:
+ - noun: acute oral toxicity rat LD50
+ - noun: acute oral toxicity (LD50 in rats)
+ - noun: LD50 in rats (oral exposure)
+ - noun: rat LD50 (oral exposure)
+ uris:
+ - http://www.bioassayontology.org/bao#BAO_0002117
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
- - id: compound_name
- type: Other
- description: compound name
- names:
- - noun: compound
- - noun: compound name
- - noun: drug
+ - id: SMILES
+ type: SMILES
+ description: SMILES
+ - id: compound_name
+ type: Other
+ description: compound name
+ names:
+ - noun: compound
+ - noun: compound name
+ - noun: drug
license: CC BY 4.0
links:
- - url: https://doi.org/10.1021/tx900189p
- description: corresponding publication
+ - url: https://doi.org/10.1021/tx900189p
+ description: corresponding publication
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
num_points: 7385
bibtex:
- - |-
- @article{Zhu2009,
- doi = {10.1021/tx900189p},
- url = {https://doi.org/10.1021/tx900189p},
- year = {2009},
- month = oct,
- publisher = {American Chemical Society ({ACS})},
- volume = {22},
- number = {12},
- pages = {1913--1921},
- author = {Hao Zhu and Todd M. Martin and Lin Ye and Alexander
- Sedykh and Douglas M. Young and Alexander Tropsha},
- title = {Quantitative Structure-Activity Relationship Modeling
- of Rat Acute Toxicity by Oral Exposure},
- journal = {Chemical Research in Toxicology}}
+ - |-
+ @article{Zhu2009,
+ doi = {10.1021/tx900189p},
+ url = {https://doi.org/10.1021/tx900189p},
+ year = {2009},
+ month = oct,
+ publisher = {American Chemical Society ({ACS})},
+ volume = {22},
+ number = {12},
+ pages = {1913--1921},
+ author = {Hao Zhu and Todd M. Martin and Lin Ye and Alexander
+ Sedykh and Douglas M. Young and Alexander Tropsha},
+ title = {Quantitative Structure-Activity Relationship Modeling
+ of Rat Acute Toxicity by Oral Exposure},
+ journal = {Chemical Research in Toxicology}}
diff --git a/data/tabular/lipophilicity/meta.yaml b/data/tabular/lipophilicity/meta.yaml
index c06a26d5c..a8ee84384 100644
--- a/data/tabular/lipophilicity/meta.yaml
+++ b/data/tabular/lipophilicity/meta.yaml
@@ -1,60 +1,59 @@
----
name: lipophilicity
description: Experimental results of octanol/water distribution coefficient (logD at pH 7.4).
targets:
- - id: exp
- description: experimental results of octanol/water distribution coefficient (logD at pH 7.4)
- units: (dimensionless)
- type: continuous
- names:
- - noun: octanol/water distribution coefficient (logD at pH 7.4)
- - noun: logD at pH 7.4
- - noun: octanol/water distribution coefficient
- uris:
- - http://www.bioassayontology.org/bao#BAO_0002129
- - http://purl.obolibrary.org/obo/MI_2107
+ - id: exp
+ description: experimental results of octanol/water distribution coefficient (logD at pH 7.4)
+ units: (dimensionless)
+ type: continuous
+ names:
+ - noun: octanol/water distribution coefficient (logD at pH 7.4)
+ - noun: logD at pH 7.4
+ - noun: octanol/water distribution coefficient
+ uris:
+ - http://www.bioassayontology.org/bao#BAO_0002129
+ - http://purl.obolibrary.org/obo/MI_2107
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY-SA 3.0
links:
- - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/Lipophilicity.csv
- description: original dataset link
- - url: https://github.com/cheminfo/molecule-features/blob/main/data/lipophilicity/meta.yaml
- description: original meta data
- - url: https://deepchem.readthedocs.io/en/latest/api_reference/moleculenet.html#lipo-datasets
- description: original dataset link from moleculenet
- - url: https://www.ebi.ac.uk/chembl/document_report_card/CHEMBL3301361/
- description: original report card
- - url: https://chembl.gitbook.io/chembl-interface-documentation/about#data-licensing
- description: original dataset license from chembl
- - url: https://creativecommons.org/licenses/by-sa/3.0/
- description: used dataset license
+ - url: https://deepchemdata.s3-us-west-1.amazonaws.com/datasets/Lipophilicity.csv
+ description: original dataset link
+ - url: https://github.com/cheminfo/molecule-features/blob/main/data/lipophilicity/meta.yaml
+ description: original meta data
+ - url: https://deepchem.readthedocs.io/en/latest/api_reference/moleculenet.html#lipo-datasets
+ description: original dataset link from moleculenet
+ - url: https://www.ebi.ac.uk/chembl/document_report_card/CHEMBL3301361/
+ description: original report card
+ - url: https://chembl.gitbook.io/chembl-interface-documentation/about#data-licensing
+ description: original dataset license from chembl
+ - url: https://creativecommons.org/licenses/by-sa/3.0/
+ description: used dataset license
num_points: 4200
bibtex:
- - |-
- @techreport{hersey2015chembl,
- title={ChEMBL Deposited Data Set-AZ dataset},
- author={Hersey, Anne},
- year={2015},
- institution={Technical Report, Technical report, EMBL-EBI, 2015. https://www. ebi. ac. uk}
- }
+ - |-
+ @techreport{hersey2015chembl,
+ title={ChEMBL Deposited Data Set-AZ dataset},
+ author={Hersey, Anne},
+ year={2015},
+ institution={Technical Report, Technical report, EMBL-EBI, 2015. https://www.ebi.ac.uk}
+ }
templates:
- - |-
- Task: Please answer the multiple choice question below with {%multiple_choice_enum%3-6%aA1}.
- Question: What is the {exp__names__noun} of the {SMILES__description} {SMILES#}?
- Options:
- {exp%}
- Answer: {%multiple_choice_result}
- - |-
- Question: Please estimate the {exp__names__noun} of {SMILES#} by picking one choice of {%multiple_choice_enum%3-6%aA1}.
- Options:
- {exp%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question below with {%multiple_choice_enum%3-6%aA1}.
- Question: What is the {exp__names__noun} of the {SMILES__description} {SMILES#}?
- Options:
- {exp%}
- Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question below with {%multiple_choice_enum%3-6%aA1}.
+ Question: What is the {exp__names__noun} of the {SMILES__description} {SMILES#}?
+ Options:
+ {exp%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Question: Please estimate the {exp__names__noun} of {SMILES#} by picking one choice of {%multiple_choice_enum%3-6%aA1}.
+ Options:
+ {exp%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question below with {%multiple_choice_enum%3-6%aA1}.
+ Question: What is the {exp__names__noun} of the {SMILES__description} {SMILES#}?
+ Options:
+ {exp%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/m1_muscarinic_receptor_agonists_butkiewicz/meta.yaml b/data/tabular/m1_muscarinic_receptor_agonists_butkiewicz/meta.yaml
index 59a1f8b79..c222cdaa0 100644
--- a/data/tabular/m1_muscarinic_receptor_agonists_butkiewicz/meta.yaml
+++ b/data/tabular/m1_muscarinic_receptor_agonists_butkiewicz/meta.yaml
@@ -1,162 +1,161 @@
----
name: m1_muscarinic_receptor_agonists_butkiewicz
description: |-
- Positive allosteric modulation of the M1 Muscarinic
- receptor screened with AID626. Confirmed by screen AID 1488. A second
- counter screen AID 1741. The final set of selective positive
- allosteric modulators of M1 was obtained by removing compounds active
- in AID 1741 from the compounds active in AID 1488 resulting in 188
- compounds.
+ Positive allosteric modulation of the M1 Muscarinic
+ receptor screened with AID626. Confirmed by screen AID 1488. A second
+ counter screen AID 1741. The final set of selective positive
+ allosteric modulators of M1 was obtained by removing compounds active
+ in AID 1741 from the compounds active in AID 1488 resulting in 188
+ compounds.
targets:
- - id: m1_muscarinic_agonist
- description: whether it agonist on m1 muscarinic receptor (1) or not (0).
- units:
- type: boolean
- names:
- - noun: positive allosteric modulation of the M1 muscarinic receptor activity
- - gerund: modulating the M1 muscarinic receptor activity in a positive allosteric way
- pubchem_aids:
- - 626
- - 1488
- - 1741
- uris: []
+ - id: m1_muscarinic_agonist
+ description: whether it is an agonist on the m1 muscarinic receptor (1) or not (0).
+ units:
+ type: boolean
+ names:
+ - noun: positive allosteric modulation of the M1 muscarinic receptor activity
+ - gerund: modulating the M1 muscarinic receptor activity in a positive allosteric way
+ pubchem_aids:
+ - 626
+ - 1488
+ - 1741
+ uris: []
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al
- description: original dataset
- - url: https://doi.org/10.3390/molecules18010735
- description: corresponding publication
- - url: https://doi.org/10.1093/nar/gky1033
- description: corresponding publication
- - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/
- description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al
+ description: original dataset
+ - url: https://doi.org/10.3390/molecules18010735
+ description: corresponding publication
+ - url: https://doi.org/10.1093/nar/gky1033
+ description: corresponding publication
+ - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/
+ description: corresponding publication
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
num_points: 61833
bibtex:
- - |-
- @article{Butkiewicz2013,
- doi = {10.3390/molecules18010735},
- url = {https://doi.org/10.3390/molecules18010735},
- year = {2013},
- month = jan,
- publisher = {{MDPI} {AG}},
- volume = {18},
- number = {1},
- pages = {735--756},
- author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller
- and Jeffrey Mendenhall and Pedro Teixeira and C. Weaver and Jens Meiler},
- title = {Benchmarking Ligand-Based Virtual High-Throughput Screening
- with the {PubChem} Database},
- journal = {Molecules}}
- - |-
- @article{Kim2018,
- doi = {10.1093/nar/gky1033},
- url = {https://doi.org/10.1093/nar/gky1033},
- year = {2018},
- month = oct,
- publisher = {Oxford University Press ({OUP})},
- volume = {47},
- number = {D1},
- pages = {D1102--D1109},
- author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta Gindulyte
- and Jia He and Siqian He and Qingliang Li and Benjamin A Shoemaker
- and Paul A Thiessen and Bo Yu and Leonid Zaslavsky and Jian Zhang and Evan E Bolton},
- title = {{PubChem} 2019 update: improved access to chemical data},
- journal = {Nucleic Acids Research}}
- - |-
- @article{Butkiewicz2017,
- doi = {},
- url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/},
- year = {2017},
- publisher = {Chem Inform},
- volume = {3},
- number = {1},
- author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H.
- and Lowe, E. W. and Weaver, D. C. and Meiler, J.},
- title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets
- from the {P}ub{C}hem {D}atabase}},
- journal = {Chemical Science}}
+ - |-
+ @article{Butkiewicz2013,
+ doi = {10.3390/molecules18010735},
+ url = {https://doi.org/10.3390/molecules18010735},
+ year = {2013},
+ month = jan,
+ publisher = {{MDPI} {AG}},
+ volume = {18},
+ number = {1},
+ pages = {735--756},
+ author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller
+ and Jeffrey Mendenhall and Pedro Teixeira and C. Weaver and Jens Meiler},
+ title = {Benchmarking Ligand-Based Virtual High-Throughput Screening
+ with the {PubChem} Database},
+ journal = {Molecules}}
+ - |-
+ @article{Kim2018,
+ doi = {10.1093/nar/gky1033},
+ url = {https://doi.org/10.1093/nar/gky1033},
+ year = {2018},
+ month = oct,
+ publisher = {Oxford University Press ({OUP})},
+ volume = {47},
+ number = {D1},
+ pages = {D1102--D1109},
+ author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta Gindulyte
+ and Jia He and Siqian He and Qingliang Li and Benjamin A Shoemaker
+ and Paul A Thiessen and Bo Yu and Leonid Zaslavsky and Jian Zhang and Evan E Bolton},
+ title = {{PubChem} 2019 update: improved access to chemical data},
+ journal = {Nucleic Acids Research}}
+ - |-
+ @article{Butkiewicz2017,
+ doi = {},
+ url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/},
+ year = {2017},
+ publisher = {Chem Inform},
+ volume = {3},
+ number = {1},
+ author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H.
+ and Lowe, E. W. and Weaver, D. C. and Meiler, J.},
+ title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets
+ from the {P}ub{C}hem {D}atabase},
+ journal = {Chemical Science}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {m1_muscarinic_agonist#no &NULL}{m1_muscarinic_agonist__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {m1_muscarinic_agonist#no &NULL}{m1_muscarinic_agonist__names__noun}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {m1_muscarinic_agonist#no &NULL}{m1_muscarinic_agonist__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {m1_muscarinic_agonist__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {m1_muscarinic_agonist#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {m1_muscarinic_agonist__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {m1_muscarinic_agonist__names__gerund}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {m1_muscarinic_agonist__names__gerund}?
- Assistant: {m1_muscarinic_agonist#No&Yes}, this molecule is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {m1_muscarinic_agonist__names__gerund}?
- Assistant: {m1_muscarinic_agonist#No&Yes}, it is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}?
- Assistant: This is a molecule that is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {m1_muscarinic_agonist#not &NULL}be {m1_muscarinic_agonist__names__gerund}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {m1_muscarinic_agonist#not &NULL}be {m1_muscarinic_agonist__names__gerund}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} {m1_muscarinic_agonist__names__gerund}:{m1_muscarinic_agonist#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {m1_muscarinic_agonist__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{m1_muscarinic_agonist#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {m1_muscarinic_agonist__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result:This molecule is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}.
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {m1_muscarinic_agonist__names__gerund}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {m1_muscarinic_agonist%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%m1_muscarinic_agonist%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%m1_muscarinic_agonist%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {m1_muscarinic_agonist#no &NULL}{m1_muscarinic_agonist__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {m1_muscarinic_agonist#no &NULL}{m1_muscarinic_agonist__names__noun}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {m1_muscarinic_agonist#no &NULL}{m1_muscarinic_agonist__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {m1_muscarinic_agonist__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {m1_muscarinic_agonist#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {m1_muscarinic_agonist__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {m1_muscarinic_agonist__names__gerund}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {m1_muscarinic_agonist__names__gerund}?
+ Assistant: {m1_muscarinic_agonist#No&Yes}, this molecule is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {m1_muscarinic_agonist__names__gerund}?
+ Assistant: {m1_muscarinic_agonist#No&Yes}, it is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}?
+ Assistant: This is a molecule that is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {m1_muscarinic_agonist#not &NULL}be {m1_muscarinic_agonist__names__gerund}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {m1_muscarinic_agonist#not &NULL}be {m1_muscarinic_agonist__names__gerund}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} {m1_muscarinic_agonist__names__gerund}:{m1_muscarinic_agonist#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {m1_muscarinic_agonist__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result:{m1_muscarinic_agonist#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {m1_muscarinic_agonist__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result:This molecule is {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}.
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {m1_muscarinic_agonist__names__gerund}?
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
+ Options:
+ {m1_muscarinic_agonist%}
+ Answer:{%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%m1_muscarinic_agonist%}
+ Answer: {%multiple_choice_result}
+ - |-
+ Task: Please answer the multiple choice question.
+ Question: Which molecules are {m1_muscarinic_agonist#not &NULL}{m1_muscarinic_agonist__names__gerund}?
+ Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
+ Options:
+ {SMILES%m1_muscarinic_agonist%}
+ Answer:{%multiple_choice_result}
diff --git a/data/tabular/m1_muscarinic_receptor_antagonists_butkiewicz/meta.yaml b/data/tabular/m1_muscarinic_receptor_antagonists_butkiewicz/meta.yaml
index 848ee4cbd..188ad2067 100644
--- a/data/tabular/m1_muscarinic_receptor_antagonists_butkiewicz/meta.yaml
+++ b/data/tabular/m1_muscarinic_receptor_antagonists_butkiewicz/meta.yaml
@@ -1,164 +1,163 @@
----
name: m1_muscarinic_receptor_antagonists_butkiewicz
description: |-
- Primary screen AID628 confirmed by screen AID677.
- AID859 confirmed activity on rat M1 receptor.
- The counter screen AID860 removed non-selective compounds
- being active also at the rat M4 receptor.
- Final set of active compoundsobtained by subtracting active compounds of AID860
- from those in AID677, resulting in 448 total active compounds.
+ Primary screen AID628 confirmed by screen AID677.
+ AID859 confirmed activity on rat M1 receptor.
+ The counter screen AID860 removed non-selective compounds
+ being active also at the rat M4 receptor.
+ Final set of active compounds obtained by subtracting active compounds of AID860
+ from those in AID677, resulting in 448 total active compounds.
targets:
- - id: m1_muscarinic_antagonist
- description: whether it negatively modulates the m1 muscarinic receptor (1) or not (0).
- units:
- type: boolean
- names:
- - noun: negative modulation of the M1 muscarinic receptor activity
- - gerund: modulating the M1 muscarinic receptor activity in a negative way
- pubchem_aids:
- - 628
- - 677
- - 860
- uris: []
+ - id: m1_muscarinic_antagonist
+ description: whether it negatively modulates the m1 muscarinic receptor (1) or not (0).
+ units:
+ type: boolean
+ names:
+ - noun: negative modulation of the M1 muscarinic receptor activity
+ - gerund: modulating the M1 muscarinic receptor activity in a negative way
+ pubchem_aids:
+ - 628
+ - 677
+ - 860
+ uris: []
identifiers:
- - id: SMILES
- type: SMILES
- description: SMILES
+ - id: SMILES
+ type: SMILES
+ description: SMILES
license: CC BY 4.0
links:
- - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al
- description: original dataset
- - url: https://doi.org/10.3390/molecules18010735
- description: corresponding publication
- - url: https://doi.org/10.1093/nar/gky1033
- description: corresponding publication
- - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/
- description: corresponding publication
+ - url: https://tdcommons.ai/single_pred_tasks/hts/#butkiewicz-et-al
+ description: original dataset
+ - url: https://doi.org/10.3390/molecules18010735
+ description: corresponding publication
+ - url: https://doi.org/10.1093/nar/gky1033
+ description: corresponding publication
+ - url: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/
+ description: corresponding publication
benchmarks:
- - name: TDC
- link: https://tdcommons.ai/
- split_column: split
+ - name: TDC
+ link: https://tdcommons.ai/
+ split_column: split
num_points: 61756
bibtex:
- - |-
- @article{Butkiewicz2013,
- doi = {10.3390/molecules18010735},
- url = {https://doi.org/10.3390/molecules18010735},
- year = {2013},
- month = jan,
- publisher = {{MDPI} {AG}},
- volume = {18},
- number = {1},
- pages = {735--756},
- author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and
- Jeffrey Mendenhall and Pedro Teixeira and C. Weaver and Jens
- Meiler},
- title = {Benchmarking Ligand-Based Virtual High-Throughput
- Screening with the {PubChem} Database},
- journal = {Molecules}}
- - |-
- @article{Kim2018,
- doi = {10.1093/nar/gky1033},
- url = {https://doi.org/10.1093/nar/gky1033},
- year = {2018},
- month = oct,
- publisher = {Oxford University Press ({OUP})},
- volume = {47},
- number = {D1},
- pages = {D1102--D1109},
- author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta
- Gindulyte and Jia He and Siqian He and Qingliang Li and Benjamin
- A Shoemaker and Paul A Thiessen and Bo Yu and Leonid Zaslavsky
- and Jian Zhang and Evan E Bolton},
- title = {{PubChem} 2019 update: improved access to chemical data},
- journal = {Nucleic Acids Research}}
- - |-
- @article{Butkiewicz2017,
- doi = {},
- url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/},
- year = {2017},
- publisher = {Chem Inform},
- volume = {3},
- number = {1},
- author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe,
- E. W. and Weaver, D. C. and Meiler, J.},
- title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from
- the {P}ub{C}hem {D}atabase}},
- journal = {Chemical Science}}
+ - |-
+ @article{Butkiewicz2013,
+ doi = {10.3390/molecules18010735},
+ url = {https://doi.org/10.3390/molecules18010735},
+ year = {2013},
+ month = jan,
+ publisher = {{MDPI} {AG}},
+ volume = {18},
+ number = {1},
+ pages = {735--756},
+ author = {Mariusz Butkiewicz and Edward Lowe and Ralf Mueller and
+ Jeffrey Mendenhall and Pedro Teixeira and C. Weaver and Jens
+ Meiler},
+ title = {Benchmarking Ligand-Based Virtual High-Throughput
+ Screening with the {PubChem} Database},
+ journal = {Molecules}}
+ - |-
+ @article{Kim2018,
+ doi = {10.1093/nar/gky1033},
+ url = {https://doi.org/10.1093/nar/gky1033},
+ year = {2018},
+ month = oct,
+ publisher = {Oxford University Press ({OUP})},
+ volume = {47},
+ number = {D1},
+ pages = {D1102--D1109},
+ author = {Sunghwan Kim and Jie Chen and Tiejun Cheng and Asta
+ Gindulyte and Jia He and Siqian He and Qingliang Li and Benjamin
+ A Shoemaker and Paul A Thiessen and Bo Yu and Leonid Zaslavsky
+ and Jian Zhang and Evan E Bolton},
+ title = {{PubChem} 2019 update: improved access to chemical data},
+ journal = {Nucleic Acids Research}}
+ - |-
+ @article{Butkiewicz2017,
+ doi = {},
+ url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5962024/},
+ year = {2017},
+ publisher = {Chem Inform},
+ volume = {3},
+ number = {1},
+ author = {Butkiewicz, M. and Wang, Y. and Bryant, S. H. and Lowe,
+ E. W. and Weaver, D. C. and Meiler, J.},
+ title = {{H}igh-{T}hroughput {S}creening {A}ssay {D}atasets from
+ the {P}ub{C}hem {D}atabase},
+ journal = {Chemical Science}}
templates:
- - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {m1_muscarinic_antagonist#no &NULL}{m1_muscarinic_antagonist__names__noun}.
- - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {m1_muscarinic_antagonist#no &NULL}{m1_muscarinic_antagonist__names__noun}.
- - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {m1_muscarinic_antagonist#no &NULL}{m1_muscarinic_antagonist__names__noun}.
- - The {#molecule |!}{SMILES__description} {SMILES#} is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}.
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result: {m1_muscarinic_antagonist#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result: This molecule is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}.
- - |-
- Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
- Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}.
- Result: {SMILES#}
- - |-
- User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {m1_muscarinic_antagonist__names__gerund}?
- Assistant: {m1_muscarinic_antagonist#No&Yes}, this molecule is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}.
- - |-
- User: Is the molecule with the {SMILES__description} {SMILES#} {m1_muscarinic_antagonist__names__gerund}?
- Assistant: {m1_muscarinic_antagonist#No&Yes}, it is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}.
- - |-
- User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}?
- Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
- - |-
- User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}?
- Assistant: This is a molecule that is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
- User: Yes, please. The molecule should {m1_muscarinic_antagonist#not &NULL}be {m1_muscarinic_antagonist__names__gerund}.
- Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}: {SMILES#}
- - |-
- User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
- Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
- User: Yes, the molecule should {m1_muscarinic_antagonist#not &NULL}be {m1_muscarinic_antagonist__names__gerund}.
- Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}: {SMILES#}
- - Is the {SMILES__description} {SMILES#} {m1_muscarinic_antagonist__names__gerund}:{m1_muscarinic_antagonist#no&yes}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
- Result:{m1_muscarinic_antagonist#False&True}
- - |-
- Task: Please classify a molecule based on the description.
- Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}.
- {#Molecule |!}{SMILES__description}: {SMILES#}
- Constraint: Answer the question in a {#full|complete!} sentence.
- Result:This molecule is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}.
- - |-
- Task: Please answer the multiple choice question.
- Question: Is the molecule with the {SMILES__description} {#representation of |!}{SMILES#} {m1_muscarinic_antagonist__names__gerund}?
- Constraint: Even if you are {#uncertain|not sure!}, you must pick either {%multiple_choice_enum%2%aA1} without using any {#other|additional!} words.
- Options:
- {m1_muscarinic_antagonist%}
- Answer:{%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%m1_muscarinic_antagonist%}
- Answer: {%multiple_choice_result}
- - |-
- Task: Please answer the multiple choice question.
- Question: Which molecules are {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}?
- Constraint: You must select none, one or more options from {%multiple_choice_enum%2-5%aA1} without using any {#other|additional!} words.
- Options:
- {SMILES%m1_muscarinic_antagonist%}
- Answer:{%multiple_choice_result}
+ - The molecule with the {SMILES__description} {#representation of |!}{SMILES#} {#shows|exhibits|displays!} {m1_muscarinic_antagonist#no &NULL}{m1_muscarinic_antagonist__names__noun}.
+ - Based on the {SMILES__description} {#representation |!}{SMILES#}, the molecule {#shows|exhibits|displays!} {m1_muscarinic_antagonist#no &NULL}{m1_muscarinic_antagonist__names__noun}.
+ - The {SMILES__description} {SMILES#} {#represents|is from!} a molecule that {#shows|exhibits|displays!} {m1_muscarinic_antagonist#no &NULL}{m1_muscarinic_antagonist__names__noun}.
+ - The {#molecule |!}{SMILES__description} {SMILES#} is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}.
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: {m1_muscarinic_antagonist#False&True}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Answer the question in a {#full|complete!} sentence.
+ Result: This molecule is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}.
+ - |-
+ Task: Please {#give me|create|generate!} a {#molecule |!}{SMILES__description} based on the {#text |!}description{# below|!}.
+ Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}.
+ Result: {SMILES#}
+ - |-
+ User: Can you {#tell me|derive|estimate!} if the molecule with the {SMILES__description} {SMILES#} is {m1_muscarinic_antagonist__names__gerund}?
+ Assistant: {m1_muscarinic_antagonist#No&Yes}, this molecule is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}.
+ - |-
+ User: Is the molecule with the {SMILES__description} {SMILES#} {m1_muscarinic_antagonist__names__gerund}?
+ Assistant: {m1_muscarinic_antagonist#No&Yes}, it is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}.
+ - |-
+ User: Can you {#give me|create|generate!} the {SMILES__description} of a molecule that is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}?
+ Assistant: {#Yes|Of course|Sure|Yes, I'm happy to help!}, here you go: {SMILES#}
+ - |-
+ User: I'm {#searching|looking!} for the {SMILES__description} of a molecule that is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}?
+ Assistant: This is a molecule that is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should I consider any {#constraints|specific points!} for the {#generation|creation!}?
+ User: Yes, please. The molecule should {m1_muscarinic_antagonist#not &NULL}be {m1_muscarinic_antagonist__names__gerund}.
+ Assistant: {#Ok|Got it!},{# here you go,|!} this {SMILES__description} is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}: {SMILES#}
+ - |-
+ User: I want to {#come up with|create|generate!} a {#molecule |!}{SMILES__description}.
+ Assistant: {#This sounds very exciting. |This sounds very interesting. !}Should it be a special {#molecule|one!}?
+ User: Yes, the molecule should {m1_muscarinic_antagonist#not &NULL}be {m1_muscarinic_antagonist__names__gerund}.
+ Assistant: {#Understood|Got it|Ok!}, this {SMILES__description} is {m1_muscarinic_antagonist#not &NULL}{m1_muscarinic_antagonist__names__gerund}: {SMILES#}
+ - Is the {SMILES__description} {SMILES#} {m1_muscarinic_antagonist__names__gerund}:{m1_muscarinic_antagonist#no&yes}
+ - |-
+ Task: Please classify a molecule based on the description.
+ Description: A molecule that is {m1_muscarinic_antagonist__names__gerund}.
+ {#Molecule |!}{SMILES__description}: {SMILES#}
+ Constraint: Even if you are {#uncertain|not sure!}, you must pick either "True" or "False" without using any {#other|additional!} words.
+ Result: