default parameters

mxfold · Aug 19, 2020 · 89be8a9 · 89be8a9
1 parent 552d134
commit 89be8a9
Show file tree

Hide file tree

Showing 8 changed files with 149 additions and 100 deletions.
diff --git a/.gitignore b/.gitignore
@@ -163,3 +163,4 @@ data
 experiments
 .DS_Store
 poetry.lock
+models
diff --git a/README.md b/README.md
@@ -4,28 +4,37 @@ RNA secondary structure prediction using deep learning with thermodynamic integr
 ## Installation
 
 ### System requirements
-* python (>=3.6)
-* pytorch (>=1.3)
+* python (>=3.7)
+* pytorch (>=1.4)
 * C++17 compatible compiler (tested on Apple clang version 12.0.0 and GCC version 7.4.0) (optional)
 * cmake (>=3.10) (optional)
 
 ### Install from wheel
 
 We provide the wheel python packages for several platforms at [the release](https://github.com/keio-bioinformatics/mxfold2/releases). You can download an appropriate package and install it as follows:
 
-    % pip3 install mxfold2-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl
+    % pip3 install mxfold2-0.1.1-cp38-cp38-macosx_10_15_x86_64.whl
 
 ### Install from sdist
 
 You can build and install from the source distribution downloaded from [the release](https://github.com/keio-bioinformatics/mxfold2/releases) as follows:
 
-    % pip3 install mxfold2-0.1.0.tar.gz
+    % pip3 install mxfold2-0.1.1.tar.gz
 
-TO build MXfold2 from the source distribution, you need a C++17 compatible compiler and cmake.
+To build MXfold2 from the source distribution, you need a C++17 compatible compiler and cmake.
 
 ## Prediction
 
-We provide the pre-trained models at [the release](https://github.com/keio-bioinformatics/mxfold2/releases). You can download ``models-0.1.0.tar.gz`` and extract the pre-trained models from it as follows:
+You can predict the RNA secondary structures of FASTA-formatted RNA sequences as follows:
+
+    % mxfold2 predict test.fa
+    >DS4440
+    GGAUGGAUGUCUGAGCGGUUGAAAGAGUCGGUCUUGAAAACCGAAGUAUUGAUAGGAAUACCGGGGGUUCGAAUCCCUCUCCAUCCG
+    (((((((........(((((..((((.....))))...)))))...................(((((.......)))))))))))). (24.8)
+
+By default, MXfold2 uses the parameters trained on TrainSetA and TrainSetB (see our paper).
+
+We also provide the other pre-trained models used in our paper. You can download [``models-0.1.0.tar.gz``](https://github.com/keio-bioinformatics/mxfold2/releases/download/v0.1.0/models-0.1.0.tar.gz) and extract the pre-trained models from it as follows:
 
     % tar -zxvf models-0.1.0.tar.gz
 
@@ -40,7 +49,7 @@ Here, ``./models/TrainSetA.conf`` specifies a lot of parameters including hyper-
 
 ## Training
 
-MXfold2 can train its parameters from BPSEQ-formatted RNA sequences. You can also download the datasets used in our manuscript at [the release](https://github.com/keio-bioinformatics/mxfold2/releases). 
+MXfold2 can train its parameters from BPSEQ-formatted RNA sequences. You can also download the datasets used in our paper from [the release](https://github.com/keio-bioinformatics/mxfold2/releases/tag/v0.1.0).
 
     % mxfold2 train --model MixC --param model.pth --save-config model.conf data/TrainSetA.lst
 

diff --git a/mxfold2/__main__.py b/mxfold2/__main__.py
@@ -1,14 +1,14 @@
-from argparse import ArgumentParser
+import os
 import sys
+from argparse import ArgumentParser
 
-from .train import Train
 from .predict import Predict
-from .show_param import ShowParam
+from .train import Train
+#from .show_param import ShowParam
 
-def main(args=None):
-    conf = list(filter(lambda x: x[0]=='@', sys.argv))
-    conf = None if len(conf)==0 else conf[-1][1:]
-
+default_conf = os.path.join(os.path.dirname(__file__), 'models', 'TrainSetAB.conf')
+
+def main():
     parser = ArgumentParser(
         description='RNA secondary structure prediction using deep learning with thermodynamic integrations',
         fromfile_prefix_chars='@',
@@ -18,7 +18,18 @@ def main(args=None):
     Train.add_args(subparser)
     Predict.add_args(subparser)
     # ShowParam.add_args(subparser)
-    args = parser.parse_args(args=args)
+    args = parser.parse_args()
+
+    if hasattr(args, 'param'):
+        if args.param == '':
+            sys.argv.append('@'+default_conf)
+            args = parser.parse_args()
+        elif args.param == 'turner2004':
+            args.param = ''
+
+    conf = list(filter(lambda x: x[0]=='@', sys.argv))
+    conf = None if len(conf)==0 else conf[-1][1:]
+
     args.func(args, conf)
 
 if __name__ == '__main__':

diff --git a/mxfold2/models/TrainSetAB.conf b/mxfold2/models/TrainSetAB.conf
@@ -0,0 +1,96 @@
+--max-helix-length
+30
+--embed-size
+64
+--num-filters
+64
+--num-filters
+64
+--num-filters
+64
+--num-filters
+64
+--num-filters
+64
+--num-filters
+64
+--num-filters
+64
+--num-filters
+64
+--filter-size
+5
+--filter-size
+3
+--filter-size
+5
+--filter-size
+3
+--filter-size
+5
+--filter-size
+3
+--filter-size
+5
+--filter-size
+3
+--pool-size
+1
+--dilation
+0
+--num-lstm-layers
+2
+--num-lstm-units
+32
+--num-transformer-layers
+0
+--num-transformer-hidden-units
+2048
+--num-transformer-att
+8
+--num-hidden-units
+32
+--num-paired-filters
+64
+--num-paired-filters
+64
+--num-paired-filters
+64
+--num-paired-filters
+64
+--num-paired-filters
+64
+--num-paired-filters
+64
+--num-paired-filters
+64
+--num-paired-filters
+64
+--paired-filter-size
+5
+--paired-filter-size
+3
+--paired-filter-size
+5
+--paired-filter-size
+3
+--paired-filter-size
+5
+--paired-filter-size
+3
+--paired-filter-size
+5
+--paired-filter-size
+3
+--dropout-rate
+0.5
+--fc-dropout-rate
+0.5
+--num-att
+8
+--pair-join
+cat
+--model
+MixC
+--param
+TrainSetAB.pth
diff --git a/mxfold2/models/TrainSetAB.pth b/mxfold2/models/TrainSetAB.pth
diff --git a/mxfold2/train.py b/mxfold2/train.py
@@ -9,7 +9,6 @@
 import torch.nn.functional as F
 import torch.optim as optim
 from torch.utils.data import DataLoader
-from torch.utils.tensorboard import SummaryWriter
 from tqdm import tqdm
 
 from .dataset import BPseqDataset
@@ -18,6 +17,11 @@
 from .fold.zuker import ZukerFold
 from .loss import StructuredLoss, StructuredLossWithTurner
 
+try:
+    from torch.utils.tensorboard import SummaryWriter
+except ImportError:
+    pass
+
 
 class Train:
     step = 0
@@ -205,7 +209,7 @@ def run(self, args, conf=None):
         self.disable_progress_bar = args.disable_progress_bar
         self.verbose = args.verbose
         self.writer = None
-        if args.log_dir is not None:
+        if args.log_dir is not None and 'SummaryWriter' in globals():
             self.writer = SummaryWriter(log_dir=args.log_dir)
 
         train_dataset = BPseqDataset(args.input)

diff --git a/mxfold2/utils/fakedata.py b/mxfold2/utils/fakedata.py
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "mxfold2"
-version = "0.1.0"
+version = "0.1.1"
 description = "RNA secondary structure prediction using deep neural networks with thermodynamic integrations"
 authors = ["Kengo Sato <satoken@bio.keio.ac.jp>"]
 repository = "https://github.com/keio-bioinformatics/mxfold2"
@@ -9,22 +9,23 @@ license = "MIT"
 build = "build.py"
 
 [tool.poetry.dependencies]
-python = "^3.6"
-numpy = "^1.18.0"
-torch = "^1.3"
+python = "^3.7"
+numpy = "^1.18"
+torch = "^1.4"
 torchvision = "^0"
-tqdm = "^4.40.0"
-tensorboard = "^2.1"
+tqdm = "^4.40"
+wheel = "^0.35.1"
 
 [tool.poetry.dev-dependencies]
-# pylint = "^2.5.3"
-# jupyter = "^1.0.0"
-# pandas = "^1.0.5"
+pylint = "^2.5.3"
+jupyter = "^1.0.0"
+pandas = "^1.1.0"
+tensorboard = "^2.3.0"
 setuptools_cpp = "^0.1.0"
 
 [tool.poetry.scripts]
 mxfold2 = "mxfold2.__main__:main"
 
 [build-system]
 requires = ["poetry>=0.12", "setuptools", "wheel", "setuptools-cpp"]
-build-backend = "poetry.masonry.api"
+build-backend = "poetry.masonry.api"