Skip to content

Commit

Permalink
default parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
satoken committed Aug 19, 2020
1 parent 552d134 commit 89be8a9
Show file tree
Hide file tree
Showing 8 changed files with 149 additions and 100 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,4 @@ data
experiments
.DS_Store
poetry.lock
models
23 changes: 16 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,37 @@ RNA secondary structure prediction using deep learning with thermodynamic integr
## Installation

### System requirements
* python (>=3.6)
* pytorch (>=1.3)
* python (>=3.7)
* pytorch (>=1.4)
* C++17 compatible compiler (tested on Apple clang version 12.0.0 and GCC version 7.4.0) (optional)
* cmake (>=3.10) (optional)

### Install from wheel

We provide Python wheel packages for several platforms on [the release page](https://github.com/keio-bioinformatics/mxfold2/releases). Download the package that matches your platform and Python version, and install it as follows:

% pip3 install mxfold2-0.1.0-cp38-cp38-macosx_10_15_x86_64.whl
% pip3 install mxfold2-0.1.1-cp38-cp38-macosx_10_15_x86_64.whl

### Install from sdist

You can build and install from the source distribution downloaded from [the release](https://github.com/keio-bioinformatics/mxfold2/releases) as follows:

% pip3 install mxfold2-0.1.0.tar.gz
% pip3 install mxfold2-0.1.1.tar.gz

TO build MXfold2 from the source distribution, you need a C++17 compatible compiler and cmake.
To build MXfold2 from the source distribution, you need a C++17 compatible compiler and cmake.

## Prediction

We provide the pre-trained models at [the release](https://github.com/keio-bioinformatics/mxfold2/releases). You can download ``models-0.1.0.tar.gz`` and extract the pre-trained models from it as follows:
You can predict the secondary structures of FASTA-formatted RNA sequences as follows:

% mxfold2 predict test.fa
>DS4440
GGAUGGAUGUCUGAGCGGUUGAAAGAGUCGGUCUUGAAAACCGAAGUAUUGAUAGGAAUACCGGGGGUUCGAAUCCCUCUCCAUCCG
(((((((........(((((..((((.....))))...)))))...................(((((.......)))))))))))). (24.8)

By default, MXfold2 uses the parameters trained on TrainSetA and TrainSetB (see our paper).

We provide other pre-trained models used in our paper. You can download [``models-0.1.0.tar.gz``](https://github.com/keio-bioinformatics/mxfold2/releases/download/v0.1.0/models-0.1.0.tar.gz) and extract the pre-trained models from it as follows:

% tar -zxvf models-0.1.0.tar.gz

Expand All @@ -40,7 +49,7 @@ Here, ``./models/TrainSetA.conf`` specifies a lot of parameters including hyper-

## Training

MXfold2 can train its parameters from BPSEQ-formatted RNA sequences. You can also download the datasets used in our manuscript at [the release](https://github.com/keio-bioinformatics/mxfold2/releases).
MXfold2 can train its parameters from BPSEQ-formatted RNA sequences. You can also download the datasets used in our paper from [the release page](https://github.com/keio-bioinformatics/mxfold2/releases/tag/v0.1.0).

% mxfold2 train --model MixC --param model.pth --save-config model.conf data/TrainSetA.lst

Expand Down
27 changes: 19 additions & 8 deletions mxfold2/__main__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from argparse import ArgumentParser
import os
import sys
from argparse import ArgumentParser

from .train import Train
from .predict import Predict
from .show_param import ShowParam
from .train import Train
#from .show_param import ShowParam

def main(args=None):
conf = list(filter(lambda x: x[0]=='@', sys.argv))
conf = None if len(conf)==0 else conf[-1][1:]

default_conf = os.path.join(os.path.dirname(__file__), 'models', 'TrainSetAB.conf')

def main():
parser = ArgumentParser(
description='RNA secondary structure prediction using deep learning with thermodynamic integrations',
fromfile_prefix_chars='@',
Expand All @@ -18,7 +18,18 @@ def main(args=None):
Train.add_args(subparser)
Predict.add_args(subparser)
# ShowParam.add_args(subparser)
args = parser.parse_args(args=args)
args = parser.parse_args()

if hasattr(args, 'param'):
if args.param == '':
sys.argv.append('@'+default_conf)
args = parser.parse_args()
elif args.param == 'turner2004':
args.param = ''

conf = list(filter(lambda x: x[0]=='@', sys.argv))
conf = None if len(conf)==0 else conf[-1][1:]

args.func(args, conf)

if __name__ == '__main__':
Expand Down
96 changes: 96 additions & 0 deletions mxfold2/models/TrainSetAB.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
--max-helix-length
30
--embed-size
64
--num-filters
64
--num-filters
64
--num-filters
64
--num-filters
64
--num-filters
64
--num-filters
64
--num-filters
64
--num-filters
64
--filter-size
5
--filter-size
3
--filter-size
5
--filter-size
3
--filter-size
5
--filter-size
3
--filter-size
5
--filter-size
3
--pool-size
1
--dilation
0
--num-lstm-layers
2
--num-lstm-units
32
--num-transformer-layers
0
--num-transformer-hidden-units
2048
--num-transformer-att
8
--num-hidden-units
32
--num-paired-filters
64
--num-paired-filters
64
--num-paired-filters
64
--num-paired-filters
64
--num-paired-filters
64
--num-paired-filters
64
--num-paired-filters
64
--num-paired-filters
64
--paired-filter-size
5
--paired-filter-size
3
--paired-filter-size
5
--paired-filter-size
3
--paired-filter-size
5
--paired-filter-size
3
--paired-filter-size
5
--paired-filter-size
3
--dropout-rate
0.5
--fc-dropout-rate
0.5
--num-att
8
--pair-join
cat
--model
MixC
--param
TrainSetAB.pth
Binary file added mxfold2/models/TrainSetAB.pth
Binary file not shown.
8 changes: 6 additions & 2 deletions mxfold2/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

from .dataset import BPseqDataset
Expand All @@ -18,6 +17,11 @@
from .fold.zuker import ZukerFold
from .loss import StructuredLoss, StructuredLossWithTurner

try:
from torch.utils.tensorboard import SummaryWriter
except ImportError:
pass


class Train:
step = 0
Expand Down Expand Up @@ -205,7 +209,7 @@ def run(self, args, conf=None):
self.disable_progress_bar = args.disable_progress_bar
self.verbose = args.verbose
self.writer = None
if args.log_dir is not None:
if args.log_dir is not None and 'SummaryWriter' in globals():
self.writer = SummaryWriter(log_dir=args.log_dir)

train_dataset = BPseqDataset(args.input)
Expand Down
73 changes: 0 additions & 73 deletions mxfold2/utils/fakedata.py

This file was deleted.

21 changes: 11 additions & 10 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "mxfold2"
version = "0.1.0"
version = "0.1.1"
description = "RNA secondary structure prediction using deep neural networks with thermodynamic integrations"
authors = ["Kengo Sato <satoken@bio.keio.ac.jp>"]
repository = "https://github.com/keio-bioinformatics/mxfold2"
Expand All @@ -9,22 +9,23 @@ license = "MIT"
build = "build.py"

[tool.poetry.dependencies]
python = "^3.6"
numpy = "^1.18.0"
torch = "^1.3"
python = "^3.7"
numpy = "^1.18"
torch = "^1.4"
torchvision = "^0"
tqdm = "^4.40.0"
tensorboard = "^2.1"
tqdm = "^4.40"
wheel = "^0.35.1"

[tool.poetry.dev-dependencies]
# pylint = "^2.5.3"
# jupyter = "^1.0.0"
# pandas = "^1.0.5"
pylint = "^2.5.3"
jupyter = "^1.0.0"
pandas = "^1.1.0"
tensorboard = "^2.3.0"
setuptools_cpp = "^0.1.0"

[tool.poetry.scripts]
mxfold2 = "mxfold2.__main__:main"

[build-system]
requires = ["poetry>=0.12", "setuptools", "wheel", "setuptools-cpp"]
build-backend = "poetry.masonry.api"
build-backend = "poetry.masonry.api"

0 comments on commit 89be8a9

Please sign in to comment.