-
Notifications
You must be signed in to change notification settings - Fork 7
/
remock
executable file
·150 lines (136 loc) · 5.13 KB
/
remock
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/usr/bin/env python3
#
# Copyright (C) 2017–2024, Jose Manuel Martí Martínez
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
#
"""
Generate mock samples for Recentrifuge
"""
import argparse
import os
import sys
from recentrifuge import __version__, __author__, __date__
from recentrifuge.mock import generate_mock
from recentrifuge.config import Filename, gray, blue
from recentrifuge.config import LICENSE, NODES_FILE, NAMES_FILE, TAXDUMP_PATH
from recentrifuge.taxonomy import Taxonomy
# pandas is an optional dependency: it is only needed to read the mock
# layout from an Excel file. When it cannot be imported, `pd` is bound to
# None and `_USE_PANDAS` is False.
try:
    import pandas as pd
except ImportError:
    pd = None
_USE_PANDAS = pd is not None
def main():
    """Main entry point to script.

    Print the program header, parse the command line, build the taxonomy
    from the nodes and names files located under ``--nodespath`` and pass
    the parsed options on to ``generate_mock``.
    """

    def configure_parser():
        """Argument Parser Configuration

        Returns:
            The ``argparse.ArgumentParser`` with every option registered.
            ``-f``/``-r`` form one required mutually exclusive group and
            ``-m``/``-x``/``-t`` form another; ``-x`` is only registered
            when pandas could be imported.
        """
        parser = argparse.ArgumentParser(
            description='Generate mock samples for Recentrifuge',
            epilog=f'%(prog)s - Release {__version__} - {__date__}' + LICENSE,
            formatter_class=argparse.RawDescriptionHelpFormatter
        )
        # Source of the scores: an explicit file or random generation
        parser_mode = parser.add_mutually_exclusive_group(required=True)
        parser_mode.add_argument(
            '-f', '--file',
            action='store',
            metavar='FILE',
            type=Filename,
            help='Explicit source: Centrifuge output file as source'
        )
        parser_mode.add_argument(
            '-r', '--random',
            action='store',
            metavar='MHL',
            type=int,
            default=15,
            help=('Random score generated. Please provide the minimum hit '
                  'length (mhl) of the classification; 15 by default')
        )
        parser.add_argument(
            '-d', '--debug',
            action='store_true',
            help='increase output verbosity and perform additional checks'
        )
        # Source of the mock layout: mock files, Excel file or test mode
        parser_input = parser.add_mutually_exclusive_group(required=True)
        parser_input.add_argument(
            '-m', '--mock',
            action='append',
            metavar='FILE',
            type=Filename,
            help=('Mock files to be read for mock Centrifuge sequences layout.'
                  ' If a single directory is entered, every .mck file inside '
                  'will be taken as a different sample. '
                  'Multiple -m is available to include several samples.')
        )
        if _USE_PANDAS:
            # Reading an Excel layout needs pandas, so only offer the
            # option when the package is available
            parser_input.add_argument(
                '-x', '--xcel',
                action='store',
                metavar='FILE',
                type=Filename,
                help='Excel file with the mock layout.'
            )
        # Test mode really characterized by None in mock and xcel arguments
        parser_input.add_argument(
            '-t', '--test',
            action='store_true',
            help='generate mock data ready for testing Recentrifuge'
        )
        parser.add_argument(
            '-n', '--nodespath',
            action='store',
            metavar='PATH',
            default=TAXDUMP_PATH,
            help=('path for the nodes information files '
                  '(nodes.dmp and names.dmp from NCBI)')
        )
        parser.add_argument(
            '-c', '--compress',
            action='store_true',
            help='Any generated FASTQ file will be gzipped'
        )
        parser.add_argument(
            '-V', '--version',
            action='version',
            version=f'%(prog)s release {__version__} ({__date__})'
        )
        return parser

    def check_debug():
        """Check debugging mode

        When ``--debug`` is set, print every parsed argument whose value
        is truthy (unset flags, None and 0 are skipped).
        """
        if args.debug:
            print(blue('INFO:'), gray('Debugging mode activated'))
            print(blue('INFO:'), gray('Active parameters:'))
            for key, value in vars(args).items():
                if value:
                    print(gray(f'\t{key} ='), f'{value}')

    # Program header
    print(f'\n=-= {sys.argv[0]} =-= v{__version__} - {__date__}'
          f' =-= by {__author__} =-=\n')
    sys.stdout.flush()

    # Parse arguments
    argparser = configure_parser()
    args = argparser.parse_args()
    nodesfile: Filename = Filename(os.path.join(args.nodespath, NODES_FILE))
    namesfile: Filename = Filename(os.path.join(args.nodespath, NAMES_FILE))
    # The -x/--xcel option is only registered when pandas is installed, so
    # the attribute may be missing from the namespace; fall back to None,
    # which is the value it also has when -m or -t is selected instead.
    xcel = getattr(args, 'xcel', None)

    check_debug()

    # Load NCBI nodes, names and build children
    ncbi: Taxonomy = Taxonomy(nodesfile, namesfile, None, False)

    generate_mock(ncbi,
                  args.file, args.random,
                  args.mock, xcel,
                  args.debug, gzipped=args.compress)
# Run main() only when this file is executed as a script (not on import)
if __name__ == '__main__':
    main()