Commit

Merge pull request #169 from ComparativeGenomicsToolkit/develop

Bug fixes.

ifiddes authored Apr 3, 2020
2 parents af6e4f0 + a279010 commit bc3cdfb
Showing 10 changed files with 139 additions and 105 deletions.
64 changes: 51 additions & 13 deletions .travis.yml
@@ -1,12 +1,36 @@
sudo: required
language: python
python: 3.7
os: linux

language: generic

python: 3.7.1
go: "1.11"

dist: xenial

services:
- docker
stages:
- name: docker-build
if: branch = master
- name: test

addons:
apt:
packages:
- flawfinder
- squashfs-tools
- uuid-dev
- libuuid1
- libffi-dev
- libssl-dev
- libssl1.0.0
- libarchive-dev
- libgpgme11-dev
- libseccomp-dev
- liblzo2-dev
- python3-dev
update: true

jobs:
include:
- stage: docker-build
@@ -15,19 +39,33 @@ jobs:
- if [[ "$TRAVIS_PULL_REQUEST" == "false" ]]; then docker login --username $QUAY_USERNAME --password $QUAY_PASSWORD quay.io; docker push quay.io/ucsc_cgl/cat:latest; fi
- stage: test
script:
- set -ex
# Enable logging
- "sed -i 's/level: .*/level: DEBUG/g' logging.cfg"
- sudo apt-get -qq update
- sudo apt-get install liblzo2-dev
- pyenv shell 3.7.1
- pip install --upgrade pip
- pip install cython wheel setuptools pycparser
- pip install .
# Have to screw with toil's settings to get the jobs to
# actually run properly on such a small VM.
- sed -i "s/maxDisk = self.physicalDisk/pass/g" $VIRTUAL_ENV/lib/*/site-packages/toil/batchSystems/singleMachine.py
- sed -i "s/maxCores = self.numCores/maxCores = 8/g" $VIRTUAL_ENV/lib/*/site-packages/toil/batchSystems/singleMachine.py
- sed -i "s/maxMemory = self.physicalMemory/pass/g" $VIRTUAL_ENV/lib/*/site-packages/toil/batchSystems/singleMachine.py
# Just go through the test set and make sure it doesn't crash. It's not much, but it's better than nothing.
- sed -i "s/maxDisk = self.physicalDisk/pass/g" /opt/pyenv/versions/3.7.1/lib/python3.7/site-packages/toil/batchSystems/singleMachine.py
- sed -i "s/maxCores = self.numCores/maxCores = 8/g" /opt/pyenv/versions/3.7.1/lib/python3.7/site-packages/toil/batchSystems/singleMachine.py
- sed -i "s/maxMemory = self.physicalMemory/pass/g" /opt/pyenv/versions/3.7.1/lib/python3.7/site-packages/toil/batchSystems/singleMachine.py
- >
luigi --module cat RunCat --hal=test_data/vertebrates.hal --target-genomes='("hg38", "galGal4")' --ref-genome=mm10
--workers=2 --config=test_data/test.config --work-dir test_install --out-dir test_install --local-scheduler
--augustus --augustus-cgp --augustus-pb --assembly-hub
--augustus --augustus-cgp --augustus-pb --assembly-hub --log-level DEBUG
- stage: test
script:
- sudo chmod u+x .travis/*.sh
- /bin/bash .travis/setup.sh
- pyenv shell 3.7.1
- pip install --upgrade pip
- pip install cython wheel setuptools pycparser
- pip install .
# Have to screw with toil's settings to get the jobs to
# actually run properly on such a small VM.
- sed -i "s/maxDisk = self.physicalDisk/pass/g" /opt/pyenv/versions/3.7.1/lib/python3.7/site-packages/toil/batchSystems/singleMachine.py
- sed -i "s/maxCores = self.numCores/maxCores = 8/g" /opt/pyenv/versions/3.7.1/lib/python3.7/site-packages/toil/batchSystems/singleMachine.py
- sed -i "s/maxMemory = self.physicalMemory/pass/g" /opt/pyenv/versions/3.7.1/lib/python3.7/site-packages/toil/batchSystems/singleMachine.py
- >
luigi --module cat RunCat --hal=test_data/vertebrates.hal --target-genomes='("hg38", "galGal4")' --ref-genome=mm10
--workers=2 --config=test_data/test.config --work-dir test_install --out-dir test_install --local-scheduler
--augustus --augustus-cgp --augustus-pb --assembly-hub --binary-mode singularity --log-level DEBUG
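Both test stages patch Toil's single-machine batch system in place so the jobs fit on a small CI VM. As a rough illustration only, the sed substitutions above could be expressed in Python like this; the site-packages path is copied from the commands above and is an assumption about the Travis pyenv layout, not something to reuse elsewhere:

# Hypothetical sketch of the in-place patching the CI script does with sed.
# The site-packages path below is an assumption about the CI image layout.
import re
from pathlib import Path

SINGLE_MACHINE = Path("/opt/pyenv/versions/3.7.1/lib/python3.7/"
                      "site-packages/toil/batchSystems/singleMachine.py")

substitutions = [
    (r"maxDisk = self\.physicalDisk", "pass"),       # ignore the VM's small disk
    (r"maxCores = self\.numCores", "maxCores = 8"),  # pretend we have 8 cores
    (r"maxMemory = self\.physicalMemory", "pass"),   # ignore the VM's small memory
]

text = SINGLE_MACHINE.read_text()
for pattern, replacement in substitutions:
    text = re.sub(pattern, replacement, text)
SINGLE_MACHINE.write_text(text)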
26 changes: 26 additions & 0 deletions .travis/setup.sh
@@ -0,0 +1,26 @@
#!/bin/bash -ex

sudo sed -i -e 's/^Defaults\tsecure_path.*$//' /etc/sudoers

echo "GO Version:"
go version

echo "Python Version:"
python --version
pip install --user sregistry[all]

echo "sregistry Version:"
sregistry version

# Install Singularity

export PATH="${GOPATH}/bin:${PATH}"

mkdir -p "${GOPATH}/src/github.com/sylabs"
cd "${GOPATH}/src/github.com/sylabs"

git clone -b vault/release-3.3 https://github.com/sylabs/singularity
cd singularity
./mconfig -v -p /usr/local
make -j `nproc 2>/dev/null || echo 1` -C ./builddir all
sudo make -C ./builddir install
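setup.sh builds Singularity 3.3 from source so the second test stage can run with --binary-mode singularity. A minimal sketch of the kind of sanity check that run relies on, assuming only that a singularity binary ends up on PATH:

# Minimal sketch: verify the container runtime is available before a
# singularity-mode run, similar in spirit to the check in cat/__init__.py.
import shutil
import subprocess

def check_singularity():
    if shutil.which("singularity") is None:
        raise RuntimeError("singularity binary not found. "
                           "Either install it or use a different option for --binary-mode.")
    # Print the installed version so CI logs show what the build produced.
    subprocess.check_call(["singularity", "--version"])

if __name__ == "__main__":
    check_singularity()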
22 changes: 14 additions & 8 deletions cat/__init__.py
@@ -9,17 +9,18 @@
import os
import shutil
import json
import subprocess
from collections import OrderedDict
from frozendict import frozendict
from configobj import ConfigObj
from subprocess import check_call, DEVNULL

import luigi
import luigi.contrib.sqla
from luigi.util import requires
from toil.job import Job
import pandas as pd
from bx.intervals.cluster import ClusterTree
from toil.lib.memoize import memoize

import tools.bio
import tools.fileOps
@@ -52,6 +53,7 @@
from .exceptions import *

logger = logging.getLogger('cat')
logger.setLevel('INFO')


###
@@ -138,6 +140,7 @@ class PipelineTask(luigi.Task):
minNode = luigi.Parameter(default=None, significant=False)
metrics = luigi.Parameter(default=None, significant=False)
zone = luigi.Parameter(default=None, significant=False)
logLevel = luigi.ChoiceParameter(default="INFO", choices=["INFO", "DEBUG", "ERROR", "WARNING"], significant=False)

def __repr__(self):
"""override the repr to make logging cleaner"""
@@ -374,6 +377,7 @@ def get_module_args(self, module, **args):
pipeline_args = self.get_pipeline_args()
return module.get_args(pipeline_args, **args)

@memoize
def load_docker(self):
"""
Download Docker or Singularity container, if applicable
@@ -387,16 +391,18 @@ def load_docker(self):
raise ToolMissingException('docker binary not found. '
'Either install it or use a different option for --binary-mode.')
# Update docker container
check_call(['docker', 'pull', 'quay.io/ucsc_cgl/cat:latest'], stdout=DEVNULL, stderr=DEVNULL)
subprocess.check_call(['docker', 'pull', 'quay.io/ucsc_cgl/cat:latest'])
elif self.binary_mode == 'singularity':
if not tools.misc.is_exec('singularity'):
raise ToolMissingException('singularity binary not found. '
'Either install it or use a different option for --binary-mode.')
os.environ['SINGULARITY_PULLFOLDER'] = self.work_dir
os.environ['SINGULARITY_CACHEDIR'] = self.work_dir
os.environ['SINGULARITY_PULLFOLDER'] = os.path.abspath(self.work_dir)
os.environ['SINGULARITY_CACHEDIR'] = os.path.abspath(self.work_dir)
tools.fileOps.ensure_dir(self.work_dir)
if not os.path.isfile(os.path.join(self.work_dir, 'cat.img')):
check_call(['singularity', 'pull', '--name', 'cat.img',
'docker://quay.io/ucsc_cgl/cat:latest'], stdout=DEVNULL, stderr=DEVNULL)
subprocess.check_call(['singularity', 'pull', '--name', 'cat.img',
'docker://quay.io/ucsc_cgl/cat:latest'])
assert os.path.exists(os.path.join(self.work_dir, 'cat.img'))

@staticmethod
def get_databases(pipeline_args):
@@ -517,8 +523,6 @@ def prepare_toil_options(self, work_dir):
toil_args.restart = True
except OSError:
toil_args.restart = True
except IOError:
shutil.rmtree(job_store)

if tools.misc.running_in_container():
# Caching doesn't work in containers, because the
@@ -544,6 +548,7 @@ def get_toil_defaults(self):
parser = Job.Runner.getDefaultArgumentParser()
namespace = parser.parse_args(['']) # empty jobStore attribute
namespace.jobStore = None # jobStore attribute will be updated per-batch
namespace.logLevel = self.logLevel
return namespace


@@ -641,6 +646,7 @@ def validate(self, pipeline_args):

def requires(self):
self.load_docker()
logger.setLevel(self.logLevel)
pipeline_args = self.get_pipeline_args()
self.validate(pipeline_args)
yield self.clone(PrepareFiles)
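The cat/__init__.py changes memoize load_docker(), stop discarding the container pull output, and anchor the Singularity cache to an absolute work_dir before pulling cat.img. A condensed, hedged sketch of the singularity branch as it stands after this commit, with os.makedirs standing in for CAT's tools.fileOps.ensure_dir:

# Condensed sketch of the singularity branch of load_docker() after this commit;
# os.makedirs stands in for CAT's tools.fileOps.ensure_dir here.
import os
import subprocess

def pull_singularity_image(work_dir):
    work_dir = os.path.abspath(work_dir)
    os.environ['SINGULARITY_PULLFOLDER'] = work_dir
    os.environ['SINGULARITY_CACHEDIR'] = work_dir
    os.makedirs(work_dir, exist_ok=True)
    image = os.path.join(work_dir, 'cat.img')
    if not os.path.isfile(image):
        # Output is no longer sent to DEVNULL, so pull progress shows up in the logs.
        subprocess.check_call(['singularity', 'pull', '--name', 'cat.img',
                               'docker://quay.io/ucsc_cgl/cat:latest'])
    assert os.path.exists(image)
    return image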
23 changes: 18 additions & 5 deletions cat/augustus_cgp.py
@@ -294,15 +294,21 @@ def join_genes(job, gff_chunks):
raw_gtf_file = tools.fileOps.get_tmp_toil_file()
raw_gtf_fofn = tools.fileOps.get_tmp_toil_file()
useful_lines = 0
files = []
with open(raw_gtf_file, 'w') as raw_handle, open(raw_gtf_fofn, 'w') as fofn_handle:
for (chrom, start, chunksize), chunk in gff_chunks.items():
local_path = job.fileStore.readGlobalFile(chunk)
fofn_handle.write(local_path + '\n')
raw_handle.write('## BEGIN CHUNK chrom: {} start: {} chunksize: {}\n'.format(chrom, start, chunksize))
for line in open(local_path):
if not line.startswith('#'):
useful_lines += 1
raw_handle.write(line)
if os.environ.get('CAT_BINARY_MODE') == 'singularity':
local_path = tools.procOps.singularify_arg(local_path)
files.append(local_path)
else:
files.append(os.path.basename(local_path))
fofn_handle.write(local_path + '\n')

# make sure CGP didn't fail entirely
if useful_lines == 0:
@@ -312,10 +318,17 @@

join_genes_file = tools.fileOps.get_tmp_toil_file()
join_genes_gp = tools.fileOps.get_tmp_toil_file()
cmd = [['joingenes', '-f', raw_gtf_fofn, '-o', '/dev/stdout'],
['grep', '-P', '\tAUGUSTUS\t(exon|CDS|start_codon|stop_codon|tts|tss)\t'],
['sed', ' s/jg/augCGP-/g']]
tools.procOps.run_proc(cmd, stdout=join_genes_file)
# TODO: figure out why this fails on certain filesystems
try:
cmd = [['joingenes', '-f', raw_gtf_fofn, '-o', '/dev/stdout'],
['grep', '-P', '\tAUGUSTUS\t(exon|CDS|start_codon|stop_codon|tts|tss)\t'],
['sed', ' s/jg/augCGP-/g']]
tools.procOps.run_proc(cmd, stdout=join_genes_file)
except:
cmd = [['joingenes', '-g', ','.join(files), '-o', '/dev/stdout'],
['grep', '-P', '\tAUGUSTUS\t(exon|CDS|start_codon|stop_codon|tts|tss)\t'],
['sed', ' s/jg/augCGP-/g']]
tools.procOps.run_proc(cmd, stdout=join_genes_file)

# passing the joingenes output through gtfToGenePred then genePredToGtf fixes the sort order for homGeneMapping
cmd = ['gtfToGenePred', '-genePredExt', join_genes_file, join_genes_gp]
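The join_genes() change wraps the joingenes call in a try/except: the file-of-filenames form (-f) is attempted first and, if it fails (the TODO notes this happens on certain filesystems), the chunk paths are passed directly with -g. A hedged sketch of that fallback pattern, using plain subprocess in place of CAT's tools.procOps.run_proc and omitting the grep/sed post-processing:

# Hedged sketch of the fallback added here: try the file-of-filenames form of
# joingenes first; if that invocation fails, pass the chunk paths with -g.
import subprocess

def run_joingenes(fofn_path, chunk_paths, out_path):
    def attempt(args):
        # Reopened for each attempt so a failed first attempt is overwritten.
        with open(out_path, 'w') as out:
            subprocess.run(['joingenes'] + args + ['-o', '/dev/stdout'],
                           stdout=out, check=True)

    try:
        attempt(['-f', fofn_path])               # preferred: file of filenames
    except subprocess.CalledProcessError:
        attempt(['-g', ','.join(chunk_paths)])   # fallback: comma-separated file list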
8 changes: 6 additions & 2 deletions cat/augustus_pb.py
@@ -149,10 +149,14 @@ def join_genes(job, gff_chunks):
with open(raw_gtf_file, 'w') as raw_handle, open(raw_gtf_fofn, 'w') as fofn_handle:
for chunk in gff_chunks:
local_path = job.fileStore.readGlobalFile(chunk)
files.append(os.path.basename(local_path))
fofn_handle.write(local_path + '\n')
for line in open(local_path):
raw_handle.write(line)
if os.environ.get('CAT_BINARY_MODE') == 'singularity':
local_path = tools.procOps.singularify_arg(local_path)
files.append(local_path)
else:
files.append(os.path.basename(local_path))
fofn_handle.write(local_path + '\n')

join_genes_file = tools.fileOps.get_tmp_toil_file()
join_genes_gp = tools.fileOps.get_tmp_toil_file()
2 changes: 1 addition & 1 deletion cat/consensus.py
@@ -37,7 +37,7 @@
import tools.procOps
from tools.defaultOrderedDict import DefaultOrderedDict

logger = logging.getLogger(__name__)
logger = logging.getLogger('cat')

id_template = '{genome:.10}_{tag_type}{unique_id:07d}'

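consensus.py (like hints_db.py and plots.py below) now asks for the shared 'cat' logger instead of a module-level one, so the level set from the new --log-level parameter in cat/__init__.py applies to these modules as well. A minimal illustration of the standard-library behaviour this relies on (not CAT code):

# Minimal illustration: modules asking for the same logger name get the same
# object, so one setLevel() call in cat/__init__.py now governs the messages
# emitted from consensus.py, hints_db.py, and plots.py too.
import logging

a = logging.getLogger('cat')   # e.g. in cat/__init__.py
b = logging.getLogger('cat')   # e.g. in cat/consensus.py
assert a is b

logging.basicConfig(format='%(levelname)s: %(message)s')
a.setLevel('DEBUG')            # mirrors logger.setLevel(self.logLevel)
b.debug('visible because the shared logger level is DEBUG')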
5 changes: 3 additions & 2 deletions cat/hints_db.py
@@ -26,7 +26,7 @@
import tools.bio
from .exceptions import UserException

logger = logging.getLogger(__name__)
logger = logging.getLogger('cat')


def hints_db(hints_args, toil_options):
@@ -160,7 +160,8 @@ def write_bam(r, ns_handle):
job.fileStore.readGlobalFile(bai_file_id, bam_path + '.bai')
name_sorted = tools.fileOps.get_tmp_toil_file(suffix='name_sorted.bam')
cmd = [['samtools', 'view', '-b', bam_path] + list(reference_subset),
['sambamba', 'sort', '-t', '4', '-m', '15G', '-o', '/dev/stdout', '-n', '/dev/stdin']]
['sambamba', 'sort', '--tmpdir={}'.format(job.fileStore.getLocalTempDir()),
'-t', '4', '-m', '15G', '-o', '/dev/stdout', '-n', '/dev/stdin']]
tools.procOps.run_proc(cmd, stdout=name_sorted)
ns_handle = pysam.Samfile(name_sorted)
# this group may come up empty -- check to see if we have at least one mapped read
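The hints_db.py change gives sambamba sort an explicit --tmpdir inside the Toil job's local temporary directory instead of the default location. A hedged sketch of the samtools | sambamba pipeline with that option, using tempfile.mkdtemp() in place of job.fileStore.getLocalTempDir():

# Hedged sketch of the samtools | sambamba pipeline with the new --tmpdir option;
# tempfile.mkdtemp() stands in for Toil's job.fileStore.getLocalTempDir().
import subprocess
import tempfile

def name_sort_subset(bam_path, reference_subset, out_path):
    tmpdir = tempfile.mkdtemp()
    with open(out_path, 'wb') as out:
        view = subprocess.Popen(['samtools', 'view', '-b', bam_path] + list(reference_subset),
                                stdout=subprocess.PIPE)
        sort = subprocess.Popen(['sambamba', 'sort', '--tmpdir={}'.format(tmpdir),
                                 '-t', '4', '-m', '15G', '-o', '/dev/stdout',
                                 '-n', '/dev/stdin'],
                                stdin=view.stdout, stdout=out)
        view.stdout.close()
        sort.communicate()
        if view.wait() != 0 or sort.returncode != 0:
            raise RuntimeError('samtools/sambamba pipeline failed')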
2 changes: 1 addition & 1 deletion cat/plots.py
@@ -21,7 +21,7 @@
import tools.sqlInterface
import tools.nameConversions

logger = logging.getLogger(__name__)
logger = logging.getLogger('cat')

# suppress all warnings to make logging cleaner. The only warnings should be the chained assignment warning from pandas
# as well as the bottom == top when plots have no data.
83 changes: 14 additions & 69 deletions logging.cfg
@@ -1,76 +1,21 @@
-[formatters]
-keys: default
-
-[handlers]
-keys: console
-
[loggers]
-keys: root, cat, filter_transmap, generate_hints_db, luigi-interface, toil, toil.batchSystems, toil.leader, toil.worker, toil.jobStores.fileJobStore
+keys=root

-[formatter_default]
-format: %(asctime)s %(name)-5s %(levelname)-5s %(message)s
-datefmt: %m-%d %H:%M:%S
+[handlers]
+keys=consoleHandler

-[handler_console]
-class: StreamHandler
-args: []
-formatter: default
+[formatters]
+keys=simpleFormatter

[logger_root]
-level: ERROR
-handlers: console
-qualname: root
-
-[logger_cat]
-level: INFO
-handlers: console
-qualname: cat
-propagate: 0
-
-[logger_filter_transmap]
-level: INFO
-handlers: console
-qualname: filter_transmap
-propagate: 0
-
-[logger_generate_hints_db]
-level: INFO
-handlers: console
-qualname: generate_hints_db
-propagate: 0
-
-[logger_luigi-interface]
-level: INFO
-handlers: console
-qualname: luigi-interface
-propagate: 0
-
-[logger_toil]
-level: WARNING
-handlers: console
-qualname: toil
-
-[logger_toil.batchSystems]
-level: ERROR
-handlers: console
-qualname: toil.batchSystems
-
-[logger_toil.leader]
-level: WARNING
-handlers: console
-qualname: toil.leader
-
-[logger_toil.jobStores.abstractJobStore]
-level: WARNING
-handlers: console
-qualname: toil.jobStores.abstractJobStore
+level=INFO
+handlers=consoleHandler

-[logger_toil.worker]
-level: WARNING
-handlers: console
-qualname: toil.worker
+[handler_consoleHandler]
+class=StreamHandler
+level=INFO
+formatter=simpleFormatter
+args=(sys.stdout,)

-[logger_toil.jobStores.fileJobStore]
-level: INFO
-handlers: console
-qualname: toil.jobStores.fileJobStore
+[formatter_simpleFormatter]
+format=%(levelname)s: %(message)s
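The rewritten logging.cfg is a plain logging.fileConfig file with a single root logger writing to stdout. For reference, this is roughly how such a file is loaded; the relative path 'logging.cfg' is an assumption about the working directory:

# Minimal illustration of loading a fileConfig-style logging.cfg like the one above.
import logging
import logging.config

logging.config.fileConfig('logging.cfg', disable_existing_loggers=False)
logger = logging.getLogger('cat')
logger.info('messages now go through consoleHandler to stdout')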