Skip to content

Commit

Permalink
Add ensembl id to entrez query
Browse files Browse the repository at this point in the history
  • Loading branch information
BBQuercus committed Jun 23, 2022
1 parent 66876fe commit 1621698
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 28 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -173,3 +173,5 @@ data_tables/
.vscode/
*.tsv
*.png
*.parq
*.ipynb
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,19 +65,20 @@ Probe filtering workflow:
* [x] Add spacing option (length +- nt in optimization step)
* [x] Remove gene of interest when using rna-seq data
* [x] Save alignment and kmer scores to csv output
* [x] Add GTF file reading & saving as parquet/csv step
* [x] Add Ensembl ID support in Entrez query
* [x] Verify bowtie parameters run on endo and exo targets
* [ ] Allow selection of isoform in entrez query
* [ ] Add Ensembl ID support in Entrez query
* [ ] Select intron/exon based on blast from sequence
* [ ] Verify bowtie parameters run on endo and exo targets
* [ ] Add GTF file reading & saving as parquet/csv step

* **Interface**
* [x] Clean up output files
* [x] Clean up logging and error handling
* [x] Decide on way to handle temporary files (tempdir?)
* [x] Find way to handle rerunning same gene with different parameters (unique name hash?)
* [x] Find way to make CLI alter config (luigi.configuration.add_config_path)
* [ ] Use final output file as indicator if pipeline finished running
* [x] Use final output file as indicator if pipeline finished running
* [x] Proper boolean support :monkey:

* **Testing**
* [ ] Add unit tests for core components
Expand Down
61 changes: 37 additions & 24 deletions src/prepare_sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@
Select the right strand and select intronic/exonic regions.
"""

import os
import logging
import os
import subprocess

import luigi
import Bio.SeqIO
import luigi

from config import GeneralConfig
from config import SequenceConfig
Expand All @@ -24,26 +25,34 @@ def output(self):
return luigi.LocalTarget(os.path.join(util.get_output_dir(), fname))

def run(self):
if not SequenceConfig().gene_name or not SequenceConfig().organism_name:
has_ensembl = SequenceConfig().ensemble_id
has_gene_and_organism = (
SequenceConfig().gene_name and SequenceConfig().organism_name
)
if not has_ensembl and not has_gene_and_organism:
raise ValueError(
"For downloading Entrez Gene Probes, "
" you need to specify the gene name and organism name."
" you need to specify the gene name and organism name or provide an Emsembl ID."
)

fasta = os.popen(
f"esearch\
-db gene\
-query '{SequenceConfig().gene_name} [GENE]\
{SequenceConfig().organism_name} [ORGN]' |"
" elink\
-db gene\
-target nuccore\
-name gene_nuccore_refseqrna |"
" efetch -format fasta"
).read()
self.logger.debug(
f"Fetched Entrez Gene Probes for {SequenceConfig().gene_name} in {SequenceConfig().organism_name}."
)
if has_ensembl:
query = SequenceConfig().ensemble_id
else:
query = f"{SequenceConfig().gene_name} [GENE] {SequenceConfig().organism_name} [ORGN]"
args_search = ["esearch", "-db", "gene", "-query", query]
args_link = [
"elink",
"-db",
"gene",
"-target",
"nuccore",
"-name",
"gene_nuccore_refseqrna",
]
args_fetch = ["efetch", "-format", "fasta"]
args_entrez = [*args_search, "|", *args_link, "|", *args_fetch]
self.logger.debug(f"Fetching from Entrez using query '{query}'.")
fasta = subprocess.check_output(args_entrez, stderr=subprocess.STDOUT).decode()

if not fasta:
raise LookupError(
Expand All @@ -64,12 +73,16 @@ def output(self):
]

def run(self):
os.system(
f"makeblastdb\
-dbtype nucl\
-in {GeneralConfig().reference_genome}\
-out {util.get_genome_name()}"
)
args_blast = [
"makeblastdb",
"-dbtype",
"nucl",
"-in",
GeneralConfig().reference_genome,
"-out",
util.get_genome_name(),
]
subprocess.check_call(args_blast)


class PrepareSequence(luigi.Task):
Expand Down

0 comments on commit 1621698

Please sign in to comment.