Skip to content

Commit

Permalink
GFF3 parsing was still wrong.
Browse files: browse the repository at this point in the history
For issue #65
  • Loading branch information
ifiddes committed Aug 29, 2017
1 parent 7f70679 commit 9c84e48
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions cat/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -794,11 +794,15 @@ def run(self):
for tx_id, d in df.groupby('transcript_id'):
d = dict(zip(d.key, d.value))
if 'gbkey' in d: # this is a NCBI GFF3
if d['gbkey'] in ['mRNA', 'CDS']:
if d['gbkey'] == 'mRNA':
gene_biotype = tx_biotype = 'protein_coding'
else:
gene_biotype = tx_biotype = d['gbkey']
gene_name = gene_id = d['gene']
if 'gene' in d:
gene_name = d['gene']
gene_id = d['Dbxref'].replace('GeneID:', '')
else:
gene_name = gene_id = d['ID']
tx_name = d.get('product', tx_id)
else: # this is either ensembl or gencode
if 'biotype' in d: # Ensembl
Expand Down

0 comments on commit 9c84e48

Please sign in to comment.