mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
Add support and test cases for hmmscan 3b1.1 tab output
This commit is contained in:
@ -97,7 +97,10 @@ class Hmmer3TabParser(object):
|
||||
prev = cur
|
||||
prev_qid = cur_qid
|
||||
# only parse the result row if it's not EOF
|
||||
if self.line:
|
||||
# NOTE: we are not parsing the extra '#' lines appended to the end
|
||||
# of hmmer31b1 tabular results since storing them in qresult
|
||||
# objects means we cannot do single-pass parsing
|
||||
if self.line and not self.line.startswith('#'):
|
||||
cur = self._parse_row()
|
||||
cur_qid = cur['qresult']['id']
|
||||
else:
|
||||
|
@ -24,6 +24,7 @@ text_30_hmmsearch_003.out single query, multiple matches, multiple hsps per m
|
||||
text_30_hmmsearch_004.out single query, multiple matches, multiple hsps per match, no alignment width
|
||||
text_30_hmmsearch_005.out multiple queries
|
||||
|
||||
tab_31b1_hmmscan_001.out multiple queries
|
||||
tab_30_hmmscan_001.out multiple queries
|
||||
tab_30_hmmscan_002.out single query, no match
|
||||
tab_30_hmmscan_003.out single query, one match, one hsp per match
|
||||
|
24
Tests/Hmmer/tab_31b1_hmmscan_001.out
Normal file
24
Tests/Hmmer/tab_31b1_hmmscan_001.out
Normal file
@ -0,0 +1,24 @@
|
||||
# --- full sequence ---- --- best 1 domain ---- --- domain number estimation ----
|
||||
# target name accession query name accession E-value score bias E-value score bias exp reg clu ov env dom rep inc description of target
|
||||
#------------------- ---------- -------------------- ---------- --------- ------ ----- --------- ------ ----- --- --- --- --- --- --- --- --- ---------------------
|
||||
Globin PF00042.17 gi|4885477|ref|NP_005359.1| - 1e-22 80.5 0.3 1.6e-22 79.8 0.3 1.3 1 0 0 1 1 1 1 Globin
|
||||
Ig_3 PF13927.1 gi|126362951|ref|NP_001075106.1| - 9.3e-10 38.8 0.4 1.4e-09 38.3 0.4 1.3 1 0 0 1 1 1 1 Immunoglobulin domain
|
||||
Ig_2 PF13895.1 gi|126362951|ref|NP_001075106.1| - 1.6e-06 28.1 0.1 2e-06 27.8 0.1 1.1 1 0 0 1 1 1 1 Immunoglobulin domain
|
||||
Xpo1 PF08389.7 gi|22748937|ref|NP_065801.1| - 8.5e-34 116.6 7.8 1.2e-33 116.1 4.9 2.8 2 0 0 2 2 2 1 Exportin 1-like protein
|
||||
IBN_N PF03810.14 gi|22748937|ref|NP_065801.1| - 0.0044 16.9 0.0 0.036 13.9 0.0 2.7 2 0 0 2 2 2 1 Importin-beta N-terminal domain
|
||||
Rac1 PF09632.5 gi|22748937|ref|NP_065801.1| - 0.095 11.7 0.3 1.3 8.0 0.3 2.3 2 0 0 2 2 2 0 Rac1-binding domain
|
||||
Pou PF00157.12 gi|125490392|ref|NP_038661.2| - 7.6e-37 124.8 0.5 1.5e-36 123.9 0.5 1.5 1 0 0 1 1 1 1 Pou domain - N-terminal to homeobox domain
|
||||
Homeobox PF00046.24 gi|125490392|ref|NP_038661.2| - 1.8e-18 65.8 1.1 3.4e-18 64.9 1.1 1.5 1 0 0 1 1 1 1 Homeobox domain
|
||||
HTH_31 PF13560.1 gi|125490392|ref|NP_038661.2| - 0.013 15.6 0.0 0.18 12.0 0.0 2.2 2 0 0 2 2 2 0 Helix-turn-helix domain
|
||||
Homeobox_KN PF05920.6 gi|125490392|ref|NP_038661.2| - 0.043 13.5 0.0 0.1 12.2 0.0 1.6 1 0 0 1 1 1 0 Homeobox KN domain
|
||||
DUF521 PF04412.8 gi|125490392|ref|NP_038661.2| - 0.15 10.5 0.1 0.28 9.6 0.1 1.4 1 0 0 1 1 1 0 Protein of unknown function (DUF521)
|
||||
#
|
||||
# Program: hmmscan
|
||||
# Version: 3.1b1 (May 2013)
|
||||
# Pipeline mode: SCAN
|
||||
# Query file: prot_multi.fa
|
||||
# Target file: /home/bow/db/hmmer/protdb/Pfam-A.hmm
|
||||
# Option settings: hmmscan -o hmmscan/text_31b1_hmmscan_001.out --tblout hmmscan/tab_31b1_hmmscan_001.out --domtblout hmmscan/domtab_31b1_hmmscan_001.out --pfamtblout hmmscan/pfamtab_31b1_hmmscan_001.out --cpu 2 /home/bow/db/hmmer/protdb/Pfam-A.hmm prot_multi.fa
|
||||
# Current dir: /home/bow/devel/sandbox/biopy_cases
|
||||
# Date: Sun May 11 21:26:57 2014
|
||||
# [ok]
|
@ -28,6 +28,65 @@ def get_file(filename):
|
||||
|
||||
class HmmscanCases(unittest.TestCase):
|
||||
|
||||
def test_31b1_hmmscan_001(self):
    """Test parsing hmmer3-tab, hmmscan 3.1b1, multiple queries (tab_31b1_hmmscan_001)"""

    def check_attrs(obj, expectations):
        # Compare one attribute at a time, in the listed order, so the
        # first mismatch reported is the first one listed.
        for attr_name, expected in expectations:
            self.assertEqual(expected, getattr(obj, attr_name))

    parsed = list(parse(get_file('tab_31b1_hmmscan_001.out'), FMT))
    self.assertEqual(4, len(parsed))

    # opening query of the file: its only hit, and that hit's first hsp
    head_query = parsed[0]
    self.assertEqual(1, len(head_query))
    check_attrs(head_query, (
        ('id', 'gi|4885477|ref|NP_005359.1|'),
        ('acc', '-'),
    ))
    head_hit = head_query[0]
    self.assertEqual(1, len(head_hit))
    check_attrs(head_hit, (
        ('id', 'Globin'),
        ('acc', 'PF00042.17'),
        ('evalue', 1e-22),
        ('bitscore', 80.5),
        ('bias', 0.3),
        ('domain_exp_num', 1.3),
        ('region_num', 1),
        ('cluster_num', 0),
        ('overlap_num', 0),
        ('env_num', 1),
        ('domain_obs_num', 1),
        ('domain_reported_num', 1),
        ('domain_included_num', 1),
        ('description', 'Globin'),
    ))
    check_attrs(head_hit.hsps[0], (
        ('evalue', 1.6e-22),
        ('bitscore', 79.8),
        ('bias', 0.3),
    ))

    # closing query of the file: its final hit, and that hit's first hsp
    tail_query = parsed[-1]
    self.assertEqual(5, len(tail_query))
    check_attrs(tail_query, (
        ('id', 'gi|125490392|ref|NP_038661.2|'),
        ('acc', '-'),
    ))
    tail_hit = tail_query[-1]
    self.assertEqual(1, len(tail_hit))
    check_attrs(tail_hit, (
        ('id', 'DUF521'),
        ('acc', 'PF04412.8'),
        ('evalue', 0.15),
        ('bitscore', 10.5),
        ('bias', 0.1),
        ('domain_exp_num', 1.4),
        ('region_num', 1),
        ('cluster_num', 0),
        ('overlap_num', 0),
        ('env_num', 1),
        ('domain_obs_num', 1),
        ('domain_reported_num', 1),
        ('domain_included_num', 0),
        ('description', 'Protein of unknown function (DUF521)'),
    ))
    check_attrs(tail_hit.hsps[0], (
        ('evalue', 0.28),
        ('bitscore', 9.6),
        ('bias', 0.1),
    ))
|
||||
|
||||
def test_30_hmmscan_001(self):
|
||||
"Test parsing hmmer3-tab, hmmscan 3.0, multiple queries (tab_30_hmmscan_001)"
|
||||
|
||||
|
Reference in New Issue
Block a user