SearchIO: Correctly parse empty consensus lines in hmmer2 parser

Hmmer2TextParser.read_next() normally skips over lines that contain only
whitespace. The existing code for handling the consensus line already
turned off the strip() call, so consensus lines came out with the
correct length. However, if the consensus line was completely empty,
read_next() would still erroneously skip over it.
This fix disables the empty-line skip when whitespace stripping is
turned off.

Signed-off-by: Kai Blin <kai.blin@biotech.uni-tuebingen.de>
This commit is contained in:
Kai Blin
2013-05-25 11:39:41 +02:00
committed by peterjc
parent 618c0a26a7
commit c943eb5ebf
4 changed files with 74 additions and 1 deletions

View File

@ -51,7 +51,7 @@ class Hmmer2TextParser(object):
if len(self.buf) > 0:
return self.buf.pop()
self.line = self.handle.readline()
while self.line and not self.line.strip():
while self.line and rstrip and not self.line.strip():
self.line = self.handle.readline()
if self.line:
if rstrip:

View File

@ -41,6 +41,7 @@ text_21_hmmpfam_001.out single query, two matches, bioperl's hmmpfam.out fil
text_22_hmmpfam_001.out single query, one match, bioperl's L77119.hmmer file
text_23_hmmpfam_001.out single query, multiple matches, bioperl's hmmpfam_cs.out file
text_23_hmmpfam_002.out single query, no match
text_23_hmmpfam_003.out single query, one match, missing some consensus content
text_24_hmmpfam_001.out multiple queries
text_20_hmmsearch_001.out single query, multiple matches, bioperl's hmmsearch.out file
text_22_hmmsearch_001.out single query, multiple matches, bioperl's cysprot1b.hmmsearch file

View File

@ -0,0 +1,34 @@
hmmpfam - search one or more sequences against HMM database
HMMER 2.3.2 (Oct 2003)
Copyright (C) 1992-2003 HHMI/Washington University School of Medicine
Freely distributed under the GNU General Public License (GPL)
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
HMM file: antismash/specific_modules/lantipeptides/ClassIVLanti.hmm
Sequence file: -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Query sequence: small_input
Accession: [none]
Description: [none]
Scores for sequence family classification (score includes all domains):
Model Description Score E-value N
-------- ----------- ----- ------- ---
ClassIVLanti Class-IV -79.3 1 1
Parsed for domains:
Model Domain seq-f seq-t hmm-f hmm-t score E-value
-------- ------- ----- ----- ----- ----- ----- -------
ClassIVLanti 1/1 6 20 .. 1 66 [] -79.3 1
Alignments of top-scoring domains:
ClassIVLanti: domain 1 of 1, from 6 to 20: score -79.3, E = 1
*->msEEqLKAFiAKvqaDtsLqEqLKaEGADvvaiAKAaGFtitteDLn
F+ G +t Ln
small_inpu 6 -------CFL---------------------------GCLVTNWVLN 18
ahiqakeLsdeeLEgvaGg<-*
small_inpu 19 RS----------------- 20
//

View File

@ -166,6 +166,44 @@ class HmmpfamTests(unittest.TestCase):
self.assertEqual('SEQ0002', res.id)
self.assertEqual(0, len(res.hits))
def test_hmmpfam_23_missing_consensus(self):
    """Test parsing hmmpfam 2.3 file (text_23_hmmpfam_003.out)

    The fixture holds a single query with one match whose alignment is
    missing some consensus content; the assertions check that the hit,
    homology and query strings are still parsed as expected.
    """
    fixture = path.join("Hmmer", "text_23_hmmpfam_003.out")
    expected_target = 'antismash/specific_modules/lantipeptides/ClassIVLanti.hmm'
    expected_hit_seq = 'msEEqLKAFiAKvqaDtsLqEqLKaEGADvvaiAKAaGFtitteDLnahiqakeLsdeeLEgvaGg'
    expected_homology = ' F+ G +t Ln'
    expected_query_seq = '-------CFL---------------------------GCLVTNWVLNRS-----------------'

    results = parse(fixture, self.fmt)
    qresult = results.next()

    # Query-level metadata.
    self.assertEqual('small_input', qresult.id)
    self.assertEqual('[none]', qresult.description)
    self.assertEqual('[none]', qresult.accession)
    self.assertEqual('hmmpfam', qresult.program)
    self.assertEqual('2.3.2', qresult.version)
    self.assertEqual(expected_target, qresult.target)
    self.assertEqual(1, len(qresult))

    # The single hit reported for the query.
    match = qresult[0]
    self.assertEqual('ClassIVLanti', match.id)
    self.assertEqual('Class-IV', match.description)
    self.assertAlmostEqual(-79.3, match.bitscore)
    self.assertAlmostEqual(1, match.evalue)
    self.assertEqual(1, match.domain_obs_num)
    self.assertEqual(1, len(match))

    # The single domain of that hit: coordinates and scores.
    domain = match[0]
    self.assertEqual(1, domain.domain_index)
    self.assertEqual(0, domain.hit_start)
    self.assertEqual(66, domain.hit_end)
    self.assertEqual('[]', domain.hit_endtype)
    self.assertEqual(5, domain.query_start)
    self.assertEqual(20, domain.query_end)
    self.assertEqual('..', domain.query_endtype)
    self.assertAlmostEqual(-79.3, domain.bitscore)
    self.assertAlmostEqual(1, domain.evalue)

    # Alignment rows: hit, homology annotation, then query.
    self.assertEqual(expected_hit_seq, str(domain.hit.seq))
    self.assertEqual(expected_homology,
                     str(domain.aln_annotation['homology']))
    self.assertEqual(expected_query_seq, str(domain.query.seq))
def test_hmmpfam_24(self):
"""Test parsing hmmpfam 2.4 file (text_24_hmmpfam_001.out)"""
results = list(parse(path.join("Hmmer", "text_24_hmmpfam_001.out"), self.fmt))