mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
SearchIO: Correctly parse empty consensus lines in hmmer2 parser
Hmmer2TextParser.read_next() usually reads over lines only containing whitespace. The original code to deal with the consensus line would already turn off the strip() call, so the consensus lines would come out in the correct length. However, if the consensus line was completely empty, read_next() would erroneously skip over the consensus line. This fix disables the empty line skip when whitespace stripping is turned off. Signed-off-by: Kai Blin <kai.blin@biotech.uni-tuebingen.de>
This commit is contained in:
@ -51,7 +51,7 @@ class Hmmer2TextParser(object):
|
||||
if len(self.buf) > 0:
|
||||
return self.buf.pop()
|
||||
self.line = self.handle.readline()
|
||||
while self.line and not self.line.strip():
|
||||
while self.line and rstrip and not self.line.strip():
|
||||
self.line = self.handle.readline()
|
||||
if self.line:
|
||||
if rstrip:
|
||||
|
@ -41,6 +41,7 @@ text_21_hmmpfam_001.out single query, two matches, bioperl's hmmpfam.out fil
|
||||
text_22_hmmpfam_001.out single query, one match, bioperl's L77119.hmmer file
|
||||
text_23_hmmpfam_001.out single query, multiple matches, bioperl's hmmpfam_cs.out file
|
||||
text_23_hmmpfam_002.out single query, no match
|
||||
text_23_hmmpfam_003.out single query, one match, missing some consensus content
|
||||
text_24_hmmpfam_001.out multiple queries
|
||||
text_20_hmmsearch_001.out single query, multiple matches, bioperl's hmmsearch.out file
|
||||
text_22_hmmsearch_001.out single query, multiple matches, bioperl's cysprot1b.hmmsearch file
|
||||
|
34
Tests/Hmmer/text_23_hmmpfam_003.out
Normal file
34
Tests/Hmmer/text_23_hmmpfam_003.out
Normal file
@ -0,0 +1,34 @@
|
||||
hmmpfam - search one or more sequences against HMM database
|
||||
HMMER 2.3.2 (Oct 2003)
|
||||
Copyright (C) 1992-2003 HHMI/Washington University School of Medicine
|
||||
Freely distributed under the GNU General Public License (GPL)
|
||||
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
HMM file: antismash/specific_modules/lantipeptides/ClassIVLanti.hmm
|
||||
Sequence file: -
|
||||
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
||||
|
||||
Query sequence: small_input
|
||||
Accession: [none]
|
||||
Description: [none]
|
||||
|
||||
Scores for sequence family classification (score includes all domains):
|
||||
Model Description Score E-value N
|
||||
-------- ----------- ----- ------- ---
|
||||
ClassIVLanti Class-IV -79.3 1 1
|
||||
|
||||
Parsed for domains:
|
||||
Model Domain seq-f seq-t hmm-f hmm-t score E-value
|
||||
-------- ------- ----- ----- ----- ----- ----- -------
|
||||
ClassIVLanti 1/1 6 20 .. 1 66 [] -79.3 1
|
||||
|
||||
Alignments of top-scoring domains:
|
||||
ClassIVLanti: domain 1 of 1, from 6 to 20: score -79.3, E = 1
|
||||
*->msEEqLKAFiAKvqaDtsLqEqLKaEGADvvaiAKAaGFtitteDLn
|
||||
F+ G +t Ln
|
||||
small_inpu 6 -------CFL---------------------------GCLVTNWVLN 18
|
||||
|
||||
ahiqakeLsdeeLEgvaGg<-*
|
||||
|
||||
small_inpu 19 RS----------------- 20
|
||||
|
||||
//
|
@ -166,6 +166,44 @@ class HmmpfamTests(unittest.TestCase):
|
||||
self.assertEqual('SEQ0002', res.id)
|
||||
self.assertEqual(0, len(res.hits))
|
||||
|
||||
def test_hmmpfam_23_missing_consensus(self):
|
||||
"""Test parsing hmmpfam 2.3 file (text_23_hmmpfam_003.out)"""
|
||||
results = parse(path.join("Hmmer", "text_23_hmmpfam_003.out"), self.fmt)
|
||||
res = results.next()
|
||||
|
||||
self.assertEqual('small_input', res.id)
|
||||
self.assertEqual('[none]', res.description)
|
||||
self.assertEqual('[none]', res.accession)
|
||||
self.assertEqual('hmmpfam', res.program)
|
||||
self.assertEqual('2.3.2', res.version)
|
||||
self.assertEqual('antismash/specific_modules/lantipeptides/ClassIVLanti.hmm', res.target)
|
||||
self.assertEqual(1, len(res))
|
||||
|
||||
hit = res[0]
|
||||
self.assertEqual('ClassIVLanti', hit.id)
|
||||
self.assertEqual('Class-IV', hit.description)
|
||||
self.assertAlmostEqual(-79.3, hit.bitscore)
|
||||
self.assertAlmostEqual(1, hit.evalue)
|
||||
self.assertEqual(1, hit.domain_obs_num)
|
||||
self.assertEqual(1, len(hit))
|
||||
|
||||
hsp = hit[0]
|
||||
self.assertEqual(1, hsp.domain_index)
|
||||
self.assertEqual(0, hsp.hit_start)
|
||||
self.assertEqual(66, hsp.hit_end)
|
||||
self.assertEqual('[]', hsp.hit_endtype)
|
||||
self.assertEqual(5, hsp.query_start)
|
||||
self.assertEqual(20, hsp.query_end)
|
||||
self.assertEqual('..', hsp.query_endtype)
|
||||
self.assertAlmostEqual(-79.3, hsp.bitscore)
|
||||
self.assertAlmostEqual(1, hsp.evalue)
|
||||
self.assertEqual('msEEqLKAFiAKvqaDtsLqEqLKaEGADvvaiAKAaGFtitteDLnahiqakeLsdeeLEgvaGg',
|
||||
str(hsp.hit.seq))
|
||||
self.assertEqual(' F+ G +t Ln',
|
||||
str(hsp.aln_annotation['homology']))
|
||||
self.assertEqual('-------CFL---------------------------GCLVTNWVLNRS-----------------',
|
||||
str(hsp.query.seq))
|
||||
|
||||
def test_hmmpfam_24(self):
|
||||
"""Test parsing hmmpfam 2.4 file (text_24_hmmpfam_001.out)"""
|
||||
results = list(parse(path.join("Hmmer", "text_24_hmmpfam_001.out"), self.fmt))
|
||||
|
Reference in New Issue
Block a user