From c943eb5ebf4760d363981e68f45d89c25aca3034 Mon Sep 17 00:00:00 2001 From: Kai Blin Date: Sat, 25 May 2013 11:39:41 +0200 Subject: [PATCH] SearchIO: Correctly parse empty consensus lines in hmmer2 parser Hmmer2TextParser.read_next() usually reads over lines only containing whitespace. The original code to deal with the consensus line would already turn off the strip() call, so the consensus lines would come out in the correct length. However, if the consensus line was completely empty, read_next() would erroneously skip over the consensus line. This fix disables the empty line skip when whitespace stripping is turned off. Signed-off-by: Kai Blin --- Bio/SearchIO/HmmerIO/hmmer2_text.py | 2 +- Tests/Hmmer/README.txt | 1 + Tests/Hmmer/text_23_hmmpfam_003.out | 34 ++++++++++++++++++++++++++ Tests/test_SearchIO_hmmer2_text.py | 38 +++++++++++++++++++++++++++++ 4 files changed, 74 insertions(+), 1 deletion(-) create mode 100644 Tests/Hmmer/text_23_hmmpfam_003.out diff --git a/Bio/SearchIO/HmmerIO/hmmer2_text.py b/Bio/SearchIO/HmmerIO/hmmer2_text.py index f4d5ae33a..f267ca412 100644 --- a/Bio/SearchIO/HmmerIO/hmmer2_text.py +++ b/Bio/SearchIO/HmmerIO/hmmer2_text.py @@ -51,7 +51,7 @@ class Hmmer2TextParser(object): if len(self.buf) > 0: return self.buf.pop() self.line = self.handle.readline() - while self.line and not self.line.strip(): + while self.line and rstrip and not self.line.strip(): self.line = self.handle.readline() if self.line: if rstrip: diff --git a/Tests/Hmmer/README.txt b/Tests/Hmmer/README.txt index e738dba0e..c1d28e31e 100644 --- a/Tests/Hmmer/README.txt +++ b/Tests/Hmmer/README.txt @@ -41,6 +41,7 @@ text_21_hmmpfam_001.out single query, two matches, bioperl's hmmpfam.out fil text_22_hmmpfam_001.out single query, one match, bioperl's L77119.hmmer file text_23_hmmpfam_001.out single query, multiple matches, bioperl's hmmpfam_cs.out file text_23_hmmpfam_002.out single query, no match +text_23_hmmpfam_003.out single query, one match, missing some consensus content text_24_hmmpfam_001.out multiple queries text_20_hmmsearch_001.out single query, multiple matches, bioperl's hmmsearch.out file text_22_hmmsearch_001.out single query, multiple matches, bioperl's cysprot1b.hmmsearch file diff --git a/Tests/Hmmer/text_23_hmmpfam_003.out b/Tests/Hmmer/text_23_hmmpfam_003.out new file mode 100644 index 000000000..1ec9f4305 --- /dev/null +++ b/Tests/Hmmer/text_23_hmmpfam_003.out @@ -0,0 +1,34 @@ +hmmpfam - search one or more sequences against HMM database +HMMER 2.3.2 (Oct 2003) +Copyright (C) 1992-2003 HHMI/Washington University School of Medicine +Freely distributed under the GNU General Public License (GPL) +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - +HMM file: antismash/specific_modules/lantipeptides/ClassIVLanti.hmm +Sequence file: - +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +Query sequence: small_input +Accession: [none] +Description: [none] + +Scores for sequence family classification (score includes all domains): +Model Description Score E-value N +-------- ----------- ----- ------- --- +ClassIVLanti Class-IV -79.3 1 1 + +Parsed for domains: +Model Domain seq-f seq-t hmm-f hmm-t score E-value +-------- ------- ----- ----- ----- ----- ----- ------- +ClassIVLanti 1/1 6 20 .. 1 66 [] -79.3 1 + +Alignments of top-scoring domains: +ClassIVLanti: domain 1 of 1, from 6 to 20: score -79.3, E = 1 + *->msEEqLKAFiAKvqaDtsLqEqLKaEGADvvaiAKAaGFtitteDLn + F+ G +t Ln + small_inpu 6 -------CFL---------------------------GCLVTNWVLN 18 + + ahiqakeLsdeeLEgvaGg<-* + + small_inpu 19 RS----------------- 20 + +// diff --git a/Tests/test_SearchIO_hmmer2_text.py b/Tests/test_SearchIO_hmmer2_text.py index 91f7bca56..f9b3db8f3 100644 --- a/Tests/test_SearchIO_hmmer2_text.py +++ b/Tests/test_SearchIO_hmmer2_text.py @@ -166,6 +166,44 @@ class HmmpfamTests(unittest.TestCase): self.assertEqual('SEQ0002', res.id) self.assertEqual(0, len(res.hits)) + def test_hmmpfam_23_missing_consensus(self): + """Test parsing hmmpfam 2.3 file (text_23_hmmpfam_003.out)""" + results = parse(path.join("Hmmer", "text_23_hmmpfam_003.out"), self.fmt) + res = results.next() + + self.assertEqual('small_input', res.id) + self.assertEqual('[none]', res.description) + self.assertEqual('[none]', res.accession) + self.assertEqual('hmmpfam', res.program) + self.assertEqual('2.3.2', res.version) + self.assertEqual('antismash/specific_modules/lantipeptides/ClassIVLanti.hmm', res.target) + self.assertEqual(1, len(res)) + + hit = res[0] + self.assertEqual('ClassIVLanti', hit.id) + self.assertEqual('Class-IV', hit.description) + self.assertAlmostEqual(-79.3, hit.bitscore) + self.assertAlmostEqual(1, hit.evalue) + self.assertEqual(1, hit.domain_obs_num) + self.assertEqual(1, len(hit)) + + hsp = hit[0] + self.assertEqual(1, hsp.domain_index) + self.assertEqual(0, hsp.hit_start) + self.assertEqual(66, hsp.hit_end) + self.assertEqual('[]', hsp.hit_endtype) + self.assertEqual(5, hsp.query_start) + self.assertEqual(20, hsp.query_end) + self.assertEqual('..', hsp.query_endtype) + self.assertAlmostEqual(-79.3, hsp.bitscore) + self.assertAlmostEqual(1, hsp.evalue) + self.assertEqual('msEEqLKAFiAKvqaDtsLqEqLKaEGADvvaiAKAaGFtitteDLnahiqakeLsdeeLEgvaGg', + str(hsp.hit.seq)) + self.assertEqual(' F+ G +t Ln', + str(hsp.aln_annotation['homology'])) + self.assertEqual('-------CFL---------------------------GCLVTNWVLNRS-----------------', + str(hsp.query.seq)) + def test_hmmpfam_24(self): """Test parsing hmmpfam 2.4 file (text_24_hmmpfam_001.out)""" results = list(parse(path.join("Hmmer", "text_24_hmmpfam_001.out"), self.fmt))