diff --git a/Bio/GenBank/Scanner.py b/Bio/GenBank/Scanner.py index a8e44668e..b348ba9e3 100644 --- a/Bio/GenBank/Scanner.py +++ b/Bio/GenBank/Scanner.py @@ -876,7 +876,10 @@ class EmblScanner(InsdcScanner): parts = data.rstrip(".").split(";") # Turn it into "database_identifier:primary_identifier" to # mimic the GenBank parser. e.g. "MGI:98599" - consumer.dblink("%s:%s" % (parts[0].strip(), + if len(parts) == 1: + warnings.warn("Malformed DR line in EMBL file.", BiopythonParserWarning) + else: + consumer.dblink("%s:%s" % (parts[0].strip(), parts[1].strip())) elif line_type == 'RA': # Remove trailing ; at end of authors list diff --git a/Tests/EMBL/RepBase23.02.embl b/Tests/EMBL/RepBase23.02.embl new file mode 100644 index 000000000..31de2d0e6 --- /dev/null +++ b/Tests/EMBL/RepBase23.02.embl @@ -0,0 +1,43 @@ +ID MuDR-N273_OS repbase; DNA; ORY; 799 BP. +XX +AC . +XX +DT 26-OCT-2017 (Rel. 22.11, Created) +DT 26-OCT-2017 (Rel. 22.11, Last updated, Version 1) +XX +DE DNA transposon - consensus. +XX +KW MuDR; DNA transposon; Transposable Element; Nonautonomous; +KW MuDR-N273_OS. +XX +OS Oryza sativa +OC Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; +OC Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae; +OC BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza. +XX +RN [1] +RP 1-799 +RA Bao W.; +RT "DNA transposons from the rice genome."; +RL Repbase Reports 17(11), 1316-1316 (2017). +XX +DR [1] (Consensus) +XX +CC ~91% identical to the consensus. +XX +SQ Sequence 799 BP; 262 A; 122 C; 125 G; 289 T; 1 other; + ggggtttttt tttaactatt tgccactctt acggagtgsc actattgaga tgccactctt 60 + ccaaattttt ttacacaaac accacatgag agagatattt ttcctaacta gagtgccatg 120 + tcagcaagag agacttgaaa tgaccttttt acccctagtt tgagaaatat gagagattat 180 + atacaaacat atataagaaa atattattta ttatatctta aaaagagtta tcacgggcaa 240 + atatacgtcg aacaagttca ccataattta ataatagatt caacataggg tgaacttgtt 300 + caacatatat ttatccgtga taactctttt tagatataat atagtacgta cttactatgt 360 + tgaatctatt attaaattgt ggtgaacttg tccaacgtat actgctgtga taactctttt 420 + ttagatacaa tatattgtgt tgaatctatt attaaattgt ggtgaacttg ttcaacgtat 480 + attgcccgtg ataaattttt ttagatataa tatactatgc taaatctatt attaaattgt 540 + ggtgaacttg ttcaacgtat atttgcccgt gataactctt ttttaagata taataaacaa 600 + tattttctta tatatgtttg tatataatct ctcatatttc tcaaactagg ggtaaaaagg 660 + tcatttcaag tcccccttgc tgacatggca ctctagttag gaaaaatatc tctctcatgt 720 + ggtgtttgtg taaaaaaatt tggaagagtg gcatctcaat agtggcactc cgtaagagtg 780 + gcaaatagtt aaaaacccc 799 +// \ No newline at end of file diff --git a/Tests/test_EMBL_unittest.py b/Tests/test_EMBL_unittest.py index 55d990e12..e6cdc7fcb 100644 --- a/Tests/test_EMBL_unittest.py +++ b/Tests/test_EMBL_unittest.py @@ -51,6 +51,13 @@ class EMBLTests(unittest.TestCase): self.assertEqual(rec.seq[:10], "MVLSEGEWQL") self.assertEqual(rec.seq[-10:], "AKYKELGYQG") + def test_embl_wrong_dr_line(self): + """Test files with wrong DR lines""" + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + fasta_seq = SeqIO.read('EMBL/RepBase23.02.embl', 'embl') + self.assertTrue(w, "Malformed DR line in EMBL file.") + if __name__ == "__main__": runner = unittest.TextTestRunner(verbosity=2)