mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
Ignore but warn on invalid EMBL DR lines (e.g. from RepBase)
Solves issue #1579. Adds example with invalid DR structure, Tests/EMBL/RepBase23.02.embl
This commit is contained in:
@ -876,6 +876,9 @@ class EmblScanner(InsdcScanner):
|
||||
parts = data.rstrip(".").split(";")
|
||||
# Turn it into "database_identifier:primary_identifier" to
|
||||
# mimic the GenBank parser. e.g. "MGI:98599"
|
||||
if len(parts) == 1:
|
||||
warnings.warn("Malformed DR line in EMBL file.", BiopythonParserWarning)
|
||||
else:
|
||||
consumer.dblink("%s:%s" % (parts[0].strip(),
|
||||
parts[1].strip()))
|
||||
elif line_type == 'RA':
|
||||
|
43
Tests/EMBL/RepBase23.02.embl
Normal file
43
Tests/EMBL/RepBase23.02.embl
Normal file
@ -0,0 +1,43 @@
|
||||
ID MuDR-N273_OS repbase; DNA; ORY; 799 BP.
|
||||
XX
|
||||
AC .
|
||||
XX
|
||||
DT 26-OCT-2017 (Rel. 22.11, Created)
|
||||
DT 26-OCT-2017 (Rel. 22.11, Last updated, Version 1)
|
||||
XX
|
||||
DE DNA transposon - consensus.
|
||||
XX
|
||||
KW MuDR; DNA transposon; Transposable Element; Nonautonomous;
|
||||
KW MuDR-N273_OS.
|
||||
XX
|
||||
OS Oryza sativa
|
||||
OC Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
|
||||
OC Spermatophyta; Magnoliophyta; Liliopsida; Poales; Poaceae;
|
||||
OC BOP clade; Oryzoideae; Oryzeae; Oryzinae; Oryza.
|
||||
XX
|
||||
RN [1]
|
||||
RP 1-799
|
||||
RA Bao W.;
|
||||
RT "DNA transposons from the rice genome.";
|
||||
RL Repbase Reports 17(11), 1316-1316 (2017).
|
||||
XX
|
||||
DR [1] (Consensus)
|
||||
XX
|
||||
CC ~91% identical to the consensus.
|
||||
XX
|
||||
SQ Sequence 799 BP; 262 A; 122 C; 125 G; 289 T; 1 other;
|
||||
ggggtttttt tttaactatt tgccactctt acggagtgsc actattgaga tgccactctt 60
|
||||
ccaaattttt ttacacaaac accacatgag agagatattt ttcctaacta gagtgccatg 120
|
||||
tcagcaagag agacttgaaa tgaccttttt acccctagtt tgagaaatat gagagattat 180
|
||||
atacaaacat atataagaaa atattattta ttatatctta aaaagagtta tcacgggcaa 240
|
||||
atatacgtcg aacaagttca ccataattta ataatagatt caacataggg tgaacttgtt 300
|
||||
caacatatat ttatccgtga taactctttt tagatataat atagtacgta cttactatgt 360
|
||||
tgaatctatt attaaattgt ggtgaacttg tccaacgtat actgctgtga taactctttt 420
|
||||
ttagatacaa tatattgtgt tgaatctatt attaaattgt ggtgaacttg ttcaacgtat 480
|
||||
attgcccgtg ataaattttt ttagatataa tatactatgc taaatctatt attaaattgt 540
|
||||
ggtgaacttg ttcaacgtat atttgcccgt gataactctt ttttaagata taataaacaa 600
|
||||
tattttctta tatatgtttg tatataatct ctcatatttc tcaaactagg ggtaaaaagg 660
|
||||
tcatttcaag tcccccttgc tgacatggca ctctagttag gaaaaatatc tctctcatgt 720
|
||||
ggtgtttgtg taaaaaaatt tggaagagtg gcatctcaat agtggcactc cgtaagagtg 780
|
||||
gcaaatagtt aaaaacccc 799
|
||||
//
|
@ -51,6 +51,13 @@ class EMBLTests(unittest.TestCase):
|
||||
self.assertEqual(rec.seq[:10], "MVLSEGEWQL")
|
||||
self.assertEqual(rec.seq[-10:], "AKYKELGYQG")
|
||||
|
||||
def test_embl_wrong_dr_line(self):
    """Check that a malformed DR line triggers the parser warning.

    The RepBase example file contains a DR line without the usual
    semicolon-separated fields. Reading it should succeed and emit a
    "Malformed DR line in EMBL file." warning.
    """
    with warnings.catch_warnings(record=True) as caught:
        # Record every warning, even if an identical one was already
        # issued earlier in the test run.
        warnings.simplefilter("always")
        SeqIO.read('EMBL/RepBase23.02.embl', 'embl')
    # Compare against the warning text itself: the second argument of
    # assertTrue is only the failure message, so it verifies nothing.
    messages = [str(warning.message) for warning in caught]
    self.assertIn("Malformed DR line in EMBL file.", messages)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
runner = unittest.TextTestRunner(verbosity=2)
|
||||
|
Reference in New Issue
Block a user