mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
Support all versions of SeqXML
This commit is contained in:
@ -79,12 +79,24 @@ class ContentHandler(handler.ContentHandler):
|
||||
)
|
||||
if self.seqXMLversion is None:
|
||||
raise ValueError("Failed to find seqXMLversion")
|
||||
elif self.seqXMLversion not in ["0.1", "0.2", "0.3", "0.4"]:
|
||||
raise ValueError("Unsupported seqXMLversion")
|
||||
url = f"http://www.seqxml.org/{self.seqXMLversion}/seqxml.xsd"
|
||||
if schema != url:
|
||||
if schema is not None and schema != url:
|
||||
raise ValueError(
|
||||
"XML Schema '%s' found not consistent with reported seqXML version %s"
|
||||
% (schema, self.seqXMLversion)
|
||||
)
|
||||
# speciesName and ncbiTaxID attributes on the root are only supported
|
||||
# in 0.4
|
||||
if self.speciesName and float(self.seqXMLversion) < 0.4:
|
||||
raise ValueError(
|
||||
"Attribute 'speciesName' on root is only supported in version 0.4"
|
||||
)
|
||||
if self.ncbiTaxID and float(self.seqXMLversion) < 0.4:
|
||||
raise ValueError(
|
||||
"Attribute 'speciesName' on root is only supported in version 0.4"
|
||||
)
|
||||
self.endElementNS = self.endSeqXMLElement
|
||||
self.startElementNS = self.startEntryElement
|
||||
|
||||
@ -117,7 +129,7 @@ class ContentHandler(handler.ContentHandler):
|
||||
if namespace is None:
|
||||
if localname == "id":
|
||||
record.id = value
|
||||
elif localname == "source":
|
||||
elif localname == "source" and float(self.seqXMLversion) >= 0.3:
|
||||
record.annotations["source"] = value
|
||||
else:
|
||||
raise ValueError(
|
||||
@ -139,6 +151,8 @@ class ContentHandler(handler.ContentHandler):
|
||||
raise ValueError("Expected to find the end of an entry element")
|
||||
if qname is not None:
|
||||
raise RuntimeError("Unexpected qname for entry element")
|
||||
if self.records[-1].seq is None:
|
||||
raise ValueError("Failed to find a sequence for entry element")
|
||||
self.startElementNS = self.startEntryElement
|
||||
self.endElementNS = self.endSeqXMLElement
|
||||
|
||||
@ -155,9 +169,16 @@ class ContentHandler(handler.ContentHandler):
|
||||
return self.startSpeciesElement(attrs)
|
||||
if localname == "description":
|
||||
return self.startDescriptionElement(attrs)
|
||||
if localname in ("DNAseq", "RNAseq", "AAseq"):
|
||||
if (
|
||||
localname in ("DNAseq", "RNAseq", "AAseq")
|
||||
and float(self.seqXMLversion) >= 0.2
|
||||
) or (
|
||||
localname in ["dnaSeq", "rnaSeq", "aaSeq"] and self.seqXMLversion == "0.1"
|
||||
):
|
||||
return self.startSequenceElement(attrs)
|
||||
if localname == "DBRef":
|
||||
if (localname == "DBRef" and float(self.seqXMLversion) >= 0.2) or (
|
||||
localname == "alternativeID" and self.seqXMLversion == "0.1"
|
||||
):
|
||||
return self.startDBRefElement(attrs)
|
||||
if localname == "property":
|
||||
return self.startPropertyElement(attrs)
|
||||
@ -251,11 +272,17 @@ class ContentHandler(handler.ContentHandler):
|
||||
if qname is not None:
|
||||
raise RuntimeError(f"Unexpected qname '{qname}' for sequence end")
|
||||
record = self.records[-1]
|
||||
if localname == "DNAseq":
|
||||
if (localname == "DNAseq" and float(self.seqXMLversion) >= 0.2) or (
|
||||
localname == "dnaSeq" and self.seqXMLversion == "0.1"
|
||||
):
|
||||
record.annotations["molecule_type"] = "DNA"
|
||||
elif localname == "RNAseq":
|
||||
elif (localname == "RNAseq" and float(self.seqXMLversion) >= 0.2) or (
|
||||
localname == "rnaSeq" and self.seqXMLversion == "0.1"
|
||||
):
|
||||
record.annotations["molecule_type"] = "RNA"
|
||||
elif localname == "AAseq":
|
||||
elif (localname == "AAseq" and float(self.seqXMLversion) >= 0.2) or (
|
||||
localname == "aaSeq" and self.seqXMLversion == "0.1"
|
||||
):
|
||||
record.annotations["molecule_type"] = "protein"
|
||||
else:
|
||||
raise RuntimeError(
|
||||
@ -267,12 +294,15 @@ class ContentHandler(handler.ContentHandler):
|
||||
|
||||
def startDBRefElement(self, attrs):
|
||||
"""Parse a database cross reference."""
|
||||
TYPE = None
|
||||
source = None
|
||||
ID = None
|
||||
for key, value in attrs.items():
|
||||
namespace, localname = key
|
||||
if namespace is None:
|
||||
if localname == "source":
|
||||
if localname == "type":
|
||||
TYPE = value
|
||||
elif localname == "source":
|
||||
source = value
|
||||
elif localname == "id":
|
||||
ID = value
|
||||
@ -284,11 +314,14 @@ class ContentHandler(handler.ContentHandler):
|
||||
raise ValueError(
|
||||
f"Unexpected namespace '{namespace}' for DBRef attribute"
|
||||
)
|
||||
# The attributes "source" and "id" are required:
|
||||
# The attributes "source" and "id" are required, and "type" in versions
|
||||
# 0.2-0.3:
|
||||
if source is None:
|
||||
raise ValueError("Failed to find source for DBRef element")
|
||||
if ID is None:
|
||||
raise ValueError("Failed to find id for DBRef element")
|
||||
if TYPE is None and 0.2 <= float(self.seqXMLversion) <= 0.3:
|
||||
raise ValueError("Failed to find type for DBRef element")
|
||||
if self.data is not None:
|
||||
raise RuntimeError(f"Unexpected data found: '{self.data}'")
|
||||
self.data = ""
|
||||
@ -305,7 +338,9 @@ class ContentHandler(handler.ContentHandler):
|
||||
raise RuntimeError(f"Unexpected namespace '{namespace}' for DBRef element")
|
||||
if qname is not None:
|
||||
raise RuntimeError(f"Unexpected qname '{qname}' for DBRef element")
|
||||
if localname != "DBRef":
|
||||
if (localname != "DBRef" and float(self.seqXMLversion) >= 0.2) or (
|
||||
localname != "alternativeID" and self.seqXMLversion == "0.1"
|
||||
):
|
||||
raise RuntimeError(f"Unexpected localname '{localname}' for DBRef element")
|
||||
if self.data:
|
||||
raise RuntimeError(
|
||||
|
6
Tests/SeqXML/corrupt_example3.xml
Normal file
6
Tests/SeqXML/corrupt_example3.xml
Normal file
@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<seqXML seqXMLversion="0.4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://www.seqxml.org/0.3/seqxml.xsd" speciesName="fake">
|
||||
<entry id="fake">
|
||||
<description>entry has no sequence data</description>
|
||||
</entry>
|
||||
</seqXML>
|
13
Tests/SeqXML/version_01_example.xml
Normal file
13
Tests/SeqXML/version_01_example.xml
Normal file
@ -0,0 +1,13 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<seqXML seqXMLversion="0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://www.seqxml.org/0.1/seqxml.xsd">
|
||||
<entry id="fakeDNA">
|
||||
<dnaSeq>AAAAAACGTAAA</dnaSeq>
|
||||
<alternativeID source="db" id="1"/>
|
||||
</entry>
|
||||
<entry id="fakeRNA">
|
||||
<rnaSeq>AAAUUUUCTGAA</rnaSeq>
|
||||
</entry>
|
||||
<entry id="fakeAA">
|
||||
<aaSeq>GAKKVFIEDVSKEFVEEFIWPAVQSSALYE</aaSeq>
|
||||
</entry>
|
||||
</seqXML>
|
14
Tests/SeqXML/wrong_version1.xml
Normal file
14
Tests/SeqXML/wrong_version1.xml
Normal file
@ -0,0 +1,14 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<seqXML seqXMLversion="0.2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://www.seqxml.org/0.2/seqxml.xsd">
|
||||
<entry id="fakeDNA">
|
||||
<DNAseq>AAAAAACGTAAA</DNAseq>
|
||||
<!-- Should fail because v0.2 DBRef tags must have a type attribute -->
|
||||
<DBRef source="test" id="test"/>
|
||||
</entry>
|
||||
<entry id="fakeRNA">
|
||||
<RNAseq>AAAUUUUCTGAA</RNAseq>
|
||||
</entry>
|
||||
<entry id="fakeAA">
|
||||
<AAseq>GAKKVFIEDVSKEFVEEFIWPAVQSSALYE</AAseq>
|
||||
</entry>
|
||||
</seqXML>
|
8
Tests/SeqXML/wrong_version2.xml
Normal file
8
Tests/SeqXML/wrong_version2.xml
Normal file
@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!-- Should fail because speciesName in root is unsupported in v0.3 -->
|
||||
<seqXML seqXMLversion="0.3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://www.seqxml.org/0.3/seqxml.xsd" speciesName="fake">
|
||||
<entry id="fakeDNA">
|
||||
<DNAseq>AAAAAACGTAAA</DNAseq>
|
||||
<DBRef source="test" id="test"/>
|
||||
</entry>
|
||||
</seqXML>
|
7
Tests/SeqXML/wrong_version3.xml
Normal file
7
Tests/SeqXML/wrong_version3.xml
Normal file
@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<seqXML seqXMLversion="0.2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://www.seqxml.org/0.2/seqxml.xsd" speciesName="fake">
|
||||
<!-- Should fail because "source" attribute on entries is only version >=0.3 -->
|
||||
<entry id="fakeDNA" source="Fake">
|
||||
<DNAseq>AAAAAACGTAAA</DNAseq>
|
||||
</entry>
|
||||
</seqXML>
|
@ -226,6 +226,29 @@ class TestReadCorruptFiles(unittest.TestCase):
|
||||
|
||||
self.assertRaises(ValueError, f, "SeqXML/corrupt_example1.xml")
|
||||
self.assertRaises(ValueError, f, "SeqXML/corrupt_example2.xml")
|
||||
self.assertRaises(ValueError, f, "SeqXML/corrupt_example3.xml")
|
||||
|
||||
|
||||
class TestOldVersions(unittest.TestCase):
|
||||
def test_version01(self):
|
||||
"""Test for version 0.1 specific features."""
|
||||
records = list(SeqIO.parse("SeqXML/version_01_example.xml", "seqxml"))
|
||||
self.assertEqual(records[0].seq, "AAAAAACGTAAA")
|
||||
self.assertEqual(records[0].dbxrefs[0], "db:1")
|
||||
self.assertEqual(records[1].seq, "AAAUUUUCTGAA")
|
||||
self.assertEqual(records[2].seq, "GAKKVFIEDVSKEFVEEFIWPAVQSSALYE")
|
||||
|
||||
def test_wrong_version(self):
|
||||
"""Handling of wrong versions."""
|
||||
|
||||
def f(path):
|
||||
records = SeqIO.parse(path, "seqxml")
|
||||
for record in records:
|
||||
pass
|
||||
|
||||
self.assertRaises(ValueError, f, "SeqXML/wrong_version1.xml")
|
||||
self.assertRaises(ValueError, f, "SeqXML/wrong_version2.xml")
|
||||
self.assertRaises(ValueError, f, "SeqXML/wrong_version3.xml")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
Reference in New Issue
Block a user