mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
651 lines
27 KiB
Python
651 lines
27 KiB
Python
# Copyright 2010 by Thomas Schmitt.
|
|
#
|
|
# This file is part of the Biopython distribution and governed by your
|
|
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
|
|
# Please see the LICENSE file that should have been included as part of this
|
|
# package.
|
|
"""Bio.SeqIO support for the "seqxml" file format, SeqXML.
|
|
|
|
This module is for reading and writing SeqXML format files as
|
|
SeqRecord objects, and is expected to be used via the Bio.SeqIO API.
|
|
|
|
SeqXML is a lightweight XML format which is supposed be an alternative for
|
|
FASTA files. For more Information see http://www.seqXML.org and Schmitt et al
|
|
(2011), https://doi.org/10.1093/bib/bbr025
|
|
"""
|
|
from xml import sax
|
|
from xml.sax import handler
|
|
from xml.sax.saxutils import XMLGenerator
|
|
from xml.sax.xmlreader import AttributesImpl
|
|
|
|
from Bio.Seq import Seq
|
|
from Bio.SeqRecord import SeqRecord
|
|
|
|
from .Interfaces import SequenceIterator
|
|
from .Interfaces import SequenceWriter
|
|
|
|
|
|
class ContentHandler(handler.ContentHandler):
|
|
"""Handles XML events generated by the parser (PRIVATE)."""
|
|
|
|
def __init__(self):
|
|
"""Create a handler to handle XML events."""
|
|
super().__init__()
|
|
self.source = None
|
|
self.sourceVersion = None
|
|
self.seqXMLversion = None
|
|
self.ncbiTaxID = None
|
|
self.speciesName = None
|
|
self.startElementNS = None
|
|
self.data = None
|
|
self.records = []
|
|
|
|
def startDocument(self):
|
|
"""Set XML handlers when an XML declaration is found."""
|
|
self.startElementNS = self.startSeqXMLElement
|
|
|
|
def startSeqXMLElement(self, name, qname, attrs):
|
|
"""Handle start of a seqXML element."""
|
|
if name != (None, "seqXML"):
|
|
raise ValueError("Failed to find the start of seqXML element")
|
|
if qname is not None:
|
|
raise RuntimeError("Unexpected qname for seqXML element")
|
|
schema = None
|
|
for key, value in attrs.items():
|
|
namespace, localname = key
|
|
if namespace is None:
|
|
if localname == "source":
|
|
self.source = value
|
|
elif localname == "sourceVersion":
|
|
self.sourceVersion = value
|
|
elif localname == "seqXMLversion":
|
|
self.seqXMLversion = value
|
|
elif localname == "ncbiTaxID":
|
|
# check if it is an integer, but store as string
|
|
number = int(value)
|
|
self.ncbiTaxID = value
|
|
elif localname == "speciesName":
|
|
self.speciesName = value
|
|
else:
|
|
raise ValueError("Unexpected attribute for XML Schema")
|
|
elif namespace == "http://www.w3.org/2001/XMLSchema-instance":
|
|
if localname == "noNamespaceSchemaLocation":
|
|
schema = value
|
|
else:
|
|
raise ValueError("Unexpected attribute for XML Schema in namespace")
|
|
else:
|
|
raise ValueError(
|
|
f"Unexpected namespace '{namespace}' for seqXML attribute"
|
|
)
|
|
if self.seqXMLversion is None:
|
|
raise ValueError("Failed to find seqXMLversion")
|
|
url = f"http://www.seqxml.org/{self.seqXMLversion}/seqxml.xsd"
|
|
if schema != url:
|
|
raise ValueError(
|
|
"XML Schema '%s' found not consistent with reported seqXML version %s"
|
|
% (schema, self.seqXMLversion)
|
|
)
|
|
self.endElementNS = self.endSeqXMLElement
|
|
self.startElementNS = self.startEntryElement
|
|
|
|
def endSeqXMLElement(self, name, qname):
|
|
"""Handle end of the seqXML element."""
|
|
namespace, localname = name
|
|
if namespace is not None:
|
|
raise RuntimeError(f"Unexpected namespace '{namespace}' for seqXML end")
|
|
if qname is not None:
|
|
raise RuntimeError(f"Unexpected qname '{qname}' for seqXML end")
|
|
if localname != "seqXML":
|
|
raise RuntimeError("Failed to find end of seqXML element")
|
|
self.startElementNS = None
|
|
self.endElementNS = None
|
|
|
|
def startEntryElement(self, name, qname, attrs):
|
|
"""Set new entry with id and the optional entry source (PRIVATE)."""
|
|
if name != (None, "entry"):
|
|
raise ValueError("Expected to find the start of an entry element")
|
|
if qname is not None:
|
|
raise RuntimeError("Unexpected qname for entry element")
|
|
record = SeqRecord("", id=None)
|
|
if self.speciesName is not None:
|
|
record.annotations["organism"] = self.speciesName
|
|
if self.ncbiTaxID is not None:
|
|
record.annotations["ncbi_taxid"] = self.ncbiTaxID
|
|
record.annotations["source"] = self.source
|
|
for key, value in attrs.items():
|
|
namespace, localname = key
|
|
if namespace is None:
|
|
if localname == "id":
|
|
record.id = value
|
|
elif localname == "source":
|
|
record.annotations["source"] = value
|
|
else:
|
|
raise ValueError(
|
|
f"Unexpected attribute {localname} in entry element"
|
|
)
|
|
else:
|
|
raise ValueError(
|
|
f"Unexpected namespace '{namespace}' for entry attribute"
|
|
)
|
|
if record.id is None:
|
|
raise ValueError("Failed to find entry ID")
|
|
self.records.append(record)
|
|
self.startElementNS = self.startEntryFieldElement
|
|
self.endElementNS = self.endEntryElement
|
|
|
|
def endEntryElement(self, name, qname):
|
|
"""Handle end of an entry element."""
|
|
if name != (None, "entry"):
|
|
raise ValueError("Expected to find the end of an entry element")
|
|
if qname is not None:
|
|
raise RuntimeError("Unexpected qname for entry element")
|
|
self.startElementNS = self.startEntryElement
|
|
self.endElementNS = self.endSeqXMLElement
|
|
|
|
def startEntryFieldElement(self, name, qname, attrs):
|
|
"""Receive a field of an entry element and forward it."""
|
|
namespace, localname = name
|
|
if namespace is not None:
|
|
raise ValueError(
|
|
f"Unexpected namespace '{namespace}' for {localname} element"
|
|
)
|
|
if qname is not None:
|
|
raise RuntimeError(f"Unexpected qname '{qname}' for {localname} element")
|
|
if localname == "species":
|
|
return self.startSpeciesElement(attrs)
|
|
if localname == "description":
|
|
return self.startDescriptionElement(attrs)
|
|
if localname in ("DNAseq", "RNAseq", "AAseq"):
|
|
return self.startSequenceElement(attrs)
|
|
if localname == "DBRef":
|
|
return self.startDBRefElement(attrs)
|
|
if localname == "property":
|
|
return self.startPropertyElement(attrs)
|
|
raise ValueError(f"Unexpected field {localname} in entry")
|
|
|
|
def startSpeciesElement(self, attrs):
|
|
"""Parse the species information."""
|
|
name = None
|
|
ncbiTaxID = None
|
|
for key, value in attrs.items():
|
|
namespace, localname = key
|
|
if namespace is None:
|
|
if localname == "name":
|
|
name = value
|
|
elif localname == "ncbiTaxID":
|
|
# check if it is an integer, but store as string
|
|
number = int(value)
|
|
ncbiTaxID = value
|
|
else:
|
|
raise ValueError(
|
|
f"Unexpected attribute '{key}' found in species tag"
|
|
)
|
|
else:
|
|
raise ValueError(
|
|
f"Unexpected namespace '{namespace}' for species attribute"
|
|
)
|
|
# The attributes "name" and "ncbiTaxID" are required:
|
|
if name is None:
|
|
raise ValueError("Failed to find species name")
|
|
if ncbiTaxID is None:
|
|
raise ValueError("Failed to find ncbiTaxId")
|
|
record = self.records[-1]
|
|
# The keywords for the species annotation are taken from SwissIO
|
|
record.annotations["organism"] = name
|
|
# TODO - Should have been a list to match SwissProt parser:
|
|
record.annotations["ncbi_taxid"] = ncbiTaxID
|
|
self.endElementNS = self.endSpeciesElement
|
|
|
|
def endSpeciesElement(self, name, qname):
|
|
"""Handle end of a species element."""
|
|
namespace, localname = name
|
|
if namespace is not None:
|
|
raise RuntimeError(f"Unexpected namespace '{namespace}' for species end")
|
|
if qname is not None:
|
|
raise RuntimeError(f"Unexpected qname '{qname}' for species end")
|
|
if localname != "species":
|
|
raise RuntimeError("Failed to find end of species element")
|
|
self.endElementNS = self.endEntryElement
|
|
|
|
def startDescriptionElement(self, attrs):
|
|
"""Parse the description."""
|
|
if attrs:
|
|
raise ValueError("Unexpected attributes found in description element")
|
|
if self.data is not None:
|
|
raise RuntimeError(f"Unexpected data found: '{self.data}'")
|
|
self.data = ""
|
|
self.endElementNS = self.endDescriptionElement
|
|
|
|
def endDescriptionElement(self, name, qname):
|
|
"""Handle the end of a description element."""
|
|
namespace, localname = name
|
|
if namespace is not None:
|
|
raise RuntimeError(
|
|
f"Unexpected namespace '{namespace}' for description end"
|
|
)
|
|
if qname is not None:
|
|
raise RuntimeError(f"Unexpected qname '{qname}' for description end")
|
|
if localname != "description":
|
|
raise RuntimeError("Failed to find end of description element")
|
|
record = self.records[-1]
|
|
description = self.data
|
|
if description: # ignore if empty string
|
|
record.description = description
|
|
self.data = None
|
|
self.endElementNS = self.endEntryElement
|
|
|
|
def startSequenceElement(self, attrs):
|
|
"""Parse DNA, RNA, or protein sequence."""
|
|
if attrs:
|
|
raise ValueError("Unexpected attributes found in sequence element")
|
|
if self.data is not None:
|
|
raise RuntimeError(f"Unexpected data found: '{self.data}'")
|
|
self.data = ""
|
|
self.endElementNS = self.endSequenceElement
|
|
|
|
def endSequenceElement(self, name, qname):
|
|
"""Handle the end of a sequence element."""
|
|
namespace, localname = name
|
|
if namespace is not None:
|
|
raise RuntimeError(f"Unexpected namespace '{namespace}' for sequence end")
|
|
if qname is not None:
|
|
raise RuntimeError(f"Unexpected qname '{qname}' for sequence end")
|
|
record = self.records[-1]
|
|
if localname == "DNAseq":
|
|
record.annotations["molecule_type"] = "DNA"
|
|
elif localname == "RNAseq":
|
|
record.annotations["molecule_type"] = "RNA"
|
|
elif localname == "AAseq":
|
|
record.annotations["molecule_type"] = "protein"
|
|
else:
|
|
raise RuntimeError(
|
|
f"Failed to find end of sequence (localname = {localname})"
|
|
)
|
|
record.seq = Seq(self.data)
|
|
self.data = None
|
|
self.endElementNS = self.endEntryElement
|
|
|
|
def startDBRefElement(self, attrs):
|
|
"""Parse a database cross reference."""
|
|
source = None
|
|
ID = None
|
|
for key, value in attrs.items():
|
|
namespace, localname = key
|
|
if namespace is None:
|
|
if localname == "source":
|
|
source = value
|
|
elif localname == "id":
|
|
ID = value
|
|
else:
|
|
raise ValueError(
|
|
f"Unexpected attribute '{key}' found for DBRef element"
|
|
)
|
|
else:
|
|
raise ValueError(
|
|
f"Unexpected namespace '{namespace}' for DBRef attribute"
|
|
)
|
|
# The attributes "source" and "id" are required:
|
|
if source is None:
|
|
raise ValueError("Failed to find source for DBRef element")
|
|
if ID is None:
|
|
raise ValueError("Failed to find id for DBRef element")
|
|
if self.data is not None:
|
|
raise RuntimeError(f"Unexpected data found: '{self.data}'")
|
|
self.data = ""
|
|
record = self.records[-1]
|
|
dbxref = f"{source}:{ID}"
|
|
if dbxref not in record.dbxrefs:
|
|
record.dbxrefs.append(dbxref)
|
|
self.endElementNS = self.endDBRefElement
|
|
|
|
def endDBRefElement(self, name, qname):
|
|
"""Handle the end of a DBRef element."""
|
|
namespace, localname = name
|
|
if namespace is not None:
|
|
raise RuntimeError(f"Unexpected namespace '{namespace}' for DBRef element")
|
|
if qname is not None:
|
|
raise RuntimeError(f"Unexpected qname '{qname}' for DBRef element")
|
|
if localname != "DBRef":
|
|
raise RuntimeError(f"Unexpected localname '{localname}' for DBRef element")
|
|
if self.data:
|
|
raise RuntimeError(
|
|
f"Unexpected data received for DBRef element: '{self.data}'"
|
|
)
|
|
self.data = None
|
|
self.endElementNS = self.endEntryElement
|
|
|
|
def startPropertyElement(self, attrs):
|
|
"""Handle the start of a property element."""
|
|
property_name = None
|
|
property_value = None
|
|
for key, value in attrs.items():
|
|
namespace, localname = key
|
|
if namespace is None:
|
|
if localname == "name":
|
|
property_name = value
|
|
elif localname == "value":
|
|
property_value = value
|
|
else:
|
|
raise ValueError(
|
|
"Unexpected attribute '%s' found for property element", key
|
|
)
|
|
else:
|
|
raise ValueError(
|
|
f"Unexpected namespace '{namespace}' for property attribute"
|
|
)
|
|
# The attribute "name" is required:
|
|
if property_name is None:
|
|
raise ValueError("Failed to find name for property element")
|
|
record = self.records[-1]
|
|
if property_name == "molecule_type":
|
|
# At this point, record.annotations["molecule_type"] is either
|
|
# "DNA", "RNA", or "protein"; property_value may be a more detailed
|
|
# description such as "mRNA" or "genomic DNA".
|
|
assert record.annotations[property_name] in property_value
|
|
record.annotations[property_name] = property_value
|
|
else:
|
|
if property_name not in record.annotations:
|
|
record.annotations[property_name] = []
|
|
record.annotations[property_name].append(property_value)
|
|
self.endElementNS = self.endPropertyElement
|
|
|
|
def endPropertyElement(self, name, qname):
|
|
"""Handle the end of a property element."""
|
|
namespace, localname = name
|
|
if namespace is not None:
|
|
raise RuntimeError(
|
|
f"Unexpected namespace '{namespace}' for property element"
|
|
)
|
|
if qname is not None:
|
|
raise RuntimeError(f"Unexpected qname '{qname}' for property element")
|
|
if localname != "property":
|
|
raise RuntimeError(
|
|
f"Unexpected localname '{localname}' for property element"
|
|
)
|
|
self.endElementNS = self.endEntryElement
|
|
|
|
def characters(self, data):
|
|
"""Handle character data."""
|
|
if self.data is not None:
|
|
self.data += data
|
|
|
|
|
|
class SeqXmlIterator(SequenceIterator):
|
|
"""Parser for seqXML files.
|
|
|
|
Parses seqXML files and creates SeqRecords.
|
|
Assumes valid seqXML please validate beforehand.
|
|
It is assumed that all information for one record can be found within a
|
|
record element or above. Two types of methods are called when the start
|
|
tag of an element is reached. To receive only the attributes of an
|
|
element before its end tag is reached implement _attr_TAGNAME.
|
|
To get an element and its children as a DOM tree implement _elem_TAGNAME.
|
|
Everything that is part of the DOM tree will not trigger any further
|
|
method calls.
|
|
"""
|
|
|
|
BLOCK = 1024
|
|
|
|
def __init__(self, stream_or_path, namespace=None):
|
|
"""Create the object and initialize the XML parser."""
|
|
# Make sure we got a binary handle. If we got a text handle, then
|
|
# the parser will still run but unicode characters will be garbled
|
|
# if the text handle was opened with a different encoding than the
|
|
# one specified in the XML file. With a binary handle, the correct
|
|
# encoding is picked up by the parser from the XML file.
|
|
self.parser = sax.make_parser()
|
|
content_handler = ContentHandler()
|
|
self.parser.setContentHandler(content_handler)
|
|
self.parser.setFeature(handler.feature_namespaces, True)
|
|
super().__init__(stream_or_path, mode="b", fmt="SeqXML")
|
|
|
|
def parse(self, handle):
|
|
"""Start parsing the file, and return a SeqRecord generator."""
|
|
parser = self.parser
|
|
content_handler = parser.getContentHandler()
|
|
BLOCK = self.BLOCK
|
|
while True:
|
|
# Read in another block of the file...
|
|
text = handle.read(BLOCK)
|
|
if not text:
|
|
if content_handler.startElementNS is None:
|
|
raise ValueError("Empty file.")
|
|
else:
|
|
raise ValueError("XML file contains no data.")
|
|
parser.feed(text)
|
|
seqXMLversion = content_handler.seqXMLversion
|
|
if seqXMLversion is not None:
|
|
break
|
|
self.seqXMLversion = seqXMLversion
|
|
self.source = content_handler.source
|
|
self.sourceVersion = content_handler.sourceVersion
|
|
self.ncbiTaxID = content_handler.ncbiTaxID
|
|
self.speciesName = content_handler.speciesName
|
|
records = self.iterate(handle)
|
|
return records
|
|
|
|
def iterate(self, handle):
|
|
"""Iterate over the records in the XML file."""
|
|
parser = self.parser
|
|
content_handler = parser.getContentHandler()
|
|
records = content_handler.records
|
|
BLOCK = self.BLOCK
|
|
while True:
|
|
if len(records) > 1:
|
|
# Then at least the first record is finished
|
|
record = records.pop(0)
|
|
yield record
|
|
# Read in another block of the file...
|
|
text = handle.read(BLOCK)
|
|
if not text:
|
|
break
|
|
parser.feed(text)
|
|
# We have reached the end of the XML file;
|
|
# send out the remaining records
|
|
yield from records
|
|
records.clear()
|
|
parser.close()
|
|
|
|
|
|
class SeqXmlWriter(SequenceWriter):
|
|
"""Writes SeqRecords into seqXML file.
|
|
|
|
SeqXML requires the SeqRecord annotations to specify the molecule_type;
|
|
the molecule type is required to contain the term "DNA", "RNA", or
|
|
"protein".
|
|
"""
|
|
|
|
def __init__(
|
|
self, target, source=None, source_version=None, species=None, ncbiTaxId=None
|
|
):
|
|
"""Create Object and start the xml generator.
|
|
|
|
Arguments:
|
|
- target - Output stream opened in binary mode, or a path to a file.
|
|
- source - The source program/database of the file, for example
|
|
UniProt.
|
|
- source_version - The version or release number of the source
|
|
program or database from which the data originated.
|
|
- species - The scientific name of the species of origin of all
|
|
entries in the file.
|
|
- ncbiTaxId - The NCBI taxonomy identifier of the species of origin.
|
|
|
|
"""
|
|
super().__init__(target, "wb")
|
|
handle = self.handle
|
|
self.xml_generator = XMLGenerator(handle, "utf-8")
|
|
self.xml_generator.startDocument()
|
|
self.source = source
|
|
self.source_version = source_version
|
|
self.species = species
|
|
self.ncbiTaxId = ncbiTaxId
|
|
|
|
def write_header(self):
|
|
"""Write root node with document metadata."""
|
|
attrs = {
|
|
"xmlns:xsi": "http://www.w3.org/2001/XMLSchema-instance",
|
|
"xsi:noNamespaceSchemaLocation": "http://www.seqxml.org/0.4/seqxml.xsd",
|
|
"seqXMLversion": "0.4",
|
|
}
|
|
|
|
if self.source is not None:
|
|
attrs["source"] = self.source
|
|
if self.source_version is not None:
|
|
attrs["sourceVersion"] = self.source_version
|
|
if self.species is not None:
|
|
if not isinstance(self.species, str):
|
|
raise TypeError("species should be of type string")
|
|
attrs["speciesName"] = self.species
|
|
if self.ncbiTaxId is not None:
|
|
if not isinstance(self.ncbiTaxId, (str, int)):
|
|
raise TypeError("ncbiTaxID should be of type string or int")
|
|
attrs["ncbiTaxID"] = self.ncbiTaxId
|
|
|
|
self.xml_generator.startElement("seqXML", AttributesImpl(attrs))
|
|
|
|
def write_record(self, record):
|
|
"""Write one record."""
|
|
if not record.id or record.id == "<unknown id>":
|
|
raise ValueError("SeqXML requires identifier")
|
|
|
|
if not isinstance(record.id, str):
|
|
raise TypeError("Identifier should be of type string")
|
|
|
|
attrb = {"id": record.id}
|
|
|
|
if (
|
|
"source" in record.annotations
|
|
and self.source != record.annotations["source"]
|
|
):
|
|
if not isinstance(record.annotations["source"], str):
|
|
raise TypeError("source should be of type string")
|
|
attrb["source"] = record.annotations["source"]
|
|
|
|
self.xml_generator.startElement("entry", AttributesImpl(attrb))
|
|
self._write_species(record)
|
|
self._write_description(record)
|
|
self._write_seq(record)
|
|
self._write_dbxrefs(record)
|
|
self._write_properties(record)
|
|
self.xml_generator.endElement("entry")
|
|
|
|
def write_footer(self):
|
|
"""Close the root node and finish the XML document."""
|
|
self.xml_generator.endElement("seqXML")
|
|
self.xml_generator.endDocument()
|
|
|
|
def _write_species(self, record):
|
|
"""Write the species if given (PRIVATE)."""
|
|
local_ncbi_taxid = None
|
|
if "ncbi_taxid" in record.annotations:
|
|
local_ncbi_taxid = record.annotations["ncbi_taxid"]
|
|
if isinstance(local_ncbi_taxid, list):
|
|
# SwissProt parser uses a list (which could cope with chimeras)
|
|
if len(local_ncbi_taxid) == 1:
|
|
local_ncbi_taxid = local_ncbi_taxid[0]
|
|
elif len(local_ncbi_taxid) == 0:
|
|
local_ncbi_taxid = None
|
|
else:
|
|
raise ValueError(
|
|
"Multiple entries for record.annotations['ncbi_taxid'], %r"
|
|
% local_ncbi_taxid
|
|
)
|
|
if "organism" in record.annotations and local_ncbi_taxid:
|
|
local_org = record.annotations["organism"]
|
|
|
|
if not isinstance(local_org, str):
|
|
raise TypeError("organism should be of type string")
|
|
|
|
if not isinstance(local_ncbi_taxid, (str, int)):
|
|
raise TypeError("ncbiTaxID should be of type string or int")
|
|
|
|
# The local species definition is only written if it differs from the global species definition
|
|
if local_org != self.species or local_ncbi_taxid != self.ncbiTaxId:
|
|
attr = {"name": local_org, "ncbiTaxID": str(local_ncbi_taxid)}
|
|
self.xml_generator.startElement("species", AttributesImpl(attr))
|
|
self.xml_generator.endElement("species")
|
|
|
|
def _write_description(self, record):
|
|
"""Write the description if given (PRIVATE)."""
|
|
if record.description:
|
|
if not isinstance(record.description, str):
|
|
raise TypeError("Description should be of type string")
|
|
|
|
description = record.description
|
|
if description == "<unknown description>":
|
|
description = ""
|
|
|
|
if len(record.description) > 0:
|
|
self.xml_generator.startElement("description", AttributesImpl({}))
|
|
self.xml_generator.characters(description)
|
|
self.xml_generator.endElement("description")
|
|
|
|
def _write_seq(self, record):
|
|
"""Write the sequence (PRIVATE).
|
|
|
|
Note that SeqXML requires the molecule type to contain the term
|
|
"DNA", "RNA", or "protein".
|
|
"""
|
|
seq = bytes(record.seq)
|
|
|
|
if not len(seq) > 0:
|
|
raise ValueError("The sequence length should be greater than 0")
|
|
|
|
molecule_type = record.annotations.get("molecule_type")
|
|
if molecule_type is None:
|
|
raise ValueError("molecule_type is not defined")
|
|
elif "DNA" in molecule_type:
|
|
seqElem = "DNAseq"
|
|
elif "RNA" in molecule_type:
|
|
seqElem = "RNAseq"
|
|
elif "protein" in molecule_type:
|
|
seqElem = "AAseq"
|
|
else:
|
|
raise ValueError(f"unknown molecule_type '{molecule_type}'")
|
|
|
|
self.xml_generator.startElement(seqElem, AttributesImpl({}))
|
|
self.xml_generator.characters(seq)
|
|
self.xml_generator.endElement(seqElem)
|
|
|
|
def _write_dbxrefs(self, record):
|
|
"""Write all database cross references (PRIVATE)."""
|
|
if record.dbxrefs is not None:
|
|
for dbxref in record.dbxrefs:
|
|
if not isinstance(dbxref, str):
|
|
raise TypeError("dbxrefs should be of type list of string")
|
|
if dbxref.find(":") < 1:
|
|
raise ValueError(
|
|
"dbxrefs should be in the form ['source:id', 'source:id' ]"
|
|
)
|
|
|
|
dbsource, dbid = dbxref.split(":", 1)
|
|
|
|
attr = {"source": dbsource, "id": dbid}
|
|
self.xml_generator.startElement("DBRef", AttributesImpl(attr))
|
|
self.xml_generator.endElement("DBRef")
|
|
|
|
def _write_properties(self, record):
|
|
"""Write all annotations that are key value pairs with values of a primitive type or list of primitive types (PRIVATE)."""
|
|
for key, value in record.annotations.items():
|
|
if key not in ("organism", "ncbi_taxid", "source"):
|
|
if value is None:
|
|
attr = {"name": key}
|
|
self.xml_generator.startElement("property", AttributesImpl(attr))
|
|
self.xml_generator.endElement("property")
|
|
|
|
elif isinstance(value, list):
|
|
for v in value:
|
|
if v is None:
|
|
attr = {"name": key}
|
|
else:
|
|
attr = {"name": key, "value": str(v)}
|
|
self.xml_generator.startElement(
|
|
"property", AttributesImpl(attr)
|
|
)
|
|
self.xml_generator.endElement("property")
|
|
|
|
elif isinstance(value, (int, float, str)):
|
|
attr = {"name": key, "value": str(value)}
|
|
self.xml_generator.startElement("property", AttributesImpl(attr))
|
|
self.xml_generator.endElement("property")
|