mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
Add a Blast XML2 writer (#4669)
* added BLAST XML2 writer * reorder * abstract * finished
This commit is contained in:
@ -952,12 +952,17 @@ def write(records, destination, fmt="XML"):
|
||||
written.
|
||||
- fmt - string describing the file format to write
|
||||
(case-insensitive).
|
||||
Currently, only "XML" is accepted.
|
||||
Currently, only "XML" and "XML2" are accepted.
|
||||
|
||||
Returns the number of records written (as an integer).
|
||||
"""
|
||||
if fmt.upper() == "XML":
|
||||
fmt = fmt.upper()
|
||||
if fmt == "XML":
|
||||
Writer = _writers.XMLWriter
|
||||
elif fmt == "XML2":
|
||||
Writer = _writers.XML2Writer
|
||||
else:
|
||||
raise ValueError(f"Unknown format {fmt}; expected 'XML' or 'XML2'")
|
||||
try:
|
||||
stream = open(destination, "wb")
|
||||
except TypeError: # not a path, assume we received a stream
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -435,9 +435,11 @@ These parsers have now been removed from Biopython, as the BLAST output in
|
||||
these formats kept changing, each time breaking the Biopython parsers.
|
||||
Nowadays, Biopython can parse BLAST output in the XML format, the XML2 format,
|
||||
and tabular format. This chapter describes the parser for BLAST output in the
|
||||
XML format; parsing XML2 output is done in exactly the same way as parsing XML.
|
||||
BLAST output in tabular format can be parsed as alignments (see the section
|
||||
:ref:`subsec:align_tabular`).
|
||||
XML and XML2 formats using the ``Bio.Blast.parse`` function. This function
|
||||
automatically detects whether the XML file is in the XML format or in the XML2
|
||||
format.
|
||||
BLAST output in tabular format can be parsed as alignments using the
|
||||
``Bio.Align.parse`` function (see the section :ref:`subsec:align_tabular`).
|
||||
|
||||
You can get BLAST output in XML format in various ways. For the parser,
|
||||
it doesn’t matter how the output was generated, as long as it is in the
|
||||
@ -1389,7 +1391,9 @@ Writing BLAST records
|
||||
---------------------
|
||||
|
||||
Use the ``write`` function in ``Bio.Blast`` to save BLAST records as an XML
|
||||
file:
|
||||
file. By default, the (DTD-based) XML format is used; you can also save the
|
||||
BLAST records in the (schema-based) XML2 format by using the ``fmt="XML2"``
|
||||
argument to the ``write`` function.
|
||||
|
||||
.. code:: pycon
|
||||
|
||||
@ -1398,6 +1402,13 @@ file:
|
||||
>>> records = Blast.parse(stream)
|
||||
>>> Blast.write(records, "my_qblast_output.xml")
|
||||
|
||||
or
|
||||
|
||||
.. code:: pycon
|
||||
|
||||
>>> Blast.write(records, "my_qblast_output.xml", fmt="XML2")
|
||||
|
||||
|
||||
In this example, we could have saved the data returned by ``Blast.qblast``
|
||||
directly to an XML file (see section :ref:`subsec:saving-blast-results`).
|
||||
However, by parsing the data returned by qblast into records, we can sort or
|
||||
|
@ -3317,6 +3317,18 @@ G26684.1 228
|
||||
written_records = Blast.parse(stream)
|
||||
self.check_xml_2900_blastn_001_records(written_records)
|
||||
|
||||
def test_xml_2900_blastn_001_v2_writer(self):
|
||||
"""Writing BLASTN 2.9.0+ XML2 (xml_2900_blastn_001_v2.xml)."""
|
||||
filename = "xml_2900_blastn_001_v2.xml"
|
||||
path = os.path.join("Blast", filename)
|
||||
with Blast.parse(path) as records:
|
||||
stream = io.BytesIO()
|
||||
n = Blast.write(records, stream, fmt="XML2")
|
||||
self.assertEqual(n, 1)
|
||||
stream.seek(0)
|
||||
written_records = Blast.parse(stream)
|
||||
self.check_xml_2900_blastn_001_records(written_records, xml2=True)
|
||||
|
||||
def test_megablast_legacy(self):
|
||||
"""Parsing megablast 2.2.26 [Sep-21-2011] (megablast_legacy.xml)."""
|
||||
filename = "megablast_legacy.xml"
|
||||
@ -9480,6 +9492,18 @@ AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
||||
written_records = Blast.parse(stream)
|
||||
self.check_xml_2900_blastx_001_records(written_records, xml2=False)
|
||||
|
||||
def test_xml_2900_blastx_001_v2_writer(self):
|
||||
"""Writing BLASTX 2.9.0+ XML2 (xml_2900_blastx_001_v2.xml)."""
|
||||
filename = "xml_2900_blastx_001_v2.xml"
|
||||
path = os.path.join("Blast", filename)
|
||||
with Blast.parse(path) as records:
|
||||
stream = io.BytesIO()
|
||||
n = Blast.write(records, stream, fmt="XML2")
|
||||
self.assertEqual(n, 1)
|
||||
stream.seek(0)
|
||||
written_records = Blast.parse(stream)
|
||||
self.check_xml_2900_blastx_001_records(written_records, xml2=True)
|
||||
|
||||
|
||||
class TestTBlastn(unittest.TestCase):
|
||||
"""Test the Blast XML parser for tblastn output."""
|
||||
@ -10524,6 +10548,18 @@ CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
||||
written_records = Blast.parse(stream)
|
||||
self.check_xml_2900_tblastn_001_records(written_records)
|
||||
|
||||
def test_xml_2900_tblastn_001_v2_writer(self):
|
||||
"""Writing TBLASTN 2.9.0+ XML2 (xml_2900_tblastn_001_v2.xml)."""
|
||||
filename = "xml_2900_tblastn_001_v2.xml"
|
||||
path = os.path.join("Blast", filename)
|
||||
with Blast.parse(path) as records:
|
||||
stream = io.BytesIO()
|
||||
n = Blast.write(records, stream, fmt="XML2")
|
||||
self.assertEqual(n, 1)
|
||||
stream.seek(0)
|
||||
written_records = Blast.parse(stream)
|
||||
self.check_xml_2900_tblastn_001_records(written_records, xml2=True)
|
||||
|
||||
|
||||
class TestTBlastx(unittest.TestCase):
|
||||
"""Test the Blast XML parser for tblastx output."""
|
||||
@ -11266,9 +11302,9 @@ class TestRPSBlast(unittest.TestCase):
|
||||
path = os.path.join("Blast", filename)
|
||||
with open(path, "rb") as stream:
|
||||
records = Blast.parse(stream)
|
||||
self.check_xml_2900_rpsblast_001(records)
|
||||
self.check_xml_2900_rpsblast_001_records(records)
|
||||
with Blast.parse(path) as records:
|
||||
self.check_xml_2900_rpsblast_001(records)
|
||||
self.check_xml_2900_rpsblast_001_records(records)
|
||||
with open(path, "rb") as stream:
|
||||
record = Blast.read(stream)
|
||||
self.check_xml_2900_rpsblast_001_record(record)
|
||||
@ -11281,9 +11317,9 @@ class TestRPSBlast(unittest.TestCase):
|
||||
path = os.path.join("Blast", filename)
|
||||
with open(path, "rb") as stream:
|
||||
records = Blast.parse(stream)
|
||||
self.check_xml_2900_rpsblast_001(records, xml2=True)
|
||||
self.check_xml_2900_rpsblast_001_records(records, xml2=True)
|
||||
with Blast.parse(path) as records:
|
||||
self.check_xml_2900_rpsblast_001(records, xml2=True)
|
||||
self.check_xml_2900_rpsblast_001_records(records, xml2=True)
|
||||
with open(path, "rb") as stream:
|
||||
record = Blast.read(stream)
|
||||
self.check_xml_2900_rpsblast_001_record(record, xml2=True)
|
||||
@ -11300,9 +11336,21 @@ class TestRPSBlast(unittest.TestCase):
|
||||
self.assertEqual(n, 1)
|
||||
stream.seek(0)
|
||||
written_records = Blast.parse(stream)
|
||||
self.check_xml_2900_rpsblast_001(written_records)
|
||||
self.check_xml_2900_rpsblast_001_records(written_records)
|
||||
|
||||
def check_xml_2900_rpsblast_001(self, records, xml2=False):
|
||||
def test_xml_2900_rpsblast_001_v2_writer(self):
|
||||
"""Writing rpsblast 2.9.0+ XML2 (xml_2900_rpsblast_001_v2.xml)."""
|
||||
filename = "xml_2900_rpsblast_001_v2.xml"
|
||||
path = os.path.join("Blast", filename)
|
||||
with Blast.parse(path) as records:
|
||||
stream = io.BytesIO()
|
||||
n = Blast.write(records, stream, fmt="XML2")
|
||||
self.assertEqual(n, 1)
|
||||
stream.seek(0)
|
||||
written_records = Blast.parse(stream)
|
||||
self.check_xml_2900_rpsblast_001_records(written_records, xml2=True)
|
||||
|
||||
def check_xml_2900_rpsblast_001_records(self, records, xml2=False):
|
||||
self.assertEqual(records.program, "rpsblast")
|
||||
self.assertEqual(records.version, "RPSBLAST 2.9.0+")
|
||||
self.assertEqual(
|
||||
|
Reference in New Issue
Block a user