mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
Changed several parsers avoid using next.
tempfile.NamedTemporaryFiles do not support __next__.
This commit is contained in:
@ -140,10 +140,8 @@ def MafIterator(handle, seq_count=None):
|
|||||||
|
|
||||||
while True:
|
while True:
|
||||||
# allows parsing of the last bundle without duplicating code
|
# allows parsing of the last bundle without duplicating code
|
||||||
try:
|
line = handle.readline()
|
||||||
line = next(handle)
|
|
||||||
except StopIteration:
|
|
||||||
line = ""
|
|
||||||
try:
|
try:
|
||||||
# Will be in binary mode if called via the indexing code
|
# Will be in binary mode if called via the indexing code
|
||||||
# (which needs the raw offsets for cross platform indexing)
|
# (which needs the raw offsets for cross platform indexing)
|
||||||
|
@ -194,9 +194,8 @@ class FastaIterator(SequenceIterator):
|
|||||||
if alphabet is not None:
|
if alphabet is not None:
|
||||||
raise ValueError("The alphabet argument is no longer supported")
|
raise ValueError("The alphabet argument is no longer supported")
|
||||||
super().__init__(source, fmt="Fasta")
|
super().__init__(source, fmt="Fasta")
|
||||||
try:
|
line = self.stream.readline()
|
||||||
line = next(self.stream)
|
if not line:
|
||||||
except StopIteration:
|
|
||||||
line = None
|
line = None
|
||||||
else:
|
else:
|
||||||
if not line.startswith(">"):
|
if not line.startswith(">"):
|
||||||
|
@ -144,16 +144,22 @@ class PirIterator(SequenceIterator):
|
|||||||
line = self._line
|
line = self._line
|
||||||
if line is None:
|
if line is None:
|
||||||
raise StopIteration
|
raise StopIteration
|
||||||
stream = self.stream
|
|
||||||
pir_type = line[1:3]
|
pir_type = line[1:3]
|
||||||
if pir_type not in _pir_mol_type or line[3] != ";":
|
if pir_type not in _pir_mol_type or line[3] != ";":
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Records should start with '>XX;' where XX is a valid sequence type"
|
"Records should start with '>XX;' where XX is a valid sequence type"
|
||||||
)
|
)
|
||||||
|
|
||||||
identifier = line[4:].strip()
|
identifier = line[4:].strip()
|
||||||
description = next(stream).strip()
|
description = self.stream.readline()
|
||||||
|
if description == "":
|
||||||
|
raise StopIteration
|
||||||
|
else:
|
||||||
|
description = description.strip()
|
||||||
|
|
||||||
lines = []
|
lines = []
|
||||||
for line in stream:
|
for line in self.stream:
|
||||||
if line[0] == ">":
|
if line[0] == ">":
|
||||||
self._line = line
|
self._line = line
|
||||||
break
|
break
|
||||||
|
@ -933,9 +933,9 @@ def FastqGeneralIterator(source: _TextIOSource) -> Iterator[tuple[str, str, str]
|
|||||||
with as_handle(source) as handle:
|
with as_handle(source) as handle:
|
||||||
if handle.read(0) != "":
|
if handle.read(0) != "":
|
||||||
raise StreamModeError("Fastq files must be opened in text mode") from None
|
raise StreamModeError("Fastq files must be opened in text mode") from None
|
||||||
try:
|
|
||||||
line = next(handle)
|
line = handle.readline()
|
||||||
except StopIteration:
|
if line == "":
|
||||||
return # Premature end of file, or just empty?
|
return # Premature end of file, or just empty?
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
@ -1031,15 +1031,11 @@ class FastqIteratorAbstractBaseClass(SequenceIterator[str]):
|
|||||||
|
|
||||||
def __next__(self) -> SeqRecord:
|
def __next__(self) -> SeqRecord:
|
||||||
"""Parse the file and generate SeqRecord objects."""
|
"""Parse the file and generate SeqRecord objects."""
|
||||||
|
|
||||||
line = self.line
|
line = self.line
|
||||||
if line is None:
|
if line is None:
|
||||||
try:
|
line = self.stream.readline()
|
||||||
line = next(self.stream)
|
if not line:
|
||||||
except StopIteration: # empty file?
|
|
||||||
self.line = None
|
|
||||||
else:
|
|
||||||
self.line = line
|
|
||||||
if line is None:
|
|
||||||
raise StopIteration
|
raise StopIteration
|
||||||
if line[0] != "@":
|
if line[0] != "@":
|
||||||
raise ValueError("Records in Fastq files should start with '@' character")
|
raise ValueError("Records in Fastq files should start with '@' character")
|
||||||
|
@ -301,7 +301,7 @@ please open an issue on GitHub or mention it on the mailing list.
|
|||||||
- Sebastian Bassi <https://about.me/bassi>
|
- Sebastian Bassi <https://about.me/bassi>
|
||||||
- Sergei Lebedev <https://github.com/superbobry>
|
- Sergei Lebedev <https://github.com/superbobry>
|
||||||
- Sergio Valqui <https://github.com/svalqui>
|
- Sergio Valqui <https://github.com/svalqui>
|
||||||
- Seth Sims <seth.sims at gmail>
|
- Seth Sims <https://github.com/xzy3>
|
||||||
- She Zhang <https://github.com/shz66>
|
- She Zhang <https://github.com/shz66>
|
||||||
- Shoichiro Kawauchi <https://github.com/lacrosse91>
|
- Shoichiro Kawauchi <https://github.com/lacrosse91>
|
||||||
- Shuichiro MAKIGAKI <https://github.com/shuichiro-makigaki>
|
- Shuichiro MAKIGAKI <https://github.com/shuichiro-makigaki>
|
||||||
|
@ -10,6 +10,8 @@ import unittest
|
|||||||
import warnings
|
import warnings
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
|
from tempfile import NamedTemporaryFile
|
||||||
|
from contextlib import ExitStack
|
||||||
|
|
||||||
from Bio import AlignIO
|
from Bio import AlignIO
|
||||||
from Bio import BiopythonParserWarning
|
from Bio import BiopythonParserWarning
|
||||||
@ -301,6 +303,7 @@ class TestSeqIO(SeqIOTestBaseClass):
|
|||||||
else:
|
else:
|
||||||
debug = False
|
debug = False
|
||||||
unequal_length = len({len(_) for _ in records}) != 1
|
unequal_length = len({len(_) for _ in records}) != 1
|
||||||
|
|
||||||
for fmt in test_write_read_alignment_formats:
|
for fmt in test_write_read_alignment_formats:
|
||||||
if fmt not in possible_unknown_seq_formats and len(records[0].seq) > 100:
|
if fmt not in possible_unknown_seq_formats and len(records[0].seq) > 100:
|
||||||
try:
|
try:
|
||||||
@ -318,12 +321,30 @@ class TestSeqIO(SeqIOTestBaseClass):
|
|||||||
if molecule_type is not None:
|
if molecule_type is not None:
|
||||||
for record in records1:
|
for record in records1:
|
||||||
record.annotations["molecule_type"] = molecule_type
|
record.annotations["molecule_type"] = molecule_type
|
||||||
|
|
||||||
|
for test_type in ("IO", "NamedTemporaryFile"):
|
||||||
|
with ExitStack() as exit_stack:
|
||||||
# Going to write to a handle...
|
# Going to write to a handle...
|
||||||
mode = self.get_mode(fmt)
|
mode = self.get_mode(fmt)
|
||||||
|
if test_type == "IO":
|
||||||
if mode == "t":
|
if mode == "t":
|
||||||
handle = StringIO()
|
handle = exit_stack.enter_context(StringIO())
|
||||||
elif mode == "b":
|
elif mode == "b":
|
||||||
handle = BytesIO()
|
handle = exit_stack.enter_context(BytesIO())
|
||||||
|
|
||||||
|
elif test_type == "NamedTemporaryFile":
|
||||||
|
file_mode = "wb+"
|
||||||
|
encoding = None
|
||||||
|
if mode == "t":
|
||||||
|
encoding = "utf8"
|
||||||
|
file_mode = "w+"
|
||||||
|
|
||||||
|
handle = exit_stack.enter_context(
|
||||||
|
NamedTemporaryFile(mode=file_mode, encoding=encoding)
|
||||||
|
)
|
||||||
|
|
||||||
|
else:
|
||||||
|
self.fail(f"test type is not recognized: {test_type}")
|
||||||
|
|
||||||
if unequal_length and fmt in AlignIO._FormatToWriter:
|
if unequal_length and fmt in AlignIO._FormatToWriter:
|
||||||
msg = "Sequences must all be the same length"
|
msg = "Sequences must all be the same length"
|
||||||
@ -338,7 +359,9 @@ class TestSeqIO(SeqIOTestBaseClass):
|
|||||||
# Should fail.
|
# Should fail.
|
||||||
if debug:
|
if debug:
|
||||||
try:
|
try:
|
||||||
SeqIO.write(sequences=records1, handle=handle, format=fmt)
|
SeqIO.write(
|
||||||
|
sequences=records1, handle=handle, format=fmt
|
||||||
|
)
|
||||||
except (ValueError, TypeError) as e:
|
except (ValueError, TypeError) as e:
|
||||||
messages[fmt] = str(e)
|
messages[fmt] = str(e)
|
||||||
else:
|
else:
|
||||||
@ -347,9 +370,12 @@ class TestSeqIO(SeqIOTestBaseClass):
|
|||||||
with warnings.catch_warnings():
|
with warnings.catch_warnings():
|
||||||
# e.g. data loss
|
# e.g. data loss
|
||||||
warnings.simplefilter("ignore", BiopythonWarning)
|
warnings.simplefilter("ignore", BiopythonWarning)
|
||||||
SeqIO.write(sequences=records1, handle=handle, format=fmt)
|
SeqIO.write(
|
||||||
|
sequences=records1, handle=handle, format=fmt
|
||||||
|
)
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
isinstance(cm.exception, (ValueError, TypeError)), msg=message
|
isinstance(cm.exception, (ValueError, TypeError)),
|
||||||
|
msg=message,
|
||||||
)
|
)
|
||||||
self.assertEqual(str(cm.exception), msg, msg=message)
|
self.assertEqual(str(cm.exception), msg, msg=message)
|
||||||
|
|
||||||
@ -427,7 +453,9 @@ class TestSeqIO(SeqIOTestBaseClass):
|
|||||||
self.assertEqual(r1_id, r2.id, f"'{r1.id}' vs '{r2.id}'")
|
self.assertEqual(r1_id, r2.id, f"'{r1.id}' vs '{r2.id}'")
|
||||||
elif fmt == "maf":
|
elif fmt == "maf":
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
r1.id.replace(" ", "_"), r2.id, f"'{r1.id}' vs '{r2.id}'"
|
r1.id.replace(" ", "_"),
|
||||||
|
r2.id,
|
||||||
|
f"'{r1.id}' vs '{r2.id}'",
|
||||||
)
|
)
|
||||||
elif fmt in ["fasta", "fasta-2line"]:
|
elif fmt in ["fasta", "fasta-2line"]:
|
||||||
self.assertEqual(r1.id.split()[0], r2.id)
|
self.assertEqual(r1.id.split()[0], r2.id)
|
||||||
@ -446,7 +474,10 @@ class TestSeqIO(SeqIOTestBaseClass):
|
|||||||
warnings.simplefilter("ignore", BiopythonWarning)
|
warnings.simplefilter("ignore", BiopythonWarning)
|
||||||
SeqIO.write(records1[0], handle, fmt)
|
SeqIO.write(records1[0], handle, fmt)
|
||||||
if mode == "t":
|
if mode == "t":
|
||||||
self.assertEqual(handle.getvalue(), records1[0].format(fmt))
|
self.assertEqual(
|
||||||
|
handle.getvalue(), records1[0].format(fmt)
|
||||||
|
)
|
||||||
|
|
||||||
if debug:
|
if debug:
|
||||||
self.fail(
|
self.fail(
|
||||||
f"Update {t_format} test to use this dict:\nmessages = {messages!r}"
|
f"Update {t_format} test to use this dict:\nmessages = {messages!r}"
|
||||||
|
Reference in New Issue
Block a user