Changed several parsers to avoid using next.

tempfile.NamedTemporaryFile objects do not support __next__.
This commit is contained in:
Seth Sims
2025-02-04 10:45:24 -05:00
committed by Peter Cock
parent bbf3a40ed0
commit d6fcdfa131
6 changed files with 174 additions and 144 deletions

View File

@ -140,10 +140,8 @@ def MafIterator(handle, seq_count=None):
while True:
# allows parsing of the last bundle without duplicating code
try:
line = next(handle)
except StopIteration:
line = ""
line = handle.readline()
try:
# Will be in binary mode if called via the indexing code
# (which needs the raw offsets for cross platform indexing)

View File

@ -194,9 +194,8 @@ class FastaIterator(SequenceIterator):
if alphabet is not None:
raise ValueError("The alphabet argument is no longer supported")
super().__init__(source, fmt="Fasta")
try:
line = next(self.stream)
except StopIteration:
line = self.stream.readline()
if not line:
line = None
else:
if not line.startswith(">"):

View File

@ -144,16 +144,22 @@ class PirIterator(SequenceIterator):
line = self._line
if line is None:
raise StopIteration
stream = self.stream
pir_type = line[1:3]
if pir_type not in _pir_mol_type or line[3] != ";":
raise ValueError(
"Records should start with '>XX;' where XX is a valid sequence type"
)
identifier = line[4:].strip()
description = next(stream).strip()
description = self.stream.readline()
if description == "":
raise StopIteration
else:
description = description.strip()
lines = []
for line in stream:
for line in self.stream:
if line[0] == ">":
self._line = line
break

View File

@ -933,9 +933,9 @@ def FastqGeneralIterator(source: _TextIOSource) -> Iterator[tuple[str, str, str]
with as_handle(source) as handle:
if handle.read(0) != "":
raise StreamModeError("Fastq files must be opened in text mode") from None
try:
line = next(handle)
except StopIteration:
line = handle.readline()
if line == "":
return # Premature end of file, or just empty?
while True:
@ -1031,15 +1031,11 @@ class FastqIteratorAbstractBaseClass(SequenceIterator[str]):
def __next__(self) -> SeqRecord:
"""Parse the file and generate SeqRecord objects."""
line = self.line
if line is None:
try:
line = next(self.stream)
except StopIteration: # empty file?
self.line = None
else:
self.line = line
if line is None:
line = self.stream.readline()
if not line:
raise StopIteration
if line[0] != "@":
raise ValueError("Records in Fastq files should start with '@' character")

View File

@ -301,7 +301,7 @@ please open an issue on GitHub or mention it on the mailing list.
- Sebastian Bassi <https://about.me/bassi>
- Sergei Lebedev <https://github.com/superbobry>
- Sergio Valqui <https://github.com/svalqui>
- Seth Sims <seth.sims at gmail>
- Seth Sims <https://github.com/xzy3>
- She Zhang <https://github.com/shz66>
- Shoichiro Kawauchi <https://github.com/lacrosse91>
- Shuichiro MAKIGAKI <https://github.com/shuichiro-makigaki>

View File

@ -10,6 +10,8 @@ import unittest
import warnings
from io import BytesIO
from io import StringIO
from tempfile import NamedTemporaryFile
from contextlib import ExitStack
from Bio import AlignIO
from Bio import BiopythonParserWarning
@ -301,6 +303,7 @@ class TestSeqIO(SeqIOTestBaseClass):
else:
debug = False
unequal_length = len({len(_) for _ in records}) != 1
for fmt in test_write_read_alignment_formats:
if fmt not in possible_unknown_seq_formats and len(records[0].seq) > 100:
try:
@ -318,12 +321,30 @@ class TestSeqIO(SeqIOTestBaseClass):
if molecule_type is not None:
for record in records1:
record.annotations["molecule_type"] = molecule_type
for test_type in ("IO", "NamedTemporaryFile"):
with ExitStack() as exit_stack:
# Going to write to a handle...
mode = self.get_mode(fmt)
if test_type == "IO":
if mode == "t":
handle = StringIO()
handle = exit_stack.enter_context(StringIO())
elif mode == "b":
handle = BytesIO()
handle = exit_stack.enter_context(BytesIO())
elif test_type == "NamedTemporaryFile":
file_mode = "wb+"
encoding = None
if mode == "t":
encoding = "utf8"
file_mode = "w+"
handle = exit_stack.enter_context(
NamedTemporaryFile(mode=file_mode, encoding=encoding)
)
else:
self.fail(f"test type is not recognized: {test_type}")
if unequal_length and fmt in AlignIO._FormatToWriter:
msg = "Sequences must all be the same length"
@ -338,7 +359,9 @@ class TestSeqIO(SeqIOTestBaseClass):
# Should fail.
if debug:
try:
SeqIO.write(sequences=records1, handle=handle, format=fmt)
SeqIO.write(
sequences=records1, handle=handle, format=fmt
)
except (ValueError, TypeError) as e:
messages[fmt] = str(e)
else:
@ -347,9 +370,12 @@ class TestSeqIO(SeqIOTestBaseClass):
with warnings.catch_warnings():
# e.g. data loss
warnings.simplefilter("ignore", BiopythonWarning)
SeqIO.write(sequences=records1, handle=handle, format=fmt)
SeqIO.write(
sequences=records1, handle=handle, format=fmt
)
self.assertTrue(
isinstance(cm.exception, (ValueError, TypeError)), msg=message
isinstance(cm.exception, (ValueError, TypeError)),
msg=message,
)
self.assertEqual(str(cm.exception), msg, msg=message)
@ -427,7 +453,9 @@ class TestSeqIO(SeqIOTestBaseClass):
self.assertEqual(r1_id, r2.id, f"'{r1.id}' vs '{r2.id}'")
elif fmt == "maf":
self.assertEqual(
r1.id.replace(" ", "_"), r2.id, f"'{r1.id}' vs '{r2.id}'"
r1.id.replace(" ", "_"),
r2.id,
f"'{r1.id}' vs '{r2.id}'",
)
elif fmt in ["fasta", "fasta-2line"]:
self.assertEqual(r1.id.split()[0], r2.id)
@ -446,7 +474,10 @@ class TestSeqIO(SeqIOTestBaseClass):
warnings.simplefilter("ignore", BiopythonWarning)
SeqIO.write(records1[0], handle, fmt)
if mode == "t":
self.assertEqual(handle.getvalue(), records1[0].format(fmt))
self.assertEqual(
handle.getvalue(), records1[0].format(fmt)
)
if debug:
self.fail(
f"Update {t_format} test to use this dict:\nmessages = {messages!r}"