Changed several parsers avoid using next.

tempfile.NamedTemporaryFiles do not support __next__.
This commit is contained in:
Seth Sims
2025-02-04 10:45:24 -05:00
committed by Peter Cock
parent bbf3a40ed0
commit d6fcdfa131
6 changed files with 174 additions and 144 deletions

View File

@ -140,10 +140,8 @@ def MafIterator(handle, seq_count=None):
while True: while True:
# allows parsing of the last bundle without duplicating code # allows parsing of the last bundle without duplicating code
try: line = handle.readline()
line = next(handle)
except StopIteration:
line = ""
try: try:
# Will be in binary mode if called via the indexing code # Will be in binary mode if called via the indexing code
# (which needs the raw offsets for cross platform indexing) # (which needs the raw offsets for cross platform indexing)

View File

@ -194,9 +194,8 @@ class FastaIterator(SequenceIterator):
if alphabet is not None: if alphabet is not None:
raise ValueError("The alphabet argument is no longer supported") raise ValueError("The alphabet argument is no longer supported")
super().__init__(source, fmt="Fasta") super().__init__(source, fmt="Fasta")
try: line = self.stream.readline()
line = next(self.stream) if not line:
except StopIteration:
line = None line = None
else: else:
if not line.startswith(">"): if not line.startswith(">"):

View File

@ -144,16 +144,22 @@ class PirIterator(SequenceIterator):
line = self._line line = self._line
if line is None: if line is None:
raise StopIteration raise StopIteration
stream = self.stream
pir_type = line[1:3] pir_type = line[1:3]
if pir_type not in _pir_mol_type or line[3] != ";": if pir_type not in _pir_mol_type or line[3] != ";":
raise ValueError( raise ValueError(
"Records should start with '>XX;' where XX is a valid sequence type" "Records should start with '>XX;' where XX is a valid sequence type"
) )
identifier = line[4:].strip() identifier = line[4:].strip()
description = next(stream).strip() description = self.stream.readline()
if description == "":
raise StopIteration
else:
description = description.strip()
lines = [] lines = []
for line in stream: for line in self.stream:
if line[0] == ">": if line[0] == ">":
self._line = line self._line = line
break break

View File

@ -933,9 +933,9 @@ def FastqGeneralIterator(source: _TextIOSource) -> Iterator[tuple[str, str, str]
with as_handle(source) as handle: with as_handle(source) as handle:
if handle.read(0) != "": if handle.read(0) != "":
raise StreamModeError("Fastq files must be opened in text mode") from None raise StreamModeError("Fastq files must be opened in text mode") from None
try:
line = next(handle) line = handle.readline()
except StopIteration: if line == "":
return # Premature end of file, or just empty? return # Premature end of file, or just empty?
while True: while True:
@ -1031,15 +1031,11 @@ class FastqIteratorAbstractBaseClass(SequenceIterator[str]):
def __next__(self) -> SeqRecord: def __next__(self) -> SeqRecord:
"""Parse the file and generate SeqRecord objects.""" """Parse the file and generate SeqRecord objects."""
line = self.line line = self.line
if line is None: if line is None:
try: line = self.stream.readline()
line = next(self.stream) if not line:
except StopIteration: # empty file?
self.line = None
else:
self.line = line
if line is None:
raise StopIteration raise StopIteration
if line[0] != "@": if line[0] != "@":
raise ValueError("Records in Fastq files should start with '@' character") raise ValueError("Records in Fastq files should start with '@' character")

View File

@ -301,7 +301,7 @@ please open an issue on GitHub or mention it on the mailing list.
- Sebastian Bassi <https://about.me/bassi> - Sebastian Bassi <https://about.me/bassi>
- Sergei Lebedev <https://github.com/superbobry> - Sergei Lebedev <https://github.com/superbobry>
- Sergio Valqui <https://github.com/svalqui> - Sergio Valqui <https://github.com/svalqui>
- Seth Sims <seth.sims at gmail> - Seth Sims <https://github.com/xzy3>
- She Zhang <https://github.com/shz66> - She Zhang <https://github.com/shz66>
- Shoichiro Kawauchi <https://github.com/lacrosse91> - Shoichiro Kawauchi <https://github.com/lacrosse91>
- Shuichiro MAKIGAKI <https://github.com/shuichiro-makigaki> - Shuichiro MAKIGAKI <https://github.com/shuichiro-makigaki>

View File

@ -10,6 +10,8 @@ import unittest
import warnings import warnings
from io import BytesIO from io import BytesIO
from io import StringIO from io import StringIO
from tempfile import NamedTemporaryFile
from contextlib import ExitStack
from Bio import AlignIO from Bio import AlignIO
from Bio import BiopythonParserWarning from Bio import BiopythonParserWarning
@ -301,6 +303,7 @@ class TestSeqIO(SeqIOTestBaseClass):
else: else:
debug = False debug = False
unequal_length = len({len(_) for _ in records}) != 1 unequal_length = len({len(_) for _ in records}) != 1
for fmt in test_write_read_alignment_formats: for fmt in test_write_read_alignment_formats:
if fmt not in possible_unknown_seq_formats and len(records[0].seq) > 100: if fmt not in possible_unknown_seq_formats and len(records[0].seq) > 100:
try: try:
@ -318,12 +321,30 @@ class TestSeqIO(SeqIOTestBaseClass):
if molecule_type is not None: if molecule_type is not None:
for record in records1: for record in records1:
record.annotations["molecule_type"] = molecule_type record.annotations["molecule_type"] = molecule_type
for test_type in ("IO", "NamedTemporaryFile"):
with ExitStack() as exit_stack:
# Going to write to a handle... # Going to write to a handle...
mode = self.get_mode(fmt) mode = self.get_mode(fmt)
if test_type == "IO":
if mode == "t": if mode == "t":
handle = StringIO() handle = exit_stack.enter_context(StringIO())
elif mode == "b": elif mode == "b":
handle = BytesIO() handle = exit_stack.enter_context(BytesIO())
elif test_type == "NamedTemporaryFile":
file_mode = "wb+"
encoding = None
if mode == "t":
encoding = "utf8"
file_mode = "w+"
handle = exit_stack.enter_context(
NamedTemporaryFile(mode=file_mode, encoding=encoding)
)
else:
self.fail(f"test type is not recognized: {test_type}")
if unequal_length and fmt in AlignIO._FormatToWriter: if unequal_length and fmt in AlignIO._FormatToWriter:
msg = "Sequences must all be the same length" msg = "Sequences must all be the same length"
@ -338,7 +359,9 @@ class TestSeqIO(SeqIOTestBaseClass):
# Should fail. # Should fail.
if debug: if debug:
try: try:
SeqIO.write(sequences=records1, handle=handle, format=fmt) SeqIO.write(
sequences=records1, handle=handle, format=fmt
)
except (ValueError, TypeError) as e: except (ValueError, TypeError) as e:
messages[fmt] = str(e) messages[fmt] = str(e)
else: else:
@ -347,9 +370,12 @@ class TestSeqIO(SeqIOTestBaseClass):
with warnings.catch_warnings(): with warnings.catch_warnings():
# e.g. data loss # e.g. data loss
warnings.simplefilter("ignore", BiopythonWarning) warnings.simplefilter("ignore", BiopythonWarning)
SeqIO.write(sequences=records1, handle=handle, format=fmt) SeqIO.write(
sequences=records1, handle=handle, format=fmt
)
self.assertTrue( self.assertTrue(
isinstance(cm.exception, (ValueError, TypeError)), msg=message isinstance(cm.exception, (ValueError, TypeError)),
msg=message,
) )
self.assertEqual(str(cm.exception), msg, msg=message) self.assertEqual(str(cm.exception), msg, msg=message)
@ -427,7 +453,9 @@ class TestSeqIO(SeqIOTestBaseClass):
self.assertEqual(r1_id, r2.id, f"'{r1.id}' vs '{r2.id}'") self.assertEqual(r1_id, r2.id, f"'{r1.id}' vs '{r2.id}'")
elif fmt == "maf": elif fmt == "maf":
self.assertEqual( self.assertEqual(
r1.id.replace(" ", "_"), r2.id, f"'{r1.id}' vs '{r2.id}'" r1.id.replace(" ", "_"),
r2.id,
f"'{r1.id}' vs '{r2.id}'",
) )
elif fmt in ["fasta", "fasta-2line"]: elif fmt in ["fasta", "fasta-2line"]:
self.assertEqual(r1.id.split()[0], r2.id) self.assertEqual(r1.id.split()[0], r2.id)
@ -446,7 +474,10 @@ class TestSeqIO(SeqIOTestBaseClass):
warnings.simplefilter("ignore", BiopythonWarning) warnings.simplefilter("ignore", BiopythonWarning)
SeqIO.write(records1[0], handle, fmt) SeqIO.write(records1[0], handle, fmt)
if mode == "t": if mode == "t":
self.assertEqual(handle.getvalue(), records1[0].format(fmt)) self.assertEqual(
handle.getvalue(), records1[0].format(fmt)
)
if debug: if debug:
self.fail( self.fail(
f"Update {t_format} test to use this dict:\nmessages = {messages!r}" f"Update {t_format} test to use this dict:\nmessages = {messages!r}"