Fix byte mode detection on BgzfWriter

In commit c4a47ffff7f3e7de32ab3d8846983d3531ea63b4, there was an
attempt to detect handles which are not in binary mode by calling
`fileobj.read(0)`. However, there's a core problem with that approach:
most handles opened in write mode *cannot* be read from. For example:

```python
>>> handle = open('/tmp/demo.txt', 'wb')
>>> handle.read(0)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
io.UnsupportedOperation: read
```

This means that one cannot pass a plain writable file object to
`BgzfWriter`, even one correctly opened in binary mode:

```python
>>> Bio.__version__
'1.83'
>>> Bio.bgzf
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
AttributeError: module 'Bio' has no attribute 'bgzf'
>>> from Bio import bgzf
>>> with open('/tmp/demo.txt', 'wb') as handle:
...     bgzf.BgzfWriter(fileobj=handle)
...
    if fileobj.read(0) != b"":
       ^^^^^^^^^^^^^^^
io.UnsupportedOperation: read
```

If eager detection of the handle's mode is desired, one can check
`handle.mode` as was done before. I opt here to do that, and additionally
to hardcode handling of `io.StringIO`, which has no `mode` attribute.
The simplest option in my opinion would be to just rely on duck typing:
the `_write_block()` method will try to write the data, and either the
handle accepts `bytes` and all is well, or the write fails.
This commit is contained in:
David Cain
2024-05-09 13:57:50 -07:00
committed by Peter Cock
parent 24e0c3edf3
commit 20efe735d9
2 changed files with 21 additions and 4 deletions

View File

@ -247,6 +247,7 @@ If your data is in UTF-8 or any other incompatible encoding, you must use
binary mode, and decode the appropriate fragments yourself.
"""
import io
import struct
import sys
import zlib
@ -799,9 +800,13 @@ class BgzfWriter:
"""Initialize the class."""
if filename and fileobj:
raise ValueError("Supply either filename or fileobj, not both")
# If an open file was passed, make sure it was opened in binary mode.
if fileobj:
if fileobj.read(0) != b"":
# If an open file was passed, make sure it was opened in binary mode.
# This is a courtesy -- we can't detect mode for all file-like objects.
# Notably, `StringIO` does not have a `mode` attribute but plain files *do*.
if isinstance(fileobj, io.StringIO) or "b" not in getattr(
fileobj, "mode", "wb"
):
raise ValueError("fileobj not opened in binary mode")
handle = fileobj
else:

View File

@ -8,12 +8,12 @@
See also the doctests in bgzf.py which are called via run_tests.py
"""
import unittest
import gzip
import io
import os
import tempfile
import unittest
from random import shuffle
import io
from Bio import bgzf
@ -504,6 +504,18 @@ class BgzfTests(unittest.TestCase):
with self.assertRaisesRegex(ValueError, error):
bgzf.BgzfWriter(fileobj=io.StringIO())
def test_writer_with_non_binary_file(self):
"""A BgzfWriter must raise ValueError on a non-binary file handle."""
error = "^fileobj not opened in binary mode$"
with open(self.temp_file, "w") as handle:
with self.assertRaisesRegex(ValueError, error):
bgzf.BgzfWriter(fileobj=handle)
def test_writer_passes_on_plain_file_handle(self):
"""A BgzfWriter must be able to work with plain file handles."""
with open(self.temp_file, "wb") as handle:
bgzf.BgzfWriter(fileobj=handle)
if __name__ == "__main__":
runner = unittest.TextTestRunner(verbosity=2)