Fixes formatting for extremely high B-factor values in PDB files (#5047)

This commit is contained in:
Oliver Wissett
2025-09-03 15:05:07 +01:00
committed by GitHub
parent d1c4b3d0e2
commit d3de878863
5 changed files with 100 additions and 9 deletions

View File

@ -28,3 +28,7 @@ class PDBConstructionWarning(BiopythonWarning):
# The SMCRA structure could not be written to file
class PDBIOException(Exception):
"""Define class PDBIOException."""
class PDBIOWarning(BiopythonWarning):
"""Define class PDBIOWarning."""

View File

@ -8,14 +8,12 @@
import os
import warnings
from Bio import BiopythonWarning
from Bio.Data.IUPACData import atom_weights
from Bio.PDB.PDBExceptions import PDBIOException
from Bio.PDB.PDBExceptions import PDBIOException, PDBIOWarning
from Bio.PDB.StructureBuilder import StructureBuilder
_ATOM_FORMAT_STRING = (
"%s%5i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%s%6.2f %4s%2s%2s\n"
)
_ATOM_FORMAT_STRING = "%s%5i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%s%s %4s%2s%2s\n"
_PQR_ATOM_FORMAT_STRING = (
"%s%5i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f %7s %6s %2s\n"
)
@ -24,6 +22,42 @@ _TER_FORMAT_STRING = (
"TER %5i %3s %c%4i%c \n"
)
_MAX_B_FACTOR_2DP = 1_000
_MAX_B_FACTOR_1DP = 10_000
_MAX_B_FACTOR = 999_999
def _format_b_factor(value: float) -> str:
"""Returns the b-factor value as a formatted string
Formats the b-factor value to fit the wwPDB specification of 6 characters
maximum, otherwise truncating to 6 characters if necessary.
"""
_bfactor_str = ""
if value < _MAX_B_FACTOR_2DP:
if len(f"{value:.2f}") > 6:
_bfactor_str = f"{value:6.1f}"
else:
_bfactor_str = f"{value:6.2f}"
elif value < _MAX_B_FACTOR_1DP:
if len(f"{value:.1f}") > 6:
_bfactor_str = f"{value:6.0f}"
else:
_bfactor_str = f"{value:6.1f}"
elif value < _MAX_B_FACTOR:
_bfactor_str = f"{int(value):6d}"
else:
warnings.warn(
f"Truncated bfactor {value!r} to {_MAX_B_FACTOR} to fit wwPDB spec."
" Consider using mmCIF instead!",
PDBIOWarning,
)
_bfactor_str = f"{_MAX_B_FACTOR:6d}"
assert len(_bfactor_str) <= 6
return _bfactor_str
class Select:
"""Select everything for PDB output (for use as a base class).
@ -214,13 +248,15 @@ class PDBIO(StructureIO):
occupancy = " " * 6
warnings.warn(
f"Missing occupancy in atom {atom.full_id!r} written as blank",
BiopythonWarning,
PDBIOWarning,
)
else:
raise ValueError(
f"Invalid occupancy value: {atom.occupancy!r}"
) from None
bfactor = _format_b_factor(bfactor)
args = (
record_type,
atom_number,
@ -239,6 +275,7 @@ class PDBIO(StructureIO):
element,
charge,
)
return _ATOM_FORMAT_STRING % args
# Write PQR format line
@ -250,7 +287,7 @@ class PDBIO(StructureIO):
pqr_charge = " " * 7
warnings.warn(
f"Missing PQR charge in atom {atom.full_id} written as blank",
BiopythonWarning,
PDBIOWarning,
)
else:
raise ValueError(
@ -264,7 +301,7 @@ class PDBIO(StructureIO):
radius = " " * 6
warnings.warn(
f"Missing radius in atom {atom.full_id} written as blank",
BiopythonWarning,
PDBIOWarning,
)
else:
raise ValueError(f"Invalid radius value: {atom.radius}") from None

View File

@ -270,6 +270,7 @@ please open an issue on GitHub or mention it on the mailing list.
- Nicolas Fontrodona <https://github.com/NFontrodona>
- Nigel Delaney <https://github.com/evolvedmicrobe>
- Noam Kremen <https://github.com/noamkremen>
- Oliver Wissett <https://github.com/OWissett>
- Olivier Morelle <https://github.com/Oli4>
- Oscar G. Garcia <https://github.com/oscarmaestre>
- Osvaldo Zagordi <https://github.com/ozagordi>

View File

@ -17,12 +17,23 @@ This release of Biopython supports Python 3.10, 3.11, 3.12 and 3.13. It
has also been tested on PyPy3.10 v7.3.17.
``Bio.SearchIO`` now supports parsing the tabular and plain text output of
`Infernal <http://eddylab.org/infernal/>` (v1.0.0+) RNA search tool. The
`Infernal <http://eddylab.org/infernal/>` (v1.0.0+) RNA search tool. The
format are ``infernal-tab`` and ``infernal-text``.
``Bio.PDB.PDBIO`` now ensures that b-factor values are always at most 6 characters to
ensure that we do not violate the wwPDB specification. This should not have an impact
on the majority of uses, as b-factor values are generally small (less than 100). When
1000 \<= b-factor \< 10_000, the value is rounded to a single decimal place. When,
10_000 \<= b-factor \< 999_999, the value is rounded to zero decimal place. Values above
999_999 are now clamped. The justification for this is the rise in the b-factor field
being used for additional metadata, typically from computational tools.
``Bio.PDB.PDBIO`` will now raise module specific warnings: ``Bio.PDB.PDBExceptions.PDBIOWarning``.
Many thanks to the Biopython developers and community for making this release
possible, especially the following contributors:
- Oliver Wissett (first contribution)
- Samuel Prince (first contribution)
15 January 2025: Biopython 1.85

View File

@ -26,6 +26,8 @@ from Bio.PDB import Residue
from Bio.PDB import Select
from Bio.PDB.PDBExceptions import PDBConstructionWarning
from Bio.PDB.PDBExceptions import PDBIOException
from Bio.PDB.PDBExceptions import PDBIOWarning
from Bio.PDB.PDBIO import _MAX_B_FACTOR
class WriteTest(unittest.TestCase):
@ -381,6 +383,42 @@ class WriteTest(unittest.TestCase):
self.io.save(filename)
self.assertFalse(os.path.exists(filename))
def test_pdbio_write_high_b_factor(self):
def check_pdb(filename, expected_bfactor):
struct = self.parser.get_structure("high_b", filename)
atom = next(struct.get_atoms())
self.assertEqual(atom.bfactor, expected_bfactor)
def test_b_factor(
b_factor: float, expected_bfactor: float, assert_warn: bool = False
) -> None:
struct = self.structure
atom = next(struct.get_atoms())
atom.bfactor = b_factor
self.io.set_structure(struct)
filenumber, filename = tempfile.mkstemp()
os.close(filenumber)
if assert_warn:
with self.assertWarns(PDBIOWarning):
self.io.save(filename)
else:
self.io.save(filename)
try:
check_pdb(filename, expected_bfactor)
finally:
os.remove(filename)
test_b_factor(99.999999, 100)
test_b_factor(999.99999, 1000)
test_b_factor(9999.9999, 10000)
test_b_factor(424.4242, 424.42)
test_b_factor(4242.4242, 4242.4)
test_b_factor(42424.4242, 42424)
test_b_factor(_MAX_B_FACTOR + 10, _MAX_B_FACTOR, assert_warn=True)
# Test revert_write
def test_pdbio_revert_write_on_filename(self):
"""Test removing file when exception is caught (string)."""