mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
Fixes formatting for extremely high B-factor values in PDB files (#5047)
This commit is contained in:
@ -28,3 +28,7 @@ class PDBConstructionWarning(BiopythonWarning):
|
||||
# The SMCRA structure could not be written to file
|
||||
class PDBIOException(Exception):
|
||||
"""Define class PDBIOException."""
|
||||
|
||||
|
||||
class PDBIOWarning(BiopythonWarning):
|
||||
"""Define class PDBIOWarning."""
|
||||
|
@ -8,14 +8,12 @@
|
||||
import os
|
||||
import warnings
|
||||
|
||||
from Bio import BiopythonWarning
|
||||
from Bio.Data.IUPACData import atom_weights
|
||||
from Bio.PDB.PDBExceptions import PDBIOException
|
||||
from Bio.PDB.PDBExceptions import PDBIOException, PDBIOWarning
|
||||
from Bio.PDB.StructureBuilder import StructureBuilder
|
||||
|
||||
_ATOM_FORMAT_STRING = (
|
||||
"%s%5i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%s%6.2f %4s%2s%2s\n"
|
||||
)
|
||||
_ATOM_FORMAT_STRING = "%s%5i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%s%s %4s%2s%2s\n"
|
||||
|
||||
_PQR_ATOM_FORMAT_STRING = (
|
||||
"%s%5i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f %7s %6s %2s\n"
|
||||
)
|
||||
@ -24,6 +22,42 @@ _TER_FORMAT_STRING = (
|
||||
"TER %5i %3s %c%4i%c \n"
|
||||
)
|
||||
|
||||
_MAX_B_FACTOR_2DP = 1_000
|
||||
_MAX_B_FACTOR_1DP = 10_000
|
||||
_MAX_B_FACTOR = 999_999
|
||||
|
||||
|
||||
def _format_b_factor(value: float) -> str:
|
||||
"""Returns the b-factor value as a formatted string
|
||||
|
||||
Formats the b-factor value to fit the wwPDB specification of 6 characters
|
||||
maximum, otherwise truncating to 6 characters if necessary.
|
||||
"""
|
||||
_bfactor_str = ""
|
||||
if value < _MAX_B_FACTOR_2DP:
|
||||
if len(f"{value:.2f}") > 6:
|
||||
_bfactor_str = f"{value:6.1f}"
|
||||
else:
|
||||
_bfactor_str = f"{value:6.2f}"
|
||||
elif value < _MAX_B_FACTOR_1DP:
|
||||
if len(f"{value:.1f}") > 6:
|
||||
_bfactor_str = f"{value:6.0f}"
|
||||
else:
|
||||
_bfactor_str = f"{value:6.1f}"
|
||||
elif value < _MAX_B_FACTOR:
|
||||
_bfactor_str = f"{int(value):6d}"
|
||||
else:
|
||||
warnings.warn(
|
||||
f"Truncated bfactor {value!r} to {_MAX_B_FACTOR} to fit wwPDB spec."
|
||||
" Consider using mmCIF instead!",
|
||||
PDBIOWarning,
|
||||
)
|
||||
_bfactor_str = f"{_MAX_B_FACTOR:6d}"
|
||||
|
||||
assert len(_bfactor_str) <= 6
|
||||
|
||||
return _bfactor_str
|
||||
|
||||
|
||||
class Select:
|
||||
"""Select everything for PDB output (for use as a base class).
|
||||
@ -214,13 +248,15 @@ class PDBIO(StructureIO):
|
||||
occupancy = " " * 6
|
||||
warnings.warn(
|
||||
f"Missing occupancy in atom {atom.full_id!r} written as blank",
|
||||
BiopythonWarning,
|
||||
PDBIOWarning,
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Invalid occupancy value: {atom.occupancy!r}"
|
||||
) from None
|
||||
|
||||
bfactor = _format_b_factor(bfactor)
|
||||
|
||||
args = (
|
||||
record_type,
|
||||
atom_number,
|
||||
@ -239,6 +275,7 @@ class PDBIO(StructureIO):
|
||||
element,
|
||||
charge,
|
||||
)
|
||||
|
||||
return _ATOM_FORMAT_STRING % args
|
||||
|
||||
# Write PQR format line
|
||||
@ -250,7 +287,7 @@ class PDBIO(StructureIO):
|
||||
pqr_charge = " " * 7
|
||||
warnings.warn(
|
||||
f"Missing PQR charge in atom {atom.full_id} written as blank",
|
||||
BiopythonWarning,
|
||||
PDBIOWarning,
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
@ -264,7 +301,7 @@ class PDBIO(StructureIO):
|
||||
radius = " " * 6
|
||||
warnings.warn(
|
||||
f"Missing radius in atom {atom.full_id} written as blank",
|
||||
BiopythonWarning,
|
||||
PDBIOWarning,
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Invalid radius value: {atom.radius}") from None
|
||||
|
@ -270,6 +270,7 @@ please open an issue on GitHub or mention it on the mailing list.
|
||||
- Nicolas Fontrodona <https://github.com/NFontrodona>
|
||||
- Nigel Delaney <https://github.com/evolvedmicrobe>
|
||||
- Noam Kremen <https://github.com/noamkremen>
|
||||
- Oliver Wissett <https://github.com/OWissett>
|
||||
- Olivier Morelle <https://github.com/Oli4>
|
||||
- Oscar G. Garcia <https://github.com/oscarmaestre>
|
||||
- Osvaldo Zagordi <https://github.com/ozagordi>
|
||||
|
11
NEWS.rst
11
NEWS.rst
@ -20,9 +20,20 @@ has also been tested on PyPy3.10 v7.3.17.
|
||||
`Infernal <http://eddylab.org/infernal/>` (v1.0.0+) RNA search tool. The
|
||||
format are ``infernal-tab`` and ``infernal-text``.
|
||||
|
||||
``Bio.PDB.PDBIO`` now ensures that b-factor values are always at most 6 characters to
|
||||
ensure that we do not violate the wwPDB specification. This should not have an impact
|
||||
on the majority of uses, as b-factor values are generally small (less than 100). When
|
||||
1000 \<= b-factor \< 10_000, the value is rounded to a single decimal place. When,
|
||||
10_000 \<= b-factor \< 999_999, the value is rounded to zero decimal place. Values above
|
||||
999_999 are now clamped. The justification for this is the rise in the b-factor field
|
||||
being used for additional metadata, typically from computational tools.
|
||||
|
||||
``Bio.PDB.PDBIO`` will now raise module specific warnings: ``Bio.PDB.PDBExceptions.PDBIOWarning``.
|
||||
|
||||
Many thanks to the Biopython developers and community for making this release
|
||||
possible, especially the following contributors:
|
||||
|
||||
- Oliver Wissett (first contribution)
|
||||
- Samuel Prince (first contribution)
|
||||
|
||||
15 January 2025: Biopython 1.85
|
||||
|
@ -26,6 +26,8 @@ from Bio.PDB import Residue
|
||||
from Bio.PDB import Select
|
||||
from Bio.PDB.PDBExceptions import PDBConstructionWarning
|
||||
from Bio.PDB.PDBExceptions import PDBIOException
|
||||
from Bio.PDB.PDBExceptions import PDBIOWarning
|
||||
from Bio.PDB.PDBIO import _MAX_B_FACTOR
|
||||
|
||||
|
||||
class WriteTest(unittest.TestCase):
|
||||
@ -381,6 +383,42 @@ class WriteTest(unittest.TestCase):
|
||||
self.io.save(filename)
|
||||
self.assertFalse(os.path.exists(filename))
|
||||
|
||||
def test_pdbio_write_high_b_factor(self):
|
||||
def check_pdb(filename, expected_bfactor):
|
||||
struct = self.parser.get_structure("high_b", filename)
|
||||
atom = next(struct.get_atoms())
|
||||
self.assertEqual(atom.bfactor, expected_bfactor)
|
||||
|
||||
def test_b_factor(
|
||||
b_factor: float, expected_bfactor: float, assert_warn: bool = False
|
||||
) -> None:
|
||||
struct = self.structure
|
||||
atom = next(struct.get_atoms())
|
||||
atom.bfactor = b_factor
|
||||
self.io.set_structure(struct)
|
||||
filenumber, filename = tempfile.mkstemp()
|
||||
os.close(filenumber)
|
||||
if assert_warn:
|
||||
with self.assertWarns(PDBIOWarning):
|
||||
self.io.save(filename)
|
||||
else:
|
||||
self.io.save(filename)
|
||||
|
||||
try:
|
||||
check_pdb(filename, expected_bfactor)
|
||||
finally:
|
||||
os.remove(filename)
|
||||
|
||||
test_b_factor(99.999999, 100)
|
||||
test_b_factor(999.99999, 1000)
|
||||
test_b_factor(9999.9999, 10000)
|
||||
|
||||
test_b_factor(424.4242, 424.42)
|
||||
test_b_factor(4242.4242, 4242.4)
|
||||
test_b_factor(42424.4242, 42424)
|
||||
|
||||
test_b_factor(_MAX_B_FACTOR + 10, _MAX_B_FACTOR, assert_warn=True)
|
||||
|
||||
# Test revert_write
|
||||
def test_pdbio_revert_write_on_filename(self):
|
||||
"""Test removing file when exception is caught (string)."""
|
||||
|
Reference in New Issue
Block a user