mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
Fixes formatting for extremely high B-factor values in PDB files (#5047)
This commit is contained in:
@ -28,3 +28,7 @@ class PDBConstructionWarning(BiopythonWarning):
|
|||||||
# The SMCRA structure could not be written to file
|
# The SMCRA structure could not be written to file
|
||||||
class PDBIOException(Exception):
|
class PDBIOException(Exception):
|
||||||
"""Define class PDBIOException."""
|
"""Define class PDBIOException."""
|
||||||
|
|
||||||
|
|
||||||
|
class PDBIOWarning(BiopythonWarning):
|
||||||
|
"""Define class PDBIOWarning."""
|
||||||
|
@ -8,14 +8,12 @@
|
|||||||
import os
|
import os
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from Bio import BiopythonWarning
|
|
||||||
from Bio.Data.IUPACData import atom_weights
|
from Bio.Data.IUPACData import atom_weights
|
||||||
from Bio.PDB.PDBExceptions import PDBIOException
|
from Bio.PDB.PDBExceptions import PDBIOException, PDBIOWarning
|
||||||
from Bio.PDB.StructureBuilder import StructureBuilder
|
from Bio.PDB.StructureBuilder import StructureBuilder
|
||||||
|
|
||||||
_ATOM_FORMAT_STRING = (
|
_ATOM_FORMAT_STRING = "%s%5i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%s%s %4s%2s%2s\n"
|
||||||
"%s%5i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%s%6.2f %4s%2s%2s\n"
|
|
||||||
)
|
|
||||||
_PQR_ATOM_FORMAT_STRING = (
|
_PQR_ATOM_FORMAT_STRING = (
|
||||||
"%s%5i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f %7s %6s %2s\n"
|
"%s%5i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f %7s %6s %2s\n"
|
||||||
)
|
)
|
||||||
@ -24,6 +22,42 @@ _TER_FORMAT_STRING = (
|
|||||||
"TER %5i %3s %c%4i%c \n"
|
"TER %5i %3s %c%4i%c \n"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
_MAX_B_FACTOR_2DP = 1_000
|
||||||
|
_MAX_B_FACTOR_1DP = 10_000
|
||||||
|
_MAX_B_FACTOR = 999_999
|
||||||
|
|
||||||
|
|
||||||
|
def _format_b_factor(value: float) -> str:
|
||||||
|
"""Returns the b-factor value as a formatted string
|
||||||
|
|
||||||
|
Formats the b-factor value to fit the wwPDB specification of 6 characters
|
||||||
|
maximum, otherwise truncating to 6 characters if necessary.
|
||||||
|
"""
|
||||||
|
_bfactor_str = ""
|
||||||
|
if value < _MAX_B_FACTOR_2DP:
|
||||||
|
if len(f"{value:.2f}") > 6:
|
||||||
|
_bfactor_str = f"{value:6.1f}"
|
||||||
|
else:
|
||||||
|
_bfactor_str = f"{value:6.2f}"
|
||||||
|
elif value < _MAX_B_FACTOR_1DP:
|
||||||
|
if len(f"{value:.1f}") > 6:
|
||||||
|
_bfactor_str = f"{value:6.0f}"
|
||||||
|
else:
|
||||||
|
_bfactor_str = f"{value:6.1f}"
|
||||||
|
elif value < _MAX_B_FACTOR:
|
||||||
|
_bfactor_str = f"{int(value):6d}"
|
||||||
|
else:
|
||||||
|
warnings.warn(
|
||||||
|
f"Truncated bfactor {value!r} to {_MAX_B_FACTOR} to fit wwPDB spec."
|
||||||
|
" Consider using mmCIF instead!",
|
||||||
|
PDBIOWarning,
|
||||||
|
)
|
||||||
|
_bfactor_str = f"{_MAX_B_FACTOR:6d}"
|
||||||
|
|
||||||
|
assert len(_bfactor_str) <= 6
|
||||||
|
|
||||||
|
return _bfactor_str
|
||||||
|
|
||||||
|
|
||||||
class Select:
|
class Select:
|
||||||
"""Select everything for PDB output (for use as a base class).
|
"""Select everything for PDB output (for use as a base class).
|
||||||
@ -214,13 +248,15 @@ class PDBIO(StructureIO):
|
|||||||
occupancy = " " * 6
|
occupancy = " " * 6
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
f"Missing occupancy in atom {atom.full_id!r} written as blank",
|
f"Missing occupancy in atom {atom.full_id!r} written as blank",
|
||||||
BiopythonWarning,
|
PDBIOWarning,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Invalid occupancy value: {atom.occupancy!r}"
|
f"Invalid occupancy value: {atom.occupancy!r}"
|
||||||
) from None
|
) from None
|
||||||
|
|
||||||
|
bfactor = _format_b_factor(bfactor)
|
||||||
|
|
||||||
args = (
|
args = (
|
||||||
record_type,
|
record_type,
|
||||||
atom_number,
|
atom_number,
|
||||||
@ -239,6 +275,7 @@ class PDBIO(StructureIO):
|
|||||||
element,
|
element,
|
||||||
charge,
|
charge,
|
||||||
)
|
)
|
||||||
|
|
||||||
return _ATOM_FORMAT_STRING % args
|
return _ATOM_FORMAT_STRING % args
|
||||||
|
|
||||||
# Write PQR format line
|
# Write PQR format line
|
||||||
@ -250,7 +287,7 @@ class PDBIO(StructureIO):
|
|||||||
pqr_charge = " " * 7
|
pqr_charge = " " * 7
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
f"Missing PQR charge in atom {atom.full_id} written as blank",
|
f"Missing PQR charge in atom {atom.full_id} written as blank",
|
||||||
BiopythonWarning,
|
PDBIOWarning,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
@ -264,7 +301,7 @@ class PDBIO(StructureIO):
|
|||||||
radius = " " * 6
|
radius = " " * 6
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
f"Missing radius in atom {atom.full_id} written as blank",
|
f"Missing radius in atom {atom.full_id} written as blank",
|
||||||
BiopythonWarning,
|
PDBIOWarning,
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Invalid radius value: {atom.radius}") from None
|
raise ValueError(f"Invalid radius value: {atom.radius}") from None
|
||||||
|
@ -270,6 +270,7 @@ please open an issue on GitHub or mention it on the mailing list.
|
|||||||
- Nicolas Fontrodona <https://github.com/NFontrodona>
|
- Nicolas Fontrodona <https://github.com/NFontrodona>
|
||||||
- Nigel Delaney <https://github.com/evolvedmicrobe>
|
- Nigel Delaney <https://github.com/evolvedmicrobe>
|
||||||
- Noam Kremen <https://github.com/noamkremen>
|
- Noam Kremen <https://github.com/noamkremen>
|
||||||
|
- Oliver Wissett <https://github.com/OWissett>
|
||||||
- Olivier Morelle <https://github.com/Oli4>
|
- Olivier Morelle <https://github.com/Oli4>
|
||||||
- Oscar G. Garcia <https://github.com/oscarmaestre>
|
- Oscar G. Garcia <https://github.com/oscarmaestre>
|
||||||
- Osvaldo Zagordi <https://github.com/ozagordi>
|
- Osvaldo Zagordi <https://github.com/ozagordi>
|
||||||
|
11
NEWS.rst
11
NEWS.rst
@ -20,9 +20,20 @@ has also been tested on PyPy3.10 v7.3.17.
|
|||||||
`Infernal <http://eddylab.org/infernal/>` (v1.0.0+) RNA search tool. The
|
`Infernal <http://eddylab.org/infernal/>` (v1.0.0+) RNA search tool. The
|
||||||
format are ``infernal-tab`` and ``infernal-text``.
|
format are ``infernal-tab`` and ``infernal-text``.
|
||||||
|
|
||||||
|
``Bio.PDB.PDBIO`` now ensures that b-factor values are always at most 6 characters to
|
||||||
|
ensure that we do not violate the wwPDB specification. This should not have an impact
|
||||||
|
on the majority of uses, as b-factor values are generally small (less than 100). When
|
||||||
|
1000 \<= b-factor \< 10_000, the value is rounded to a single decimal place. When,
|
||||||
|
10_000 \<= b-factor \< 999_999, the value is rounded to zero decimal place. Values above
|
||||||
|
999_999 are now clamped. The justification for this is the rise in the b-factor field
|
||||||
|
being used for additional metadata, typically from computational tools.
|
||||||
|
|
||||||
|
``Bio.PDB.PDBIO`` will now raise module specific warnings: ``Bio.PDB.PDBExceptions.PDBIOWarning``.
|
||||||
|
|
||||||
Many thanks to the Biopython developers and community for making this release
|
Many thanks to the Biopython developers and community for making this release
|
||||||
possible, especially the following contributors:
|
possible, especially the following contributors:
|
||||||
|
|
||||||
|
- Oliver Wissett (first contribution)
|
||||||
- Samuel Prince (first contribution)
|
- Samuel Prince (first contribution)
|
||||||
|
|
||||||
15 January 2025: Biopython 1.85
|
15 January 2025: Biopython 1.85
|
||||||
|
@ -26,6 +26,8 @@ from Bio.PDB import Residue
|
|||||||
from Bio.PDB import Select
|
from Bio.PDB import Select
|
||||||
from Bio.PDB.PDBExceptions import PDBConstructionWarning
|
from Bio.PDB.PDBExceptions import PDBConstructionWarning
|
||||||
from Bio.PDB.PDBExceptions import PDBIOException
|
from Bio.PDB.PDBExceptions import PDBIOException
|
||||||
|
from Bio.PDB.PDBExceptions import PDBIOWarning
|
||||||
|
from Bio.PDB.PDBIO import _MAX_B_FACTOR
|
||||||
|
|
||||||
|
|
||||||
class WriteTest(unittest.TestCase):
|
class WriteTest(unittest.TestCase):
|
||||||
@ -381,6 +383,42 @@ class WriteTest(unittest.TestCase):
|
|||||||
self.io.save(filename)
|
self.io.save(filename)
|
||||||
self.assertFalse(os.path.exists(filename))
|
self.assertFalse(os.path.exists(filename))
|
||||||
|
|
||||||
|
def test_pdbio_write_high_b_factor(self):
|
||||||
|
def check_pdb(filename, expected_bfactor):
|
||||||
|
struct = self.parser.get_structure("high_b", filename)
|
||||||
|
atom = next(struct.get_atoms())
|
||||||
|
self.assertEqual(atom.bfactor, expected_bfactor)
|
||||||
|
|
||||||
|
def test_b_factor(
|
||||||
|
b_factor: float, expected_bfactor: float, assert_warn: bool = False
|
||||||
|
) -> None:
|
||||||
|
struct = self.structure
|
||||||
|
atom = next(struct.get_atoms())
|
||||||
|
atom.bfactor = b_factor
|
||||||
|
self.io.set_structure(struct)
|
||||||
|
filenumber, filename = tempfile.mkstemp()
|
||||||
|
os.close(filenumber)
|
||||||
|
if assert_warn:
|
||||||
|
with self.assertWarns(PDBIOWarning):
|
||||||
|
self.io.save(filename)
|
||||||
|
else:
|
||||||
|
self.io.save(filename)
|
||||||
|
|
||||||
|
try:
|
||||||
|
check_pdb(filename, expected_bfactor)
|
||||||
|
finally:
|
||||||
|
os.remove(filename)
|
||||||
|
|
||||||
|
test_b_factor(99.999999, 100)
|
||||||
|
test_b_factor(999.99999, 1000)
|
||||||
|
test_b_factor(9999.9999, 10000)
|
||||||
|
|
||||||
|
test_b_factor(424.4242, 424.42)
|
||||||
|
test_b_factor(4242.4242, 4242.4)
|
||||||
|
test_b_factor(42424.4242, 42424)
|
||||||
|
|
||||||
|
test_b_factor(_MAX_B_FACTOR + 10, _MAX_B_FACTOR, assert_warn=True)
|
||||||
|
|
||||||
# Test revert_write
|
# Test revert_write
|
||||||
def test_pdbio_revert_write_on_filename(self):
|
def test_pdbio_revert_write_on_filename(self):
|
||||||
"""Test removing file when exception is caught (string)."""
|
"""Test removing file when exception is caught (string)."""
|
||||||
|
Reference in New Issue
Block a user