diff --git a/Bio/PDB/PDBExceptions.py b/Bio/PDB/PDBExceptions.py index 8fc06ed50..12328fc10 100644 --- a/Bio/PDB/PDBExceptions.py +++ b/Bio/PDB/PDBExceptions.py @@ -28,3 +28,7 @@ class PDBConstructionWarning(BiopythonWarning): # The SMCRA structure could not be written to file class PDBIOException(Exception): """Define class PDBIOException.""" + + +class PDBIOWarning(BiopythonWarning): + """Define class PDBIOWarning.""" diff --git a/Bio/PDB/PDBIO.py b/Bio/PDB/PDBIO.py index 69c526df1..ac824f17c 100644 --- a/Bio/PDB/PDBIO.py +++ b/Bio/PDB/PDBIO.py @@ -8,14 +8,12 @@ import os import warnings -from Bio import BiopythonWarning from Bio.Data.IUPACData import atom_weights -from Bio.PDB.PDBExceptions import PDBIOException +from Bio.PDB.PDBExceptions import PDBIOException, PDBIOWarning from Bio.PDB.StructureBuilder import StructureBuilder -_ATOM_FORMAT_STRING = ( - "%s%5i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%s%6.2f %4s%2s%2s\n" -) +_ATOM_FORMAT_STRING = "%s%5i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f%s%s %4s%2s%2s\n" + _PQR_ATOM_FORMAT_STRING = ( "%s%5i %-4s%c%3s %c%4i%c %8.3f%8.3f%8.3f %7s %6s %2s\n" ) @@ -24,6 +22,42 @@ _TER_FORMAT_STRING = ( "TER %5i %3s %c%4i%c \n" ) +_MAX_B_FACTOR_2DP = 1_000 +_MAX_B_FACTOR_1DP = 10_000 +_MAX_B_FACTOR = 999_999 + + +def _format_b_factor(value: float) -> str: + """Returns the b-factor value as a formatted string + + Formats the b-factor value to fit the wwPDB specification of 6 characters + maximum, otherwise truncating to 6 characters if necessary. + """ + _bfactor_str = "" + if value < _MAX_B_FACTOR_2DP: + if len(f"{value:.2f}") > 6: + _bfactor_str = f"{value:6.1f}" + else: + _bfactor_str = f"{value:6.2f}" + elif value < _MAX_B_FACTOR_1DP: + if len(f"{value:.1f}") > 6: + _bfactor_str = f"{value:6.0f}" + else: + _bfactor_str = f"{value:6.1f}" + elif value < _MAX_B_FACTOR: + _bfactor_str = f"{int(value):6d}" + else: + warnings.warn( + f"Truncated bfactor {value!r} to {_MAX_B_FACTOR} to fit wwPDB spec." + " Consider using mmCIF instead!", + PDBIOWarning, + ) + _bfactor_str = f"{_MAX_B_FACTOR:6d}" + + assert len(_bfactor_str) <= 6 + + return _bfactor_str + class Select: """Select everything for PDB output (for use as a base class). @@ -214,13 +248,15 @@ class PDBIO(StructureIO): occupancy = " " * 6 warnings.warn( f"Missing occupancy in atom {atom.full_id!r} written as blank", - BiopythonWarning, + PDBIOWarning, ) else: raise ValueError( f"Invalid occupancy value: {atom.occupancy!r}" ) from None + bfactor = _format_b_factor(bfactor) + args = ( record_type, atom_number, @@ -239,6 +275,7 @@ class PDBIO(StructureIO): element, charge, ) + return _ATOM_FORMAT_STRING % args # Write PQR format line @@ -250,7 +287,7 @@ class PDBIO(StructureIO): pqr_charge = " " * 7 warnings.warn( f"Missing PQR charge in atom {atom.full_id} written as blank", - BiopythonWarning, + PDBIOWarning, ) else: raise ValueError( @@ -264,7 +301,7 @@ class PDBIO(StructureIO): radius = " " * 6 warnings.warn( f"Missing radius in atom {atom.full_id} written as blank", - BiopythonWarning, + PDBIOWarning, ) else: raise ValueError(f"Invalid radius value: {atom.radius}") from None diff --git a/CONTRIB.rst b/CONTRIB.rst index e2b73af5d..b13d1bdd1 100644 --- a/CONTRIB.rst +++ b/CONTRIB.rst @@ -270,6 +270,7 @@ please open an issue on GitHub or mention it on the mailing list. - Nicolas Fontrodona - Nigel Delaney - Noam Kremen +- Oliver Wissett - Olivier Morelle - Oscar G. Garcia - Osvaldo Zagordi diff --git a/NEWS.rst b/NEWS.rst index bb7ef987d..a30b37380 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -17,12 +17,23 @@ This release of Biopython supports Python 3.10, 3.11, 3.12 and 3.13. It has also been tested on PyPy3.10 v7.3.17. ``Bio.SearchIO`` now supports parsing the tabular and plain text output of -`Infernal ` (v1.0.0+) RNA search tool. The +`Infernal ` (v1.0.0+) RNA search tool. The format are ``infernal-tab`` and ``infernal-text``. +``Bio.PDB.PDBIO`` now ensures that b-factor values are always at most 6 characters to +ensure that we do not violate the wwPDB specification. This should not have an impact +on the majority of uses, as b-factor values are generally small (less than 100). When +1000 \<= b-factor \< 10_000, the value is rounded to a single decimal place. When, +10_000 \<= b-factor \< 999_999, the value is rounded to zero decimal place. Values above +999_999 are now clamped. The justification for this is the rise in the b-factor field +being used for additional metadata, typically from computational tools. + +``Bio.PDB.PDBIO`` will now raise module specific warnings: ``Bio.PDB.PDBExceptions.PDBIOWarning``. + Many thanks to the Biopython developers and community for making this release possible, especially the following contributors: +- Oliver Wissett (first contribution) - Samuel Prince (first contribution) 15 January 2025: Biopython 1.85 diff --git a/Tests/test_PDB_PDBIO.py b/Tests/test_PDB_PDBIO.py index 33c4c80bc..2ad9d9cbc 100644 --- a/Tests/test_PDB_PDBIO.py +++ b/Tests/test_PDB_PDBIO.py @@ -26,6 +26,8 @@ from Bio.PDB import Residue from Bio.PDB import Select from Bio.PDB.PDBExceptions import PDBConstructionWarning from Bio.PDB.PDBExceptions import PDBIOException +from Bio.PDB.PDBExceptions import PDBIOWarning +from Bio.PDB.PDBIO import _MAX_B_FACTOR class WriteTest(unittest.TestCase): @@ -381,6 +383,42 @@ class WriteTest(unittest.TestCase): self.io.save(filename) self.assertFalse(os.path.exists(filename)) + def test_pdbio_write_high_b_factor(self): + def check_pdb(filename, expected_bfactor): + struct = self.parser.get_structure("high_b", filename) + atom = next(struct.get_atoms()) + self.assertEqual(atom.bfactor, expected_bfactor) + + def test_b_factor( + b_factor: float, expected_bfactor: float, assert_warn: bool = False + ) -> None: + struct = self.structure + atom = next(struct.get_atoms()) + atom.bfactor = b_factor + self.io.set_structure(struct) + filenumber, filename = tempfile.mkstemp() + os.close(filenumber) + if assert_warn: + with self.assertWarns(PDBIOWarning): + self.io.save(filename) + else: + self.io.save(filename) + + try: + check_pdb(filename, expected_bfactor) + finally: + os.remove(filename) + + test_b_factor(99.999999, 100) + test_b_factor(999.99999, 1000) + test_b_factor(9999.9999, 10000) + + test_b_factor(424.4242, 424.42) + test_b_factor(4242.4242, 4242.4) + test_b_factor(42424.4242, 42424) + + test_b_factor(_MAX_B_FACTOR + 10, _MAX_B_FACTOR, assert_warn=True) + # Test revert_write def test_pdbio_revert_write_on_filename(self): """Test removing file when exception is caught (string)."""