Files
biopython/Tests/test_ProtParam.py
Michael M. 3d1980f88b Deprecate ProteinAnalysis.get_amino_acids_percent() and create ProteinAnalysis.amino_acids_percent (#4783)
* Make ProteinAnalysis.get_amino_acids_percent() return a precent

* fix examples in Bio/SeqUtils/ProtParam.py

* Deprecate get_amino_acids_percent, create amino_acids_percent
2024-07-31 14:48:47 +09:00

272 lines
13 KiB
Python

# Copyright 2003-2004 by Iddo Friedberg. All rights reserved.
# Revisions copyright 2008-2010 by Peter Cock. All rights reserved.
# Revisions copyright 2012 by Matt Fenwick. All rights reserved.
# Revisions copyright 2012 by Kai Blin. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Tests for Bio.SeqUtils.ProtParam and related code."""
import unittest
from Bio import BiopythonDeprecationWarning
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.SeqUtils import molecular_weight
from Bio.SeqUtils import ProtParam
from Bio.SeqUtils import ProtParamData
class ProtParamTest(unittest.TestCase):
"""Tests for ProtParam."""
def setUp(self):
"""Initialise objects."""
text = "MAEGEITTFTALTEKFNLPPGNYKKPKLLYCSNGGHFLRILPDGTVDGTRDRSDQHIQLQLSAESVGEVYIKSTETGQYLAMDTSGLLYGSQTPSEECLFLERLEENHYNTYTSKKHAEKNWFVGLKKNGSCKRGPRTHYGQKAILFLPLPV"
seq = Seq(text)
record = SeqRecord(seq)
analysis_text = ProtParam.ProteinAnalysis(text)
analysis_seq = ProtParam.ProteinAnalysis(seq)
analysis_record = ProtParam.ProteinAnalysis(record)
self.text = text
self.sequences = (text, seq, record)
self.analyses = (analysis_text, analysis_seq, analysis_record)
def test_count_amino_acids(self):
"""Calculate amino acid counts."""
for analysis in self.analyses:
count_dict = analysis.count_amino_acids()
for i in count_dict:
self.assertEqual(count_dict[i], self.text.count(i))
def test_get_amino_acids_percent(self):
"""Calculate amino acid percentages (DEPRECATED)."""
with self.assertWarns(BiopythonDeprecationWarning):
for analysis in self.analyses:
percent_dict = analysis.get_amino_acids_percent()
seq_len = len(self.text)
for i in percent_dict:
self.assertAlmostEqual(
percent_dict[i], self.text.count(i) / seq_len
)
def test_amino_acids_percent(self):
"""Calculate amino acid percentages."""
for analysis in self.analyses:
seq_len = len(self.text)
for i in analysis.amino_acids_percent:
self.assertAlmostEqual(
analysis.amino_acids_percent[i],
(self.text.count(i) * 100 / seq_len),
)
def test_get_molecular_weight(self):
"""Calculate protein molecular weight."""
for analysis in self.analyses:
self.assertAlmostEqual(analysis.molecular_weight(), 17103.16, 2)
def test_get_monoisotopic_molecular_weight(self):
"""Calculate monoisotopic molecular weight."""
for sequence in self.sequences:
analysis = ProtParam.ProteinAnalysis(sequence, monoisotopic=True)
self.assertAlmostEqual(analysis.molecular_weight(), 17092.61, 2)
def test_get_molecular_weight_identical(self):
"""Confirm protein molecular weight agrees with calculation from Bio.SeqUtils."""
# This test is somehow useless, since ProteinAnalysis.molecular_weight
# is internally calling SeqUtils.molecular_weight.
mw_2 = molecular_weight(self.text, seq_type="protein")
for analysis in self.analyses:
mw_1 = analysis.molecular_weight()
self.assertAlmostEqual(mw_1, mw_2)
def test_get_monoisotopic_molecular_weight_identical(self):
"""Confirm protein molecular weight agrees with calculation from Bio.SeqUtils."""
# This test is somehow useless, since ProteinAnalysis.molecular_weight
# is internally calling SeqUtils.molecular_weight.
mw_2 = molecular_weight(self.text, seq_type="protein", monoisotopic=True)
for sequence in self.sequences:
analysis = ProtParam.ProteinAnalysis(sequence, monoisotopic=True)
mw_1 = analysis.molecular_weight()
self.assertAlmostEqual(mw_1, mw_2)
def test_aromaticity(self):
"""Calculate protein aromaticity."""
for analysis in self.analyses:
# Old test used a number rounded to two digits, so use the same
self.assertAlmostEqual(analysis.aromaticity(), 0.10, 2)
def test_instability_index(self):
"""Calculate protein instability index."""
for analysis in self.analyses:
# Old test used a number rounded to two digits, so use the same
self.assertAlmostEqual(analysis.instability_index(), 41.98, 2)
def test_flexibility(self):
"""Calculate protein flexibility."""
# Turn black code style off
# fmt: off
expected_flexibility = [
0.9825119047619049, 1.0166904761904763, 0.9947857142857144,
0.9660238095238095, 0.9890714285714285, 0.9737261904761906,
0.9789166666666669, 1.004547619047619, 1.0235357142857144,
1.0163214285714286, 0.981297619047619, 1.0388809523809523,
0.9956309523809524, 1.0379047619047619, 1.014654761904762,
1.015154761904762, 1.0317619047619049, 1.0100833333333334,
1.0738333333333334, 1.0460952380952382, 1.0333571428571429,
1.0429761904761905, 0.9842738095238095, 0.9984404761904762,
0.9814404761904763, 0.9715357142857144, 1.0063690476190477,
0.988952380952381, 0.9930952380952381, 0.9962619047619047,
0.9774523809523811, 0.9747857142857144, 0.9701547619047618,
0.9759404761904762, 0.9515119047619047, 0.9745714285714286,
1.007642857142857, 1.0024523809523809, 1.0019761904761904,
1.0053571428571428, 1.000595238095238, 1.0385238095238094,
1.0090357142857143, 1.0095, 1.0207142857142857, 1.0371071428571428,
1.0223690476190477, 1.0373809523809523, 1.030095238095238,
1.0166190476190475, 0.9939404761904762, 0.9935833333333335,
1.009547619047619, 0.9678095238095237, 1.0027380952380953,
0.9720595238095241, 1.0215952380952382, 0.996904761904762,
1.0330238095238098, 1.0140714285714285, 0.9977976190476192,
1.0319285714285715, 1.016714285714286, 0.9713928571428571,
0.9921666666666669, 0.9904404761904763, 1.0350238095238096,
1.0021904761904763, 1.0092738095238096, 1.0462380952380952,
1.012392857142857, 1.0289761904761903, 1.0108095238095236,
0.9762619047619049, 0.9885357142857144, 0.9901428571428571,
0.9795119047619048, 1.014059523809524, 0.9857976190476189,
1.0114404761904763, 0.9970357142857144, 0.9755238095238097,
0.9871428571428573, 0.9820952380952382, 1.006095238095238,
0.9997023809523811, 1.0065238095238094, 1.0149047619047618,
1.0451071428571428, 1.0396785714285717, 1.0452142857142857,
1.0262619047619048, 0.9735952380952381, 0.998404761904762,
0.9777976190476191, 0.9773809523809524, 1.008214285714286,
0.9772619047619048, 0.9955000000000002, 1.0482261904761907,
1.0262499999999999, 1.0189404761904763, 0.9988452380952382,
1.0009285714285714, 1.0204404761904762, 0.9839880952380953,
0.9809166666666669, 1.01475, 1.0234166666666669, 1.0355476190476192,
1.033107142857143, 1.011154761904762, 1.0480714285714288,
1.0591190476190477, 1.033809523809524, 1.008107142857143,
0.9757261904761906, 0.9885714285714287, 0.9831428571428572,
1.000595238095238, 0.9739761904761908, 1.0377619047619049,
1.0295357142857142, 1.0269642857142858, 1.0371904761904762,
1.0385476190476193, 1.0057023809523808, 1.06075, 1.006714285714286,
1.0269642857142858, 1.0229761904761905, 1.0046309523809522,
1.0053690476190476, 0.9886666666666667, 0.9931666666666669, 1.01775,
1.003297619047619, 1.0161666666666667, 0.977440476190476,
0.9762738095238096, 0.9785833333333332, 0.9609642857142857,
0.9650833333333334]
# Turn black code style on
# fmt: on
for analysis in self.analyses:
flexibility = analysis.flexibility()
self.assertEqual(
len(flexibility), len(expected_flexibility), "Output length differs"
)
for f, e in zip(flexibility, expected_flexibility):
self.assertAlmostEqual(f, e)
def test_isoelectric_point(self):
"""Calculate the isoelectric point."""
for analysis in self.analyses:
# Old test used a number rounded to two digits, so use the same
self.assertAlmostEqual(analysis.isoelectric_point(), 7.72, 2)
def test_charge_at_pH(self):
"""Test charge_at_pH function."""
for analysis in self.analyses:
self.assertAlmostEqual(analysis.charge_at_pH(7.72), 0.00, 2)
def test_secondary_structure_fraction(self):
"""Calculate secondary structure fractions."""
for analysis in self.analyses:
helix, turn, sheet = analysis.secondary_structure_fraction()
# Old test used numbers rounded to two digits, so use the same
self.assertAlmostEqual(helix, 0.33, 2)
self.assertAlmostEqual(turn, 0.29, 2)
self.assertAlmostEqual(sheet, 0.37, 2)
def test_protein_scale(self):
"""Calculate the Kite Doolittle scale."""
# Turn black code style off
# fmt: off
expected = [-0.0783, +0.0358, +0.1258, +0.6950, +0.8775, +0.8350, +0.2925, +0.3383,
-0.1733, -0.4142, -0.5292, -0.6108, -0.8308, -0.8100, -0.8208, -1.0283,
-1.6300, -1.8233, -2.4267, -2.2292, -1.7817, -1.4742, -0.7467, -0.1608,
+0.1108, +0.2142, +0.1792, -0.1217, -0.4808, -0.4333, -0.5167, -0.2833,
+0.3758, +0.7225, +0.4958, +0.6033, +0.5625, +0.3108, -0.2408, -0.0575,
-0.3717, -0.7800, -1.1242, -1.4083, -1.7550, -2.2642, -2.8575, -2.9175,
-2.5358, -2.5325, -1.8142, -1.4667, -0.6058, -0.4483, +0.1300, +0.1225,
+0.2825, +0.1650, +0.3317, -0.2000, +0.2683, +0.1233, +0.4092, +0.1392,
+0.4192, +0.2758, -0.2350, -0.5750, -0.5983, -1.2067, -1.3867, -1.3583,
-0.8708, -0.5383, -0.3675, +0.0667, +0.0825, -0.0150, +0.1817, +0.4692,
+0.3017, +0.3800, +0.4825, +0.4675, +0.1575, -0.1783, -0.5175, -1.2017,
-1.7033, -1.5500, -1.2375, -0.8500, -0.0583, +0.3125, +0.4242, +0.7133,
+0.5633, +0.0483, -0.7167, -1.3158, -1.9217, -2.5033, -2.4117, -2.2483,
-2.3758, -2.0633, -1.8900, -1.8667, -1.9292, -1.8625, -2.0050, -2.2708,
-2.4050, -2.3508, -2.1758, -1.5533, -1.0350, -0.1983, -0.0233, +0.1800,
+0.0317, -0.0917, -0.6375, -0.9650, -1.4500, -1.6008, -1.7558, -1.5450,
-1.7900, -1.8133, -2.0125, -2.1383, -2.3142, -2.1525, -2.1425, -1.9733,
-1.4742, -0.8083, -0.2100, +0.8067, +1.3092, +1.8367, +2.0283, +2.3558]
# Turn black code style on
# fmt: on
for analysis in self.analyses:
for i, e in zip(analysis.protein_scale(ProtParamData.kd, 9, 0.4), expected):
# Expected values have 4 decimal places, so restrict to that exactness
self.assertAlmostEqual(i, e, places=4)
def test_gravy(self):
"""Calculate gravy. Tests all pre-defined scales."""
expected_values = {
"KyteDoolitle": -0.5974,
"Aboderin": 4.5671,
"AbrahamLeo": 0.2378,
"Argos": 0.8607,
"BlackMould": 0.5074,
"BullBreese": -0.0445,
"Casari": -0.2414,
"Cid": -0.0678,
"Cowan3.4": 0.0234,
"Cowan7.5": -0.0733,
"Eisenberg": -0.0435,
"Engelman": 1.600,
"Fasman": -0.3614,
"Fauchere": 0.327,
"GoldSack": 1.1564,
"Guy": 0.0675,
"Jones": 1.223,
"Juretic": -0.6672,
"Kidera": 0.1383,
"Miyazawa": 5.3109,
"Parker": 1.7487,
"Ponnuswamy": 0.3491,
"Rose": 0.7147,
"Roseman": -0.4729,
"Sweet": -0.0791,
"Tanford": 0.0625,
"Wilson": 1.5493,
"Zimmerman": 1.2841,
}
for analysis in self.analyses:
for scale, exp_v in expected_values.items():
self.assertAlmostEqual(analysis.gravy(scale=scale), exp_v, places=4)
with self.assertRaises(ValueError) as cm:
analysis.gravy("Wrong Scale")
self.assertEqual("scale: Wrong Scale not known", str(cm.exception))
def test_molar_extinction_coefficient(self):
"""Molar extinction coefficient."""
for analysis in self.analyses:
self.assertAlmostEqual(
analysis.molar_extinction_coefficient()[0], 17420, places=5
)
self.assertAlmostEqual(
analysis.molar_extinction_coefficient()[1], 17545, places=5
)
if __name__ == "__main__":
runner = unittest.TextTestRunner(verbosity=2)
unittest.main(testRunner=runner)