diff --git a/Doc/Tutorial/chapter_align.rst b/Doc/Tutorial/chapter_align.rst index cf19daf2e..70c64edc8 100644 --- a/Doc/Tutorial/chapter_align.rst +++ b/Doc/Tutorial/chapter_align.rst @@ -1940,6 +1940,58 @@ the string representation of the alignment: 0 AG-TTT-- 5 +As optional keyword arguments cannot be used with Python’s built-in +``format`` function or with formatted strings, the ``Alignment`` class +has a ``format`` method with optional arguments to customize the +alignment format. For example, you can use the optional ``scoring`` argument +to provide a substitution matrix (see Section +:ref:`sec:pairwise-substitution-scores`) to let the printed alignment reflect +the substitution scores as follows: + +* ``|`` for identical residues, +* ``:`` for substitutions with a positive score, +* ``.`` for substitutions with a negative score, +* ``-`` for gaps. + +.. cont-doctest + +.. code:: pycon + + >>> M = substitution_matrices.load("NUC.4.4") + >>> print(M[:, :]) + A T G C S W R Y K M B V H D N + A 5.0 -4.0 -4.0 -4.0 -4.0 1.0 1.0 -4.0 -4.0 1.0 -4.0 -1.0 -1.0 -1.0 -2.0 + T -4.0 5.0 -4.0 -4.0 -4.0 1.0 -4.0 1.0 1.0 -4.0 -1.0 -4.0 -1.0 -1.0 -2.0 + G -4.0 -4.0 5.0 -4.0 1.0 -4.0 1.0 -4.0 1.0 -4.0 -1.0 -1.0 -4.0 -1.0 -2.0 + C -4.0 -4.0 -4.0 5.0 1.0 -4.0 -4.0 1.0 -4.0 1.0 -1.0 -1.0 -1.0 -4.0 -2.0 + S -4.0 -4.0 1.0 1.0 -1.0 -4.0 -2.0 -2.0 -2.0 -2.0 -1.0 -1.0 -3.0 -3.0 -1.0 + W 1.0 1.0 -4.0 -4.0 -4.0 -1.0 -2.0 -2.0 -2.0 -2.0 -3.0 -3.0 -1.0 -1.0 -1.0 + R 1.0 -4.0 1.0 -4.0 -2.0 -2.0 -1.0 -4.0 -2.0 -2.0 -3.0 -1.0 -3.0 -1.0 -1.0 + Y -4.0 1.0 -4.0 1.0 -2.0 -2.0 -4.0 -1.0 -2.0 -2.0 -1.0 -3.0 -1.0 -3.0 -1.0 + K -4.0 1.0 1.0 -4.0 -2.0 -2.0 -2.0 -2.0 -1.0 -4.0 -1.0 -3.0 -3.0 -1.0 -1.0 + M 1.0 -4.0 -4.0 1.0 -2.0 -2.0 -2.0 -2.0 -4.0 -1.0 -3.0 -1.0 -1.0 -3.0 -1.0 + B -4.0 -1.0 -1.0 -1.0 -1.0 -3.0 -3.0 -1.0 -1.0 -3.0 -1.0 -2.0 -2.0 -2.0 -1.0 + V -1.0 -4.0 -1.0 -1.0 -1.0 -3.0 -1.0 -3.0 -3.0 -1.0 -2.0 -1.0 -2.0 -2.0 -1.0 + H -1.0 -1.0 -4.0 -1.0 -3.0 -1.0 -3.0 -1.0 -3.0 -1.0 -2.0 -2.0 -1.0 -2.0 -1.0 + D -1.0 -1.0 -1.0 -4.0 -3.0 -1.0 -1.0 -3.0 -1.0 -3.0 -2.0 -2.0 -2.0 -1.0 -1.0 + N -2.0 -2.0 -2.0 -2.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 + + >>> M["T", "Y"] + 1.0 + >>> M["T", "C"] + -4.0 + >>> aln = Align.Alignment(["GATTACAT", "GATYACAC"]) + >>> print(aln.format(scoring=M)) + target 0 GATTACAT 8 + 0 |||:|||. 8 + query 0 GATYACAC 8 + + + +Instead of the substitution matrix, you can also use a ``PairwiseAligner`` +object (see Chapter :ref:`chapter:pairwise`) as the ``scoring`` argument +to use the substitution matrix associated with the aligner. + By specifying one of the formats shown in Section :ref:`sec:alignformats`, ``format`` will create a string showing the alignment in the requested format: @@ -1976,13 +2028,8 @@ showing the alignment in the requested format: -As optional keyword arguments cannot be used with Python’s built-in -``format`` function or with formatted strings, the ``Alignment`` class -has a ``format`` method with optional arguments to customize the -alignment format, as described in the subsections below. For example, we -can print the alignment in BED format (see -section :ref:`subsec:align_bed`) with a specific number of -columns: +As another example, we can print the alignment in BED format (see +section :ref:`subsec:align_bed`) with a specific number of columns: .. cont-doctest @@ -3209,33 +3256,6 @@ using Python’s built-in ``format`` function writes a vulgar line: vulgar: gi|296143771|ref|NM_001180731.1| 0 1230 + gi|330443520|ref|NC_001136.10| 1319275 1318045 - 6146 M 1 1 C 3 3 M 1226 1226 -The ``Alignment.format()`` method also accepts an optional ``scoring`` argument. -If you provide a substitution matrix (for example, ``scoring=M`` where -``M = Bio.Align.substitution_matrices.load("NUC.4.4")``), the middle pattern -line will reflect the substitution scores: - -* ``|`` for identical residues, -* ``:`` for substitutions with a positive score, -* ``.`` for substitutions with a negative score, -* ``-`` for gaps. - -.. cont-doctest - -.. code:: pycon - - >>> from Bio.Align import PairwiseAligner - >>> from Bio.Align import substitution_matrices - >>> M = substitution_matrices.load("NUC.4.4") - >>> aligner = PairwiseAligner() - >>> aligner.open_gap_score = -100 - >>> aligner.extend_gap_score = -100 - >>> aln = aligner.align("GATTACAT", "GATYACAC")[0] - >>> print(aln.format("", scoring=M)) - target 0 GATTACAT 8 - 0 |||:|||. 8 - query 0 GATYACAC 8 - - Using the ``format`` method allows us to request either a vulgar line (default) or a cigar line: diff --git a/Tests/test_format_matrix_unittest.py b/Tests/test_Align_format_matrix.py similarity index 89% rename from Tests/test_format_matrix_unittest.py rename to Tests/test_Align_format_matrix.py index 7c868a6a9..e7d105527 100644 --- a/Tests/test_format_matrix_unittest.py +++ b/Tests/test_Align_format_matrix.py @@ -28,14 +28,6 @@ from Bio.Align import PairwiseAligner from Bio.Align.substitution_matrices import load, Array -def _blastn_like_matrix(): - alphabet = "ACGTY" - n = len(alphabet) - data = np.full((n, n), -1, dtype=int) - np.fill_diagonal(data, 1) - return Array(alphabet=alphabet, dims=2, data=data) - - class TestFormatMatrix(unittest.TestCase): """Unit tests for Alignment.format() with substitution matrices.""" @@ -47,7 +39,7 @@ class TestFormatMatrix(unittest.TestCase): aligner.extend_gap_score = -0.5 aln = aligner.align("GATTACAT", "GATYACAC")[0] self.assertEqual( - aln.format("", scoring=M), + aln.format(scoring=M), """\ target 0 GATTACAT 8 0 |||:|||. 8 @@ -57,13 +49,13 @@ query 0 GATYACAC 8 def test_blastn_like_has_no_colon_only_pipes_for_identities(self): """In a BLASTN-like +1/-1 matrix, mismatches are always negative -> no ':' expected.""" - M = _blastn_like_matrix() + M = load("BLASTN") aligner = PairwiseAligner() aligner.open_gap_score = -1 aligner.extend_gap_score = -0.5 aln = aligner.align("GATTACAT", "GATYACAC")[0] self.assertEqual( - aln.format("", scoring=M), + aln.format(scoring=M), """\ target 0 GATTACAT 8 0 |||.|||. 8 @@ -80,7 +72,7 @@ query 0 GATYACAC 8 aligner.extend_gap_score = -2 aln = aligner.align("GATTACAT", "GATYACAC")[0] self.assertEqual( - aln.format("", scoring=M), + aln.format(scoring=M), """\ target 0 GATTACAT 8 0 |||:|||. 8 @@ -96,7 +88,7 @@ query 0 GATYACAC 8 aligner.extend_gap_score = -2 aln = aligner.align("t", "y")[0] self.assertEqual( - aln.format("", scoring=M), + aln.format(scoring=M), """\ target 0 t 1 0 : 1 @@ -106,13 +98,13 @@ query 0 y 1 def test_negative_mismatch_dot_with_blastn_like_matrix(self): """In the BLASTN-like matrix, mismatches are negative -> expect '.' in the pattern.""" - M = _blastn_like_matrix() + M = load("BLASTN") aligner = PairwiseAligner() aligner.open_gap_score = -10 aligner.extend_gap_score = -2 aln = aligner.align("A", "C")[0] self.assertEqual( - aln.format("", scoring=M), + aln.format(scoring=M), """\ target 0 A 1 0 . 1 @@ -128,7 +120,7 @@ query 0 C 1 aligner.extend_gap_score = -0.5 aln = aligner.align("AC", "AGC")[0] self.assertEqual( - aln.format("", scoring=M), + aln.format(scoring=M), """\ target 0 A-C 2 0 |-| 3 @@ -147,7 +139,7 @@ query 0 AGC 3 seq2 = "TYGG" aln = aligner.align(seq1, seq2)[0] self.assertEqual( - aln.format("", scoring=M), + aln.format(scoring=M), """\ target 0 TTTG 4 0 |:.| 4