documentation tweaks (#5055)

* documentation tweaks

* update

---------

Co-authored-by: Michiel de Hoon <mdehoon@tkx288.genome.gsc.riken.jp>
This commit is contained in:
mdehoon
2025-09-04 16:23:53 +09:00
committed by GitHub
parent ad9456f3bb
commit 6e43775844
2 changed files with 63 additions and 51 deletions

View File

@ -1940,6 +1940,58 @@ the string representation of the alignment:
0 AG-TTT-- 5 0 AG-TTT-- 5
<BLANKLINE> <BLANKLINE>
As optional keyword arguments cannot be used with Python's built-in
``format`` function or with formatted strings, the ``Alignment`` class
has a ``format`` method with optional arguments to customize the
alignment format. For example, you can use the optional ``scoring`` argument
to provide a substitution matrix (see Section
:ref:`sec:pairwise-substitution-scores`) to let the printed alignment reflect
the substitution scores as follows:
* ``|`` for identical residues,
* ``:`` for substitutions with a positive score,
* ``.`` for substitutions with a negative score,
* ``-`` for gaps.
.. cont-doctest
.. code:: pycon
>>> M = substitution_matrices.load("NUC.4.4")
>>> print(M[:, :])
A T G C S W R Y K M B V H D N
A 5.0 -4.0 -4.0 -4.0 -4.0 1.0 1.0 -4.0 -4.0 1.0 -4.0 -1.0 -1.0 -1.0 -2.0
T -4.0 5.0 -4.0 -4.0 -4.0 1.0 -4.0 1.0 1.0 -4.0 -1.0 -4.0 -1.0 -1.0 -2.0
G -4.0 -4.0 5.0 -4.0 1.0 -4.0 1.0 -4.0 1.0 -4.0 -1.0 -1.0 -4.0 -1.0 -2.0
C -4.0 -4.0 -4.0 5.0 1.0 -4.0 -4.0 1.0 -4.0 1.0 -1.0 -1.0 -1.0 -4.0 -2.0
S -4.0 -4.0 1.0 1.0 -1.0 -4.0 -2.0 -2.0 -2.0 -2.0 -1.0 -1.0 -3.0 -3.0 -1.0
W 1.0 1.0 -4.0 -4.0 -4.0 -1.0 -2.0 -2.0 -2.0 -2.0 -3.0 -3.0 -1.0 -1.0 -1.0
R 1.0 -4.0 1.0 -4.0 -2.0 -2.0 -1.0 -4.0 -2.0 -2.0 -3.0 -1.0 -3.0 -1.0 -1.0
Y -4.0 1.0 -4.0 1.0 -2.0 -2.0 -4.0 -1.0 -2.0 -2.0 -1.0 -3.0 -1.0 -3.0 -1.0
K -4.0 1.0 1.0 -4.0 -2.0 -2.0 -2.0 -2.0 -1.0 -4.0 -1.0 -3.0 -3.0 -1.0 -1.0
M 1.0 -4.0 -4.0 1.0 -2.0 -2.0 -2.0 -2.0 -4.0 -1.0 -3.0 -1.0 -1.0 -3.0 -1.0
B -4.0 -1.0 -1.0 -1.0 -1.0 -3.0 -3.0 -1.0 -1.0 -3.0 -1.0 -2.0 -2.0 -2.0 -1.0
V -1.0 -4.0 -1.0 -1.0 -1.0 -3.0 -1.0 -3.0 -3.0 -1.0 -2.0 -1.0 -2.0 -2.0 -1.0
H -1.0 -1.0 -4.0 -1.0 -3.0 -1.0 -3.0 -1.0 -3.0 -1.0 -2.0 -2.0 -1.0 -2.0 -1.0
D -1.0 -1.0 -1.0 -4.0 -3.0 -1.0 -1.0 -3.0 -1.0 -3.0 -2.0 -2.0 -2.0 -1.0 -1.0
N -2.0 -2.0 -2.0 -2.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0
<BLANKLINE>
>>> M["T", "Y"]
1.0
>>> M["T", "C"]
-4.0
>>> aln = Align.Alignment(["GATTACAT", "GATYACAC"])
>>> print(aln.format(scoring=M))
target 0 GATTACAT 8
0 |||:|||. 8
query 0 GATYACAC 8
<BLANKLINE>
Instead of a substitution matrix, you can also pass a ``PairwiseAligner``
object (see Chapter :ref:`chapter:pairwise`) as the ``scoring`` argument;
the substitution matrix associated with the aligner is then used.
By specifying one of the formats shown in By specifying one of the formats shown in
Section :ref:`sec:alignformats`, ``format`` will create a string Section :ref:`sec:alignformats`, ``format`` will create a string
showing the alignment in the requested format: showing the alignment in the requested format:
@ -1976,13 +2028,8 @@ showing the alignment in the requested format:
<BLANKLINE> <BLANKLINE>
<BLANKLINE> <BLANKLINE>
As optional keyword arguments cannot be used with Pythons built-in As another example, we can print the alignment in BED format (see
``format`` function or with formatted strings, the ``Alignment`` class section :ref:`subsec:align_bed`) with a specific number of columns:
has a ``format`` method with optional arguments to customize the
alignment format, as described in the subsections below. For example, we
can print the alignment in BED format (see
section :ref:`subsec:align_bed`) with a specific number of
columns:
.. cont-doctest .. cont-doctest
@ -3209,33 +3256,6 @@ using Pythons built-in ``format`` function writes a vulgar line:
vulgar: gi|296143771|ref|NM_001180731.1| 0 1230 + gi|330443520|ref|NC_001136.10| 1319275 1318045 - 6146 M 1 1 C 3 3 M 1226 1226 vulgar: gi|296143771|ref|NM_001180731.1| 0 1230 + gi|330443520|ref|NC_001136.10| 1319275 1318045 - 6146 M 1 1 C 3 3 M 1226 1226
<BLANKLINE> <BLANKLINE>
The ``Alignment.format()`` method also accepts an optional ``scoring`` argument.
If you provide a substitution matrix (for example, ``scoring=M`` where
``M = Bio.Align.substitution_matrices.load("NUC.4.4")``), the middle pattern
line will reflect the substitution scores:
* ``|`` for identical residues,
* ``:`` for substitutions with a positive score,
* ``.`` for substitutions with a negative score,
* ``-`` for gaps.
.. cont-doctest
.. code:: pycon
>>> from Bio.Align import PairwiseAligner
>>> from Bio.Align import substitution_matrices
>>> M = substitution_matrices.load("NUC.4.4")
>>> aligner = PairwiseAligner()
>>> aligner.open_gap_score = -100
>>> aligner.extend_gap_score = -100
>>> aln = aligner.align("GATTACAT", "GATYACAC")[0]
>>> print(aln.format("", scoring=M))
target 0 GATTACAT 8
0 |||:|||. 8
query 0 GATYACAC 8
<BLANKLINE>
Using the ``format`` method allows us to request either a vulgar line Using the ``format`` method allows us to request either a vulgar line
(default) or a cigar line: (default) or a cigar line:

View File

@ -28,14 +28,6 @@ from Bio.Align import PairwiseAligner
from Bio.Align.substitution_matrices import load, Array from Bio.Align.substitution_matrices import load, Array
def _blastn_like_matrix():
    """Return a square +1/-1 scoring matrix over the letters ``ACGTY``.

    Every diagonal entry (a letter paired with itself) is 1 and every
    off-diagonal entry is -1. The integer data are wrapped in a
    two-dimensional ``Array`` indexed by the alphabet.
    """
    letters = "ACGTY"
    size = len(letters)
    # An integer identity matrix scaled to {0, 2} and shifted down by one
    # gives 1 on the diagonal and -1 everywhere else.
    scores = np.eye(size, dtype=int) * 2 - 1
    return Array(alphabet=letters, dims=2, data=scores)
class TestFormatMatrix(unittest.TestCase): class TestFormatMatrix(unittest.TestCase):
"""Unit tests for Alignment.format() with substitution matrices.""" """Unit tests for Alignment.format() with substitution matrices."""
@ -47,7 +39,7 @@ class TestFormatMatrix(unittest.TestCase):
aligner.extend_gap_score = -0.5 aligner.extend_gap_score = -0.5
aln = aligner.align("GATTACAT", "GATYACAC")[0] aln = aligner.align("GATTACAT", "GATYACAC")[0]
self.assertEqual( self.assertEqual(
aln.format("", scoring=M), aln.format(scoring=M),
"""\ """\
target 0 GATTACAT 8 target 0 GATTACAT 8
0 |||:|||. 8 0 |||:|||. 8
@ -57,13 +49,13 @@ query 0 GATYACAC 8
def test_blastn_like_has_no_colon_only_pipes_for_identities(self): def test_blastn_like_has_no_colon_only_pipes_for_identities(self):
"""In a BLASTN-like +1/-1 matrix, mismatches are always negative -> no ':' expected.""" """In a BLASTN-like +1/-1 matrix, mismatches are always negative -> no ':' expected."""
M = _blastn_like_matrix() M = load("BLASTN")
aligner = PairwiseAligner() aligner = PairwiseAligner()
aligner.open_gap_score = -1 aligner.open_gap_score = -1
aligner.extend_gap_score = -0.5 aligner.extend_gap_score = -0.5
aln = aligner.align("GATTACAT", "GATYACAC")[0] aln = aligner.align("GATTACAT", "GATYACAC")[0]
self.assertEqual( self.assertEqual(
aln.format("", scoring=M), aln.format(scoring=M),
"""\ """\
target 0 GATTACAT 8 target 0 GATTACAT 8
0 |||.|||. 8 0 |||.|||. 8
@ -80,7 +72,7 @@ query 0 GATYACAC 8
aligner.extend_gap_score = -2 aligner.extend_gap_score = -2
aln = aligner.align("GATTACAT", "GATYACAC")[0] aln = aligner.align("GATTACAT", "GATYACAC")[0]
self.assertEqual( self.assertEqual(
aln.format("", scoring=M), aln.format(scoring=M),
"""\ """\
target 0 GATTACAT 8 target 0 GATTACAT 8
0 |||:|||. 8 0 |||:|||. 8
@ -96,7 +88,7 @@ query 0 GATYACAC 8
aligner.extend_gap_score = -2 aligner.extend_gap_score = -2
aln = aligner.align("t", "y")[0] aln = aligner.align("t", "y")[0]
self.assertEqual( self.assertEqual(
aln.format("", scoring=M), aln.format(scoring=M),
"""\ """\
target 0 t 1 target 0 t 1
0 : 1 0 : 1
@ -106,13 +98,13 @@ query 0 y 1
def test_negative_mismatch_dot_with_blastn_like_matrix(self): def test_negative_mismatch_dot_with_blastn_like_matrix(self):
"""In the BLASTN-like matrix, mismatches are negative -> expect '.' in the pattern.""" """In the BLASTN-like matrix, mismatches are negative -> expect '.' in the pattern."""
M = _blastn_like_matrix() M = load("BLASTN")
aligner = PairwiseAligner() aligner = PairwiseAligner()
aligner.open_gap_score = -10 aligner.open_gap_score = -10
aligner.extend_gap_score = -2 aligner.extend_gap_score = -2
aln = aligner.align("A", "C")[0] aln = aligner.align("A", "C")[0]
self.assertEqual( self.assertEqual(
aln.format("", scoring=M), aln.format(scoring=M),
"""\ """\
target 0 A 1 target 0 A 1
0 . 1 0 . 1
@ -128,7 +120,7 @@ query 0 C 1
aligner.extend_gap_score = -0.5 aligner.extend_gap_score = -0.5
aln = aligner.align("AC", "AGC")[0] aln = aligner.align("AC", "AGC")[0]
self.assertEqual( self.assertEqual(
aln.format("", scoring=M), aln.format(scoring=M),
"""\ """\
target 0 A-C 2 target 0 A-C 2
0 |-| 3 0 |-| 3
@ -147,7 +139,7 @@ query 0 AGC 3
seq2 = "TYGG" seq2 = "TYGG"
aln = aligner.align(seq1, seq2)[0] aln = aligner.align(seq1, seq2)[0]
self.assertEqual( self.assertEqual(
aln.format("", scoring=M), aln.format(scoring=M),
"""\ """\
target 0 TTTG 4 target 0 TTTG 4
0 |:.| 4 0 |:.| 4