mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
documentation tweaks (#5055)
* documentation tweaks * update --------- Co-authored-by: Michiel de Hoon <mdehoon@tkx288.genome.gsc.riken.jp>
This commit is contained in:
@ -1940,6 +1940,58 @@ the string representation of the alignment:
|
||||
0 AG-TTT-- 5
|
||||
<BLANKLINE>
|
||||
|
||||
Because optional keyword arguments cannot be used with Python’s built-in
|
||||
``format`` function or with formatted strings, the ``Alignment`` class
|
||||
has a ``format`` method with optional arguments to customize the
|
||||
alignment format. For example, you can use the optional ``scoring`` argument
|
||||
to provide a substitution matrix (see Section
|
||||
:ref:`sec:pairwise-substitution-scores`) to let the printed alignment reflect
|
||||
the substitution scores as follows:
|
||||
|
||||
* ``|`` for identical residues,
|
||||
* ``:`` for substitutions with a positive score,
|
||||
* ``.`` for substitutions with a negative score,
|
||||
* ``-`` for gaps.
|
||||
|
||||
.. cont-doctest
|
||||
|
||||
.. code:: pycon
|
||||
|
||||
>>> M = substitution_matrices.load("NUC.4.4")
|
||||
>>> print(M[:, :])
|
||||
A T G C S W R Y K M B V H D N
|
||||
A 5.0 -4.0 -4.0 -4.0 -4.0 1.0 1.0 -4.0 -4.0 1.0 -4.0 -1.0 -1.0 -1.0 -2.0
|
||||
T -4.0 5.0 -4.0 -4.0 -4.0 1.0 -4.0 1.0 1.0 -4.0 -1.0 -4.0 -1.0 -1.0 -2.0
|
||||
G -4.0 -4.0 5.0 -4.0 1.0 -4.0 1.0 -4.0 1.0 -4.0 -1.0 -1.0 -4.0 -1.0 -2.0
|
||||
C -4.0 -4.0 -4.0 5.0 1.0 -4.0 -4.0 1.0 -4.0 1.0 -1.0 -1.0 -1.0 -4.0 -2.0
|
||||
S -4.0 -4.0 1.0 1.0 -1.0 -4.0 -2.0 -2.0 -2.0 -2.0 -1.0 -1.0 -3.0 -3.0 -1.0
|
||||
W 1.0 1.0 -4.0 -4.0 -4.0 -1.0 -2.0 -2.0 -2.0 -2.0 -3.0 -3.0 -1.0 -1.0 -1.0
|
||||
R 1.0 -4.0 1.0 -4.0 -2.0 -2.0 -1.0 -4.0 -2.0 -2.0 -3.0 -1.0 -3.0 -1.0 -1.0
|
||||
Y -4.0 1.0 -4.0 1.0 -2.0 -2.0 -4.0 -1.0 -2.0 -2.0 -1.0 -3.0 -1.0 -3.0 -1.0
|
||||
K -4.0 1.0 1.0 -4.0 -2.0 -2.0 -2.0 -2.0 -1.0 -4.0 -1.0 -3.0 -3.0 -1.0 -1.0
|
||||
M 1.0 -4.0 -4.0 1.0 -2.0 -2.0 -2.0 -2.0 -4.0 -1.0 -3.0 -1.0 -1.0 -3.0 -1.0
|
||||
B -4.0 -1.0 -1.0 -1.0 -1.0 -3.0 -3.0 -1.0 -1.0 -3.0 -1.0 -2.0 -2.0 -2.0 -1.0
|
||||
V -1.0 -4.0 -1.0 -1.0 -1.0 -3.0 -1.0 -3.0 -3.0 -1.0 -2.0 -1.0 -2.0 -2.0 -1.0
|
||||
H -1.0 -1.0 -4.0 -1.0 -3.0 -1.0 -3.0 -1.0 -3.0 -1.0 -2.0 -2.0 -1.0 -2.0 -1.0
|
||||
D -1.0 -1.0 -1.0 -4.0 -3.0 -1.0 -1.0 -3.0 -1.0 -3.0 -2.0 -2.0 -2.0 -1.0 -1.0
|
||||
N -2.0 -2.0 -2.0 -2.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0 -1.0
|
||||
<BLANKLINE>
|
||||
>>> M["T", "Y"]
|
||||
1.0
|
||||
>>> M["T", "C"]
|
||||
-4.0
|
||||
>>> aln = Align.Alignment(["GATTACAT", "GATYACAC"])
|
||||
>>> print(aln.format(scoring=M))
|
||||
target 0 GATTACAT 8
|
||||
0 |||:|||. 8
|
||||
query 0 GATYACAC 8
|
||||
<BLANKLINE>
|
||||
|
||||
|
||||
Instead of a substitution matrix, you can also pass a ``PairwiseAligner``
|
||||
object (see Chapter :ref:`chapter:pairwise`) as the ``scoring`` argument
|
||||
to use the substitution matrix associated with the aligner.
|
||||
|
||||
By specifying one of the formats shown in
|
||||
Section :ref:`sec:alignformats`, ``format`` will create a string
|
||||
showing the alignment in the requested format:
|
||||
@ -1976,13 +2028,8 @@ showing the alignment in the requested format:
|
||||
<BLANKLINE>
|
||||
<BLANKLINE>
|
||||
|
||||
As optional keyword arguments cannot be used with Python’s built-in
|
||||
``format`` function or with formatted strings, the ``Alignment`` class
|
||||
has a ``format`` method with optional arguments to customize the
|
||||
alignment format, as described in the subsections below. For example, we
|
||||
can print the alignment in BED format (see
|
||||
section :ref:`subsec:align_bed`) with a specific number of
|
||||
columns:
|
||||
As another example, we can print the alignment in BED format (see
|
||||
Section :ref:`subsec:align_bed`) with a specific number of columns:
|
||||
|
||||
.. cont-doctest
|
||||
|
||||
@ -3209,33 +3256,6 @@ using Python’s built-in ``format`` function writes a vulgar line:
|
||||
vulgar: gi|296143771|ref|NM_001180731.1| 0 1230 + gi|330443520|ref|NC_001136.10| 1319275 1318045 - 6146 M 1 1 C 3 3 M 1226 1226
|
||||
<BLANKLINE>
|
||||
|
||||
The ``Alignment.format()`` method also accepts an optional ``scoring`` argument.
|
||||
If you provide a substitution matrix (for example, ``scoring=M`` where
|
||||
``M = Bio.Align.substitution_matrices.load("NUC.4.4")``), the middle pattern
|
||||
line will reflect the substitution scores:
|
||||
|
||||
* ``|`` for identical residues,
|
||||
* ``:`` for substitutions with a positive score,
|
||||
* ``.`` for substitutions with a negative score,
|
||||
* ``-`` for gaps.
|
||||
|
||||
.. cont-doctest
|
||||
|
||||
.. code:: pycon
|
||||
|
||||
>>> from Bio.Align import PairwiseAligner
|
||||
>>> from Bio.Align import substitution_matrices
|
||||
>>> M = substitution_matrices.load("NUC.4.4")
|
||||
>>> aligner = PairwiseAligner()
|
||||
>>> aligner.open_gap_score = -100
|
||||
>>> aligner.extend_gap_score = -100
|
||||
>>> aln = aligner.align("GATTACAT", "GATYACAC")[0]
|
||||
>>> print(aln.format("", scoring=M))
|
||||
target 0 GATTACAT 8
|
||||
0 |||:|||. 8
|
||||
query 0 GATYACAC 8
|
||||
<BLANKLINE>
|
||||
|
||||
Using the ``format`` method allows us to request either a vulgar line
|
||||
(default) or a cigar line:
|
||||
|
||||
|
@ -28,14 +28,6 @@ from Bio.Align import PairwiseAligner
|
||||
from Bio.Align.substitution_matrices import load, Array
|
||||
|
||||
|
||||
def _blastn_like_matrix():
|
||||
alphabet = "ACGTY"
|
||||
n = len(alphabet)
|
||||
data = np.full((n, n), -1, dtype=int)
|
||||
np.fill_diagonal(data, 1)
|
||||
return Array(alphabet=alphabet, dims=2, data=data)
|
||||
|
||||
|
||||
class TestFormatMatrix(unittest.TestCase):
|
||||
"""Unit tests for Alignment.format() with substitution matrices."""
|
||||
|
||||
@ -47,7 +39,7 @@ class TestFormatMatrix(unittest.TestCase):
|
||||
aligner.extend_gap_score = -0.5
|
||||
aln = aligner.align("GATTACAT", "GATYACAC")[0]
|
||||
self.assertEqual(
|
||||
aln.format("", scoring=M),
|
||||
aln.format(scoring=M),
|
||||
"""\
|
||||
target 0 GATTACAT 8
|
||||
0 |||:|||. 8
|
||||
@ -57,13 +49,13 @@ query 0 GATYACAC 8
|
||||
|
||||
def test_blastn_like_has_no_colon_only_pipes_for_identities(self):
|
||||
"""In a BLASTN-like +1/-1 matrix, mismatches are always negative -> no ':' expected."""
|
||||
M = _blastn_like_matrix()
|
||||
M = load("BLASTN")
|
||||
aligner = PairwiseAligner()
|
||||
aligner.open_gap_score = -1
|
||||
aligner.extend_gap_score = -0.5
|
||||
aln = aligner.align("GATTACAT", "GATYACAC")[0]
|
||||
self.assertEqual(
|
||||
aln.format("", scoring=M),
|
||||
aln.format(scoring=M),
|
||||
"""\
|
||||
target 0 GATTACAT 8
|
||||
0 |||.|||. 8
|
||||
@ -80,7 +72,7 @@ query 0 GATYACAC 8
|
||||
aligner.extend_gap_score = -2
|
||||
aln = aligner.align("GATTACAT", "GATYACAC")[0]
|
||||
self.assertEqual(
|
||||
aln.format("", scoring=M),
|
||||
aln.format(scoring=M),
|
||||
"""\
|
||||
target 0 GATTACAT 8
|
||||
0 |||:|||. 8
|
||||
@ -96,7 +88,7 @@ query 0 GATYACAC 8
|
||||
aligner.extend_gap_score = -2
|
||||
aln = aligner.align("t", "y")[0]
|
||||
self.assertEqual(
|
||||
aln.format("", scoring=M),
|
||||
aln.format(scoring=M),
|
||||
"""\
|
||||
target 0 t 1
|
||||
0 : 1
|
||||
@ -106,13 +98,13 @@ query 0 y 1
|
||||
|
||||
def test_negative_mismatch_dot_with_blastn_like_matrix(self):
|
||||
"""In the BLASTN-like matrix, mismatches are negative -> expect '.' in the pattern."""
|
||||
M = _blastn_like_matrix()
|
||||
M = load("BLASTN")
|
||||
aligner = PairwiseAligner()
|
||||
aligner.open_gap_score = -10
|
||||
aligner.extend_gap_score = -2
|
||||
aln = aligner.align("A", "C")[0]
|
||||
self.assertEqual(
|
||||
aln.format("", scoring=M),
|
||||
aln.format(scoring=M),
|
||||
"""\
|
||||
target 0 A 1
|
||||
0 . 1
|
||||
@ -128,7 +120,7 @@ query 0 C 1
|
||||
aligner.extend_gap_score = -0.5
|
||||
aln = aligner.align("AC", "AGC")[0]
|
||||
self.assertEqual(
|
||||
aln.format("", scoring=M),
|
||||
aln.format(scoring=M),
|
||||
"""\
|
||||
target 0 A-C 2
|
||||
0 |-| 3
|
||||
@ -147,7 +139,7 @@ query 0 AGC 3
|
||||
seq2 = "TYGG"
|
||||
aln = aligner.align(seq1, seq2)[0]
|
||||
self.assertEqual(
|
||||
aln.format("", scoring=M),
|
||||
aln.format(scoring=M),
|
||||
"""\
|
||||
target 0 TTTG 4
|
||||
0 |:.| 4
|
Reference in New Issue
Block a user