mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
use ignore_sequences instead of gaps_only (#4929)
* update * update * update --------- Co-authored-by: Michiel Jan Laurens de Hoon <mdehoon@Michiels-MacBook-Air.local>
This commit is contained in:
@ -3723,7 +3723,7 @@ class Alignment:
|
||||
start1, start2 = end1, end2
|
||||
return m
|
||||
|
||||
def counts(self, substitution_matrix=None, gaps_only=False):
|
||||
def counts(self, substitution_matrix=None, ignore_sequences=False):
|
||||
"""Count the number of identities, mismatches, and gaps of an alignment.
|
||||
|
||||
Arguments:
|
||||
@ -3733,12 +3733,13 @@ class Alignment:
|
||||
(typically from the ``Bio.Align.substitution_matrices``
|
||||
submodule) to also calculate the number of positive
|
||||
matches in an amino acid alignment.
|
||||
- gaps_only - If True, do not calculate the number of identities,
|
||||
- ignore_sequences - If True, do not calculate the number of identities,
|
||||
positives, and mismatches, but only calculate the
|
||||
number of gaps. This will speed up the calculation.
|
||||
number of aligned letters and number of gaps
|
||||
to speed up the calculation.
|
||||
Default value: False.
|
||||
|
||||
A ValueError is raised if gaps_only is True and substitution_matrix is not None.
|
||||
A ValueError is raised if ignore_sequences is True and substitution_matrix is not None.
|
||||
|
||||
>>> aligner = PairwiseAligner(mode='global', match_score=2, mismatch_score=-1)
|
||||
>>> for alignment in aligner.align("TACCG", "ACG"):
|
||||
@ -3793,7 +3794,7 @@ class Alignment:
|
||||
right_insertions = right_deletions = 0
|
||||
internal_insertions = internal_deletions = 0
|
||||
aligned = 0
|
||||
if gaps_only:
|
||||
if ignore_sequences:
|
||||
identities = None
|
||||
mismatches = None
|
||||
else:
|
||||
@ -3801,8 +3802,10 @@ class Alignment:
|
||||
mismatches = 0
|
||||
if substitution_matrix is None:
|
||||
positives = None
|
||||
elif gaps_only:
|
||||
raise ValueError("gaps_only cannot be True if substitution_matrix is used")
|
||||
elif ignore_sequences:
|
||||
raise ValueError(
|
||||
"ignore_sequences cannot be True if substitution_matrix is used"
|
||||
)
|
||||
else:
|
||||
positives = 0
|
||||
sequences = [None] * len(self.sequences)
|
||||
@ -3813,7 +3816,7 @@ class Alignment:
|
||||
for i, sequence in enumerate(self.sequences):
|
||||
start = min(coordinates[i, :])
|
||||
end = max(coordinates[i, :])
|
||||
if not gaps_only:
|
||||
if not ignore_sequences:
|
||||
try:
|
||||
sequence = sequence[start:end]
|
||||
except ValueError:
|
||||
@ -3823,10 +3826,10 @@ class Alignment:
|
||||
if sum(aligned_steps > 0) > sum(aligned_steps < 0):
|
||||
coordinates[i, :] = coordinates[i, :] - start
|
||||
else:
|
||||
if not gaps_only:
|
||||
if not ignore_sequences:
|
||||
sequence = reverse_complement(sequence)
|
||||
coordinates[i, :] = end - coordinates[i, :]
|
||||
if gaps_only:
|
||||
if ignore_sequences:
|
||||
sequences[i] = None
|
||||
else:
|
||||
try:
|
||||
|
@ -581,9 +581,10 @@ alignment are indicated by -1:
|
||||
Counting identities, mismatches, and gaps
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The ``counts`` method counts the number of identities, mismatches, and gaps
|
||||
(insertions and deletions) of an alignment. The return value is an
|
||||
``AlignmentCounts`` object, from which the counts can be obtained as properties.
|
||||
The ``counts`` method counts the number of identities, mismatches, aligned
|
||||
letters, and gaps (insertions and deletions) of an alignment. The return
|
||||
value is an ``AlignmentCounts`` object, from which the counts can be obtained
|
||||
as properties.
|
||||
|
||||
.. cont-doctest
|
||||
|
||||
@ -656,6 +657,26 @@ number of gaps (= insertions + deletions):
|
||||
>>> counts.internal_gaps
|
||||
2
|
||||
|
||||
To speed up the calculation, you can use ``ignore_sequences=True`` to skip
|
||||
counting the number of matches and mismatches (this will still calculate the
|
||||
number of aligned letters):
|
||||
|
||||
.. cont-doctest
|
||||
|
||||
.. code:: pycon
|
||||
|
||||
>>> counts = alignment.counts(ignore_sequences=True)
|
||||
>>> counts.aligned
|
||||
16
|
||||
>>> print(counts.identities)
|
||||
None
|
||||
>>> print(counts.mismatches)
|
||||
None
|
||||
>>> counts.insertions
|
||||
1
|
||||
>>> counts.deletions
|
||||
5
|
||||
|
||||
For protein alignments, in addition to the number of identities and mismatches,
|
||||
you can also count the number of positive matches by supplying a substitution
|
||||
matrix (see Chapter :ref:`sec:substitution_matrices`):
|
||||
|
@ -2376,13 +2376,15 @@ T 6.0 14.0 0.0 874.0
|
||||
str(counts),
|
||||
"AlignmentCounts(left_insertions=0, left_deletions=0, internal_insertions=0, internal_deletions=0, right_insertions=80, right_deletions=4, aligned=3084, identities=3020, mismatches=64, positives=None)",
|
||||
)
|
||||
counts = alignment.counts(gaps_only=True)
|
||||
counts = alignment.counts(ignore_sequences=True)
|
||||
self.assertEqual(
|
||||
str(counts),
|
||||
"AlignmentCounts(left_insertions=0, left_deletions=0, internal_insertions=0, internal_deletions=0, right_insertions=80, right_deletions=4, aligned=3084, identities=None, mismatches=None, positives=None)",
|
||||
)
|
||||
with self.assertRaises(ValueError):
|
||||
alignment.counts(substitution_matrix=substitution_matrix, gaps_only=True)
|
||||
alignment.counts(
|
||||
substitution_matrix=substitution_matrix, ignore_sequences=True
|
||||
)
|
||||
for i, sequence in enumerate(alignment.sequences):
|
||||
length = len(sequence)
|
||||
alignment.sequences[i] = Seq(None, length)
|
||||
|
Reference in New Issue
Block a user