use ignore_sequences instead of gaps_only (#4929)

* update

* update

* update

---------

Co-authored-by: Michiel Jan Laurens de Hoon <mdehoon@Michiels-MacBook-Air.local>
This commit is contained in:
mdehoon
2025-02-06 21:44:09 +09:00
committed by GitHub
parent e838eb90e6
commit 4d380fab03
3 changed files with 41 additions and 15 deletions

View File

@ -3723,7 +3723,7 @@ class Alignment:
start1, start2 = end1, end2
return m
def counts(self, substitution_matrix=None, gaps_only=False):
def counts(self, substitution_matrix=None, ignore_sequences=False):
"""Count the number of identities, mismatches, and gaps of an alignment.
Arguments:
@ -3733,12 +3733,13 @@ class Alignment:
(typically from the ``Bio.Align.substitution_matrices``
submodule) to also calculate the number of positive
matches in an amino acid alignment.
- gaps_only - If True, do not calculate the number of identities,
- ignore_sequences - If True, do not calculate the number of identities,
positives, and mismatches, but only calculate the
number of gaps. This will speed up the calculation.
number of aligned letters and number of gaps
to speed up the calculation.
Default value: False.
A ValueError is raised if gaps_only is True and substitution_matrix is not None.
A ValueError is raised if ignore_sequences is True and substitution_matrix is not None.
>>> aligner = PairwiseAligner(mode='global', match_score=2, mismatch_score=-1)
>>> for alignment in aligner.align("TACCG", "ACG"):
@ -3793,7 +3794,7 @@ class Alignment:
right_insertions = right_deletions = 0
internal_insertions = internal_deletions = 0
aligned = 0
if gaps_only:
if ignore_sequences:
identities = None
mismatches = None
else:
@ -3801,8 +3802,10 @@ class Alignment:
mismatches = 0
if substitution_matrix is None:
positives = None
elif gaps_only:
raise ValueError("gaps_only cannot be True if substitution_matrix is used")
elif ignore_sequences:
raise ValueError(
"ignore_sequences cannot be True if substitution_matrix is used"
)
else:
positives = 0
sequences = [None] * len(self.sequences)
@ -3813,7 +3816,7 @@ class Alignment:
for i, sequence in enumerate(self.sequences):
start = min(coordinates[i, :])
end = max(coordinates[i, :])
if not gaps_only:
if not ignore_sequences:
try:
sequence = sequence[start:end]
except ValueError:
@ -3823,10 +3826,10 @@ class Alignment:
if sum(aligned_steps > 0) > sum(aligned_steps < 0):
coordinates[i, :] = coordinates[i, :] - start
else:
if not gaps_only:
if not ignore_sequences:
sequence = reverse_complement(sequence)
coordinates[i, :] = end - coordinates[i, :]
if gaps_only:
if ignore_sequences:
sequences[i] = None
else:
try:

View File

@ -581,9 +581,10 @@ alignment are indicated by -1:
Counting identities, mismatches, and gaps
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The ``counts`` method counts the number of identities, mismatches, and gaps
(insertions and deletions) of an alignment. The return value is an
``AlignmentCounts`` object, from which the counts can be obtained as properties.
The ``counts`` method counts the number of identities, mismatches, aligned
letters, and gaps (insertions and deletions) of an alignment. The return
value is an ``AlignmentCounts`` object, from which the counts can be obtained
as properties.
.. cont-doctest
@ -656,6 +657,26 @@ number of gaps (= insertions + deletions):
>>> counts.internal_gaps
2
To speed up the calculation, you can use ``ignore_sequences=True`` to skip
counting the number of matches and mismatches (this will still calculate the
number of aligned letters):
.. cont-doctest
.. code:: pycon
>>> counts = alignment.counts(ignore_sequences=True)
>>> counts.aligned
16
>>> print(counts.identities)
None
>>> print(counts.mismatches)
None
>>> counts.insertions
1
>>> counts.deletions
5
For protein alignments, in addition to the number of identities and mismatches,
you can also count the number of positive matches by supplying a substitution
matrix (see Chapter :ref:`sec:substitution_matrices`):

View File

@ -2376,13 +2376,15 @@ T 6.0 14.0 0.0 874.0
str(counts),
"AlignmentCounts(left_insertions=0, left_deletions=0, internal_insertions=0, internal_deletions=0, right_insertions=80, right_deletions=4, aligned=3084, identities=3020, mismatches=64, positives=None)",
)
counts = alignment.counts(gaps_only=True)
counts = alignment.counts(ignore_sequences=True)
self.assertEqual(
str(counts),
"AlignmentCounts(left_insertions=0, left_deletions=0, internal_insertions=0, internal_deletions=0, right_insertions=80, right_deletions=4, aligned=3084, identities=None, mismatches=None, positives=None)",
)
with self.assertRaises(ValueError):
alignment.counts(substitution_matrix=substitution_matrix, gaps_only=True)
alignment.counts(
substitution_matrix=substitution_matrix, ignore_sequences=True
)
for i, sequence in enumerate(alignment.sequences):
length = len(sequence)
alignment.sequences[i] = Seq(None, length)