Aligner default arguments (#5029)

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* doctests_fixed

* update

* update

* documentation

* add warning

* add a note to the DEPRECATED file

* adding a NEWS entry

* remove stray comments

---------

Co-authored-by: Michiel de Hoon <mdehoon@lacg01.local>
Co-authored-by: Michiel de Hoon <mdehoon@tkx380.genome.gsc.riken.jp>
Co-authored-by: Michiel Jan Laurens de Hoon <mdehoon@Michiels-MacBook-Air.local>
Co-authored-by: Michiel de Hoon <mdehoon@tkx288.genome.gsc.riken.jp>
This commit is contained in:
mdehoon
2025-09-16 22:46:35 +09:00
committed by GitHub
parent b4d7c115d2
commit be97b2d4e7
10 changed files with 959 additions and 707 deletions

View File

@ -57,6 +57,9 @@ from Bio.SeqRecord import SeqRecord
# https://github.com/biopython/biopython/pull/2007
from Bio import BiopythonWarning
class MultipleSeqAlignment:
"""Represents a classical multiple sequence alignment (MSA).
@ -1330,22 +1333,22 @@ class Alignment:
>>> alignments = aligner.align("GACCTG", "CGATCG")
>>> alignment = alignments[0]
>>> print(alignment)
target 0 -GACCT-G 6
0 -||--|-| 8
query 0 CGA--TCG 6
target 0 -GACCTG 6
0 -||.|-| 7
query 0 CGATC-G 6
<BLANKLINE>
>>> alignment.frequencies
{'-': array([1., 0., 0., 1., 1., 0., 1., 0.]), 'G': array([0., 2., 0., 0., 0., 0., 0., 2.]), 'A': array([0., 0., 2., 0., 0., 0., 0., 0.]), 'C': array([1., 0., 0., 1., 1., 0., 1., 0.]), 'T': array([0., 0., 0., 0., 0., 2., 0., 0.])}
{'-': array([1., 0., 0., 0., 0., 1., 0.]), 'G': array([0., 2., 0., 0., 0., 0., 2.]), 'A': array([0., 0., 2., 0., 0., 0., 0.]), 'C': array([1., 0., 0., 1., 2., 0., 0.]), 'T': array([0., 0., 0., 1., 0., 1., 0.])}
>>> aligner.mode = "local"
>>> alignments = aligner.align("GACCTG", "CGATCG")
>>> alignment = alignments[0]
>>> print(alignment)
target 0 GACCT-G 6
0 ||--|-| 7
query 1 GA--TCG 6
target 0 GACC 4
0 ||.| 4
query 1 GATC 5
<BLANKLINE>
>>> alignment.frequencies
{'G': array([2., 0., 0., 0., 0., 0., 2.]), 'A': array([0., 2., 0., 0., 0., 0., 0.]), 'C': array([0., 0., 1., 1., 0., 1., 0.]), 'T': array([0., 0., 0., 0., 2., 0., 0.]), '-': array([0., 0., 1., 1., 0., 1., 0.])}
{'G': array([2., 0., 0., 0.]), 'A': array([0., 2., 0., 0.]), 'C': array([0., 0., 1., 2.]), 'T': array([0., 0., 1., 0.])}
"""
coordinates = self.coordinates.copy()
sequences = list(self.sequences)
@ -1709,7 +1712,9 @@ class Alignment:
if steps[i] == 0:
line = "-" * length
else:
start = coordinate[i] + start_index - indices[i - 1]
start = coordinate[i] + start_index
if i > 0:
start -= indices[i - 1]
stop = start + length
line = str(sequence[start:stop])
else:
@ -1744,7 +1749,9 @@ class Alignment:
if steps[i] == 0:
line = [None] * length
else:
start = coordinate[i] + start_index - indices[i - 1]
start = coordinate[i] + start_index
if i > 0:
start -= indices[i - 1]
stop = start + length
line = sequence[start:stop]
else:
@ -2000,68 +2007,68 @@ class Alignment:
>>> alignments = aligner.align("ACCGGTTT", "ACGGGTT")
>>> alignment = alignments[0]
>>> print(alignment)
target 0 ACCGG-TTT 8
0 ||-||-||- 9
query 0 AC-GGGTT- 7
target 0 ACCGGTTT 8
0 ||.||||- 8
query 0 ACGGGTT- 7
<BLANKLINE>
>>> alignment[0, :]
'ACCGG-TTT'
'ACCGGTTT'
>>> alignment[1, :]
'AC-GGGTT-'
'ACGGGTT-'
>>> alignment[0]
'ACCGG-TTT'
'ACCGGTTT'
>>> alignment[1]
'AC-GGGTT-'
'ACGGGTT-'
>>> alignment[0, 1:-2]
'CCGG-T'
'CCGGT'
>>> alignment[1, 1:-2]
'C-GGGT'
>>> alignment[0, (1, 5, 2)]
'C-C'
'CGGGT'
>>> alignment[1, (1, 7, 2)]
'C-G'
>>> alignment[1, ::2]
'A-GT-'
>>> alignment[1, range(0, 9, 2)]
'A-GT-'
'AGGT'
>>> alignment[1, range(0, 8, 2)]
'AGGT'
>>> alignment[:, 0]
'AA'
>>> alignment[:, 5]
'-G'
'TT'
>>> alignment[:, 1:] # doctest:+ELLIPSIS
<Alignment object (2 rows x 8 columns) at 0x...>
<Alignment object (2 rows x 7 columns) at 0x...>
>>> print(alignment[:, 1:])
target 1 CCGG-TTT 8
0 |-||-||- 8
query 1 C-GGGTT- 7
target 1 CCGGTTT 8
0 |.||||- 7
query 1 CGGGTT- 7
<BLANKLINE>
>>> print(alignment[:, 2:])
target 2 CGG-TTT 8
0 -||-||- 7
query 2 -GGGTT- 7
<BLANKLINE>
>>> print(alignment[:, 3:])
target 3 GG-TTT 8
0 ||-||- 6
target 2 CGGTTT 8
0 .||||- 6
query 2 GGGTT- 7
<BLANKLINE>
>>> print(alignment[:, 3:])
target 3 GGTTT 8
0 ||||- 5
query 3 GGTT- 7
<BLANKLINE>
>>> print(alignment[:, 3:-1])
target 3 GG-TT 7
0 ||-|| 5
query 2 GGGTT 7
target 3 GGTT 7
0 |||| 4
query 3 GGTT 7
<BLANKLINE>
>>> print(alignment[:, ::2])
target 0 ACGTT 5
0 |-||- 5
query 0 A-GT- 3
target 0 ACGT 4
0 |.|| 4
query 0 AGGT 4
<BLANKLINE>
>>> print(alignment[:, range(1, 9, 2)])
target 0 CG-T 3
0 ||-| 4
query 0 CGGT 4
>>> print(alignment[:, range(1, 8, 2)])
target 0 CGTT 4
0 |||- 4
query 0 CGT- 3
<BLANKLINE>
>>> print(alignment[:, (2, 7, 3)])
target 0 CTG 3
0 -|| 3
query 0 -TG 2
0 .-| 3
query 0 G-G 2
<BLANKLINE>
"""
if isinstance(key, numbers.Integral):
@ -2648,12 +2655,24 @@ class Alignment:
>>> alignments = aligner.align(seqA, seqB)
>>> len(alignments)
1
3
>>> alignment = alignments[0]
>>> print(alignment)
target 0 TTAA-CCCCATTTG 13
0 --||-||||-|||- 14
query 0 --AAGCCCC-TTT- 10
target 0 TTAACCCCATTTG 13
0 .-|.||||-|||- 13
query 0 A-AGCCCC-TTT- 10
<BLANKLINE>
>>> alignment = alignments[1]
>>> print(alignment)
target 0 TTAACCCCATTTG 13
0 -.|.||||-|||- 13
query 0 -AAGCCCC-TTT- 10
<BLANKLINE>
>>> alignment = alignments[2]
>>> print(alignment)
target 0 TTAACCCCATTTG 13
0 --||.|||.|||- 13
query 0 --AAGCCCCTTT- 10
<BLANKLINE>
Note that seqC is the reverse complement of seqB. Aligning it to the
@ -2662,12 +2681,24 @@ class Alignment:
>>> alignments = aligner.align(seqA, seqC, strand="-")
>>> len(alignments)
1
3
>>> alignment = alignments[0]
>>> print(alignment)
target 0 TTAA-CCCCATTTG 13
0 --||-||||-|||- 14
query 10 --AAGCCCC-TTT- 0
target 0 TTAACCCCATTTG 13
0 .-|.||||-|||- 13
query 10 A-AGCCCC-TTT- 0
<BLANKLINE>
>>> alignment = alignments[1]
>>> print(alignment)
target 0 TTAACCCCATTTG 13
0 -.|.||||-|||- 13
query 10 -AAGCCCC-TTT- 0
<BLANKLINE>
>>> alignment = alignments[2]
>>> print(alignment)
target 0 TTAACCCCATTTG 13
0 --||.|||.|||- 13
query 10 --AAGCCCCTTT- 0
<BLANKLINE>
"""
@ -2734,24 +2765,24 @@ class Alignment:
>>> alignments = aligner.align("GACCTG", "CGATCG")
>>> alignment = alignments[0]
>>> print(alignment)
target 0 -GACCT-G 6
0 -||--|-| 8
query 0 CGA--TCG 6
target 0 -GACCTG 6
0 -||.|-| 7
query 0 CGATC-G 6
<BLANKLINE>
>>> alignment.length
8
7
>>> aligner.mode = "local"
>>> alignments = aligner.align("GACCTG", "CGATCG")
>>> alignment = alignments[0]
>>> print(alignment)
target 0 GACCT-G 6
0 ||--|-| 7
query 1 GA--TCG 6
target 0 GACC 4
0 ||.| 4
query 1 GATC 5
<BLANKLINE>
>>> len(alignment)
2
>>> alignment.length
7
4
"""
n = len(self.coordinates)
if n == 0: # no sequences
@ -2793,26 +2824,26 @@ class Alignment:
>>> alignments = aligner.align("GACCTG", "CGATCG")
>>> alignment = alignments[0]
>>> print(alignment)
target 0 -GACCT-G 6
0 -||--|-| 8
query 0 CGA--TCG 6
<BLANKLINE>
>>> len(alignment)
2
>>> alignment.shape
(2, 8)
>>> aligner.mode = "local"
>>> alignments = aligner.align("GACCTG", "CGATCG")
>>> alignment = alignments[0]
>>> print(alignment)
target 0 GACCT-G 6
0 ||--|-| 7
query 1 GA--TCG 6
target 0 -GACCTG 6
0 -||.|-| 7
query 0 CGATC-G 6
<BLANKLINE>
>>> len(alignment)
2
>>> alignment.shape
(2, 7)
>>> aligner.mode = "local"
>>> alignments = aligner.align("GACCTG", "CGATCG")
>>> alignment = alignments[0]
>>> print(alignment)
target 0 GACC 4
0 ||.| 4
query 1 GATC 5
<BLANKLINE>
>>> len(alignment)
2
>>> alignment.shape
(2, 4)
"""
n = len(self.coordinates)
m = self.length
@ -2958,13 +2989,13 @@ class Alignment:
[ 0, 1, -1, 2, 3]])
>>> alignment = alignments[1]
>>> print(alignment)
target 1 AACTGG 7
0 ||-|-| 6
query 0 AA-T-G 4
target 2 ACTG 6
0 |.|| 4
query 0 AATG 4
<BLANKLINE>
>>> alignment.indices
array([[ 1, 2, 3, 4, 5, 6],
[ 0, 1, -1, 2, -1, 3]])
array([[2, 3, 4, 5],
[0, 1, 2, 3]])
>>> alignments = aligner.align("GAACTGG", "CATT", strand="-")
>>> alignment = alignments[0]
@ -2978,13 +3009,13 @@ class Alignment:
[ 3, 2, -1, 1, 0]])
>>> alignment = alignments[1]
>>> print(alignment)
target 1 AACTGG 7
0 ||-|-| 6
query 4 AA-T-G 0
target 2 ACTG 6
0 |.|| 4
query 4 AATG 0
<BLANKLINE>
>>> alignment.indices
array([[ 1, 2, 3, 4, 5, 6],
[ 3, 2, -1, 1, -1, 0]])
array([[2, 3, 4, 5],
[3, 2, 1, 0]])
"""
a = -np.ones(self.shape, int)
@ -3045,12 +3076,12 @@ class Alignment:
[array([-1, 0, 1, 2, 3, 4, -1]), array([0, 1, 3, 4])]
>>> alignment = alignments[1]
>>> print(alignment)
target 1 AACTGG 7
0 ||-|-| 6
query 0 AA-T-G 4
target 2 ACTG 6
0 |.|| 4
query 0 AATG 4
<BLANKLINE>
>>> alignment.inverse_indices
[array([-1, 0, 1, 2, 3, 4, 5]), array([0, 1, 3, 5])]
[array([-1, -1, 0, 1, 2, 3, -1]), array([0, 1, 2, 3])]
>>> alignments = aligner.align("GAACTGG", "CATT", strand="-")
>>> alignment = alignments[0]
>>> print(alignment)
@ -3062,12 +3093,12 @@ class Alignment:
[array([-1, 0, 1, 2, 3, 4, -1]), array([4, 3, 1, 0])]
>>> alignment = alignments[1]
>>> print(alignment)
target 1 AACTGG 7
0 ||-|-| 6
query 4 AA-T-G 0
target 2 ACTG 6
0 |.|| 4
query 4 AATG 0
<BLANKLINE>
>>> alignment.inverse_indices
[array([-1, 0, 1, 2, 3, 4, 5]), array([5, 3, 1, 0])]
[array([-1, -1, 0, 1, 2, 3, -1]), array([3, 2, 1, 0])]
"""
a = [-np.ones(len(sequence), int) for sequence in self.sequences]
@ -3629,13 +3660,13 @@ class Alignment:
... print(f"{c.gaps} gaps, {c.identities} identities, {c.mismatches} mismatches")
... print(alignment)
...
Score = 6.0:
Score = 4.0:
2 gaps, 3 identities, 0 mismatches
target 0 TACCG 5
0 -||-| 5
query 0 -AC-G 3
<BLANKLINE>
Score = 6.0:
Score = 4.0:
2 gaps, 3 identities, 0 mismatches
target 0 TACCG 5
0 -|-|| 5
@ -4023,12 +4054,12 @@ class PairwiseAligner(_pairwisealigner.PairwiseAligner):
... print("Score = %.1f:" % alignment.score)
... print(alignment)
...
Score = 3.0:
Score = 1.0:
target 0 TACCG 5
0 -|-|| 5
query 0 -A-CG 3
<BLANKLINE>
Score = 3.0:
Score = 1.0:
target 0 TACCG 5
0 -||-| 5
query 0 -AC-G 3
@ -4042,15 +4073,15 @@ class PairwiseAligner(_pairwisealigner.PairwiseAligner):
... print("Score = %.1f:" % alignment.score)
... print(alignment)
...
Score = 3.0:
target 1 ACCG 5
0 |-|| 4
query 0 A-CG 3
Score = 2.0:
target 1 AC 3
0 || 2
query 0 AC 2
<BLANKLINE>
Score = 3.0:
target 1 ACCG 5
0 ||-| 4
query 0 AC-G 3
Score = 2.0:
target 3 CG 5
0 || 2
query 1 CG 3
<BLANKLINE>
Do a global alignment. Identical characters are given 2 points,
@ -4063,12 +4094,12 @@ class PairwiseAligner(_pairwisealigner.PairwiseAligner):
... print("Score = %.1f:" % alignment.score)
... print(alignment)
...
Score = 6.0:
Score = 4.0:
target 0 TACCG 5
0 -||-| 5
query 0 -AC-G 3
<BLANKLINE>
Score = 6.0:
Score = 4.0:
target 0 TACCG 5
0 -|-|| 5
query 0 -A-CG 3
@ -4107,7 +4138,7 @@ class PairwiseAligner(_pairwisealigner.PairwiseAligner):
Number of alignments: 1
>>> alignment = alignments[0]
>>> print("Score = %.1f" % alignment.score)
Score = 13.0
Score = 11.0
>>> print(alignment)
target 0 KEVLA 5
0 -|||- 5
@ -4122,12 +4153,12 @@ class PairwiseAligner(_pairwisealigner.PairwiseAligner):
... print("Score = %.1f:" % alignment.score)
... print(alignment)
...
Score = 6.0:
Score = 4.0:
target 0 TACCG 5
0 -||-| 5
query 0 -AC-G 3
<BLANKLINE>
Score = 6.0:
Score = 4.0:
target 0 TACCG 5
0 -|-|| 5
query 0 -A-CG 3
@ -4154,7 +4185,7 @@ class PairwiseAligner(_pairwisealigner.PairwiseAligner):
# use default values:
# match = 1.0
# mismatch = 0.0
# gap_score = 0.0
# gap_score = -1.0
pass
elif scoring == "blastn":
self.substitution_matrix = substitution_matrices.load("BLASTN")
@ -4273,6 +4304,7 @@ AlignmentCounts object returned by the .counts method of an Alignment object."""
def align(self, seqA, seqB, strand="+"):
"""Return the alignments of two sequences using PairwiseAligner."""
self.warn_defaults_changed() # FIXME remove this after 1.87 is out
if isinstance(seqA, (bytes, Seq, MutableSeq, SeqRecord)):
sA = bytes(seqA)
sA = np.frombuffer(sA, dtype=np.uint8).astype(np.int32)
@ -4330,6 +4362,7 @@ AlignmentCounts object returned by the .counts method of an Alignment object."""
def score(self, seqA, seqB, strand="+"):
"""Return the alignment score of two sequences using PairwiseAligner."""
self.warn_defaults_changed() # FIXME remove this after 1.87 is out
if isinstance(seqA, (bytes, Seq, MutableSeq, SeqRecord)):
seqA = bytes(seqA)
seqA = np.frombuffer(seqA, dtype=np.uint8).astype(np.int32)

View File

@ -1643,15 +1643,10 @@ static char _alignmentcounts__doc__[] =
/* Module definition */
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"_alignmentcounts",
_alignmentcounts__doc__,
-1,
NULL,
NULL,
NULL,
NULL,
NULL
PyModuleDef_HEAD_INIT,
.m_name = "_alignmentcounts",
.m_doc = _alignmentcounts__doc__,
.m_size = -1,
};
PyObject *

View File

@ -1074,15 +1074,10 @@ static char _codonaligner__doc__[] =
"C extension module implementing a dynamic programming algorithm to align a nucleotide sequence to an amino acid sequence";
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"_codonaligner",
_codonaligner__doc__,
-1,
NULL,
NULL,
NULL,
NULL,
NULL
PyModuleDef_HEAD_INIT,
.m_name = "_codonaligner",
.m_doc = _codonaligner__doc__,
.m_size = -1,
};
PyObject *

View File

@ -6,7 +6,6 @@
*/
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include <float.h>
@ -15,6 +14,9 @@
#include "substitution_matrices/_arraycore.h"
static bool warned = false; // FIXME remove once Biopython 1.87 is out.
#define STARTPOINT 0x8
#define ENDPOINT 0x10
#define M_MATRIX 0x1
@ -1767,18 +1769,30 @@ Aligner_init(Aligner *self, PyObject *args, PyObject *kwds)
self->match = 1.0;
self->mismatch = 0.0;
self->epsilon = 1.e-6;
self->open_internal_insertion_score = 0;
self->extend_internal_insertion_score = 0;
self->open_internal_deletion_score = 0;
self->extend_internal_deletion_score = 0;
self->open_left_insertion_score = 0;
self->extend_left_insertion_score = 0;
self->open_right_insertion_score = 0;
self->extend_right_insertion_score = 0;
self->open_left_deletion_score = 0;
self->extend_left_deletion_score = 0;
self->open_right_deletion_score = 0;
self->extend_right_deletion_score = 0;
self->open_internal_insertion_score = -1.0;
self->extend_internal_insertion_score = -1.0;
self->open_internal_deletion_score = -1.0;
self->extend_internal_deletion_score = -1.0;
self->open_left_insertion_score = -1.0;
self->extend_left_insertion_score = -1.0;
self->open_right_insertion_score = -1.0;
self->extend_right_insertion_score = -1.0;
self->open_left_deletion_score = -1.0;
self->extend_left_deletion_score = -1.0;
self->open_right_deletion_score = -1.0;
self->extend_right_deletion_score = -1.0;
self->open_internal_insertion_score_set = false;
self->extend_internal_insertion_score_set = false;
self->open_left_insertion_score_set = false;
self->extend_left_insertion_score_set = false;
self->open_right_insertion_score_set = false;
self->extend_right_insertion_score_set = false;
self->open_internal_deletion_score_set = false;
self->extend_internal_deletion_score_set = false;
self->open_left_deletion_score_set = false;
self->extend_left_deletion_score_set = false;
self->open_right_deletion_score_set = false;
self->extend_right_deletion_score_set = false;
self->insertion_score_function = NULL;
self->deletion_score_function = NULL;
self->substitution_matrix.obj = NULL;
@ -2165,17 +2179,29 @@ Aligner_set_gap_score(Aligner* self, PyObject* value, void* closure)
self->deletion_score_function = NULL;
}
self->open_internal_insertion_score = score;
self->open_internal_insertion_score_set = true;
self->extend_internal_insertion_score = score;
self->extend_internal_insertion_score_set = true;
self->open_left_insertion_score = score;
self->open_left_insertion_score_set = true;
self->extend_left_insertion_score = score;
self->extend_left_insertion_score_set = true;
self->open_right_insertion_score = score;
self->open_right_insertion_score_set = true;
self->extend_right_insertion_score = score;
self->extend_right_insertion_score_set = true;
self->open_internal_deletion_score = score;
self->open_internal_deletion_score_set = true;
self->extend_internal_deletion_score = score;
self->extend_internal_deletion_score_set = true;
self->open_left_deletion_score = score;
self->open_left_deletion_score_set = true;
self->extend_left_deletion_score = score;
self->extend_left_deletion_score_set = true;
self->open_right_deletion_score = score;
self->open_right_deletion_score_set = true;
self->extend_right_deletion_score = score;
self->extend_right_deletion_score_set = true;
}
self->algorithm = Unknown;
return 0;
@ -2217,11 +2243,17 @@ Aligner_set_open_gap_score(Aligner* self, PyObject* value, void* closure)
self->deletion_score_function = NULL;
}
self->open_internal_insertion_score = score;
self->open_internal_insertion_score_set = true;
self->open_left_insertion_score = score;
self->open_left_insertion_score_set = true;
self->open_right_insertion_score = score;
self->open_right_insertion_score_set = true;
self->open_internal_deletion_score = score;
self->open_internal_deletion_score_set = true;
self->open_left_deletion_score = score;
self->open_left_deletion_score_set = true;
self->open_right_deletion_score = score;
self->open_right_deletion_score_set = true;
self->algorithm = Unknown;
return 0;
}
@ -2262,11 +2294,17 @@ Aligner_set_extend_gap_score(Aligner* self, PyObject* value, void* closure)
self->deletion_score_function = NULL;
}
self->extend_internal_insertion_score = score;
self->extend_internal_insertion_score_set = true;
self->extend_left_insertion_score = score;
self->extend_left_insertion_score_set = true;
self->extend_right_insertion_score = score;
self->extend_right_insertion_score_set = true;
self->extend_internal_deletion_score = score;
self->extend_internal_deletion_score_set = true;
self->extend_left_deletion_score = score;
self->extend_left_deletion_score_set = true;
self->extend_right_deletion_score = score;
self->extend_right_deletion_score_set = true;
self->algorithm = Unknown;
return 0;
}
@ -2304,9 +2342,13 @@ Aligner_set_internal_gap_score(Aligner* self, PyObject* value, void* closure)
self->deletion_score_function = NULL;
}
self->open_internal_insertion_score = score;
self->open_internal_insertion_score_set = true;
self->extend_internal_insertion_score = score;
self->extend_internal_insertion_score_set = true;
self->open_internal_deletion_score = score;
self->open_internal_deletion_score_set = true;
self->extend_internal_deletion_score = score;
self->extend_internal_deletion_score_set = true;
self->algorithm = Unknown;
return 0;
}
@ -2342,7 +2384,9 @@ Aligner_set_open_internal_gap_score(Aligner* self, PyObject* value, void* closur
self->deletion_score_function = NULL;
}
self->open_internal_insertion_score = score;
self->open_internal_insertion_score_set = true;
self->open_internal_deletion_score = score;
self->open_internal_deletion_score_set = true;
self->algorithm = Unknown;
return 0;
}
@ -2379,7 +2423,9 @@ Aligner_set_extend_internal_gap_score(Aligner* self, PyObject* value,
self->deletion_score_function = NULL;
}
self->extend_internal_insertion_score = score;
self->extend_internal_insertion_score_set = true;
self->extend_internal_deletion_score = score;
self->extend_internal_deletion_score_set = true;
self->algorithm = Unknown;
return 0;
}
@ -2421,13 +2467,21 @@ Aligner_set_end_gap_score(Aligner* self, PyObject* value, void* closure)
self->deletion_score_function = NULL;
}
self->open_left_insertion_score = score;
self->open_left_insertion_score_set = true;
self->extend_left_insertion_score = score;
self->extend_left_insertion_score_set = true;
self->open_right_insertion_score = score;
self->open_right_insertion_score_set = true;
self->extend_right_insertion_score = score;
self->extend_right_insertion_score_set = true;
self->open_left_deletion_score = score;
self->open_left_deletion_score_set = true;
self->extend_left_deletion_score = score;
self->extend_left_deletion_score_set = true;
self->open_right_deletion_score = score;
self->open_right_deletion_score_set = true;
self->extend_right_deletion_score = score;
self->extend_right_deletion_score_set = true;
self->algorithm = Unknown;
return 0;
}
@ -2465,9 +2519,13 @@ Aligner_set_open_end_gap_score(Aligner* self, PyObject* value, void* closure)
self->deletion_score_function = NULL;
}
self->open_left_insertion_score = score;
self->open_left_insertion_score_set = true;
self->open_right_insertion_score = score;
self->open_right_insertion_score_set = true;
self->open_left_deletion_score = score;
self->open_left_deletion_score_set = true;
self->open_right_deletion_score = score;
self->open_right_deletion_score_set = true;
self->algorithm = Unknown;
return 0;
}
@ -2505,9 +2563,13 @@ Aligner_set_extend_end_gap_score(Aligner* self, PyObject* value, void* closure)
self->deletion_score_function = NULL;
}
self->extend_left_insertion_score = score;
self->extend_left_insertion_score_set = true;
self->extend_right_insertion_score = score;
self->extend_right_insertion_score_set = true;
self->extend_left_deletion_score = score;
self->extend_left_deletion_score_set = true;
self->extend_right_deletion_score = score;
self->extend_right_deletion_score_set = true;
self->algorithm = Unknown;
return 0;
}
@ -2545,9 +2607,13 @@ Aligner_set_left_gap_score(Aligner* self, PyObject* value, void* closure)
self->deletion_score_function = NULL;
}
self->open_left_insertion_score = score;
self->open_left_insertion_score_set = true;
self->extend_left_insertion_score = score;
self->extend_left_insertion_score_set = true;
self->open_left_deletion_score = score;
self->open_left_deletion_score_set = true;
self->extend_left_deletion_score = score;
self->extend_left_deletion_score_set = true;
self->algorithm = Unknown;
return 0;
}
@ -2585,9 +2651,13 @@ Aligner_set_right_gap_score(Aligner* self, PyObject* value, void* closure)
self->deletion_score_function = NULL;
}
self->open_right_insertion_score = score;
self->open_right_insertion_score_set = true;
self->extend_right_insertion_score = score;
self->extend_right_insertion_score_set = true;
self->open_right_deletion_score = score;
self->open_right_deletion_score_set = true;
self->extend_right_deletion_score = score;
self->extend_right_deletion_score_set = true;
self->algorithm = Unknown;
return 0;
}
@ -2623,7 +2693,9 @@ Aligner_set_open_left_gap_score(Aligner* self, PyObject* value, void* closure)
self->deletion_score_function = NULL;
}
self->open_left_insertion_score = score;
self->open_left_insertion_score_set = true;
self->open_left_deletion_score = score;
self->open_left_deletion_score_set = true;
self->algorithm = Unknown;
return 0;
}
@ -2659,7 +2731,9 @@ Aligner_set_extend_left_gap_score(Aligner* self, PyObject* value, void* closure)
self->deletion_score_function = NULL;
}
self->extend_left_insertion_score = score;
self->extend_left_insertion_score_set = true;
self->extend_left_deletion_score = score;
self->extend_left_deletion_score_set = true;
self->algorithm = Unknown;
return 0;
}
@ -2695,7 +2769,9 @@ Aligner_set_open_right_gap_score(Aligner* self, PyObject* value, void* closure)
self->deletion_score_function = NULL;
}
self->open_right_insertion_score = score;
self->open_right_insertion_score_set = true;
self->open_right_deletion_score = score;
self->open_right_deletion_score_set = true;
self->algorithm = Unknown;
return 0;
}
@ -2731,7 +2807,9 @@ Aligner_set_extend_right_gap_score(Aligner* self, PyObject* value, void* closure
self->deletion_score_function = NULL;
}
self->extend_right_insertion_score = score;
self->extend_right_insertion_score_set = true;
self->extend_right_deletion_score = score;
self->extend_right_deletion_score_set = true;
self->algorithm = Unknown;
return 0;
}
@ -2760,8 +2838,11 @@ Aligner_set_open_insertion_score(Aligner* self, PyObject* value, void* closure)
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_internal_insertion_score = score;
self->open_internal_insertion_score_set = true;
self->open_left_insertion_score = score;
self->open_left_insertion_score_set = true;
self->open_right_insertion_score = score;
self->open_right_insertion_score_set = true;
if (self->insertion_score_function) {
Py_DECREF(self->insertion_score_function);
self->insertion_score_function = NULL;
@ -2794,8 +2875,11 @@ Aligner_set_extend_insertion_score(Aligner* self, PyObject* value, void* closure
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->extend_internal_insertion_score = score;
self->extend_internal_insertion_score_set = true;
self->extend_left_insertion_score = score;
self->extend_left_insertion_score_set = true;
self->extend_right_insertion_score = score;
self->extend_right_insertion_score_set = true;
if (self->insertion_score_function) {
Py_DECREF(self->insertion_score_function);
self->insertion_score_function = NULL;
@ -2842,11 +2926,17 @@ Aligner_set_insertion_score(Aligner* self, PyObject* value, void* closure)
return -1;
}
self->open_internal_insertion_score = score;
self->open_internal_insertion_score_set = true;
self->extend_internal_insertion_score = score;
self->extend_internal_insertion_score_set = true;
self->open_left_insertion_score = score;
self->open_left_insertion_score_set = true;
self->extend_left_insertion_score = score;
self->extend_left_insertion_score_set = true;
self->open_right_insertion_score = score;
self->open_right_insertion_score_set = true;
self->extend_right_insertion_score = score;
self->extend_right_insertion_score_set = true;
if (self->insertion_score_function) {
Py_DECREF(self->insertion_score_function);
self->insertion_score_function = NULL;
@ -2880,8 +2970,11 @@ Aligner_set_open_deletion_score(Aligner* self, PyObject* value, void* closure)
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_internal_deletion_score = score;
self->open_internal_deletion_score_set = true;
self->open_left_deletion_score = score;
self->open_left_deletion_score_set = true;
self->open_right_deletion_score = score;
self->open_right_deletion_score_set = true;
if (self->deletion_score_function) {
Py_DECREF(self->deletion_score_function);
self->deletion_score_function = NULL;
@ -2914,8 +3007,11 @@ Aligner_set_extend_deletion_score(Aligner* self, PyObject* value, void* closure)
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->extend_internal_deletion_score = score;
self->extend_internal_deletion_score_set = true;
self->extend_left_deletion_score = score;
self->extend_left_deletion_score_set = true;
self->extend_right_deletion_score = score;
self->extend_right_deletion_score_set = true;
if (self->deletion_score_function) {
Py_DECREF(self->deletion_score_function);
self->deletion_score_function = NULL;
@ -2961,11 +3057,17 @@ Aligner_set_deletion_score(Aligner* self, PyObject* value, void* closure)
return -1;
}
self->open_internal_deletion_score = score;
self->open_internal_deletion_score_set = true;
self->extend_internal_deletion_score = score;
self->extend_internal_deletion_score_set = true;
self->open_left_deletion_score = score;
self->open_left_deletion_score_set = true;
self->extend_left_deletion_score = score;
self->extend_left_deletion_score_set = true;
self->open_right_deletion_score = score;
self->open_right_deletion_score_set = true;
self->extend_right_deletion_score = score;
self->extend_right_deletion_score_set = true;
if (self->deletion_score_function) {
Py_DECREF(self->deletion_score_function);
self->deletion_score_function = NULL;
@ -2992,6 +3094,7 @@ Aligner_set_open_internal_insertion_score(Aligner* self,
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_internal_insertion_score = score;
self->open_internal_insertion_score_set = true;
if (self->insertion_score_function) {
Py_DECREF(self->insertion_score_function);
self->insertion_score_function = NULL;
@ -3017,6 +3120,7 @@ Aligner_set_extend_internal_insertion_score(Aligner* self,
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->extend_internal_insertion_score = score;
self->extend_internal_insertion_score_set = true;
if (self->insertion_score_function) {
Py_DECREF(self->insertion_score_function);
self->insertion_score_function = NULL;
@ -3049,7 +3153,9 @@ Aligner_set_internal_insertion_score(Aligner* self, PyObject* value,
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_internal_insertion_score = score;
self->open_internal_insertion_score_set = true;
self->extend_internal_insertion_score = score;
self->extend_internal_insertion_score_set = true;
if (self->insertion_score_function) {
Py_DECREF(self->insertion_score_function);
self->insertion_score_function = NULL;
@ -3083,9 +3189,13 @@ Aligner_set_end_insertion_score(Aligner* self, PyObject* value, void* closure) {
const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_left_insertion_score = score;
self->open_left_insertion_score_set = true;
self->extend_left_insertion_score = score;
self->extend_left_insertion_score_set = true;
self->open_right_insertion_score = score;
self->open_right_insertion_score_set = true;
self->extend_right_insertion_score = score;
self->extend_right_insertion_score_set = true;
if (self->insertion_score_function) {
Py_DECREF(self->insertion_score_function);
self->insertion_score_function = NULL;
@ -3118,7 +3228,9 @@ Aligner_set_open_end_insertion_score(Aligner* self, PyObject* value,
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_left_insertion_score = score;
self->open_left_insertion_score_set = true;
self->open_right_insertion_score = score;
self->open_right_insertion_score_set = true;
if (self->insertion_score_function) {
Py_DECREF(self->insertion_score_function);
self->insertion_score_function = NULL;
@ -3150,7 +3262,9 @@ Aligner_set_extend_end_insertion_score(Aligner* self, PyObject* value, void* clo
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->extend_left_insertion_score = score;
self->extend_left_insertion_score_set = true;
self->extend_right_insertion_score = score;
self->extend_right_insertion_score_set = true;
if (self->insertion_score_function) {
Py_DECREF(self->insertion_score_function);
self->insertion_score_function = NULL;
@ -3175,6 +3289,7 @@ Aligner_set_open_left_insertion_score(Aligner* self, PyObject* value, void* clos
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_left_insertion_score = score;
self->open_left_insertion_score_set = true;
if (self->insertion_score_function) {
Py_DECREF(self->insertion_score_function);
self->insertion_score_function = NULL;
@ -3199,6 +3314,7 @@ Aligner_set_extend_left_insertion_score(Aligner* self, PyObject* value, void* cl
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->extend_left_insertion_score = score;
self->extend_left_insertion_score_set = true;
if (self->insertion_score_function) {
Py_DECREF(self->insertion_score_function);
self->insertion_score_function = NULL;
@ -3230,7 +3346,9 @@ Aligner_set_left_insertion_score(Aligner* self, PyObject* value, void* closure)
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_left_insertion_score = score;
self->open_left_insertion_score_set = true;
self->extend_left_insertion_score = score;
self->extend_left_insertion_score_set = true;
if (self->insertion_score_function) {
Py_DECREF(self->insertion_score_function);
self->insertion_score_function = NULL;
@ -3255,6 +3373,7 @@ Aligner_set_open_right_insertion_score(Aligner* self, PyObject* value, void* clo
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_right_insertion_score = score;
self->open_right_insertion_score_set = true;
if (self->insertion_score_function) {
Py_DECREF(self->insertion_score_function);
self->insertion_score_function = NULL;
@ -3279,6 +3398,7 @@ Aligner_set_extend_right_insertion_score(Aligner* self, PyObject* value, void* c
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->extend_right_insertion_score = score;
self->extend_right_insertion_score_set = true;
if (self->insertion_score_function) {
Py_DECREF(self->insertion_score_function);
self->insertion_score_function = NULL;
@ -3310,7 +3430,9 @@ Aligner_set_right_insertion_score(Aligner* self, PyObject* value, void* closure)
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_right_insertion_score = score;
self->open_right_insertion_score_set = true;
self->extend_right_insertion_score = score;
self->extend_right_insertion_score_set = true;
if (self->insertion_score_function) {
Py_DECREF(self->insertion_score_function);
self->insertion_score_function = NULL;
@ -3344,9 +3466,13 @@ Aligner_set_end_deletion_score(Aligner* self, PyObject* value, void* closure)
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_left_deletion_score = score;
self->open_left_deletion_score_set = true;
self->extend_left_deletion_score = score;
self->extend_left_deletion_score_set = true;
self->open_right_deletion_score = score;
self->open_right_deletion_score_set = true;
self->extend_right_deletion_score = score;
self->extend_right_deletion_score_set = true;
if (self->deletion_score_function) {
Py_DECREF(self->deletion_score_function);
self->deletion_score_function = NULL;
@ -3378,7 +3504,9 @@ Aligner_set_open_end_deletion_score(Aligner* self, PyObject* value, void* closur
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_left_deletion_score = score;
self->open_left_deletion_score_set = true;
self->open_right_deletion_score = score;
self->open_right_deletion_score_set = true;
if (self->deletion_score_function) {
Py_DECREF(self->deletion_score_function);
self->deletion_score_function = NULL;
@ -3410,7 +3538,9 @@ Aligner_set_extend_end_deletion_score(Aligner* self, PyObject* value, void* clos
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->extend_left_deletion_score = score;
self->extend_left_deletion_score_set = true;
self->extend_right_deletion_score = score;
self->extend_right_deletion_score_set = true;
if (self->deletion_score_function) {
Py_DECREF(self->deletion_score_function);
self->deletion_score_function = NULL;
@ -3436,6 +3566,7 @@ Aligner_set_open_internal_deletion_score(Aligner* self, PyObject* value,
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_internal_deletion_score = score;
self->open_internal_deletion_score_set = true;
if (self->deletion_score_function) {
Py_DECREF(self->deletion_score_function);
self->deletion_score_function = NULL;
@ -3461,6 +3592,7 @@ Aligner_set_extend_internal_deletion_score(Aligner* self, PyObject* value,
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->extend_internal_deletion_score = score;
self->extend_internal_deletion_score_set = true;
if (self->deletion_score_function) {
Py_DECREF(self->deletion_score_function);
self->deletion_score_function = NULL;
@ -3493,7 +3625,9 @@ Aligner_set_internal_deletion_score(Aligner* self, PyObject* value,
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_internal_deletion_score = score;
self->open_internal_deletion_score_set = true;
self->extend_internal_deletion_score = score;
self->extend_internal_deletion_score_set = true;
if (self->deletion_score_function) {
Py_DECREF(self->deletion_score_function);
self->deletion_score_function = NULL;
@ -3518,6 +3652,7 @@ Aligner_set_open_left_deletion_score(Aligner* self, PyObject* value, void* closu
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_left_deletion_score = score;
self->open_left_deletion_score_set = true;
if (self->deletion_score_function) {
Py_DECREF(self->deletion_score_function);
self->deletion_score_function = NULL;
@ -3542,6 +3677,7 @@ Aligner_set_extend_left_deletion_score(Aligner* self, PyObject* value, void* clo
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->extend_left_deletion_score = score;
self->extend_left_deletion_score_set = true;
if (self->deletion_score_function) {
Py_DECREF(self->deletion_score_function);
self->deletion_score_function = NULL;
@ -3573,7 +3709,9 @@ Aligner_set_left_deletion_score(Aligner* self, PyObject* value, void* closure)
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_left_deletion_score = score;
self->open_left_deletion_score_set = true;
self->extend_left_deletion_score = score;
self->extend_left_deletion_score_set = true;
if (self->deletion_score_function) {
Py_DECREF(self->deletion_score_function);
self->deletion_score_function = NULL;
@ -3598,6 +3736,7 @@ Aligner_set_open_right_deletion_score(Aligner* self, PyObject* value, void* clos
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_right_deletion_score = score;
self->open_right_deletion_score_set = true;
if (self->deletion_score_function) {
Py_DECREF(self->deletion_score_function);
self->deletion_score_function = NULL;
@ -3622,6 +3761,7 @@ Aligner_set_extend_right_deletion_score(Aligner* self, PyObject* value, void* cl
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->extend_right_deletion_score = score;
self->extend_right_deletion_score_set = true;
if (self->deletion_score_function) {
Py_DECREF(self->deletion_score_function);
self->deletion_score_function = NULL;
@ -3653,7 +3793,9 @@ Aligner_set_right_deletion_score(Aligner* self, PyObject* value, void* closure)
{ const double score = PyFloat_AsDouble(value);
if (PyErr_Occurred()) return -1;
self->open_right_deletion_score = score;
self->open_right_deletion_score_set = true;
self->extend_right_deletion_score = score;
self->extend_right_deletion_score_set = true;
if (self->deletion_score_function) {
Py_DECREF(self->deletion_score_function);
self->deletion_score_function = NULL;
@ -4351,7 +4493,8 @@ struct fogsaa_queue_node {
(queue->array[a].next_upper == queue->array[b].next_upper && \
queue->array[a].next_lower > queue->array[b].next_lower))
int fogsaa_queue_insert(struct fogsaa_queue *queue, int pA, int pB,
static int
fogsaa_queue_insert(struct fogsaa_queue *queue, int pA, int pB,
int type_total, int next_type, double next_lower, double next_upper) {
// max heap implementation for the priority queue by next_upper
struct fogsaa_queue_node temp;
@ -4387,7 +4530,7 @@ int fogsaa_queue_insert(struct fogsaa_queue *queue, int pA, int pB,
return 1;
}
struct fogsaa_queue_node fogsaa_queue_pop(struct fogsaa_queue *queue) {
static struct fogsaa_queue_node fogsaa_queue_pop(struct fogsaa_queue *queue) {
// caller code must check queue is not empty
struct fogsaa_queue_node temp, root = queue->array[0];
int largest_child, i = 0;
@ -7464,6 +7607,50 @@ static char Aligner_doc[] =
"The PairwiseAligner class implements common algorithms to align two\n"
"sequences to each other.\n";
static PyObject*
Aligner_warn_defaults_changed(Aligner* self)
// FIXME remove this function once Biopython release 1.87 is out
/* Issue a UserWarning explaining that the default gap score changed, unless
 * the warning was already issued (tracked by the `warned` flag declared
 * elsewhere in this file) or every one of the twelve gap-score "_set" flags
 * on `self` is true, i.e. no default gap score is still in effect.
 *
 * Returns None on success. Returns NULL with an exception set if the
 * warning machinery raised (for example when warnings are configured to be
 * turned into errors), so that the exception propagates to the caller
 * instead of being returned alongside a non-NULL result.
 */
{
    if (warned)
        Py_RETURN_NONE;
    /* All gap scores were set explicitly: the changed defaults are not
     * used by this aligner, so there is nothing to warn about. */
    if (self->open_internal_insertion_score_set
     && self->extend_internal_insertion_score_set
     && self->open_left_insertion_score_set
     && self->extend_left_insertion_score_set
     && self->open_right_insertion_score_set
     && self->extend_right_insertion_score_set
     && self->open_internal_deletion_score_set
     && self->extend_internal_deletion_score_set
     && self->open_left_deletion_score_set
     && self->extend_left_deletion_score_set
     && self->open_right_deletion_score_set
     && self->extend_right_deletion_score_set) {
        Py_RETURN_NONE;
    }
    /* Set the flag before warning so the message is attempted only once,
     * matching the original ordering. */
    warned = true;
    /* PyErr_WarnEx returns -1 with an exception set when the warning is
     * raised as an error; returning None in that case would violate the
     * C-API contract and trigger a SystemError. */
    if (PyErr_WarnEx(PyExc_UserWarning,
"\n"
"Note that the default value for the gap score parameter of a\n"
"PairwiseAligner object has changed.\n"
"\n"
"In older versions of Biopython, the pairwise aligner was initialized\n"
"by default with a match score of +1, a mismatch score of 0, and a gap\n"
"score of 0. This choice was made to be consistent with the pairwise\n"
"alignment code in Bio.pairwise2.\n"
"\n"
"However, this scoring scheme tends to produce a large number of alignments\n"
"with only trivial difference between them. In particular, a mismatch\n"
"between two letters, a single insertion followed by a deletion, and a\n"
"deletion followed by an insertion are all assigned the same score. For long\n"
"sequences, the number of alignments with such trivial differences can be\n"
"astronomical.\n"
"\n"
"In Biopython 1.86, the default gap score was therefore changed to -1,\n"
"while the default match score remained +1 and the default mismatch score\n"
"remained 0.\n", 1) < 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
static PyMethodDef Aligner_methods[] = {
{"score",
(PyCFunction)Aligner_score,
@ -7475,6 +7662,11 @@ static PyMethodDef Aligner_methods[] = {
METH_VARARGS | METH_KEYWORDS,
Aligner_align__doc__
},
{"warn_defaults_changed",
(PyCFunction)Aligner_warn_defaults_changed,
METH_NOARGS,
"return False if all gap scores have been set explicitly, and True otherwise."
},
{NULL, NULL, 0, NULL} /* Sentinel */
};
@ -7499,15 +7691,10 @@ static char _pairwisealigner__doc__[] =
"C extension module implementing pairwise alignment algorithms";
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"_pairwisealigner",
_pairwisealigner__doc__,
-1,
NULL,
NULL,
NULL,
NULL,
NULL
PyModuleDef_HEAD_INIT,
.m_name = "_pairwisealigner",
.m_doc = _pairwisealigner__doc__,
.m_size = -1,
};
PyObject *

View File

@ -40,6 +40,18 @@ typedef struct {
double extend_left_deletion_score;
double open_right_deletion_score;
double extend_right_deletion_score;
bool open_internal_insertion_score_set;
bool extend_internal_insertion_score_set;
bool open_left_insertion_score_set;
bool extend_left_insertion_score_set;
bool open_right_insertion_score_set;
bool extend_right_insertion_score_set;
bool open_internal_deletion_score_set;
bool extend_internal_deletion_score_set;
bool open_left_deletion_score_set;
bool extend_left_deletion_score_set;
bool open_right_deletion_score_set;
bool extend_right_deletion_score_set;
PyObject* insertion_score_function;
PyObject* deletion_score_function;
Py_buffer substitution_matrix;

View File

@ -171,11 +171,9 @@ static PyTypeObject Array_Type = {
static struct PyModuleDef module = {
PyModuleDef_HEAD_INIT,
"_arraycore",
"Base module defining the Array base class",
-1,
NULL,
NULL, NULL, NULL, NULL
.m_name = "_arraycore",
.m_doc = "Base module defining the Array base class",
.m_size = -1,
};
PyMODINIT_FUNC

View File

@ -644,6 +644,21 @@ Release 1.86, with the original name still available with a deprecation warning.
These attributes were renamed to be consistent with the AlignmentCounts class
and with the common nomenclature in the literature.
The default value of the gap score of a PairwiseAligner object was changed in
Release 1.86. Previously, for consistency with Bio.pairwise2, the default
value for gap score was 0. However, this means that a mismatch, an insertion
followed by a deletion, and a deletion followed by an insertion all get
assigned a score of 0. The aligner then finds a large number of alignments
that are logically the same, but with trivial differences between them. For
example, aligning AAACAAA to AAAGAAA previously yielded the following three
alignments, all with score 6:
AAACAAA AAAC-AAA AAA-CAAA
AAAGAAA AAA-GAAA AAAG-AAA
With the new default parameter for the gap score, only the first alignment is
returned.
The ``alphabet`` attribute of the PairwiseAligner class was deprecated in
Release 1.86. The attribute is still being stored, but it is not used in any
way.

View File

@ -54,8 +54,8 @@ between two sequences:
.. code:: pycon
>>> target = "GAACT"
>>> query = "GAT"
>>> target = "GAACTTT"
>>> query = "GATTT"
>>> score = aligner.score(target, query)
>>> score
3.0
@ -82,7 +82,7 @@ indexing:
>>> alignment = alignments[0]
>>> alignment # doctest: +ELLIPSIS
<Alignment object (2 rows x 5 columns) at 0x...>
<Alignment object (2 rows x 7 columns) at 0x...>
Iterate over the ``Alignment`` objects and print them to see the
alignments:
@ -94,13 +94,13 @@ alignments:
>>> for alignment in alignments:
... print(alignment)
...
target 0 GAACT 5
0 ||--| 5
query 0 GA--T 3
target 0 GAACTTT 7
0 ||--||| 7
query 0 GA--TTT 5
<BLANKLINE>
target 0 GAACT 5
0 |-|-| 5
query 0 G-A-T 3
target 0 GAACTTT 7
0 |-|-||| 7
query 0 G-A-TTT 5
<BLANKLINE>
Use indices to get the aligned sequence (see :ref:`subsec:slicing-indexing-alignment`):
@ -110,9 +110,9 @@ Use indices to get the aligned sequence (see :ref:`subsec:slicing-indexing-align
.. code:: pycon
>>> alignment[0]
'GAACT'
'GAACTTT'
>>> alignment[1]
'G-A-T'
'G-A-TTT'
Each alignment stores the alignment score:
@ -130,9 +130,9 @@ as well as pointers to the sequences that were aligned:
.. code:: pycon
>>> alignment.target
'GAACT'
'GAACTTT'
>>> alignment.query
'GAT'
'GATTT'
Internally, the alignment is stored in terms of the sequence coordinates:
@ -142,8 +142,8 @@ Internally, the alignment is stored in terms of the sequence coordinates:
>>> alignment = alignments[0]
>>> alignment.coordinates
array([[0, 2, 4, 5],
[0, 2, 2, 3]])
array([[0, 2, 4, 7],
[0, 2, 2, 5]])
Here, the two rows refer to the target and query sequence. These
coordinates show that the alignment consists of the following three
@ -154,7 +154,7 @@ blocks:
- ``target[2:4]`` aligned to a gap, since ``query[2:2]`` is an empty
string (i.e., a deletion);
- ``target[4:5]`` aligned to ``query[2:3]``.
- ``target[4:7]`` aligned to ``query[2:5]``.
The number of aligned sequences is always 2 for a pairwise alignment:
@ -175,7 +175,7 @@ query:
.. code:: pycon
>>> alignment.length
5
7
The ``aligned`` property, which returns the start and end indices of
aligned subsequences, returns two tuples of length 2 for the first
@ -187,10 +187,10 @@ alignment:
>>> alignment.aligned
array([[[0, 2],
[4, 5]],
[4, 7]],
<BLANKLINE>
[[0, 2],
[2, 3]]])
[2, 5]]])
while for the alternative alignment, two tuples of length 3 are
returned:
@ -201,18 +201,18 @@ returned:
>>> alignment = alignments[1]
>>> print(alignment)
target 0 GAACT 5
0 |-|-| 5
query 0 G-A-T 3
target 0 GAACTTT 7
0 |-|-||| 7
query 0 G-A-TTT 5
<BLANKLINE>
>>> alignment.aligned
array([[[0, 1],
[2, 3],
[4, 5]],
[4, 7]],
<BLANKLINE>
[[0, 1],
[1, 2],
[2, 3]]])
[2, 5]]])
Note that different alignments may have the same subsequences aligned to
each other. In particular, this may occur if alignments differ from each
@ -345,16 +345,14 @@ follow the suggestion by Waterman & Eggert
If ``aligner.mode`` is set to ``"fogsaa"``, then the Fast Optimal Global
Alignment Algorithm [Chakraborty2013]_ with some modifications is used. This
mode calculates a global alignment, but it is not like the regular `"global"`
mode. It is best suited for long alignments between similar sequences. Rather
than calculating all possible alignments like other algorithms do, FOGSAA uses
a heuristic to detect steps in an alignment that cannot lead to an optimal
alignment. This can speed up alignment, however, the heuristic makes
assumptions about your match, mismatch, and gap scores. If the match score is
less than the mismatch score or any gap score, or if any gap score is greater
than the mismatch score, then a warning is raised and the algorithm may return
incorrect results. Unlike other modes that may return more than one alignment,
FOGSAA always returns only one alignment.
mode also calculates a global alignment, but uses a heuristic to detect steps
in an alignment that cannot lead to an optimal alignment. This can speed up
alignment and is best suited for long alignments between similar sequences.
The heuristic makes assumptions about your match, mismatch, and gap scores. If
the match score is less than the mismatch score or any gap score, or if any gap
score is greater than the mismatch score, then a warning is raised and the
algorithm may return incorrect results. Unlike other modes, which may return
more than one alignment, FOGSAA always returns only one alignment.
.. cont-doctest
@ -391,18 +389,18 @@ all parameters, use
wildcard: None
match_score: 1.000000
mismatch_score: 0.000000
open_internal_insertion_score: 0.000000
extend_internal_insertion_score: 0.000000
open_left_insertion_score: 0.000000
extend_left_insertion_score: 0.000000
open_right_insertion_score: 0.000000
extend_right_insertion_score: 0.000000
open_internal_deletion_score: 0.000000
extend_internal_deletion_score: 0.000000
open_left_deletion_score: 0.000000
extend_left_deletion_score: 0.000000
open_right_deletion_score: 0.000000
extend_right_deletion_score: 0.000000
open_internal_insertion_score: -1.000000
extend_internal_insertion_score: -1.000000
open_left_insertion_score: -1.000000
extend_left_insertion_score: -1.000000
open_right_insertion_score: -1.000000
extend_right_insertion_score: -1.000000
open_internal_deletion_score: -1.000000
extend_internal_deletion_score: -1.000000
open_left_deletion_score: -1.000000
extend_left_deletion_score: -1.000000
open_right_deletion_score: -1.000000
extend_right_deletion_score: -1.000000
mode: local
<BLANKLINE>
@ -775,14 +773,25 @@ disallows a deletion after two nucleotides in the query sequence:
Using a pre-defined substitution matrix and gap scores
------------------------------------------------------
By default, a ``PairwiseAligner`` object is initialized with a match
score of +1.0, a mismatch score of 0.0, and all gap scores equal to 0.0,
While this has the benefit of being a simple scoring scheme, in general
it does not give the best performance. Instead, you can use the argument
``scoring`` to select a predefined scoring scheme when initializing a
``PairwiseAligner`` object. Currently, the provided scoring schemes are
``blastn`` and ``megablast``, which are suitable for nucleotide
alignments, and ``blastp``, which is suitable for protein alignments.
Currently, a ``PairwiseAligner`` object is initialized by default with a match
score of +1.0, a mismatch score of 0.0, and all gap scores equal to -1.0.
Biopython versions 1.85 and older used a default gap score of 0.0 (this choice
was made to be consistent with the older pairwise aligner in ``Bio.pairwise2``,
which uses a default gap score of 0.0). However, this scheme assigns the same
score to a mismatch and an insertion or deletion, e.g.
.. code:: pycon
A A- -A
C -C C-
are evaluated equally, which tends to result in a large number of alignments
that are only trivially different from each other. While the current scoring
scheme avoids this problem, in general you may get better performance using
a predefined scoring scheme, which you can select using the ``scoring``
argument when initializing a ``PairwiseAligner`` object. Currently, the
provided scoring schemes are ``blastn`` and ``megablast``, which are suitable
for nucleotide alignments, and ``blastp``, suitable for protein alignments.
Selecting these scoring schemes will initialize the ``PairwiseAligner``
object to the default scoring parameters used by BLASTN, MegaBLAST, and
BLASTP, respectively.
@ -908,7 +917,7 @@ You can perform the following operations on ``alignments``:
.. code:: pycon
>>> print(alignments.score)
2.0
1.0
Aligning to the reverse strand
------------------------------
@ -927,6 +936,7 @@ for ``query`` to the reverse strand of ``target``, use ``strand="-"``:
>>> query = "AACC"
>>> aligner = Align.PairwiseAligner()
>>> aligner.mismatch_score = -1
>>> aligner.gap_score = 0
>>> aligner.internal_gap_score = -1
>>> aligner.score(target, query) # strand is "+" by default
4.0
@ -1767,9 +1777,9 @@ hemoglobin sequences from above (``HBA_HUMAN``, ``HBB_HUMAN``) stored in
>>> aligner = Align.PairwiseAligner()
>>> score = aligner.score(seq1.seq, seq2.seq)
>>> print(score)
72.0
56.0
showing an alignment score of 72.0. To see the individual alignments, do
showing an alignment score of 56.0. To see the individual alignments, do
.. cont-doctest
@ -1804,28 +1814,24 @@ alignment itself:
.. code:: pycon
>>> print(alignment.score)
72.0
56.0
>>> print(alignment)
target 0 MV-LS-PAD--KTN--VK-AA-WGKV-----GAHAGEYGAEALE-RMFLSF----P-TTK
0 ||-|--|----|----|--|--||||-----|---||--|--|--|--|------|-|--
query 0 MVHL-TP--EEK--SAV-TA-LWGKVNVDEVG---GE--A--L-GR--L--LVVYPWT--
target 0 MV-LSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHF-DLSHGSAQ---
0 ||-|.|..|..|.|.||||...--|.|.|||.|.....|.|...|..|-|||...|.---
query 0 MVHLTPEEKSAVTALWGKVNVD--EVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGN
<BLANKLINE>
target 41 TY--FPHF----DLSHGS---AQVK-G------HGKKV--A--DA-LTNAVAHV-DDMPN
60 ----|--|----|||------|-|--|------|||||--|--|--|--|--|--|---|
query 39 --QRF--FESFGDLS---TPDA-V-MGNPKVKAHGKKVLGAFSD-GL--A--H-LD---N
target 55 --VKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAH
60 --||.|||||..|.....||.|........||.||..||.|||.||.||...|...||.|
query 58 PKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHH
<BLANKLINE>
target 79 ALS----A-LSD-LHAH--KLR-VDPV-NFK-LLSHC---LLVT--LAAHLPA----EFT
120 -|-----|-||--||----||--|||--||--||------|-|---||-|-------|||
query 81 -L-KGTFATLS-ELH--CDKL-HVDP-ENF-RLL---GNVL-V-CVLA-H---HFGKEFT
<BLANKLINE>
target 119 PA-VH-ASLDKFLAS---VSTV------LTS--KYR- 142
180 |--|--|------|----|--|------|----||-- 217
query 124 P-PV-QA------A-YQKV--VAGVANAL--AHKY-H 147
target 113 LPAEFTPAVHASLDKFLASVSTVLTSKYR 142
120 ...||||.|.|...|..|.|...|..||. 149
query 118 FGKEFTPPVQAAYQKVVAGVANALAHKYH 147
<BLANKLINE>
Better alignments are usually obtained by penalizing gaps: higher costs
for opening a gap and lower costs for extending an existing gap. For
amino acid sequences match scores are usually encoded in matrices like
amino acid sequences, match scores are usually encoded in matrices like
``PAM`` or ``BLOSUM``. Thus, a more meaningful alignment for our example
can be obtained by using the BLOSUM62 matrix, together with a gap open
penalty of 10 and a gap extension penalty of 0.5:
@ -2024,7 +2030,7 @@ BLOSUM62 matrix, and align these sequences to each other:
Asn --- Leu Phe
<BLANKLINE>
>>> print(alignments.score)
18.0
17.0
Generalized pairwise alignments using match/mismatch scores and integer sequences
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -2060,7 +2066,7 @@ This step can be bypassed by passing integer arrays directly:
2 -- 10 13
<BLANKLINE>
>>> print(alignments.score)
18.0
17.0
Note that the indices should consist of 32-bit integers, as specified in
this example by ``numpy.int32``.

View File

@ -20,6 +20,21 @@ has also been tested on PyPy3.10 v7.3.17.
`Infernal <http://eddylab.org/infernal/>`_ (v1.0.0+) RNA search tool. The
formats are ``infernal-tab`` and ``infernal-text``.
The default value of the gap score of a ``PairwiseAligner`` object was changed
in this release. Previously, for consistency with ``Bio.pairwise2``, the
default value for gap score was 0. However, this means that a mismatch, an
insertion followed by a deletion, and a deletion followed by an insertion all
get assigned a score of 0. The aligner then finds a large number of alignments
that are logically the same, but have trivial differences between them. For
example, aligning AAACAAA to AAAGAAA previously yielded the following three
alignments, all with score 6:
AAACAAA AAAC-AAA AAA-CAAA
AAAGAAA AAA-GAAA AAAG-AAA
With the new default parameter for the gap score, only the first alignment is
returned.
``Bio.PDB.PDBIO`` now ensures that b-factor values are always at most 6 characters to
ensure that we do not violate the wwPDB specification. This should not have an impact
on the majority of uses, as b-factor values are generally small (less than 100). When

File diff suppressed because it is too large Load Diff