mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
Aligner default arguments (#5029)
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* update
* doctests_fixed
* update
* update
* documentation
* add warning
* add a note to the DEPRECATED file
* adding a NEWS entry
* remove stray comments

---------

Co-authored-by: Michiel de Hoon <mdehoon@lacg01.local>
Co-authored-by: Michiel de Hoon <mdehoon@tkx380.genome.gsc.riken.jp>
Co-authored-by: Michiel Jan Laurens de Hoon <mdehoon@Michiels-MacBook-Air.local>
Co-authored-by: Michiel de Hoon <mdehoon@tkx288.genome.gsc.riken.jp>
This commit is contained in:
@ -57,6 +57,9 @@ from Bio.SeqRecord import SeqRecord
|
||||
# https://github.com/biopython/biopython/pull/2007
|
||||
|
||||
|
||||
from Bio import BiopythonWarning
|
||||
|
||||
|
||||
class MultipleSeqAlignment:
|
||||
"""Represents a classical multiple sequence alignment (MSA).
|
||||
|
||||
@ -1330,22 +1333,22 @@ class Alignment:
|
||||
>>> alignments = aligner.align("GACCTG", "CGATCG")
|
||||
>>> alignment = alignments[0]
|
||||
>>> print(alignment)
|
||||
target 0 -GACCT-G 6
|
||||
0 -||--|-| 8
|
||||
query 0 CGA--TCG 6
|
||||
target 0 -GACCTG 6
|
||||
0 -||.|-| 7
|
||||
query 0 CGATC-G 6
|
||||
<BLANKLINE>
|
||||
>>> alignment.frequencies
|
||||
{'-': array([1., 0., 0., 1., 1., 0., 1., 0.]), 'G': array([0., 2., 0., 0., 0., 0., 0., 2.]), 'A': array([0., 0., 2., 0., 0., 0., 0., 0.]), 'C': array([1., 0., 0., 1., 1., 0., 1., 0.]), 'T': array([0., 0., 0., 0., 0., 2., 0., 0.])}
|
||||
{'-': array([1., 0., 0., 0., 0., 1., 0.]), 'G': array([0., 2., 0., 0., 0., 0., 2.]), 'A': array([0., 0., 2., 0., 0., 0., 0.]), 'C': array([1., 0., 0., 1., 2., 0., 0.]), 'T': array([0., 0., 0., 1., 0., 1., 0.])}
|
||||
>>> aligner.mode = "local"
|
||||
>>> alignments = aligner.align("GACCTG", "CGATCG")
|
||||
>>> alignment = alignments[0]
|
||||
>>> print(alignment)
|
||||
target 0 GACCT-G 6
|
||||
0 ||--|-| 7
|
||||
query 1 GA--TCG 6
|
||||
target 0 GACC 4
|
||||
0 ||.| 4
|
||||
query 1 GATC 5
|
||||
<BLANKLINE>
|
||||
>>> alignment.frequencies
|
||||
{'G': array([2., 0., 0., 0., 0., 0., 2.]), 'A': array([0., 2., 0., 0., 0., 0., 0.]), 'C': array([0., 0., 1., 1., 0., 1., 0.]), 'T': array([0., 0., 0., 0., 2., 0., 0.]), '-': array([0., 0., 1., 1., 0., 1., 0.])}
|
||||
{'G': array([2., 0., 0., 0.]), 'A': array([0., 2., 0., 0.]), 'C': array([0., 0., 1., 2.]), 'T': array([0., 0., 1., 0.])}
|
||||
"""
|
||||
coordinates = self.coordinates.copy()
|
||||
sequences = list(self.sequences)
|
||||
@ -1709,7 +1712,9 @@ class Alignment:
|
||||
if steps[i] == 0:
|
||||
line = "-" * length
|
||||
else:
|
||||
start = coordinate[i] + start_index - indices[i - 1]
|
||||
start = coordinate[i] + start_index
|
||||
if i > 0:
|
||||
start -= indices[i - 1]
|
||||
stop = start + length
|
||||
line = str(sequence[start:stop])
|
||||
else:
|
||||
@ -1744,7 +1749,9 @@ class Alignment:
|
||||
if steps[i] == 0:
|
||||
line = [None] * length
|
||||
else:
|
||||
start = coordinate[i] + start_index - indices[i - 1]
|
||||
start = coordinate[i] + start_index
|
||||
if i > 0:
|
||||
start -= indices[i - 1]
|
||||
stop = start + length
|
||||
line = sequence[start:stop]
|
||||
else:
|
||||
@ -2000,68 +2007,68 @@ class Alignment:
|
||||
>>> alignments = aligner.align("ACCGGTTT", "ACGGGTT")
|
||||
>>> alignment = alignments[0]
|
||||
>>> print(alignment)
|
||||
target 0 ACCGG-TTT 8
|
||||
0 ||-||-||- 9
|
||||
query 0 AC-GGGTT- 7
|
||||
target 0 ACCGGTTT 8
|
||||
0 ||.||||- 8
|
||||
query 0 ACGGGTT- 7
|
||||
<BLANKLINE>
|
||||
>>> alignment[0, :]
|
||||
'ACCGG-TTT'
|
||||
'ACCGGTTT'
|
||||
>>> alignment[1, :]
|
||||
'AC-GGGTT-'
|
||||
'ACGGGTT-'
|
||||
>>> alignment[0]
|
||||
'ACCGG-TTT'
|
||||
'ACCGGTTT'
|
||||
>>> alignment[1]
|
||||
'AC-GGGTT-'
|
||||
'ACGGGTT-'
|
||||
>>> alignment[0, 1:-2]
|
||||
'CCGG-T'
|
||||
'CCGGT'
|
||||
>>> alignment[1, 1:-2]
|
||||
'C-GGGT'
|
||||
>>> alignment[0, (1, 5, 2)]
|
||||
'C-C'
|
||||
'CGGGT'
|
||||
>>> alignment[1, (1, 7, 2)]
|
||||
'C-G'
|
||||
>>> alignment[1, ::2]
|
||||
'A-GT-'
|
||||
>>> alignment[1, range(0, 9, 2)]
|
||||
'A-GT-'
|
||||
'AGGT'
|
||||
>>> alignment[1, range(0, 8, 2)]
|
||||
'AGGT'
|
||||
>>> alignment[:, 0]
|
||||
'AA'
|
||||
>>> alignment[:, 5]
|
||||
'-G'
|
||||
'TT'
|
||||
>>> alignment[:, 1:] # doctest:+ELLIPSIS
|
||||
<Alignment object (2 rows x 8 columns) at 0x...>
|
||||
<Alignment object (2 rows x 7 columns) at 0x...>
|
||||
>>> print(alignment[:, 1:])
|
||||
target 1 CCGG-TTT 8
|
||||
0 |-||-||- 8
|
||||
query 1 C-GGGTT- 7
|
||||
target 1 CCGGTTT 8
|
||||
0 |.||||- 7
|
||||
query 1 CGGGTT- 7
|
||||
<BLANKLINE>
|
||||
>>> print(alignment[:, 2:])
|
||||
target 2 CGG-TTT 8
|
||||
0 -||-||- 7
|
||||
query 2 -GGGTT- 7
|
||||
<BLANKLINE>
|
||||
>>> print(alignment[:, 3:])
|
||||
target 3 GG-TTT 8
|
||||
0 ||-||- 6
|
||||
target 2 CGGTTT 8
|
||||
0 .||||- 6
|
||||
query 2 GGGTT- 7
|
||||
<BLANKLINE>
|
||||
>>> print(alignment[:, 3:])
|
||||
target 3 GGTTT 8
|
||||
0 ||||- 5
|
||||
query 3 GGTT- 7
|
||||
<BLANKLINE>
|
||||
>>> print(alignment[:, 3:-1])
|
||||
target 3 GG-TT 7
|
||||
0 ||-|| 5
|
||||
query 2 GGGTT 7
|
||||
target 3 GGTT 7
|
||||
0 |||| 4
|
||||
query 3 GGTT 7
|
||||
<BLANKLINE>
|
||||
>>> print(alignment[:, ::2])
|
||||
target 0 ACGTT 5
|
||||
0 |-||- 5
|
||||
query 0 A-GT- 3
|
||||
target 0 ACGT 4
|
||||
0 |.|| 4
|
||||
query 0 AGGT 4
|
||||
<BLANKLINE>
|
||||
>>> print(alignment[:, range(1, 9, 2)])
|
||||
target 0 CG-T 3
|
||||
0 ||-| 4
|
||||
query 0 CGGT 4
|
||||
>>> print(alignment[:, range(1, 8, 2)])
|
||||
target 0 CGTT 4
|
||||
0 |||- 4
|
||||
query 0 CGT- 3
|
||||
<BLANKLINE>
|
||||
>>> print(alignment[:, (2, 7, 3)])
|
||||
target 0 CTG 3
|
||||
0 -|| 3
|
||||
query 0 -TG 2
|
||||
0 .-| 3
|
||||
query 0 G-G 2
|
||||
<BLANKLINE>
|
||||
"""
|
||||
if isinstance(key, numbers.Integral):
|
||||
@ -2648,12 +2655,24 @@ class Alignment:
|
||||
|
||||
>>> alignments = aligner.align(seqA, seqB)
|
||||
>>> len(alignments)
|
||||
1
|
||||
3
|
||||
>>> alignment = alignments[0]
|
||||
>>> print(alignment)
|
||||
target 0 TTAA-CCCCATTTG 13
|
||||
0 --||-||||-|||- 14
|
||||
query 0 --AAGCCCC-TTT- 10
|
||||
target 0 TTAACCCCATTTG 13
|
||||
0 .-|.||||-|||- 13
|
||||
query 0 A-AGCCCC-TTT- 10
|
||||
<BLANKLINE>
|
||||
>>> alignment = alignments[1]
|
||||
>>> print(alignment)
|
||||
target 0 TTAACCCCATTTG 13
|
||||
0 -.|.||||-|||- 13
|
||||
query 0 -AAGCCCC-TTT- 10
|
||||
<BLANKLINE>
|
||||
>>> alignment = alignments[2]
|
||||
>>> print(alignment)
|
||||
target 0 TTAACCCCATTTG 13
|
||||
0 --||.|||.|||- 13
|
||||
query 0 --AAGCCCCTTT- 10
|
||||
<BLANKLINE>
|
||||
|
||||
Note that seqC is the reverse complement of seqB. Aligning it to the
|
||||
@ -2662,12 +2681,24 @@ class Alignment:
|
||||
|
||||
>>> alignments = aligner.align(seqA, seqC, strand="-")
|
||||
>>> len(alignments)
|
||||
1
|
||||
3
|
||||
>>> alignment = alignments[0]
|
||||
>>> print(alignment)
|
||||
target 0 TTAA-CCCCATTTG 13
|
||||
0 --||-||||-|||- 14
|
||||
query 10 --AAGCCCC-TTT- 0
|
||||
target 0 TTAACCCCATTTG 13
|
||||
0 .-|.||||-|||- 13
|
||||
query 10 A-AGCCCC-TTT- 0
|
||||
<BLANKLINE>
|
||||
>>> alignment = alignments[1]
|
||||
>>> print(alignment)
|
||||
target 0 TTAACCCCATTTG 13
|
||||
0 -.|.||||-|||- 13
|
||||
query 10 -AAGCCCC-TTT- 0
|
||||
<BLANKLINE>
|
||||
>>> alignment = alignments[2]
|
||||
>>> print(alignment)
|
||||
target 0 TTAACCCCATTTG 13
|
||||
0 --||.|||.|||- 13
|
||||
query 10 --AAGCCCCTTT- 0
|
||||
<BLANKLINE>
|
||||
|
||||
"""
|
||||
@ -2734,24 +2765,24 @@ class Alignment:
|
||||
>>> alignments = aligner.align("GACCTG", "CGATCG")
|
||||
>>> alignment = alignments[0]
|
||||
>>> print(alignment)
|
||||
target 0 -GACCT-G 6
|
||||
0 -||--|-| 8
|
||||
query 0 CGA--TCG 6
|
||||
target 0 -GACCTG 6
|
||||
0 -||.|-| 7
|
||||
query 0 CGATC-G 6
|
||||
<BLANKLINE>
|
||||
>>> alignment.length
|
||||
8
|
||||
7
|
||||
>>> aligner.mode = "local"
|
||||
>>> alignments = aligner.align("GACCTG", "CGATCG")
|
||||
>>> alignment = alignments[0]
|
||||
>>> print(alignment)
|
||||
target 0 GACCT-G 6
|
||||
0 ||--|-| 7
|
||||
query 1 GA--TCG 6
|
||||
target 0 GACC 4
|
||||
0 ||.| 4
|
||||
query 1 GATC 5
|
||||
<BLANKLINE>
|
||||
>>> len(alignment)
|
||||
2
|
||||
>>> alignment.length
|
||||
7
|
||||
4
|
||||
"""
|
||||
n = len(self.coordinates)
|
||||
if n == 0: # no sequences
|
||||
@ -2793,26 +2824,26 @@ class Alignment:
|
||||
>>> alignments = aligner.align("GACCTG", "CGATCG")
|
||||
>>> alignment = alignments[0]
|
||||
>>> print(alignment)
|
||||
target 0 -GACCT-G 6
|
||||
0 -||--|-| 8
|
||||
query 0 CGA--TCG 6
|
||||
<BLANKLINE>
|
||||
>>> len(alignment)
|
||||
2
|
||||
>>> alignment.shape
|
||||
(2, 8)
|
||||
>>> aligner.mode = "local"
|
||||
>>> alignments = aligner.align("GACCTG", "CGATCG")
|
||||
>>> alignment = alignments[0]
|
||||
>>> print(alignment)
|
||||
target 0 GACCT-G 6
|
||||
0 ||--|-| 7
|
||||
query 1 GA--TCG 6
|
||||
target 0 -GACCTG 6
|
||||
0 -||.|-| 7
|
||||
query 0 CGATC-G 6
|
||||
<BLANKLINE>
|
||||
>>> len(alignment)
|
||||
2
|
||||
>>> alignment.shape
|
||||
(2, 7)
|
||||
>>> aligner.mode = "local"
|
||||
>>> alignments = aligner.align("GACCTG", "CGATCG")
|
||||
>>> alignment = alignments[0]
|
||||
>>> print(alignment)
|
||||
target 0 GACC 4
|
||||
0 ||.| 4
|
||||
query 1 GATC 5
|
||||
<BLANKLINE>
|
||||
>>> len(alignment)
|
||||
2
|
||||
>>> alignment.shape
|
||||
(2, 4)
|
||||
"""
|
||||
n = len(self.coordinates)
|
||||
m = self.length
|
||||
@ -2958,13 +2989,13 @@ class Alignment:
|
||||
[ 0, 1, -1, 2, 3]])
|
||||
>>> alignment = alignments[1]
|
||||
>>> print(alignment)
|
||||
target 1 AACTGG 7
|
||||
0 ||-|-| 6
|
||||
query 0 AA-T-G 4
|
||||
target 2 ACTG 6
|
||||
0 |.|| 4
|
||||
query 0 AATG 4
|
||||
<BLANKLINE>
|
||||
>>> alignment.indices
|
||||
array([[ 1, 2, 3, 4, 5, 6],
|
||||
[ 0, 1, -1, 2, -1, 3]])
|
||||
array([[2, 3, 4, 5],
|
||||
[0, 1, 2, 3]])
|
||||
|
||||
>>> alignments = aligner.align("GAACTGG", "CATT", strand="-")
|
||||
>>> alignment = alignments[0]
|
||||
@ -2978,13 +3009,13 @@ class Alignment:
|
||||
[ 3, 2, -1, 1, 0]])
|
||||
>>> alignment = alignments[1]
|
||||
>>> print(alignment)
|
||||
target 1 AACTGG 7
|
||||
0 ||-|-| 6
|
||||
query 4 AA-T-G 0
|
||||
target 2 ACTG 6
|
||||
0 |.|| 4
|
||||
query 4 AATG 0
|
||||
<BLANKLINE>
|
||||
>>> alignment.indices
|
||||
array([[ 1, 2, 3, 4, 5, 6],
|
||||
[ 3, 2, -1, 1, -1, 0]])
|
||||
array([[2, 3, 4, 5],
|
||||
[3, 2, 1, 0]])
|
||||
|
||||
"""
|
||||
a = -np.ones(self.shape, int)
|
||||
@ -3045,12 +3076,12 @@ class Alignment:
|
||||
[array([-1, 0, 1, 2, 3, 4, -1]), array([0, 1, 3, 4])]
|
||||
>>> alignment = alignments[1]
|
||||
>>> print(alignment)
|
||||
target 1 AACTGG 7
|
||||
0 ||-|-| 6
|
||||
query 0 AA-T-G 4
|
||||
target 2 ACTG 6
|
||||
0 |.|| 4
|
||||
query 0 AATG 4
|
||||
<BLANKLINE>
|
||||
>>> alignment.inverse_indices
|
||||
[array([-1, 0, 1, 2, 3, 4, 5]), array([0, 1, 3, 5])]
|
||||
[array([-1, -1, 0, 1, 2, 3, -1]), array([0, 1, 2, 3])]
|
||||
>>> alignments = aligner.align("GAACTGG", "CATT", strand="-")
|
||||
>>> alignment = alignments[0]
|
||||
>>> print(alignment)
|
||||
@ -3062,12 +3093,12 @@ class Alignment:
|
||||
[array([-1, 0, 1, 2, 3, 4, -1]), array([4, 3, 1, 0])]
|
||||
>>> alignment = alignments[1]
|
||||
>>> print(alignment)
|
||||
target 1 AACTGG 7
|
||||
0 ||-|-| 6
|
||||
query 4 AA-T-G 0
|
||||
target 2 ACTG 6
|
||||
0 |.|| 4
|
||||
query 4 AATG 0
|
||||
<BLANKLINE>
|
||||
>>> alignment.inverse_indices
|
||||
[array([-1, 0, 1, 2, 3, 4, 5]), array([5, 3, 1, 0])]
|
||||
[array([-1, -1, 0, 1, 2, 3, -1]), array([3, 2, 1, 0])]
|
||||
|
||||
"""
|
||||
a = [-np.ones(len(sequence), int) for sequence in self.sequences]
|
||||
@ -3629,13 +3660,13 @@ class Alignment:
|
||||
... print(f"{c.gaps} gaps, {c.identities} identities, {c.mismatches} mismatches")
|
||||
... print(alignment)
|
||||
...
|
||||
Score = 6.0:
|
||||
Score = 4.0:
|
||||
2 gaps, 3 identities, 0 mismatches
|
||||
target 0 TACCG 5
|
||||
0 -||-| 5
|
||||
query 0 -AC-G 3
|
||||
<BLANKLINE>
|
||||
Score = 6.0:
|
||||
Score = 4.0:
|
||||
2 gaps, 3 identities, 0 mismatches
|
||||
target 0 TACCG 5
|
||||
0 -|-|| 5
|
||||
@ -4023,12 +4054,12 @@ class PairwiseAligner(_pairwisealigner.PairwiseAligner):
|
||||
... print("Score = %.1f:" % alignment.score)
|
||||
... print(alignment)
|
||||
...
|
||||
Score = 3.0:
|
||||
Score = 1.0:
|
||||
target 0 TACCG 5
|
||||
0 -|-|| 5
|
||||
query 0 -A-CG 3
|
||||
<BLANKLINE>
|
||||
Score = 3.0:
|
||||
Score = 1.0:
|
||||
target 0 TACCG 5
|
||||
0 -||-| 5
|
||||
query 0 -AC-G 3
|
||||
@ -4042,15 +4073,15 @@ class PairwiseAligner(_pairwisealigner.PairwiseAligner):
|
||||
... print("Score = %.1f:" % alignment.score)
|
||||
... print(alignment)
|
||||
...
|
||||
Score = 3.0:
|
||||
target 1 ACCG 5
|
||||
0 |-|| 4
|
||||
query 0 A-CG 3
|
||||
Score = 2.0:
|
||||
target 1 AC 3
|
||||
0 || 2
|
||||
query 0 AC 2
|
||||
<BLANKLINE>
|
||||
Score = 3.0:
|
||||
target 1 ACCG 5
|
||||
0 ||-| 4
|
||||
query 0 AC-G 3
|
||||
Score = 2.0:
|
||||
target 3 CG 5
|
||||
0 || 2
|
||||
query 1 CG 3
|
||||
<BLANKLINE>
|
||||
|
||||
Do a global alignment. Identical characters are given 2 points,
|
||||
@ -4063,12 +4094,12 @@ class PairwiseAligner(_pairwisealigner.PairwiseAligner):
|
||||
... print("Score = %.1f:" % alignment.score)
|
||||
... print(alignment)
|
||||
...
|
||||
Score = 6.0:
|
||||
Score = 4.0:
|
||||
target 0 TACCG 5
|
||||
0 -||-| 5
|
||||
query 0 -AC-G 3
|
||||
<BLANKLINE>
|
||||
Score = 6.0:
|
||||
Score = 4.0:
|
||||
target 0 TACCG 5
|
||||
0 -|-|| 5
|
||||
query 0 -A-CG 3
|
||||
@ -4107,7 +4138,7 @@ class PairwiseAligner(_pairwisealigner.PairwiseAligner):
|
||||
Number of alignments: 1
|
||||
>>> alignment = alignments[0]
|
||||
>>> print("Score = %.1f" % alignment.score)
|
||||
Score = 13.0
|
||||
Score = 11.0
|
||||
>>> print(alignment)
|
||||
target 0 KEVLA 5
|
||||
0 -|||- 5
|
||||
@ -4122,12 +4153,12 @@ class PairwiseAligner(_pairwisealigner.PairwiseAligner):
|
||||
... print("Score = %.1f:" % alignment.score)
|
||||
... print(alignment)
|
||||
...
|
||||
Score = 6.0:
|
||||
Score = 4.0:
|
||||
target 0 TACCG 5
|
||||
0 -||-| 5
|
||||
query 0 -AC-G 3
|
||||
<BLANKLINE>
|
||||
Score = 6.0:
|
||||
Score = 4.0:
|
||||
target 0 TACCG 5
|
||||
0 -|-|| 5
|
||||
query 0 -A-CG 3
|
||||
@ -4154,7 +4185,7 @@ class PairwiseAligner(_pairwisealigner.PairwiseAligner):
|
||||
# use default values:
|
||||
# match = 1.0
|
||||
# mismatch = 0.0
|
||||
# gap_score = 0.0
|
||||
# gap_score = -1.0
|
||||
pass
|
||||
elif scoring == "blastn":
|
||||
self.substitution_matrix = substitution_matrices.load("BLASTN")
|
||||
@ -4273,6 +4304,7 @@ AlignmentCounts object returned by the .counts method of an Alignment object."""
|
||||
|
||||
def align(self, seqA, seqB, strand="+"):
|
||||
"""Return the alignments of two sequences using PairwiseAligner."""
|
||||
self.warn_defaults_changed() # FIXME remove this after 1.87 is out
|
||||
if isinstance(seqA, (bytes, Seq, MutableSeq, SeqRecord)):
|
||||
sA = bytes(seqA)
|
||||
sA = np.frombuffer(sA, dtype=np.uint8).astype(np.int32)
|
||||
@ -4330,6 +4362,7 @@ AlignmentCounts object returned by the .counts method of an Alignment object."""
|
||||
|
||||
def score(self, seqA, seqB, strand="+"):
|
||||
"""Return the alignment score of two sequences using PairwiseAligner."""
|
||||
self.warn_defaults_changed() # FIXME remove this after 1.87 is out
|
||||
if isinstance(seqA, (bytes, Seq, MutableSeq, SeqRecord)):
|
||||
seqA = bytes(seqA)
|
||||
seqA = np.frombuffer(seqA, dtype=np.uint8).astype(np.int32)
|
||||
|
@ -1643,15 +1643,10 @@ static char _alignmentcounts__doc__[] =
|
||||
/* Module definition */
|
||||
|
||||
static struct PyModuleDef moduledef = {
|
||||
PyModuleDef_HEAD_INIT,
|
||||
"_alignmentcounts",
|
||||
_alignmentcounts__doc__,
|
||||
-1,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL
|
||||
PyModuleDef_HEAD_INIT,
|
||||
.m_name = "_alignmentcounts",
|
||||
.m_doc = _alignmentcounts__doc__,
|
||||
.m_size = -1,
|
||||
};
|
||||
|
||||
PyObject *
|
||||
|
@ -1074,15 +1074,10 @@ static char _codonaligner__doc__[] =
|
||||
"C extension module implementing a dynamic programming algorithm to align a nucleotide sequence to an amino acid sequence";
|
||||
|
||||
static struct PyModuleDef moduledef = {
|
||||
PyModuleDef_HEAD_INIT,
|
||||
"_codonaligner",
|
||||
_codonaligner__doc__,
|
||||
-1,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL
|
||||
PyModuleDef_HEAD_INIT,
|
||||
.m_name = "_codonaligner",
|
||||
.m_doc = _codonaligner__doc__,
|
||||
.m_size = -1,
|
||||
};
|
||||
|
||||
PyObject *
|
||||
|
@ -6,7 +6,6 @@
|
||||
*/
|
||||
|
||||
|
||||
|
||||
#define PY_SSIZE_T_CLEAN
|
||||
#include "Python.h"
|
||||
#include <float.h>
|
||||
@ -15,6 +14,9 @@
|
||||
#include "substitution_matrices/_arraycore.h"
|
||||
|
||||
|
||||
static bool warned = false; // FIXME remove once Biopython 1.87 is out.
|
||||
|
||||
|
||||
#define STARTPOINT 0x8
|
||||
#define ENDPOINT 0x10
|
||||
#define M_MATRIX 0x1
|
||||
@ -1767,18 +1769,30 @@ Aligner_init(Aligner *self, PyObject *args, PyObject *kwds)
|
||||
self->match = 1.0;
|
||||
self->mismatch = 0.0;
|
||||
self->epsilon = 1.e-6;
|
||||
self->open_internal_insertion_score = 0;
|
||||
self->extend_internal_insertion_score = 0;
|
||||
self->open_internal_deletion_score = 0;
|
||||
self->extend_internal_deletion_score = 0;
|
||||
self->open_left_insertion_score = 0;
|
||||
self->extend_left_insertion_score = 0;
|
||||
self->open_right_insertion_score = 0;
|
||||
self->extend_right_insertion_score = 0;
|
||||
self->open_left_deletion_score = 0;
|
||||
self->extend_left_deletion_score = 0;
|
||||
self->open_right_deletion_score = 0;
|
||||
self->extend_right_deletion_score = 0;
|
||||
self->open_internal_insertion_score = -1.0;
|
||||
self->extend_internal_insertion_score = -1.0;
|
||||
self->open_internal_deletion_score = -1.0;
|
||||
self->extend_internal_deletion_score = -1.0;
|
||||
self->open_left_insertion_score = -1.0;
|
||||
self->extend_left_insertion_score = -1.0;
|
||||
self->open_right_insertion_score = -1.0;
|
||||
self->extend_right_insertion_score = -1.0;
|
||||
self->open_left_deletion_score = -1.0;
|
||||
self->extend_left_deletion_score = -1.0;
|
||||
self->open_right_deletion_score = -1.0;
|
||||
self->extend_right_deletion_score = -1.0;
|
||||
self->open_internal_insertion_score_set = false;
|
||||
self->extend_internal_insertion_score_set = false;
|
||||
self->open_left_insertion_score_set = false;
|
||||
self->extend_left_insertion_score_set = false;
|
||||
self->open_right_insertion_score_set = false;
|
||||
self->extend_right_insertion_score_set = false;
|
||||
self->open_internal_deletion_score_set = false;
|
||||
self->extend_internal_deletion_score_set = false;
|
||||
self->open_left_deletion_score_set = false;
|
||||
self->extend_left_deletion_score_set = false;
|
||||
self->open_right_deletion_score_set = false;
|
||||
self->extend_right_deletion_score_set = false;
|
||||
self->insertion_score_function = NULL;
|
||||
self->deletion_score_function = NULL;
|
||||
self->substitution_matrix.obj = NULL;
|
||||
@ -2165,17 +2179,29 @@ Aligner_set_gap_score(Aligner* self, PyObject* value, void* closure)
|
||||
self->deletion_score_function = NULL;
|
||||
}
|
||||
self->open_internal_insertion_score = score;
|
||||
self->open_internal_insertion_score_set = true;
|
||||
self->extend_internal_insertion_score = score;
|
||||
self->extend_internal_insertion_score_set = true;
|
||||
self->open_left_insertion_score = score;
|
||||
self->open_left_insertion_score_set = true;
|
||||
self->extend_left_insertion_score = score;
|
||||
self->extend_left_insertion_score_set = true;
|
||||
self->open_right_insertion_score = score;
|
||||
self->open_right_insertion_score_set = true;
|
||||
self->extend_right_insertion_score = score;
|
||||
self->extend_right_insertion_score_set = true;
|
||||
self->open_internal_deletion_score = score;
|
||||
self->open_internal_deletion_score_set = true;
|
||||
self->extend_internal_deletion_score = score;
|
||||
self->extend_internal_deletion_score_set = true;
|
||||
self->open_left_deletion_score = score;
|
||||
self->open_left_deletion_score_set = true;
|
||||
self->extend_left_deletion_score = score;
|
||||
self->extend_left_deletion_score_set = true;
|
||||
self->open_right_deletion_score = score;
|
||||
self->open_right_deletion_score_set = true;
|
||||
self->extend_right_deletion_score = score;
|
||||
self->extend_right_deletion_score_set = true;
|
||||
}
|
||||
self->algorithm = Unknown;
|
||||
return 0;
|
||||
@ -2217,11 +2243,17 @@ Aligner_set_open_gap_score(Aligner* self, PyObject* value, void* closure)
|
||||
self->deletion_score_function = NULL;
|
||||
}
|
||||
self->open_internal_insertion_score = score;
|
||||
self->open_internal_insertion_score_set = true;
|
||||
self->open_left_insertion_score = score;
|
||||
self->open_left_insertion_score_set = true;
|
||||
self->open_right_insertion_score = score;
|
||||
self->open_right_insertion_score_set = true;
|
||||
self->open_internal_deletion_score = score;
|
||||
self->open_internal_deletion_score_set = true;
|
||||
self->open_left_deletion_score = score;
|
||||
self->open_left_deletion_score_set = true;
|
||||
self->open_right_deletion_score = score;
|
||||
self->open_right_deletion_score_set = true;
|
||||
self->algorithm = Unknown;
|
||||
return 0;
|
||||
}
|
||||
@ -2262,11 +2294,17 @@ Aligner_set_extend_gap_score(Aligner* self, PyObject* value, void* closure)
|
||||
self->deletion_score_function = NULL;
|
||||
}
|
||||
self->extend_internal_insertion_score = score;
|
||||
self->extend_internal_insertion_score_set = true;
|
||||
self->extend_left_insertion_score = score;
|
||||
self->extend_left_insertion_score_set = true;
|
||||
self->extend_right_insertion_score = score;
|
||||
self->extend_right_insertion_score_set = true;
|
||||
self->extend_internal_deletion_score = score;
|
||||
self->extend_internal_deletion_score_set = true;
|
||||
self->extend_left_deletion_score = score;
|
||||
self->extend_left_deletion_score_set = true;
|
||||
self->extend_right_deletion_score = score;
|
||||
self->extend_right_deletion_score_set = true;
|
||||
self->algorithm = Unknown;
|
||||
return 0;
|
||||
}
|
||||
@ -2304,9 +2342,13 @@ Aligner_set_internal_gap_score(Aligner* self, PyObject* value, void* closure)
|
||||
self->deletion_score_function = NULL;
|
||||
}
|
||||
self->open_internal_insertion_score = score;
|
||||
self->open_internal_insertion_score_set = true;
|
||||
self->extend_internal_insertion_score = score;
|
||||
self->extend_internal_insertion_score_set = true;
|
||||
self->open_internal_deletion_score = score;
|
||||
self->open_internal_deletion_score_set = true;
|
||||
self->extend_internal_deletion_score = score;
|
||||
self->extend_internal_deletion_score_set = true;
|
||||
self->algorithm = Unknown;
|
||||
return 0;
|
||||
}
|
||||
@ -2342,7 +2384,9 @@ Aligner_set_open_internal_gap_score(Aligner* self, PyObject* value, void* closur
|
||||
self->deletion_score_function = NULL;
|
||||
}
|
||||
self->open_internal_insertion_score = score;
|
||||
self->open_internal_insertion_score_set = true;
|
||||
self->open_internal_deletion_score = score;
|
||||
self->open_internal_deletion_score_set = true;
|
||||
self->algorithm = Unknown;
|
||||
return 0;
|
||||
}
|
||||
@ -2379,7 +2423,9 @@ Aligner_set_extend_internal_gap_score(Aligner* self, PyObject* value,
|
||||
self->deletion_score_function = NULL;
|
||||
}
|
||||
self->extend_internal_insertion_score = score;
|
||||
self->extend_internal_insertion_score_set = true;
|
||||
self->extend_internal_deletion_score = score;
|
||||
self->extend_internal_deletion_score_set = true;
|
||||
self->algorithm = Unknown;
|
||||
return 0;
|
||||
}
|
||||
@ -2421,13 +2467,21 @@ Aligner_set_end_gap_score(Aligner* self, PyObject* value, void* closure)
|
||||
self->deletion_score_function = NULL;
|
||||
}
|
||||
self->open_left_insertion_score = score;
|
||||
self->open_left_insertion_score_set = true;
|
||||
self->extend_left_insertion_score = score;
|
||||
self->extend_left_insertion_score_set = true;
|
||||
self->open_right_insertion_score = score;
|
||||
self->open_right_insertion_score_set = true;
|
||||
self->extend_right_insertion_score = score;
|
||||
self->extend_right_insertion_score_set = true;
|
||||
self->open_left_deletion_score = score;
|
||||
self->open_left_deletion_score_set = true;
|
||||
self->extend_left_deletion_score = score;
|
||||
self->extend_left_deletion_score_set = true;
|
||||
self->open_right_deletion_score = score;
|
||||
self->open_right_deletion_score_set = true;
|
||||
self->extend_right_deletion_score = score;
|
||||
self->extend_right_deletion_score_set = true;
|
||||
self->algorithm = Unknown;
|
||||
return 0;
|
||||
}
|
||||
@ -2465,9 +2519,13 @@ Aligner_set_open_end_gap_score(Aligner* self, PyObject* value, void* closure)
|
||||
self->deletion_score_function = NULL;
|
||||
}
|
||||
self->open_left_insertion_score = score;
|
||||
self->open_left_insertion_score_set = true;
|
||||
self->open_right_insertion_score = score;
|
||||
self->open_right_insertion_score_set = true;
|
||||
self->open_left_deletion_score = score;
|
||||
self->open_left_deletion_score_set = true;
|
||||
self->open_right_deletion_score = score;
|
||||
self->open_right_deletion_score_set = true;
|
||||
self->algorithm = Unknown;
|
||||
return 0;
|
||||
}
|
||||
@ -2505,9 +2563,13 @@ Aligner_set_extend_end_gap_score(Aligner* self, PyObject* value, void* closure)
|
||||
self->deletion_score_function = NULL;
|
||||
}
|
||||
self->extend_left_insertion_score = score;
|
||||
self->extend_left_insertion_score_set = true;
|
||||
self->extend_right_insertion_score = score;
|
||||
self->extend_right_insertion_score_set = true;
|
||||
self->extend_left_deletion_score = score;
|
||||
self->extend_left_deletion_score_set = true;
|
||||
self->extend_right_deletion_score = score;
|
||||
self->extend_right_deletion_score_set = true;
|
||||
self->algorithm = Unknown;
|
||||
return 0;
|
||||
}
|
||||
@ -2545,9 +2607,13 @@ Aligner_set_left_gap_score(Aligner* self, PyObject* value, void* closure)
|
||||
self->deletion_score_function = NULL;
|
||||
}
|
||||
self->open_left_insertion_score = score;
|
||||
self->open_left_insertion_score_set = true;
|
||||
self->extend_left_insertion_score = score;
|
||||
self->extend_left_insertion_score_set = true;
|
||||
self->open_left_deletion_score = score;
|
||||
self->open_left_deletion_score_set = true;
|
||||
self->extend_left_deletion_score = score;
|
||||
self->extend_left_deletion_score_set = true;
|
||||
self->algorithm = Unknown;
|
||||
return 0;
|
||||
}
|
||||
@ -2585,9 +2651,13 @@ Aligner_set_right_gap_score(Aligner* self, PyObject* value, void* closure)
|
||||
self->deletion_score_function = NULL;
|
||||
}
|
||||
self->open_right_insertion_score = score;
|
||||
self->open_right_insertion_score_set = true;
|
||||
self->extend_right_insertion_score = score;
|
||||
self->extend_right_insertion_score_set = true;
|
||||
self->open_right_deletion_score = score;
|
||||
self->open_right_deletion_score_set = true;
|
||||
self->extend_right_deletion_score = score;
|
||||
self->extend_right_deletion_score_set = true;
|
||||
self->algorithm = Unknown;
|
||||
return 0;
|
||||
}
|
||||
@ -2623,7 +2693,9 @@ Aligner_set_open_left_gap_score(Aligner* self, PyObject* value, void* closure)
|
||||
self->deletion_score_function = NULL;
|
||||
}
|
||||
self->open_left_insertion_score = score;
|
||||
self->open_left_insertion_score_set = true;
|
||||
self->open_left_deletion_score = score;
|
||||
self->open_left_deletion_score_set = true;
|
||||
self->algorithm = Unknown;
|
||||
return 0;
|
||||
}
|
||||
@ -2659,7 +2731,9 @@ Aligner_set_extend_left_gap_score(Aligner* self, PyObject* value, void* closure)
|
||||
self->deletion_score_function = NULL;
|
||||
}
|
||||
self->extend_left_insertion_score = score;
|
||||
self->extend_left_insertion_score_set = true;
|
||||
self->extend_left_deletion_score = score;
|
||||
self->extend_left_deletion_score_set = true;
|
||||
self->algorithm = Unknown;
|
||||
return 0;
|
||||
}
|
||||
@ -2695,7 +2769,9 @@ Aligner_set_open_right_gap_score(Aligner* self, PyObject* value, void* closure)
|
||||
self->deletion_score_function = NULL;
|
||||
}
|
||||
self->open_right_insertion_score = score;
|
||||
self->open_right_insertion_score_set = true;
|
||||
self->open_right_deletion_score = score;
|
||||
self->open_right_deletion_score_set = true;
|
||||
self->algorithm = Unknown;
|
||||
return 0;
|
||||
}
|
||||
@ -2731,7 +2807,9 @@ Aligner_set_extend_right_gap_score(Aligner* self, PyObject* value, void* closure
|
||||
self->deletion_score_function = NULL;
|
||||
}
|
||||
self->extend_right_insertion_score = score;
|
||||
self->extend_right_insertion_score_set = true;
|
||||
self->extend_right_deletion_score = score;
|
||||
self->extend_right_deletion_score_set = true;
|
||||
self->algorithm = Unknown;
|
||||
return 0;
|
||||
}
|
||||
@ -2760,8 +2838,11 @@ Aligner_set_open_insertion_score(Aligner* self, PyObject* value, void* closure)
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_internal_insertion_score = score;
|
||||
self->open_internal_insertion_score_set = true;
|
||||
self->open_left_insertion_score = score;
|
||||
self->open_left_insertion_score_set = true;
|
||||
self->open_right_insertion_score = score;
|
||||
self->open_right_insertion_score_set = true;
|
||||
if (self->insertion_score_function) {
|
||||
Py_DECREF(self->insertion_score_function);
|
||||
self->insertion_score_function = NULL;
|
||||
@ -2794,8 +2875,11 @@ Aligner_set_extend_insertion_score(Aligner* self, PyObject* value, void* closure
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->extend_internal_insertion_score = score;
|
||||
self->extend_internal_insertion_score_set = true;
|
||||
self->extend_left_insertion_score = score;
|
||||
self->extend_left_insertion_score_set = true;
|
||||
self->extend_right_insertion_score = score;
|
||||
self->extend_right_insertion_score_set = true;
|
||||
if (self->insertion_score_function) {
|
||||
Py_DECREF(self->insertion_score_function);
|
||||
self->insertion_score_function = NULL;
|
||||
@ -2842,11 +2926,17 @@ Aligner_set_insertion_score(Aligner* self, PyObject* value, void* closure)
|
||||
return -1;
|
||||
}
|
||||
self->open_internal_insertion_score = score;
|
||||
self->open_internal_insertion_score_set = true;
|
||||
self->extend_internal_insertion_score = score;
|
||||
self->extend_internal_insertion_score_set = true;
|
||||
self->open_left_insertion_score = score;
|
||||
self->open_left_insertion_score_set = true;
|
||||
self->extend_left_insertion_score = score;
|
||||
self->extend_left_insertion_score_set = true;
|
||||
self->open_right_insertion_score = score;
|
||||
self->open_right_insertion_score_set = true;
|
||||
self->extend_right_insertion_score = score;
|
||||
self->extend_right_insertion_score_set = true;
|
||||
if (self->insertion_score_function) {
|
||||
Py_DECREF(self->insertion_score_function);
|
||||
self->insertion_score_function = NULL;
|
||||
@ -2880,8 +2970,11 @@ Aligner_set_open_deletion_score(Aligner* self, PyObject* value, void* closure)
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_internal_deletion_score = score;
|
||||
self->open_internal_deletion_score_set = true;
|
||||
self->open_left_deletion_score = score;
|
||||
self->open_left_deletion_score_set = true;
|
||||
self->open_right_deletion_score = score;
|
||||
self->open_right_deletion_score_set = true;
|
||||
if (self->deletion_score_function) {
|
||||
Py_DECREF(self->deletion_score_function);
|
||||
self->deletion_score_function = NULL;
|
||||
@ -2914,8 +3007,11 @@ Aligner_set_extend_deletion_score(Aligner* self, PyObject* value, void* closure)
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->extend_internal_deletion_score = score;
|
||||
self->extend_internal_deletion_score_set = true;
|
||||
self->extend_left_deletion_score = score;
|
||||
self->extend_left_deletion_score_set = true;
|
||||
self->extend_right_deletion_score = score;
|
||||
self->extend_right_deletion_score_set = true;
|
||||
if (self->deletion_score_function) {
|
||||
Py_DECREF(self->deletion_score_function);
|
||||
self->deletion_score_function = NULL;
|
||||
@ -2961,11 +3057,17 @@ Aligner_set_deletion_score(Aligner* self, PyObject* value, void* closure)
|
||||
return -1;
|
||||
}
|
||||
self->open_internal_deletion_score = score;
|
||||
self->open_internal_deletion_score_set = true;
|
||||
self->extend_internal_deletion_score = score;
|
||||
self->extend_internal_deletion_score_set = true;
|
||||
self->open_left_deletion_score = score;
|
||||
self->open_left_deletion_score_set = true;
|
||||
self->extend_left_deletion_score = score;
|
||||
self->extend_left_deletion_score_set = true;
|
||||
self->open_right_deletion_score = score;
|
||||
self->open_right_deletion_score_set = true;
|
||||
self->extend_right_deletion_score = score;
|
||||
self->extend_right_deletion_score_set = true;
|
||||
if (self->deletion_score_function) {
|
||||
Py_DECREF(self->deletion_score_function);
|
||||
self->deletion_score_function = NULL;
|
||||
@ -2992,6 +3094,7 @@ Aligner_set_open_internal_insertion_score(Aligner* self,
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_internal_insertion_score = score;
|
||||
self->open_internal_insertion_score_set = true;
|
||||
if (self->insertion_score_function) {
|
||||
Py_DECREF(self->insertion_score_function);
|
||||
self->insertion_score_function = NULL;
|
||||
@ -3017,6 +3120,7 @@ Aligner_set_extend_internal_insertion_score(Aligner* self,
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->extend_internal_insertion_score = score;
|
||||
self->extend_internal_insertion_score_set = true;
|
||||
if (self->insertion_score_function) {
|
||||
Py_DECREF(self->insertion_score_function);
|
||||
self->insertion_score_function = NULL;
|
||||
@ -3049,7 +3153,9 @@ Aligner_set_internal_insertion_score(Aligner* self, PyObject* value,
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_internal_insertion_score = score;
|
||||
self->open_internal_insertion_score_set = true;
|
||||
self->extend_internal_insertion_score = score;
|
||||
self->extend_internal_insertion_score_set = true;
|
||||
if (self->insertion_score_function) {
|
||||
Py_DECREF(self->insertion_score_function);
|
||||
self->insertion_score_function = NULL;
|
||||
@ -3083,9 +3189,13 @@ Aligner_set_end_insertion_score(Aligner* self, PyObject* value, void* closure) {
|
||||
const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_left_insertion_score = score;
|
||||
self->open_left_insertion_score_set = true;
|
||||
self->extend_left_insertion_score = score;
|
||||
self->extend_left_insertion_score_set = true;
|
||||
self->open_right_insertion_score = score;
|
||||
self->open_right_insertion_score_set = true;
|
||||
self->extend_right_insertion_score = score;
|
||||
self->extend_right_insertion_score_set = true;
|
||||
if (self->insertion_score_function) {
|
||||
Py_DECREF(self->insertion_score_function);
|
||||
self->insertion_score_function = NULL;
|
||||
@ -3118,7 +3228,9 @@ Aligner_set_open_end_insertion_score(Aligner* self, PyObject* value,
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_left_insertion_score = score;
|
||||
self->open_left_insertion_score_set = true;
|
||||
self->open_right_insertion_score = score;
|
||||
self->open_right_insertion_score_set = true;
|
||||
if (self->insertion_score_function) {
|
||||
Py_DECREF(self->insertion_score_function);
|
||||
self->insertion_score_function = NULL;
|
||||
@ -3150,7 +3262,9 @@ Aligner_set_extend_end_insertion_score(Aligner* self, PyObject* value, void* clo
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->extend_left_insertion_score = score;
|
||||
self->extend_left_insertion_score_set = true;
|
||||
self->extend_right_insertion_score = score;
|
||||
self->extend_right_insertion_score_set = true;
|
||||
if (self->insertion_score_function) {
|
||||
Py_DECREF(self->insertion_score_function);
|
||||
self->insertion_score_function = NULL;
|
||||
@ -3175,6 +3289,7 @@ Aligner_set_open_left_insertion_score(Aligner* self, PyObject* value, void* clos
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_left_insertion_score = score;
|
||||
self->open_left_insertion_score_set = true;
|
||||
if (self->insertion_score_function) {
|
||||
Py_DECREF(self->insertion_score_function);
|
||||
self->insertion_score_function = NULL;
|
||||
@ -3199,6 +3314,7 @@ Aligner_set_extend_left_insertion_score(Aligner* self, PyObject* value, void* cl
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->extend_left_insertion_score = score;
|
||||
self->extend_left_insertion_score_set = true;
|
||||
if (self->insertion_score_function) {
|
||||
Py_DECREF(self->insertion_score_function);
|
||||
self->insertion_score_function = NULL;
|
||||
@ -3230,7 +3346,9 @@ Aligner_set_left_insertion_score(Aligner* self, PyObject* value, void* closure)
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_left_insertion_score = score;
|
||||
self->open_left_insertion_score_set = true;
|
||||
self->extend_left_insertion_score = score;
|
||||
self->extend_left_insertion_score_set = true;
|
||||
if (self->insertion_score_function) {
|
||||
Py_DECREF(self->insertion_score_function);
|
||||
self->insertion_score_function = NULL;
|
||||
@ -3255,6 +3373,7 @@ Aligner_set_open_right_insertion_score(Aligner* self, PyObject* value, void* clo
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_right_insertion_score = score;
|
||||
self->open_right_insertion_score_set = true;
|
||||
if (self->insertion_score_function) {
|
||||
Py_DECREF(self->insertion_score_function);
|
||||
self->insertion_score_function = NULL;
|
||||
@ -3279,6 +3398,7 @@ Aligner_set_extend_right_insertion_score(Aligner* self, PyObject* value, void* c
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->extend_right_insertion_score = score;
|
||||
self->extend_right_insertion_score_set = true;
|
||||
if (self->insertion_score_function) {
|
||||
Py_DECREF(self->insertion_score_function);
|
||||
self->insertion_score_function = NULL;
|
||||
@ -3310,7 +3430,9 @@ Aligner_set_right_insertion_score(Aligner* self, PyObject* value, void* closure)
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_right_insertion_score = score;
|
||||
self->open_right_insertion_score_set = true;
|
||||
self->extend_right_insertion_score = score;
|
||||
self->extend_right_insertion_score_set = true;
|
||||
if (self->insertion_score_function) {
|
||||
Py_DECREF(self->insertion_score_function);
|
||||
self->insertion_score_function = NULL;
|
||||
@ -3344,9 +3466,13 @@ Aligner_set_end_deletion_score(Aligner* self, PyObject* value, void* closure)
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_left_deletion_score = score;
|
||||
self->open_left_deletion_score_set = true;
|
||||
self->extend_left_deletion_score = score;
|
||||
self->extend_left_deletion_score_set = true;
|
||||
self->open_right_deletion_score = score;
|
||||
self->open_right_deletion_score_set = true;
|
||||
self->extend_right_deletion_score = score;
|
||||
self->extend_right_deletion_score_set = true;
|
||||
if (self->deletion_score_function) {
|
||||
Py_DECREF(self->deletion_score_function);
|
||||
self->deletion_score_function = NULL;
|
||||
@ -3378,7 +3504,9 @@ Aligner_set_open_end_deletion_score(Aligner* self, PyObject* value, void* closur
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_left_deletion_score = score;
|
||||
self->open_left_deletion_score_set = true;
|
||||
self->open_right_deletion_score = score;
|
||||
self->open_right_deletion_score_set = true;
|
||||
if (self->deletion_score_function) {
|
||||
Py_DECREF(self->deletion_score_function);
|
||||
self->deletion_score_function = NULL;
|
||||
@ -3410,7 +3538,9 @@ Aligner_set_extend_end_deletion_score(Aligner* self, PyObject* value, void* clos
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->extend_left_deletion_score = score;
|
||||
self->extend_left_deletion_score_set = true;
|
||||
self->extend_right_deletion_score = score;
|
||||
self->extend_right_deletion_score_set = true;
|
||||
if (self->deletion_score_function) {
|
||||
Py_DECREF(self->deletion_score_function);
|
||||
self->deletion_score_function = NULL;
|
||||
@ -3436,6 +3566,7 @@ Aligner_set_open_internal_deletion_score(Aligner* self, PyObject* value,
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_internal_deletion_score = score;
|
||||
self->open_internal_deletion_score_set = true;
|
||||
if (self->deletion_score_function) {
|
||||
Py_DECREF(self->deletion_score_function);
|
||||
self->deletion_score_function = NULL;
|
||||
@ -3461,6 +3592,7 @@ Aligner_set_extend_internal_deletion_score(Aligner* self, PyObject* value,
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->extend_internal_deletion_score = score;
|
||||
self->extend_internal_deletion_score_set = true;
|
||||
if (self->deletion_score_function) {
|
||||
Py_DECREF(self->deletion_score_function);
|
||||
self->deletion_score_function = NULL;
|
||||
@ -3493,7 +3625,9 @@ Aligner_set_internal_deletion_score(Aligner* self, PyObject* value,
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_internal_deletion_score = score;
|
||||
self->open_internal_deletion_score_set = true;
|
||||
self->extend_internal_deletion_score = score;
|
||||
self->extend_internal_deletion_score_set = true;
|
||||
if (self->deletion_score_function) {
|
||||
Py_DECREF(self->deletion_score_function);
|
||||
self->deletion_score_function = NULL;
|
||||
@ -3518,6 +3652,7 @@ Aligner_set_open_left_deletion_score(Aligner* self, PyObject* value, void* closu
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_left_deletion_score = score;
|
||||
self->open_left_deletion_score_set = true;
|
||||
if (self->deletion_score_function) {
|
||||
Py_DECREF(self->deletion_score_function);
|
||||
self->deletion_score_function = NULL;
|
||||
@ -3542,6 +3677,7 @@ Aligner_set_extend_left_deletion_score(Aligner* self, PyObject* value, void* clo
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->extend_left_deletion_score = score;
|
||||
self->extend_left_deletion_score_set = true;
|
||||
if (self->deletion_score_function) {
|
||||
Py_DECREF(self->deletion_score_function);
|
||||
self->deletion_score_function = NULL;
|
||||
@ -3573,7 +3709,9 @@ Aligner_set_left_deletion_score(Aligner* self, PyObject* value, void* closure)
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_left_deletion_score = score;
|
||||
self->open_left_deletion_score_set = true;
|
||||
self->extend_left_deletion_score = score;
|
||||
self->extend_left_deletion_score_set = true;
|
||||
if (self->deletion_score_function) {
|
||||
Py_DECREF(self->deletion_score_function);
|
||||
self->deletion_score_function = NULL;
|
||||
@ -3598,6 +3736,7 @@ Aligner_set_open_right_deletion_score(Aligner* self, PyObject* value, void* clos
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_right_deletion_score = score;
|
||||
self->open_right_deletion_score_set = true;
|
||||
if (self->deletion_score_function) {
|
||||
Py_DECREF(self->deletion_score_function);
|
||||
self->deletion_score_function = NULL;
|
||||
@ -3622,6 +3761,7 @@ Aligner_set_extend_right_deletion_score(Aligner* self, PyObject* value, void* cl
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->extend_right_deletion_score = score;
|
||||
self->extend_right_deletion_score_set = true;
|
||||
if (self->deletion_score_function) {
|
||||
Py_DECREF(self->deletion_score_function);
|
||||
self->deletion_score_function = NULL;
|
||||
@ -3653,7 +3793,9 @@ Aligner_set_right_deletion_score(Aligner* self, PyObject* value, void* closure)
|
||||
{ const double score = PyFloat_AsDouble(value);
|
||||
if (PyErr_Occurred()) return -1;
|
||||
self->open_right_deletion_score = score;
|
||||
self->open_right_deletion_score_set = true;
|
||||
self->extend_right_deletion_score = score;
|
||||
self->extend_right_deletion_score_set = true;
|
||||
if (self->deletion_score_function) {
|
||||
Py_DECREF(self->deletion_score_function);
|
||||
self->deletion_score_function = NULL;
|
||||
@ -4351,7 +4493,8 @@ struct fogsaa_queue_node {
|
||||
(queue->array[a].next_upper == queue->array[b].next_upper && \
|
||||
queue->array[a].next_lower > queue->array[b].next_lower))
|
||||
|
||||
int fogsaa_queue_insert(struct fogsaa_queue *queue, int pA, int pB,
|
||||
static int
|
||||
fogsaa_queue_insert(struct fogsaa_queue *queue, int pA, int pB,
|
||||
int type_total, int next_type, double next_lower, double next_upper) {
|
||||
// max heap implementation for the priority queue by next_upper
|
||||
struct fogsaa_queue_node temp;
|
||||
@ -4387,7 +4530,7 @@ int fogsaa_queue_insert(struct fogsaa_queue *queue, int pA, int pB,
|
||||
return 1;
|
||||
}
|
||||
|
||||
struct fogsaa_queue_node fogsaa_queue_pop(struct fogsaa_queue *queue) {
|
||||
static struct fogsaa_queue_node fogsaa_queue_pop(struct fogsaa_queue *queue) {
|
||||
// caller code must check queue is not empty
|
||||
struct fogsaa_queue_node temp, root = queue->array[0];
|
||||
int largest_child, i = 0;
|
||||
@ -7464,6 +7607,50 @@ static char Aligner_doc[] =
|
||||
"The PairwiseAligner class implements common algorithms to align two\n"
|
||||
"sequences to each other.\n";
|
||||
|
||||
static PyObject*
|
||||
Aligner_warn_defaults_changed(Aligner* self)
|
||||
// FIXME remove this function once Biopython release 1.87 is out
|
||||
{
|
||||
if (warned)
|
||||
Py_RETURN_NONE;
|
||||
if (self->open_internal_insertion_score_set
|
||||
&& self->extend_internal_insertion_score_set
|
||||
&& self->open_left_insertion_score_set
|
||||
&& self->extend_left_insertion_score_set
|
||||
&& self->open_right_insertion_score_set
|
||||
&& self->extend_right_insertion_score_set
|
||||
&& self->open_internal_deletion_score_set
|
||||
&& self->extend_internal_deletion_score_set
|
||||
&& self->open_left_deletion_score_set
|
||||
&& self->extend_left_deletion_score_set
|
||||
&& self->open_right_deletion_score_set
|
||||
&& self->extend_right_deletion_score_set) {
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
warned = true;
|
||||
PyErr_WarnEx(PyExc_UserWarning,
|
||||
"\n"
|
||||
"Note that the default value for the gap score parameter of a\n"
|
||||
"PairwiseAligner object has changed.\n"
|
||||
"\n"
|
||||
"In older versions of Biopython, the pairwise aligner was initialized\n"
|
||||
"by default with a match score of +1, a mismatch score of 0, and a gap\n"
|
||||
"score of 0. This choice was made to be consistent with the pairwise\n"
|
||||
"alignment code in Bio.pairwise2.\n"
|
||||
"\n"
|
||||
"However, this scoring scheme tends to produce a large number of alignments\n"
|
||||
"with only trivial difference between them. In particular, a mismatch\n"
|
||||
"between two letters, a single insertion followed by a deletion, and a\n"
|
||||
"deletion followed by an insertion are all assigned the same score. For long\n"
|
||||
"sequences, the number of alignments with such trivial differences can be\n"
|
||||
"astronomical.\n"
|
||||
"\n"
|
||||
"In Biopython 1.86, the default gap score was therefore changed to -1,\n"
|
||||
"while the default match score remained +1 and the default mismatch score\n"
|
||||
"remained 0.\n", 1);
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
static PyMethodDef Aligner_methods[] = {
|
||||
{"score",
|
||||
(PyCFunction)Aligner_score,
|
||||
@ -7475,6 +7662,11 @@ static PyMethodDef Aligner_methods[] = {
|
||||
METH_VARARGS | METH_KEYWORDS,
|
||||
Aligner_align__doc__
|
||||
},
|
||||
{"warn_defaults_changed",
|
||||
(PyCFunction)Aligner_warn_defaults_changed,
|
||||
METH_NOARGS,
|
||||
"return False if all gap scores have been set explicitly, and True otherwise."
|
||||
},
|
||||
{NULL, NULL, 0, NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
@ -7499,15 +7691,10 @@ static char _pairwisealigner__doc__[] =
|
||||
"C extension module implementing pairwise alignment algorithms";
|
||||
|
||||
static struct PyModuleDef moduledef = {
|
||||
PyModuleDef_HEAD_INIT,
|
||||
"_pairwisealigner",
|
||||
_pairwisealigner__doc__,
|
||||
-1,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL
|
||||
PyModuleDef_HEAD_INIT,
|
||||
.m_name = "_pairwisealigner",
|
||||
.m_doc = _pairwisealigner__doc__,
|
||||
.m_size = -1,
|
||||
};
|
||||
|
||||
PyObject *
|
||||
|
@ -40,6 +40,18 @@ typedef struct {
|
||||
double extend_left_deletion_score;
|
||||
double open_right_deletion_score;
|
||||
double extend_right_deletion_score;
|
||||
bool open_internal_insertion_score_set;
|
||||
bool extend_internal_insertion_score_set;
|
||||
bool open_left_insertion_score_set;
|
||||
bool extend_left_insertion_score_set;
|
||||
bool open_right_insertion_score_set;
|
||||
bool extend_right_insertion_score_set;
|
||||
bool open_internal_deletion_score_set;
|
||||
bool extend_internal_deletion_score_set;
|
||||
bool open_left_deletion_score_set;
|
||||
bool extend_left_deletion_score_set;
|
||||
bool open_right_deletion_score_set;
|
||||
bool extend_right_deletion_score_set;
|
||||
PyObject* insertion_score_function;
|
||||
PyObject* deletion_score_function;
|
||||
Py_buffer substitution_matrix;
|
||||
|
@ -171,11 +171,9 @@ static PyTypeObject Array_Type = {
|
||||
|
||||
static struct PyModuleDef module = {
|
||||
PyModuleDef_HEAD_INIT,
|
||||
"_arraycore",
|
||||
"Base module defining the Array base class",
|
||||
-1,
|
||||
NULL,
|
||||
NULL, NULL, NULL, NULL
|
||||
.m_name = "_arraycore",
|
||||
.m_doc = "Base module defining the Array base class",
|
||||
.m_size = -1,
|
||||
};
|
||||
|
||||
PyMODINIT_FUNC
|
||||
|
@ -644,6 +644,21 @@ Release 1.86, with the original name still available with a deprecation warning.
|
||||
These attributes were renamed to be consistent with the AlignmentCounts class
|
||||
and with the common nomenclature in the literature.
|
||||
|
||||
The default value of the gap score of a PairwiseAligner object was changed in
|
||||
Release 1.86. Previously, for consistency with Bio.pairwise2, the default
|
||||
value for gap score was 0. However, this means that a mismatch, an insertion
|
||||
followed by a deletion, and a deletion followed by an insertion all get
|
||||
assigned a score of 0. The aligner then finds a large number of alignments
|
||||
that are logically the same, but with trivial differences between them. For
|
||||
example, aligning AAACAAA to AAAGAAA previously yielded the following three
|
||||
alignments, all with score 6:
|
||||
|
||||
AAACAAA AAAC-AAA AAA-CAAA
|
||||
AAAGAAA AAA-GAAA AAAG-AAA
|
||||
|
||||
With the new default parameter for the gap score, only the first alignment is
|
||||
returned.
|
||||
|
||||
The ``alphabet`` attribute of the PairwiseAligner class was deprecated in
|
||||
Release 1.86. The attribute is still being stored, but it is not used in any
|
||||
way.
|
||||
|
@ -54,8 +54,8 @@ between two sequences:
|
||||
|
||||
.. code:: pycon
|
||||
|
||||
>>> target = "GAACT"
|
||||
>>> query = "GAT"
|
||||
>>> target = "GAACTTT"
|
||||
>>> query = "GATTT"
|
||||
>>> score = aligner.score(target, query)
|
||||
>>> score
|
||||
3.0
|
||||
@ -82,7 +82,7 @@ indexing:
|
||||
|
||||
>>> alignment = alignments[0]
|
||||
>>> alignment # doctest: +ELLIPSIS
|
||||
<Alignment object (2 rows x 5 columns) at 0x...>
|
||||
<Alignment object (2 rows x 7 columns) at 0x...>
|
||||
|
||||
Iterate over the ``Alignment`` objects and print them to see the
|
||||
alignments:
|
||||
@ -94,13 +94,13 @@ alignments:
|
||||
>>> for alignment in alignments:
|
||||
... print(alignment)
|
||||
...
|
||||
target 0 GAACT 5
|
||||
0 ||--| 5
|
||||
query 0 GA--T 3
|
||||
target 0 GAACTTT 7
|
||||
0 ||--||| 7
|
||||
query 0 GA--TTT 5
|
||||
<BLANKLINE>
|
||||
target 0 GAACT 5
|
||||
0 |-|-| 5
|
||||
query 0 G-A-T 3
|
||||
target 0 GAACTTT 7
|
||||
0 |-|-||| 7
|
||||
query 0 G-A-TTT 5
|
||||
<BLANKLINE>
|
||||
|
||||
Use indices to get the aligned sequence (see :ref:`subsec:slicing-indexing-alignment`):
|
||||
@ -110,9 +110,9 @@ Use indices to get the aligned sequence (see :ref:`subsec:slicing-indexing-align
|
||||
.. code:: pycon
|
||||
|
||||
>>> alignment[0]
|
||||
'GAACT'
|
||||
'GAACTTT'
|
||||
>>> alignment[1]
|
||||
'G-A-T'
|
||||
'G-A-TTT'
|
||||
|
||||
Each alignment stores the alignment score:
|
||||
|
||||
@ -130,9 +130,9 @@ as well as pointers to the sequences that were aligned:
|
||||
.. code:: pycon
|
||||
|
||||
>>> alignment.target
|
||||
'GAACT'
|
||||
'GAACTTT'
|
||||
>>> alignment.query
|
||||
'GAT'
|
||||
'GATTT'
|
||||
|
||||
Internally, the alignment is stored in terms of the sequence coordinates:
|
||||
|
||||
@ -142,8 +142,8 @@ Internally, the alignment is stored in terms of the sequence coordinates:
|
||||
|
||||
>>> alignment = alignments[0]
|
||||
>>> alignment.coordinates
|
||||
array([[0, 2, 4, 5],
|
||||
[0, 2, 2, 3]])
|
||||
array([[0, 2, 4, 7],
|
||||
[0, 2, 2, 5]])
|
||||
|
||||
Here, the two rows refer to the target and query sequence. These
|
||||
coordinates show that the alignment consists of the following three
|
||||
@ -154,7 +154,7 @@ blocks:
|
||||
- ``target[2:4]`` aligned to a gap, since ``query[2:2]`` is an empty
|
||||
string (i.e., a deletion);
|
||||
|
||||
- ``target[4:5]`` aligned to ``query[2:3]``.
|
||||
- ``target[4:7]`` aligned to ``query[2:5]``.
|
||||
|
||||
The number of aligned sequences is always 2 for a pairwise alignment:
|
||||
|
||||
@ -175,7 +175,7 @@ query:
|
||||
.. code:: pycon
|
||||
|
||||
>>> alignment.length
|
||||
5
|
||||
7
|
||||
|
||||
The ``aligned`` property, which returns the start and end indices of
|
||||
aligned subsequences, returns two tuples of length 2 for the first
|
||||
@ -187,10 +187,10 @@ alignment:
|
||||
|
||||
>>> alignment.aligned
|
||||
array([[[0, 2],
|
||||
[4, 5]],
|
||||
[4, 7]],
|
||||
<BLANKLINE>
|
||||
[[0, 2],
|
||||
[2, 3]]])
|
||||
[2, 5]]])
|
||||
|
||||
while for the alternative alignment, two tuples of length 3 are
|
||||
returned:
|
||||
@ -201,18 +201,18 @@ returned:
|
||||
|
||||
>>> alignment = alignments[1]
|
||||
>>> print(alignment)
|
||||
target 0 GAACT 5
|
||||
0 |-|-| 5
|
||||
query 0 G-A-T 3
|
||||
target 0 GAACTTT 7
|
||||
0 |-|-||| 7
|
||||
query 0 G-A-TTT 5
|
||||
<BLANKLINE>
|
||||
>>> alignment.aligned
|
||||
array([[[0, 1],
|
||||
[2, 3],
|
||||
[4, 5]],
|
||||
[4, 7]],
|
||||
<BLANKLINE>
|
||||
[[0, 1],
|
||||
[1, 2],
|
||||
[2, 3]]])
|
||||
[2, 5]]])
|
||||
|
||||
Note that different alignments may have the same subsequences aligned to
|
||||
each other. In particular, this may occur if alignments differ from each
|
||||
@ -345,16 +345,14 @@ follow the suggestion by Waterman & Eggert
|
||||
|
||||
If ``aligner.mode`` is set to ``"fogsaa"``, then the Fast Optimal Global
|
||||
Alignment Algorithm [Chakraborty2013]_ with some modifications is used. This
|
||||
mode calculates a global alignment, but it is not like the regular `"global"`
|
||||
mode. It is best suited for long alignments between similar sequences. Rather
|
||||
than calculating all possible alignments like other algorithms do, FOGSAA uses
|
||||
a heuristic to detect steps in an alignment that cannot lead to an optimal
|
||||
alignment. This can speed up alignment, however, the heuristic makes
|
||||
assumptions about your match, mismatch, and gap scores. If the match score is
|
||||
less than the mismatch score or any gap score, or if any gap score is greater
|
||||
than the mismatch score, then a warning is raised and the algorithm may return
|
||||
incorrect results. Unlike other modes that may return more than one alignment,
|
||||
FOGSAA always returns only one alignment.
|
||||
mode also calculates a global alignment, but uses a heuristic to detect steps
|
||||
in an alignment that cannot lead to an optimal alignment. This can speed up
|
||||
alignment and is best suited for long alignments between similar sequences.
|
||||
The heuristic makes assumptions about your match, mismatch, and gap scores. If
|
||||
the match score is less than the mismatch score or any gap score, or if any gap
|
||||
score is greater than the mismatch score, then a warning is raised and the
|
||||
algorithm may return incorrect results. Unlike other modes, which may return
|
||||
more than one alignment, FOGSAA always returns only one alignment.
|
||||
|
||||
.. cont-doctest
|
||||
|
||||
@ -391,18 +389,18 @@ all parameters, use
|
||||
wildcard: None
|
||||
match_score: 1.000000
|
||||
mismatch_score: 0.000000
|
||||
open_internal_insertion_score: 0.000000
|
||||
extend_internal_insertion_score: 0.000000
|
||||
open_left_insertion_score: 0.000000
|
||||
extend_left_insertion_score: 0.000000
|
||||
open_right_insertion_score: 0.000000
|
||||
extend_right_insertion_score: 0.000000
|
||||
open_internal_deletion_score: 0.000000
|
||||
extend_internal_deletion_score: 0.000000
|
||||
open_left_deletion_score: 0.000000
|
||||
extend_left_deletion_score: 0.000000
|
||||
open_right_deletion_score: 0.000000
|
||||
extend_right_deletion_score: 0.000000
|
||||
open_internal_insertion_score: -1.000000
|
||||
extend_internal_insertion_score: -1.000000
|
||||
open_left_insertion_score: -1.000000
|
||||
extend_left_insertion_score: -1.000000
|
||||
open_right_insertion_score: -1.000000
|
||||
extend_right_insertion_score: -1.000000
|
||||
open_internal_deletion_score: -1.000000
|
||||
extend_internal_deletion_score: -1.000000
|
||||
open_left_deletion_score: -1.000000
|
||||
extend_left_deletion_score: -1.000000
|
||||
open_right_deletion_score: -1.000000
|
||||
extend_right_deletion_score: -1.000000
|
||||
mode: local
|
||||
<BLANKLINE>
|
||||
|
||||
@ -775,14 +773,25 @@ disallows a deletion after two nucleotides in the query sequence:
|
||||
Using a pre-defined substitution matrix and gap scores
|
||||
------------------------------------------------------
|
||||
|
||||
By default, a ``PairwiseAligner`` object is initialized with a match
|
||||
score of +1.0, a mismatch score of 0.0, and all gap scores equal to 0.0,
|
||||
While this has the benefit of being a simple scoring scheme, in general
|
||||
it does not give the best performance. Instead, you can use the argument
|
||||
``scoring`` to select a predefined scoring scheme when initializing a
|
||||
``PairwiseAligner`` object. Currently, the provided scoring schemes are
|
||||
``blastn`` and ``megablast``, which are suitable for nucleotide
|
||||
alignments, and ``blastp``, which is suitable for protein alignments.
|
||||
Currently, a ``PairwiseAligner`` object is initialized by default with a match
|
||||
score of +1.0, a mismatch score of 0.0, and all gap scores equal to -1.0.
|
||||
Biopython versions 1.85 and older used a default gap score of 0.0 (this choice
|
||||
was made to be consistent with the older pairwise aligner in ``Bio.pairwise2``,
|
||||
which uses a default gap score of 0.0). However, this scheme assigns the same
|
||||
score to a mismatch and to an insertion followed by a deletion (or vice versa), e.g.
|
||||
|
||||
.. code:: pycon
|
||||
|
||||
A A- -A
|
||||
C -C C-
|
||||
|
||||
are evaluated equally, which tends to result in a large number of alignments
|
||||
that are only trivially different from each other. While the current scoring
|
||||
scheme avoids this problem, in general you may get better performance using
|
||||
a predefined scoring scheme, which you can select using the ``scoring``
|
||||
argument when initializing a ``PairwiseAligner`` object. Currently, the
|
||||
provided scoring schemes are ``blastn`` and ``megablast``, which are suitable
|
||||
for nucleotide alignments, and ``blastp``, suitable for protein alignments.
|
||||
Selecting these scoring schemes will initialize the ``PairwiseAligner``
|
||||
object to the default scoring parameters used by BLASTN, MegaBLAST, and
|
||||
BLASTP, respectively.
|
||||
@ -908,7 +917,7 @@ You can perform the following operations on ``alignments``:
|
||||
.. code:: pycon
|
||||
|
||||
>>> print(alignments.score)
|
||||
2.0
|
||||
1.0
|
||||
|
||||
Aligning to the reverse strand
|
||||
------------------------------
|
||||
@ -927,6 +936,7 @@ for ``query`` to the reverse strand of ``target``, use ``strand="-"``:
|
||||
>>> query = "AACC"
|
||||
>>> aligner = Align.PairwiseAligner()
|
||||
>>> aligner.mismatch_score = -1
|
||||
>>> aligner.gap_score = 0
|
||||
>>> aligner.internal_gap_score = -1
|
||||
>>> aligner.score(target, query) # strand is "+" by default
|
||||
4.0
|
||||
@ -1767,9 +1777,9 @@ hemoglobin sequences from above (``HBA_HUMAN``, ``HBB_HUMAN``) stored in
|
||||
>>> aligner = Align.PairwiseAligner()
|
||||
>>> score = aligner.score(seq1.seq, seq2.seq)
|
||||
>>> print(score)
|
||||
72.0
|
||||
56.0
|
||||
|
||||
showing an alignment score of 72.0. To see the individual alignments, do
|
||||
showing an alignment score of 56.0. To see the individual alignments, do
|
||||
|
||||
.. cont-doctest
|
||||
|
||||
@ -1804,28 +1814,24 @@ alignment itself:
|
||||
.. code:: pycon
|
||||
|
||||
>>> print(alignment.score)
|
||||
72.0
|
||||
56.0
|
||||
>>> print(alignment)
|
||||
target 0 MV-LS-PAD--KTN--VK-AA-WGKV-----GAHAGEYGAEALE-RMFLSF----P-TTK
|
||||
0 ||-|--|----|----|--|--||||-----|---||--|--|--|--|------|-|--
|
||||
query 0 MVHL-TP--EEK--SAV-TA-LWGKVNVDEVG---GE--A--L-GR--L--LVVYPWT--
|
||||
target 0 MV-LSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHF-DLSHGSAQ---
|
||||
0 ||-|.|..|..|.|.||||...--|.|.|||.|.....|.|...|..|-|||...|.---
|
||||
query 0 MVHLTPEEKSAVTALWGKVNVD--EVGGEALGRLLVVYPWTQRFFESFGDLSTPDAVMGN
|
||||
<BLANKLINE>
|
||||
target 41 TY--FPHF----DLSHGS---AQVK-G------HGKKV--A--DA-LTNAVAHV-DDMPN
|
||||
60 ----|--|----|||------|-|--|------|||||--|--|--|--|--|--|---|
|
||||
query 39 --QRF--FESFGDLS---TPDA-V-MGNPKVKAHGKKVLGAFSD-GL--A--H-LD---N
|
||||
target 55 --VKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKLLSHCLLVTLAAH
|
||||
60 --||.|||||..|.....||.|........||.||..||.|||.||.||...|...||.|
|
||||
query 58 PKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDPENFRLLGNVLVCVLAHH
|
||||
<BLANKLINE>
|
||||
target 79 ALS----A-LSD-LHAH--KLR-VDPV-NFK-LLSHC---LLVT--LAAHLPA----EFT
|
||||
120 -|-----|-||--||----||--|||--||--||------|-|---||-|-------|||
|
||||
query 81 -L-KGTFATLS-ELH--CDKL-HVDP-ENF-RLL---GNVL-V-CVLA-H---HFGKEFT
|
||||
<BLANKLINE>
|
||||
target 119 PA-VH-ASLDKFLAS---VSTV------LTS--KYR- 142
|
||||
180 |--|--|------|----|--|------|----||-- 217
|
||||
query 124 P-PV-QA------A-YQKV--VAGVANAL--AHKY-H 147
|
||||
target 113 LPAEFTPAVHASLDKFLASVSTVLTSKYR 142
|
||||
120 ...||||.|.|...|..|.|...|..||. 149
|
||||
query 118 FGKEFTPPVQAAYQKVVAGVANALAHKYH 147
|
||||
<BLANKLINE>
|
||||
|
||||
Better alignments are usually obtained by penalizing gaps: higher costs
|
||||
for opening a gap and lower costs for extending an existing gap. For
|
||||
amino acid sequences match scores are usually encoded in matrices like
|
||||
amino acid sequences, match scores are usually encoded in matrices like
|
||||
``PAM`` or ``BLOSUM``. Thus, a more meaningful alignment for our example
|
||||
can be obtained by using the BLOSUM62 matrix, together with a gap open
|
||||
penalty of 10 and a gap extension penalty of 0.5:
|
||||
@ -2024,7 +2030,7 @@ BLOSUM62 matrix, and align these sequences to each other:
|
||||
Asn --- Leu Phe
|
||||
<BLANKLINE>
|
||||
>>> print(alignments.score)
|
||||
18.0
|
||||
17.0
|
||||
|
||||
Generalized pairwise alignments using match/mismatch scores and integer sequences
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
@ -2060,7 +2066,7 @@ This step can be bypassed by passing integer arrays directly:
|
||||
2 -- 10 13
|
||||
<BLANKLINE>
|
||||
>>> print(alignments.score)
|
||||
18.0
|
||||
17.0
|
||||
|
||||
Note that the indices should consist of 32-bit integers, as specified in
|
||||
this example by ``numpy.int32``.
|
||||
|
15
NEWS.rst
15
NEWS.rst
@ -20,6 +20,21 @@ has also been tested on PyPy3.10 v7.3.17.
|
||||
`Infernal <http://eddylab.org/infernal/>`_ (v1.0.0+) RNA search tool. The
|
||||
formats are ``infernal-tab`` and ``infernal-text``.
|
||||
|
||||
The default value of the gap score of a ``PairwiseAligner`` object was changed
|
||||
in this release. Previously, for consistency with ``Bio.pairwise2``, the
|
||||
default value for gap score was 0. However, this means that a mismatch, an
|
||||
insertion followed by a deletion, and a deletion followed by an insertion all
|
||||
get assigned a score of 0. The aligner then finds a large number of alignments
|
||||
that are logically the same, but have trivial differences between them. For
|
||||
example, aligning AAACAAA to AAAGAAA previously yielded the following three
|
||||
alignments, all with score 6:
|
||||
|
||||
AAACAAA AAAC-AAA AAA-CAAA
|
||||
AAAGAAA AAA-GAAA AAAG-AAA
|
||||
|
||||
With the new default parameter for the gap score, only the first alignment is
|
||||
returned.
|
||||
|
||||
``Bio.PDB.PDBIO`` now ensures that b-factor values are always at most 6 characters to
|
||||
ensure that we do not violate the wwPDB specification. This should not have an impact
|
||||
on the majority of uses, as b-factor values are generally small (less than 100). When
|
||||
|
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user