mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
* Initialize variables with dummy values to silence warnings * Fix indentation to silence warnings * Initialize pointers to NULL to silence warnings
480 lines
16 KiB
C
480 lines
16 KiB
C
/* Copyright 2002 by Jeffrey Chang.
|
|
* Copyright 2016, 2019 by Markus Piotrowski.
|
|
* All rights reserved.
|
|
*
|
|
* This file is part of the Biopython distribution and governed by your
|
|
* choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
|
|
* Please see the LICENSE file that should have been included as part of this
|
|
* package.
|
|
*
|
|
* cpairwise2module.c
|
|
* Created 30 Sep 2001
|
|
*
|
|
* Optimized C routines that complement pairwise2.py.
|
|
*/
|
|
|
|
#include "Python.h"
|
|
|
|
|
|
#define _PRECISION 1000
|
|
#define rint(x) (int)((x)*_PRECISION+0.5)
|
|
|
|
/* Functions in this module. */
|
|
|
|
static double calc_affine_penalty(int length, double open, double extend,
|
|
int penalize_extend_when_opening)
|
|
{
|
|
double penalty;
|
|
|
|
if(length <= 0)
|
|
return 0.0;
|
|
penalty = open + extend * length;
|
|
if(!penalize_extend_when_opening)
|
|
penalty -= extend;
|
|
return penalty;
|
|
}
|
|
|
|
static double _get_match_score(PyObject *py_sequenceA, PyObject *py_sequenceB,
|
|
PyObject *py_match_fn, int i, int j,
|
|
char *sequenceA, char *sequenceB,
|
|
int use_sequence_cstring,
|
|
double match, double mismatch,
|
|
int use_match_mismatch_scores)
|
|
{
|
|
PyObject *py_A=NULL, *py_B=NULL;
|
|
PyObject *py_arglist=NULL, *py_result=NULL;
|
|
double score = 0;
|
|
|
|
if(use_sequence_cstring && use_match_mismatch_scores) {
|
|
score = (sequenceA[i] == sequenceB[j]) ? match : mismatch;
|
|
return score;
|
|
}
|
|
/* Calculate the match score. */
|
|
if(!(py_A = PySequence_GetItem(py_sequenceA, i)))
|
|
goto _get_match_score_cleanup;
|
|
if(!(py_B = PySequence_GetItem(py_sequenceB, j)))
|
|
goto _get_match_score_cleanup;
|
|
if(!(py_arglist = Py_BuildValue("(OO)", py_A, py_B)))
|
|
goto _get_match_score_cleanup;
|
|
|
|
if(!(py_result = PyObject_CallObject(py_match_fn, py_arglist)))
|
|
goto _get_match_score_cleanup;
|
|
score = PyFloat_AsDouble(py_result);
|
|
|
|
_get_match_score_cleanup:
|
|
if(py_A) {
|
|
Py_DECREF(py_A);
|
|
}
|
|
if(py_B) {
|
|
Py_DECREF(py_B);
|
|
}
|
|
if(py_arglist) {
|
|
Py_DECREF(py_arglist);
|
|
}
|
|
if(py_result) {
|
|
Py_DECREF(py_result);
|
|
}
|
|
return score;
|
|
}
|
|
|
|
#if PY_MAJOR_VERSION >= 3
|
|
static PyObject* _create_bytes_object(PyObject* o)
|
|
{
|
|
PyObject* b;
|
|
if (PyBytes_Check(o)) {
|
|
return o;
|
|
}
|
|
if (!PyUnicode_Check(o)) {
|
|
return NULL;
|
|
}
|
|
b = PyUnicode_AsASCIIString(o);
|
|
if (!b) {
|
|
PyErr_Clear();
|
|
return NULL;
|
|
}
|
|
return b;
|
|
}
|
|
#endif
|
|
|
|
/* This function is a more-or-less straightforward port of the
|
|
* equivalent function in pairwise2. Please see there for algorithm
|
|
* documentation.
|
|
*/
|
|
static PyObject *cpairwise2__make_score_matrix_fast(PyObject *self,
|
|
PyObject *args)
|
|
{
|
|
int i;
|
|
int row, col;
|
|
PyObject *py_sequenceA, *py_sequenceB, *py_match_fn;
|
|
#if PY_MAJOR_VERSION >= 3
|
|
PyObject *py_bytesA, *py_bytesB;
|
|
#endif
|
|
char *sequenceA=NULL, *sequenceB=NULL;
|
|
int use_sequence_cstring;
|
|
double open_A, extend_A, open_B, extend_B;
|
|
int penalize_extend_when_opening, penalize_end_gaps_A, penalize_end_gaps_B;
|
|
int align_globally, score_only;
|
|
|
|
PyObject *py_match=NULL, *py_mismatch=NULL;
|
|
double first_A_gap, first_B_gap;
|
|
double match, mismatch;
|
|
double score;
|
|
double best_score = 0;
|
|
double local_max_score = 0;
|
|
int use_match_mismatch_scores;
|
|
int lenA, lenB;
|
|
double *score_matrix = NULL;
|
|
unsigned char *trace_matrix = NULL;
|
|
PyObject *py_score_matrix=NULL, *py_trace_matrix=NULL;
|
|
|
|
double *col_cache_score = NULL;
|
|
PyObject *py_retval = NULL;
|
|
|
|
if(!PyArg_ParseTuple(args, "OOOddddi(ii)ii", &py_sequenceA, &py_sequenceB,
|
|
&py_match_fn, &open_A, &extend_A, &open_B, &extend_B,
|
|
&penalize_extend_when_opening,
|
|
&penalize_end_gaps_A, &penalize_end_gaps_B,
|
|
&align_globally, &score_only))
|
|
return NULL;
|
|
if(!PySequence_Check(py_sequenceA) || !PySequence_Check(py_sequenceB)) {
|
|
PyErr_SetString(PyExc_TypeError,
|
|
"py_sequenceA and py_sequenceB should be sequences.");
|
|
return NULL;
|
|
}
|
|
|
|
/* Optimize for the common case. Check to see if py_sequenceA and
|
|
py_sequenceB are strings. If they are, use the c string
|
|
representation. */
|
|
#if PY_MAJOR_VERSION < 3
|
|
use_sequence_cstring = 0;
|
|
if(PyString_Check(py_sequenceA) && PyString_Check(py_sequenceB)) {
|
|
sequenceA = PyString_AS_STRING(py_sequenceA);
|
|
sequenceB = PyString_AS_STRING(py_sequenceB);
|
|
use_sequence_cstring = 1;
|
|
}
|
|
#else
|
|
py_bytesA = _create_bytes_object(py_sequenceA);
|
|
py_bytesB = _create_bytes_object(py_sequenceB);
|
|
if (py_bytesA && py_bytesB) {
|
|
sequenceA = PyBytes_AS_STRING(py_bytesA);
|
|
sequenceB = PyBytes_AS_STRING(py_bytesB);
|
|
use_sequence_cstring = 1;
|
|
}
|
|
else {
|
|
Py_XDECREF(py_bytesA);
|
|
Py_XDECREF(py_bytesB);
|
|
use_sequence_cstring = 0;
|
|
}
|
|
#endif
|
|
|
|
if(!PyCallable_Check(py_match_fn)) {
|
|
PyErr_SetString(PyExc_TypeError, "py_match_fn must be callable.");
|
|
return NULL;
|
|
}
|
|
/* Optimize for the common case. Check to see if py_match_fn is
|
|
an identity_match. If so, pull out the match and mismatch
|
|
member variables and calculate the scores myself. */
|
|
match = mismatch = 0;
|
|
use_match_mismatch_scores = 0;
|
|
if(!(py_match = PyObject_GetAttrString(py_match_fn, "match")))
|
|
goto cleanup_after_py_match_fn;
|
|
match = PyFloat_AsDouble(py_match);
|
|
if(match==-1.0 && PyErr_Occurred())
|
|
goto cleanup_after_py_match_fn;
|
|
if(!(py_mismatch = PyObject_GetAttrString(py_match_fn, "mismatch")))
|
|
goto cleanup_after_py_match_fn;
|
|
mismatch = PyFloat_AsDouble(py_mismatch);
|
|
if(mismatch==-1.0 && PyErr_Occurred())
|
|
goto cleanup_after_py_match_fn;
|
|
use_match_mismatch_scores = 1;
|
|
|
|
cleanup_after_py_match_fn:
|
|
if(PyErr_Occurred())
|
|
PyErr_Clear();
|
|
if(py_match) {
|
|
Py_DECREF(py_match);
|
|
}
|
|
if(py_mismatch) {
|
|
Py_DECREF(py_mismatch);
|
|
}
|
|
/* Cache some commonly used gap penalties */
|
|
first_A_gap = calc_affine_penalty(1, open_A, extend_A,
|
|
penalize_extend_when_opening);
|
|
first_B_gap = calc_affine_penalty(1, open_B, extend_B,
|
|
penalize_extend_when_opening);
|
|
|
|
/* Allocate matrices for storing the results and initialize first row and col. */
|
|
lenA = PySequence_Length(py_sequenceA);
|
|
lenB = PySequence_Length(py_sequenceB);
|
|
score_matrix = malloc((lenA+1)*(lenB+1)*sizeof(*score_matrix));
|
|
if(!score_matrix) {
|
|
PyErr_SetString(PyExc_MemoryError, "Out of memory");
|
|
goto _cleanup_make_score_matrix_fast;
|
|
}
|
|
for(i=0; i<(lenB+1); i++)
|
|
score_matrix[i] = 0;
|
|
for(i=0; i<(lenA+1)*(lenB+1); i += (lenB+1))
|
|
score_matrix[i] = 0;
|
|
/* If we only want the score, we don't need the trace matrix. */
|
|
if (!score_only){
|
|
trace_matrix = malloc((lenA+1)*(lenB+1)*sizeof(*trace_matrix));
|
|
if(!trace_matrix) {
|
|
PyErr_SetString(PyExc_MemoryError, "Out of memory");
|
|
goto _cleanup_make_score_matrix_fast;
|
|
}
|
|
for(i=0; i<(lenB+1); i++)
|
|
trace_matrix[i] = 0;
|
|
for(i=0; i<(lenA+1)*(lenB+1); i += (lenB+1))
|
|
trace_matrix[i] = 0;
|
|
}
|
|
else
|
|
trace_matrix = malloc(1);
|
|
|
|
/* Initialize the first row and col of the score matrix. */
|
|
for(i=0; i<=lenA; i++) {
|
|
if(penalize_end_gaps_B)
|
|
score = calc_affine_penalty(i, open_B, extend_B,
|
|
penalize_extend_when_opening);
|
|
else
|
|
score = 0;
|
|
score_matrix[i*(lenB+1)] = score;
|
|
}
|
|
for(i=0; i<=lenB; i++) {
|
|
if(penalize_end_gaps_A)
|
|
score = calc_affine_penalty(i, open_A, extend_A,
|
|
penalize_extend_when_opening);
|
|
else
|
|
score = 0;
|
|
score_matrix[i] = score;
|
|
}
|
|
|
|
/* Now initialize the col cache. */
|
|
col_cache_score = malloc((lenB+1)*sizeof(*col_cache_score));
|
|
memset((void *)col_cache_score, 0, (lenB+1)*sizeof(*col_cache_score));
|
|
for(i=0; i<=lenB; i++) {
|
|
col_cache_score[i] = calc_affine_penalty(i, (2*open_B), extend_B,
|
|
penalize_extend_when_opening);
|
|
}
|
|
|
|
/* Fill in the score matrix. The row cache is calculated on the fly.*/
|
|
for(row=1; row<=lenA; row++) {
|
|
double row_cache_score = calc_affine_penalty(row, (2*open_A), extend_A,
|
|
penalize_extend_when_opening);
|
|
for(col=1; col<=lenB; col++) {
|
|
double match_score, nogap_score;
|
|
double row_open, row_extend, col_open, col_extend;
|
|
int best_score_rint, row_score_rint, col_score_rint;
|
|
unsigned char row_trace_score, col_trace_score, trace_score;
|
|
|
|
/* Calculate the best score. */
|
|
match_score = _get_match_score(py_sequenceA, py_sequenceB,
|
|
py_match_fn, row-1, col-1,
|
|
sequenceA, sequenceB,
|
|
use_sequence_cstring,
|
|
match, mismatch,
|
|
use_match_mismatch_scores);
|
|
if(match_score==-1.0 && PyErr_Occurred())
|
|
goto _cleanup_make_score_matrix_fast;
|
|
nogap_score = score_matrix[(row-1)*(lenB+1)+col-1] + match_score;
|
|
|
|
if (!penalize_end_gaps_A && row==lenA) {
|
|
row_open = score_matrix[(row)*(lenB+1)+col-1];
|
|
row_extend = row_cache_score;
|
|
}
|
|
else {
|
|
row_open = score_matrix[(row)*(lenB+1)+col-1] + first_A_gap;
|
|
row_extend = row_cache_score + extend_A;
|
|
}
|
|
row_cache_score = (row_open > row_extend) ? row_open : row_extend;
|
|
|
|
if (!penalize_end_gaps_B && col==lenB){
|
|
col_open = score_matrix[(row-1)*(lenB+1)+col];
|
|
col_extend = col_cache_score[col];
|
|
}
|
|
else {
|
|
col_open = score_matrix[(row-1)*(lenB+1)+col] + first_B_gap;
|
|
col_extend = col_cache_score[col] + extend_B;
|
|
}
|
|
col_cache_score[col] = (col_open > col_extend) ? col_open : col_extend;
|
|
|
|
best_score = (row_cache_score > col_cache_score[col]) ? row_cache_score : col_cache_score[col];
|
|
if(nogap_score > best_score)
|
|
best_score = nogap_score;
|
|
|
|
if (best_score > local_max_score)
|
|
local_max_score = best_score;
|
|
|
|
if(!align_globally && best_score < 0)
|
|
score_matrix[row*(lenB+1)+col] = 0;
|
|
else
|
|
score_matrix[row*(lenB+1)+col] = best_score;
|
|
|
|
if (!score_only) {
|
|
row_score_rint = rint(row_cache_score);
|
|
col_score_rint = rint(col_cache_score[col]);
|
|
row_trace_score = 0;
|
|
col_trace_score = 0;
|
|
if (rint(row_open) == row_score_rint)
|
|
row_trace_score = row_trace_score|1;
|
|
if (rint(row_extend) == row_score_rint)
|
|
row_trace_score = row_trace_score|8;
|
|
if (rint(col_open) == col_score_rint)
|
|
col_trace_score = col_trace_score|4;
|
|
if (rint(col_extend) == col_score_rint)
|
|
col_trace_score = col_trace_score|16;
|
|
|
|
trace_score = 0;
|
|
best_score_rint = rint(best_score);
|
|
if (rint(nogap_score) == best_score_rint)
|
|
trace_score = trace_score|2;
|
|
if (row_score_rint == best_score_rint)
|
|
trace_score += row_trace_score;
|
|
if (col_score_rint == best_score_rint)
|
|
trace_score += col_trace_score;
|
|
trace_matrix[row*(lenB+1)+col] = trace_score;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!align_globally)
|
|
best_score = local_max_score;
|
|
|
|
/* Save the score and traceback matrices into real python objects. */
|
|
if(!score_only) {
|
|
if(!(py_score_matrix = PyList_New(lenA+1)))
|
|
goto _cleanup_make_score_matrix_fast;
|
|
if(!(py_trace_matrix = PyList_New(lenA+1)))
|
|
goto _cleanup_make_score_matrix_fast;
|
|
|
|
for(row=0; row<=lenA; row++) {
|
|
PyObject *py_score_row, *py_trace_row;
|
|
if(!(py_score_row = PyList_New(lenB+1)))
|
|
goto _cleanup_make_score_matrix_fast;
|
|
PyList_SET_ITEM(py_score_matrix, row, py_score_row);
|
|
if(!(py_trace_row = PyList_New(lenB+1)))
|
|
goto _cleanup_make_score_matrix_fast;
|
|
PyList_SET_ITEM(py_trace_matrix, row, py_trace_row);
|
|
|
|
for(col=0; col<=lenB; col++) {
|
|
PyObject *py_score, *py_trace;
|
|
int offset = row*(lenB+1) + col;
|
|
|
|
/* Set py_score_matrix[row][col] to the score. */
|
|
if(!(py_score = PyFloat_FromDouble(score_matrix[offset])))
|
|
goto _cleanup_make_score_matrix_fast;
|
|
PyList_SET_ITEM(py_score_row, col, py_score);
|
|
|
|
/* Set py_trace_matrix[row][col] to a list of indexes. On
|
|
the edges of the matrix (row or column is 0), the
|
|
matrix should be [None]. */
|
|
if(!row || !col) {
|
|
if(!(py_trace = Py_BuildValue("B", 1)))
|
|
goto _cleanup_make_score_matrix_fast;
|
|
Py_INCREF(Py_None);
|
|
PyList_SET_ITEM(py_trace_row, col, Py_None);
|
|
}
|
|
else {
|
|
if(!(py_trace = Py_BuildValue("B", trace_matrix[offset])))
|
|
goto _cleanup_make_score_matrix_fast;
|
|
PyList_SET_ITEM(py_trace_row, col, py_trace);
|
|
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
py_score_matrix = PyList_New(1);
|
|
py_trace_matrix = PyList_New(1);
|
|
}
|
|
py_retval = Py_BuildValue("(OOd)", py_score_matrix, py_trace_matrix, best_score);
|
|
|
|
_cleanup_make_score_matrix_fast:
|
|
if(score_matrix)
|
|
free(score_matrix);
|
|
if(trace_matrix)
|
|
free(trace_matrix);
|
|
if(col_cache_score)
|
|
free(col_cache_score);
|
|
if(py_score_matrix){
|
|
Py_DECREF(py_score_matrix);
|
|
}
|
|
if(py_trace_matrix){
|
|
Py_DECREF(py_trace_matrix);
|
|
}
|
|
|
|
#if PY_MAJOR_VERSION >= 3
|
|
if (py_bytesA != NULL && py_bytesA != py_sequenceA) Py_DECREF(py_bytesA);
|
|
if (py_bytesB != NULL && py_bytesB != py_sequenceB) Py_DECREF(py_bytesB);
|
|
#endif
|
|
|
|
return py_retval;
|
|
}
|
|
|
|
static PyObject *cpairwise2_rint(PyObject *self, PyObject *args,
|
|
PyObject *keywds)
|
|
{
|
|
double x;
|
|
int precision = _PRECISION;
|
|
int rint_x;
|
|
|
|
static char *kwlist[] = {"x", "precision", NULL};
|
|
|
|
if(!PyArg_ParseTupleAndKeywords(args, keywds, "d|l", kwlist,
|
|
&x, &precision))
|
|
return NULL;
|
|
rint_x = (int)(x * precision + 0.5);
|
|
#if PY_MAJOR_VERSION >= 3
|
|
return PyLong_FromLong((long)rint_x);
|
|
#else
|
|
return PyInt_FromLong((long)rint_x);
|
|
#endif
|
|
}
|
|
|
|
/* Module definition stuff */
|
|
|
|
static PyMethodDef cpairwise2Methods[] = {
|
|
{"_make_score_matrix_fast",
|
|
(PyCFunction)cpairwise2__make_score_matrix_fast, METH_VARARGS, ""},
|
|
{"rint", (PyCFunction)cpairwise2_rint, METH_VARARGS|METH_KEYWORDS, ""},
|
|
{NULL, NULL, 0, NULL}
|
|
};
|
|
|
|
static char cpairwise2__doc__[] =
|
|
"Optimized C routines that complement pairwise2.py. These are called from within pairwise2.py.\n\
|
|
\n\
|
|
";
|
|
|
|
#if PY_MAJOR_VERSION >= 3
|
|
|
|
static struct PyModuleDef moduledef = {
|
|
PyModuleDef_HEAD_INIT,
|
|
"cpairwise2",
|
|
cpairwise2__doc__,
|
|
-1,
|
|
cpairwise2Methods,
|
|
NULL,
|
|
NULL,
|
|
NULL,
|
|
NULL
|
|
};
|
|
|
|
PyObject *
|
|
PyInit_cpairwise2(void)
|
|
|
|
#else
|
|
|
|
void
|
|
/* for Windows: _declspec(dllexport) initcpairwise2(void) */
|
|
initcpairwise2(void)
|
|
#endif
|
|
|
|
{
|
|
#if PY_MAJOR_VERSION >= 3
|
|
PyObject* module = PyModule_Create(&moduledef);
|
|
if (module==NULL) return NULL;
|
|
return module;
|
|
#else
|
|
(void) Py_InitModule3("cpairwise2", cpairwise2Methods, cpairwise2__doc__);
|
|
#endif
|
|
}
|