Files
biopython/Bio/cpairwise2module.c
Michael M. 68dc8b6463 Address C compiler warnings (#4885)
* Initialize variables with dummy values to silence warnings

* Fix indentation to silence warnings

* Initialize pointers to NULL to silence warnings
2024-11-20 14:31:17 +09:00

480 lines
16 KiB
C

/* Copyright 2002 by Jeffrey Chang.
* Copyright 2016, 2019 by Markus Piotrowski.
* All rights reserved.
*
* This file is part of the Biopython distribution and governed by your
* choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
* Please see the LICENSE file that should have been included as part of this
* package.
*
* cpairwise2module.c
* Created 30 Sep 2001
*
* Optimized C routines that complement pairwise2.py.
*/
#include "Python.h"
#define _PRECISION 1000
#define rint(x) (int)((x)*_PRECISION+0.5)
/* Functions in this module. */
static double calc_affine_penalty(int length, double open, double extend,
int penalize_extend_when_opening)
{
double penalty;
if(length <= 0)
return 0.0;
penalty = open + extend * length;
if(!penalize_extend_when_opening)
penalty -= extend;
return penalty;
}
static double _get_match_score(PyObject *py_sequenceA, PyObject *py_sequenceB,
PyObject *py_match_fn, int i, int j,
char *sequenceA, char *sequenceB,
int use_sequence_cstring,
double match, double mismatch,
int use_match_mismatch_scores)
{
PyObject *py_A=NULL, *py_B=NULL;
PyObject *py_arglist=NULL, *py_result=NULL;
double score = 0;
if(use_sequence_cstring && use_match_mismatch_scores) {
score = (sequenceA[i] == sequenceB[j]) ? match : mismatch;
return score;
}
/* Calculate the match score. */
if(!(py_A = PySequence_GetItem(py_sequenceA, i)))
goto _get_match_score_cleanup;
if(!(py_B = PySequence_GetItem(py_sequenceB, j)))
goto _get_match_score_cleanup;
if(!(py_arglist = Py_BuildValue("(OO)", py_A, py_B)))
goto _get_match_score_cleanup;
if(!(py_result = PyObject_CallObject(py_match_fn, py_arglist)))
goto _get_match_score_cleanup;
score = PyFloat_AsDouble(py_result);
_get_match_score_cleanup:
if(py_A) {
Py_DECREF(py_A);
}
if(py_B) {
Py_DECREF(py_B);
}
if(py_arglist) {
Py_DECREF(py_arglist);
}
if(py_result) {
Py_DECREF(py_result);
}
return score;
}
#if PY_MAJOR_VERSION >= 3
static PyObject* _create_bytes_object(PyObject* o)
{
PyObject* b;
if (PyBytes_Check(o)) {
return o;
}
if (!PyUnicode_Check(o)) {
return NULL;
}
b = PyUnicode_AsASCIIString(o);
if (!b) {
PyErr_Clear();
return NULL;
}
return b;
}
#endif
/* This function is a more-or-less straightforward port of the
* equivalent function in pairwise2. Please see there for algorithm
* documentation.
*/
static PyObject *cpairwise2__make_score_matrix_fast(PyObject *self,
PyObject *args)
{
int i;
int row, col;
PyObject *py_sequenceA, *py_sequenceB, *py_match_fn;
#if PY_MAJOR_VERSION >= 3
PyObject *py_bytesA, *py_bytesB;
#endif
char *sequenceA=NULL, *sequenceB=NULL;
int use_sequence_cstring;
double open_A, extend_A, open_B, extend_B;
int penalize_extend_when_opening, penalize_end_gaps_A, penalize_end_gaps_B;
int align_globally, score_only;
PyObject *py_match=NULL, *py_mismatch=NULL;
double first_A_gap, first_B_gap;
double match, mismatch;
double score;
double best_score = 0;
double local_max_score = 0;
int use_match_mismatch_scores;
int lenA, lenB;
double *score_matrix = NULL;
unsigned char *trace_matrix = NULL;
PyObject *py_score_matrix=NULL, *py_trace_matrix=NULL;
double *col_cache_score = NULL;
PyObject *py_retval = NULL;
if(!PyArg_ParseTuple(args, "OOOddddi(ii)ii", &py_sequenceA, &py_sequenceB,
&py_match_fn, &open_A, &extend_A, &open_B, &extend_B,
&penalize_extend_when_opening,
&penalize_end_gaps_A, &penalize_end_gaps_B,
&align_globally, &score_only))
return NULL;
if(!PySequence_Check(py_sequenceA) || !PySequence_Check(py_sequenceB)) {
PyErr_SetString(PyExc_TypeError,
"py_sequenceA and py_sequenceB should be sequences.");
return NULL;
}
/* Optimize for the common case. Check to see if py_sequenceA and
py_sequenceB are strings. If they are, use the c string
representation. */
#if PY_MAJOR_VERSION < 3
use_sequence_cstring = 0;
if(PyString_Check(py_sequenceA) && PyString_Check(py_sequenceB)) {
sequenceA = PyString_AS_STRING(py_sequenceA);
sequenceB = PyString_AS_STRING(py_sequenceB);
use_sequence_cstring = 1;
}
#else
py_bytesA = _create_bytes_object(py_sequenceA);
py_bytesB = _create_bytes_object(py_sequenceB);
if (py_bytesA && py_bytesB) {
sequenceA = PyBytes_AS_STRING(py_bytesA);
sequenceB = PyBytes_AS_STRING(py_bytesB);
use_sequence_cstring = 1;
}
else {
Py_XDECREF(py_bytesA);
Py_XDECREF(py_bytesB);
use_sequence_cstring = 0;
}
#endif
if(!PyCallable_Check(py_match_fn)) {
PyErr_SetString(PyExc_TypeError, "py_match_fn must be callable.");
return NULL;
}
/* Optimize for the common case. Check to see if py_match_fn is
an identity_match. If so, pull out the match and mismatch
member variables and calculate the scores myself. */
match = mismatch = 0;
use_match_mismatch_scores = 0;
if(!(py_match = PyObject_GetAttrString(py_match_fn, "match")))
goto cleanup_after_py_match_fn;
match = PyFloat_AsDouble(py_match);
if(match==-1.0 && PyErr_Occurred())
goto cleanup_after_py_match_fn;
if(!(py_mismatch = PyObject_GetAttrString(py_match_fn, "mismatch")))
goto cleanup_after_py_match_fn;
mismatch = PyFloat_AsDouble(py_mismatch);
if(mismatch==-1.0 && PyErr_Occurred())
goto cleanup_after_py_match_fn;
use_match_mismatch_scores = 1;
cleanup_after_py_match_fn:
if(PyErr_Occurred())
PyErr_Clear();
if(py_match) {
Py_DECREF(py_match);
}
if(py_mismatch) {
Py_DECREF(py_mismatch);
}
/* Cache some commonly used gap penalties */
first_A_gap = calc_affine_penalty(1, open_A, extend_A,
penalize_extend_when_opening);
first_B_gap = calc_affine_penalty(1, open_B, extend_B,
penalize_extend_when_opening);
/* Allocate matrices for storing the results and initialize first row and col. */
lenA = PySequence_Length(py_sequenceA);
lenB = PySequence_Length(py_sequenceB);
score_matrix = malloc((lenA+1)*(lenB+1)*sizeof(*score_matrix));
if(!score_matrix) {
PyErr_SetString(PyExc_MemoryError, "Out of memory");
goto _cleanup_make_score_matrix_fast;
}
for(i=0; i<(lenB+1); i++)
score_matrix[i] = 0;
for(i=0; i<(lenA+1)*(lenB+1); i += (lenB+1))
score_matrix[i] = 0;
/* If we only want the score, we don't need the trace matrix. */
if (!score_only){
trace_matrix = malloc((lenA+1)*(lenB+1)*sizeof(*trace_matrix));
if(!trace_matrix) {
PyErr_SetString(PyExc_MemoryError, "Out of memory");
goto _cleanup_make_score_matrix_fast;
}
for(i=0; i<(lenB+1); i++)
trace_matrix[i] = 0;
for(i=0; i<(lenA+1)*(lenB+1); i += (lenB+1))
trace_matrix[i] = 0;
}
else
trace_matrix = malloc(1);
/* Initialize the first row and col of the score matrix. */
for(i=0; i<=lenA; i++) {
if(penalize_end_gaps_B)
score = calc_affine_penalty(i, open_B, extend_B,
penalize_extend_when_opening);
else
score = 0;
score_matrix[i*(lenB+1)] = score;
}
for(i=0; i<=lenB; i++) {
if(penalize_end_gaps_A)
score = calc_affine_penalty(i, open_A, extend_A,
penalize_extend_when_opening);
else
score = 0;
score_matrix[i] = score;
}
/* Now initialize the col cache. */
col_cache_score = malloc((lenB+1)*sizeof(*col_cache_score));
memset((void *)col_cache_score, 0, (lenB+1)*sizeof(*col_cache_score));
for(i=0; i<=lenB; i++) {
col_cache_score[i] = calc_affine_penalty(i, (2*open_B), extend_B,
penalize_extend_when_opening);
}
/* Fill in the score matrix. The row cache is calculated on the fly.*/
for(row=1; row<=lenA; row++) {
double row_cache_score = calc_affine_penalty(row, (2*open_A), extend_A,
penalize_extend_when_opening);
for(col=1; col<=lenB; col++) {
double match_score, nogap_score;
double row_open, row_extend, col_open, col_extend;
int best_score_rint, row_score_rint, col_score_rint;
unsigned char row_trace_score, col_trace_score, trace_score;
/* Calculate the best score. */
match_score = _get_match_score(py_sequenceA, py_sequenceB,
py_match_fn, row-1, col-1,
sequenceA, sequenceB,
use_sequence_cstring,
match, mismatch,
use_match_mismatch_scores);
if(match_score==-1.0 && PyErr_Occurred())
goto _cleanup_make_score_matrix_fast;
nogap_score = score_matrix[(row-1)*(lenB+1)+col-1] + match_score;
if (!penalize_end_gaps_A && row==lenA) {
row_open = score_matrix[(row)*(lenB+1)+col-1];
row_extend = row_cache_score;
}
else {
row_open = score_matrix[(row)*(lenB+1)+col-1] + first_A_gap;
row_extend = row_cache_score + extend_A;
}
row_cache_score = (row_open > row_extend) ? row_open : row_extend;
if (!penalize_end_gaps_B && col==lenB){
col_open = score_matrix[(row-1)*(lenB+1)+col];
col_extend = col_cache_score[col];
}
else {
col_open = score_matrix[(row-1)*(lenB+1)+col] + first_B_gap;
col_extend = col_cache_score[col] + extend_B;
}
col_cache_score[col] = (col_open > col_extend) ? col_open : col_extend;
best_score = (row_cache_score > col_cache_score[col]) ? row_cache_score : col_cache_score[col];
if(nogap_score > best_score)
best_score = nogap_score;
if (best_score > local_max_score)
local_max_score = best_score;
if(!align_globally && best_score < 0)
score_matrix[row*(lenB+1)+col] = 0;
else
score_matrix[row*(lenB+1)+col] = best_score;
if (!score_only) {
row_score_rint = rint(row_cache_score);
col_score_rint = rint(col_cache_score[col]);
row_trace_score = 0;
col_trace_score = 0;
if (rint(row_open) == row_score_rint)
row_trace_score = row_trace_score|1;
if (rint(row_extend) == row_score_rint)
row_trace_score = row_trace_score|8;
if (rint(col_open) == col_score_rint)
col_trace_score = col_trace_score|4;
if (rint(col_extend) == col_score_rint)
col_trace_score = col_trace_score|16;
trace_score = 0;
best_score_rint = rint(best_score);
if (rint(nogap_score) == best_score_rint)
trace_score = trace_score|2;
if (row_score_rint == best_score_rint)
trace_score += row_trace_score;
if (col_score_rint == best_score_rint)
trace_score += col_trace_score;
trace_matrix[row*(lenB+1)+col] = trace_score;
}
}
}
if (!align_globally)
best_score = local_max_score;
/* Save the score and traceback matrices into real python objects. */
if(!score_only) {
if(!(py_score_matrix = PyList_New(lenA+1)))
goto _cleanup_make_score_matrix_fast;
if(!(py_trace_matrix = PyList_New(lenA+1)))
goto _cleanup_make_score_matrix_fast;
for(row=0; row<=lenA; row++) {
PyObject *py_score_row, *py_trace_row;
if(!(py_score_row = PyList_New(lenB+1)))
goto _cleanup_make_score_matrix_fast;
PyList_SET_ITEM(py_score_matrix, row, py_score_row);
if(!(py_trace_row = PyList_New(lenB+1)))
goto _cleanup_make_score_matrix_fast;
PyList_SET_ITEM(py_trace_matrix, row, py_trace_row);
for(col=0; col<=lenB; col++) {
PyObject *py_score, *py_trace;
int offset = row*(lenB+1) + col;
/* Set py_score_matrix[row][col] to the score. */
if(!(py_score = PyFloat_FromDouble(score_matrix[offset])))
goto _cleanup_make_score_matrix_fast;
PyList_SET_ITEM(py_score_row, col, py_score);
/* Set py_trace_matrix[row][col] to a list of indexes. On
the edges of the matrix (row or column is 0), the
matrix should be [None]. */
if(!row || !col) {
if(!(py_trace = Py_BuildValue("B", 1)))
goto _cleanup_make_score_matrix_fast;
Py_INCREF(Py_None);
PyList_SET_ITEM(py_trace_row, col, Py_None);
}
else {
if(!(py_trace = Py_BuildValue("B", trace_matrix[offset])))
goto _cleanup_make_score_matrix_fast;
PyList_SET_ITEM(py_trace_row, col, py_trace);
}
}
}
}
else {
py_score_matrix = PyList_New(1);
py_trace_matrix = PyList_New(1);
}
py_retval = Py_BuildValue("(OOd)", py_score_matrix, py_trace_matrix, best_score);
_cleanup_make_score_matrix_fast:
if(score_matrix)
free(score_matrix);
if(trace_matrix)
free(trace_matrix);
if(col_cache_score)
free(col_cache_score);
if(py_score_matrix){
Py_DECREF(py_score_matrix);
}
if(py_trace_matrix){
Py_DECREF(py_trace_matrix);
}
#if PY_MAJOR_VERSION >= 3
if (py_bytesA != NULL && py_bytesA != py_sequenceA) Py_DECREF(py_bytesA);
if (py_bytesB != NULL && py_bytesB != py_sequenceB) Py_DECREF(py_bytesB);
#endif
return py_retval;
}
static PyObject *cpairwise2_rint(PyObject *self, PyObject *args,
PyObject *keywds)
{
double x;
int precision = _PRECISION;
int rint_x;
static char *kwlist[] = {"x", "precision", NULL};
if(!PyArg_ParseTupleAndKeywords(args, keywds, "d|l", kwlist,
&x, &precision))
return NULL;
rint_x = (int)(x * precision + 0.5);
#if PY_MAJOR_VERSION >= 3
return PyLong_FromLong((long)rint_x);
#else
return PyInt_FromLong((long)rint_x);
#endif
}
/* Module definition stuff */
static PyMethodDef cpairwise2Methods[] = {
{"_make_score_matrix_fast",
(PyCFunction)cpairwise2__make_score_matrix_fast, METH_VARARGS, ""},
{"rint", (PyCFunction)cpairwise2_rint, METH_VARARGS|METH_KEYWORDS, ""},
{NULL, NULL, 0, NULL}
};
static char cpairwise2__doc__[] =
"Optimized C routines that complement pairwise2.py. These are called from within pairwise2.py.\n\
\n\
";
#if PY_MAJOR_VERSION >= 3
static struct PyModuleDef moduledef = {
PyModuleDef_HEAD_INIT,
"cpairwise2",
cpairwise2__doc__,
-1,
cpairwise2Methods,
NULL,
NULL,
NULL,
NULL
};
PyObject *
PyInit_cpairwise2(void)
#else
void
/* for Windows: _declspec(dllexport) initcpairwise2(void) */
initcpairwise2(void)
#endif
{
#if PY_MAJOR_VERSION >= 3
PyObject* module = PyModule_Create(&moduledef);
if (module==NULL) return NULL;
return module;
#else
(void) Py_InitModule3("cpairwise2", cpairwise2Methods, cpairwise2__doc__);
#endif
}