mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
$ ruff check --fix --select=I \ --config=lint.isort.force-single-line=true \ --config=lint.isort.order-by-type=false \ BioSQL/ Bio/ Tests/ Scripts/ Doc/ setup.py Using ruff version 0.4.10
489 lines
20 KiB
Python
489 lines
20 KiB
Python
# Copyright 2009 by Cymon J. Cox. All rights reserved.
|
|
#
|
|
# This file is part of the Biopython distribution and governed by your
|
|
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
|
|
# Please see the LICENSE file that should have been included as part of this
|
|
# package.
|
|
"""Command line wrapper for the multiple alignment program Clustal W."""
|
|
|
|
import os
|
|
|
|
from Bio.Application import _Option
|
|
from Bio.Application import _Switch
|
|
from Bio.Application import AbstractCommandline
|
|
|
|
|
|
class ClustalwCommandline(AbstractCommandline):
    """Command line wrapper for clustalw (version one or two).

    http://www.clustal.org/

    Notes
    -----
    Last checked against versions: 1.83 and 2.1

    References
    ----------
    Larkin MA, Blackshields G, Brown NP, Chenna R, McGettigan PA,
    McWilliam H, Valentin F, Wallace IM, Wilm A, Lopez R, Thompson JD,
    Gibson TJ, Higgins DG. (2007). Clustal W and Clustal X version 2.0.
    Bioinformatics, 23, 2947-2948.

    Examples
    --------
    >>> from Bio.Align.Applications import ClustalwCommandline
    >>> in_file = "unaligned.fasta"
    >>> clustalw_cline = ClustalwCommandline("clustalw2", infile=in_file)
    >>> print(clustalw_cline)
    clustalw2 -infile=unaligned.fasta

    You would typically run the command line with clustalw_cline() or via
    the Python subprocess module, as described in the Biopython tutorial.

    """

    # TODO - Should we default to cmd="clustalw2" now?
    def __init__(self, cmd="clustalw", **kwargs):
        """Initialize the class.

        Builds the list of supported Clustal W parameters and then hands
        ``cmd`` and ``kwargs`` on to ``AbstractCommandline.__init__``.

        Arguments:
         - cmd - name (or path) of the executable, "clustalw" by default.
         - kwargs - passed through unchanged to the base class; presumably
           these are initial values for the parameters declared below
           (see the class docstring example using ``infile=...``).

        Every parameter is declared with four spellings of its name
        ("-name", "-NAME", "NAME", "name").  Where a ``checker_function``
        is given it is a predicate over the candidate value; how and when
        it is invoked is decided by ``Bio.Application`` (not visible here).
        """
        self.parameters = [
            _Option(
                ["-infile", "-INFILE", "INFILE", "infile"],
                "Input sequences.",
                filename=True,
            ),
            _Option(
                ["-profile1", "-PROFILE1", "PROFILE1", "profile1"],
                "Profiles (old alignment).",
                filename=True,
            ),
            _Option(
                ["-profile2", "-PROFILE2", "PROFILE2", "profile2"],
                "Profiles (old alignment).",
                filename=True,
            ),
            # ################# VERBS (do things) #############################
            _Switch(
                ["-options", "-OPTIONS", "OPTIONS", "options"],
                "List the command line parameters",
            ),
            _Switch(
                ["-help", "-HELP", "HELP", "help"], "Outline the command line params."
            ),
            _Switch(
                ["-check", "-CHECK", "CHECK", "check"],
                "Outline the command line params.",
            ),
            _Switch(
                ["-fullhelp", "-FULLHELP", "FULLHELP", "fullhelp"],
                "Output full help content.",
            ),
            _Switch(
                ["-align", "-ALIGN", "ALIGN", "align"], "Do full multiple alignment."
            ),
            _Switch(["-tree", "-TREE", "TREE", "tree"], "Calculate NJ tree."),
            _Switch(
                ["-pim", "-PIM", "PIM", "pim"],
                "Output percent identity matrix (while calculating the tree).",
            ),
            _Option(
                ["-bootstrap", "-BOOTSTRAP", "BOOTSTRAP", "bootstrap"],
                "Bootstrap a NJ tree (n= number of bootstraps; def. = 1000).",
                checker_function=lambda x: isinstance(x, int),
            ),
            _Switch(
                ["-convert", "-CONVERT", "CONVERT", "convert"],
                "Output the input sequences in a different file format.",
            ),
            # #################### PARAMETERS (set things) #########################
            # ***General settings:****
            # Makes no sense in biopython
            # _Option(["-interactive", "-INTERACTIVE", "INTERACTIVE", "interactive"],
            #         [],
            #         lambda x: 0,  # Does not take value
            #         False,
            #         "read command line, then enter normal interactive menus",
            #         False),
            _Switch(
                ["-quicktree", "-QUICKTREE", "QUICKTREE", "quicktree"],
                "Use FAST algorithm for the alignment guide tree",
            ),
            _Option(
                ["-type", "-TYPE", "TYPE", "type"],
                "PROTEIN or DNA sequences",
                checker_function=lambda x: x in ["PROTEIN", "DNA", "protein", "dna"],
            ),
            _Switch(
                ["-negative", "-NEGATIVE", "NEGATIVE", "negative"],
                "Protein alignment with negative values in matrix",
            ),
            _Option(
                ["-outfile", "-OUTFILE", "OUTFILE", "outfile"],
                "Output sequence alignment file name",
                filename=True,
            ),
            _Option(
                ["-output", "-OUTPUT", "OUTPUT", "output"],
                "Output format: CLUSTAL(default), GCG, GDE, PHYLIP, PIR, NEXUS and FASTA",
                checker_function=lambda x: x
                in [
                    "CLUSTAL",
                    "GCG",
                    "GDE",
                    "PHYLIP",
                    "PIR",
                    "NEXUS",
                    "FASTA",
                    "clustal",
                    "gcg",
                    "gde",
                    "phylip",
                    "pir",
                    "nexus",
                    "fasta",
                ],
            ),
            _Option(
                ["-outorder", "-OUTORDER", "OUTORDER", "outorder"],
                "Output taxon order: INPUT or ALIGNED",
                checker_function=lambda x: x
                in ["INPUT", "input", "ALIGNED", "aligned"],
            ),
            _Option(
                ["-case", "-CASE", "CASE", "case"],
                "LOWER or UPPER (for GDE output only)",
                checker_function=lambda x: x in ["UPPER", "upper", "LOWER", "lower"],
            ),
            _Option(
                ["-seqnos", "-SEQNOS", "SEQNOS", "seqnos"],
                "OFF or ON (for Clustal output only)",
                checker_function=lambda x: x in ["ON", "on", "OFF", "off"],
            ),
            _Option(
                ["-seqno_range", "-SEQNO_RANGE", "SEQNO_RANGE", "seqno_range"],
                "OFF or ON (NEW- for all output formats)",
                checker_function=lambda x: x in ["ON", "on", "OFF", "off"],
            ),
            _Option(
                ["-range", "-RANGE", "RANGE", "range"],
                "Sequence range to write starting m to m+n. "
                "Input as string eg. '24,200'",
            ),
            _Option(
                ["-maxseqlen", "-MAXSEQLEN", "MAXSEQLEN", "maxseqlen"],
                "Maximum allowed input sequence length",
                checker_function=lambda x: isinstance(x, int),
            ),
            _Switch(
                ["-quiet", "-QUIET", "QUIET", "quiet"],
                "Reduce console output to minimum",
            ),
            _Option(
                ["-stats", "-STATS", "STATS", "stats"],
                "Log some alignment statistics to file",
                filename=True,
            ),
            # ***Fast Pairwise Alignments:***
            _Option(
                ["-ktuple", "-KTUPLE", "KTUPLE", "ktuple"],
                "Word size",
                checker_function=lambda x: (isinstance(x, (float, int))),
            ),
            _Option(
                ["-topdiags", "-TOPDIAGS", "TOPDIAGS", "topdiags"],
                "Number of best diags.",
                checker_function=lambda x: (isinstance(x, (float, int))),
            ),
            _Option(
                ["-window", "-WINDOW", "WINDOW", "window"],
                "Window around best diags.",
                checker_function=lambda x: (isinstance(x, (float, int))),
            ),
            _Option(
                ["-pairgap", "-PAIRGAP", "PAIRGAP", "pairgap"],
                "Gap penalty",
                checker_function=lambda x: (isinstance(x, (float, int))),
            ),
            _Option(
                ["-score", "-SCORE", "SCORE", "score"],
                "Either: PERCENT or ABSOLUTE",
                checker_function=lambda x: x
                in ["percent", "PERCENT", "absolute", "ABSOLUTE"],
            ),
            # ***Slow Pairwise Alignments:***
            _Option(
                ["-pwmatrix", "-PWMATRIX", "PWMATRIX", "pwmatrix"],
                "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                checker_function=lambda x: (
                    x
                    in [
                        "BLOSUM",
                        "PAM",
                        "GONNET",
                        "ID",
                        "blosum",
                        "pam",
                        "gonnet",
                        "id",
                    ]
                    or os.path.exists(x)
                ),
                filename=True,
            ),
            _Option(
                ["-pwdnamatrix", "-PWDNAMATRIX", "PWDNAMATRIX", "pwdnamatrix"],
                "DNA weight matrix=IUB, CLUSTALW or filename",
                checker_function=lambda x: (
                    x in ["IUB", "CLUSTALW", "iub", "clustalw"] or os.path.exists(x)
                ),
                filename=True,
            ),
            _Option(
                ["-pwgapopen", "-PWGAPOPEN", "PWGAPOPEN", "pwgapopen"],
                "Gap opening penalty",
                checker_function=lambda x: (isinstance(x, (float, int))),
            ),
            _Option(
                ["-pwgapext", "-PWGAPEXT", "PWGAPEXT", "pwgapext"],
                "Gap extension penalty",
                checker_function=lambda x: (isinstance(x, (float, int))),
            ),
            # ***Multiple Alignments:***
            _Option(
                ["-newtree", "-NEWTREE", "NEWTREE", "newtree"],
                "Output file name for newly created guide tree",
                filename=True,
            ),
            _Option(
                ["-usetree", "-USETREE", "USETREE", "usetree"],
                "File name of guide tree",
                # Pass os.path.exists itself as the predicate.  The previous
                # ``lambda x: os.path.exists`` returned the (always truthy)
                # function object, so the existence check never ran.
                checker_function=os.path.exists,
                filename=True,
            ),
            _Option(
                ["-matrix", "-MATRIX", "MATRIX", "matrix"],
                "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                checker_function=lambda x: (
                    x
                    in [
                        "BLOSUM",
                        "PAM",
                        "GONNET",
                        "ID",
                        "blosum",
                        "pam",
                        "gonnet",
                        "id",
                    ]
                    or os.path.exists(x)
                ),
                filename=True,
            ),
            _Option(
                ["-dnamatrix", "-DNAMATRIX", "DNAMATRIX", "dnamatrix"],
                "DNA weight matrix=IUB, CLUSTALW or filename",
                checker_function=lambda x: (
                    x in ["IUB", "CLUSTALW", "iub", "clustalw"] or os.path.exists(x)
                ),
                filename=True,
            ),
            _Option(
                ["-gapopen", "-GAPOPEN", "GAPOPEN", "gapopen"],
                "Gap opening penalty",
                checker_function=lambda x: (isinstance(x, (float, int))),
            ),
            _Option(
                ["-gapext", "-GAPEXT", "GAPEXT", "gapext"],
                "Gap extension penalty",
                checker_function=lambda x: (isinstance(x, (float, int))),
            ),
            _Switch(
                ["-endgaps", "-ENDGAPS", "ENDGAPS", "endgaps"],
                "No end gap separation pen.",
            ),
            _Option(
                ["-gapdist", "-GAPDIST", "GAPDIST", "gapdist"],
                "Gap separation pen. range",
                checker_function=lambda x: (isinstance(x, (float, int))),
            ),
            _Switch(
                ["-nopgap", "-NOPGAP", "NOPGAP", "nopgap"], "Residue-specific gaps off"
            ),
            _Switch(["-nohgap", "-NOHGAP", "NOHGAP", "nohgap"], "Hydrophilic gaps off"),
            _Switch(
                ["-hgapresidues", "-HGAPRESIDUES", "HGAPRESIDUES", "hgapresidues"],
                "List hydrophilic res.",
            ),
            _Option(
                ["-maxdiv", "-MAXDIV", "MAXDIV", "maxdiv"],
                "% ident. for delay",
                checker_function=lambda x: (isinstance(x, (float, int))),
            ),
            # Already handled in General Settings section, but appears a second
            # time under Multiple Alignments in the help
            # _Option(["-type", "-TYPE", "TYPE", "type"],
            #         "PROTEIN or DNA",
            #         checker_function=lambda x: x in ["PROTEIN", "DNA",
            #                                          "protein", "dna"]),
            _Option(
                ["-transweight", "-TRANSWEIGHT", "TRANSWEIGHT", "transweight"],
                "Transitions weighting",
                checker_function=lambda x: (isinstance(x, (float, int))),
            ),
            _Option(
                ["-iteration", "-ITERATION", "ITERATION", "iteration"],
                "NONE or TREE or ALIGNMENT",
                checker_function=lambda x: x
                in ["NONE", "TREE", "ALIGNMENT", "none", "tree", "alignment"],
            ),
            _Option(
                ["-numiter", "-NUMITER", "NUMITER", "numiter"],
                "maximum number of iterations to perform",
                checker_function=lambda x: isinstance(x, int),
            ),
            _Switch(
                ["-noweights", "-NOWEIGHTS", "NOWEIGHTS", "noweights"],
                "Disable sequence weighting",
            ),
            # ***Profile Alignments:***
            _Switch(
                ["-profile", "-PROFILE", "PROFILE", "profile"],
                "Merge two alignments by profile alignment",
            ),
            _Option(
                ["-newtree1", "-NEWTREE1", "NEWTREE1", "newtree1"],
                "Output file name for new guide tree of profile1",
                filename=True,
            ),
            _Option(
                ["-newtree2", "-NEWTREE2", "NEWTREE2", "newtree2"],
                "Output file for new guide tree of profile2",
                filename=True,
            ),
            _Option(
                ["-usetree1", "-USETREE1", "USETREE1", "usetree1"],
                "File name of guide tree for profile1",
                # Real existence check (was a lambda that always passed).
                checker_function=os.path.exists,
                filename=True,
            ),
            _Option(
                ["-usetree2", "-USETREE2", "USETREE2", "usetree2"],
                "File name of guide tree for profile2",
                # Real existence check (was a lambda that always passed).
                checker_function=os.path.exists,
                filename=True,
            ),
            # ***Sequence to Profile Alignments:***
            _Switch(
                ["-sequences", "-SEQUENCES", "SEQUENCES", "sequences"],
                "Sequentially add profile2 sequences to profile1 alignment",
            ),
            # These are already handled in the Multiple Alignments section,
            # but appear a second time here in the help.
            # _Option(["-newtree", "-NEWTREE", "NEWTREE", "newtree"],
            #         "File for new guide tree",
            #         filename=True),
            # _Option(["-usetree", "-USETREE", "USETREE", "usetree"],
            #         "File for old guide tree",
            #         checker_function=os.path.exists,
            #         filename=True),
            # ***Structure Alignments:***
            _Switch(
                ["-nosecstr1", "-NOSECSTR1", "NOSECSTR1", "nosecstr1"],
                "Do not use secondary structure-gap penalty mask for profile 1",
            ),
            _Switch(
                ["-nosecstr2", "-NOSECSTR2", "NOSECSTR2", "nosecstr2"],
                "Do not use secondary structure-gap penalty mask for profile 2",
            ),
            _Option(
                ["-secstrout", "-SECSTROUT", "SECSTROUT", "secstrout"],
                "STRUCTURE or MASK or BOTH or NONE output in alignment file",
                checker_function=lambda x: x
                in [
                    "STRUCTURE",
                    "MASK",
                    "BOTH",
                    "NONE",
                    "structure",
                    "mask",
                    "both",
                    "none",
                ],
            ),
            _Option(
                ["-helixgap", "-HELIXGAP", "HELIXGAP", "helixgap"],
                "Gap penalty for helix core residues",
                checker_function=lambda x: (isinstance(x, (float, int))),
            ),
            _Option(
                ["-strandgap", "-STRANDGAP", "STRANDGAP", "strandgap"],
                "gap penalty for strand core residues",
                checker_function=lambda x: (isinstance(x, (float, int))),
            ),
            _Option(
                ["-loopgap", "-LOOPGAP", "LOOPGAP", "loopgap"],
                "Gap penalty for loop regions",
                checker_function=lambda x: (isinstance(x, (float, int))),
            ),
            _Option(
                ["-terminalgap", "-TERMINALGAP", "TERMINALGAP", "terminalgap"],
                "Gap penalty for structure termini",
                checker_function=lambda x: (isinstance(x, (float, int))),
            ),
            _Option(
                ["-helixendin", "-HELIXENDIN", "HELIXENDIN", "helixendin"],
                "Number of residues inside helix to be treated as terminal",
                checker_function=lambda x: isinstance(x, int),
            ),
            _Option(
                ["-helixendout", "-HELIXENDOUT", "HELIXENDOUT", "helixendout"],
                "Number of residues outside helix to be treated as terminal",
                checker_function=lambda x: isinstance(x, int),
            ),
            _Option(
                ["-strandendin", "-STRANDENDIN", "STRANDENDIN", "strandendin"],
                "Number of residues inside strand to be treated as terminal",
                checker_function=lambda x: isinstance(x, int),
            ),
            _Option(
                ["-strandendout", "-STRANDENDOUT", "STRANDENDOUT", "strandendout"],
                "Number of residues outside strand to be treated as terminal",
                checker_function=lambda x: isinstance(x, int),
            ),
            # ***Trees:***
            _Option(
                ["-outputtree", "-OUTPUTTREE", "OUTPUTTREE", "outputtree"],
                "nj OR phylip OR dist OR nexus",
                checker_function=lambda x: x
                in ["NJ", "PHYLIP", "DIST", "NEXUS", "nj", "phylip", "dist", "nexus"],
            ),
            _Option(
                ["-seed", "-SEED", "SEED", "seed"],
                "Seed number for bootstraps.",
                checker_function=lambda x: isinstance(x, int),
            ),
            _Switch(
                ["-kimura", "-KIMURA", "KIMURA", "kimura"], "Use Kimura's correction."
            ),
            _Switch(
                ["-tossgaps", "-TOSSGAPS", "TOSSGAPS", "tossgaps"],
                "Ignore positions with gaps.",
            ),
            _Option(
                ["-bootlabels", "-BOOTLABELS", "BOOTLABELS", "bootlabels"],
                "Node OR branch position of bootstrap values in tree display",
                checker_function=lambda x: x in ["NODE", "BRANCH", "node", "branch"],
            ),
            _Option(
                ["-clustering", "-CLUSTERING", "CLUSTERING", "clustering"],
                "NJ or UPGMA",
                checker_function=lambda x: x in ["NJ", "UPGMA", "nj", "upgma"],
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed as a script: run the doctests via Biopython's helper.
    from Bio._utils import run_doctest as _run_module_doctests

    _run_module_doctests()
|