# biopython/Bio/Emboss/Applications.py

# Copyright 2001-2009 Brad Chapman.
# Revisions copyright 2009-2016 by Peter Cock.
# Revisions copyright 2009 by David Winter.
# Revisions copyright 2009-2010 by Leighton Pritchard.
# All rights reserved.
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Code to interact with and run various EMBOSS programs (OBSOLETE).

These classes follow the AbstractCommandline interfaces for running
programs.

We have decided to remove this module in future, and instead recommend
building your command and invoking it via the subprocess module directly.
"""

from Bio.Application import _Option
from Bio.Application import _Switch
from Bio.Application import AbstractCommandline


class _EmbossMinimalCommandLine(AbstractCommandline):
    """Base Commandline object for EMBOSS wrappers (PRIVATE).

    This is provided for subclassing, it deals with shared options
    common to all the EMBOSS tools:

    Attributes:
     - auto               Turn off prompts
     - stdout             Write standard output
     - filter             Read standard input, write standard output
     - options            Prompt for standard and additional values
     - debug              Write debug output to program.dbg
     - verbose            Report some/full command line options
     - help               Report command line options. More
                          information on associated and general
                          qualifiers can be found with -help -verbose
     - warning            Report warnings
     - error              Report errors
     - fatal              Report fatal errors
     - die                Report dying program messages

    """

    def __init__(self, cmd=None, **kwargs):
        """Register the shared EMBOSS switches, then initialize the wrapper.

        Arguments:
         - cmd      Name of the EMBOSS executable; must not be None.
         - kwargs   Initial parameter values, passed on unchanged to
                    AbstractCommandline.__init__.

        Any ``self.parameters`` list already set up by a subclass is kept,
        with the shared switches placed in front of it.
        """
        assert cmd is not None
        extra_parameters = [
            _Switch(
                ["-auto", "auto"],
                "Turn off prompts.\n\n"
                "Automatic mode disables prompting, so we recommend you set this "
                "argument all the time when calling an EMBOSS tool from Biopython.",
            ),
            _Switch(["-stdout", "stdout"], "Write standard output."),
            _Switch(
                ["-filter", "filter"], "Read standard input, write standard output."
            ),
            _Switch(
                ["-options", "options"],
                "Prompt for standard and additional values.\n\n"
                "If you are calling an EMBOSS tool from within Biopython, "
                "we DO NOT recommend using this option.",
            ),
            _Switch(["-debug", "debug"], "Write debug output to program.dbg."),
            _Switch(["-verbose", "verbose"], "Report some/full command line options"),
            _Switch(
                ["-help", "help"],
                "Report command line options.\n\n"
                "More information on associated and general qualifiers "
                "can be found with -help -verbose",
            ),
            _Switch(["-warning", "warning"], "Report warnings."),
            _Switch(["-error", "error"], "Report errors."),
            # Listed in the class docstring but previously never registered,
            # so it could not be set on any EMBOSS wrapper.
            _Switch(["-fatal", "fatal"], "Report fatal errors."),
            _Switch(["-die", "die"], "Report dying program messages."),
        ]
        try:
            # Insert extra parameters - at the start just in case there
            # are any arguments which must come last:
            self.parameters = extra_parameters + self.parameters
        except AttributeError:
            # Should we raise an error?  The subclass should have set this up!
            self.parameters = extra_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)


class _EmbossCommandLine(_EmbossMinimalCommandLine):
    """Base Commandline object for EMBOSS wrappers (PRIVATE).

    Intended for subclassing only. On top of the shared options handled
    by the minimal EMBOSS base class, this adds:

     - outfile            Output filename

    """

    def __init__(self, cmd=None, **kwargs):
        assert cmd is not None
        outfile_option = _Option(
            ["-outfile", "outfile"], "Output filename", filename=True
        )
        try:
            subclass_parameters = self.parameters
        except AttributeError:
            # The subclass is expected to have set this up; carry on with
            # just the shared option if it did not.
            subclass_parameters = []
        # -outfile goes in front, in case a subclass has arguments which
        # must come last.
        self.parameters = [outfile_option] + subclass_parameters
        _EmbossMinimalCommandLine.__init__(self, cmd, **kwargs)

    def _validate(self):
        # An output destination is needed: outfile, filter or stdout.
        # This is checked here rather than by flagging outfile as required,
        # because any one of the three is acceptable.
        output_configured = self.outfile or self.filter or self.stdout
        if output_configured:
            return _EmbossMinimalCommandLine._validate(self)
        raise ValueError(
            "You must either set outfile (output filename), "
            "or enable filter or stdout (output to stdout)."
        )


class Primer3Commandline(_EmbossCommandLine):
    """Commandline object for the Primer3 interface from EMBOSS.

    The precise set of supported arguments depends on your version of EMBOSS.
    This version accepts arguments current at EMBOSS 6.1.0:

    >>> cline = Primer3Commandline(sequence="mysequence.fas", auto=True, hybridprobe=True)
    >>> cline.explainflag = True
    >>> cline.osizeopt=20
    >>> cline.psizeopt=200
    >>> cline.outfile = "myresults.out"
    >>> cline.bogusparameter = 1967  # Invalid parameter
    Traceback (most recent call last):
        ...
    ValueError: Option name bogusparameter was not found.
    >>> print(cline)
    eprimer3 -auto -outfile=myresults.out -sequence=mysequence.fas -hybridprobe=True -psizeopt=200 -osizeopt=20 -explainflag=True

    """

    def __init__(self, cmd="eprimer3", **kwargs):
        """Initialize the class."""
        # The input sequence is the only argument marked as required.
        sequence_option = _Option(
            ["-sequence", "sequence"],
            "Sequence to choose primers from.",
            is_required=True,
        )
        # (name, help text) for every other eprimer3 argument. Each entry
        # becomes _Option(["-name", "name"], help text); the list order is
        # kept exactly as declared.
        optional_arguments = (
            # Program options:
            ("task", "Tell eprimer3 what task to perform."),
            ("hybridprobe", "Find an internal oligo to use as a hyb probe."),
            ("numreturn", "Maximum number of primer pairs to return."),
            # Sequence regions and pre-chosen primers:
            ("includedregion", "Subregion of the sequence in which to pick primers."),
            ("target", "Sequence to target for flanking primers."),
            ("excludedregion", "Regions to exclude from primer picking."),
            ("forwardinput", "Sequence of a forward primer to check."),
            ("reverseinput", "Sequence of a reverse primer to check."),
            # Primer oligo constraints:
            ("gcclamp", "The required number of Gs and Cs at the 3' of each primer."),
            ("osize", "Optimum length of a primer oligo."),
            ("minsize", "Minimum length of a primer oligo."),
            ("maxsize", "Maximum length of a primer oligo."),
            (
                "otm",
                "Melting temperature for primer oligo (OBSOLETE).\n\n"
                "Option replaced in EMBOSS 6.6.0 by -opttm",
            ),
            (
                "opttm",
                "Optimum melting temperature for a primer oligo.\n\n"
                "Option added in EMBOSS 6.6.0, replacing -otm",
            ),
            ("mintm", "Minimum melting temperature for a primer oligo."),
            ("maxtm", "Maximum melting temperature for a primer oligo."),
            (
                "maxdifftm",
                "Maximum difference in melting temperatures between "
                "forward and reverse primers.",
            ),
            ("ogcpercent", "Optimum GC% for a primer."),
            ("mingc", "Minimum GC% for a primer."),
            ("maxgc", "Maximum GC% for a primer."),
            ("saltconc", "Millimolar salt concentration in the PCR."),
            ("dnaconc", "Nanomolar concentration of annealing oligos in the PCR."),
            ("maxpolyx", "Maximum allowable mononucleotide repeat length in a primer."),
            # PCR product size:
            ("psizeopt", "Optimum size for the PCR product."),
            ("prange", "Acceptable range of length for the PCR product."),
            # PCR product melting temperature:
            ("ptmopt", "Optimum melting temperature for the PCR product."),
            ("ptmmin", "Minimum allowed melting temperature for the amplicon."),
            ("ptmmax", "Maximum allowed melting temperature for the amplicon."),
            # Internal oligo input (the name is -oexcludedregion, not
            # -oexcluderegion):
            ("oexcludedregion", "Do not pick internal oligos in this region."),
            ("oligoinput", "Sequence of the internal oligo."),
            # Internal oligo length:
            ("osizeopt", "Optimum length of internal oligo."),
            ("ominsize", "Minimum length of internal oligo."),
            ("omaxsize", "Maximum length of internal oligo."),
            # Internal oligo melting temperature:
            ("otmopt", "Optimum melting temperature of internal oligo."),
            ("otmmin", "Minimum melting temperature of internal oligo."),
            ("otmmax", "Maximum melting temperature of internal oligo."),
            # Internal oligo GC percent:
            ("ogcopt", "Optimum GC% for internal oligo."),
            ("ogcmin", "Minimum GC% for internal oligo."),
            ("ogcmax", "Maximum GC% for internal oligo."),
            # Internal oligo hybridisation conditions:
            ("osaltconc", "Millimolar concentration of salt in the hybridisation."),
            (
                "odnaconc",
                "Nanomolar concentration of internal oligo in the hybridisation.",
            ),
            # Internal oligo self complementarity:
            ("oanyself", "Maximum allowable alignment score for self-complementarity."),
            (
                "oendself",
                "Max 3'-anchored self-complementarity global alignment score.",
            ),
            ("opolyxmax", "Maximum length of mononucleotide repeat in internal oligo."),
            # Mispriming / mishybridisation libraries:
            (
                "mispriminglibraryfile",
                "File containing library of sequences to avoid amplifying",
            ),
            (
                "maxmispriming",
                "Maximum allowed similarity of primers to sequences in "
                "library specified by -mispriminglibrary",
            ),
            (
                "omishybmax",
                "Maximum alignment score for hybridisation of internal oligo to "
                "library specified by -mishyblibraryfile.",
            ),
            (
                "mishyblibraryfile",
                "Library file of seqs to avoid internal oligo hybridisation.",
            ),
            ("explainflag", "Produce output tags with eprimer3 statistics"),
        )
        parameters = [sequence_option]
        parameters.extend(
            _Option(["-" + name, name], description)
            for name, description in optional_arguments
        )
        self.parameters = parameters
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class PrimerSearchCommandline(_EmbossCommandLine):
    """Commandline object for the primersearch program from EMBOSS."""

    def __init__(self, cmd="primersearch", **kwargs):
        """Initialize the class."""
        # When this wrapper was written primersearch used -sequences as the
        # argument name. Since at least EMBOSS 5.0 (and perhaps earlier)
        # this has been -seqall instead, so both spellings are listed.
        target_sequences = _Option(
            ["-seqall", "-sequences", "sequences", "seqall"],
            "Sequence to look for the primer pairs in.",
            is_required=True,
        )
        # Likewise -primers was the original argument name; since at least
        # EMBOSS 5.0 (and perhaps earlier) this has been -infile instead.
        primer_pairs_file = _Option(
            ["-infile", "-primers", "primers", "infile"],
            "File containing the primer pairs to search for.",
            filename=True,
            is_required=True,
        )
        mismatch_percent = _Option(
            ["-mismatchpercent", "mismatchpercent"],
            "Allowed percentage mismatch (any integer value, default 0).",
            is_required=True,
        )
        nucleotide_flag = _Option(
            ["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)"
        )
        protein_flag = _Option(
            ["-sprotein", "sprotein"], "Sequences are protein (boolean)"
        )
        self.parameters = [
            target_sequences,
            primer_pairs_file,
            mismatch_percent,
            nucleotide_flag,
            protein_flag,
        ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class FDNADistCommandline(_EmbossCommandLine):
    """Commandline object for the fdnadist program from EMBOSS.

    fdnadist is the EMBOSS wrapper around the PHYLIP program dnadist,
    which calculates distance matrices from DNA sequence files.
    """

    def __init__(self, cmd="fdnadist", **kwargs):
        """Initialize the class."""
        # The two arguments flagged as required come first.
        mandatory = [
            _Option(
                ["-sequence", "sequence"],
                "seq file to use (phylip)",
                filename=True,
                is_required=True,
            ),
            _Option(["-method", "method"], "sub. model [f,k,j,l,s]", is_required=True),
        ]
        # (name, help text) pairs for the remaining plain options, in the
        # order they are registered.
        optional_table = (
            ("gamma", "gamma [g, i,n]"),
            ("ncategories", "number of rate categories (1-9)"),
            ("rate", "rate for each category"),
            ("categories", "File of substitution rate categories"),
            ("weights", "weights file"),
            ("gammacoefficient", "value for gamma (> 0.001)"),
            ("invarfrac", "proportoin of invariant sites"),
            ("ttratio", "ts/tv ratio"),
            ("freqsfrom", "use empirical base freqs"),
            ("basefreq", "specify basefreqs"),
            ("lower", "lower triangle matrix (y/N)"),
        )
        optional = [_Option(["-" + name, name], text) for name, text in optional_table]
        self.parameters = mandatory + optional
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class FTreeDistCommandline(_EmbossCommandLine):
    """Commandline object for the ftreedist program from EMBOSS.

    ftreedist is the EMBOSS wrapper around the PHYLIP program treedist,
    which calculates distance measures between phylogenetic trees.
    """

    def __init__(self, cmd="ftreedist", **kwargs):
        """Initialize the class."""
        # The input tree file is the only required argument.
        parameters = [
            _Option(
                ["-intreefile", "intreefile"],
                "tree file to score (phylip)",
                filename=True,
                is_required=True,
            )
        ]
        # Remaining options take no extra keyword arguments, so they are
        # generated from (name, help text) pairs in declaration order.
        for flag, help_text in (
            ("dtype", "distance type ([S]ymetric, [b]ranch score)"),
            (
                "pairing",
                "tree pairing method ([A]djacent pairs, all [p]ossible pairs)",
            ),
            ("style", "output style - [V]erbose, [f]ill, [s]parse"),
            ("noroot", "treat trees as rooted [N/y]"),
            ("outgrno", "which taxon to root the trees with (starts from 0)"),
        ):
            parameters.append(_Option(["-" + flag, flag], help_text))
        self.parameters = parameters
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class FNeighborCommandline(_EmbossCommandLine):
    """Commandline object for the fneighbor program from EMBOSS.

    fneighbor is the EMBOSS wrapper around the PHYLIP program neighbor,
    which calculates neighbor-joining or UPGMA trees from distance matrices.
    """

    def __init__(self, cmd="fneighbor", **kwargs):
        """Initialize the class."""

        def plain(name, description):
            # Option whose flag is simply its Python name with a leading dash.
            return _Option(["-" + name, name], description)

        distance_matrix = _Option(
            ["-datafile", "datafile"],
            "dist file to use (phylip)",
            filename=True,
            is_required=True,
        )
        self.parameters = [
            distance_matrix,
            plain("matrixtype", "is matrix square (S), upper (U) or lower (L)"),
            plain("treetype", "nj or UPGMA tree (n/u)"),
            plain("outgrno", "taxon to use as OG"),
            plain("jumble", "randommise input order (Y/n)"),
            plain("seed", "provide a random seed"),
            plain("trout", "write tree (Y/n)"),
            plain("outtreefile", "filename for output tree"),
            plain("progress", "print progress (Y/n)"),
            plain("treeprint", "print tree (Y/n)"),
        ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class FSeqBootCommandline(_EmbossCommandLine):
    """Commandline object for the fseqboot program from EMBOSS.

    fseqboot is an EMBOSS wrapper for the PHYLIP program seqboot used to
    pseudo-sample alignment files.

    The ``-categories`` and ``-justweights`` arguments were historically
    only reachable under the misspelled Python names ``catergories`` and
    ``jusweights``. The correctly spelled names ``categories`` and
    ``justweights`` are now accepted as well; the old spellings are kept
    so that existing code continues to work.
    """

    def __init__(self, cmd="fseqboot", **kwargs):
        """Initialize the class."""
        self.parameters = [
            _Option(
                ["-sequence", "sequence"],
                "seq file to sample (phylip)",
                filename=True,
                is_required=True,
            ),
            # "categories" is added as an alias. The misspelled name stays
            # last in the list so it remains the established name
            # (presumably the last entry is the one Bio.Application exposes
            # as the Python property - see its documentation).
            _Option(
                ["-categories", "categories", "catergories"],
                "file of input categories",
            ),
            _Option(["-weights", "weights"], " weights file"),
            _Option(["-test", "test"], "specify operation, default is bootstrap"),
            _Option(["-regular", "regular"], "absolute number to resample"),
            _Option(["-fracsample", "fracsample"], "fraction to resample"),
            _Option(
                ["-rewriteformat", "rewriteformat"],
                "output format ([P]hyilp, [n]exus, [x]ml",
            ),
            _Option(["-seqtype", "seqtype"], "output format ([D]na, [p]rotein, [r]na"),
            # The help text here used to be a copy of the "print progress"
            # text, which does not describe a block size.
            _Option(["-blocksize", "blocksize"], "block size for bootstrapping"),
            _Option(["-reps", "reps"], "how many replicates, defaults to 100)"),
            # Same aliasing approach as for -categories above.
            _Option(
                ["-justweights", "justweights", "jusweights"],
                "what to write out [D]atasets of just [w]eights",
            ),
            _Option(["-seed", "seed"], "specify random seed"),
            _Option(["-dotdiff", "dotdiff"], "Use dot-differencing? [Y/n]"),
        ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class FDNAParsCommandline(_EmbossCommandLine):
    """Commandline object for the fdnapars program from EMBOSS.

    fdnapars is the EMBOSS version of the PHYLIP program dnapars, which
    estimates trees from DNA sequences using parsimony. If no value is
    given for the option "-intreefile" and "-auto" is not set to true,
    the program enters "interactive mode" (and as a result fails when
    called with subprocess).
    """

    def __init__(self, cmd="fdnapars", **kwargs):
        """Initialize the class."""
        # Only the input sequence file is required.
        sequence_file = _Option(
            ["-sequence", "sequence"],
            "seq file to use (phylip)",
            filename=True,
            is_required=True,
        )
        # (name, help text) for the plain options, in registration order.
        plain_options = (
            ("intreefile", "Phylip tree file"),
            ("weights", "weights file"),
            ("maxtrees", "max trees to save during run"),
            ("thorough", "more thorough search (Y/n)"),
            ("rearrange", "Rearrange on just 1 best tree (Y/n)"),
            ("transversion", "Use tranversion parsimony (y/N)"),
            ("njumble", "number of times to randomise input order (default is 0)"),
            ("seed", "provide random seed"),
            ("outgrno", "Specify outgroup"),
            ("thresh", "Use threshold parsimony (y/N)"),
            ("threshold", "Threshold value"),
            ("trout", "Write trees to file (Y/n)"),
            ("outtreefile", "filename for output tree"),
            ("dotdiff", "Use dot-differencing? [Y/n]"),
        )
        parameters = [sequence_file]
        parameters.extend(
            _Option(["-" + name, name], help_text) for name, help_text in plain_options
        )
        self.parameters = parameters
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class FProtParsCommandline(_EmbossCommandLine):
    """Commandline object for the fprotpars program from EMBOSS.

    fprotpars is the EMBOSS version of the PHYLIP program protpars, which
    estimates trees from protein sequences using parsimony. If no value is
    given for the option "-intreefile" and "-auto" is not set to true,
    the program enters "interactive mode" (and as a result fails when
    called with subprocess).
    """

    def __init__(self, cmd="fprotpars", **kwargs):
        """Initialize the class."""
        # Keyword arguments shared by the two required file options.
        required_file = {"filename": True, "is_required": True}
        # (name, help text, extra _Option keyword arguments), in the order
        # the options are registered.
        option_table = (
            ("sequence", "seq file to use (phylip)", required_file),
            ("intreefile", "Phylip tree file to score", {}),
            ("outtreefile", "phylip tree output file", required_file),
            ("weights", "weights file", {}),
            ("whichcode", "which genetic code, [U,M,V,F,Y]]", {}),
            (
                "njumble",
                "number of times to randomise input order (default is 0)",
                {},
            ),
            ("seed", "provide random seed", {}),
            ("outgrno", "Specify outgroup", {}),
            ("thresh", "Use threshold parsimony (y/N)", {}),
            ("threshold", "Threshold value", {}),
            ("trout", "Write trees to file (Y/n)", {}),
            ("dotdiff", "Use dot-differencing? [Y/n]", {}),
        )
        self.parameters = [
            _Option(["-" + name, name], help_text, **extras)
            for name, help_text, extras in option_table
        ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class FProtDistCommandline(_EmbossCommandLine):
    """Commandline object for the fprotdist program from EMBOSS.

    fprotdist is an EMBOSS wrapper for the PHYLIP program protdist used to
    calculate distance matrices from protein sequences.

    The rate categories file is passed to the program as ``-categories``.
    Earlier versions of this wrapper emitted the misspelled flag
    ``-catergories``; the misspelled Python name ``catergories`` is still
    accepted so that existing code continues to work, and ``categories``
    is accepted as well.
    """

    def __init__(self, cmd="fprotdist", **kwargs):
        """Initialize the class."""
        self.parameters = [
            _Option(
                ["-sequence", "sequence"],
                "seq file to use (phylip)",
                filename=True,
                is_required=True,
            ),
            _Option(["-ncategories", "ncategories"], "number of rate categories (1-9)"),
            _Option(["-rate", "rate"], "rate for each category"),
            # The first name is the flag written to the command line; it
            # was previously misspelled "-catergories". The old spellings
            # are kept as aliases, with the historical Python name last
            # (presumably the entry Bio.Application exposes as the
            # property - see its documentation).
            _Option(
                ["-categories", "-catergories", "categories", "catergories"],
                "file of rates",
            ),
            _Option(["-weights", "weights"], "weights file"),
            _Option(["-method", "method"], "sub. model [j,h,d,k,s,c]"),
            _Option(["-gamma", "gamma"], "gamma [g, i,c]"),
            _Option(
                ["-gammacoefficient", "gammacoefficient"], "value for gamma (> 0.001)"
            ),
            _Option(
                ["-invarcoefficient", "invarcoefficient"],
                "float for variation of substitution rate among sites",
            ),
            _Option(["-aacateg", "aacateg"], "Choose the category to use [G,C,H]"),
            _Option(["-whichcode", "whichcode"], "genetic code [c,m,v,f,y]"),
            _Option(["-ease", "ease"], "Pob change category (float between -0 and 1)"),
            _Option(["-ttratio", "ttratio"], "Transition/transversion ratio (0-1)"),
            _Option(
                ["-basefreq", "basefreq"], "DNA base frequencies (space separated list)"
            ),
        ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class FConsenseCommandline(_EmbossCommandLine):
    """Commandline object for the fconsense program from EMBOSS.

    fconsense is an EMBOSS wrapper for the PHYLIP program consense used to
    calculate consensus trees.
    """

    def __init__(self, cmd="fconsense", **kwargs):
        """Initialize the class."""
        self.parameters = [
            _Option(
                ["-intreefile", "intreefile"],
                "file with phylip trees to make consensus from",
                filename=True,
                is_required=True,
            ),
            _Option(["-method", "method"], "consensus method [s, mr, MRE, ml]"),
            _Option(
                ["-mlfrac", "mlfrac"],
                "cut-off freq for branch to appear in consensus (0.5-1.0)",
            ),
            _Option(["-root", "root"], "treat trees as rooted (YES, no)"),
            _Option(["-outgrno", "outgrno"], "OTU to use as outgroup (starts from 0)"),
            # The help text used to repeat the -root description; -trout is
            # described as writing trees to file by the other wrappers in
            # this module.
            _Option(["-trout", "trout"], "Write trees to file (YES, no)"),
            _Option(
                ["-outtreefile", "outtreefile"], "Phylip tree output file (optional)"
            ),
        ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class WaterCommandline(_EmbossCommandLine):
    """Commandline object for the water program from EMBOSS."""

    def __init__(self, cmd="water", **kwargs):
        """Initialize the class."""
        # The two sequences to align; both are required filenames.
        sequence_inputs = [
            _Option(
                ["-asequence", "asequence"],
                "First sequence to align",
                filename=True,
                is_required=True,
            ),
            _Option(
                ["-bsequence", "bsequence"],
                "Second sequence to align",
                filename=True,
                is_required=True,
            ),
        ]
        # Required gap penalties followed by the optional matrix file.
        scoring = [
            _Option(["-gapopen", "gapopen"], "Gap open penalty", is_required=True),
            _Option(
                ["-gapextend", "gapextend"], "Gap extension penalty", is_required=True
            ),
            _Option(["-datafile", "datafile"], "Matrix file", filename=True),
        ]
        # Switches and options controlling what the report displays.
        reporting = [
            _Switch(
                ["-nobrief", "nobrief"], "Display extended identity and similarity"
            ),
            _Switch(["-brief", "brief"], "Display brief identity and similarity"),
            _Option(
                ["-similarity", "similarity"], "Display percent identity and similarity"
            ),
        ]
        # Sequence type qualifiers and the output format.
        sequence_type_and_format = [
            _Option(
                ["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)"
            ),
            _Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"),
            _Option(
                ["-aformat", "aformat"],
                "Display output in a different specified output format",
            ),
        ]
        self.parameters = (
            sequence_inputs + scoring + reporting + sequence_type_and_format
        )
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class NeedleCommandline(_EmbossCommandLine):
    """Commandline object for the needle program from EMBOSS."""

    def __init__(self, cmd="needle", **kwargs):
        """Initialize the class."""
        # Keyword-argument sets reused by several entries below.
        required_file = {"filename": True, "is_required": True}
        required = {"is_required": True}
        plain = {}
        # (parameter class, name, help text, extra keyword arguments), in
        # the order the parameters are registered.
        specification = (
            (_Option, "asequence", "First sequence to align", required_file),
            (_Option, "bsequence", "Second sequence to align", required_file),
            (_Option, "gapopen", "Gap open penalty", required),
            (_Option, "gapextend", "Gap extension penalty", required),
            (_Option, "datafile", "Matrix file", {"filename": True}),
            (_Option, "endweight", "Apply And gap penalties", plain),
            (
                _Option,
                "endopen",
                "The score taken away when an end gap is created.",
                plain,
            ),
            (
                _Option,
                "endextend",
                "The score added to the end gap penalty for each base or "
                "residue in the end gap.",
                plain,
            ),
            (_Switch, "nobrief", "Display extended identity and similarity", plain),
            (_Switch, "brief", "Display brief identity and similarity", plain),
            (
                _Option,
                "similarity",
                "Display percent identity and similarity",
                plain,
            ),
            (_Option, "snucleotide", "Sequences are nucleotide (boolean)", plain),
            (_Option, "sprotein", "Sequences are protein (boolean)", plain),
            (
                _Option,
                "aformat",
                "Display output in a different specified output format",
                plain,
            ),
        )
        self.parameters = [
            factory(["-" + name, name], help_text, **extras)
            for factory, name, help_text, extras in specification
        ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class NeedleallCommandline(_EmbossCommandLine):
    """Commandline object for the needleall program from EMBOSS."""

    def __init__(self, cmd="needleall", **kwargs):
        """Initialize the class.

        Declares the options specific to needleall, then passes ``cmd`` and
        any keyword arguments on to ``_EmbossCommandLine.__init__``.

        Arguments:
         - cmd - first argument given to the base initializer (default
           "needleall").
         - kwargs - forwarded unchanged to the base initializer.

        """
        self.parameters = [
            # Both input sequences and both gap penalties are marked required.
            _Option(
                ["-asequence", "asequence"],
                "First sequence to align",
                filename=True,
                is_required=True,
            ),
            _Option(
                ["-bsequence", "bsequence"],
                "Second sequence to align",
                filename=True,
                is_required=True,
            ),
            _Option(["-gapopen", "gapopen"], "Gap open penalty", is_required=True),
            _Option(
                ["-gapextend", "gapextend"], "Gap extension penalty", is_required=True
            ),
            _Option(["-datafile", "datafile"], "Matrix file", filename=True),
            _Option(
                ["-minscore", "minscore"],
                "Exclude alignments with scores below this threshold score.",
            ),
            _Option(["-errorfile", "errorfile"], "Error file to be written to."),
            # Help text corrected: this option controls *end* gap penalties.
            _Option(["-endweight", "endweight"], "Apply end gap penalties"),
            _Option(
                ["-endopen", "endopen"],
                "The score taken away when an end gap is created.",
            ),
            _Option(
                ["-endextend", "endextend"],
                "The score added to the end gap penalty for each base or "
                "residue in the end gap.",
            ),
            # -nobrief and -brief are declared as value-less switches.
            _Switch(
                ["-nobrief", "nobrief"], "Display extended identity and similarity"
            ),
            _Switch(["-brief", "brief"], "Display brief identity and similarity"),
            _Option(
                ["-similarity", "similarity"], "Display percent identity and similarity"
            ),
            _Option(
                ["-snucleotide", "snucleotide"], "Sequences are nucleotide (boolean)"
            ),
            _Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"),
            _Option(
                ["-aformat", "aformat"],
                "Display output in a different specified output format",
            ),
        ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class StretcherCommandline(_EmbossCommandLine):
    """Commandline wrapper for the EMBOSS stretcher program."""

    def __init__(self, cmd="stretcher", **kwargs):
        """Declare the stretcher options, then run the base initializer."""

        def is_int(value):
            # Checker used for both gap penalties: accept only int instances.
            return isinstance(value, int)

        sequence_inputs = [
            _Option(["-" + name, name], text, filename=True, is_required=True)
            for name, text in (
                ("asequence", "First sequence to align"),
                ("bsequence", "Second sequence to align"),
            )
        ]
        gap_penalties = [
            _Option(
                ["-" + name, name],
                text,
                is_required=True,
                checker_function=is_int,
            )
            for name, text in (
                ("gapopen", "Gap open penalty"),
                ("gapextend", "Gap extension penalty"),
            )
        ]
        matrix_file = _Option(["-datafile", "datafile"], "Matrix file", filename=True)
        remaining = [
            _Option(["-" + name, name], text)
            for name, text in (
                ("snucleotide", "Sequences are nucleotide (boolean)"),
                ("sprotein", "Sequences are protein (boolean)"),
                (
                    "aformat",
                    "Display output in a different specified output format",
                ),
            )
        ]
        # Fixed declaration order: inputs, penalties, matrix, everything else.
        self.parameters = sequence_inputs + gap_penalties + [matrix_file] + remaining
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class FuzznucCommandline(_EmbossCommandLine):
    """Commandline wrapper for the EMBOSS fuzznuc program."""

    def __init__(self, cmd="fuzznuc", **kwargs):
        """Register the fuzznuc options and delegate to the base class."""

        def required(name, text):
            # Option that is flagged as required.
            return _Option(["-" + name, name], text, is_required=True)

        def optional(name, text):
            # Option declared with only a name pair and a description.
            return _Option(["-" + name, name], text)

        self.parameters = [
            required("sequence", "Sequence database USA"),
            required(
                "pattern", "Search pattern, using standard IUPAC one-letter codes"
            ),
            optional("pmismatch", "Number of mismatches"),
            optional("complement", "Search complementary strand"),
            optional("rformat", "Specify the report format to output in."),
        ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class FuzzproCommandline(_EmbossCommandLine):
    """Commandline wrapper for the EMBOSS fuzzpro program."""

    def __init__(self, cmd="fuzzpro", **kwargs):
        """Register the fuzzpro options and delegate to the base class."""
        required_pairs = (
            ("sequence", "Sequence database USA"),
            ("pattern", "Search pattern, using standard IUPAC one-letter codes"),
        )
        optional_pairs = (
            ("pmismatch", "Number of mismatches"),
            ("rformat", "Specify the report format to output in."),
        )
        # Required options come first, followed by the optional ones.
        options = [
            _Option(["-" + name, name], text, is_required=True)
            for name, text in required_pairs
        ]
        for name, text in optional_pairs:
            options.append(_Option(["-" + name, name], text))
        self.parameters = options
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class Est2GenomeCommandline(_EmbossCommandLine):
    """Commandline wrapper for the EMBOSS est2genome program."""

    def __init__(self, cmd="est2genome", **kwargs):
        """Build the est2genome option list and call the base initializer."""
        mandatory = (
            ("est", "EST sequence(s)"),
            ("genome", "Genomic sequence"),
        )
        optional = (
            ("match", "Score for matching two bases"),
            ("mismatch", "Cost for mismatching two bases"),
            (
                "gappenalty",
                "Cost for deleting a single base in either sequence, "
                "excluding introns",
            ),
            ("intronpenalty", "Cost for an intron, independent of length."),
            (
                "splicepenalty",
                "Cost for an intron, independent of length "
                "and starting/ending on donor-acceptor sites",
            ),
            (
                "minscore",
                "Exclude alignments with scores below this threshold score.",
            ),
            ("reverse", "Reverse the orientation of the EST sequence"),
            ("splice", "Use donor and acceptor splice sites."),
            (
                "mode",
                "This determines the comparison mode. "
                "'both', 'forward', or 'reverse'",
            ),
            ("best", "You can print out all comparisons instead of just the best"),
            ("space", "for linear-space recursion."),
            ("shuffle", "Shuffle"),
            ("seed", "Random number seed"),
            ("align", "Show the alignment."),
            ("width", "Alignment width"),
        )
        # Required options are listed first, then the optional ones, each in
        # the order given by the tables above.
        option_list = [
            _Option(["-" + name, name], text, is_required=True)
            for name, text in mandatory
        ]
        option_list.extend(
            _Option(["-" + name, name], text) for name, text in optional
        )
        self.parameters = option_list
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class ETandemCommandline(_EmbossCommandLine):
    """Commandline wrapper for the EMBOSS etandem program."""

    def __init__(self, cmd="etandem", **kwargs):
        """Assemble the etandem options and hand over to the base class."""
        options = [
            _Option(
                ["-sequence", "sequence"],
                "Sequence",
                filename=True,
                is_required=True,
            )
        ]
        # The repeat size bounds are both flagged as required.
        for name, text in (
            ("minrepeat", "Minimum repeat size"),
            ("maxrepeat", "Maximum repeat size"),
        ):
            options.append(_Option(["-" + name, name], text, is_required=True))
        # The remaining options carry no extra flags.
        for name, text in (
            ("threshold", "Threshold score"),
            ("mismatch", "Allow N as a mismatch"),
            ("uniform", "Allow uniform consensus"),
            ("rformat", "Output report format"),
        ):
            options.append(_Option(["-" + name, name], text))
        self.parameters = options
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class EInvertedCommandline(_EmbossCommandLine):
    """Commandline object for the einverted program from EMBOSS."""

    def __init__(self, cmd="einverted", **kwargs):
        """Initialize the class.

        Declares the options specific to einverted, then passes ``cmd`` and
        any keyword arguments on to ``_EmbossCommandLine.__init__``.

        Arguments:
         - cmd - first argument given to the base initializer (default
           "einverted").
         - kwargs - forwarded unchanged to the base initializer.

        """
        self.parameters = [
            _Option(
                ["-sequence", "sequence"], "Sequence", filename=True, is_required=True
            ),
            # The gap penalty is a number rather than a path, so it is not
            # declared as a filename.
            _Option(["-gap", "gap"], "Gap penalty", is_required=True),
            _Option(
                ["-threshold", "threshold"], "Minimum score threshold", is_required=True
            ),
            _Option(["-match", "match"], "Match score", is_required=True),
            _Option(["-mismatch", "mismatch"], "Mismatch score", is_required=True),
            # The only option of this wrapper that is not flagged as required.
            _Option(
                ["-maxrepeat", "maxrepeat"],
                "Maximum separation between the start and end of repeat",
            ),
        ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class PalindromeCommandline(_EmbossCommandLine):
    """Commandline wrapper for the EMBOSS palindrome program."""

    def __init__(self, cmd="palindrome", **kwargs):
        """Declare the palindrome options (all required) and init the base."""
        sequence_option = _Option(
            ["-sequence", "sequence"], "Sequence", filename=True, is_required=True
        )
        required_settings = (
            ("minpallen", "Minimum palindrome length"),
            ("maxpallen", "Maximum palindrome length"),
            ("gaplimit", "Maximum gap between repeats"),
            ("nummismatches", "Number of mismatches allowed"),
            ("overlap", "Report overlapping matches"),
        )
        # The sequence input leads, followed by the required settings in
        # table order.
        self.parameters = [sequence_option] + [
            _Option(["-" + name, name], text, is_required=True)
            for name, text in required_settings
        ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class TranalignCommandline(_EmbossCommandLine):
    """Commandline wrapper for the EMBOSS tranalign program."""

    def __init__(self, cmd="tranalign", **kwargs):
        """Set up the tranalign options and delegate to the base class."""
        required_files = (
            ("asequence", "Nucleotide sequences to be aligned."),
            ("bsequence", "Protein sequence alignment"),
            ("outseq", "Output sequence file."),
        )
        # Three required filename options, then the optional table code.
        options = [
            _Option(["-" + name, name], text, filename=True, is_required=True)
            for name, text in required_files
        ]
        options.append(_Option(["-table", "table"], "Code to use"))
        self.parameters = options
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class DiffseqCommandline(_EmbossCommandLine):
    """Commandline wrapper for the EMBOSS diffseq program."""

    def __init__(self, cmd="diffseq", **kwargs):
        """Set up the diffseq options and delegate to the base class."""

        def required_file(name, text):
            # Required option whose value is declared as a filename.
            return _Option(["-" + name, name], text, filename=True, is_required=True)

        self.parameters = [
            required_file("asequence", "First sequence to compare"),
            required_file("bsequence", "Second sequence to compare"),
            # Required, but not a filename.
            _Option(
                ["-wordsize", "wordsize"],
                "Word size to use for comparisons (10 default)",
                is_required=True,
            ),
            required_file("aoutfeat", "File for output of first sequence's features"),
            required_file("boutfeat", "File for output of second sequence's features"),
            _Option(["-rformat", "rformat"], "Output report file format"),
        ]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


class IepCommandline(_EmbossCommandLine):
    """Commandline for EMBOSS iep: calculate isoelectric point and charge.

    Examples
    --------
    >>> from Bio.Emboss.Applications import IepCommandline
    >>> iep_cline = IepCommandline(sequence="proteins.faa",
    ...                            outfile="proteins.txt")
    >>> print(iep_cline)
    iep -outfile=proteins.txt -sequence=proteins.faa

    The resulting command line can be run by calling iep_cline() or through
    the Python subprocess module, as described in the Biopython tutorial.

    """

    def __init__(self, cmd="iep", **kwargs):
        """Declare the iep options, then run the base initializer."""
        # Trailing text shared by the four count options: a blank line, an
        # indented note, and a final indented (otherwise empty) line.
        count_note = "\n\n" + " " * 20 + "Integer 0 (default) or more.\n" + " " * 20
        count_options = [
            _Option(["-" + name, name], label + count_note)
            for name, label in (
                ("amino", "Number of N-termini"),
                ("carboxyl", "Number of C-termini"),
                ("lysinemodified", "Number of modified lysines"),
                ("disulphides", "Number of disulphide bridges"),
            )
        ]
        sequence_option = _Option(
            ["-sequence", "sequence"],
            "Protein sequence(s) filename",
            filename=True,
            is_required=True,
        )
        # Should we implement the -termini switch as well?
        notermini_option = _Option(
            ["-notermini", "notermini"],
            "Exclude (True) or include (False) charge at N and C terminus.",
        )
        self.parameters = [sequence_option] + count_options + [notermini_option]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


# seqret uses -outseq, not -outfile, so use the base class:
class SeqretCommandline(_EmbossMinimalCommandLine):
    """Commandline object for the seqret program from EMBOSS.

    This tool allows you to interconvert between different sequence file
    formats (e.g. GenBank to FASTA). Combining Biopython's Bio.SeqIO module
    with seqret using a suitable intermediate file format can allow you to
    read/write to an even wider range of file formats.

    This wrapper currently only supports the core functionality, things like
    feature tables (in EMBOSS 6.1.0 onwards) are not yet included.
    """

    def __init__(self, cmd="seqret", **kwargs):
        """Initialize the class.

        Declares the seqret options, then passes ``cmd`` and any keyword
        arguments on to ``_EmbossMinimalCommandLine.__init__``.

        Arguments:
         - cmd - first argument given to the base initializer (default
           "seqret").
         - kwargs - forwarded unchanged to the base initializer.

        """
        self.parameters = [
            # Neither the input nor the output is flagged as required here;
            # _validate checks them instead.
            _Option(
                ["-sequence", "sequence"], "Input sequence(s) filename", filename=True
            ),
            _Option(["-outseq", "outseq"], "Output sequence file.", filename=True),
            _Option(
                ["-sformat", "sformat"],
                "Input sequence(s) format (e.g. fasta, genbank)",
            ),
            _Option(
                ["-osformat", "osformat"],
                "Output sequence(s) format (e.g. fasta, genbank)",
            ),
        ]
        _EmbossMinimalCommandLine.__init__(self, cmd, **kwargs)

    def _validate(self):
        """Check that both an output and an input have been specified.

        Returns whatever ``_EmbossMinimalCommandLine._validate`` returns.

        Raises:
         - ValueError - if none of outseq, filter or stdout is set, or if
           neither sequence nor filter is set.

        """
        # Check the outseq, filter, or stdout option has been set.
        # We can't simply do this via the required flag for the outseq
        # output - this seems the simplest solution.
        if not (self.outseq or self.filter or self.stdout):
            raise ValueError(
                "You must either set outseq (output filename), "
                "or enable filter or stdout (output to stdout)."
            )
        # Only sequence and filter are consulted for the input: the wrapper
        # declares no separate stdin option, and reading an undeclared
        # attribute here would raise AttributeError instead of ValueError.
        if not (self.sequence or self.filter):
            raise ValueError(
                "You must either set sequence (input filename), "
                "or enable filter (input from stdin)."
            )
        return _EmbossMinimalCommandLine._validate(self)


class SeqmatchallCommandline(_EmbossCommandLine):
    """Commandline wrapper for the EMBOSS seqmatchall program.

    e.g.
    >>> cline = SeqmatchallCommandline(sequence="opuntia.fasta", outfile="opuntia.txt")
    >>> cline.auto = True
    >>> cline.wordsize = 18
    >>> cline.aformat = "pair"
    >>> print(cline)
    seqmatchall -auto -outfile=opuntia.txt -sequence=opuntia.fasta -wordsize=18 -aformat=pair

    """

    def __init__(self, cmd="seqmatchall", **kwargs):
        """Declare the seqmatchall options, then run the base initializer."""
        # The sequence set is the only option flagged as required.
        sequence_set = _Option(
            ["-sequence", "sequence"],
            "Readable set of sequences",
            filename=True,
            is_required=True,
        )
        word_size = _Option(
            ["-wordsize", "wordsize"], "Word size (Integer 2 or more, default 4)"
        )
        alignment_format = _Option(
            ["-aformat", "aformat"],
            "Display output in a different specified output format",
        )
        self.parameters = [sequence_set, word_size, alignment_format]
        _EmbossCommandLine.__init__(self, cmd, **kwargs)


# Script entry point: only runs when this module is executed directly.
if __name__ == "__main__":
    # Imported inside the guard, so a plain import of this module does not
    # load the helper.
    from Bio._utils import run_doctest

    # Presumably runs the doctests embedded in this module's docstrings.
    run_doctest()