mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
@ -3350,8 +3350,8 @@ class AlignmentsAbstractBaseClass(ABC):
|
|||||||
|
|
||||||
|
|
||||||
class Alignments(AlignmentsAbstractBaseClass, list): # noqa: D101
|
class Alignments(AlignmentsAbstractBaseClass, list): # noqa: D101
|
||||||
def __init__(self): # noqa: D107
|
def __init__(self, alignments=()): # noqa: D107
|
||||||
super().__init__()
|
super().__init__(alignments)
|
||||||
self._index = -1
|
self._index = -1
|
||||||
|
|
||||||
def __next__(self):
|
def __next__(self):
|
||||||
|
@ -161,6 +161,9 @@ Douglas R. Cavener: ``Comparison of the consensus sequence flanking translationa
|
|||||||
Timothy L. Bailey and Charles Elkan: ``Fitting a mixture model by expectation maximization to discover motifs in biopolymers'', \textit{Proceedings of the Second International Conference on Intelligent Systems for Molecular Biology} 28--36. AAAI Press, Menlo Park, California (1994).
|
Timothy L. Bailey and Charles Elkan: ``Fitting a mixture model by expectation maximization to discover motifs in biopolymers'', \textit{Proceedings of the Second International Conference on Intelligent Systems for Molecular Biology} 28--36. AAAI Press, Menlo Park, California (1994).
|
||||||
\bibitem{chapman2000}
|
\bibitem{chapman2000}
|
||||||
Brad Chapman and Jeff Chang: ``Biopython: Python tools for computational biology''. \textit{ACM SIGBIO Newsletter} {\bf 20} (2): 15--19 (August 2000).
|
Brad Chapman and Jeff Chang: ``Biopython: Python tools for computational biology''. \textit{ACM SIGBIO Newsletter} {\bf 20} (2): 15--19 (August 2000).
|
||||||
|
\bibitem{darling2004}
|
||||||
|
Aaron E. Darling, Bob Mau, Frederick R. Blattner, Nicole T. Perna: ``Mauve: Multiple alignment of conserved genomic sequence with rearrangements.'' \textit{Genome Research} {\bf 14} (7): 1394--1403 (2004).
|
||||||
|
\url{https://doi.org/10.1101/gr.2289704}
|
||||||
\bibitem{dayhoff1978}
|
\bibitem{dayhoff1978}
|
||||||
M.O. Dayhoff, R.M. Schwartz, and B.C. Orcutt: ``A Model of Evolutionary Change in Proteins.'' \textit{Atlas of Protein Sequence and Structure}, Volume 5, Supplement 3, 1978: 345--352. The National Biomedical Research Foundation, 1979.
|
M.O. Dayhoff, R.M. Schwartz, and B.C. Orcutt: ``A Model of Evolutionary Change in Proteins.'' \textit{Atlas of Protein Sequence and Structure}, Volume 5, Supplement 3, 1978: 345--352. The National Biomedical Research Foundation, 1979.
|
||||||
\bibitem{dehoon2004}
|
\bibitem{dehoon2004}
|
||||||
|
@ -770,24 +770,6 @@ The consensus sequence and secondary structure are associated with the sequence
|
|||||||
'consensus sequence': 'KVKFKYKGEEKEVDISKIKKVWRVGKMVSFTYDD.NGKTGRGAVSEKDAPKELLsMLuK'}
|
'consensus sequence': 'KVKFKYKGEEKEVDISKIKKVWRVGKMVSFTYDD.NGKTGRGAVSEKDAPKELLsMLuK'}
|
||||||
\end{minted}
|
\end{minted}
|
||||||
|
|
||||||
\subsection{EMBOSS}
|
|
||||||
\label{subsec:align_emboss}
|
|
||||||
|
|
||||||
\subsection{GSG Multiple Sequence Format (MSF)}
|
|
||||||
\label{subsec:align_msf}
|
|
||||||
|
|
||||||
\subsection{Exonerate}
|
|
||||||
\label{subsec:align_exonerate}
|
|
||||||
|
|
||||||
\subsection{Nexus}
|
|
||||||
\label{subsec:align_nexus}
|
|
||||||
|
|
||||||
\subsection{Mauve eXtended Multi-FastA (xmfa) format}
|
|
||||||
\label{subsec:align_mauve}
|
|
||||||
|
|
||||||
\subsection{Tabular output from BLAST or FASTA}
|
|
||||||
\label{subsec:align_tabular}
|
|
||||||
|
|
||||||
\subsection{PHYLIP output files}
|
\subsection{PHYLIP output files}
|
||||||
\label{subsec:align_phylip}
|
\label{subsec:align_phylip}
|
||||||
|
|
||||||
@ -923,12 +905,320 @@ True
|
|||||||
['CYS1_DICDI', 'ALEU_HORVU', 'CATH_HUMAN']
|
['CYS1_DICDI', 'ALEU_HORVU', 'CATH_HUMAN']
|
||||||
\end{minted}
|
\end{minted}
|
||||||
|
|
||||||
|
\subsection{EMBOSS}
|
||||||
|
\label{subsec:align_emboss}
|
||||||
|
|
||||||
|
\subsection{GCG Multiple Sequence Format (MSF)}
|
||||||
|
\label{subsec:align_msf}
|
||||||
|
|
||||||
|
\subsection{Exonerate}
|
||||||
|
\label{subsec:align_exonerate}
|
||||||
|
|
||||||
|
\subsection{Nexus}
|
||||||
|
\label{subsec:align_nexus}
|
||||||
|
|
||||||
|
\subsection{Tabular output from BLAST or FASTA}
|
||||||
|
\label{subsec:align_tabular}
|
||||||
|
|
||||||
\subsection{HH-suite output files}
|
\subsection{HH-suite output files}
|
||||||
\label{subsec:align_hhr}
|
\label{subsec:align_hhr}
|
||||||
|
|
||||||
\subsection{A2M}
|
\subsection{A2M}
|
||||||
\label{subsec:align_a2m}
|
\label{subsec:align_a2m}
|
||||||
|
|
||||||
|
\subsection{Mauve eXtended Multi-FastA (xmfa) format}
|
||||||
|
\label{subsec:align_mauve}
|
||||||
|
|
||||||
|
Mauve~\cite{darling2004} is a software package for constructing multiple genome alignments. These alignments are stored in the eXtended Multi-FastA (xmfa) format.
|
||||||
|
Depending on how exactly \verb|progressiveMauve| (the aligner program in Mauve) was called, the xmfa format is slightly different.
|
||||||
|
|
||||||
|
If \verb|progressiveMauve| is called with a single sequence input file, as in
|
||||||
|
\begin{minted}{text}
|
||||||
|
progressiveMauve combined.fa --output=combined.xmfa ...
|
||||||
|
\end{minted}
|
||||||
|
where \verb|combined.fa| contains the genome sequences:
|
||||||
|
\begin{minted}{text}
|
||||||
|
>equCab1
|
||||||
|
GAAAAGGAAAGTACGGCCCGGCCACTCCGGGTGTGTGCTAGGAGGGCTTA
|
||||||
|
>mm9
|
||||||
|
GAAGAGGAAAAGTAGATCCCTGGCGTCCGGAGCTGGGACGT
|
||||||
|
>canFam2
|
||||||
|
CAAGCCCTGCGCGCTCAGCCGGAGTGTCCCGGGCCCTGCTTTCCTTTTC
|
||||||
|
\end{minted}
|
||||||
|
then the output file \verb|combined.xmfa| is as follows:
|
||||||
|
\begin{minted}{text}
|
||||||
|
#FormatVersion Mauve1
|
||||||
|
#Sequence1File combined.fa
|
||||||
|
#Sequence1Entry 1
|
||||||
|
#Sequence1Format FastA
|
||||||
|
#Sequence2File combined.fa
|
||||||
|
#Sequence2Entry 2
|
||||||
|
#Sequence2Format FastA
|
||||||
|
#Sequence3File combined.fa
|
||||||
|
#Sequence3Entry 3
|
||||||
|
#Sequence3Format FastA
|
||||||
|
#BackboneFile combined.xmfa.bbcols
|
||||||
|
> 1:2-49 - combined.fa
|
||||||
|
AAGCCCTCCTAGCACACACCCGGAGTGG-CCGGGCCGTACTTTCCTTTT
|
||||||
|
> 2:0-0 + combined.fa
|
||||||
|
-------------------------------------------------
|
||||||
|
> 3:2-48 + combined.fa
|
||||||
|
AAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGCTTTCCTTTT
|
||||||
|
=
|
||||||
|
> 1:1-1 + combined.fa
|
||||||
|
G
|
||||||
|
=
|
||||||
|
> 1:50-50 + combined.fa
|
||||||
|
A
|
||||||
|
=
|
||||||
|
> 2:1-41 + combined.fa
|
||||||
|
GAAGAGGAAAAGTAGATCCCTGGCGTCCGGAGCTGGGACGT
|
||||||
|
=
|
||||||
|
> 3:1-1 + combined.fa
|
||||||
|
C
|
||||||
|
=
|
||||||
|
> 3:49-49 + combined.fa
|
||||||
|
C
|
||||||
|
=
|
||||||
|
\end{minted}
|
||||||
|
with numbers (1, 2, 3) referring to the input genome sequences for horse (\verb+equCab1+), mouse (\verb+mm9+), and dog (\verb+canFam2+), respectively.
|
||||||
|
This xmfa file consists of six alignment blocks, separated by \verb|=| characters. Use \verb|Align.parse| to extract these alignments:
|
||||||
|
%doctest ../Tests/Mauve lib:numpy
|
||||||
|
\begin{minted}{pycon}
|
||||||
|
>>> from Bio import Align
|
||||||
|
>>> alignments = Align.parse("combined.xmfa", "mauve")
|
||||||
|
\end{minted}
|
||||||
|
The file header data are stored in the \verb|metadata| attribute:
|
||||||
|
%cont-doctest
|
||||||
|
\begin{minted}{pycon}
|
||||||
|
>>> alignments.metadata # doctest: +NORMALIZE_WHITESPACE
|
||||||
|
{'FormatVersion': 'Mauve1',
|
||||||
|
'BackboneFile': 'combined.xmfa.bbcols',
|
||||||
|
'File': 'combined.fa'}
|
||||||
|
\end{minted}
|
||||||
|
The \verb|identifiers| attribute stores the sequence identifiers for the three sequences, which in this case are three zero-based numbers (the xmfa file itself numbers the sequences starting from 1):
|
||||||
|
%cont-doctest
|
||||||
|
\begin{minted}{pycon}
|
||||||
|
>>> alignments.identifiers
|
||||||
|
['0', '1', '2']
|
||||||
|
\end{minted}
|
||||||
|
These identifiers are used in the individual alignments:
|
||||||
|
%cont-doctest
|
||||||
|
\begin{minted}{pycon}
|
||||||
|
>>> for alignment in alignments:
|
||||||
|
... print([record.id for record in alignment.sequences])
|
||||||
|
... print(alignment)
|
||||||
|
... print("******")
|
||||||
|
...
|
||||||
|
['0', '1', '2']
|
||||||
|
0 49 AAGCCCTCCTAGCACACACCCGGAGTGG-CCGGGCCGTACTTTCCTTTT 1
|
||||||
|
1 0 ------------------------------------------------- 0
|
||||||
|
2 1 AAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGCTTTCCTTTT 48
|
||||||
|
<BLANKLINE>
|
||||||
|
******
|
||||||
|
['0']
|
||||||
|
0 0 G 1
|
||||||
|
<BLANKLINE>
|
||||||
|
******
|
||||||
|
['0']
|
||||||
|
0 49 A 50
|
||||||
|
<BLANKLINE>
|
||||||
|
******
|
||||||
|
['1']
|
||||||
|
1 0 GAAGAGGAAAAGTAGATCCCTGGCGTCCGGAGCTGGGACGT 41
|
||||||
|
<BLANKLINE>
|
||||||
|
******
|
||||||
|
['2']
|
||||||
|
2 0 C 1
|
||||||
|
<BLANKLINE>
|
||||||
|
******
|
||||||
|
['2']
|
||||||
|
2 48 C 49
|
||||||
|
<BLANKLINE>
|
||||||
|
******
|
||||||
|
\end{minted}
|
||||||
|
Note that only the first block is a real alignment; the other blocks contain only a single sequence. By including these blocks, the xmfa file contains the full sequence that was provided in the \verb|combined.fa| input file.
|
||||||
|
|
||||||
|
If \verb|progressiveMauve| is called with a separate input file for each genome, as in
|
||||||
|
\begin{minted}{text}
|
||||||
|
progressiveMauve equCab1.fa canFam2.fa mm9.fa --output=separate.xmfa ...
|
||||||
|
\end{minted}
|
||||||
|
where each FASTA file contains the genome sequence for one species only, then the output file \verb|separate.xmfa| is as follows:
|
||||||
|
\begin{minted}{text}
|
||||||
|
#FormatVersion Mauve1
|
||||||
|
#Sequence1File equCab1.fa
|
||||||
|
#Sequence1Format FastA
|
||||||
|
#Sequence2File canFam2.fa
|
||||||
|
#Sequence2Format FastA
|
||||||
|
#Sequence3File mm9.fa
|
||||||
|
#Sequence3Format FastA
|
||||||
|
#BackboneFile separate.xmfa.bbcols
|
||||||
|
> 1:1-50 - equCab1.fa
|
||||||
|
TAAGCCCTCCTAGCACACACCCGGAGTGGCC-GGGCCGTAC-TTTCCTTTTC
|
||||||
|
> 2:1-49 + canFam2.fa
|
||||||
|
CAAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGC-TTTCCTTTTC
|
||||||
|
> 3:1-19 - mm9.fa
|
||||||
|
---------------------------------GGATCTACTTTTCCTCTTC
|
||||||
|
=
|
||||||
|
> 3:20-41 + mm9.fa
|
||||||
|
CTGGCGTCCGGAGCTGGGACGT
|
||||||
|
=
|
||||||
|
\end{minted}
|
||||||
|
The identifiers \verb+equCab1+ for horse, \verb+mm9+ for mouse, and \verb+canFam2+ for dog are now shown explicitly in the output file.
|
||||||
|
This xmfa file consists of two alignment blocks, separated by \verb|=| characters. Use \verb|Align.parse| to extract these alignments:
|
||||||
|
%doctest ../Tests/Mauve lib:numpy
|
||||||
|
\begin{minted}{pycon}
|
||||||
|
>>> from Bio import Align
|
||||||
|
>>> alignments = Align.parse("separate.xmfa", "mauve")
|
||||||
|
\end{minted}
|
||||||
|
The file header data now do not include the input file name:
|
||||||
|
%cont-doctest
|
||||||
|
\begin{minted}{pycon}
|
||||||
|
>>> alignments.metadata # doctest: +NORMALIZE_WHITESPACE
|
||||||
|
{'FormatVersion': 'Mauve1',
|
||||||
|
'BackboneFile': 'separate.xmfa.bbcols'}
|
||||||
|
\end{minted}
|
||||||
|
The \verb|identifiers| attribute stores the sequence identifiers for the three sequences:
|
||||||
|
%cont-doctest
|
||||||
|
\begin{minted}{pycon}
|
||||||
|
>>> alignments.identifiers
|
||||||
|
['equCab1.fa', 'canFam2.fa', 'mm9.fa']
|
||||||
|
\end{minted}
|
||||||
|
These identifiers are used in the individual alignments:
|
||||||
|
%cont-doctest
|
||||||
|
\begin{minted}{pycon}
|
||||||
|
>>> for alignment in alignments:
|
||||||
|
... print([record.id for record in alignment.sequences])
|
||||||
|
... print(alignment)
|
||||||
|
... print("******")
|
||||||
|
...
|
||||||
|
['equCab1.fa', 'canFam2.fa', 'mm9.fa']
|
||||||
|
equCab1.f 50 TAAGCCCTCCTAGCACACACCCGGAGTGGCC-GGGCCGTAC-TTTCCTTTTC 0
|
||||||
|
canFam2.f 0 CAAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGC-TTTCCTTTTC 49
|
||||||
|
mm9.fa 19 ---------------------------------GGATCTACTTTTCCTCTTC 0
|
||||||
|
<BLANKLINE>
|
||||||
|
******
|
||||||
|
['mm9.fa']
|
||||||
|
mm9.fa 19 CTGGCGTCCGGAGCTGGGACGT 41
|
||||||
|
<BLANKLINE>
|
||||||
|
******
|
||||||
|
\end{minted}
|
||||||
|
|
||||||
|
To print the alignments in Mauve format, use \verb|Align.write|:
|
||||||
|
%cont-doctest
|
||||||
|
\begin{minted}{pycon}
|
||||||
|
>>> from io import StringIO
|
||||||
|
>>> stream = StringIO()
|
||||||
|
>>> alignments = Align.parse("separate.xmfa", "mauve")
|
||||||
|
>>> Align.write(alignments, stream, "mauve")
|
||||||
|
2
|
||||||
|
>>> print(stream.getvalue()) # doctest: +NORMALIZE_WHITESPACE
|
||||||
|
#FormatVersion Mauve1
|
||||||
|
#Sequence1File equCab1.fa
|
||||||
|
#Sequence1Format FastA
|
||||||
|
#Sequence2File canFam2.fa
|
||||||
|
#Sequence2Format FastA
|
||||||
|
#Sequence3File mm9.fa
|
||||||
|
#Sequence3Format FastA
|
||||||
|
#BackboneFile separate.xmfa.bbcols
|
||||||
|
> 1:1-50 - equCab1.fa
|
||||||
|
TAAGCCCTCCTAGCACACACCCGGAGTGGCC-GGGCCGTAC-TTTCCTTTTC
|
||||||
|
> 2:1-49 + canFam2.fa
|
||||||
|
CAAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGC-TTTCCTTTTC
|
||||||
|
> 3:1-19 - mm9.fa
|
||||||
|
---------------------------------GGATCTACTTTTCCTCTTC
|
||||||
|
=
|
||||||
|
> 3:20-41 + mm9.fa
|
||||||
|
CTGGCGTCCGGAGCTGGGACGT
|
||||||
|
=
|
||||||
|
<BLANKLINE>
|
||||||
|
\end{minted}
|
||||||
|
Here, the writer makes use of the information stored in \verb+alignments.metadata+ and \verb+alignments.identifiers+ to create this format.
|
||||||
|
If your \verb|alignments| object does not have these attributes, you can provide them as keyword arguments to \verb+Align.write+:
|
||||||
|
%cont-doctest
|
||||||
|
\begin{minted}{pycon}
|
||||||
|
>>> stream = StringIO()
|
||||||
|
>>> alignments = Align.parse("separate.xmfa", "mauve")
|
||||||
|
>>> metadata = alignments.metadata
|
||||||
|
>>> identifiers = alignments.identifiers
|
||||||
|
>>> alignments = list(alignments) # this drops the attributes
|
||||||
|
>>> alignments.metadata # doctest: +ELLIPSIS
|
||||||
|
Traceback (most recent call last):
|
||||||
|
...
|
||||||
|
AttributeError: 'list' object has no attribute 'metadata'
|
||||||
|
>>> alignments.identifiers # doctest: +ELLIPSIS
|
||||||
|
Traceback (most recent call last):
|
||||||
|
...
|
||||||
|
AttributeError: 'list' object has no attribute 'identifiers'
|
||||||
|
>>> Align.write(alignments, stream, "mauve", metadata=metadata, identifiers=identifiers)
|
||||||
|
2
|
||||||
|
>>> print(stream.getvalue()) # doctest: +NORMALIZE_WHITESPACE
|
||||||
|
#FormatVersion Mauve1
|
||||||
|
#Sequence1File equCab1.fa
|
||||||
|
#Sequence1Format FastA
|
||||||
|
#Sequence2File canFam2.fa
|
||||||
|
#Sequence2Format FastA
|
||||||
|
#Sequence3File mm9.fa
|
||||||
|
#Sequence3Format FastA
|
||||||
|
#BackboneFile separate.xmfa.bbcols
|
||||||
|
> 1:1-50 - equCab1.fa
|
||||||
|
TAAGCCCTCCTAGCACACACCCGGAGTGGCC-GGGCCGTAC-TTTCCTTTTC
|
||||||
|
> 2:1-49 + canFam2.fa
|
||||||
|
CAAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGC-TTTCCTTTTC
|
||||||
|
> 3:1-19 - mm9.fa
|
||||||
|
---------------------------------GGATCTACTTTTCCTCTTC
|
||||||
|
=
|
||||||
|
> 3:20-41 + mm9.fa
|
||||||
|
CTGGCGTCCGGAGCTGGGACGT
|
||||||
|
=
|
||||||
|
<BLANKLINE>
|
||||||
|
\end{minted}
|
||||||
|
Python does not allow you to add these attributes to the \verb+alignments+ object directly, because in this example it was converted to a plain \verb+list+, which does not accept new attributes.
|
||||||
|
However, you can construct an \verb|Alignments| object (which inherits from \verb+list+) and add the attributes to it:
|
||||||
|
%cont-doctest
|
||||||
|
\begin{minted}{pycon}
|
||||||
|
>>> alignments = Align.Alignments(alignments)
|
||||||
|
>>> alignments.metadata = metadata
|
||||||
|
>>> alignments.identifiers = identifiers
|
||||||
|
>>> stream = StringIO()
|
||||||
|
>>> Align.write(alignments, stream, "mauve", metadata=metadata, identifiers=identifiers)
|
||||||
|
2
|
||||||
|
>>> print(stream.getvalue()) # doctest: +NORMALIZE_WHITESPACE
|
||||||
|
#FormatVersion Mauve1
|
||||||
|
#Sequence1File equCab1.fa
|
||||||
|
#Sequence1Format FastA
|
||||||
|
#Sequence2File canFam2.fa
|
||||||
|
#Sequence2Format FastA
|
||||||
|
#Sequence3File mm9.fa
|
||||||
|
#Sequence3Format FastA
|
||||||
|
#BackboneFile separate.xmfa.bbcols
|
||||||
|
> 1:1-50 - equCab1.fa
|
||||||
|
TAAGCCCTCCTAGCACACACCCGGAGTGGCC-GGGCCGTAC-TTTCCTTTTC
|
||||||
|
> 2:1-49 + canFam2.fa
|
||||||
|
CAAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGC-TTTCCTTTTC
|
||||||
|
> 3:1-19 - mm9.fa
|
||||||
|
---------------------------------GGATCTACTTTTCCTCTTC
|
||||||
|
=
|
||||||
|
> 3:20-41 + mm9.fa
|
||||||
|
CTGGCGTCCGGAGCTGGGACGT
|
||||||
|
=
|
||||||
|
<BLANKLINE>
|
||||||
|
\end{minted}
|
||||||
|
When printing a single alignment in \verb+Mauve+ format, use keyword arguments to provide the metadata and identifiers:
|
||||||
|
%cont-doctest
|
||||||
|
\begin{minted}{pycon}
|
||||||
|
>>> alignment = alignments[0]
|
||||||
|
>>> print(alignment.format("mauve", metadata=metadata, identifiers=identifiers))
|
||||||
|
> 1:1-50 - equCab1.fa
|
||||||
|
TAAGCCCTCCTAGCACACACCCGGAGTGGCC-GGGCCGTAC-TTTCCTTTTC
|
||||||
|
> 2:1-49 + canFam2.fa
|
||||||
|
CAAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGC-TTTCCTTTTC
|
||||||
|
> 3:1-19 - mm9.fa
|
||||||
|
---------------------------------GGATCTACTTTTCCTCTTC
|
||||||
|
=
|
||||||
|
<BLANKLINE>
|
||||||
|
\end{minted}
|
||||||
|
|
||||||
\subsection{Sequence Alignment/Map (SAM)}
|
\subsection{Sequence Alignment/Map (SAM)}
|
||||||
\label{subseq:align_sam}
|
\label{subseq:align_sam}
|
||||||
|
|
||||||
|
@ -6,19 +6,13 @@
|
|||||||
#Sequence3File mm9.fa
|
#Sequence3File mm9.fa
|
||||||
#Sequence3Format FastA
|
#Sequence3Format FastA
|
||||||
#BackboneFile separate.xmfa.bbcols
|
#BackboneFile separate.xmfa.bbcols
|
||||||
> 1:0-0 + equCab1.fa
|
> 1:1-50 - equCab1.fa
|
||||||
------------------------
|
TAAGCCCTCCTAGCACACACCCGGAGTGGCC-GGGCCGTAC-TTTCCTTTTC
|
||||||
> 2:26-49 + canFam2.fa
|
> 2:1-49 + canFam2.fa
|
||||||
GTCCCGGGCCCTGCTTTCCTTTTC
|
CAAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGC-TTTCCTTTTC
|
||||||
> 3:1-24 - mm9.fa
|
> 3:1-19 - mm9.fa
|
||||||
GCCAGGGATCTACTTTTCCTCTTC
|
---------------------------------GGATCTACTTTTCCTCTTC
|
||||||
=
|
=
|
||||||
> 1:1-50 + equCab1.fa
|
> 3:20-41 + mm9.fa
|
||||||
GAAAAGGAAAGTACGGCCCGGCCACTCCGGGTGTGTGCTAGGAGGGCTTA
|
CTGGCGTCCGGAGCTGGGACGT
|
||||||
=
|
|
||||||
> 2:1-25 + canFam2.fa
|
|
||||||
CAAGCCCTGCGCGCTCAGCCGGAGT
|
|
||||||
=
|
|
||||||
> 3:25-41 + mm9.fa
|
|
||||||
GTCCGGAGCTGGGACGT
|
|
||||||
=
|
=
|
||||||
|
@ -1,2 +1,3 @@
|
|||||||
seq0_leftend seq0_rightend seq1_leftend seq1_rightend seq2_leftend seq2_rightend
|
seq0_leftend seq0_rightend seq1_leftend seq1_rightend seq2_leftend seq2_rightend
|
||||||
0 0 26 49 -1 -24
|
-19 -50 1 31 0 0
|
||||||
|
-1 -18 32 49 -1 -19
|
||||||
|
@ -1 +1,2 @@
|
|||||||
0 1 24 1 2
|
0 34 19 0 1 2
|
||||||
|
0 1 33 0 1
|
||||||
|
@ -25,13 +25,16 @@ except ImportError:
|
|||||||
|
|
||||||
|
|
||||||
class TestCombinedFile(unittest.TestCase):
|
class TestCombinedFile(unittest.TestCase):
|
||||||
def setUp(self):
|
# Generate the output file combined.xmfa by running
|
||||||
filename = "combined.fa"
|
# progressiveMauve combined.fa --output=combined.xmfa
|
||||||
path = os.path.join("Mauve", filename)
|
|
||||||
records = SeqIO.parse(path, "fasta")
|
filename = "combined.fa"
|
||||||
self.sequences = {
|
path = os.path.join("Mauve", filename)
|
||||||
str(index): record.seq for index, record in enumerate(records)
|
records = SeqIO.parse(path, "fasta")
|
||||||
}
|
sequences = {str(index): record.seq for index, record in enumerate(records)}
|
||||||
|
del filename
|
||||||
|
del path
|
||||||
|
del records
|
||||||
|
|
||||||
def test_parse(self):
|
def test_parse(self):
|
||||||
path = os.path.join("Mauve", "combined.xmfa")
|
path = os.path.join("Mauve", "combined.xmfa")
|
||||||
@ -432,14 +435,19 @@ numpy.array([['C']], dtype='U')
|
|||||||
self.assertEqual(output.read(), data)
|
self.assertEqual(output.read(), data)
|
||||||
|
|
||||||
|
|
||||||
class TestDSeparateFiles(unittest.TestCase):
|
class TestSeparateFiles(unittest.TestCase):
|
||||||
def setUp(self):
|
# Generate the output file separate.xmfa by running
|
||||||
self.sequences = {}
|
# progressiveMauve --solid-seeds equCab1.fa canFam2.fa mm9.fa --output=separate.xmfa
|
||||||
for species in ("equCab1", "canFam2", "mm9"):
|
|
||||||
filename = f"{species}.fa"
|
sequences = {}
|
||||||
path = os.path.join("Mauve", filename)
|
for species in ("equCab1", "canFam2", "mm9"):
|
||||||
record = SeqIO.read(path, "fasta")
|
filename = f"{species}.fa"
|
||||||
self.sequences[filename] = record.seq
|
path = os.path.join("Mauve", filename)
|
||||||
|
record = SeqIO.read(path, "fasta")
|
||||||
|
sequences[filename] = record.seq
|
||||||
|
del filename
|
||||||
|
del path
|
||||||
|
del record
|
||||||
|
|
||||||
def test_parse(self):
|
def test_parse(self):
|
||||||
path = os.path.join("Mauve", "separate.xmfa")
|
path = os.path.join("Mauve", "separate.xmfa")
|
||||||
@ -456,20 +464,23 @@ class TestDSeparateFiles(unittest.TestCase):
|
|||||||
self.assertEqual(len(alignment), 3)
|
self.assertEqual(len(alignment), 3)
|
||||||
self.assertEqual(len(alignment.sequences), 3)
|
self.assertEqual(len(alignment.sequences), 3)
|
||||||
self.assertEqual(alignment.sequences[0].id, "equCab1.fa")
|
self.assertEqual(alignment.sequences[0].id, "equCab1.fa")
|
||||||
self.assertEqual(alignment.sequences[0].seq, "")
|
self.assertEqual(
|
||||||
|
alignment.sequences[0].seq,
|
||||||
|
Seq("GAAAAGGAAAGTACGGCCCGGCCACTCCGGGTGTGTGCTAGGAGGGCTTA"),
|
||||||
|
)
|
||||||
start = alignment.coordinates[0, 0]
|
start = alignment.coordinates[0, 0]
|
||||||
end = alignment.coordinates[0, -1]
|
end = alignment.coordinates[0, -1]
|
||||||
self.assertEqual(start, 0)
|
self.assertEqual(start, 50)
|
||||||
self.assertEqual(end, 0)
|
self.assertEqual(end, 0)
|
||||||
self.assertEqual(alignment.sequences[1].id, "canFam2.fa")
|
self.assertEqual(alignment.sequences[1].id, "canFam2.fa")
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
repr(alignment.sequences[1].seq),
|
alignment.sequences[1].seq,
|
||||||
"Seq({25: 'GTCCCGGGCCCTGCTTTCCTTTTC'}, length=49)",
|
Seq("CAAGCCCTGCGCGCTCAGCCGGAGTGTCCCGGGCCCTGCTTTCCTTTTC"),
|
||||||
)
|
)
|
||||||
start = alignment.coordinates[1, 0]
|
start = alignment.coordinates[1, 0]
|
||||||
end = alignment.coordinates[1, -1]
|
end = alignment.coordinates[1, -1]
|
||||||
sequence = self.sequences[alignment.sequences[1].id]
|
sequence = self.sequences[alignment.sequences[1].id]
|
||||||
self.assertEqual(start, 25)
|
self.assertEqual(start, 0)
|
||||||
self.assertEqual(end, 49)
|
self.assertEqual(end, 49)
|
||||||
self.assertEqual(alignment.sequences[1].seq[start:end], sequence[start:end])
|
self.assertEqual(alignment.sequences[1].seq[start:end], sequence[start:end])
|
||||||
self.assertEqual(alignment.sequences[2].id, "mm9.fa")
|
self.assertEqual(alignment.sequences[2].id, "mm9.fa")
|
||||||
@ -478,83 +489,46 @@ class TestDSeparateFiles(unittest.TestCase):
|
|||||||
start = len(sequence) - alignment.coordinates[2, 0]
|
start = len(sequence) - alignment.coordinates[2, 0]
|
||||||
end = len(sequence) - alignment.coordinates[2, -1]
|
end = len(sequence) - alignment.coordinates[2, -1]
|
||||||
self.assertEqual(start, 0)
|
self.assertEqual(start, 0)
|
||||||
self.assertEqual(end, 24)
|
self.assertEqual(end, 19)
|
||||||
sequence = self.sequences[alignment.sequences[2].id][start:end]
|
sequence = self.sequences[alignment.sequences[2].id][start:end]
|
||||||
self.assertEqual(alignment.sequences[2].seq[start:end], sequence)
|
self.assertEqual(alignment.sequences[2].seq[start:end], sequence)
|
||||||
self.assertEqual(alignment[0], "------------------------")
|
self.assertEqual(
|
||||||
self.assertEqual(alignment[1], "GTCCCGGGCCCTGCTTTCCTTTTC")
|
alignment[0], "TAAGCCCTCCTAGCACACACCCGGAGTGGCC-GGGCCGTAC-TTTCCTTTTC"
|
||||||
self.assertEqual(alignment[2], "GCCAGGGATCTACTTTTCCTCTTC")
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[1], "CAAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGC-TTTCCTTTTC"
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[2], "---------------------------------GGATCTACTTTTCCTCTTC"
|
||||||
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
str(alignment),
|
str(alignment),
|
||||||
"""\
|
"""\
|
||||||
equCab1.f 0 ------------------------ 0
|
equCab1.f 50 TAAGCCCTCCTAGCACACACCCGGAGTGGCC-GGGCCGTAC-TTTCCTTTTC 0
|
||||||
canFam2.f 25 GTCCCGGGCCCTGCTTTCCTTTTC 49
|
canFam2.f 0 CAAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGC-TTTCCTTTTC 49
|
||||||
mm9.fa 24 GCCAGGGATCTACTTTTCCTCTTC 0
|
mm9.fa 19 ---------------------------------GGATCTACTTTTCCTCTTC 0
|
||||||
""",
|
""",
|
||||||
)
|
)
|
||||||
self.assertTrue(
|
self.assertTrue(
|
||||||
numpy.array_equal(
|
numpy.array_equal(
|
||||||
alignment.coordinates,
|
alignment.coordinates,
|
||||||
numpy.array([[0, 0], [25, 49], [24, 0]]),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
|
||||||
alignment.format("mauve", metadata, identifiers),
|
|
||||||
"""\
|
|
||||||
> 1:0-0 + equCab1.fa
|
|
||||||
------------------------
|
|
||||||
> 2:26-49 + canFam2.fa
|
|
||||||
GTCCCGGGCCCTGCTTTCCTTTTC
|
|
||||||
> 3:1-24 - mm9.fa
|
|
||||||
GCCAGGGATCTACTTTTCCTCTTC
|
|
||||||
=
|
|
||||||
""",
|
|
||||||
)
|
|
||||||
self.assertTrue(
|
|
||||||
numpy.array_equal(
|
|
||||||
numpy.array(alignment, "U"),
|
|
||||||
# fmt: off
|
# fmt: off
|
||||||
# flake8: noqa
|
# flake8: noqa
|
||||||
numpy.array([['-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-',
|
numpy.array([[50, 40, 38, 19, 19, 18, 10, 10, 0],
|
||||||
'-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-'],
|
[ 0, 10, 10, 29, 30, 31, 39, 39, 49],
|
||||||
['G', 'T', 'C', 'C', 'C', 'G', 'G', 'G', 'C', 'C', 'C', 'T', 'G',
|
[19, 19, 19, 19, 19, 19, 11, 10, 0]]),
|
||||||
'C', 'T', 'T', 'T', 'C', 'C', 'T', 'T', 'T', 'T', 'C'],
|
|
||||||
['G', 'C', 'C', 'A', 'G', 'G', 'G', 'A', 'T', 'C', 'T', 'A', 'C',
|
|
||||||
'T', 'T', 'T', 'T', 'C', 'C', 'T', 'C', 'T', 'T', 'C']],
|
|
||||||
dtype='U')
|
|
||||||
# fmt: on
|
# fmt: on
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
alignment = next(alignments)
|
|
||||||
saved_alignments.append(alignment)
|
|
||||||
self.assertEqual(len(alignment), 1)
|
|
||||||
self.assertEqual(len(alignment.sequences), 1)
|
|
||||||
self.assertEqual(alignment.sequences[0].id, "equCab1.fa")
|
|
||||||
self.assertEqual(
|
|
||||||
alignment.sequences[0].seq,
|
|
||||||
"GAAAAGGAAAGTACGGCCCGGCCACTCCGGGTGTGTGCTAGGAGGGCTTA",
|
|
||||||
)
|
|
||||||
sequence = self.sequences[alignment.sequences[0].id]
|
|
||||||
start = alignment.coordinates[0, 0]
|
|
||||||
end = alignment.coordinates[0, -1]
|
|
||||||
self.assertEqual(alignment.sequences[0].seq[start:end], sequence[start:end])
|
|
||||||
self.assertEqual(
|
|
||||||
alignment[0], "GAAAAGGAAAGTACGGCCCGGCCACTCCGGGTGTGTGCTAGGAGGGCTTA"
|
|
||||||
)
|
|
||||||
self.assertTrue(
|
|
||||||
numpy.array_equal(alignment.coordinates, numpy.array([[0, 50]]))
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
|
||||||
str(alignment),
|
|
||||||
"""\
|
|
||||||
equCab1.f 0 GAAAAGGAAAGTACGGCCCGGCCACTCCGGGTGTGTGCTAGGAGGGCTTA 50
|
|
||||||
""",
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
alignment.format("mauve", metadata, identifiers),
|
alignment.format("mauve", metadata, identifiers),
|
||||||
"""\
|
"""\
|
||||||
> 1:1-50 + equCab1.fa
|
> 1:1-50 - equCab1.fa
|
||||||
GAAAAGGAAAGTACGGCCCGGCCACTCCGGGTGTGTGCTAGGAGGGCTTA
|
TAAGCCCTCCTAGCACACACCCGGAGTGGCC-GGGCCGTAC-TTTCCTTTTC
|
||||||
|
> 2:1-49 + canFam2.fa
|
||||||
|
CAAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGC-TTTCCTTTTC
|
||||||
|
> 3:1-19 - mm9.fa
|
||||||
|
---------------------------------GGATCTACTTTTCCTCTTC
|
||||||
=
|
=
|
||||||
""",
|
""",
|
||||||
)
|
)
|
||||||
@ -563,49 +537,18 @@ GAAAAGGAAAGTACGGCCCGGCCACTCCGGGTGTGTGCTAGGAGGGCTTA
|
|||||||
numpy.array(alignment, "U"),
|
numpy.array(alignment, "U"),
|
||||||
# fmt: off
|
# fmt: off
|
||||||
# flake8: noqa
|
# flake8: noqa
|
||||||
numpy.array([['G', 'A', 'A', 'A', 'A', 'G', 'G', 'A', 'A', 'A', 'G', 'T', 'A',
|
numpy.array([['T', 'A', 'A', 'G', 'C', 'C', 'C', 'T', 'C', 'C', 'T', 'A', 'G',
|
||||||
'C', 'G', 'G', 'C', 'C', 'C', 'G', 'G', 'C', 'C', 'A', 'C', 'T',
|
'C', 'A', 'C', 'A', 'C', 'A', 'C', 'C', 'C', 'G', 'G', 'A', 'G',
|
||||||
'C', 'C', 'G', 'G', 'G', 'T', 'G', 'T', 'G', 'T', 'G', 'C', 'T',
|
'T', 'G', 'G', 'C', 'C', '-', 'G', 'G', 'G', 'C', 'C', 'G', 'T',
|
||||||
'A', 'G', 'G', 'A', 'G', 'G', 'G', 'C', 'T', 'T', 'A']],
|
'A', 'C', '-', 'T', 'T', 'T', 'C', 'C', 'T', 'T', 'T', 'T', 'C'],
|
||||||
dtype='U')
|
['C', 'A', 'A', 'G', 'C', 'C', 'C', 'T', 'G', 'C', '-', '-', 'G',
|
||||||
# fmt: on
|
'C', 'G', 'C', 'T', 'C', 'A', 'G', 'C', 'C', 'G', 'G', 'A', 'G',
|
||||||
)
|
'T', 'G', 'T', 'C', 'C', 'C', 'G', 'G', 'G', 'C', 'C', 'C', 'T',
|
||||||
)
|
'G', 'C', '-', 'T', 'T', 'T', 'C', 'C', 'T', 'T', 'T', 'T', 'C'],
|
||||||
alignment = next(alignments)
|
['-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-',
|
||||||
saved_alignments.append(alignment)
|
'-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-', '-',
|
||||||
self.assertEqual(len(alignment), 1)
|
'-', '-', '-', '-', '-', '-', '-', 'G', 'G', 'A', 'T', 'C', 'T',
|
||||||
self.assertEqual(len(alignment.sequences), 1)
|
'A', 'C', 'T', 'T', 'T', 'T', 'C', 'C', 'T', 'C', 'T', 'T', 'C']],
|
||||||
self.assertEqual(alignment.sequences[0].id, "canFam2.fa")
|
|
||||||
self.assertEqual(alignment.sequences[0].seq, "CAAGCCCTGCGCGCTCAGCCGGAGT")
|
|
||||||
sequence = self.sequences[alignment.sequences[0].id]
|
|
||||||
start = alignment.coordinates[0, 0]
|
|
||||||
end = alignment.coordinates[0, -1]
|
|
||||||
self.assertEqual(alignment.sequences[0].seq[start:end], sequence[start:end])
|
|
||||||
self.assertEqual(alignment[0], "CAAGCCCTGCGCGCTCAGCCGGAGT")
|
|
||||||
self.assertTrue(
|
|
||||||
numpy.array_equal(alignment.coordinates, numpy.array([[0, 25]]))
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
|
||||||
str(alignment),
|
|
||||||
"""\
|
|
||||||
canFam2.f 0 CAAGCCCTGCGCGCTCAGCCGGAGT 25
|
|
||||||
""",
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
|
||||||
alignment.format("mauve", metadata, identifiers),
|
|
||||||
"""\
|
|
||||||
> 2:1-25 + canFam2.fa
|
|
||||||
CAAGCCCTGCGCGCTCAGCCGGAGT
|
|
||||||
=
|
|
||||||
""",
|
|
||||||
)
|
|
||||||
self.assertTrue(
|
|
||||||
numpy.array_equal(
|
|
||||||
numpy.array(alignment, "U"),
|
|
||||||
# fmt: off
|
|
||||||
# flake8: noqa
|
|
||||||
numpy.array([['C', 'A', 'A', 'G', 'C', 'C', 'C', 'T', 'G', 'C', 'G', 'C', 'G',
|
|
||||||
'C', 'T', 'C', 'A', 'G', 'C', 'C', 'G', 'G', 'A', 'G', 'T']],
|
|
||||||
dtype='U')
|
dtype='U')
|
||||||
# fmt: on
|
# fmt: on
|
||||||
)
|
)
|
||||||
@ -615,31 +558,29 @@ numpy.array([['C', 'A', 'A', 'G', 'C', 'C', 'C', 'T', 'G', 'C', 'G', 'C', 'G',
|
|||||||
self.assertEqual(len(alignment), 1)
|
self.assertEqual(len(alignment), 1)
|
||||||
self.assertEqual(len(alignment.sequences), 1)
|
self.assertEqual(len(alignment.sequences), 1)
|
||||||
self.assertEqual(alignment.sequences[0].id, "mm9.fa")
|
self.assertEqual(alignment.sequences[0].id, "mm9.fa")
|
||||||
sequence = self.sequences[alignment.sequences[0].id]
|
self.assertEqual(
|
||||||
start = alignment.coordinates[0, 0]
|
repr(alignment.sequences[0].seq),
|
||||||
end = alignment.coordinates[0, -1]
|
"Seq({19: 'CTGGCGTCCGGAGCTGGGACGT'}, length=41)",
|
||||||
self.assertEqual(start, 24)
|
)
|
||||||
self.assertEqual(end, 41)
|
|
||||||
self.assertEqual(alignment.sequences[0].seq[start:end], "GTCCGGAGCTGGGACGT")
|
|
||||||
sequence = self.sequences[alignment.sequences[0].id]
|
sequence = self.sequences[alignment.sequences[0].id]
|
||||||
start = alignment.coordinates[0, 0]
|
start = alignment.coordinates[0, 0]
|
||||||
end = alignment.coordinates[0, -1]
|
end = alignment.coordinates[0, -1]
|
||||||
self.assertEqual(alignment.sequences[0].seq[start:end], sequence[start:end])
|
self.assertEqual(alignment.sequences[0].seq[start:end], sequence[start:end])
|
||||||
self.assertEqual(alignment[0], "GTCCGGAGCTGGGACGT")
|
self.assertEqual(alignment[0], "CTGGCGTCCGGAGCTGGGACGT")
|
||||||
|
self.assertTrue(
|
||||||
|
numpy.array_equal(alignment.coordinates, numpy.array([[19, 41]]))
|
||||||
|
)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
str(alignment),
|
str(alignment),
|
||||||
"""\
|
"""\
|
||||||
mm9.fa 24 GTCCGGAGCTGGGACGT 41
|
mm9.fa 19 CTGGCGTCCGGAGCTGGGACGT 41
|
||||||
""",
|
""",
|
||||||
)
|
)
|
||||||
self.assertTrue(
|
|
||||||
numpy.array_equal(alignment.coordinates, numpy.array([[24, 41]]))
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
alignment.format("mauve", metadata, identifiers),
|
alignment.format("mauve", metadata, identifiers),
|
||||||
"""\
|
"""\
|
||||||
> 3:25-41 + mm9.fa
|
> 3:20-41 + mm9.fa
|
||||||
GTCCGGAGCTGGGACGT
|
CTGGCGTCCGGAGCTGGGACGT
|
||||||
=
|
=
|
||||||
""",
|
""",
|
||||||
)
|
)
|
||||||
@ -648,15 +589,15 @@ GTCCGGAGCTGGGACGT
|
|||||||
numpy.array(alignment, "U"),
|
numpy.array(alignment, "U"),
|
||||||
# fmt: off
|
# fmt: off
|
||||||
# flake8: noqa
|
# flake8: noqa
|
||||||
numpy.array([['G', 'T', 'C', 'C', 'G', 'G', 'A', 'G', 'C', 'T', 'G', 'G', 'G',
|
numpy.array([['C', 'T', 'G', 'G', 'C', 'G', 'T', 'C', 'C', 'G', 'G',
|
||||||
'A', 'C', 'G', 'T']], dtype='U')
|
'A', 'G', 'C', 'T', 'G', 'G', 'G', 'A', 'C', 'G', 'T']], dtype='U')
|
||||||
# fmt: on
|
# fmt: on
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
self.assertRaises(StopIteration, next, alignments)
|
self.assertRaises(StopIteration, next, alignments)
|
||||||
# As each nucleotide in each sequence is stored exactly once in an XMFA
|
# As each nucleotide in each sequence is stored exactly once in an XMFA
|
||||||
# file, we can reconstitute the full sequences:
|
# file, we can reconstitute the full sequences:
|
||||||
self.assertEqual(len(saved_alignments), 4)
|
self.assertEqual(len(saved_alignments), 2)
|
||||||
filenames = []
|
filenames = []
|
||||||
for alignment in saved_alignments:
|
for alignment in saved_alignments:
|
||||||
for record in alignment.sequences:
|
for record in alignment.sequences:
|
||||||
@ -697,26 +638,20 @@ numpy.array([['G', 'T', 'C', 'C', 'G', 'G', 'A', 'G', 'C', 'T', 'G', 'G', 'G',
|
|||||||
for record in alignment.sequences:
|
for record in alignment.sequences:
|
||||||
filename = record.id
|
filename = record.id
|
||||||
record.seq = sequences[filename]
|
record.seq = sequences[filename]
|
||||||
self.assertEqual(alignment[0], "------------------------")
|
self.assertEqual(
|
||||||
self.assertEqual(alignment[1], "GTCCCGGGCCCTGCTTTCCTTTTC")
|
alignment[0], "TAAGCCCTCCTAGCACACACCCGGAGTGGCC-GGGCCGTAC-TTTCCTTTTC"
|
||||||
self.assertEqual(alignment[2], "GCCAGGGATCTACTTTTCCTCTTC")
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[1], "CAAGCCCTGC--GCGCTCAGCCGGAGTGTCCCGGGCCCTGC-TTTCCTTTTC"
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
alignment[2], "---------------------------------GGATCTACTTTTCCTCTTC"
|
||||||
|
)
|
||||||
alignment = saved_alignments[1]
|
alignment = saved_alignments[1]
|
||||||
for record in alignment.sequences:
|
for record in alignment.sequences:
|
||||||
filename = record.id
|
filename = record.id
|
||||||
record.seq = sequences[filename]
|
record.seq = sequences[filename]
|
||||||
self.assertEqual(
|
self.assertEqual(alignment[0], "CTGGCGTCCGGAGCTGGGACGT")
|
||||||
alignment[0], "GAAAAGGAAAGTACGGCCCGGCCACTCCGGGTGTGTGCTAGGAGGGCTTA"
|
|
||||||
)
|
|
||||||
alignment = saved_alignments[2]
|
|
||||||
for record in alignment.sequences:
|
|
||||||
filename = record.id
|
|
||||||
record.seq = sequences[filename]
|
|
||||||
self.assertEqual(alignment[0], "CAAGCCCTGCGCGCTCAGCCGGAGT")
|
|
||||||
alignment = saved_alignments[3]
|
|
||||||
for record in alignment.sequences:
|
|
||||||
filename = record.id
|
|
||||||
record.seq = sequences[filename]
|
|
||||||
self.assertEqual(alignment[0], "GTCCGGAGCTGGGACGT")
|
|
||||||
|
|
||||||
def test_write_read(self):
|
def test_write_read(self):
|
||||||
path = os.path.join("Mauve", "separate.xmfa")
|
path = os.path.join("Mauve", "separate.xmfa")
|
||||||
@ -729,7 +664,7 @@ numpy.array([['G', 'T', 'C', 'C', 'G', 'G', 'A', 'G', 'C', 'T', 'G', 'G', 'G',
|
|||||||
alignments = Align.parse(stream, "mauve")
|
alignments = Align.parse(stream, "mauve")
|
||||||
output = StringIO()
|
output = StringIO()
|
||||||
n = Align.write(alignments, output, "mauve")
|
n = Align.write(alignments, output, "mauve")
|
||||||
self.assertEqual(n, 4)
|
self.assertEqual(n, 2)
|
||||||
output.seek(0)
|
output.seek(0)
|
||||||
self.assertEqual(output.read(), data)
|
self.assertEqual(output.read(), data)
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user