mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 13:43:47 +08:00
support soft-masking in nib files (#2303)
* support soft-masking in nib files
* nib doesn't need to be handled as a special case any more
* style issues
* Update NibIO.py prettified
This commit is contained in:
@ -10,23 +10,29 @@ Nib stands for nibble (4 bit) representation of nucleotide sequences.
|
||||
The two nibbles in a byte each store one nucleotide, represented numerically
|
||||
as follows:
|
||||
|
||||
0 - T
|
||||
1 - C
|
||||
2 - A
|
||||
3 - G
|
||||
4 - N (unknown)
|
||||
- ``0`` - T
|
||||
- ``1`` - C
|
||||
- ``2`` - A
|
||||
- ``3`` - G
|
||||
- ``4`` - N (unknown)
|
||||
|
||||
As the most significant bit in a nibble is set if the nucleotide is soft-masked, we
|
||||
additionally have:
|
||||
|
||||
- ``8`` - t
|
||||
- ``9`` - c
|
||||
- ``a`` - a
|
||||
- ``b`` - g
|
||||
- ``c`` - n (unknown)
|
||||
|
||||
A nib file contains only one sequence record.
|
||||
You are expected to use this module via the Bio.SeqIO functions under
|
||||
the format name "nib":
|
||||
|
||||
>>> from Bio import SeqIO
|
||||
>>> record = SeqIO.read("Nib/test_bigendian.nib", "nib")
|
||||
>>> record = SeqIO.read("Nib/test_even_bigendian.nib", "nib")
|
||||
>>> print("%i %s..." % (len(record), record.seq[:20]))
|
||||
37 ACGTAAACCGTACCCGTANA...
|
||||
|
||||
Notice that the sequence is given in upper case; unknown nucleotides are
|
||||
written as N.
|
||||
50 nAGAAGagccgcNGgCActt...
|
||||
|
||||
For detailed information on the file format, please see the UCSC
|
||||
description at https://genome.ucsc.edu/FAQ/FAQformat.html.
|
||||
@ -101,17 +107,17 @@ def NibIterator(handle, alphabet=None):
|
||||
This function is used internally via the Bio.SeqIO functions:
|
||||
|
||||
>>> from Bio import SeqIO
|
||||
>>> record = SeqIO.read("Nib/test_bigendian.nib", "nib")
|
||||
>>> record = SeqIO.read("Nib/test_even_bigendian.nib", "nib")
|
||||
>>> print("%s %i" % (record.seq, len(record)))
|
||||
ACGTAAACCGTACCCGTANANCANNNNACNANNANCN 37
|
||||
nAGAAGagccgcNGgCActtGAnTAtCGTCgcCacCaGncGncTtGNtGG 50
|
||||
|
||||
You can also call it directly:
|
||||
|
||||
>>> with open("Nib/test_bigendian.nib", "rb") as handle:
|
||||
>>> with open("Nib/test_even_bigendian.nib", "rb") as handle:
|
||||
... for record in NibIterator(handle):
|
||||
... print("%s %i" % (record.seq, len(record)))
|
||||
...
|
||||
ACGTAAACCGTACCCGTANANCANNNNACNANNANCN 37
|
||||
nAGAAGagccgcNGgCActtGAnTAtCGTCgcCacCaGncGncTtGNtGG 50
|
||||
|
||||
"""
|
||||
if alphabet is not None:
|
||||
@ -140,9 +146,9 @@ def NibIterator(handle, alphabet=None):
|
||||
if len(indices) != length + 1:
|
||||
raise ValueError("Unexpected file size")
|
||||
indices = indices[:length]
|
||||
if set(indices) != set("01234"):
|
||||
if not set(indices).issubset("0123489abc"):
|
||||
raise ValueError("Unexpected sequence data found in file")
|
||||
table = maketrans("01234", "TCAGN")
|
||||
table = maketrans("0123489abc", "TCAGNtcagn")
|
||||
nucleotides = indices.translate(table)
|
||||
sequence = Seq(nucleotides)
|
||||
record = SeqRecord(sequence)
|
||||
@ -182,13 +188,13 @@ class NibWriter(SequenceWriter):
|
||||
nucleotides = str(sequence)
|
||||
length = len(sequence)
|
||||
handle.write(struct.pack("i", length))
|
||||
table = maketrans("TCAGNtcagn", "0123401234")
|
||||
table = maketrans("TCAGNtcagn", "0123489abc")
|
||||
padding = length % 2
|
||||
suffix = padding * "T"
|
||||
nucleotides += suffix
|
||||
indices = nucleotides.translate(table)
|
||||
if set(indices) != set("01234"):
|
||||
if not set(nucleotides).issubset("ACGTNacgtn"):
|
||||
raise ValueError("Sequence should contain A,C,G,T,N,a,c,g,t,n only")
|
||||
indices = nucleotides.translate(table)
|
||||
handle.write(hex2bytes(indices))
|
||||
return count
|
||||
|
||||
|
BIN
Tests/Nib/test_bigendian.nib
generated
BIN
Tests/Nib/test_bigendian.nib
generated
Binary file not shown.
2
Tests/Nib/test_even.fa
Normal file
2
Tests/Nib/test_even.fa
Normal file
@ -0,0 +1,2 @@
|
||||
>even
|
||||
nAGAAGagccgcNGgCActtGAnTAtCGTCgcCacCaGncGncTtGNtGG
|
BIN
Tests/Nib/test_even_bigendian.nib
generated
Normal file
BIN
Tests/Nib/test_even_bigendian.nib
generated
Normal file
Binary file not shown.
BIN
Tests/Nib/test_even_littleendian.nib
generated
Normal file
BIN
Tests/Nib/test_even_littleendian.nib
generated
Normal file
Binary file not shown.
BIN
Tests/Nib/test_littleendian.nib
generated
BIN
Tests/Nib/test_littleendian.nib
generated
Binary file not shown.
2
Tests/Nib/test_odd.fa
Normal file
2
Tests/Nib/test_odd.fa
Normal file
@ -0,0 +1,2 @@
|
||||
>odd
|
||||
ATCCgCNcnTCGNgACaatccCCaNtNnacgaTNGgANAAgNCac
|
BIN
Tests/Nib/test_odd_bigendian.nib
generated
Normal file
BIN
Tests/Nib/test_odd_bigendian.nib
generated
Normal file
Binary file not shown.
BIN
Tests/Nib/test_odd_littleendian.nib
generated
Normal file
BIN
Tests/Nib/test_odd_littleendian.nib
generated
Normal file
Binary file not shown.
@ -281,8 +281,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
elif format == "qual":
|
||||
self.assertIsInstance(r2.seq, UnknownSeq)
|
||||
self.assertEqual(len(r2), len(r1))
|
||||
elif format == "nib":
|
||||
self.assertEqual(str(r1.seq).upper(), str(r2.seq))
|
||||
else:
|
||||
self.assertEqual(str(r1.seq), str(r2.seq))
|
||||
# Beware of different quirks and limitations in the
|
||||
@ -780,7 +778,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|4218935|gb|AF074388.1|AF074388).",
|
||||
"Need a Nucleotide or Protein alphabet",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|4218935|gb|AF074388.1|AF074388).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|4218935|gb|AF074388.1|AF074388).",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
@ -804,7 +801,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|5052071|gb|AF067555.1|AF067555).",
|
||||
"Need a Nucleotide or Protein alphabet",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|5052071|gb|AF067555.1|AF067555).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|5052071|gb|AF067555.1|AF067555).",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
@ -851,7 +847,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|5817701|gb|AF142731.1|AF142731).",
|
||||
"Need a Nucleotide or Protein alphabet",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|5817701|gb|AF142731.1|AF142731).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|5817701|gb|AF142731.1|AF142731).",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
@ -875,7 +870,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|3176602|gb|U78617.1|LOU78617).",
|
||||
"Need a Nucleotide or Protein alphabet",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|3176602|gb|U78617.1|LOU78617).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|3176602|gb|U78617.1|LOU78617).",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
@ -899,7 +893,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|5690369|gb|AF158246.1|AF158246).",
|
||||
"Need a Nucleotide or Protein alphabet",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|5690369|gb|AF158246.1|AF158246).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|5690369|gb|AF158246.1|AF158246).",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
@ -1168,7 +1161,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|45478711|ref|NC_005816.1|).",
|
||||
"Need a Nucleotide or Protein alphabet",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|45478711|ref|NC_005816.1|).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|45478711|ref|NC_005816.1|).",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
@ -1357,7 +1349,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|9629357|ref|NC_001802.1|).",
|
||||
"Need a Nucleotide or Protein alphabet",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|9629357|ref|NC_001802.1|).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|9629357|ref|NC_001802.1|).",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
@ -1382,7 +1373,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|9629357|ref|nc_001802.1|).",
|
||||
"Need a Nucleotide or Protein alphabet",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|9629357|ref|nc_001802.1|).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=gi|9629357|ref|nc_001802.1|).",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
@ -2010,7 +2000,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=NM_006141.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NM_006141.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NM_006141.1).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NM_006141.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NM_006141.1).",
|
||||
"Missing SFF flow information",
|
||||
@ -2066,7 +2055,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=AL109817.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AL109817.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AL109817.1).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AL109817.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AL109817.1).",
|
||||
"Missing SFF flow information",
|
||||
@ -2085,7 +2073,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=U05344.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=U05344.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=U05344.1).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=U05344.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=U05344.1).",
|
||||
"Missing SFF flow information",
|
||||
@ -2104,7 +2091,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=AC007323.5).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AC007323.5).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AC007323.5).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AC007323.5).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AC007323.5).",
|
||||
"Missing SFF flow information",
|
||||
@ -2161,7 +2147,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=AL138972.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AL138972.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AL138972.1).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AL138972.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AL138972.1).",
|
||||
"Missing SFF flow information",
|
||||
@ -2180,7 +2165,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=U18266.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=U18266.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=U18266.1).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=U18266.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=U18266.1).",
|
||||
"Missing SFF flow information",
|
||||
@ -2213,7 +2197,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=NC_002678.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NC_002678.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NC_002678.1).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NC_002678.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NC_002678.1).",
|
||||
"Missing SFF flow information",
|
||||
@ -2271,7 +2254,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=NC_005816.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NC_005816.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NC_005816.1).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NC_005816.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NC_005816.1).",
|
||||
"Missing SFF flow information",
|
||||
@ -2290,7 +2272,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=NC_000932.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NC_000932.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NC_000932.1).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NC_000932.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NC_000932.1).",
|
||||
"Missing SFF flow information",
|
||||
@ -2310,7 +2291,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=pBAD30).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=pBAD30).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=pBAD30).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=pBAD30).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=pBAD30).",
|
||||
"Missing SFF flow information",
|
||||
@ -2366,7 +2346,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=NC_001422.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NC_001422.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NC_001422.1).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NC_001422.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=NC_001422.1).",
|
||||
"Missing SFF flow information",
|
||||
@ -2528,7 +2507,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=X56734.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=X56734.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=X56734.1).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=X56734.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=X56734.1).",
|
||||
"Missing SFF flow information",
|
||||
@ -2547,7 +2525,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=DD231055.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=DD231055.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=DD231055.1).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=DD231055.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=DD231055.1).",
|
||||
"Missing SFF flow information",
|
||||
@ -2569,7 +2546,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=DD231055.1).",
|
||||
"Need a Nucleotide or Protein alphabet",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=DD231055.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=DD231055.1).",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
@ -2590,7 +2566,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=AL031232).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AL031232).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AL031232).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AL031232).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AL031232).",
|
||||
"Missing SFF flow information",
|
||||
@ -2609,7 +2584,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=U87107.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=U87107.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=U87107.1).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=U87107.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=U87107.1).",
|
||||
"Missing SFF flow information",
|
||||
@ -2628,7 +2602,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=AAA03323.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AAA03323.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AAA03323.1).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AAA03323.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AAA03323.1).",
|
||||
"Missing SFF flow information",
|
||||
@ -2647,7 +2620,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=AE017046.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AE017046.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AE017046.1).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AE017046.1).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=AE017046.1).",
|
||||
"Missing SFF flow information",
|
||||
@ -2727,7 +2699,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=Test).",
|
||||
"Need a Nucleotide or Protein alphabet",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=Test).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=Test).",
|
||||
"Need a DNA, RNA or Protein alphabet",
|
||||
@ -2749,7 +2720,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=A04195).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=A04195).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=A04195).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=A04195).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=A04195).",
|
||||
"Missing SFF flow information",
|
||||
@ -2769,7 +2739,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=A04195).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=A04195).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=A04195).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=A04195).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=A04195).",
|
||||
"Missing SFF flow information",
|
||||
@ -3364,8 +3333,7 @@ class TestSeqIO(unittest.TestCase):
|
||||
]
|
||||
lengths = [30]
|
||||
alignment = None
|
||||
messages = ["Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"Missing SFF flow information",
|
||||
messages = ["Missing SFF flow information",
|
||||
]
|
||||
self.perform_test("phd", False, "Phd/phd_454", 1, ids, names, sequences, lengths, alignment, messages)
|
||||
|
||||
@ -4147,8 +4115,7 @@ class TestSeqIO(unittest.TestCase):
|
||||
]
|
||||
lengths = [795]
|
||||
alignment = None
|
||||
messages = ["Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"Missing SFF flow information",
|
||||
messages = ["Missing SFF flow information",
|
||||
]
|
||||
self.perform_test("abi", False, "Abi/3100.ab1", 1, ids, names, sequences, lengths, alignment, messages)
|
||||
|
||||
@ -4392,7 +4359,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=Sample).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=Sample).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=Sample).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=Sample).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=Sample).",
|
||||
"Missing SFF flow information",
|
||||
@ -4411,7 +4377,6 @@ class TestSeqIO(unittest.TestCase):
|
||||
messages = ["No suitable quality scores found in letter_annotations of SeqRecord (id=Sample).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=Sample).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=Sample).",
|
||||
"Sequence should contain A,C,G,T,N,a,c,g,t,n only",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=Sample).",
|
||||
"No suitable quality scores found in letter_annotations of SeqRecord (id=Sample).",
|
||||
"Missing SFF flow information",
|
||||
|
@ -10,34 +10,61 @@ from Bio.SeqRecord import SeqRecord
|
||||
|
||||
class TestNibReaderWriter(unittest.TestCase):
|
||||
|
||||
nucleotides = "ACGTAAACCGTACCCGTANANCANNNNACNANNANCN"
|
||||
|
||||
def test_read_bigendian(self):
|
||||
handle = open("Nib/test_bigendian.nib", "rb")
|
||||
records = SeqIO.parse(handle, "nib")
|
||||
record = next(records)
|
||||
def test_read_even(self):
|
||||
handle = open("Nib/test_even.fa")
|
||||
record = SeqIO.read(handle, "fasta")
|
||||
handle.close()
|
||||
self.assertEqual(str(record.seq), self.nucleotides)
|
||||
|
||||
def test_read_littleendian(self):
|
||||
handle = open("Nib/test_littleendian.nib", "rb")
|
||||
records = SeqIO.parse(handle, "nib")
|
||||
record = next(records)
|
||||
sequence = str(record.seq)
|
||||
handle = open("Nib/test_even_bigendian.nib", "rb")
|
||||
record = SeqIO.read(handle, "nib")
|
||||
handle.close()
|
||||
self.assertEqual(str(record.seq), self.nucleotides)
|
||||
self.assertEqual(sequence, str(record.seq))
|
||||
handle = open("Nib/test_even_littleendian.nib", "rb")
|
||||
record = SeqIO.read(handle, "nib")
|
||||
handle.close()
|
||||
self.assertEqual(sequence, str(record.seq))
|
||||
|
||||
def test_write_and_read(self):
|
||||
def test_read_odd(self):
|
||||
handle = open("Nib/test_odd.fa")
|
||||
record = SeqIO.read(handle, "fasta")
|
||||
handle.close()
|
||||
sequence = str(record.seq)
|
||||
handle = open("Nib/test_odd_bigendian.nib", "rb")
|
||||
record = SeqIO.read(handle, "nib")
|
||||
handle.close()
|
||||
self.assertEqual(sequence, str(record.seq))
|
||||
handle = open("Nib/test_odd_littleendian.nib", "rb")
|
||||
record = SeqIO.read(handle, "nib")
|
||||
handle.close()
|
||||
self.assertEqual(sequence, str(record.seq))
|
||||
|
||||
def test_write_even(self):
|
||||
handle = open("Nib/test_even.fa")
|
||||
record = SeqIO.read(handle, "fasta")
|
||||
handle.close()
|
||||
sequence = str(record.seq)
|
||||
handle = BytesIO()
|
||||
sequence = Seq(self.nucleotides)
|
||||
record = SeqRecord(sequence)
|
||||
n = SeqIO.write(record, handle, "nib")
|
||||
self.assertEqual(n, 1)
|
||||
handle.flush()
|
||||
handle.seek(0)
|
||||
record = SeqIO.read(handle, "nib")
|
||||
handle.close()
|
||||
sequence = record.seq
|
||||
self.assertEqual(str(sequence), self.nucleotides)
|
||||
self.assertEqual(sequence, str(record.seq))
|
||||
|
||||
def test_write_odd(self):
|
||||
handle = open("Nib/test_odd.fa")
|
||||
record = SeqIO.read(handle, "fasta")
|
||||
handle.close()
|
||||
sequence = str(record.seq)
|
||||
handle = BytesIO()
|
||||
n = SeqIO.write(record, handle, "nib")
|
||||
self.assertEqual(n, 1)
|
||||
handle.flush()
|
||||
handle.seek(0)
|
||||
record = SeqIO.read(handle, "nib")
|
||||
handle.close()
|
||||
self.assertEqual(sequence, str(record.seq))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
Reference in New Issue
Block a user