mirror of
https://github.com/biopython/biopython.git
synced 2025-10-20 21:53:47 +08:00
Fix assertAlmostEqual calls in tests, when checking for very small numbers in scientific notation by adding "places" parameter with enough precision. This mostly affects comparing E-values between test files and parsing of those files. Quite some cases were actually checking for the wrong value.
14831 lines
673 KiB
Python
14831 lines
673 KiB
Python
# Copyright 2005 by Michiel de Hoon. All rights reserved.
|
|
# This code is part of the Biopython distribution and governed by its
|
|
# license. Please see the LICENSE file that should have been included
|
|
# as part of this package.
|
|
"""Test the Blast XML parser."""
|
|
|
|
import io
|
|
import os
|
|
import unittest
|
|
|
|
import numpy as np
|
|
|
|
from Bio import Blast
|
|
from Bio import StreamModeError
|
|
from Bio.SeqRecord import SeqRecord
|
|
|
|
|
|
class TestBlastp(unittest.TestCase):
|
|
"""Test the Blast XML parser for blastp output."""
|
|
|
|
def check_xml_2218_blastp_002_header(self, records):
|
|
self.assertEqual(records.program, "blastp")
|
|
self.assertEqual(records.version, "BLASTP 2.2.18+")
|
|
self.assertEqual(
|
|
records.reference,
|
|
'Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.',
|
|
)
|
|
self.assertEqual(records.db, "gpipe/9606/Previous/protein")
|
|
self.assertIsInstance(records.query, SeqRecord)
|
|
self.assertEqual(records.query.id, "gi|585505|sp|Q08386|MOPB_RHOCA")
|
|
self.assertEqual(
|
|
records.query.description,
|
|
"Molybdenum-pterin-binding protein mopB >gi|310278|gb|AAA71913.1| molybdenum-pterin-binding protein",
|
|
)
|
|
self.assertEqual(repr(records.query.seq), "Seq(None, length=270)")
|
|
self.assertEqual(len(records.param), 5)
|
|
self.assertEqual(records.param["matrix"], "BLOSUM62")
|
|
self.assertAlmostEqual(records.param["expect"], 0.01)
|
|
self.assertEqual(records.param["gap-open"], 11)
|
|
self.assertEqual(records.param["gap-extend"], 1)
|
|
self.assertEqual(records.param["filter"], "m L; R -d repeat/repeat_9606;")
|
|
|
|
def check_xml_2218_blastp_002_record_0(self, record):
|
|
self.assertEqual(record.num, 1)
|
|
self.assertEqual(
|
|
repr(record),
|
|
"<Bio.Blast.Record query.id='gi|585505|sp|Q08386|MOPB_RHOCA'; no hits>",
|
|
)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "gi|585505|sp|Q08386|MOPB_RHOCA")
|
|
self.assertEqual(
|
|
record.query.description,
|
|
"Molybdenum-pterin-binding protein mopB >gi|310278|gb|AAA71913.1| molybdenum-pterin-binding protein",
|
|
)
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=270)")
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 27252)
|
|
self.assertEqual(record.stat["db-len"], 13958303)
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertEqual(record.stat["eff-space"], 0)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.041)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.267)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.14)
|
|
self.assertEqual(len(record), 0)
|
|
|
|
def check_xml_2218_blastp_002_record_1(self, record):
|
|
self.assertEqual(record.num, 2)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "gi|129628|sp|P07175.1|PARA_AGRTU")
|
|
self.assertEqual(record.query.description, "Protein parA")
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=222)")
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 27252)
|
|
self.assertEqual(record.stat["db-len"], 13958303)
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertEqual(record.stat["eff-space"], 0)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.041)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.267)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.14)
|
|
self.assertEqual(len(record), 0)
|
|
|
|
def test_xml_2218_blastp_002_iterator(self):
|
|
"""Parsing BLASTP 2.2.18+ (xml_2218_blastp_002.xml) by iteration."""
|
|
filename = "xml_2218_blastp_002.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_2218_blastp_002_header(records)
|
|
record = next(records)
|
|
self.check_xml_2218_blastp_002_record_0(record)
|
|
record = next(records)
|
|
self.check_xml_2218_blastp_002_record_1(record)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
records = records[:]
|
|
self.check_xml_2218_blastp_002_header(records)
|
|
record = next(records)
|
|
self.check_xml_2218_blastp_002_record_0(record)
|
|
record = next(records)
|
|
self.check_xml_2218_blastp_002_record_1(record)
|
|
self.assertRaises(StopIteration, next, records)
|
|
with open(path) as stream:
|
|
with self.assertRaises(StreamModeError) as cm:
|
|
Blast.parse(stream)
|
|
self.assertEqual(
|
|
str(cm.exception), "BLAST output files must be opened in binary mode."
|
|
)
|
|
|
|
def test_xml_2218_blastp_002_list(self):
|
|
"""Parsing BLASTP 2.2.18+ (xml_2218_blastp_002.xml) as a list."""
|
|
filename = "xml_2218_blastp_002.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_2218_blastp_002_header(records)
|
|
record = records[0]
|
|
# everything should have been read in by now
|
|
self.check_xml_2218_blastp_002_record_0(record)
|
|
record = records[1]
|
|
self.check_xml_2218_blastp_002_record_1(record)
|
|
# header should still be OK
|
|
self.check_xml_2218_blastp_002_header(records)
|
|
self.assertEqual(len(records), 2)
|
|
self.assertEqual(
|
|
str(records),
|
|
"""\
|
|
Program: BLASTP 2.2.18+
|
|
db: gpipe/9606/Previous/protein
|
|
|
|
Query: gi|585505|sp|Q08386|MOPB_RHOCA (length=270)
|
|
Molybdenum-pterin-binding protein mopB >gi|310278|gb|AAA71913.1|
|
|
molybdenum-pterin-binding protein
|
|
Hits: No hits found
|
|
|
|
Query: gi|129628|sp|P07175.1|PARA_AGRTU (length=222)
|
|
Protein parA
|
|
Hits: No hits found""",
|
|
)
|
|
# check if converting the records to a list does not lose the header:
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
records = records[:]
|
|
self.check_xml_2218_blastp_002_header(records)
|
|
|
|
def test_xml_2218_blastp_002_writer(self):
|
|
"""Writing BLASTP 2.2.18+ (xml_2218_blastp_002.xml)."""
|
|
filename = "xml_2218_blastp_002.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
records = records[:]
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream)
|
|
self.assertEqual(n, 2)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_xml_2218_blastp_002_header(written_records)
|
|
record = next(written_records)
|
|
self.check_xml_2218_blastp_002_record_0(record)
|
|
record = next(written_records)
|
|
self.check_xml_2218_blastp_002_record_1(record)
|
|
self.assertRaises(StopIteration, next, written_records)
|
|
|
|
def test_xml_2218L_blastp_001_parser(self):
|
|
"""Parsing blastp 2.2.18 [Mar-02-2008] (xml_2218L_blastp_001.xml)."""
|
|
filename = "xml_2218L_blastp_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_2218L_blastp_001_records(records)
|
|
self.check_xml_2218L_blastp_001_str(records)
|
|
|
|
with Blast.parse(path) as records:
|
|
self.check_xml_2218L_blastp_001_records(records)
|
|
self.check_xml_2218L_blastp_001_str(records)
|
|
|
|
with open(path, "rb") as stream:
|
|
record = Blast.read(stream)
|
|
self.check_xml_2218L_blastp_001_record(record)
|
|
|
|
record = Blast.read(path)
|
|
self.check_xml_2218L_blastp_001_record(record)
|
|
|
|
def check_xml_2218L_blastp_001_str(self, records):
|
|
self.assertEqual(
|
|
str(records),
|
|
"""\
|
|
Program: blastp 2.2.18 [Mar-02-2008]
|
|
db: /Users/pjcock/Downloads/Software/blast-2.2.18/data/nr
|
|
|
|
Hits: No hits found""",
|
|
)
|
|
|
|
def check_xml_2218L_blastp_001_records(self, records):
|
|
self.assertEqual(records.program, "blastp")
|
|
self.assertEqual(records.version, "blastp 2.2.18 [Mar-02-2008]")
|
|
self.assertEqual(
|
|
records.reference,
|
|
'~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~"Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs", Nucleic Acids Res. 25:3389-3402.',
|
|
)
|
|
self.assertEqual(
|
|
records.db, "/Users/pjcock/Downloads/Software/blast-2.2.18/data/nr"
|
|
)
|
|
self.assertIsInstance(records.query, SeqRecord)
|
|
self.assertEqual(records.query.id, "lcl|1_0")
|
|
self.assertEqual(records.query.description, "Fake")
|
|
self.assertEqual(repr(records.query.seq), "Seq(None, length=9)")
|
|
self.assertEqual(len(records.param), 5)
|
|
self.assertEqual(records.param["matrix"], "BLOSUM62")
|
|
self.assertAlmostEqual(records.param["expect"], 1e-05)
|
|
self.assertEqual(records.param["gap-open"], 11)
|
|
self.assertEqual(records.param["gap-extend"], 1)
|
|
self.assertEqual(records.param["filter"], "F")
|
|
record = next(records)
|
|
self.assertRaises(StopIteration, next, records)
|
|
self.check_xml_2218L_blastp_001_record(record)
|
|
|
|
def check_xml_2218L_blastp_001_record(self, record):
|
|
self.assertEqual(record.num, 1)
|
|
self.assertIsNone(record.query)
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 6589360)
|
|
self.assertEqual(record.stat["db-len"], 2253133281)
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertAlmostEqual(record.stat["eff-space"], 2.02782e10)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.041)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.267)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.14)
|
|
self.assertEqual(len(record), 0)
|
|
|
|
def test_xml_2218L_blastp_001_writer(self):
|
|
"""Writing blastp 2.2.18 [Mar-02-2008] (xml_2218L_blastp_001.xml)."""
|
|
filename = "xml_2218L_blastp_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream)
|
|
self.assertEqual(n, 1)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_xml_2218L_blastp_001_records(written_records)
|
|
|
|
def test_xml_2226_blastp_003(self):
|
|
"""Parsing BLASTP 2.2.26+ (xml_2226_blastp_003.xml)."""
|
|
filename = "xml_2226_blastp_003.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_2226_blastp_003(records)
|
|
with Blast.parse(path) as records:
|
|
self.check_xml_2226_blastp_003(records)
|
|
with Blast.parse(path) as records:
|
|
self.assertEqual(
|
|
str(records),
|
|
"""\
|
|
Program: BLASTP 2.2.26+
|
|
db: db/minirefseq_prot
|
|
|
|
Query: Query_1 (length=102)
|
|
gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus
|
|
subtilis subsp. subtilis str. 168]
|
|
Hits: ---- ----- ----------------------------------------------------------
|
|
# # HSP ID + description
|
|
---- ----- ----------------------------------------------------------
|
|
0 1 gnl|BL_ORD_ID|1 gi|308175296|ref|YP_003922001.1| membr...
|
|
1 1 gnl|BL_ORD_ID|2 gi|375363999|ref|YP_005132038.1| lytA ...
|
|
2 1 gnl|BL_ORD_ID|3 gi|154687679|ref|YP_001422840.1| LytA ...
|
|
3 1 gnl|BL_ORD_ID|4 gi|311070071|ref|YP_003974994.1| unnam...
|
|
4 1 gnl|BL_ORD_ID|15 gi|332258565|ref|XP_003278367.1| PRED...""",
|
|
)
|
|
record = Blast.read(path)
|
|
self.assertEqual(
|
|
str(record),
|
|
"""\
|
|
Program: BLASTP 2.2.26+
|
|
db: db/minirefseq_prot
|
|
Query: Query_1 (length=102)
|
|
gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus
|
|
subtilis subsp. subtilis str. 168]
|
|
Hits: ---- ----- ----------------------------------------------------------
|
|
# # HSP ID + description
|
|
---- ----- ----------------------------------------------------------
|
|
0 1 gnl|BL_ORD_ID|1 gi|308175296|ref|YP_003922001.1| membr...
|
|
1 1 gnl|BL_ORD_ID|2 gi|375363999|ref|YP_005132038.1| lytA ...
|
|
2 1 gnl|BL_ORD_ID|3 gi|154687679|ref|YP_001422840.1| LytA ...
|
|
3 1 gnl|BL_ORD_ID|4 gi|311070071|ref|YP_003974994.1| unnam...
|
|
4 1 gnl|BL_ORD_ID|15 gi|332258565|ref|XP_003278367.1| PRED...""",
|
|
)
|
|
|
|
def check_xml_2226_blastp_003(self, records):
|
|
self.assertEqual(records.program, "blastp")
|
|
self.assertEqual(records.version, "BLASTP 2.2.26+")
|
|
self.assertEqual(
|
|
records.reference,
|
|
'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.',
|
|
)
|
|
self.assertEqual(records.db, "db/minirefseq_prot")
|
|
self.assertIsInstance(records.query, SeqRecord)
|
|
self.assertEqual(records.query.id, "Query_1")
|
|
self.assertEqual(
|
|
records.query.description,
|
|
"gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]",
|
|
)
|
|
self.assertEqual(repr(records.query.seq), "Seq(None, length=102)")
|
|
self.assertEqual(len(records.param), 5)
|
|
self.assertEqual(records.param["matrix"], "BLOSUM62")
|
|
self.assertAlmostEqual(records.param["expect"], 10.0)
|
|
self.assertEqual(records.param["gap-open"], 11)
|
|
self.assertEqual(records.param["gap-extend"], 1)
|
|
self.assertEqual(records.param["filter"], "F")
|
|
record = next(records)
|
|
self.assertEqual(record.num, 1)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "Query_1")
|
|
self.assertEqual(
|
|
record.query.description,
|
|
"gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]",
|
|
)
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=102)")
|
|
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 20)
|
|
self.assertEqual(record.stat["db-len"], 6406)
|
|
self.assertEqual(record.stat["hsp-len"], 38)
|
|
self.assertEqual(record.stat["eff-space"], 361344)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.041)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.267)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.14)
|
|
self.assertEqual(len(record), 5)
|
|
hit = record[0]
|
|
self.assertEqual(hit.num, 1)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gnl|BL_ORD_ID|1")
|
|
self.assertEqual(hit.target.name, "1")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"gi|308175296|ref|YP_003922001.1| membrane bound lipoprotein [Bacillus amyloliquefaciens DSM 7]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=100)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 350.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 139.428)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 1.99275e-46, places=51)
|
|
self.assertEqual(hsp.annotations["identity"], 69)
|
|
self.assertEqual(hsp.annotations["positive"], 81)
|
|
self.assertEqual(hsp.annotations["gaps"], 2)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 30, 30, 100],
|
|
[ 0, 30, 32, 102]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 102))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVT...RAN')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKKIFGCLFFILLLAGCGVTNEKSQGEDAGEKLVTKEGTYVGLADTHTIEVTVD...PAN')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gnl|BL_ORD_ID|1")
|
|
self.assertEqual(hsp.target.name, "1")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"gi|308175296|ref|YP_003922001.1| membrane bound lipoprotein [Bacillus amyloliquefaciens DSM 7]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKK LFFILLL+GCGV ++KSQGED + TKEGTYVGLADTHTIEVTVD+EPVS DITEES D+ N+G+KVT+ Y+KN +GQL+LKDIE AN",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='gnl|BL_ORD_ID|1' query.id='Query_1'; 2 rows x 102 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_1 Length: 102 Strand: Plus
|
|
gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus
|
|
subtilis subsp. subtilis str. 168]
|
|
Target: gnl|BL_ORD_ID|1 Length: 100 Strand: Plus
|
|
gi|308175296|ref|YP_003922001.1| membrane bound lipoprotein [Bacillus
|
|
amyloliquefaciens DSM 7]
|
|
|
|
Score:139 bits(350), Expect:2e-46,
|
|
Identities:69/102(68%), Positives:81/102(79%), Gaps:2.102(2%)
|
|
|
|
gnl|BL_OR 0 MKKIFGCLFFILLLAGCGVTNEKSQGEDAG--EKLVTKEGTYVGLADTHTIEVTVDHEPV
|
|
0 |||....|||||||.||||...||||||..--....||||||||||||||||||||.|||
|
|
Query_1 0 MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVDNEPV
|
|
|
|
gnl|BL_OR 58 SFDITEESADDVKNLNNGEKVTVKYQKNSKGQLVLKDIEPAN 100
|
|
60 |.||||||..|....|.|.|||..|.||..|||.|||||.|| 102
|
|
Query_1 60 SLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERAN 102
|
|
|
|
""",
|
|
)
|
|
hit = record[1]
|
|
self.assertEqual(hit.num, 2)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gnl|BL_ORD_ID|2")
|
|
self.assertEqual(hit.target.name, "2")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"gi|375363999|ref|YP_005132038.1| lytA gene product [Bacillus amyloliquefaciens subsp. plantarum CAU B946]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=105)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 219.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 88.9669)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 6.94052e-27, places=32)
|
|
self.assertEqual(hsp.annotations["identity"], 48)
|
|
self.assertEqual(hsp.annotations["positive"], 69)
|
|
self.assertEqual(hsp.annotations["gaps"], 5)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 13, 17, 32, 32, 104],
|
|
[ 0, 13, 13, 28, 29, 101]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 105))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq({0: 'MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVT...ERA'}, length=102)",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MKKTIAASFLILLFSVVLAACGTAEQSKKGSGSSENQAQKETAYYVGMADTHTI...EKA'}, length=105)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gnl|BL_ORD_ID|2")
|
|
self.assertEqual(hsp.target.name, "2")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"gi|375363999|ref|YP_005132038.1| lytA gene product [Bacillus amyloliquefaciens subsp. plantarum CAU B946]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKK IA F ILL L+ CG Q +G S ++ + + YVG+ADTHTIEV VD++PVS + +++ + L+KF+ DKV+ITY ND+GQ +K+IE+A",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='gnl|BL_ORD_ID|2' query.id='Query_1'; 2 rows x 105 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_1 Length: 102 Strand: Plus
|
|
gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus
|
|
subtilis subsp. subtilis str. 168]
|
|
Target: gnl|BL_ORD_ID|2 Length: 105 Strand: Plus
|
|
gi|375363999|ref|YP_005132038.1| lytA gene product [Bacillus
|
|
amyloliquefaciens subsp. plantarum CAU B946]
|
|
|
|
Score:88 bits(219), Expect:7e-27,
|
|
Identities:48/105(46%), Positives:69/105(66%), Gaps:5.105(5%)
|
|
|
|
gnl|BL_OR 0 MKKTIAASFLILLFSVVLAACGTAEQSKKGSG-SSENQAQKETAYYVGMADTHTIEVKVD
|
|
0 |||.||..|.|||----|..||...|...|..-|...........|||.||||||||.||
|
|
Query_1 0 MKKFIALLFFILL----LSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVD
|
|
|
|
gnl|BL_OR 59 DQPVSFEFSDDFSDVLNKFSENDKVSITYFTNDKGQKEIKEIEKA 104
|
|
60 ..|||..........|.||...|||.|||..||.||...|.||.| 105
|
|
Query_1 56 NEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERA 101
|
|
|
|
""",
|
|
)
|
|
hit = record[2]
|
|
self.assertEqual(hit.num, 3)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gnl|BL_ORD_ID|3")
|
|
self.assertEqual(hit.target.name, "3")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"gi|154687679|ref|YP_001422840.1| LytA [Bacillus amyloliquefaciens FZB42]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=105)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 219.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 88.9669)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 8.41012e-27, places=32)
|
|
self.assertEqual(hsp.annotations["identity"], 48)
|
|
self.assertEqual(hsp.annotations["positive"], 69)
|
|
self.assertEqual(hsp.annotations["gaps"], 5)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 13, 17, 32, 32, 104],
|
|
[ 0, 13, 13, 28, 29, 101]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 105))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq({0: 'MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVT...ERA'}, length=102)",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MKKTIAASFLILLFSVVLAACGTADQSKKGSGSSENQAQKETAYYVGMADTHTI...EKA'}, length=105)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gnl|BL_ORD_ID|3")
|
|
self.assertEqual(hsp.target.name, "3")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"gi|154687679|ref|YP_001422840.1| LytA [Bacillus amyloliquefaciens FZB42]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKK IA F ILL L+ CG Q +G S ++ + + YVG+ADTHTIEV VD++PVS + +++ + L+KF+ DKV+ITY ND+GQ +K+IE+A",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='gnl|BL_ORD_ID|3' query.id='Query_1'; 2 rows x 105 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_1 Length: 102 Strand: Plus
|
|
gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus
|
|
subtilis subsp. subtilis str. 168]
|
|
Target: gnl|BL_ORD_ID|3 Length: 105 Strand: Plus
|
|
gi|154687679|ref|YP_001422840.1| LytA [Bacillus amyloliquefaciens FZB42]
|
|
|
|
Score:88 bits(219), Expect:8e-27,
|
|
Identities:48/105(46%), Positives:69/105(66%), Gaps:5.105(5%)
|
|
|
|
gnl|BL_OR 0 MKKTIAASFLILLFSVVLAACGTADQSKKGSG-SSENQAQKETAYYVGMADTHTIEVKVD
|
|
0 |||.||..|.|||----|..||...|...|..-|...........|||.||||||||.||
|
|
Query_1 0 MKKFIALLFFILL----LSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVD
|
|
|
|
gnl|BL_OR 59 DQPVSFEFSDDFSDVLNKFSENDKVSITYFTNDKGQKEIKEIEKA 104
|
|
60 ..|||..........|.||...|||.|||..||.||...|.||.| 105
|
|
Query_1 56 NEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIERA 101
|
|
|
|
""",
|
|
)
|
|
hit = record[3]
|
|
self.assertEqual(hit.num, 4)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gnl|BL_ORD_ID|4")
|
|
self.assertEqual(hit.target.name, "4")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"gi|311070071|ref|YP_003974994.1| unnamed protein product [Bacillus atrophaeus 1942]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=105)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 204.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 83.1889)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 1.37847e-24, places=29)
|
|
self.assertEqual(hsp.annotations["identity"], 45)
|
|
self.assertEqual(hsp.annotations["positive"], 66)
|
|
self.assertEqual(hsp.annotations["gaps"], 5)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 13, 17, 30, 30, 103],
|
|
[ 0, 13, 13, 26, 27, 100]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 104))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq({0: 'MKKFIALLFFILLLSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVT...IER'}, length=102)",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MKKNVASSFLILLFSIILAACGTAEQSKEGNGSSSSQVQNETAYYVGMADTHTI...IEK'}, length=105)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gnl|BL_ORD_ID|4")
|
|
self.assertEqual(hsp.target.name, "4")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"gi|311070071|ref|YP_003974994.1| unnamed protein product [Bacillus atrophaeus 1942]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKK +A F ILL L+ CG Q +G + S S ++ + YVG+ADTHTIEV +D++PVS + T++ + L++F DKV I+Y ND+GQ L +IE+",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='gnl|BL_ORD_ID|4' query.id='Query_1'; 2 rows x 104 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_1 Length: 102 Strand: Plus
|
|
gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus
|
|
subtilis subsp. subtilis str. 168]
|
|
Target: gnl|BL_ORD_ID|4 Length: 105 Strand: Plus
|
|
gi|311070071|ref|YP_003974994.1| unnamed protein product [Bacillus
|
|
atrophaeus 1942]
|
|
|
|
Score:83 bits(204), Expect:1e-24,
|
|
Identities:45/104(43%), Positives:66/104(63%), Gaps:5.104(5%)
|
|
|
|
gnl|BL_OR 0 MKKNVASSFLILLFSIILAACGTAEQSKEG-NGSSSSQVQNETAYYVGMADTHTIEVKID
|
|
0 |||..|..|.|||----|..||...|...|-..|..|........|||.||||||||..|
|
|
Query_1 0 MKKFIALLFFILL----LSGCGVNSQKSQGEDVSPDSNIETKEGTYVGLADTHTIEVTVD
|
|
|
|
gnl|BL_OR 59 DQPVSFEFTDDFSEILNEFEENDKVNISYLTNDKGQKELTEIEK 103
|
|
60 ..|||...|......|..|...|||.|.|..||.||..|..||. 104
|
|
Query_1 56 NEPVSLDITEESTSDLDKFNSGDKVTITYEKNDEGQLLLKDIER 100
|
|
|
|
""",
|
|
)
|
|
hit = record[4]
|
|
self.assertEqual(hit.num, 5)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gnl|BL_ORD_ID|15")
|
|
self.assertEqual(hit.target.name, "15")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"gi|332258565|ref|XP_003278367.1| PREDICTED: UPF0764 protein C16orf89-like [Nomascus leucogenys]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=132)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 29.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 15.779)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 7.12269)
|
|
self.assertEqual(hsp.annotations["identity"], 7)
|
|
self.assertEqual(hsp.annotations["positive"], 11)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 79, 104],
|
|
[ 59, 84]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 25))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq), "Seq({59: 'VSLDITEESTSDLDKFNSGDKVTIT'}, length=102)"
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus subtilis subsp. subtilis str. 168]",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq), "Seq({79: 'VEMGFLHVGQAGLELVTSGDPPTLT'}, length=132)"
|
|
)
|
|
self.assertEqual(hsp.target.id, "gnl|BL_ORD_ID|15")
|
|
self.assertEqual(hsp.target.name, "15")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"gi|332258565|ref|XP_003278367.1| PREDICTED: UPF0764 protein C16orf89-like [Nomascus leucogenys]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(hsp.annotations["midline"], "V + + L+ SGD T+T")
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='gnl|BL_ORD_ID|15' query.id='Query_1'; 2 rows x 25 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_1 Length: 102 Strand: Plus
|
|
gi|16080617|ref|NP_391444.1| membrane bound lipoprotein [Bacillus
|
|
subtilis subsp. subtilis str. 168]
|
|
Target: gnl|BL_ORD_ID|15 Length: 132 Strand: Plus
|
|
gi|332258565|ref|XP_003278367.1| PREDICTED: UPF0764 protein
|
|
C16orf89-like [Nomascus leucogenys]
|
|
|
|
Score:15 bits(29), Expect:7,
|
|
Identities:7/25(28%), Positives:11/25(44%), Gaps:0.25(0%)
|
|
|
|
gnl|BL_OR 79 VEMGFLHVGQAGLELVTSGDPPTLT 104
|
|
0 |...........|....|||..|.| 25
|
|
Query_1 59 VSLDITEESTSDLDKFNSGDKVTIT 84
|
|
|
|
""",
|
|
)
|
|
with self.assertRaises(IndexError) as cm:
|
|
record[5]
|
|
self.assertEqual(str(cm.exception), "index out of range")
|
|
with self.assertRaises(TypeError) as cm:
|
|
record[None]
|
|
self.assertEqual(str(cm.exception), "key must be an integer, slice, or str")
|
|
with self.assertRaises(KeyError) as cm:
|
|
record["weird_key"]
|
|
self.assertEqual(str(cm.exception), "'weird_key'")
|
|
target_id = "gnl|BL_ORD_ID|4"
|
|
self.assertIn(target_id, record)
|
|
self.assertNotIn("weird_id", record)
|
|
self.assertEqual(record[target_id].target.id, target_id)
|
|
self.assertEqual(record.index(target_id), 3)
|
|
with self.assertRaises(ValueError) as cm:
|
|
record.index("weird_id")
|
|
self.assertEqual(str(cm.exception), "'weird_id' not found")
|
|
self.assertEqual(
|
|
repr(hit),
|
|
"<Bio.Blast.Hit target.id='gnl|BL_ORD_ID|15' query.id='Query_1'; 1 HSP>",
|
|
)
|
|
self.assertEqual(
|
|
repr(hit[:0]), "<Bio.Blast.Hit target.id='gnl|BL_ORD_ID|15'; no hits>"
|
|
)
|
|
self.assertEqual(
|
|
record.keys(),
|
|
[
|
|
"gnl|BL_ORD_ID|1",
|
|
"gnl|BL_ORD_ID|2",
|
|
"gnl|BL_ORD_ID|3",
|
|
"gnl|BL_ORD_ID|4",
|
|
"gnl|BL_ORD_ID|15",
|
|
],
|
|
)
|
|
|
|
def test_xml_2226_blastp_003_writer(self):
|
|
"""Writing BLASTP 2.2.26+ (xml_2226_blastp_003.xml)."""
|
|
filename = "xml_2226_blastp_003.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream)
|
|
self.assertEqual(n, 1)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_xml_2226_blastp_003(written_records)
|
|
|
|
def test_phiblast_parser(self):
|
|
"""Parsing BLASTP 2.14.1+ (phiblast.xml)."""
|
|
filename = "phiblast.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_phiblast_records(records)
|
|
with Blast.parse(path) as records:
|
|
self.check_phiblast_records(records)
|
|
with open(path, "rb") as stream:
|
|
record = Blast.read(stream)
|
|
self.check_phiblast_record(record)
|
|
record = Blast.read(path)
|
|
self.check_phiblast_record(record)
|
|
|
|
def check_phiblast_records(self, records):
|
|
self.assertEqual(records.program, "blastp")
|
|
self.assertEqual(records.version, "BLASTP 2.14.1+")
|
|
self.assertEqual(
|
|
records.reference,
|
|
'Zheng Zhang, Alejandro A. Schäffer, Webb Miller, Thomas L. Madden, David J. Lipman, Eugene V. Koonin, and Stephen F. Altschul (1998), "Protein sequence similarity searches using patterns as seeds", Nucleic Acids Res. 26:3986-3990.',
|
|
)
|
|
self.assertEqual(records.db, "nr")
|
|
self.assertIsInstance(records.query, SeqRecord)
|
|
self.assertEqual(records.query.id, "Query_74414")
|
|
self.assertEqual(records.query.description, "unnamed protein product")
|
|
self.assertEqual(repr(records.query.seq), "Seq(None, length=664)")
|
|
self.assertEqual(len(records.param), 6)
|
|
self.assertEqual(records.param["matrix"], "BLOSUM62")
|
|
self.assertAlmostEqual(records.param["expect"], 0.05)
|
|
self.assertEqual(records.param["gap-open"], 11)
|
|
self.assertEqual(records.param["gap-extend"], 1)
|
|
self.assertEqual(records.param["filter"], "F")
|
|
self.assertEqual(
|
|
records.param["pattern"],
|
|
"[LIVMF]-G-E-x-[GAS]-[LIVM]-x(5,11)-R-[STAQ]-A-x-[LIVMA]-x-[STACV]",
|
|
)
|
|
record = next(records)
|
|
self.assertEqual(record.num, 1)
|
|
self.assertRaises(StopIteration, next, records)
|
|
self.check_phiblast_record(record)
|
|
|
|
def check_phiblast_record(self, record):
|
|
self.assertEqual(record.num, 1)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "Query_74414")
|
|
self.assertEqual(record.query.description, "unnamed protein product")
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=664)")
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 633473216)
|
|
self.assertEqual(record.stat["db-len"], 248084082182)
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertEqual(record.stat["eff-space"], 0)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.047)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.27)
|
|
self.assertAlmostEqual(record.stat["entropy"], 1.0)
|
|
self.assertEqual(len(record), 10)
|
|
hit = record[0]
|
|
self.assertEqual(hit.num, 1)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|NP_001075863.1|")
|
|
self.assertEqual(hit.target.name, "NP_001075863")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"cyclic nucleotide-gated olfactory channel [Oryctolagus cuniculus] >emb|CAA42201.1| aorta CNG channel (rACNG) [Oryctolagus cuniculus] >prf||1919268A cyclic nucleotide-gated channel [Oryctolagus cuniculus]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=732)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 3336.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 1290.65)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 664)
|
|
self.assertEqual(hsp.annotations["positive"], 664)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 68, 732],
|
|
[ 0, 664]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 664))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_74414")
|
|
self.assertEqual(hsp.query.description, "unnamed protein product")
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({68: 'MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP'}, length=732)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "ref|NP_001075863.1|")
|
|
self.assertEqual(hsp.target.name, "NP_001075863")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"cyclic nucleotide-gated olfactory channel [Oryctolagus cuniculus] >emb|CAA42201.1| aorta CNG channel (rACNG) [Oryctolagus cuniculus] >prf||1919268A cyclic nucleotide-gated channel [Oryctolagus cuniculus]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQQRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGDGKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDYFSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIHNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKKTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKLKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAAAEQP",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_74414 Length: 664 Strand: Plus
|
|
unnamed protein product
|
|
Target: ref|NP_001075863.1| Length: 732 Strand: Plus
|
|
cyclic nucleotide-gated olfactory channel [Oryctolagus cuniculus]
|
|
>emb|CAA42201.1| aorta CNG channel (rACNG) [Oryctolagus cuniculus]
|
|
>prf||1919268A cyclic nucleotide-gated channel [Oryctolagus cuniculus]
|
|
|
|
Score:1290 bits(3336), Expect:0,
|
|
Identities:664/664(100%), Positives:664/664(100%), Gaps:0.664(0%)
|
|
|
|
ref|NP_00 68 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ
|
|
|
|
ref|NP_00 128 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD
|
|
|
|
ref|NP_00 188 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY
|
|
|
|
ref|NP_00 248 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
|
|
ref|NP_00 308 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
240 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
|
|
ref|NP_00 368 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
|
|
ref|NP_00 428 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
360 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
|
|
ref|NP_00 488 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
420 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
|
|
ref|NP_00 548 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
480 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
|
|
ref|NP_00 608 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
540 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
|
|
ref|NP_00 668 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA
|
|
600 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA
|
|
|
|
ref|NP_00 728 AEQP 732
|
|
660 |||| 664
|
|
Query_744 660 AEQP 664
|
|
|
|
""",
|
|
)
|
|
hit = record[1]
|
|
self.assertEqual(hit.num, 2)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|XP_051689802.1|")
|
|
self.assertEqual(hit.target.name, "XP_051689802")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"cyclic nucleotide-gated olfactory channel isoform X3 [Oryctolagus cuniculus] >sp|Q28718.1| RecName: Full=Cyclic nucleotide-gated olfactory channel; AltName: Full=Aorta CNG channel; Short=RACNG; AltName: Full=Cyclic nucleotide-gated cation channel 2; AltName: Full=Cyclic nucleotide-gated channel alpha-2; Short=CNG channel alpha-2; Short=CNG-2; Short=CNG2 [Oryctolagus cuniculus]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=664)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 3336.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 1290.65)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 664)
|
|
self.assertEqual(hsp.annotations["positive"], 664)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 664],
|
|
[ 0, 664]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 664))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_74414")
|
|
self.assertEqual(hsp.query.description, "unnamed protein product")
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "ref|XP_051689802.1|")
|
|
self.assertEqual(hsp.target.name, "XP_051689802")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"cyclic nucleotide-gated olfactory channel isoform X3 [Oryctolagus cuniculus] >sp|Q28718.1| RecName: Full=Cyclic nucleotide-gated olfactory channel; AltName: Full=Aorta CNG channel; Short=RACNG; AltName: Full=Cyclic nucleotide-gated cation channel 2; AltName: Full=Cyclic nucleotide-gated channel alpha-2; Short=CNG channel alpha-2; Short=CNG-2; Short=CNG2 [Oryctolagus cuniculus]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQQRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGDGKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDYFSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIHNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKKTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKLKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAAAEQP",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_74414 Length: 664 Strand: Plus
|
|
unnamed protein product
|
|
Target: ref|XP_051689802.1| Length: 664 Strand: Plus
|
|
cyclic nucleotide-gated olfactory channel isoform X3 [Oryctolagus
|
|
cuniculus] >sp|Q28718.1| RecName: Full=Cyclic nucleotide-gated olfactory
|
|
channel; AltName: Full=Aorta CNG channel; Short=RACNG; AltName:
|
|
Full=Cyclic nucleotide-gated cation channel 2; AltName: Full=Cyclic
|
|
nucleotide-gated channel alpha-2; Short=CNG channel alpha-2;
|
|
Short=CNG-2; Short=CNG2 [Oryctolagus cuniculus]
|
|
|
|
Score:1290 bits(3336), Expect:0,
|
|
Identities:664/664(100%), Positives:664/664(100%), Gaps:0.664(0%)
|
|
|
|
ref|XP_05 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ
|
|
|
|
ref|XP_05 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD
|
|
|
|
ref|XP_05 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY
|
|
|
|
ref|XP_05 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
|
|
ref|XP_05 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
240 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
|
|
ref|XP_05 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
|
|
ref|XP_05 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
360 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
|
|
ref|XP_05 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
420 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
|
|
ref|XP_05 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
480 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
|
|
ref|XP_05 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
540 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
|
|
ref|XP_05 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA
|
|
600 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA
|
|
|
|
ref|XP_05 660 AEQP 664
|
|
660 |||| 664
|
|
Query_744 660 AEQP 664
|
|
|
|
""",
|
|
)
|
|
hit = record[2]
|
|
self.assertEqual(hit.num, 3)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|XP_017206345.1|")
|
|
self.assertEqual(hit.target.name, "XP_017206345")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"cyclic nucleotide-gated olfactory channel isoform X2 [Oryctolagus cuniculus]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=677)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 3336.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 1290.65)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 664)
|
|
self.assertEqual(hsp.annotations["positive"], 664)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 13, 677],
|
|
[ 0, 664]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 664))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_74414")
|
|
self.assertEqual(hsp.query.description, "unnamed protein product")
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({13: 'MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP'}, length=677)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "ref|XP_017206345.1|")
|
|
self.assertEqual(hsp.target.name, "XP_017206345")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"cyclic nucleotide-gated olfactory channel isoform X2 [Oryctolagus cuniculus]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQQRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGDGKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDYFSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIHNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKKTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKLKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAAAEQP",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_74414 Length: 664 Strand: Plus
|
|
unnamed protein product
|
|
Target: ref|XP_017206345.1| Length: 677 Strand: Plus
|
|
cyclic nucleotide-gated olfactory channel isoform X2 [Oryctolagus
|
|
cuniculus]
|
|
|
|
Score:1290 bits(3336), Expect:0,
|
|
Identities:664/664(100%), Positives:664/664(100%), Gaps:0.664(0%)
|
|
|
|
ref|XP_01 13 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ
|
|
|
|
ref|XP_01 73 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD
|
|
|
|
ref|XP_01 133 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY
|
|
|
|
ref|XP_01 193 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
|
|
ref|XP_01 253 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
240 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
|
|
ref|XP_01 313 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
|
|
ref|XP_01 373 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
360 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
|
|
ref|XP_01 433 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
420 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
|
|
ref|XP_01 493 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
480 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
|
|
ref|XP_01 553 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
540 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
|
|
ref|XP_01 613 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA
|
|
600 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA
|
|
|
|
ref|XP_01 673 AEQP 677
|
|
660 |||| 664
|
|
Query_744 660 AEQP 664
|
|
|
|
""",
|
|
)
|
|
hit = record[3]
|
|
self.assertEqual(hit.num, 4)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|XP_051689801.1|")
|
|
self.assertEqual(hit.target.name, "XP_051689801")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"cyclic nucleotide-gated olfactory channel isoform X1 [Oryctolagus cuniculus]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=687)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 3336.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 1290.65)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 664)
|
|
self.assertEqual(hsp.annotations["positive"], 664)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 23, 687],
|
|
[ 0, 664]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 664))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_74414")
|
|
self.assertEqual(hsp.query.description, "unnamed protein product")
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({23: 'MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP'}, length=687)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "ref|XP_051689801.1|")
|
|
self.assertEqual(hsp.target.name, "XP_051689801")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"cyclic nucleotide-gated olfactory channel isoform X1 [Oryctolagus cuniculus]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQQRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGDGKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDYFSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIHNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKKTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKLKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAAAEQP",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_74414 Length: 664 Strand: Plus
|
|
unnamed protein product
|
|
Target: ref|XP_051689801.1| Length: 687 Strand: Plus
|
|
cyclic nucleotide-gated olfactory channel isoform X1 [Oryctolagus
|
|
cuniculus]
|
|
|
|
Score:1290 bits(3336), Expect:0,
|
|
Identities:664/664(100%), Positives:664/664(100%), Gaps:0.664(0%)
|
|
|
|
ref|XP_05 23 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ
|
|
|
|
ref|XP_05 83 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD
|
|
|
|
ref|XP_05 143 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY
|
|
|
|
ref|XP_05 203 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
|
|
ref|XP_05 263 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
240 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
|
|
ref|XP_05 323 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
|
|
ref|XP_05 383 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
360 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
|
|
ref|XP_05 443 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
420 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
|
|
ref|XP_05 503 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
480 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
|
|
ref|XP_05 563 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
540 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
|
|
ref|XP_05 623 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA
|
|
600 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA
|
|
|
|
ref|XP_05 683 AEQP 687
|
|
660 |||| 664
|
|
Query_744 660 AEQP 664
|
|
|
|
""",
|
|
)
|
|
hit = record[4]
|
|
self.assertEqual(hit.num, 5)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|XP_004407164.1|")
|
|
self.assertEqual(hit.target.name, "XP_004407164")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"PREDICTED: cyclic nucleotide-gated olfactory channel [Odobenus rosmarus divergens]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=664)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 3231.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 1249.79)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 639)
|
|
self.assertEqual(hsp.annotations["positive"], 652)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 664],
|
|
[ 0, 664]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 664))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_74414")
|
|
self.assertEqual(hsp.query.description, "unnamed protein product")
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MTEKSNGVKSSPANNHNHHTPPAIKANGKDDHRTNSRPQSAADDDTSSELQRLA...DEP')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "ref|XP_004407164.1|")
|
|
self.assertEqual(hsp.target.name, "XP_004407164")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"PREDICTED: cyclic nucleotide-gated olfactory channel [Odobenus rosmarus divergens]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTEKSNGVKSSPANNHN+H P IKANGKD+ RT SRPQSAADDDTSSELQRLAEMDAPQQ RGGFRRIVRLVG+IR+WAN+NFREEE RPDSFLERFRGPELQTVTTQQGDGKGDKDG+GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGY+LVWLVLDYFSDVVYI DLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIH+PELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKK+VDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKL+QLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLE KMKQN DDYLSDG+NSPEP AA++P",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_74414 Length: 664 Strand: Plus
|
|
unnamed protein product
|
|
Target: ref|XP_004407164.1| Length: 664 Strand: Plus
|
|
PREDICTED: cyclic nucleotide-gated olfactory channel [Odobenus rosmarus
|
|
divergens]
|
|
|
|
Score:1249 bits(3231), Expect:0,
|
|
Identities:639/664(96%), Positives:652/664(98%), Gaps:0.664(0%)
|
|
|
|
ref|XP_00 0 MTEKSNGVKSSPANNHNHHTPPAIKANGKDDHRTNSRPQSAADDDTSSELQRLAEMDAPQ
|
|
0 |||||||||||||||||.|.|..|||||||..||.|||||||||||||||||||||||||
|
|
Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ
|
|
|
|
ref|XP_00 60 QGRGGFRRIVRLVGIIREWANKNFREEEPRPDSFLERFRGPELQTVTTQQGDGKGDKDGE
|
|
60 |.||||||||||||.||.|||.||||||.||||||||||||||||||||||||||||||.
|
|
Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD
|
|
|
|
ref|XP_00 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYYLVWLVLDY
|
|
120 |||||||||||||||||||||||||||||||||||||||||||||||||||.||||||||
|
|
Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY
|
|
|
|
ref|XP_00 180 FSDVVYITDLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
180 |||||||.||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
|
|
ref|XP_00 240 HSPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
240 |.||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
|
|
ref|XP_00 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
|
|
ref|XP_00 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
360 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
|
|
ref|XP_00 420 KSVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
420 |.||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
|
|
ref|XP_00 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
480 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
|
|
ref|XP_00 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
540 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
|
|
ref|XP_00 600 LEQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLETKMKQNNMDDYLSDGVNSPEPTA
|
|
600 |.|||||||||||||||||||||||||||||||||||.|||||..|||||||.|||||.|
|
|
Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA
|
|
|
|
ref|XP_00 660 ADEP 664
|
|
660 |..| 664
|
|
Query_744 660 AEQP 664
|
|
|
|
""",
|
|
)
|
|
hit = record[5]
|
|
self.assertEqual(hit.num, 6)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|XP_008688471.1|")
|
|
self.assertEqual(hit.target.name, "XP_008688471")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"cyclic nucleotide-gated olfactory channel [Ursus maritimus] >ref|XP_026343324.1| cyclic nucleotide-gated olfactory channel [Ursus arctos]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=664)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 3228.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 1248.63)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 638)
|
|
self.assertEqual(hsp.annotations["positive"], 652)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 664],
|
|
[ 0, 664]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 664))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_74414")
|
|
self.assertEqual(hsp.query.description, "unnamed protein product")
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MTEKSNGVKSSPANNHNHHAPPAIKANGKDDHRSSSRPQSAVDDDTSSELQRLA...DEP')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "ref|XP_008688471.1|")
|
|
self.assertEqual(hsp.target.name, "XP_008688471")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"cyclic nucleotide-gated olfactory channel [Ursus maritimus] >ref|XP_026343324.1| cyclic nucleotide-gated olfactory channel [Ursus arctos]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTEKSNGVKSSPANNHN+H P IKANGKD+ R+ SRPQSA DDDTSSELQRLAEMDAPQ+ RGGFRRIVRLVG+IR WAN+NFREEE RPDSFLERFRGPELQTVTTQQGDGKGDKDG+GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQ+GY+LVWLVLDYFSDVVYI DLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIH+PELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKK+VDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKL+QLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLE KMKQN EDDYLSDGMNSPEPAAA++P",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_74414 Length: 664 Strand: Plus
|
|
unnamed protein product
|
|
Target: ref|XP_008688471.1| Length: 664 Strand: Plus
|
|
cyclic nucleotide-gated olfactory channel [Ursus maritimus]
|
|
>ref|XP_026343324.1| cyclic nucleotide-gated olfactory channel [Ursus
|
|
arctos]
|
|
|
|
Score:1248 bits(3228), Expect:0,
|
|
Identities:638/664(96%), Positives:652/664(98%), Gaps:0.664(0%)
|
|
|
|
ref|XP_00 0 MTEKSNGVKSSPANNHNHHAPPAIKANGKDDHRSSSRPQSAVDDDTSSELQRLAEMDAPQ
|
|
0 |||||||||||||||||.|.|..|||||||..|..||||||.||||||||||||||||||
|
|
Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ
|
|
|
|
ref|XP_00 60 RGRGGFRRIVRLVGIIRDWANKNFREEEPRPDSFLERFRGPELQTVTTQQGDGKGDKDGE
|
|
60 ..||||||||||||.||.|||.||||||.||||||||||||||||||||||||||||||.
|
|
Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD
|
|
|
|
ref|XP_00 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQKGYYLVWLVLDY
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||.||.||||||||
|
|
Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY
|
|
|
|
ref|XP_00 180 FSDVVYITDLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
180 |||||||.||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
|
|
ref|XP_00 240 HSPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
240 |.||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
|
|
ref|XP_00 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
|
|
ref|XP_00 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
360 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
|
|
ref|XP_00 420 KSVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
420 |.||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
|
|
ref|XP_00 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
480 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
|
|
ref|XP_00 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
540 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
|
|
ref|XP_00 600 LEQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLETKMKQNNEDDYLSDGMNSPEPAA
|
|
600 |.|||||||||||||||||||||||||||||||||||.|||||.||||||||||||||||
|
|
Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA
|
|
|
|
ref|XP_00 660 ADEP 664
|
|
660 |..| 664
|
|
Query_744 660 AEQP 664
|
|
|
|
""",
|
|
)
|
|
hit = record[6]
|
|
self.assertEqual(hit.num, 7)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|XP_011229794.1|")
|
|
self.assertEqual(hit.target.name, "XP_011229794")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"cyclic nucleotide-gated olfactory channel [Ailuropoda melanoleuca] >gb|EFB14215.1| hypothetical protein PANDA_013994, partial [Ailuropoda melanoleuca]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=664)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 3227.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 1248.24)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 638)
|
|
self.assertEqual(hsp.annotations["positive"], 652)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 664],
|
|
[ 0, 664]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 664))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_74414")
|
|
self.assertEqual(hsp.query.description, "unnamed protein product")
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MTEKSNGVKSSPANNHNHHAPPAIKANGKDDHRSSSRPQSAVDDDTSSELQRLA...DEP')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "ref|XP_011229794.1|")
|
|
self.assertEqual(hsp.target.name, "XP_011229794")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"cyclic nucleotide-gated olfactory channel [Ailuropoda melanoleuca] >gb|EFB14215.1| hypothetical protein PANDA_013994, partial [Ailuropoda melanoleuca]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTEKSNGVKSSPANNHN+H P IKANGKD+ R+ SRPQSA DDDTSSELQRLAEMDAPQ+ RGGFRRIVRLVG+IR WAN+NFREEE RPDSFLERFRGPELQTVTTQQGDGKGDKDG+GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQ+GY+LVWLVLDYFSDVVYI DLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIH+PELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKK+VDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKL+QLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLE KMKQN EDDYLSDGMNSPEPAAA++P",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_74414 Length: 664 Strand: Plus
|
|
unnamed protein product
|
|
Target: ref|XP_011229794.1| Length: 664 Strand: Plus
|
|
cyclic nucleotide-gated olfactory channel [Ailuropoda melanoleuca]
|
|
>gb|EFB14215.1| hypothetical protein PANDA_013994, partial [Ailuropoda
|
|
melanoleuca]
|
|
|
|
Score:1248 bits(3227), Expect:0,
|
|
Identities:638/664(96%), Positives:652/664(98%), Gaps:0.664(0%)
|
|
|
|
ref|XP_01 0 MTEKSNGVKSSPANNHNHHAPPAIKANGKDDHRSSSRPQSAVDDDTSSELQRLAEMDAPQ
|
|
0 |||||||||||||||||.|.|..|||||||..|..||||||.||||||||||||||||||
|
|
Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ
|
|
|
|
ref|XP_01 60 RGRGGFRRIVRLVGIIRDWANKNFREEEPRPDSFLERFRGPELQTVTTQQGDGKGDKDGE
|
|
60 ..||||||||||||.||.|||.||||||.||||||||||||||||||||||||||||||.
|
|
Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD
|
|
|
|
ref|XP_01 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQKGYYLVWLVLDY
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||.||.||||||||
|
|
Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY
|
|
|
|
ref|XP_01 180 FSDVVYIIDLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
180 |||||||.||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
|
|
ref|XP_01 240 HSPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
240 |.||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
|
|
ref|XP_01 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
|
|
ref|XP_01 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
360 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
|
|
ref|XP_01 420 KSVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
420 |.||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
|
|
ref|XP_01 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
480 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
|
|
ref|XP_01 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
540 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
|
|
ref|XP_01 600 LEQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLETKMKQNNEDDYLSDGMNSPEPAA
|
|
600 |.|||||||||||||||||||||||||||||||||||.|||||.||||||||||||||||
|
|
Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA
|
|
|
|
ref|XP_01 660 ADEP 664
|
|
660 |..| 664
|
|
Query_744 660 AEQP 664
|
|
|
|
""",
|
|
)
|
|
hit = record[7]
|
|
self.assertEqual(hit.num, 8)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|XP_045646452.1|")
|
|
self.assertEqual(hit.target.name, "XP_045646452")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"cyclic nucleotide-gated olfactory channel [Ursus americanus]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=664)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 3223.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 1246.68)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 637)
|
|
self.assertEqual(hsp.annotations["positive"], 651)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 664],
|
|
[ 0, 664]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 664))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_74414")
|
|
self.assertEqual(hsp.query.description, "unnamed protein product")
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MTEKSNGVKCSPANNHNHHAPPAIKANGKDDHRSSSRPQSAVDDDTSSELQRLA...DEP')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "ref|XP_045646452.1|")
|
|
self.assertEqual(hsp.target.name, "XP_045646452")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"cyclic nucleotide-gated olfactory channel [Ursus americanus]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTEKSNGVK SPANNHN+H P IKANGKD+ R+ SRPQSA DDDTSSELQRLAEMDAPQ+ RGGFRRIVRLVG+IR WAN+NFREEE RPDSFLERFRGPELQTVTTQQGDGKGDKDG+GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQ+GY+LVWLVLDYFSDVVYI DLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIH+PELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKK+VDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKL+QLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLE KMKQN EDDYLSDGMNSPEPAAA++P",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_74414 Length: 664 Strand: Plus
|
|
unnamed protein product
|
|
Target: ref|XP_045646452.1| Length: 664 Strand: Plus
|
|
cyclic nucleotide-gated olfactory channel [Ursus americanus]
|
|
|
|
Score:1246 bits(3223), Expect:0,
|
|
Identities:637/664(96%), Positives:651/664(98%), Gaps:0.664(0%)
|
|
|
|
ref|XP_04 0 MTEKSNGVKCSPANNHNHHAPPAIKANGKDDHRSSSRPQSAVDDDTSSELQRLAEMDAPQ
|
|
0 |||||||||.|||||||.|.|..|||||||..|..||||||.||||||||||||||||||
|
|
Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ
|
|
|
|
ref|XP_04 60 RGRGGFRRIVRLVGIIRDWANKNFREEEPRPDSFLERFRGPELQTVTTQQGDGKGDKDGE
|
|
60 ..||||||||||||.||.|||.||||||.||||||||||||||||||||||||||||||.
|
|
Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD
|
|
|
|
ref|XP_04 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQKGYYLVWLVLDY
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||.||.||||||||
|
|
Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY
|
|
|
|
ref|XP_04 180 FSDVVYITDLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
180 |||||||.||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
|
|
ref|XP_04 240 HSPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
240 |.||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
|
|
ref|XP_04 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
|
|
ref|XP_04 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
360 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
|
|
ref|XP_04 420 KSVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
420 |.||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
|
|
ref|XP_04 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
480 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
|
|
ref|XP_04 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
540 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
|
|
ref|XP_04 600 LEQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLETKMKQNNEDDYLSDGMNSPEPAA
|
|
600 |.|||||||||||||||||||||||||||||||||||.|||||.||||||||||||||||
|
|
Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA
|
|
|
|
ref|XP_04 660 ADEP 664
|
|
660 |..| 664
|
|
Query_744 660 AEQP 664
|
|
|
|
""",
|
|
)
|
|
hit = record[8]
|
|
self.assertEqual(hit.num, 9)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|XP_035942617.1|")
|
|
self.assertEqual(hit.target.name, "XP_035942617")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"cyclic nucleotide-gated olfactory channel [Halichoerus grypus]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=664)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 3221.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 1245.9)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 638)
|
|
self.assertEqual(hsp.annotations["positive"], 651)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 664],
|
|
[ 0, 664]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 664))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_74414")
|
|
self.assertEqual(hsp.query.description, "unnamed protein product")
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MTEKSNGVKSSPANNHNHHAPPVIKANGKDDHRTSSRPQSAADDDTSSELQRLA...DEP')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "ref|XP_035942617.1|")
|
|
self.assertEqual(hsp.target.name, "XP_035942617")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"cyclic nucleotide-gated olfactory channel [Halichoerus grypus]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTEKSNGVKSSPANNHN+H P IKANGKD+ RT SRPQSAADDDTSSELQRLAEMD PQQ RGGFRRIVRLVG+IR+WAN+NFREEE RPDSFLERFRGPELQTVTTQQGDGKGDKDG+GKGTKKKFELFVLDPAGDWYYRWLFVIAM VLYNWCLLVARACFSDLQ+GY+LVWLVLDYFSDVVYI DLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGIH+PELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKK+VDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEKL+QLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLE KMKQN DDYLSDGMNSPEPAAA++P",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_74414 Length: 664 Strand: Plus
|
|
unnamed protein product
|
|
Target: ref|XP_035942617.1| Length: 664 Strand: Plus
|
|
cyclic nucleotide-gated olfactory channel [Halichoerus grypus]
|
|
|
|
Score:1245 bits(3221), Expect:0,
|
|
Identities:638/664(96%), Positives:651/664(98%), Gaps:0.664(0%)
|
|
|
|
ref|XP_03 0 MTEKSNGVKSSPANNHNHHAPPVIKANGKDDHRTSSRPQSAADDDTSSELQRLAEMDVPQ
|
|
0 |||||||||||||||||.|.|..|||||||..||.||||||||||||||||||||||.||
|
|
Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ
|
|
|
|
ref|XP_03 60 QGRGGFRRIVRLVGIIREWANKNFREEELRPDSFLERFRGPELQTVTTQQGDGKGDKDGE
|
|
60 |.||||||||||||.||.|||.||||||.||||||||||||||||||||||||||||||.
|
|
Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD
|
|
|
|
ref|XP_03 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMLVLYNWCLLVARACFSDLQKGYYLVWLVLDY
|
|
120 |||||||||||||||||||||||||||||.||||||||||||||||||.||.||||||||
|
|
Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY
|
|
|
|
ref|XP_03 180 FSDVVYITDLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
180 |||||||.||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
|
|
ref|XP_03 240 HSPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
240 |.||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
|
|
ref|XP_03 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
|
|
ref|XP_03 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
360 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
|
|
ref|XP_03 420 KSVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
420 |.||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
|
|
ref|XP_03 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
480 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
|
|
ref|XP_03 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
540 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
|
|
ref|XP_03 600 LEQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLETKMKQNNMDDYLSDGMNSPEPAA
|
|
600 |.|||||||||||||||||||||||||||||||||||.|||||..|||||||||||||||
|
|
Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA
|
|
|
|
ref|XP_03 660 ADEP 664
|
|
660 |..| 664
|
|
Query_744 660 AEQP 664
|
|
|
|
""",
|
|
)
|
|
hit = record[9]
|
|
self.assertEqual(hit.num, 10)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|XP_049729369.1|")
|
|
self.assertEqual(hit.target.name, "XP_049729369")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"cyclic nucleotide-gated olfactory channel [Elephas maximus indicus]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=664)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 3219.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 1245.12)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 635)
|
|
self.assertEqual(hsp.annotations["positive"], 654)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 664],
|
|
[ 0, 664]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 664))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLA...EQP')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_74414")
|
|
self.assertEqual(hsp.query.description, "unnamed protein product")
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MTEKSNGVKSSPANNHNHHVPSTIKANGKDDRRTSSRPQSAADDDTSSELQRLA...EKP')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "ref|XP_049729369.1|")
|
|
self.assertEqual(hsp.target.name, "XP_049729369")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"cyclic nucleotide-gated olfactory channel [Elephas maximus indicus]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTEKSNGVKSSPANNHN+HVP+TIKANGKD+ RT SRPQSAADDDTSSELQRLAEMDAPQQ RGGFRRI+RLVGVIR+WAN+NFREE+ RPDSFLERFRGPELQTVTTQQGDGK DKDG+GKGTKKKFELFVLDPAGDWYYRWLF IA+PVLYNWCLLVARACFSDLQ+GY+LVWLVLDYFSD+VYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHT+QFKLDVASIIPTDLIYFAVGIH+PELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSIGFGVDTWVYPNITDP YGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIGVLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNKKTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDYICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANIRSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAA+MEVDVQEKL+QLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLE KMKQN E+DYLSDG+NSPEPAA E+P",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_74414 Length: 664 Strand: Plus
|
|
unnamed protein product
|
|
Target: ref|XP_049729369.1| Length: 664 Strand: Plus
|
|
cyclic nucleotide-gated olfactory channel [Elephas maximus indicus]
|
|
|
|
Score:1245 bits(3219), Expect:0,
|
|
Identities:635/664(96%), Positives:654/664(98%), Gaps:0.664(0%)
|
|
|
|
ref|XP_04 0 MTEKSNGVKSSPANNHNHHVPSTIKANGKDDRRTSSRPQSAADDDTSSELQRLAEMDAPQ
|
|
0 |||||||||||||||||.|||.||||||||..||.|||||||||||||||||||||||||
|
|
Query_744 0 MTEKSNGVKSSPANNHNNHVPATIKANGKDESRTRSRPQSAADDDTSSELQRLAEMDAPQ
|
|
|
|
ref|XP_04 60 QWRGGFRRIIRLVGVIREWANKNFREEDPRPDSFLERFRGPELQTVTTQQGDGKSDKDGE
|
|
60 |.|||||||.|||||||.|||.|||||..|||||||||||||||||||||||||.||||.
|
|
Query_744 60 QRRGGFRRIVRLVGVIRQWANRNFREEEARPDSFLERFRGPELQTVTTQQGDGKGDKDGD
|
|
|
|
ref|XP_04 120 GKGTKKKFELFVLDPAGDWYYRWLFFIALPVLYNWCLLVARACFSDLQKGYYLVWLVLDY
|
|
120 |||||||||||||||||||||||||.||.|||||||||||||||||||.||.||||||||
|
|
Query_744 120 GKGTKKKFELFVLDPAGDWYYRWLFVIAMPVLYNWCLLVARACFSDLQRGYFLVWLVLDY
|
|
|
|
ref|XP_04 180 FSDMVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTMQFKLDVASIIPTDLIYFAVGI
|
|
180 |||.||||||||||||||||||||||||||||||||||.|||||||||||||||||||||
|
|
Query_744 180 FSDVVYIADLFIRLRTGFLEQGLLVKDPKKLRDNYIHTLQFKLDVASIIPTDLIYFAVGI
|
|
|
|
ref|XP_04 240 HSPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
240 |.||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 240 HNPELRFNRLLHFARMFEFFDRTETRTSYPNIFRISNLVLYILVIIHWNACIYYAISKSI
|
|
|
|
ref|XP_04 300 GFGVDTWVYPNITDPAYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
300 |||||||||||||||.||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 300 GFGVDTWVYPNITDPEYGYLAREYIYCLYWSTLTLTTIGETPPPVKDEEYLFVIFDFLIG
|
|
|
|
ref|XP_04 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
360 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 360 VLIFATIVGNVGSMISNMNATRAEFQAKIDAVKHYMQFRKVSKEMEAKVIKWFDYLWTNK
|
|
|
|
ref|XP_04 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
420 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 420 KTVDEREVLKNLPAKLRAEIAINVHLSTLKKVRIFQDCEAGLLVELVLKLRPQVFSPGDY
|
|
|
|
ref|XP_04 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
480 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
Query_744 480 ICRKGDIGKEMYIIKEGKLAVVADDGVTQYALLSAGSCFGEISILNIKGSKMGNRRTANI
|
|
|
|
ref|XP_04 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAATMEVDVQEK
|
|
540 |||||||||||||||||||||||||||||||||||||||||||||||||||.||||||||
|
|
Query_744 540 RSLGYSDLFCLSKDDLMEAVTEYPDAKKVLEERGREILMKEGLLDENEVAASMEVDVQEK
|
|
|
|
ref|XP_04 600 LEQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLETKMKQNNEEDYLSDGINSPEPAA
|
|
600 |.|||||||||||||||||||||||||||||||||||.|||||.|.||||||.|||||||
|
|
Query_744 600 LKQLETNMETLYTRFGRLLAEYTGAQQKLKQRITVLEVKMKQNTEDDYLSDGMNSPEPAA
|
|
|
|
ref|XP_04 660 VEKP 664
|
|
660 .|.| 664
|
|
Query_744 660 AEQP 664
|
|
|
|
""",
|
|
)
|
|
|
|
def test_phiblast_writer(self):
|
|
"""Writing BLASTP 2.14.1+ (phiblast.xml)."""
|
|
filename = "phiblast.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream)
|
|
self.assertEqual(n, 1)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_phiblast_records(written_records)
|
|
|
|
def test_xml_21500_blastp_001_parser(self):
|
|
"""Parsing BLASTP 2.15.0+ (xml_21500_blastp_001.xml)."""
|
|
filename = "xml_21500_blastp_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_21500_blastp_001_records(records)
|
|
with Blast.parse(path) as records:
|
|
self.check_xml_21500_blastp_001_records(records)
|
|
with open(path, "rb") as stream:
|
|
record = Blast.read(stream)
|
|
self.check_xml_21500_blastp_001_record(record)
|
|
record = Blast.read(path)
|
|
self.check_xml_21500_blastp_001_record(record)
|
|
with Blast.parse(path) as records:
|
|
self.assertEqual(
|
|
str(records),
|
|
"""\
|
|
Program: BLASTP 2.15.0+
|
|
db: nr
|
|
|
|
Query: WXX52402.1 (length=239)
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Hits: ---- ----- ----------------------------------------------------------
|
|
# # HSP ID + description
|
|
---- ----- ----------------------------------------------------------
|
|
0 1 ref|WP_003221446.1| MULTISPECIES: RNA polymerase sporu...
|
|
1 1 dbj|BAI85158.2| sporulation sigma factor SigE [Bacillu...
|
|
2 1 ref|WP_120028072.1| RNA polymerase sporulation sigma f...
|
|
3 1 ref|WP_326121348.1| RNA polymerase sporulation sigma f...
|
|
4 1 ref|WP_128473893.1| RNA polymerase sporulation sigma f...
|
|
5 1 ref|WP_174228079.1| RNA polymerase sporulation sigma f...
|
|
6 1 ref|WP_315947263.1| RNA polymerase sporulation sigma f...
|
|
7 1 ref|WP_219912761.1| RNA polymerase sporulation sigma f...
|
|
8 1 ref|WP_038828182.1| RNA polymerase sporulation sigma f...
|
|
9 1 ref|WP_326211018.1| RNA polymerase sporulation sigma f...""",
|
|
)
|
|
record = Blast.read(path)
|
|
self.assertEqual(
|
|
str(record),
|
|
"""\
|
|
Program: BLASTP 2.15.0+
|
|
db: nr
|
|
Query: WXX52402.1 (length=239)
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Hits: ---- ----- ----------------------------------------------------------
|
|
# # HSP ID + description
|
|
---- ----- ----------------------------------------------------------
|
|
0 1 ref|WP_003221446.1| MULTISPECIES: RNA polymerase sporu...
|
|
1 1 dbj|BAI85158.2| sporulation sigma factor SigE [Bacillu...
|
|
2 1 ref|WP_120028072.1| RNA polymerase sporulation sigma f...
|
|
3 1 ref|WP_326121348.1| RNA polymerase sporulation sigma f...
|
|
4 1 ref|WP_128473893.1| RNA polymerase sporulation sigma f...
|
|
5 1 ref|WP_174228079.1| RNA polymerase sporulation sigma f...
|
|
6 1 ref|WP_315947263.1| RNA polymerase sporulation sigma f...
|
|
7 1 ref|WP_219912761.1| RNA polymerase sporulation sigma f...
|
|
8 1 ref|WP_038828182.1| RNA polymerase sporulation sigma f...
|
|
9 1 ref|WP_326211018.1| RNA polymerase sporulation sigma f...""",
|
|
)
|
|
|
|
def test_xml2_21500_blastp_001_parser(self):
|
|
"""Parsing BLASTP 2.15.0+ (xml2_21500_blastp_001.xml)."""
|
|
filename = "xml2_21500_blastp_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_21500_blastp_001_records(records, xml2=True)
|
|
with Blast.parse(path) as records:
|
|
self.check_xml_21500_blastp_001_records(records, xml2=True)
|
|
with open(path, "rb") as stream:
|
|
record = Blast.read(stream)
|
|
self.check_xml_21500_blastp_001_record(record, xml2=True)
|
|
record = Blast.read(path)
|
|
self.check_xml_21500_blastp_001_record(record, xml2=True)
|
|
|
|
def check_xml_21500_blastp_001_records(self, records, xml2=False):
|
|
self.assertEqual(records.program, "blastp")
|
|
self.assertEqual(records.version, "BLASTP 2.15.0+")
|
|
self.assertEqual(
|
|
records.reference,
|
|
'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.',
|
|
)
|
|
self.assertEqual(records.db, "nr")
|
|
if xml2 is False:
|
|
self.assertIsInstance(records.query, SeqRecord)
|
|
self.assertEqual(records.query.id, "WXX52402.1")
|
|
self.assertEqual(
|
|
records.query.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(repr(records.query.seq), "Seq(None, length=239)")
|
|
self.assertEqual(records.param["matrix"], "BLOSUM62")
|
|
self.assertAlmostEqual(records.param["expect"], 0.05)
|
|
self.assertEqual(records.param["gap-open"], 11)
|
|
self.assertEqual(records.param["gap-extend"], 1)
|
|
self.assertEqual(records.param["filter"], "F")
|
|
if xml2 is False:
|
|
self.assertEqual(len(records.param), 5)
|
|
else:
|
|
self.assertEqual(records.param["cbs"], 2)
|
|
self.assertEqual(len(records.param), 6)
|
|
record = next(records)
|
|
self.assertRaises(StopIteration, next, records)
|
|
self.check_xml_21500_blastp_001_record(record, xml2=xml2)
|
|
|
|
def check_xml_21500_blastp_001_record(self, record, xml2=False):
|
|
hit = record[0]
|
|
self.assertEqual(hit.num, 1)
|
|
target = hit.target
|
|
self.assertIsInstance(target, SeqRecord)
|
|
self.assertEqual(target.id, "ref|WP_003221446.1|")
|
|
self.assertEqual(target.name, "WP_003221446")
|
|
seq = target.seq
|
|
self.assertEqual(repr(seq), "Seq(None, length=239)")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
target.description,
|
|
"MULTISPECIES: RNA polymerase sporulation sigma factor SigE [Bacillales]",
|
|
)
|
|
self.assertEqual(target.annotations["taxid"], 1385)
|
|
self.assertEqual(target.annotations["sciname"], "Bacillales")
|
|
self.assertIs(target, hit.targets[0])
|
|
self.assertEqual(len(hit.targets), 8)
|
|
target = hit.targets[1]
|
|
self.assertIsInstance(target, SeqRecord)
|
|
self.assertEqual(target.id, "ref|NP_389415.2|")
|
|
self.assertEqual(target.name, "NP_389415")
|
|
self.assertIs(target.seq, seq)
|
|
self.assertEqual(
|
|
target.description,
|
|
"RNA polymerase sporulation-specific sigma-29 factor (sigma-E) [Bacillus subtilis subsp. subtilis str. 168]",
|
|
)
|
|
self.assertEqual(target.annotations["taxid"], 224308)
|
|
self.assertEqual(
|
|
target.annotations["sciname"],
|
|
"Bacillus subtilis subsp. subtilis str. 168",
|
|
)
|
|
target = hit.targets[2]
|
|
self.assertIsInstance(target, SeqRecord)
|
|
self.assertEqual(target.id, "sp|P06222.1|")
|
|
self.assertEqual(target.name, "P06222")
|
|
self.assertIs(target.seq, seq)
|
|
self.assertEqual(
|
|
target.description,
|
|
"RecName: Full=RNA polymerase sigma-E factor; AltName: Full=P31; AltName: Full=Sigma-29; AltName: Full=Stage II sporulation protein GB; Flags: Precursor [Bacillus subtilis subsp. subtilis str. 168]",
|
|
)
|
|
self.assertEqual(target.annotations["taxid"], 224308)
|
|
self.assertEqual(
|
|
target.annotations["sciname"],
|
|
"Bacillus subtilis subsp. subtilis str. 168",
|
|
)
|
|
target = hit.targets[3]
|
|
self.assertIsInstance(target, SeqRecord)
|
|
self.assertEqual(target.id, "gb|KFI04694.1|")
|
|
self.assertEqual(target.name, "KFI04694")
|
|
self.assertIs(target.seq, seq)
|
|
self.assertEqual(
|
|
target.description,
|
|
"sporulation sigma factor SigE [Bacillus sp. BSC154]",
|
|
)
|
|
self.assertEqual(target.annotations["taxid"], 1549811)
|
|
self.assertEqual(target.annotations["sciname"], "Bacillus sp. BSC154")
|
|
target = hit.targets[4]
|
|
self.assertIsInstance(target, SeqRecord)
|
|
self.assertEqual(target.id, "gb|MDZ5720185.1|")
|
|
self.assertEqual(target.name, "MDZ5720185")
|
|
self.assertIs(target.seq, seq)
|
|
self.assertEqual(
|
|
target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus sp. X(2023)]",
|
|
)
|
|
self.assertEqual(target.annotations["taxid"], 3106047)
|
|
self.assertEqual(target.annotations["sciname"], "Bacillus sp. X(2023)")
|
|
target = hit.targets[5]
|
|
self.assertIsInstance(target, SeqRecord)
|
|
self.assertEqual(target.id, "gb|POO83984.1|")
|
|
self.assertEqual(target.name, "POO83984")
|
|
self.assertIs(target.seq, seq)
|
|
self.assertEqual(
|
|
target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus sp. MBGLi97]",
|
|
)
|
|
self.assertEqual(target.annotations["taxid"], 2070760)
|
|
self.assertEqual(target.annotations["sciname"], "Bacillus sp. MBGLi97")
|
|
target = hit.targets[6]
|
|
self.assertIsInstance(target, SeqRecord)
|
|
self.assertEqual(target.id, "dbj|BAM52179.1|")
|
|
self.assertEqual(target.name, "BAM52179")
|
|
self.assertIs(target.seq, seq)
|
|
self.assertEqual(
|
|
target.description,
|
|
"sporulation sigma factor SigE [Bacillus subtilis BEST7613]",
|
|
)
|
|
self.assertEqual(target.annotations["taxid"], 1204343)
|
|
self.assertEqual(
|
|
target.annotations["sciname"], "Bacillus subtilis BEST7613"
|
|
)
|
|
target = hit.targets[7]
|
|
self.assertIsInstance(target, SeqRecord)
|
|
self.assertEqual(target.id, "gb|ADM37626.1|")
|
|
self.assertEqual(target.name, "ADM37626")
|
|
self.assertIs(target.seq, seq)
|
|
self.assertEqual(
|
|
target.description,
|
|
"sporulation-specific sigma factor sigma-E [Bacillus spizizenii str. W23]",
|
|
)
|
|
self.assertEqual(target.annotations["taxid"], 655816)
|
|
self.assertEqual(
|
|
target.annotations["sciname"], "Bacillus spizizenii str. W23"
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
target.description,
|
|
"MULTISPECIES: RNA polymerase sporulation sigma factor SigE [Bacillales] >ref|NP_389415.2| RNA polymerase sporulation-specific sigma-29 factor (sigma-E) [Bacillus subtilis subsp. subtilis str. 168] >sp|P06222.1| RecName: Full=RNA polymerase sigma-E factor; AltName: Full=P31; AltName: Full=Sigma-29; AltName: Full=Stage II sporulation protein GB; Flags: Precursor [Bacillus subtilis subsp. subtilis str. 168] >gb|KFI04694.1| sporulation sigma factor SigE [Bacillus sp. BSC154] >gb|MDZ5720185.1| RNA polymerase sporulation sigma factor SigE [Bacillus sp. X(2023)] >gb|POO83984.1| RNA polymerase sporulation sigma factor SigE [Bacillus sp. MBGLi97] >dbj|BAM52179.1| sporulation sigma factor SigE [Bacillus subtilis BEST7613] >gb|ADM37626.1| sporulation-specific sigma factor sigma-E [Bacillus spizizenii str. W23]",
|
|
)
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1227.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 477.248)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 2.44722e-169, places=174)
|
|
self.assertEqual(hsp.annotations["identity"], 239)
|
|
self.assertEqual(hsp.annotations["positive"], 239)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[0, 239],
|
|
[0, 239]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 239))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "WXX52402.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.target.id, hit.target.id)
|
|
self.assertEqual(hsp.target.name, hit.target.name)
|
|
self.assertEqual(hsp.target.description, hit.target.description)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='ref|WP_003221446.1|' query.id='WXX52402.1'; 2 rows x 239 columns>",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: ref|WP_003221446.1| Length: 239 Strand: Plus
|
|
MULTISPECIES: RNA polymerase sporulation sigma factor SigE [Bacillales]
|
|
|
|
Score:477 bits(1227), Expect:2e-169,
|
|
Identities:239/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
ref|WP_00 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
ref|WP_00 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
ref|WP_00 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
ref|WP_00 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
ref|WP_00 239
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: ref|WP_003221446.1| Length: 239 Strand: Plus
|
|
MULTISPECIES: RNA polymerase sporulation sigma factor SigE [Bacillales]
|
|
>ref|NP_389415.2| RNA polymerase sporulation-specific sigma-29 factor
|
|
(sigma-E) [Bacillus subtilis subsp. subtilis str. 168] >sp|P06222.1|
|
|
RecName: Full=RNA polymerase sigma-E factor; AltName: Full=P31; AltName:
|
|
Full=Sigma-29; AltName: Full=Stage II sporulation protein GB; Flags:
|
|
Precursor [Bacillus subtilis subsp. subtilis str. 168] >gb|KFI04694.1|
|
|
sporulation sigma factor SigE [Bacillus sp. BSC154] >gb|MDZ5720185.1|
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus sp. X(2023)]
|
|
>gb|POO83984.1| RNA polymerase sporulation sigma factor SigE [Bacillus
|
|
sp. MBGLi97] >dbj|BAM52179.1| sporulation sigma factor SigE [Bacillus
|
|
subtilis BEST7613] >gb|ADM37626.1| sporulation-specific sigma factor
|
|
sigma-E [Bacillus spizizenii str. W23]
|
|
|
|
Score:477 bits(1227), Expect:2e-169,
|
|
Identities:239/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
ref|WP_00 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
ref|WP_00 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
ref|WP_00 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
ref|WP_00 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
ref|WP_00 239
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
hit = record[1]
|
|
self.assertEqual(hit.num, 2)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "dbj|BAI85158.2|")
|
|
self.assertEqual(hit.target.name, "BAI85158")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"sporulation sigma factor SigE [Bacillus subtilis subsp. natto BEST195]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=260)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1227.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 477.248)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 3.91488e-169, places=174)
|
|
self.assertEqual(hsp.annotations["identity"], 239)
|
|
self.assertEqual(hsp.annotations["positive"], 239)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[21, 260],
|
|
[ 0, 239]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 239))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "WXX52402.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({21: 'MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV'}, length=260)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "dbj|BAI85158.2|")
|
|
self.assertEqual(hsp.target.name, "BAI85158")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"sporulation sigma factor SigE [Bacillus subtilis subsp. natto BEST195]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='dbj|BAI85158.2|' query.id='WXX52402.1'; 2 rows x 239 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: dbj|BAI85158.2| Length: 260 Strand: Plus
|
|
sporulation sigma factor SigE [Bacillus subtilis subsp. natto BEST195]
|
|
|
|
Score:477 bits(1227), Expect:4e-169,
|
|
Identities:239/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
dbj|BAI85 21 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
dbj|BAI85 81 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
dbj|BAI85 141 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
dbj|BAI85 201 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
dbj|BAI85 260
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
hit = record[2]
|
|
self.assertEqual(hit.num, 3)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|WP_120028072.1|")
|
|
self.assertEqual(hit.target.name, "WP_120028072")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] >gb|RJS52520.1| RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=239)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1225.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 476.478)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 4.00949e-169, places=174)
|
|
self.assertEqual(hsp.annotations["identity"], 238)
|
|
self.assertEqual(hsp.annotations["positive"], 239)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[0, 239],
|
|
[0, 239]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 239))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "WXX52402.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.target.id, hit.target.id)
|
|
self.assertEqual(hsp.target.name, hit.target.name)
|
|
self.assertEqual(hsp.target.description, hit.target.description)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGL+GEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='ref|WP_120028072.1|' query.id='WXX52402.1'; 2 rows x 239 columns>",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: ref|WP_120028072.1| Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
|
|
Score:476 bits(1225), Expect:4e-169,
|
|
Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
ref|WP_12 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
ref|WP_12 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
ref|WP_12 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
ref|WP_12 180 LEQLNEREKQIMELRFGLIGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
180 ||||||||||||||||||.||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
ref|WP_12 239
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: ref|WP_120028072.1| Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
>gb|RJS52520.1| RNA polymerase sporulation sigma factor SigE [Bacillus
|
|
subtilis]
|
|
|
|
Score:476 bits(1225), Expect:4e-169,
|
|
Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
ref|WP_12 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
ref|WP_12 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
ref|WP_12 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
ref|WP_12 180 LEQLNEREKQIMELRFGLIGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
180 ||||||||||||||||||.||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
ref|WP_12 239
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
hit = record[3]
|
|
self.assertEqual(hit.num, 4)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|WP_326121348.1|")
|
|
self.assertEqual(hit.target.name, "WP_326121348")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] >gb|MEC0320584.1| RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=239)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1224.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 476.093)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 5.63375e-169, places=174)
|
|
self.assertEqual(hsp.annotations["identity"], 238)
|
|
self.assertEqual(hsp.annotations["positive"], 239)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[0, 239],
|
|
[0, 239]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 239))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "WXX52402.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLIMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.target.id, hit.target.id)
|
|
self.assertEqual(hsp.target.name, hit.target.name)
|
|
self.assertEqual(hsp.target.description, hit.target.description)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVL+MKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='ref|WP_326121348.1|' query.id='WXX52402.1'; 2 rows x 239 columns>",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: ref|WP_326121348.1| Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
|
|
Score:476 bits(1224), Expect:6e-169,
|
|
Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
ref|WP_32 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLIMKLPNGDQAARA
|
|
0 |||||||||||||||||||||||||||||||||||||||||||||||.||||||||||||
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
ref|WP_32 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
ref|WP_32 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
ref|WP_32 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
ref|WP_32 239
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: ref|WP_326121348.1| Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
>gb|MEC0320584.1| RNA polymerase sporulation sigma factor SigE [Bacillus
|
|
subtilis]
|
|
|
|
Score:476 bits(1224), Expect:6e-169,
|
|
Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
ref|WP_32 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLIMKLPNGDQAARA
|
|
0 |||||||||||||||||||||||||||||||||||||||||||||||.||||||||||||
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
ref|WP_32 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
ref|WP_32 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
ref|WP_32 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
ref|WP_32 239
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
hit = record[4]
|
|
self.assertEqual(hit.num, 5)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|WP_128473893.1|")
|
|
self.assertEqual(hit.target.name, "WP_128473893")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] >gb|QAR61557.1| RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=239)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1224.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 476.093)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 5.6959e-169, places=173)
|
|
self.assertEqual(hsp.annotations["identity"], 238)
|
|
self.assertEqual(hsp.annotations["positive"], 239)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[0, 239],
|
|
[0, 239]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 239))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "WXX52402.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMKLGMKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.target.id, hit.target.id)
|
|
self.assertEqual(hsp.target.name, hit.target.name)
|
|
self.assertEqual(hsp.target.description, hsp.target.description)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKKLKLRLTHLWYKLLMKLG+KSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='ref|WP_128473893.1|' query.id='WXX52402.1'; 2 rows x 239 columns>",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: ref|WP_128473893.1| Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
|
|
Score:476 bits(1224), Expect:6e-169,
|
|
Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
ref|WP_12 0 MKKLKLRLTHLWYKLLMKLGMKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
0 ||||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
ref|WP_12 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
ref|WP_12 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
ref|WP_12 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
ref|WP_12 239
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: ref|WP_128473893.1| Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
>gb|QAR61557.1| RNA polymerase sporulation sigma factor SigE [Bacillus
|
|
subtilis]
|
|
|
|
Score:476 bits(1224), Expect:6e-169,
|
|
Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
ref|WP_12 0 MKKLKLRLTHLWYKLLMKLGMKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
0 ||||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
ref|WP_12 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
ref|WP_12 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
ref|WP_12 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
ref|WP_12 239
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
hit = record[5]
|
|
self.assertEqual(hit.num, 6)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|WP_174228079.1|")
|
|
self.assertEqual(hit.target.name, "WP_174228079")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus tequilensis]",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus tequilensis] >gb|NTU26438.1| RNA polymerase sporulation sigma factor SigE [Bacillus tequilensis]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=239)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1224.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 476.093)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 7.17171e-169, places=174)
|
|
self.assertEqual(hsp.annotations["identity"], 238)
|
|
self.assertEqual(hsp.annotations["positive"], 239)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[0, 239],
|
|
[0, 239]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 239))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "WXX52402.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKKLKLRLTHLWYRLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.target.id, hit.target.id)
|
|
self.assertEqual(hsp.target.name, hit.target.name)
|
|
self.assertEqual(hsp.target.description, hit.target.description)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKKLKLRLTHLWY+LLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='ref|WP_174228079.1|' query.id='WXX52402.1'; 2 rows x 239 columns>",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: ref|WP_174228079.1| Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus tequilensis]
|
|
|
|
Score:476 bits(1224), Expect:7e-169,
|
|
Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
ref|WP_17 0 MKKLKLRLTHLWYRLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
0 |||||||||||||.||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
ref|WP_17 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
ref|WP_17 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
ref|WP_17 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
ref|WP_17 239
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: ref|WP_174228079.1| Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus tequilensis]
|
|
>gb|NTU26438.1| RNA polymerase sporulation sigma factor SigE [Bacillus
|
|
tequilensis]
|
|
|
|
Score:476 bits(1224), Expect:7e-169,
|
|
Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
ref|WP_17 0 MKKLKLRLTHLWYRLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
0 |||||||||||||.||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
ref|WP_17 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
ref|WP_17 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
ref|WP_17 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
ref|WP_17 239
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
hit = record[6]
|
|
self.assertEqual(hit.num, 7)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|WP_315947263.1|")
|
|
self.assertEqual(hit.target.name, "WP_315947263")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus cabrialesii]",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus cabrialesii] >gb|MDU0153406.1| RNA polymerase sporulation sigma factor SigE [Bacillus cabrialesii]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=239)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1224.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 476.093)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 7.17171e-169, places=174)
|
|
self.assertEqual(hsp.annotations["identity"], 238)
|
|
self.assertEqual(hsp.annotations["positive"], 239)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[0, 239],
|
|
[0, 239]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 239))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "WXX52402.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMRLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.target.id, hit.target.id)
|
|
self.assertEqual(hsp.target.name, hit.target.name)
|
|
self.assertEqual(hsp.target.description, hit.target.description)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKKLKLRLTHLWYKLLM+LGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='ref|WP_315947263.1|' query.id='WXX52402.1'; 2 rows x 239 columns>",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: ref|WP_315947263.1| Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus cabrialesii]
|
|
|
|
Score:476 bits(1224), Expect:7e-169,
|
|
Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
ref|WP_31 0 MKKLKLRLTHLWYKLLMRLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
0 |||||||||||||||||.||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
ref|WP_31 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
ref|WP_31 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
ref|WP_31 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
ref|WP_31 239
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: ref|WP_315947263.1| Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus cabrialesii]
|
|
>gb|MDU0153406.1| RNA polymerase sporulation sigma factor SigE [Bacillus
|
|
cabrialesii]
|
|
|
|
Score:476 bits(1224), Expect:7e-169,
|
|
Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
ref|WP_31 0 MKKLKLRLTHLWYKLLMRLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
0 |||||||||||||||||.||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
ref|WP_31 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
ref|WP_31 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
ref|WP_31 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
ref|WP_31 239
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
hit = record[7]
|
|
self.assertEqual(hit.num, 8)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|WP_219912761.1|")
|
|
self.assertEqual(hit.target.name, "WP_219912761")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] >emb|COM82207.1| RNA polymerase sigma factor RpoD [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=239)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1223.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 475.707)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 7.49345e-169, places=174)
|
|
self.assertEqual(hsp.annotations["identity"], 238)
|
|
self.assertEqual(hsp.annotations["positive"], 239)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[0, 239],
|
|
[0, 239]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 239))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "WXX52402.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.target.id, hit.target.id)
|
|
self.assertEqual(hsp.target.name, hit.target.name)
|
|
self.assertEqual(hsp.target.description, hit.target.description)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATY+SRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='ref|WP_219912761.1|' query.id='WXX52402.1'; 2 rows x 239 columns>",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: ref|WP_219912761.1| Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
|
|
Score:475 bits(1223), Expect:7e-169,
|
|
Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
ref|WP_21 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
ref|WP_21 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYSSRCIEN
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||||||||.||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
ref|WP_21 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
ref|WP_21 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
ref|WP_21 239
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: ref|WP_219912761.1| Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
>emb|COM82207.1| RNA polymerase sigma factor RpoD [Bacillus subtilis]
|
|
|
|
Score:475 bits(1223), Expect:7e-169,
|
|
Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
ref|WP_21 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
ref|WP_21 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYSSRCIEN
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||||||||.||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
ref|WP_21 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
ref|WP_21 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
ref|WP_21 239
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
hit = record[8]
|
|
self.assertEqual(hit.num, 9)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|WP_038828182.1|")
|
|
self.assertEqual(hit.target.name, "WP_038828182")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=239)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1223.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 475.707)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 7.49345e-169, places=174)
|
|
self.assertEqual(hsp.annotations["identity"], 238)
|
|
self.assertEqual(hsp.annotations["positive"], 239)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[0, 239],
|
|
[0, 239]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 239))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "WXX52402.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "ref|WP_038828182.1|")
|
|
self.assertEqual(hsp.target.name, "WP_038828182")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAAR+ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='ref|WP_038828182.1|' query.id='WXX52402.1'; 2 rows x 239 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: ref|WP_038828182.1| Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
|
|
Score:475 bits(1223), Expect:7e-169,
|
|
Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
ref|WP_03 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARS
|
|
0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||.
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
ref|WP_03 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
ref|WP_03 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
ref|WP_03 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
180 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
ref|WP_03 239
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
hit = record[9]
|
|
self.assertEqual(hit.num, 10)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|WP_326211018.1|")
|
|
self.assertEqual(hit.target.name, "WP_326211018")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis] >gb|MEC1541690.1| RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=239)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1223.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 475.707)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 8.36237e-169, places=174)
|
|
self.assertEqual(hsp.annotations["identity"], 238)
|
|
self.assertEqual(hsp.annotations["positive"], 239)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[0, 239],
|
|
[0, 239]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 239))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "WXX52402.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNG...KMM')",
|
|
)
|
|
self.assertEqual(hsp.target.id, hit.target.id)
|
|
self.assertEqual(hsp.target.name, hit.target.name)
|
|
self.assertEqual(hsp.target.description, hit.target.description)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARAILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIENEILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKALEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKM+",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='ref|WP_326211018.1|' query.id='WXX52402.1'; 2 rows x 239 columns>",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: ref|WP_326211018.1| Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
|
|
Score:475 bits(1223), Expect:8e-169,
|
|
Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
ref|WP_32 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
ref|WP_32 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
ref|WP_32 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
ref|WP_32 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMM
|
|
180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||.
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
ref|WP_32 239
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : WXX52402.1 Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
Target: ref|WP_326211018.1| Length: 239 Strand: Plus
|
|
RNA polymerase sporulation sigma factor SigE [Bacillus subtilis]
|
|
>gb|MEC1541690.1| RNA polymerase sporulation sigma factor SigE [Bacillus
|
|
subtilis]
|
|
|
|
Score:475 bits(1223), Expect:8e-169,
|
|
Identities:238/239(100%), Positives:239/239(100%), Gaps:0.239(0%)
|
|
|
|
ref|WP_32 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 0 MKKLKLRLTHLWYKLLMKLGLKSDEVYYIGGSEALPPPLSKDEEQVLLMKLPNGDQAARA
|
|
|
|
ref|WP_32 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 60 ILIERNLRLVVYIARKFENTGINIEDLISIGTIGLIKAVNTFNPEKKIKLATYASRCIEN
|
|
|
|
ref|WP_32 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
WXX52402. 120 EILMYLRRNNKIRSEVSFDEPLNIDWDGNELLLSDVLGTDDDIITKDIEANVDKKLLKKA
|
|
|
|
ref|WP_32 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMM
|
|
180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||.
|
|
WXX52402. 180 LEQLNEREKQIMELRFGLVGEEEKTQKDVADMMGISQSYISRLEKRIIKRLRKEFNKMV
|
|
|
|
ref|WP_32 239
|
|
239
|
|
WXX52402. 239
|
|
|
|
""",
|
|
)
|
|
with self.assertRaises(IndexError) as cm:
|
|
record[12]
|
|
self.assertEqual(str(cm.exception), "index out of range")
|
|
with self.assertRaises(TypeError) as cm:
|
|
record[None]
|
|
self.assertEqual(str(cm.exception), "key must be an integer, slice, or str")
|
|
with self.assertRaises(KeyError) as cm:
|
|
record["weird_key"]
|
|
self.assertEqual(str(cm.exception), "'weird_key'")
|
|
target_id = "ref|WP_326121348.1|"
|
|
self.assertIn(target_id, record)
|
|
self.assertNotIn("weird_id", record)
|
|
self.assertEqual(record[target_id].target.id, target_id)
|
|
self.assertEqual(record.index(target_id), 3)
|
|
with self.assertRaises(ValueError) as cm:
|
|
record.index("weird_id")
|
|
self.assertEqual(str(cm.exception), "'weird_id' not found")
|
|
self.assertEqual(
|
|
repr(hit),
|
|
"<Bio.Blast.Hit target.id='ref|WP_326211018.1|' query.id='WXX52402.1'; 1 HSP>",
|
|
)
|
|
self.assertEqual(
|
|
repr(hit[:0]), "<Bio.Blast.Hit target.id='ref|WP_326211018.1|'; no hits>"
|
|
)
|
|
self.assertEqual(
|
|
record.keys(),
|
|
[
|
|
"ref|WP_003221446.1|",
|
|
"dbj|BAI85158.2|",
|
|
"ref|WP_120028072.1|",
|
|
"ref|WP_326121348.1|",
|
|
"ref|WP_128473893.1|",
|
|
"ref|WP_174228079.1|",
|
|
"ref|WP_315947263.1|",
|
|
"ref|WP_219912761.1|",
|
|
"ref|WP_038828182.1|",
|
|
"ref|WP_326211018.1|",
|
|
],
|
|
)
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 718658123)
|
|
self.assertEqual(record.stat["db-len"], 277418989154)
|
|
if xml2 is True:
|
|
self.assertEqual(record.stat["hsp-len"], 161)
|
|
self.assertEqual(record.stat["eff-space"], 12613772445378)
|
|
else:
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertEqual(record.stat["eff-space"], 0)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.041)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.267)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.14)
|
|
|
|
def test_xml_21500_blastp_001_writer(self):
|
|
"""Writing BLASTP 2.15.0+ (xml_21500_blastp_001.xml)."""
|
|
filename = "xml_21500_blastp_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream)
|
|
self.assertEqual(n, 1)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_xml_21500_blastp_001_records(written_records)
|
|
|
|
def test_xml2_21500_blastp_001_writer(self):
|
|
"""Writing BLASTP 2.15.0+ (xml2_21500_blastp_001.xml)."""
|
|
filename = "xml2_21500_blastp_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream, fmt="XML2")
|
|
self.assertEqual(n, 1)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_xml_21500_blastp_001_records(written_records, xml2=True)
|
|
|
|
|
|
class TestBlastn(unittest.TestCase):
|
|
"""Test the Blast XML parser for blastn output."""
|
|
|
|
def test_xml_21500_blastn_001_parser(self):
|
|
"""Parsing BLASTN 2.15.0+ (xml_21500_blastn_001.xml)."""
|
|
filename = "xml_21500_blastn_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_21500_blastn_001_records(records)
|
|
with Blast.parse(path) as records:
|
|
self.check_xml_21500_blastn_001_records(records)
|
|
with open(path, "rb") as stream:
|
|
record = Blast.read(stream)
|
|
self.check_xml_21500_blastn_001_record(record)
|
|
record = Blast.read(path)
|
|
self.check_xml_21500_blastn_001_record(record)
|
|
with Blast.parse(path) as records:
|
|
self.assertEqual(
|
|
str(records),
|
|
"""\
|
|
Program: BLASTN 2.15.0+
|
|
db: genomic/10090/GCF_000001635.26
|
|
|
|
Query: Query_78041 (length=285)
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Hits: ---- ----- ----------------------------------------------------------
|
|
# # HSP ID + description
|
|
---- ----- ----------------------------------------------------------
|
|
0 1 gi|372099107|ref|NC_000069.6| Mus musculus strain C57B...
|
|
1 1 gi|372099103|ref|NC_000073.6| Mus musculus strain C57B...
|
|
2 2 gi|372099106|ref|NC_000070.6| Mus musculus strain C57B...
|
|
3 2 gi|372099108|ref|NC_000068.7| Mus musculus strain C57B...
|
|
4 2 gi|372099097|ref|NC_000079.6| Mus musculus strain C57B...
|
|
5 2 gi|372099098|ref|NC_000078.6| Mus musculus strain C57B...
|
|
6 1 gi|372099049|ref|NT_187008.1| Mus musculus strain 129S...
|
|
7 1 gi|372099109|ref|NC_000067.6| Mus musculus strain C57B...
|
|
8 1 gi|372099101|ref|NC_000075.6| Mus musculus strain C57B...
|
|
9 1 gi|372099100|ref|NC_000076.6| Mus musculus strain C57B...
|
|
10 1 gi|372099094|ref|NC_000082.6| Mus musculus strain C57B...""",
|
|
)
|
|
|
|
def test_xml2_21500_blastn_001_parser(self):
|
|
"""Parsing BLASTN 2.15.0+ (xml2_21500_blastn_001.xml)."""
|
|
filename = "xml2_21500_blastn_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_21500_blastn_001_records(records, xml2=True)
|
|
with Blast.parse(path) as records:
|
|
self.check_xml_21500_blastn_001_records(records, xml2=True)
|
|
with open(path, "rb") as stream:
|
|
record = Blast.read(stream)
|
|
self.check_xml_21500_blastn_001_record(record, xml2=True)
|
|
record = Blast.read(path)
|
|
self.check_xml_21500_blastn_001_record(record, xml2=True)
|
|
|
|
def check_xml_21500_blastn_001_records(self, records, xml2=False):
|
|
self.assertEqual(records.program, "blastn")
|
|
self.assertEqual(records.version, "BLASTN 2.15.0+")
|
|
self.assertEqual(
|
|
records.reference,
|
|
'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.',
|
|
)
|
|
self.assertEqual(records.db, "genomic/10090/GCF_000001635.26")
|
|
if xml2 is False:
|
|
self.assertIsInstance(records.query, SeqRecord)
|
|
self.assertEqual(records.query.id, "Query_78041")
|
|
self.assertEqual(
|
|
records.query.description,
|
|
"G26684.1 human STS STS_D11570, sequence tagged site",
|
|
)
|
|
self.assertEqual(repr(records.query.seq), "Seq(None, length=285)")
|
|
self.assertEqual(len(records.param), 6)
|
|
self.assertAlmostEqual(records.param["expect"], 10.0)
|
|
self.assertEqual(records.param["sc-match"], 2)
|
|
self.assertEqual(records.param["sc-mismatch"], -3)
|
|
self.assertEqual(records.param["gap-open"], 5)
|
|
self.assertEqual(records.param["gap-extend"], 2)
|
|
self.assertEqual(records.param["filter"], "L;m;")
|
|
record = next(records)
|
|
self.assertRaises(StopIteration, next, records)
|
|
self.check_xml_21500_blastn_001_record(record, xml2=xml2)
|
|
|
|
def check_xml_21500_blastn_001_record(self, record, xml2=False):
|
|
if xml2 is False:
|
|
self.assertEqual(record.num, 1)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "Query_78041")
|
|
self.assertEqual(
|
|
record.query.description,
|
|
"G26684.1 human STS STS_D11570, sequence tagged site",
|
|
)
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=285)")
|
|
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 239)
|
|
self.assertEqual(record.stat["db-len"], 2818974565)
|
|
if xml2 is True:
|
|
self.assertEqual(record.stat["hsp-len"], 31)
|
|
self.assertEqual(record.stat["eff-space"], 716017657624)
|
|
else:
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertEqual(record.stat["eff-space"], 0)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.41)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.625)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.78)
|
|
self.assertEqual(len(record), 11)
|
|
hit = record[0]
|
|
self.assertEqual(hit.num, 1)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|372099107|ref|NC_000069.6|")
|
|
self.assertEqual(hit.target.name, "NC_000069")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 3, GRCm38.p6 C57BL/6J",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(hit.target.annotations["taxid"], 10090)
|
|
self.assertEqual(hit.target.annotations["sciname"], "Mus musculus")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=160039680)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 44.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 40.9604)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.334664)
|
|
self.assertEqual(hsp.annotations["identity"], 30)
|
|
if xml2 is False:
|
|
self.assertEqual(hsp.annotations["positive"], 30)
|
|
self.assertEqual(hsp.annotations["gaps"], 1)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[101449177, 101449150, 101449149, 101449143],
|
|
[ 133, 160, 160, 166]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 34))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq({133: 'GAATCCTAGAGGCTTGATTGGCCCAGGCTGCTG'}, length=285)",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_78041")
|
|
self.assertEqual(
|
|
hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({101449143: 'CAGCAGGCCAGGGCCAGTCCAGCCTCTAGGATTC'}, length=160039680)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|372099107|ref|NC_000069.6|")
|
|
self.assertEqual(hsp.target.name, "NC_000069")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 3, GRCm38.p6 C57BL/6J",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(hit.target.annotations["taxid"], 10090)
|
|
self.assertEqual(hit.target.annotations["sciname"], "Mus musculus")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"], "|||||||||||||| || |||||| || ||||||"
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099107|ref|NC_000069.6| Length: 160039680 Strand: Minus
|
|
Mus musculus strain C57BL/6J chromosome 3, GRCm38.p6 C57BL/6J
|
|
|
|
Score:40 bits(44), Expect:0.3,
|
|
Identities:30/34(88%), Gaps:1.34(3%)
|
|
|
|
gi|372099 101449177 GAATCCTAGAGGCTGGACTGGCCCTGGCCTGCTG 101449143
|
|
0 ||||||||||||||.||.||||||.||-|||||| 34
|
|
Query_780 133 GAATCCTAGAGGCTTGATTGGCCCAGG-CTGCTG 166
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099107|ref|NC_000069.6| Length: 160039680 Strand: Minus
|
|
Mus musculus strain C57BL/6J chromosome 3, GRCm38.p6 C57BL/6J
|
|
|
|
Score:40 bits(44), Expect:0.3,
|
|
Identities:30/34(88%), Positives:30/34(88%), Gaps:1.34(3%)
|
|
|
|
gi|372099 101449177 GAATCCTAGAGGCTGGACTGGCCCTGGCCTGCTG 101449143
|
|
0 ||||||||||||||.||.||||||.||-|||||| 34
|
|
Query_780 133 GAATCCTAGAGGCTTGATTGGCCCAGG-CTGCTG 166
|
|
|
|
""",
|
|
)
|
|
hit = record[1]
|
|
self.assertEqual(hit.num, 2)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|372099103|ref|NC_000073.6|")
|
|
self.assertEqual(hit.target.name, "NC_000073")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 7, GRCm38.p6 C57BL/6J",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(hit.target.annotations["taxid"], 10090)
|
|
self.assertEqual(hit.target.annotations["sciname"], "Mus musculus")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=145441459)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 44.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 40.9604)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.334664)
|
|
self.assertEqual(hsp.annotations["identity"], 26)
|
|
if xml2 is False:
|
|
self.assertEqual(hsp.annotations["positive"], 26)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[131772185, 131772156],
|
|
[ 204, 233]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 29))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq({204: 'GAAAGGAAATNAAAATGGAAAGTTCTTGT'}, length=285)",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_78041")
|
|
self.assertEqual(
|
|
hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({131772156: 'ACCAGAACTTTCCATTTTTTTTTCCTTTC'}, length=145441459)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|372099103|ref|NC_000073.6|")
|
|
self.assertEqual(hsp.target.name, "NC_000073")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 7, GRCm38.p6 C57BL/6J",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(hsp.annotations["midline"], "||||||||| ||||||||||||||| ||")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099103|ref|NC_000073.6| Length: 145441459 Strand: Minus
|
|
Mus musculus strain C57BL/6J chromosome 7, GRCm38.p6 C57BL/6J
|
|
|
|
Score:40 bits(44), Expect:0.3,
|
|
Identities:26/29(90%), Gaps:0.29(0%)
|
|
|
|
gi|372099 131772185 GAAAGGAAAAAAAAATGGAAAGTTCTGGT 131772156
|
|
0 |||||||||..|||||||||||||||.|| 29
|
|
Query_780 204 GAAAGGAAATNAAAATGGAAAGTTCTTGT 233
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099103|ref|NC_000073.6| Length: 145441459 Strand: Minus
|
|
Mus musculus strain C57BL/6J chromosome 7, GRCm38.p6 C57BL/6J
|
|
|
|
Score:40 bits(44), Expect:0.3,
|
|
Identities:26/29(90%), Positives:26/29(90%), Gaps:0.29(0%)
|
|
|
|
gi|372099 131772185 GAAAGGAAAAAAAAATGGAAAGTTCTGGT 131772156
|
|
0 |||||||||..|||||||||||||||.|| 29
|
|
Query_780 204 GAAAGGAAATNAAAATGGAAAGTTCTTGT 233
|
|
|
|
""",
|
|
)
|
|
hit = record[2]
|
|
self.assertEqual(hit.num, 3)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|372099106|ref|NC_000070.6|")
|
|
self.assertEqual(hit.target.name, "NC_000070")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 4, GRCm38.p6 C57BL/6J",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(hit.target.annotations["taxid"], 10090)
|
|
self.assertEqual(hit.target.annotations["sciname"], "Mus musculus")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=156508116)")
|
|
self.assertEqual(len(hit), 2)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 43.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 40.0587)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 1.16809)
|
|
self.assertEqual(hsp.annotations["identity"], 23)
|
|
if xml2 is False:
|
|
self.assertEqual(hsp.annotations["positive"], 23)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[9607562, 9607538],
|
|
[ 61, 85]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 24))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq), "Seq({61: 'CCAACACAGGCCAGCGACTTCTGG'}, length=285)"
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_78041")
|
|
self.assertEqual(
|
|
hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({9607538: 'CCAGAAGCCGCTGGCCTGTGTTGG'}, length=156508116)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|372099106|ref|NC_000070.6|")
|
|
self.assertEqual(hsp.target.name, "NC_000070")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 4, GRCm38.p6 C57BL/6J",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(hsp.annotations["midline"], "|||||||||||||||| |||||||")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099106|ref|NC_000070.6| Length: 156508116 Strand: Minus
|
|
Mus musculus strain C57BL/6J chromosome 4, GRCm38.p6 C57BL/6J
|
|
|
|
Score:40 bits(43), Expect:1,
|
|
Identities:23/24(96%), Gaps:0.24(0%)
|
|
|
|
gi|372099 9607562 CCAACACAGGCCAGCGGCTTCTGG 9607538
|
|
0 ||||||||||||||||.||||||| 24
|
|
Query_780 61 CCAACACAGGCCAGCGACTTCTGG 85
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099106|ref|NC_000070.6| Length: 156508116 Strand: Minus
|
|
Mus musculus strain C57BL/6J chromosome 4, GRCm38.p6 C57BL/6J
|
|
|
|
Score:40 bits(43), Expect:1,
|
|
Identities:23/24(96%), Positives:23/24(96%), Gaps:0.24(0%)
|
|
|
|
gi|372099 9607562 CCAACACAGGCCAGCGGCTTCTGG 9607538
|
|
0 ||||||||||||||||.||||||| 24
|
|
Query_780 61 CCAACACAGGCCAGCGACTTCTGG 85
|
|
|
|
""",
|
|
)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 40.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 37.3537)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705)
|
|
self.assertEqual(hsp.annotations["identity"], 28)
|
|
if xml2 is False:
|
|
self.assertEqual(hsp.annotations["positive"], 28)
|
|
self.assertEqual(hsp.annotations["gaps"], 1)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[142902531, 142902542, 142902543, 142902563],
|
|
[ 241, 252, 252, 272]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 32))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq({241: 'GCCTGACATGGGTAGCTGCTCAATAAATGCT'}, length=285)",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_78041")
|
|
self.assertEqual(
|
|
hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({142902531: 'GCCTGGCATGAAGTAACTGCTCAATAAATGCT'}, length=156508116)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|372099106|ref|NC_000070.6|")
|
|
self.assertEqual(hsp.target.name, "NC_000070")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 4, GRCm38.p6 C57BL/6J",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(hsp.annotations["midline"], "||||| |||| ||| ||||||||||||||||")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099106|ref|NC_000070.6| Length: 156508116 Strand: Plus
|
|
Mus musculus strain C57BL/6J chromosome 4, GRCm38.p6 C57BL/6J
|
|
|
|
Score:37 bits(40), Expect:4,
|
|
Identities:28/32(88%), Gaps:1.32(3%)
|
|
|
|
gi|372099 142902531 GCCTGGCATGAAGTAACTGCTCAATAAATGCT 142902563
|
|
0 |||||.||||.-|||.|||||||||||||||| 32
|
|
Query_780 241 GCCTGACATGG-GTAGCTGCTCAATAAATGCT 272
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099106|ref|NC_000070.6| Length: 156508116 Strand: Plus
|
|
Mus musculus strain C57BL/6J chromosome 4, GRCm38.p6 C57BL/6J
|
|
|
|
Score:37 bits(40), Expect:4,
|
|
Identities:28/32(88%), Positives:28/32(88%), Gaps:1.32(3%)
|
|
|
|
gi|372099 142902531 GCCTGGCATGAAGTAACTGCTCAATAAATGCT 142902563
|
|
0 |||||.||||.-|||.|||||||||||||||| 32
|
|
Query_780 241 GCCTGACATGG-GTAGCTGCTCAATAAATGCT 272
|
|
|
|
""",
|
|
)
|
|
hit = record[3]
|
|
self.assertEqual(hit.num, 4)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|372099108|ref|NC_000068.7|")
|
|
self.assertEqual(hit.target.name, "NC_000068")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 2, GRCm38.p6 C57BL/6J",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(hit.target.annotations["taxid"], 10090)
|
|
self.assertEqual(hit.target.annotations["sciname"], "Mus musculus")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=182113224)")
|
|
self.assertEqual(len(hit), 2)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 42.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 39.157)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 1.16809)
|
|
self.assertEqual(hsp.annotations["identity"], 27)
|
|
if xml2 is False:
|
|
self.assertEqual(hsp.annotations["positive"], 27)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[3799646, 3799677],
|
|
[ 238, 269]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 31))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq({238: 'AAGGCCTGACATGGGTAGCTGCTCAATAAAT'}, length=285)",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_78041")
|
|
self.assertEqual(
|
|
hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({3799646: 'AAGTCCTGGCATGAGTAGTTGCTCAATAAAT'}, length=182113224)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|372099108|ref|NC_000068.7|")
|
|
self.assertEqual(hsp.target.name, "NC_000068")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 2, GRCm38.p6 C57BL/6J",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(hsp.annotations["midline"], "||| |||| |||| |||| ||||||||||||")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099108|ref|NC_000068.7| Length: 182113224 Strand: Plus
|
|
Mus musculus strain C57BL/6J chromosome 2, GRCm38.p6 C57BL/6J
|
|
|
|
Score:39 bits(42), Expect:1,
|
|
Identities:27/31(87%), Gaps:0.31(0%)
|
|
|
|
gi|372099 3799646 AAGTCCTGGCATGAGTAGTTGCTCAATAAAT 3799677
|
|
0 |||.||||.||||.||||.|||||||||||| 31
|
|
Query_780 238 AAGGCCTGACATGGGTAGCTGCTCAATAAAT 269
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099108|ref|NC_000068.7| Length: 182113224 Strand: Plus
|
|
Mus musculus strain C57BL/6J chromosome 2, GRCm38.p6 C57BL/6J
|
|
|
|
Score:39 bits(42), Expect:1,
|
|
Identities:27/31(87%), Positives:27/31(87%), Gaps:0.31(0%)
|
|
|
|
gi|372099 3799646 AAGTCCTGGCATGAGTAGTTGCTCAATAAAT 3799677
|
|
0 |||.||||.||||.||||.|||||||||||| 31
|
|
Query_780 238 AAGGCCTGACATGGGTAGCTGCTCAATAAAT 269
|
|
|
|
""",
|
|
)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 41.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 38.2554)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705)
|
|
self.assertEqual(hsp.annotations["identity"], 23)
|
|
if xml2 is False:
|
|
self.assertEqual(hsp.annotations["positive"], 23)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[70278959, 70278984],
|
|
[ 210, 235]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 25))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq), "Seq({210: 'AAATNAAAATGGAAAGTTCTTGTAG'}, length=285)"
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_78041")
|
|
self.assertEqual(
|
|
hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({70278959: 'AAATGAAAATGGAAAGTTCTTATAG'}, length=182113224)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|372099108|ref|NC_000068.7|")
|
|
self.assertEqual(hsp.target.name, "NC_000068")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 2, GRCm38.p6 C57BL/6J",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(hsp.annotations["midline"], "|||| |||||||||||||||| |||")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099108|ref|NC_000068.7| Length: 182113224 Strand: Plus
|
|
Mus musculus strain C57BL/6J chromosome 2, GRCm38.p6 C57BL/6J
|
|
|
|
Score:38 bits(41), Expect:4,
|
|
Identities:23/25(92%), Gaps:0.25(0%)
|
|
|
|
gi|372099 70278959 AAATGAAAATGGAAAGTTCTTATAG 70278984
|
|
0 ||||.||||||||||||||||.||| 25
|
|
Query_780 210 AAATNAAAATGGAAAGTTCTTGTAG 235
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099108|ref|NC_000068.7| Length: 182113224 Strand: Plus
|
|
Mus musculus strain C57BL/6J chromosome 2, GRCm38.p6 C57BL/6J
|
|
|
|
Score:38 bits(41), Expect:4,
|
|
Identities:23/25(92%), Positives:23/25(92%), Gaps:0.25(0%)
|
|
|
|
gi|372099 70278959 AAATGAAAATGGAAAGTTCTTATAG 70278984
|
|
0 ||||.||||||||||||||||.||| 25
|
|
Query_780 210 AAATNAAAATGGAAAGTTCTTGTAG 235
|
|
|
|
""",
|
|
)
|
|
hit = record[4]
|
|
self.assertEqual(hit.num, 5)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|372099097|ref|NC_000079.6|")
|
|
self.assertEqual(hit.target.name, "NC_000079")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 13, GRCm38.p6 C57BL/6J",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(hit.target.annotations["taxid"], 10090)
|
|
self.assertEqual(hit.target.annotations["sciname"], "Mus musculus")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=120421639)")
|
|
self.assertEqual(len(hit), 2)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 42.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 39.157)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 1.16809)
|
|
self.assertEqual(hsp.annotations["identity"], 25)
|
|
if xml2 is False:
|
|
self.assertEqual(hsp.annotations["positive"], 25)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[26806584, 26806556],
|
|
[ 206, 234]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 28))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq({206: 'AAGGAAATNAAAATGGAAAGTTCTTGTA'}, length=285)",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_78041")
|
|
self.assertEqual(
|
|
hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({26806556: 'TAGAAGAACTTTCCATTTTGATGTCCTT'}, length=120421639)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|372099097|ref|NC_000079.6|")
|
|
self.assertEqual(hsp.target.name, "NC_000079")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 13, GRCm38.p6 C57BL/6J",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(hsp.annotations["midline"], "||||| || |||||||||||||||| ||")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099097|ref|NC_000079.6| Length: 120421639 Strand: Minus
|
|
Mus musculus strain C57BL/6J chromosome 13, GRCm38.p6 C57BL/6J
|
|
|
|
Score:39 bits(42), Expect:1,
|
|
Identities:25/28(89%), Gaps:0.28(0%)
|
|
|
|
gi|372099 26806584 AAGGACATCAAAATGGAAAGTTCTTCTA 26806556
|
|
0 |||||.||.||||||||||||||||.|| 28
|
|
Query_780 206 AAGGAAATNAAAATGGAAAGTTCTTGTA 234
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099097|ref|NC_000079.6| Length: 120421639 Strand: Minus
|
|
Mus musculus strain C57BL/6J chromosome 13, GRCm38.p6 C57BL/6J
|
|
|
|
Score:39 bits(42), Expect:1,
|
|
Identities:25/28(89%), Positives:25/28(89%), Gaps:0.28(0%)
|
|
|
|
gi|372099 26806584 AAGGACATCAAAATGGAAAGTTCTTCTA 26806556
|
|
0 |||||.||.||||||||||||||||.|| 28
|
|
Query_780 206 AAGGAAATNAAAATGGAAAGTTCTTGTA 234
|
|
|
|
""",
|
|
)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 40.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 37.3537)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705)
|
|
self.assertEqual(hsp.annotations["identity"], 32)
|
|
if xml2 is False:
|
|
self.assertEqual(hsp.annotations["positive"], 32)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[56840340, 56840300],
|
|
[ 233, 273]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 40))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq({233: 'AGCGCAAGGCCTGACATGGGTAGCTGCTCAATAAATGCTA'}, length=285)",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_78041")
|
|
self.assertEqual(
|
|
hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({56840300: 'TAGTATTCACTGAACATTTTCCTATGTCAGGCCTTGCGCT'}, length=120421639)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|372099097|ref|NC_000079.6|")
|
|
self.assertEqual(hsp.target.name, "NC_000079")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 13, GRCm38.p6 C57BL/6J",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"], "||||||||||||||||| || | || ||| | ||| |||"
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099097|ref|NC_000079.6| Length: 120421639 Strand: Minus
|
|
Mus musculus strain C57BL/6J chromosome 13, GRCm38.p6 C57BL/6J
|
|
|
|
Score:37 bits(40), Expect:4,
|
|
Identities:32/40(80%), Gaps:0.40(0%)
|
|
|
|
gi|372099 56840340 AGCGCAAGGCCTGACATAGGAAAATGTTCAGTGAATACTA 56840300
|
|
0 |||||||||||||||||.||.|..||.|||.|.|||.||| 40
|
|
Query_780 233 AGCGCAAGGCCTGACATGGGTAGCTGCTCAATAAATGCTA 273
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099097|ref|NC_000079.6| Length: 120421639 Strand: Minus
|
|
Mus musculus strain C57BL/6J chromosome 13, GRCm38.p6 C57BL/6J
|
|
|
|
Score:37 bits(40), Expect:4,
|
|
Identities:32/40(80%), Positives:32/40(80%), Gaps:0.40(0%)
|
|
|
|
gi|372099 56840340 AGCGCAAGGCCTGACATAGGAAAATGTTCAGTGAATACTA 56840300
|
|
0 |||||||||||||||||.||.|..||.|||.|.|||.||| 40
|
|
Query_780 233 AGCGCAAGGCCTGACATGGGTAGCTGCTCAATAAATGCTA 273
|
|
|
|
""",
|
|
)
|
|
hit = record[5]
|
|
self.assertEqual(hit.num, 6)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|372099098|ref|NC_000078.6|")
|
|
self.assertEqual(hit.target.name, "NC_000078")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 12, GRCm38.p6 C57BL/6J",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(hit.target.annotations["taxid"], 10090)
|
|
self.assertEqual(hit.target.annotations["sciname"], "Mus musculus")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=120129022)")
|
|
self.assertEqual(len(hit), 2)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 41.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 38.2554)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705)
|
|
self.assertEqual(hsp.annotations["identity"], 22)
|
|
if xml2 is False:
|
|
self.assertEqual(hsp.annotations["positive"], 22)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[113030662, 113030685],
|
|
[ 48, 71]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 23))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq), "Seq({48: 'CATCCATTCACACCCAACACAGG'}, length=285)"
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_78041")
|
|
self.assertEqual(
|
|
hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({113030662: 'CATCCATTCACACCCAGCACAGG'}, length=120129022)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|372099098|ref|NC_000078.6|")
|
|
self.assertEqual(hsp.target.name, "NC_000078")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 12, GRCm38.p6 C57BL/6J",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(hsp.annotations["midline"], "|||||||||||||||| ||||||")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099098|ref|NC_000078.6| Length: 120129022 Strand: Plus
|
|
Mus musculus strain C57BL/6J chromosome 12, GRCm38.p6 C57BL/6J
|
|
|
|
Score:38 bits(41), Expect:4,
|
|
Identities:22/23(96%), Gaps:0.23(0%)
|
|
|
|
gi|372099 113030662 CATCCATTCACACCCAGCACAGG 113030685
|
|
0 ||||||||||||||||.|||||| 23
|
|
Query_780 48 CATCCATTCACACCCAACACAGG 71
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099098|ref|NC_000078.6| Length: 120129022 Strand: Plus
|
|
Mus musculus strain C57BL/6J chromosome 12, GRCm38.p6 C57BL/6J
|
|
|
|
Score:38 bits(41), Expect:4,
|
|
Identities:22/23(96%), Positives:22/23(96%), Gaps:0.23(0%)
|
|
|
|
gi|372099 113030662 CATCCATTCACACCCAGCACAGG 113030685
|
|
0 ||||||||||||||||.|||||| 23
|
|
Query_780 48 CATCCATTCACACCCAACACAGG 71
|
|
|
|
""",
|
|
)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 40.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 37.3537)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705)
|
|
self.assertEqual(hsp.annotations["identity"], 28)
|
|
if xml2 is False:
|
|
self.assertEqual(hsp.annotations["positive"], 28)
|
|
self.assertEqual(hsp.annotations["gaps"], 1)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[108990272, 108990248, 108990248, 108990241],
|
|
[ 230, 254, 255, 262]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 32))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq({230: 'TGTAGCGCAAGGCCTGACATGGGTAGCTGCTC'}, length=285)",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_78041")
|
|
self.assertEqual(
|
|
hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({108990241: 'GACCAGCACCCATGTCAGGCCTAGAGCTACA'}, length=120129022)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|372099098|ref|NC_000078.6|")
|
|
self.assertEqual(hsp.target.name, "NC_000078")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 12, GRCm38.p6 C57BL/6J",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(hsp.annotations["midline"], "|||||| | ||||||||||||||| |||| ||")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099098|ref|NC_000078.6| Length: 120129022 Strand: Minus
|
|
Mus musculus strain C57BL/6J chromosome 12, GRCm38.p6 C57BL/6J
|
|
|
|
Score:37 bits(40), Expect:4,
|
|
Identities:28/32(88%), Gaps:1.32(3%)
|
|
|
|
gi|372099 108990272 TGTAGCTCTAGGCCTGACATGGGT-GCTGGTC 108990241
|
|
0 ||||||.|.|||||||||||||||-||||.|| 32
|
|
Query_780 230 TGTAGCGCAAGGCCTGACATGGGTAGCTGCTC 262
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099098|ref|NC_000078.6| Length: 120129022 Strand: Minus
|
|
Mus musculus strain C57BL/6J chromosome 12, GRCm38.p6 C57BL/6J
|
|
|
|
Score:37 bits(40), Expect:4,
|
|
Identities:28/32(88%), Positives:28/32(88%), Gaps:1.32(3%)
|
|
|
|
gi|372099 108990272 TGTAGCTCTAGGCCTGACATGGGT-GCTGGTC 108990241
|
|
0 ||||||.|.|||||||||||||||-||||.|| 32
|
|
Query_780 230 TGTAGCGCAAGGCCTGACATGGGTAGCTGCTC 262
|
|
|
|
""",
|
|
)
|
|
hit = record[6]
|
|
self.assertEqual(hit.num, 7)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|372099049|ref|NT_187008.1|")
|
|
self.assertEqual(hit.target.name, "NT_187008")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Mus musculus strain 129S1/SvImJ chromosome 16 genomic scaffold, GRCm38.p6 alternate locus group 129S1/SvImJ 129S1/SVIMJ_MMCHR16_CTG2",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(hit.target.annotations["taxid"], 10090)
|
|
self.assertEqual(hit.target.annotations["sciname"], "Mus musculus")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=250595)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 40.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 37.3537)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705)
|
|
self.assertEqual(hsp.annotations["identity"], 43)
|
|
if xml2 is False:
|
|
self.assertEqual(hsp.annotations["positive"], 43)
|
|
self.assertEqual(hsp.annotations["gaps"], 2)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[158458, 158482, 158483, 158491, 158492, 158514],
|
|
[ 174, 198, 198, 206, 206, 228]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 56))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq({174: 'GGAGGCAAAGAATCCCTACCTCCTAGGGGTGAAAGGAAATNAAAATGGAAAGTT'}, length=285)",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_78041")
|
|
self.assertEqual(
|
|
hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({158458: 'GGAGGCAAAGAATCCCTACATTGTGACAGCTGATAAAGAAGGTAAAATGGAAAATT'}, length=250595)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|372099049|ref|NT_187008.1|")
|
|
self.assertEqual(hsp.target.name, "NT_187008")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Mus musculus strain 129S1/SvImJ chromosome 16 genomic scaffold, GRCm38.p6 alternate locus group 129S1/SvImJ 129S1/SVIMJ_MMCHR16_CTG2",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"||||||||||||||||||| | | | | ||| || ||| |||||||||| ||",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099049|ref|NT_187008.1| Length: 250595 Strand: Plus
|
|
Mus musculus strain 129S1/SvImJ chromosome 16 genomic scaffold,
|
|
GRCm38.p6 alternate locus group 129S1/SvImJ 129S1/SVIMJ_MMCHR16_CTG2
|
|
|
|
Score:37 bits(40), Expect:4,
|
|
Identities:43/56(77%), Gaps:2.56(4%)
|
|
|
|
gi|372099 158458 GGAGGCAAAGAATCCCTACATTGTGACAGCTGATAAAGAAGGTAAAATGGAAAATT
|
|
0 |||||||||||||||||||.|..|-|..|.|||-||.|||...||||||||||.||
|
|
Query_780 174 GGAGGCAAAGAATCCCTACCTCCT-AGGGGTGA-AAGGAAATNAAAATGGAAAGTT
|
|
|
|
gi|372099 158514
|
|
56
|
|
Query_780 228
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099049|ref|NT_187008.1| Length: 250595 Strand: Plus
|
|
Mus musculus strain 129S1/SvImJ chromosome 16 genomic scaffold,
|
|
GRCm38.p6 alternate locus group 129S1/SvImJ 129S1/SVIMJ_MMCHR16_CTG2
|
|
|
|
Score:37 bits(40), Expect:4,
|
|
Identities:43/56(77%), Positives:43/56(77%), Gaps:2.56(4%)
|
|
|
|
gi|372099 158458 GGAGGCAAAGAATCCCTACATTGTGACAGCTGATAAAGAAGGTAAAATGGAAAATT
|
|
0 |||||||||||||||||||.|..|-|..|.|||-||.|||...||||||||||.||
|
|
Query_780 174 GGAGGCAAAGAATCCCTACCTCCT-AGGGGTGA-AAGGAAATNAAAATGGAAAGTT
|
|
|
|
gi|372099 158514
|
|
56
|
|
Query_780 228
|
|
|
|
""",
|
|
)
|
|
hit = record[7]
|
|
self.assertEqual(hit.num, 8)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|372099109|ref|NC_000067.6|")
|
|
self.assertEqual(hit.target.name, "NC_000067")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 1, GRCm38.p6 C57BL/6J",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(hit.target.annotations["taxid"], 10090)
|
|
self.assertEqual(hit.target.annotations["sciname"], "Mus musculus")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=195471971)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 40.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 37.3537)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705)
|
|
self.assertEqual(hsp.annotations["identity"], 35)
|
|
if xml2 is False:
|
|
self.assertEqual(hsp.annotations["positive"], 35)
|
|
self.assertEqual(hsp.annotations["gaps"], 2)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[65190107, 65190128, 65190128, 65190144, 65190144, 65190148],
|
|
[ 86, 107, 108, 124, 125, 129]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 43))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq({86: 'GCTCAGCCACAGACATGGTTTGTNACTNTTGAGCTTCTGTTCC'}, length=285)",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_78041")
|
|
self.assertEqual(
|
|
hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({65190107: 'GCTCAGCCACATACATGGTTTTAAGTGTTGAGGCTCTTTCC'}, length=195471971)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|372099109|ref|NC_000067.6|")
|
|
self.assertEqual(hsp.target.name, "NC_000067")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 1, GRCm38.p6 C57BL/6J",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"], "||||||||||| ||||||||| | | | ||||| ||| ||||"
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099109|ref|NC_000067.6| Length: 195471971 Strand: Plus
|
|
Mus musculus strain C57BL/6J chromosome 1, GRCm38.p6 C57BL/6J
|
|
|
|
Score:37 bits(40), Expect:4,
|
|
Identities:35/43(81%), Gaps:2.43(5%)
|
|
|
|
gi|372099 65190107 GCTCAGCCACATACATGGTTT-TAAGTGTTGAGGCTCT-TTCC 65190148
|
|
0 |||||||||||.|||||||||-|.|.|.|||||..|||-|||| 43
|
|
Query_780 86 GCTCAGCCACAGACATGGTTTGTNACTNTTGAGCTTCTGTTCC 129
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099109|ref|NC_000067.6| Length: 195471971 Strand: Plus
|
|
Mus musculus strain C57BL/6J chromosome 1, GRCm38.p6 C57BL/6J
|
|
|
|
Score:37 bits(40), Expect:4,
|
|
Identities:35/43(81%), Positives:35/43(81%), Gaps:2.43(5%)
|
|
|
|
gi|372099 65190107 GCTCAGCCACATACATGGTTT-TAAGTGTTGAGGCTCT-TTCC 65190148
|
|
0 |||||||||||.|||||||||-|.|.|.|||||..|||-|||| 43
|
|
Query_780 86 GCTCAGCCACAGACATGGTTTGTNACTNTTGAGCTTCTGTTCC 129
|
|
|
|
""",
|
|
)
|
|
hit = record[8]
|
|
self.assertEqual(hit.num, 9)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|372099101|ref|NC_000075.6|")
|
|
self.assertEqual(hit.target.name, "NC_000075")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 9, GRCm38.p6 C57BL/6J",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(hit.target.annotations["taxid"], 10090)
|
|
self.assertEqual(hit.target.annotations["sciname"], "Mus musculus")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=124595110)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 40.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 37.3537)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705)
|
|
self.assertEqual(hsp.annotations["identity"], 36)
|
|
if xml2 is False:
|
|
self.assertEqual(hsp.annotations["positive"], 36)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[58227241, 58227194],
|
|
[ 237, 284]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 47))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq({237: 'CAAGGCCTGACATGGGTAGCTGCTCAATAAATGCTAGTNTGTTATTT'}, length=285)",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_78041")
|
|
self.assertEqual(
|
|
hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({58227194: 'AAAAAAAAAAATAGTATTTATTGAGCAGTCATACCTGTCAGGCTTTG'}, length=124595110)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|372099101|ref|NC_000075.6|")
|
|
self.assertEqual(hsp.target.name, "NC_000075")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 9, GRCm38.p6 C57BL/6J",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"||| |||||||| | | ||||||||||||| ||| | | || |||",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099101|ref|NC_000075.6| Length: 124595110 Strand: Minus
|
|
Mus musculus strain C57BL/6J chromosome 9, GRCm38.p6 C57BL/6J
|
|
|
|
Score:37 bits(40), Expect:4,
|
|
Identities:36/47(77%), Gaps:0.47(0%)
|
|
|
|
gi|372099 58227241 CAAAGCCTGACAGGTATGACTGCTCAATAAATACTATTTTTTTTTTT 58227194
|
|
0 |||.||||||||.|..|..|||||||||||||.|||.|.|.||.||| 47
|
|
Query_780 237 CAAGGCCTGACATGGGTAGCTGCTCAATAAATGCTAGTNTGTTATTT 284
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099101|ref|NC_000075.6| Length: 124595110 Strand: Minus
|
|
Mus musculus strain C57BL/6J chromosome 9, GRCm38.p6 C57BL/6J
|
|
|
|
Score:37 bits(40), Expect:4,
|
|
Identities:36/47(77%), Positives:36/47(77%), Gaps:0.47(0%)
|
|
|
|
gi|372099 58227241 CAAAGCCTGACAGGTATGACTGCTCAATAAATACTATTTTTTTTTTT 58227194
|
|
0 |||.||||||||.|..|..|||||||||||||.|||.|.|.||.||| 47
|
|
Query_780 237 CAAGGCCTGACATGGGTAGCTGCTCAATAAATGCTAGTNTGTTATTT 284
|
|
|
|
""",
|
|
)
|
|
hit = record[9]
|
|
self.assertEqual(hit.num, 10)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|372099100|ref|NC_000076.6|")
|
|
self.assertEqual(hit.target.name, "NC_000076")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 10, GRCm38.p6 C57BL/6J",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(hit.target.annotations["taxid"], 10090)
|
|
self.assertEqual(hit.target.annotations["sciname"], "Mus musculus")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=130694993)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 40.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 37.3537)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705)
|
|
self.assertEqual(hsp.annotations["identity"], 20)
|
|
if xml2 is False:
|
|
self.assertEqual(hsp.annotations["positive"], 20)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[119337185, 119337205],
|
|
[ 254, 274]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 20))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq), "Seq({254: 'AGCTGCTCAATAAATGCTAG'}, length=285)"
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_78041")
|
|
self.assertEqual(
|
|
hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({119337185: 'AGCTGCTCAATAAATGCTAG'}, length=130694993)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|372099100|ref|NC_000076.6|")
|
|
self.assertEqual(hsp.target.name, "NC_000076")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 10, GRCm38.p6 C57BL/6J",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(hsp.annotations["midline"], "||||||||||||||||||||")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099100|ref|NC_000076.6| Length: 130694993 Strand: Plus
|
|
Mus musculus strain C57BL/6J chromosome 10, GRCm38.p6 C57BL/6J
|
|
|
|
Score:37 bits(40), Expect:4,
|
|
Identities:20/20(100%), Gaps:0.20(0%)
|
|
|
|
gi|372099 119337185 AGCTGCTCAATAAATGCTAG 119337205
|
|
0 |||||||||||||||||||| 20
|
|
Query_780 254 AGCTGCTCAATAAATGCTAG 274
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099100|ref|NC_000076.6| Length: 130694993 Strand: Plus
|
|
Mus musculus strain C57BL/6J chromosome 10, GRCm38.p6 C57BL/6J
|
|
|
|
Score:37 bits(40), Expect:4,
|
|
Identities:20/20(100%), Positives:20/20(100%), Gaps:0.20(0%)
|
|
|
|
gi|372099 119337185 AGCTGCTCAATAAATGCTAG 119337205
|
|
0 |||||||||||||||||||| 20
|
|
Query_780 254 AGCTGCTCAATAAATGCTAG 274
|
|
|
|
""",
|
|
)
|
|
hit = record[10]
|
|
self.assertEqual(hit.num, 11)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|372099094|ref|NC_000082.6|")
|
|
self.assertEqual(hit.target.name, "NC_000082")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 16, GRCm38.p6 C57BL/6J",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(hit.target.annotations["taxid"], 10090)
|
|
self.assertEqual(hit.target.annotations["sciname"], "Mus musculus")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=98207768)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 40.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 37.3537)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 4.07705)
|
|
self.assertEqual(hsp.annotations["identity"], 43)
|
|
if xml2 is False:
|
|
self.assertEqual(hsp.annotations["positive"], 43)
|
|
self.assertEqual(hsp.annotations["gaps"], 2)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[18854779, 18854803, 18854804, 18854812, 18854813, 18854835],
|
|
[ 174, 198, 198, 206, 206, 228]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 56))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq({174: 'GGAGGCAAAGAATCCCTACCTCCTAGGGGTGAAAGGAAATNAAAATGGAAAGTT'}, length=285)",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_78041")
|
|
self.assertEqual(
|
|
hsp.query.description, "G26684.1 human STS STS_D11570, sequence tagged site"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({18854779: 'GGAGGCAAAGAATCCCTACATTGTGACAGCTGATAAAGAAGGTAAAATGGAAAATT'}, length=98207768)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|372099094|ref|NC_000082.6|")
|
|
self.assertEqual(hsp.target.name, "NC_000082")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Mus musculus strain C57BL/6J chromosome 16, GRCm38.p6 C57BL/6J",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"||||||||||||||||||| | | | | ||| || ||| |||||||||| ||",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099094|ref|NC_000082.6| Length: 98207768 Strand: Plus
|
|
Mus musculus strain C57BL/6J chromosome 16, GRCm38.p6 C57BL/6J
|
|
|
|
Score:37 bits(40), Expect:4,
|
|
Identities:43/56(77%), Gaps:2.56(4%)
|
|
|
|
gi|372099 18854779 GGAGGCAAAGAATCCCTACATTGTGACAGCTGATAAAGAAGGTAAAATGGAAAATT
|
|
0 |||||||||||||||||||.|..|-|..|.|||-||.|||...||||||||||.||
|
|
Query_780 174 GGAGGCAAAGAATCCCTACCTCCT-AGGGGTGA-AAGGAAATNAAAATGGAAAGTT
|
|
|
|
gi|372099 18854835
|
|
56
|
|
Query_780 228
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_78041 Length: 285 Strand: Plus
|
|
G26684.1 human STS STS_D11570, sequence tagged site
|
|
Target: gi|372099094|ref|NC_000082.6| Length: 98207768 Strand: Plus
|
|
Mus musculus strain C57BL/6J chromosome 16, GRCm38.p6 C57BL/6J
|
|
|
|
Score:37 bits(40), Expect:4,
|
|
Identities:43/56(77%), Positives:43/56(77%), Gaps:2.56(4%)
|
|
|
|
gi|372099 18854779 GGAGGCAAAGAATCCCTACATTGTGACAGCTGATAAAGAAGGTAAAATGGAAAATT
|
|
0 |||||||||||||||||||.|..|-|..|.|||-||.|||...||||||||||.||
|
|
Query_780 174 GGAGGCAAAGAATCCCTACCTCCT-AGGGGTGA-AAGGAAATNAAAATGGAAAGTT
|
|
|
|
gi|372099 18854835
|
|
56
|
|
Query_780 228
|
|
|
|
""",
|
|
)
|
|
|
|
def test_xml_21500_blastn_001_writer(self):
|
|
"""Writing BLASTN 2.15.0+ (xml_21500_blastn_001.xml)."""
|
|
filename = "xml_21500_blastn_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream)
|
|
self.assertEqual(n, 1)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_xml_21500_blastn_001_records(written_records)
|
|
|
|
def test_xml2_21500_blastn_001_writer(self):
|
|
"""Writing BLASTN 2.15.0+ XML2 (xml2_21500_blastn_001.xml)."""
|
|
filename = "xml2_21500_blastn_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream, fmt="XML2")
|
|
self.assertEqual(n, 1)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_xml_21500_blastn_001_records(written_records, xml2=True)
|
|
|
|
def test_megablast_legacy(self):
|
|
"""Parsing megablast 2.2.26 [Sep-21-2011] (megablast_legacy.xml)."""
|
|
filename = "megablast_legacy.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_megablast_legacy_records(records)
|
|
with Blast.parse(path) as records:
|
|
self.check_megablast_legacy_records(records)
|
|
with open(path, "rb") as stream:
|
|
record = Blast.read(stream)
|
|
self.check_megablast_legacy_record(record)
|
|
record = Blast.read(path)
|
|
self.check_megablast_legacy_record(record)
|
|
self.assertEqual(
|
|
str(record[1::2]),
|
|
"""\
|
|
Program: megablast 2.2.26 [Sep-21-2011]
|
|
db: m_cold.fasta
|
|
Query: lcl|1_ (length=1111)
|
|
gi|8332116|gb|BE037100.1|BE037100 MP14H09 MP Mesembryanthemum
|
|
crystallinum cDNA 5' similar to cold acclimation protein, mRNA sequence
|
|
Hits: No hits found""",
|
|
)
|
|
|
|
def check_megablast_legacy_records(self, records):
|
|
self.assertEqual(records.program, "megablast")
|
|
self.assertEqual(records.version, "megablast 2.2.26 [Sep-21-2011]")
|
|
self.assertEqual(
|
|
records.reference,
|
|
'~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~"Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs", Nucleic Acids Res. 25:3389-3402.',
|
|
)
|
|
self.assertEqual(records.db, "m_cold.fasta")
|
|
self.assertIsInstance(records.query, SeqRecord)
|
|
self.assertEqual(records.query.id, "lcl|1_")
|
|
self.assertEqual(
|
|
records.query.description,
|
|
"gi|8332116|gb|BE037100.1|BE037100 MP14H09 MP Mesembryanthemum crystallinum cDNA 5' similar to cold acclimation protein, mRNA sequence",
|
|
)
|
|
self.assertEqual(repr(records.query.seq), "Seq(None, length=1111)")
|
|
self.assertEqual(len(records.param), 6)
|
|
self.assertAlmostEqual(records.param["expect"], 10.0)
|
|
self.assertEqual(records.param["sc-match"], 1)
|
|
self.assertEqual(records.param["sc-mismatch"], -3)
|
|
self.assertEqual(records.param["gap-open"], 0)
|
|
self.assertEqual(records.param["gap-extend"], 0)
|
|
self.assertEqual(records.param["filter"], "D")
|
|
record = next(records)
|
|
self.assertRaises(StopIteration, next, records)
|
|
self.check_megablast_legacy_record(record)
|
|
self.assertEqual(len(records.mbstat), 7)
|
|
self.assertEqual(records.mbstat["db-num"], 1)
|
|
self.assertEqual(records.mbstat["db-len"], 1111)
|
|
self.assertEqual(records.mbstat["hsp-len"], 10)
|
|
self.assertAlmostEqual(records.mbstat["eff-space"], 1212200.0)
|
|
self.assertAlmostEqual(records.mbstat["kappa"], 0.710603)
|
|
self.assertAlmostEqual(records.mbstat["lambda"], 1.37406)
|
|
self.assertAlmostEqual(records.mbstat["entropy"], 1.30725)
|
|
|
|
def check_megablast_legacy_record(self, record):
|
|
self.assertEqual(record.num, 0)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "lcl|1_")
|
|
self.assertEqual(
|
|
record.query.description,
|
|
"gi|8332116|gb|BE037100.1|BE037100 MP14H09 MP Mesembryanthemum crystallinum cDNA 5' similar to cold acclimation protein, mRNA sequence",
|
|
)
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=1111)")
|
|
self.assertEqual(repr(record), "<Bio.Blast.Record query.id='lcl|1_'; 1 hit>")
|
|
self.assertEqual(len(record), 1)
|
|
hit = record[0]
|
|
self.assertEqual(hit.num, 1)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gnl|BL_ORD_ID|0")
|
|
self.assertEqual(hit.target.name, "0")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"gi|8332116|gb|BE037100.1|BE037100 MP14H09 MP Mesembryanthemum crystallinum cDNA 5' similar to cold acclimation protein, mRNA sequence",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=1111)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 788.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 1562.59)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 797)
|
|
self.assertEqual(hsp.annotations["positive"], 797)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 797],
|
|
[ 0, 797]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 797))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq({0: 'CACTAGTACTCGAGCGTNCTGCACCAATTCGGCACGAGCAAGTGACTACGTTNT...GTG'}, length=1111)",
|
|
)
|
|
self.assertEqual(hsp.query.id, "lcl|1_")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|8332116|gb|BE037100.1|BE037100 MP14H09 MP Mesembryanthemum crystallinum cDNA 5' similar to cold acclimation protein, mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'CACTAGTACTCGAGCGTNCTGCACCAATTCGGCACGAGCAAGTGACTACGTTNT...GTG'}, length=1111)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gnl|BL_ORD_ID|0")
|
|
self.assertEqual(hsp.target.name, "0")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"gi|8332116|gb|BE037100.1|BE037100 MP14H09 MP Mesembryanthemum crystallinum cDNA 5' similar to cold acclimation protein, mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : lcl|1_ Length: 1111 Strand: Plus
|
|
gi|8332116|gb|BE037100.1|BE037100 MP14H09 MP Mesembryanthemum
|
|
crystallinum cDNA 5' similar to cold acclimation protein, mRNA sequence
|
|
Target: gnl|BL_ORD_ID|0 Length: 1111 Strand: Plus
|
|
gi|8332116|gb|BE037100.1|BE037100 MP14H09 MP Mesembryanthemum
|
|
crystallinum cDNA 5' similar to cold acclimation protein, mRNA sequence
|
|
|
|
Score:1562 bits(788), Expect:0,
|
|
Identities:797/797(100%), Positives:797/797(100%)
|
|
|
|
gnl|BL_OR 0 CACTAGTACTCGAGCGTNCTGCACCAATTCGGCACGAGCAAGTGACTACGTTNTGTGAAC
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
lcl|1_ 0 CACTAGTACTCGAGCGTNCTGCACCAATTCGGCACGAGCAAGTGACTACGTTNTGTGAAC
|
|
|
|
gnl|BL_OR 60 AGAAAATGGGGAGAGAAATGAAGTACTTGGCCATGAAAACTGATCAATTGGCCGTGGCTA
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
lcl|1_ 60 AGAAAATGGGGAGAGAAATGAAGTACTTGGCCATGAAAACTGATCAATTGGCCGTGGCTA
|
|
|
|
gnl|BL_OR 120 ATATGATCGATTCCGATATCAATGAGCTTAAAATGGCAACAATGAGGCTCATCAATGATG
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
lcl|1_ 120 ATATGATCGATTCCGATATCAATGAGCTTAAAATGGCAACAATGAGGCTCATCAATGATG
|
|
|
|
gnl|BL_OR 180 CTAGTATGCTCGGTCATTACGGGTTTGGCACTCATTTCCTCAAATGGCTCGCCTGCCTTG
|
|
180 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
lcl|1_ 180 CTAGTATGCTCGGTCATTACGGGTTTGGCACTCATTTCCTCAAATGGCTCGCCTGCCTTG
|
|
|
|
gnl|BL_OR 240 CGGCTATTTACTTGTTGATATTGGATCGAACAAACTGGAGAACCAACATGCTCACGTCAC
|
|
240 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
lcl|1_ 240 CGGCTATTTACTTGTTGATATTGGATCGAACAAACTGGAGAACCAACATGCTCACGTCAC
|
|
|
|
gnl|BL_OR 300 TTTTAGTCCCTTACATATTCCTCAGTCTTCCATCCGGGCCATTTCATCTGTTCAGAGGCG
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
lcl|1_ 300 TTTTAGTCCCTTACATATTCCTCAGTCTTCCATCCGGGCCATTTCATCTGTTCAGAGGCG
|
|
|
|
gnl|BL_OR 360 AGGTCGGGAAATGGATTGCCATCATTGCAGTCGTGTTAAGGCTGTTCTTCAACCGGCATT
|
|
360 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
lcl|1_ 360 AGGTCGGGAAATGGATTGCCATCATTGCAGTCGTGTTAAGGCTGTTCTTCAACCGGCATT
|
|
|
|
gnl|BL_OR 420 TCCCAGTTTGGCTGGAAATGCCTGGATCGTTGATACTCCTCCTGGTGGTGGCACCAGACT
|
|
420 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
lcl|1_ 420 TCCCAGTTTGGCTGGAAATGCCTGGATCGTTGATACTCCTCCTGGTGGTGGCACCAGACT
|
|
|
|
gnl|BL_OR 480 TCTTTACACACAAAGTGAAGGAGAGCTGGATCGGAATTGCAATTATGATAGCGATAGGGT
|
|
480 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
lcl|1_ 480 TCTTTACACACAAAGTGAAGGAGAGCTGGATCGGAATTGCAATTATGATAGCGATAGGGT
|
|
|
|
gnl|BL_OR 540 GTCACCTGATGCAAGAACATATCAGAGCCACTGGTGGCTTTTGGAATTCCTTCACACAGA
|
|
540 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
lcl|1_ 540 GTCACCTGATGCAAGAACATATCAGAGCCACTGGTGGCTTTTGGAATTCCTTCACACAGA
|
|
|
|
gnl|BL_OR 600 GCCACGGAACTTTTAACACAATTGGGCTTATCCTTCTACTGGCTTACCCTGTCTGTTTAT
|
|
600 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
lcl|1_ 600 GCCACGGAACTTTTAACACAATTGGGCTTATCCTTCTACTGGCTTACCCTGTCTGTTTAT
|
|
|
|
gnl|BL_OR 660 GGTCATCTTCATGATGTAGTAGCTTAGTCTTGATCCTAATCCTCAAATNTACTTTTCCAG
|
|
660 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
lcl|1_ 660 GGTCATCTTCATGATGTAGTAGCTTAGTCTTGATCCTAATCCTCAAATNTACTTTTCCAG
|
|
|
|
gnl|BL_OR 720 CTCTTTCGACGCTCTTGCTAAAGCCCATTCAATTCGCCCCATATTTCGCACACATTCATT
|
|
720 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
lcl|1_ 720 CTCTTTCGACGCTCTTGCTAAAGCCCATTCAATTCGCCCCATATTTCGCACACATTCATT
|
|
|
|
gnl|BL_OR 780 TCACCACCCAATACGTG 797
|
|
780 ||||||||||||||||| 797
|
|
lcl|1_ 780 TCACCACCCAATACGTG 797
|
|
|
|
""",
|
|
)
|
|
|
|
def test_megablast_legacy_writer(self):
|
|
"""Writing megablast 2.2.26 [Sep-21-2011] (megablast_legacy.xml)."""
|
|
filename = "megablast_legacy.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream)
|
|
self.assertEqual(n, 1)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_megablast_legacy_records(written_records)
|
|
|
|
|
|
class TestBlastx(unittest.TestCase):
|
|
"""Test the Blast XML parser for blastx output."""
|
|
|
|
def test_xml_2222_blastx_001_parser(self):
|
|
"""Parsing BLASTX 2.2.22+ (xml_2222_blastx_001.xml)."""
|
|
filename = "xml_2222_blastx_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_2222_blastx_001(records)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.assertEqual(
|
|
str(records),
|
|
"""\
|
|
Program: BLASTX 2.2.22+
|
|
db: nr
|
|
|
|
Query: 1 (length=1002)
|
|
gi|4104054|gb|AH007193.1|SEG_CVIGS Centaurea vallesiaca 18S ribosomal
|
|
RNA gene, partial sequence
|
|
Hits: ---- ----- ----------------------------------------------------------
|
|
# # HSP ID + description
|
|
---- ----- ----------------------------------------------------------
|
|
0 1 gi|149390769|gb|ABR25402.1| unknown [Oryza sativa (ind...
|
|
|
|
Query: 2 (length=2050)
|
|
gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein
|
|
HLPf gene, partial cds
|
|
Hits: ---- ----- ----------------------------------------------------------
|
|
# # HSP ID + description
|
|
---- ----- ----------------------------------------------------------
|
|
0 2 gi|4218936|gb|AAD12237.1| hevein-like protein HLPf [Sa...
|
|
1 2 gi|4206074|gb|AAD11408.1| hevein-like protein [Sambucu...
|
|
2 2 gi|4206070|gb|AAD11406.1| hevein-like protein [Sambucu...
|
|
3 2 gi|4206072|gb|AAD11407.1| hevein-like protein [Sambucu...
|
|
4 2 gi|16903131|gb|AAL30421.1|AF434174_1 hevein-like prote...
|
|
5 2 gi|16903133|gb|AAL30422.1|AF434175_1 hevein-like prote...
|
|
6 2 gi|30691147|gb|AAO17294.1| chitinase [Ficus carica]
|
|
7 2 gi|222139388|gb|ACM45713.1| class I chitinase [Pyrus p...
|
|
8 2 gi|23496435|dbj|BAB40817.2| endochitinase MCHT-2 [Cucu...
|
|
9 2 gi|82621253|gb|ABB86300.1| chitinase [Ficus awkeotsang]
|
|
|
|
Query: 3 (length=550)
|
|
gi|5690369|gb|AF158246.1|AF158246 Cricetulus griseus glucose phosphate
|
|
isomerase (GPI) gene, partial intron sequence
|
|
Hits: No hits found
|
|
|
|
Query: 4 (length=655)
|
|
gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber
|
|
Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN
|
|
gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636
|
|
(X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence
|
|
Hits: ---- ----- ----------------------------------------------------------
|
|
# # HSP ID + description
|
|
---- ----- ----------------------------------------------------------
|
|
0 1 gi|166343825|gb|ABY86655.1| beta-tubulin 4 [Gossypium ...
|
|
1 1 gi|223549899|gb|EEF51386.1| tubulin beta chain, putati...
|
|
2 1 gi|18420724|ref|NP_568437.1| TUB8 (tubulin beta-8) [Ar...
|
|
3 1 gi|225426385|ref|XP_002271992.1| PREDICTED: hypothetic...
|
|
4 1 gi|15451226|gb|AAK96884.1| beta tubulin [Arabidopsis t...
|
|
5 1 gi|225470745|ref|XP_002267380.1| PREDICTED: hypothetic...
|
|
6 1 gi|586076|sp|P37392.1|TBB1_LUPAL RecName: Full=Tubulin...
|
|
7 1 gi|224104341|ref|XP_002313404.1| tubulin, beta chain [...
|
|
8 1 gi|223549679|gb|EEF51167.1| tubulin beta chain, putati...
|
|
9 1 gi|224058553|ref|XP_002299541.1| tubulin, beta chain [...
|
|
|
|
Query: 5 (length=623)
|
|
gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed
|
|
spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2,
|
|
complete sequence
|
|
Hits: ---- ----- ----------------------------------------------------------
|
|
# # HSP ID + description
|
|
---- ----- ----------------------------------------------------------
|
|
0 2 gi|110740644|dbj|BAE98425.1| hypothetical protein [Ara...
|
|
1 1 gi|226453533|gb|EEH50844.1| predicted protein [Micromo...
|
|
2 1 gi|168069582|ref|XP_001786502.1| predicted protein [Ph...
|
|
3 1 gi|168068558|ref|XP_001786120.1| predicted protein [Ph...
|
|
4 1 gi|168068926|ref|XP_001786259.1| predicted protein [Ph...
|
|
5 1 gi|168070288|ref|XP_001786759.1| predicted protein [Ph...
|
|
6 1 gi|168068591|ref|XP_001786133.1| predicted protein [Ph...
|
|
7 1 gi|74622391|sp|Q8TGM5|ART3_YEAST Uncharacterized prote...
|
|
8 1 gi|168069944|ref|XP_001786634.1| predicted protein [Ph...
|
|
9 1 gi|50307717|ref|XP_453851.1| unnamed protein product [...
|
|
|
|
Query: 6 (length=309)
|
|
gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA)
|
|
gene, partial cds
|
|
Hits: ---- ----- ----------------------------------------------------------
|
|
# # HSP ID + description
|
|
---- ----- ----------------------------------------------------------
|
|
0 1 gi|3176603|gb|AAC18749.1| phytochrome A [Lathyrus odor...
|
|
1 1 gi|130188|sp|P15001.1|PHYA_PEA RecName: Full=Phytochro...
|
|
2 1 gi|2499555|sp|P93673.1|PHYA_LATSA RecName: Full=Phytoc...
|
|
3 1 gi|3176595|gb|AAC18745.1| phytochrome A [Lennea melano...
|
|
4 1 gi|1711106|gb|AAC18675.1| phytochrome A [Sophora affinis]
|
|
5 1 gi|1711090|gb|AAC18670.1| phytochrome A [Myrospermum s...
|
|
6 1 gi|3176605|gb|AAC18750.1| phytochrome A [Hybosema robu...
|
|
7 1 gi|3176454|gb|AAC18668.1| phytochrome A [Cyclolobium n...
|
|
8 1 gi|3176523|gb|AAC18709.1| phytochrome A [Millettia ric...
|
|
9 1 gi|3176494|gb|AAC18693.1| phytochrome A [Callerya atro...
|
|
|
|
Query: 7 (length=2551)
|
|
gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like
|
|
protein (matK) gene, complete cds; chloroplast gene for chloroplast
|
|
product
|
|
Hits: ---- ----- ----------------------------------------------------------
|
|
# # HSP ID + description
|
|
---- ----- ----------------------------------------------------------
|
|
0 1 gi|27805603|sp|Q9TKP6.1|MATK_WISFR RecName: Full=Matur...
|
|
1 1 gi|171909144|gb|ACB58148.1| maturase K [Wisteria frute...
|
|
2 1 gi|171909146|gb|ACB58149.1| maturase K [Wisteria frute...
|
|
3 1 gi|171909132|gb|ACB58142.1| maturase K [Callerya megas...
|
|
4 1 gi|5817760|gb|AAD52903.1|AF142732_1 maturase-like prot...
|
|
5 1 gi|171909134|gb|ACB58143.1| maturase K [Wisteria brach...
|
|
6 1 gi|5817761|gb|AAD52904.1|AF142733_1 maturase-like prot...
|
|
7 1 gi|5817762|gb|AAD52905.1|AF142734_1 maturase-like prot...
|
|
8 1 gi|152014012|gb|ABS20107.1| maturase-like protein [Ast...
|
|
9 1 gi|146197442|dbj|BAF57483.1| maturase [Glycyrrhiza ura...""",
|
|
)
|
|
|
|
def check_xml_2222_blastx_001(self, records):
|
|
self.assertEqual(records.program, "blastx")
|
|
self.assertEqual(records.version, "BLASTX 2.2.22+")
|
|
self.assertEqual(
|
|
records.reference,
|
|
'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.',
|
|
)
|
|
self.assertEqual(records.db, "nr")
|
|
self.assertIsInstance(records.query, SeqRecord)
|
|
self.assertEqual(records.query.id, "1")
|
|
self.assertEqual(
|
|
records.query.description,
|
|
"gi|4104054|gb|AH007193.1|SEG_CVIGS Centaurea vallesiaca 18S ribosomal RNA gene, partial sequence",
|
|
)
|
|
self.assertEqual(repr(records.query.seq), "Seq(None, length=1002)")
|
|
self.assertEqual(len(records.param), 5)
|
|
self.assertEqual(records.param["matrix"], "BLOSUM62")
|
|
self.assertAlmostEqual(records.param["expect"], 0.0001)
|
|
self.assertEqual(records.param["gap-open"], 11)
|
|
self.assertEqual(records.param["gap-extend"], 1)
|
|
self.assertEqual(records.param["filter"], "L;")
|
|
record = next(records)
|
|
self.assertEqual(record.num, 1)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "1")
|
|
self.assertEqual(
|
|
record.query.description,
|
|
"gi|4104054|gb|AH007193.1|SEG_CVIGS Centaurea vallesiaca 18S ribosomal RNA gene, partial sequence",
|
|
)
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=1002)")
|
|
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 8994603)
|
|
self.assertEqual(record.stat["db-len"], -1216159329)
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertEqual(record.stat["eff-space"], 367397307882)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.041)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.267)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.14)
|
|
self.assertEqual(len(record), 1)
|
|
hit = record[0]
|
|
self.assertEqual(hit.num, 1)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|149390769|gb|ABR25402.1|")
|
|
self.assertEqual(hit.target.name, "ABR25402")
|
|
self.assertEqual(
|
|
hit.target.description, "unknown [Oryza sativa (indica cultivar-group)]"
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=26)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 129.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 54.2989775733826)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 1.83262460293058e-05, places=19
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 24)
|
|
self.assertEqual(hsp.annotations["positive"], 25)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 26],
|
|
[ 0, 26]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 26))
|
|
self.assertEqual(repr(hsp.query.seq), "Seq('HMLVSKIKPCMCKYEQIQTVKLRMAH')")
|
|
self.assertEqual(hsp.query.id, "1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|4104054|gb|AH007193.1|SEG_CVIGS Centaurea vallesiaca 18S ribosomal RNA gene, partial sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(26))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "1:911..988")
|
|
self.assertEqual(repr(hsp.target.seq), "Seq('HMLVSKIKPCMCKYELIRTVKLRMAH')")
|
|
self.assertEqual(hsp.target.id, "gi|149390769|gb|ABR25402.1|")
|
|
self.assertEqual(hsp.target.name, "ABR25402")
|
|
self.assertEqual(
|
|
hsp.target.description, "unknown [Oryza sativa (indica cultivar-group)]"
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(hsp.annotations["midline"], "HMLVSKIKPCMCKYE I+TVKLRMAH")
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 1 Length: 26 Strand: Plus
|
|
gi|4104054|gb|AH007193.1|SEG_CVIGS Centaurea vallesiaca 18S ribosomal
|
|
RNA gene, partial sequence
|
|
Target: gi|149390769|gb|ABR25402.1| Length: 26 Strand: Plus
|
|
unknown [Oryza sativa (indica cultivar-group)]
|
|
|
|
Score:54 bits(129), Expect:2e-05,
|
|
Identities:24/26(92%), Positives:25/26(96%), Gaps:0.26(0%)
|
|
|
|
gi|149390 0 HMLVSKIKPCMCKYELIRTVKLRMAH 26
|
|
0 |||||||||||||||.|.|||||||| 26
|
|
1 0 HMLVSKIKPCMCKYEQIQTVKLRMAH 26
|
|
|
|
""",
|
|
)
|
|
record = next(records)
|
|
self.assertEqual(record.num, 2)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "2")
|
|
self.assertEqual(
|
|
record.query.description,
|
|
"gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds",
|
|
)
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=2050)")
|
|
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 8994603)
|
|
self.assertEqual(record.stat["db-len"], -1216159329)
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertEqual(record.stat["eff-space"], 967993058520)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.041)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.267)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.14)
|
|
self.assertEqual(len(record), 10)
|
|
hit = record[0]
|
|
self.assertEqual(hit.num, 1)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|4218936|gb|AAD12237.1|")
|
|
self.assertEqual(hit.target.name, "AAD12237")
|
|
self.assertEqual(
|
|
hit.target.description, "hevein-like protein HLPf [Sambucus nigra]"
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=333)")
|
|
self.assertEqual(len(hit), 2)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1053.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 410.223385721017)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 3.48406066731465e-112, places=126
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 199)
|
|
self.assertEqual(hsp.annotations["positive"], 200)
|
|
self.assertEqual(hsp.annotations["gaps"], 33)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 683.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 267.699542631596)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 2.79278546744412e-69, places=83
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 127)
|
|
self.assertEqual(hsp.annotations["positive"], 127)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[206, 333],
|
|
[ 0, 127]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 127))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "2")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(127))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "2:1669..2049")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({206: 'NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV'}, length=333)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|4218936|gb|AAD12237.1|")
|
|
self.assertEqual(hsp.target.name, "AAD12237")
|
|
self.assertEqual(
|
|
hsp.target.description, "hevein-like protein HLPf [Sambucus nigra]"
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASDQVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVAHIKMSVV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 2 Length: 127 Strand: Plus
|
|
gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein
|
|
HLPf gene, partial cds
|
|
Target: gi|4218936|gb|AAD12237.1| Length: 333 Strand: Plus
|
|
hevein-like protein HLPf [Sambucus nigra]
|
|
|
|
Score:267 bits(683), Expect:3e-69,
|
|
Identities:127/127(100%), Positives:127/127(100%), Gaps:0.127(0%)
|
|
|
|
gi|421893 206 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD
|
|
|
|
gi|421893 266 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
2 60 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA
|
|
|
|
gi|421893 326 HIKMSVV 333
|
|
120 ||||||| 127
|
|
2 120 HIKMSVV 127
|
|
|
|
""",
|
|
)
|
|
hit = record[1]
|
|
self.assertEqual(hit.num, 2)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|4206074|gb|AAD11408.1|")
|
|
self.assertEqual(hit.target.name, "AAD11408")
|
|
self.assertEqual(hit.target.description, "hevein-like protein [Sambucus nigra]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=333)")
|
|
self.assertEqual(len(hit), 2)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1043.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 406.371389961843)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 5.03097287018806e-111, places=125
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 198)
|
|
self.assertEqual(hsp.annotations["positive"], 199)
|
|
self.assertEqual(hsp.annotations["gaps"], 33)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 672.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 263.462347296505)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 5.26696544712228e-68, places=82
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 125)
|
|
self.assertEqual(hsp.annotations["positive"], 126)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[206, 333],
|
|
[ 0, 127]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 127))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "2")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(127))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "2:1669..2049")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({206: 'NYYYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SLV'}, length=333)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|4206074|gb|AAD11408.1|")
|
|
self.assertEqual(hsp.target.name, "AAD11408")
|
|
self.assertEqual(hsp.target.description, "hevein-like protein [Sambucus nigra]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"NY YGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASDQVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVAHIKMS+V",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 2 Length: 127 Strand: Plus
|
|
gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein
|
|
HLPf gene, partial cds
|
|
Target: gi|4206074|gb|AAD11408.1| Length: 333 Strand: Plus
|
|
hevein-like protein [Sambucus nigra]
|
|
|
|
Score:263 bits(672), Expect:5e-68,
|
|
Identities:125/127(98%), Positives:126/127(99%), Gaps:0.127(0%)
|
|
|
|
gi|420607 206 NYYYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD
|
|
0 ||.|||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD
|
|
|
|
gi|420607 266 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
2 60 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA
|
|
|
|
gi|420607 326 HIKMSLV 333
|
|
120 |||||.| 127
|
|
2 120 HIKMSVV 127
|
|
|
|
""",
|
|
)
|
|
hit = record[2]
|
|
self.assertEqual(hit.num, 3)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|4206070|gb|AAD11406.1|")
|
|
self.assertEqual(hit.target.name, "AAD11406")
|
|
self.assertEqual(hit.target.description, "hevein-like protein [Sambucus nigra]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=333)")
|
|
self.assertEqual(len(hit), 2)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1043.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 406.371389961843)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 5.03097287018806e-111, places=125
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 198)
|
|
self.assertEqual(hsp.annotations["positive"], 199)
|
|
self.assertEqual(hsp.annotations["gaps"], 33)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 680.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 266.543943903844)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 6.22167692942359e-69, places=83
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 126)
|
|
self.assertEqual(hsp.annotations["positive"], 127)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[206, 333],
|
|
[ 0, 127]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 127))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "2")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(127))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "2:1669..2049")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({206: 'NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SLV'}, length=333)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|4206070|gb|AAD11406.1|")
|
|
self.assertEqual(hsp.target.name, "AAD11406")
|
|
self.assertEqual(hsp.target.description, "hevein-like protein [Sambucus nigra]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASDQVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVAHIKMS+V",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 2 Length: 127 Strand: Plus
|
|
gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein
|
|
HLPf gene, partial cds
|
|
Target: gi|4206070|gb|AAD11406.1| Length: 333 Strand: Plus
|
|
hevein-like protein [Sambucus nigra]
|
|
|
|
Score:266 bits(680), Expect:6e-69,
|
|
Identities:126/127(99%), Positives:127/127(100%), Gaps:0.127(0%)
|
|
|
|
gi|420607 206 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD
|
|
|
|
gi|420607 266 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
2 60 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA
|
|
|
|
gi|420607 326 HIKMSLV 333
|
|
120 |||||.| 127
|
|
2 120 HIKMSVV 127
|
|
|
|
""",
|
|
)
|
|
hit = record[3]
|
|
self.assertEqual(hit.num, 4)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|4206072|gb|AAD11407.1|")
|
|
self.assertEqual(hit.target.name, "AAD11407")
|
|
self.assertEqual(hit.target.description, "hevein-like protein [Sambucus nigra]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=333)")
|
|
self.assertEqual(len(hit), 2)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1016.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 395.971001412075)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 6.7995613312017e-108, places=121
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 193)
|
|
self.assertEqual(hsp.annotations["positive"], 195)
|
|
self.assertEqual(hsp.annotations["gaps"], 33)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 646.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 253.447158322654)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 5.45045505347399e-65, places=79
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 120)
|
|
self.assertEqual(hsp.annotations["positive"], 124)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[206, 333],
|
|
[ 0, 127]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 127))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "2")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(127))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "2:1669..2049")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({206: 'NYYYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV'}, length=333)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|4206072|gb|AAD11407.1|")
|
|
self.assertEqual(hsp.target.name, "AAD11407")
|
|
self.assertEqual(hsp.target.description, "hevein-like protein [Sambucus nigra]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"NY YGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASDQVPSYGVIS+II+SN GHQS LDTITTSIGYYKRYCDMLEVSYGDNL+NWFDETPF+KVAHIKMSVV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 2 Length: 127 Strand: Plus
|
|
gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein
|
|
HLPf gene, partial cds
|
|
Target: gi|4206072|gb|AAD11407.1| Length: 333 Strand: Plus
|
|
hevein-like protein [Sambucus nigra]
|
|
|
|
Score:253 bits(646), Expect:5e-65,
|
|
Identities:120/127(94%), Positives:124/127(98%), Gaps:0.127(0%)
|
|
|
|
gi|420607 206 NYYYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD
|
|
0 ||.|||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD
|
|
|
|
gi|420607 266 QVPSYGVISEIIDSNIGHQSSLDTITTSIGYYKRYCDMLEVSYGDNLKNWFDETPFSKVA
|
|
60 |||||||||.||.||.||||.||||||||||||||||||||||||||.||||||||.|||
|
|
2 60 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA
|
|
|
|
gi|420607 326 HIKMSVV 333
|
|
120 ||||||| 127
|
|
2 120 HIKMSVV 127
|
|
|
|
""",
|
|
)
|
|
hit = record[4]
|
|
self.assertEqual(hit.num, 5)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|16903131|gb|AAL30421.1|AF434174_1")
|
|
self.assertEqual(hit.target.name, "AAL30421")
|
|
self.assertEqual(hit.target.description, "hevein-like protein [Sambucus nigra]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=330)")
|
|
self.assertEqual(len(hit), 2)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 986.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 384.415014134554)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 2.04729155722083e-104, places=118
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 190)
|
|
self.assertEqual(hsp.annotations["positive"], 191)
|
|
self.assertEqual(hsp.annotations["gaps"], 36)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 679.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 266.158744327927)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 8.12576171382949e-69, places=83
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 126)
|
|
self.assertEqual(hsp.annotations["positive"], 126)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[203, 330],
|
|
[ 0, 127]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 127))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "2")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(127))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "2:1669..2049")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({203: 'NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV'}, length=330)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|16903131|gb|AAL30421.1|AF434174_1")
|
|
self.assertEqual(hsp.target.name, "AAL30421")
|
|
self.assertEqual(hsp.target.description, "hevein-like protein [Sambucus nigra]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASDQVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKV HIKMSVV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 2 Length: 127 Strand: Plus
|
|
gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein
|
|
HLPf gene, partial cds
|
|
Target: gi|16903131|gb|AAL30421.1|AF434174_1 Length: 330 Strand: Plus
|
|
hevein-like protein [Sambucus nigra]
|
|
|
|
Score:266 bits(679), Expect:8e-69,
|
|
Identities:126/127(99%), Positives:126/127(99%), Gaps:0.127(0%)
|
|
|
|
gi|169031 203 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD
|
|
|
|
gi|169031 263 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVV
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||.
|
|
2 60 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA
|
|
|
|
gi|169031 323 HIKMSVV 330
|
|
120 ||||||| 127
|
|
2 120 HIKMSVV 127
|
|
|
|
""",
|
|
)
|
|
hit = record[5]
|
|
self.assertEqual(hit.num, 6)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|16903133|gb|AAL30422.1|AF434175_1")
|
|
self.assertEqual(hit.target.name, "AAL30422")
|
|
self.assertEqual(hit.target.description, "hevein-like protein [Sambucus nigra]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=336)")
|
|
self.assertEqual(len(hit), 2)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 713.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 279.255529909117)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 9.27553088557319e-73, places=87
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 148)
|
|
self.assertEqual(hsp.annotations["positive"], 162)
|
|
self.assertEqual(hsp.annotations["gaps"], 40)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 620.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 243.431969348803)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 5.64033703812707e-62, places=76
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 115)
|
|
self.assertEqual(hsp.annotations["positive"], 120)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[209, 336],
|
|
[ 0, 127]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 127))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...SVV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "2")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(127))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "2:1669..2049")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({209: 'NYNYGLAGEAIGIDLVNDPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...HVV'}, length=336)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|16903133|gb|AAL30422.1|AF434175_1")
|
|
self.assertEqual(hsp.target.name, "AAL30422")
|
|
self.assertEqual(hsp.target.description, "hevein-like protein [Sambucus nigra]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"NYNYGLAGEA+GIDLVN PDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINAN EASDQVPSYGV+S IINSN GH+SGLD ITTSIGYYKRYCDMLEVSYGDNL+NWFDETPF+KVA IKM VV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 2 Length: 127 Strand: Plus
|
|
gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein
|
|
HLPf gene, partial cds
|
|
Target: gi|16903133|gb|AAL30422.1|AF434175_1 Length: 336 Strand: Plus
|
|
hevein-like protein [Sambucus nigra]
|
|
|
|
Score:243 bits(620), Expect:6e-62,
|
|
Identities:115/127(91%), Positives:120/127(94%), Gaps:0.127(0%)
|
|
|
|
gi|169031 209 NYNYGLAGEAIGIDLVNDPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANYEASD
|
|
0 ||||||||||.||||||.|||||||||||||||||||||||||||||||||||||.||||
|
|
2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD
|
|
|
|
gi|169031 269 QVPSYGVLSNIINSNSGHKSGLDIITTSIGYYKRYCDMLEVSYGDNLKNWFDETPFSKVA
|
|
60 |||||||.|.|||||.||.||||.|||||||||||||||||||||||.||||||||.|||
|
|
2 60 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETPFTKVA
|
|
|
|
gi|169031 329 RIKMHVV 336
|
|
120 .|||.|| 127
|
|
2 120 HIKMSVV 127
|
|
|
|
""",
|
|
)
|
|
hit = record[6]
|
|
self.assertEqual(hit.num, 7)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|30691147|gb|AAO17294.1|")
|
|
self.assertEqual(hit.target.name, "AAO17294")
|
|
self.assertEqual(hit.target.description, "chitinase [Ficus carica]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=321)")
|
|
self.assertEqual(len(hit), 2)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 481.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 189.889228296291)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 7.40075731140555e-46, places=60
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 113)
|
|
self.assertEqual(hsp.annotations["positive"], 138)
|
|
self.assertEqual(hsp.annotations["gaps"], 49)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 426.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 168.703251620836)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 1.76559312729927e-39, places=53
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 81)
|
|
self.assertEqual(hsp.annotations["positive"], 99)
|
|
self.assertEqual(hsp.annotations["gaps"], 10)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[202, 261, 261, 266, 266, 308, 309, 320],
|
|
[ 0, 59, 60, 65, 73, 115, 115, 126]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 127))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...MSV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "2")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(126))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "2:1669..2046")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({202: 'NHNYGLVGEALGIDLINNPDLVATDPVVSFKTAIWFWMTRHQNKPSFHGVIINA...MPV'}, length=321)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|30691147|gb|AAO17294.1|")
|
|
self.assertEqual(hsp.target.name, "AAO17294")
|
|
self.assertEqual(hsp.target.description, "chitinase [Ficus carica]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"N+NYGL GEALGIDL+N+PDLVATDP+VSFKTAIWFWMT+H N PS H ++INANSE S +P++ SNFG +S LD + SIGYYKRYCDML+VS+GDNL+ W+D TP F+ V+ I M V",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 2 Length: 126 Strand: Plus
|
|
gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein
|
|
HLPf gene, partial cds
|
|
Target: gi|30691147|gb|AAO17294.1| Length: 321 Strand: Plus
|
|
chitinase [Ficus carica]
|
|
|
|
Score:168 bits(426), Expect:2e-39,
|
|
Identities:81/127(64%), Positives:99/127(78%), Gaps:10.127(8%)
|
|
|
|
gi|306911 202 NHNYGLVGEALGIDLINNPDLVATDPVVSFKTAIWFWMTRHQNKPSFHGVIINANSEPS-
|
|
0 |.||||.||||||||.|.||||||||.||||||||||||.|.|.||.|...||||||.|-
|
|
2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD
|
|
|
|
gi|306911 261 HIPNH--------SNFGQESVLDVVNRSIGYYKRYCDMLKVSFGDNLKYWYDGTPNFSDV
|
|
60 ..|..--------||||..|.||....||||||||||||.||.||||..|.|.||-|..|
|
|
2 60 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNLENWFDETP-FTKV
|
|
|
|
gi|306911 313 SRIGMPV 320
|
|
120 ..|.|.| 127
|
|
2 119 AHIKMSV 126
|
|
|
|
""",
|
|
)
|
|
hit = record[7]
|
|
self.assertEqual(hit.num, 8)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|222139388|gb|ACM45713.1|")
|
|
self.assertEqual(hit.target.name, "ACM45713")
|
|
self.assertEqual(hit.target.description, "class I chitinase [Pyrus pyrifolia]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=317)")
|
|
self.assertEqual(len(hit), 2)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 469.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 185.266833385283)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 1.82286993845418e-44, places=58
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 111)
|
|
self.assertEqual(hsp.annotations["positive"], 137)
|
|
self.assertEqual(hsp.annotations["gaps"], 50)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 318.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 127.101697421762)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 5.89123449548921e-27, places=40
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 62)
|
|
self.assertEqual(hsp.annotations["positive"], 84)
|
|
self.assertEqual(hsp.annotations["gaps"], 7)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[195, 247, 252, 284, 285, 309, 309, 316],
|
|
[ 0, 52, 52, 84, 84, 108, 109, 116]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 122))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...TPF')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "2")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(116))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "2:1669..2016")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({195: 'NYNYGQAGKAIGKDLINNPDLVATDPVVSFKTAIWFWMTPQGNKPSSHDVITGR...RPF'}, length=317)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|222139388|gb|ACM45713.1|")
|
|
self.assertEqual(hsp.target.name, "ACM45713")
|
|
self.assertEqual(hsp.target.description, "class I chitinase [Pyrus pyrifolia]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"NYNYG AG+A+G DL+N+PDLVATDP+VSFKTAIWFWMT N PS HD++ + ++ +VP YGVI+ IIN G D + + IG+Y+RYC +L V+ GDNL+ +++ PF",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 2 Length: 116 Strand: Plus
|
|
gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein
|
|
HLPf gene, partial cds
|
|
Target: gi|222139388|gb|ACM45713.1| Length: 317 Strand: Plus
|
|
class I chitinase [Pyrus pyrifolia]
|
|
|
|
Score:127 bits(318), Expect:6e-27,
|
|
Identities:62/122(51%), Positives:84/122(69%), Gaps:7.122(6%)
|
|
|
|
gi|222139 195 NYNYGQAGKAIGKDLINNPDLVATDPVVSFKTAIWFWMTPQGNKPSSHDVITGRWSPSTA
|
|
0 |||||.||.|.|.||.|.||||||||.||||||||||||...|.||.||...-----...
|
|
2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILI-----NAN
|
|
|
|
gi|222139 255 DRSAGRVPGYGVITNIINGGVECGKGQDARVASRIGFYRRYCQILGVNPGDNLD-CYNQR
|
|
60 ......||.||||..|||.......|.|.-....||.|.|||..|.|..||||.-.....
|
|
2 55 SEASDQVPSYGVISKIINSNFGHQSGLDT-ITTSIGYYKRYCDMLEVSYGDNLENWFDET
|
|
|
|
gi|222139 314 PF 316
|
|
120 || 122
|
|
2 114 PF 116
|
|
|
|
""",
|
|
)
|
|
hit = record[8]
|
|
self.assertEqual(hit.num, 9)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|23496435|dbj|BAB40817.2|")
|
|
self.assertEqual(hit.target.name, "BAB40817")
|
|
self.assertEqual(hit.target.description, "endochitinase MCHT-2 [Cucumis melo]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=311)")
|
|
self.assertEqual(len(hit), 2)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 460.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 181.800037202027)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 2.01541888137674e-43, places=57
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 109)
|
|
self.assertEqual(hsp.annotations["positive"], 132)
|
|
self.assertEqual(hsp.annotations["gaps"], 54)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 285.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 114.39011141649)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 3.95161831690075e-23, places=37
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 56)
|
|
self.assertEqual(hsp.annotations["positive"], 75)
|
|
self.assertEqual(hsp.annotations["gaps"], 7)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[191, 211, 211, 242, 247, 277, 278, 304],
|
|
[ 0, 20, 21, 52, 52, 82, 82, 108]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 114))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...NLE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "2")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(108))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "2:1669..1992")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({191: 'NYNYGPAGKAIGAPLLTNPDTATDPVTSFKTALWFWMTAQGNKPSCHNVITGNW...NLD'}, length=311)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|23496435|dbj|BAB40817.2|")
|
|
self.assertEqual(hsp.target.name, "BAB40817")
|
|
self.assertEqual(hsp.target.description, "endochitinase MCHT-2 [Cucumis melo]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"NYNYG AG+A+G L+ +PD ATDP+ SFKTA+WFWMT N PS H+++ ++ A+ +VP YGVI+ IIN G D + IG+YKRYCDML + YG+NL+",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 2 Length: 108 Strand: Plus
|
|
gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein
|
|
HLPf gene, partial cds
|
|
Target: gi|23496435|dbj|BAB40817.2| Length: 311 Strand: Plus
|
|
endochitinase MCHT-2 [Cucumis melo]
|
|
|
|
Score:114 bits(285), Expect:4e-23,
|
|
Identities:56/114(49%), Positives:75/114(66%), Gaps:7.114(6%)
|
|
|
|
gi|234964 191 NYNYGPAGKAIGAPLLTNPD-TATDPVTSFKTALWFWMTAQGNKPSCHNVITGNWQPSSA
|
|
0 |||||.||.|.|..|...||-.||||..|||||.|||||...|.||.|....-----...
|
|
2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILI-----NAN
|
|
|
|
gi|234964 250 DNAAGRVPGYGVITNIINGGLECGRGPDDRVKDRIGFYKRYCDMLGIGYGNNLD 304
|
|
60 ..|...||.||||..|||.......|.-|.....||.||||||||...||.||. 114
|
|
2 55 SEASDQVPSYGVISKIINSNFGHQSGL-DTITTSIGYYKRYCDMLEVSYGDNLE 108
|
|
|
|
""",
|
|
)
|
|
hit = record[9]
|
|
self.assertEqual(hit.num, 10)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|82621253|gb|ABB86300.1|")
|
|
self.assertEqual(hit.target.name, "ABB86300")
|
|
self.assertEqual(hit.target.description, "chitinase [Ficus awkeotsang]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=301)")
|
|
self.assertEqual(len(hit), 2)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 459.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 181.414837626109)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 2.6322185753765e-43, places=56
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 114)
|
|
self.assertEqual(hsp.annotations["positive"], 134)
|
|
self.assertEqual(hsp.annotations["gaps"], 50)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 359.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 142.894880034374)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 1.03749166509001e-31, places=45
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 67)
|
|
self.assertEqual(hsp.annotations["positive"], 83)
|
|
self.assertEqual(hsp.annotations["gaps"], 9)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[203, 263, 263, 268, 268, 301],
|
|
[ 0, 60, 61, 66, 74, 107]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 107))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINA...DNL')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "2")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein HLPf gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(107))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "2:1669..1989")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({203: 'NHNYGLVGEALGIDLINNPELVATDPVISFKTAIWFWMARYEDKPSFHDVIINA...DNL'}, length=301)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|82621253|gb|ABB86300.1|")
|
|
self.assertEqual(hsp.target.name, "ABB86300")
|
|
self.assertEqual(hsp.target.description, "chitinase [Ficus awkeotsang]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"N+NYGL GEALGIDL+N+P+LVATDP++SFKTAIWFWM ++++ PS HD++INAN EASD +P +G N G +S LD + SIGYYKRYCDML VS DNL",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 2 Length: 107 Strand: Plus
|
|
gi|4218935|gb|AF074388.1|AF074388 Sambucus nigra hevein-like protein
|
|
HLPf gene, partial cds
|
|
Target: gi|82621253|gb|ABB86300.1| Length: 301 Strand: Plus
|
|
chitinase [Ficus awkeotsang]
|
|
|
|
Score:142 bits(359), Expect:1e-31,
|
|
Identities:67/107(63%), Positives:83/107(78%), Gaps:9.107(8%)
|
|
|
|
gi|826212 203 NHNYGLVGEALGIDLINNPELVATDPVISFKTAIWFWMARYEDKPSFHDVIINANFEASD
|
|
0 |.||||.||||||||.|.|.||||||..||||||||||......||.||..||||.||||
|
|
2 0 NYNYGLAGEALGIDLVNHPDLVATDPIVSFKTAIWFWMTQHDNNPSLHDILINANSEASD
|
|
|
|
gi|826212 263 -IPYHG--------NSGQESSLDVVNRSIGYYKRYCDMLGVSCEDNL 301
|
|
60 -.|..|--------|.|..|.||....||||||||||||.||..||| 107
|
|
2 60 QVPSYGVISKIINSNFGHQSGLDTITTSIGYYKRYCDMLEVSYGDNL 107
|
|
|
|
""",
|
|
)
|
|
record = next(records)
|
|
self.assertEqual(record.num, 3)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "3")
|
|
self.assertEqual(
|
|
record.query.description,
|
|
"gi|5690369|gb|AF158246.1|AF158246 Cricetulus griseus glucose phosphate isomerase (GPI) gene, partial intron sequence",
|
|
)
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=550)")
|
|
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 8994603)
|
|
self.assertEqual(record.stat["db-len"], -1216159329)
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertEqual(record.stat["eff-space"], 108443629616)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.041)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.267)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.14)
|
|
self.assertEqual(len(record), 0)
|
|
record = next(records)
|
|
self.assertEqual(record.num, 4)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "4")
|
|
self.assertEqual(
|
|
record.query.description,
|
|
"gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence",
|
|
)
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=655)")
|
|
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 8994603)
|
|
self.assertEqual(record.stat["db-len"], -1216159329)
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertAlmostEqual(record.stat["eff-space"], 165344802738)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.041)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.267)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.14)
|
|
self.assertEqual(len(record), 10)
|
|
hit = record[0]
|
|
self.assertEqual(hit.num, 1)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|166343825|gb|ABY86655.1|")
|
|
self.assertEqual(hit.target.name, "ABY86655")
|
|
self.assertEqual(hit.target.description, "beta-tubulin 4 [Gossypium hirsutum]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=448)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1048.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 408.29738784143)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 2.26145081918239e-112, places=126
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 196)
|
|
self.assertEqual(hsp.annotations["positive"], 197)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 201],
|
|
[ 0, 201]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 201))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "4")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(201))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "4:50..652")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MREILHIQAGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDNDLQLERVNVYYNEA...CMV'}, length=448)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|166343825|gb|ABY86655.1|")
|
|
self.assertEqual(hsp.target.name, "ABY86655")
|
|
self.assertEqual(hsp.target.description, "beta-tubulin 4 [Gossypium hirsutum]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MREILHIQAGQCGNQIGA FWEVVCAEHGI+STGRYQGDNDLQLERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 4 Length: 201 Strand: Plus
|
|
gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber
|
|
Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN
|
|
gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636
|
|
(X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence
|
|
Target: gi|166343825|gb|ABY86655.1| Length: 448 Strand: Plus
|
|
beta-tubulin 4 [Gossypium hirsutum]
|
|
|
|
Score:408 bits(1048), Expect:2e-112,
|
|
Identities:196/201(98%), Positives:197/201(98%), Gaps:0.201(0%)
|
|
|
|
gi|166343 0 MREILHIQAGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDNDLQLERVNVYYNEASCGRFV
|
|
0 ||||||||||||||||||.|||||||||||.|||||||||||||||||||||||||||||
|
|
4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV
|
|
|
|
gi|166343 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.||||
|
|
4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV
|
|
|
|
gi|166343 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
120 ||||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||
|
|
4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
|
|
gi|166343 180 EPYNATLSVHQLVENADECMV 201
|
|
180 ||||||||||.|||||||||| 201
|
|
4 180 EPYNATLSVHXLVENADECMV 201
|
|
|
|
""",
|
|
)
|
|
hit = record[1]
|
|
self.assertEqual(hit.num, 2)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|223549899|gb|EEF51386.1|")
|
|
self.assertEqual(hit.target.name, "EEF51386")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"tubulin beta chain, putative [Ricinus communis]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=448)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1044.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 406.756589537761)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 6.57981456092236e-112, places=126
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 195)
|
|
self.assertEqual(hsp.annotations["positive"], 196)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 201],
|
|
[ 0, 201]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 201))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "4")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(201))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "4:50..652")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDNDLQLERVNVYYNEA...CMV'}, length=448)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|223549899|gb|EEF51386.1|")
|
|
self.assertEqual(hsp.target.name, "EEF51386")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"tubulin beta chain, putative [Ricinus communis]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MREILHIQ GQCGNQIGA FWEVVCAEHGI+STGRYQGDNDLQLERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 4 Length: 201 Strand: Plus
|
|
gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber
|
|
Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN
|
|
gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636
|
|
(X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence
|
|
Target: gi|223549899|gb|EEF51386.1| Length: 448 Strand: Plus
|
|
tubulin beta chain, putative [Ricinus communis]
|
|
|
|
Score:406 bits(1044), Expect:7e-112,
|
|
Identities:195/201(97%), Positives:196/201(98%), Gaps:0.201(0%)
|
|
|
|
gi|223549 0 MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDNDLQLERVNVYYNEASCGRFV
|
|
0 ||||||||.|||||||||.|||||||||||.|||||||||||||||||||||||||||||
|
|
4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV
|
|
|
|
gi|223549 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.||||
|
|
4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV
|
|
|
|
gi|223549 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
120 ||||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||
|
|
4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
|
|
gi|223549 180 EPYNATLSVHQLVENADECMV 201
|
|
180 ||||||||||.|||||||||| 201
|
|
4 180 EPYNATLSVHXLVENADECMV 201
|
|
|
|
""",
|
|
)
|
|
hit = record[2]
|
|
self.assertEqual(hit.num, 3)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|18420724|ref|NP_568437.1|")
|
|
self.assertEqual(hit.target.name, "NP_568437")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"TUB8 (tubulin beta-8) [Arabidopsis thaliana] >gi|27735261|sp|P29516.2|TBB8_ARATH RecName: Full=Tubulin beta-8 chain; AltName: Full=Beta-8-tubulin >gi|10176853|dbj|BAB10059.1| beta tubulin [Arabidopsis thaliana]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=449)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1040.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 405.215791234091)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 1.91443295113426e-111, places=125
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 194)
|
|
self.assertEqual(hsp.annotations["positive"], 196)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 201],
|
|
[ 0, 201]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 201))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "4")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(201))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "4:50..652")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGENDLQLERVNVYYNEA...CMV'}, length=449)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|18420724|ref|NP_568437.1|")
|
|
self.assertEqual(hsp.target.name, "NP_568437")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"TUB8 (tubulin beta-8) [Arabidopsis thaliana] >gi|27735261|sp|P29516.2|TBB8_ARATH RecName: Full=Tubulin beta-8 chain; AltName: Full=Beta-8-tubulin >gi|10176853|dbj|BAB10059.1| beta tubulin [Arabidopsis thaliana]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MREILHIQ GQCGNQIGA FWEVVCAEHGI+STGRYQG+NDLQLERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 4 Length: 201 Strand: Plus
|
|
gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber
|
|
Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN
|
|
gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636
|
|
(X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence
|
|
Target: gi|18420724|ref|NP_568437.1| Length: 449 Strand: Plus
|
|
TUB8 (tubulin beta-8) [Arabidopsis thaliana]
|
|
>gi|27735261|sp|P29516.2|TBB8_ARATH RecName: Full=Tubulin beta-8 chain;
|
|
AltName: Full=Beta-8-tubulin >gi|10176853|dbj|BAB10059.1| beta tubulin
|
|
[Arabidopsis thaliana]
|
|
|
|
Score:405 bits(1040), Expect:2e-111,
|
|
Identities:194/201(97%), Positives:196/201(98%), Gaps:0.201(0%)
|
|
|
|
gi|184207 0 MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGENDLQLERVNVYYNEASCGRFV
|
|
0 ||||||||.|||||||||.|||||||||||.|||||||.|||||||||||||||||||||
|
|
4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV
|
|
|
|
gi|184207 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.||||
|
|
4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV
|
|
|
|
gi|184207 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
120 ||||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||
|
|
4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
|
|
gi|184207 180 EPYNATLSVHQLVENADECMV 201
|
|
180 ||||||||||.|||||||||| 201
|
|
4 180 EPYNATLSVHXLVENADECMV 201
|
|
|
|
""",
|
|
)
|
|
hit = record[3]
|
|
self.assertEqual(hit.num, 4)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|225426385|ref|XP_002271992.1|")
|
|
self.assertEqual(hit.target.name, "XP_002271992")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"PREDICTED: hypothetical protein [Vitis vinifera] >gi|157356601|emb|CAO62796.1| unnamed protein product [Vitis vinifera]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=447)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1034.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 402.904593778587)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 9.50123195540709e-111, places=125
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 193)
|
|
self.assertEqual(hsp.annotations["positive"], 195)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 201],
|
|
[ 0, 201]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 201))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "4")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(201))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "4:50..652")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYHGDSDLQLERVNVYYNEA...CMV'}, length=447)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|225426385|ref|XP_002271992.1|")
|
|
self.assertEqual(hsp.target.name, "XP_002271992")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"PREDICTED: hypothetical protein [Vitis vinifera] >gi|157356601|emb|CAO62796.1| unnamed protein product [Vitis vinifera]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MREILHIQ GQCGNQIGA FWEVVCAEHGI+STGRY GD+DLQLERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 4 Length: 201 Strand: Plus
|
|
gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber
|
|
Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN
|
|
gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636
|
|
(X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence
|
|
Target: gi|225426385|ref|XP_002271992.1| Length: 447 Strand: Plus
|
|
PREDICTED: hypothetical protein [Vitis vinifera]
|
|
>gi|157356601|emb|CAO62796.1| unnamed protein product [Vitis vinifera]
|
|
|
|
Score:402 bits(1034), Expect:1e-110,
|
|
Identities:193/201(96%), Positives:195/201(97%), Gaps:0.201(0%)
|
|
|
|
gi|225426 0 MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYHGDSDLQLERVNVYYNEASCGRFV
|
|
0 ||||||||.|||||||||.|||||||||||.|||||.||.||||||||||||||||||||
|
|
4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV
|
|
|
|
gi|225426 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.||||
|
|
4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV
|
|
|
|
gi|225426 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
120 ||||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||
|
|
4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
|
|
gi|225426 180 EPYNATLSVHQLVENADECMV 201
|
|
180 ||||||||||.|||||||||| 201
|
|
4 180 EPYNATLSVHXLVENADECMV 201
|
|
|
|
""",
|
|
)
|
|
hit = record[4]
|
|
self.assertEqual(hit.num, 5)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|15451226|gb|AAK96884.1|")
|
|
self.assertEqual(hit.target.name, "AAK96884")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"beta tubulin [Arabidopsis thaliana] >gi|20148289|gb|AAM10035.1| beta tubulin [Arabidopsis thaliana]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=449)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1034.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 402.904593778587)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 9.50123195540709e-111, places=125
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 193)
|
|
self.assertEqual(hsp.annotations["positive"], 195)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 201],
|
|
[ 0, 201]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 201))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "4")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(201))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "4:50..652")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGEKDLQLERVNVYYNEA...CMV'}, length=449)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|15451226|gb|AAK96884.1|")
|
|
self.assertEqual(hsp.target.name, "AAK96884")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"beta tubulin [Arabidopsis thaliana] >gi|20148289|gb|AAM10035.1| beta tubulin [Arabidopsis thaliana]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MREILHIQ GQCGNQIGA FWEVVCAEHGI+STGRYQG+ DLQLERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 4 Length: 201 Strand: Plus
|
|
gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber
|
|
Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN
|
|
gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636
|
|
(X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence
|
|
Target: gi|15451226|gb|AAK96884.1| Length: 449 Strand: Plus
|
|
beta tubulin [Arabidopsis thaliana] >gi|20148289|gb|AAM10035.1| beta
|
|
tubulin [Arabidopsis thaliana]
|
|
|
|
Score:402 bits(1034), Expect:1e-110,
|
|
Identities:193/201(96%), Positives:195/201(97%), Gaps:0.201(0%)
|
|
|
|
gi|154512 0 MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGEKDLQLERVNVYYNEASCGRFV
|
|
0 ||||||||.|||||||||.|||||||||||.|||||||..||||||||||||||||||||
|
|
4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV
|
|
|
|
gi|154512 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.||||
|
|
4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV
|
|
|
|
gi|154512 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
120 ||||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||
|
|
4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
|
|
gi|154512 180 EPYNATLSVHQLVENADECMV 201
|
|
180 ||||||||||.|||||||||| 201
|
|
4 180 EPYNATLSVHXLVENADECMV 201
|
|
|
|
""",
|
|
)
|
|
hit = record[5]
|
|
self.assertEqual(hit.num, 6)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|225470745|ref|XP_002267380.1|")
|
|
self.assertEqual(hit.target.name, "XP_002267380")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"PREDICTED: hypothetical protein [Vitis vinifera] >gi|157327486|emb|CAO15467.1| unnamed protein product [Vitis vinifera]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=449)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1033.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 402.51939420267)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 1.24089932237309e-110, places=124
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 192)
|
|
self.assertEqual(hsp.annotations["positive"], 195)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 201],
|
|
[ 0, 201]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 201))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "4")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(201))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "4:50..652")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MREILHVQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDTELQLERVNVYYNEA...CMV'}, length=449)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|225470745|ref|XP_002267380.1|")
|
|
self.assertEqual(hsp.target.name, "XP_002267380")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"PREDICTED: hypothetical protein [Vitis vinifera] >gi|157327486|emb|CAO15467.1| unnamed protein product [Vitis vinifera]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MREILH+Q GQCGNQIGA FWEVVCAEHGI+STGRYQGD +LQLERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 4 Length: 201 Strand: Plus
|
|
gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber
|
|
Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN
|
|
gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636
|
|
(X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence
|
|
Target: gi|225470745|ref|XP_002267380.1| Length: 449 Strand: Plus
|
|
PREDICTED: hypothetical protein [Vitis vinifera]
|
|
>gi|157327486|emb|CAO15467.1| unnamed protein product [Vitis vinifera]
|
|
|
|
Score:402 bits(1033), Expect:1e-110,
|
|
Identities:192/201(96%), Positives:195/201(97%), Gaps:0.201(0%)
|
|
|
|
gi|225470 0 MREILHVQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDTELQLERVNVYYNEASCGRFV
|
|
0 ||||||.|.|||||||||.|||||||||||.||||||||..|||||||||||||||||||
|
|
4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV
|
|
|
|
gi|225470 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.||||
|
|
4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV
|
|
|
|
gi|225470 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
120 ||||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||
|
|
4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
|
|
gi|225470 180 EPYNATLSVHQLVENADECMV 201
|
|
180 ||||||||||.|||||||||| 201
|
|
4 180 EPYNATLSVHXLVENADECMV 201
|
|
|
|
""",
|
|
)
|
|
hit = record[6]
|
|
self.assertEqual(hit.num, 7)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|586076|sp|P37392.1|TBB1_LUPAL")
|
|
self.assertEqual(hit.target.name, "P37392")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RecName: Full=Tubulin beta-1 chain; AltName: Full=Beta-1-tubulin >gi|402636|emb|CAA49736.1| Beta tubulin 1 [Lupinus albus]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=447)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1033.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 402.51939420267)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 1.24089932237309e-110, places=124
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 193)
|
|
self.assertEqual(hsp.annotations["positive"], 195)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 201],
|
|
[ 0, 201]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 201))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "4")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(201))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "4:50..652")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYGGDNELQLERVNVYYNEA...CMV'}, length=447)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|586076|sp|P37392.1|TBB1_LUPAL")
|
|
self.assertEqual(hsp.target.name, "P37392")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"RecName: Full=Tubulin beta-1 chain; AltName: Full=Beta-1-tubulin >gi|402636|emb|CAA49736.1| Beta tubulin 1 [Lupinus albus]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MREILHIQ GQCGNQIGA FWEVVCAEHGI+STGRY GDN+LQLERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 4 Length: 201 Strand: Plus
|
|
gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber
|
|
Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN
|
|
gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636
|
|
(X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence
|
|
Target: gi|586076|sp|P37392.1|TBB1_LUPAL Length: 447 Strand: Plus
|
|
RecName: Full=Tubulin beta-1 chain; AltName: Full=Beta-1-tubulin
|
|
>gi|402636|emb|CAA49736.1| Beta tubulin 1 [Lupinus albus]
|
|
|
|
Score:402 bits(1033), Expect:1e-110,
|
|
Identities:193/201(96%), Positives:195/201(97%), Gaps:0.201(0%)
|
|
|
|
gi|586076 0 MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYGGDNELQLERVNVYYNEASCGRFV
|
|
0 ||||||||.|||||||||.|||||||||||.|||||.|||.|||||||||||||||||||
|
|
4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV
|
|
|
|
gi|586076 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.||||
|
|
4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV
|
|
|
|
gi|586076 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
120 ||||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||
|
|
4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
|
|
gi|586076 180 EPYNATLSVHQLVENADECMV 201
|
|
180 ||||||||||.|||||||||| 201
|
|
4 180 EPYNATLSVHXLVENADECMV 201
|
|
|
|
""",
|
|
)
|
|
hit = record[7]
|
|
self.assertEqual(hit.num, 8)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|224104341|ref|XP_002313404.1|")
|
|
self.assertEqual(hit.target.name, "XP_002313404")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"tubulin, beta chain [Populus trichocarpa] >gi|222849812|gb|EEE87359.1| tubulin, beta chain [Populus trichocarpa]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=451)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1031.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 401.748995050835)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 2.1166536544662e-110, places=123
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 192)
|
|
self.assertEqual(hsp.annotations["positive"], 195)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 201],
|
|
[ 0, 201]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 201))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "4")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(201))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "4:50..652")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDSPLQLERINVYYNEA...CMV'}, length=451)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|224104341|ref|XP_002313404.1|")
|
|
self.assertEqual(hsp.target.name, "XP_002313404")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"tubulin, beta chain [Populus trichocarpa] >gi|222849812|gb|EEE87359.1| tubulin, beta chain [Populus trichocarpa]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MREILHIQ GQCGNQIGA FWEVVCAEHGI+STGRYQGD+ LQLER+NVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 4 Length: 201 Strand: Plus
|
|
gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber
|
|
Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN
|
|
gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636
|
|
(X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence
|
|
Target: gi|224104341|ref|XP_002313404.1| Length: 451 Strand: Plus
|
|
tubulin, beta chain [Populus trichocarpa] >gi|222849812|gb|EEE87359.1|
|
|
tubulin, beta chain [Populus trichocarpa]
|
|
|
|
Score:401 bits(1031), Expect:2e-110,
|
|
Identities:192/201(96%), Positives:195/201(97%), Gaps:0.201(0%)
|
|
|
|
gi|224104 0 MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDSPLQLERINVYYNEASCGRFV
|
|
0 ||||||||.|||||||||.|||||||||||.||||||||..|||||.|||||||||||||
|
|
4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV
|
|
|
|
gi|224104 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.||||
|
|
4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV
|
|
|
|
gi|224104 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
120 ||||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||
|
|
4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
|
|
gi|224104 180 EPYNATLSVHQLVENADECMV 201
|
|
180 ||||||||||.|||||||||| 201
|
|
4 180 EPYNATLSVHXLVENADECMV 201
|
|
|
|
""",
|
|
)
|
|
hit = record[8]
|
|
self.assertEqual(hit.num, 9)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|223549679|gb|EEF51167.1|")
|
|
self.assertEqual(hit.target.name, "EEF51167")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"tubulin beta chain, putative [Ricinus communis]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=446)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1029.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 400.978595899)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 3.61046429165375e-110, places=124
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 191)
|
|
self.assertEqual(hsp.annotations["positive"], 194)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 201],
|
|
[ 0, 201]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 201))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "4")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(201))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "4:50..652")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MREILHVQGGQCGNQIGAKFWEVVCAEHGIDSTGRYHGDTDLQLERVNVYYNEA...CMV'}, length=446)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|223549679|gb|EEF51167.1|")
|
|
self.assertEqual(hsp.target.name, "EEF51167")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"tubulin beta chain, putative [Ricinus communis]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MREILH+Q GQCGNQIGA FWEVVCAEHGI+STGRY GD DLQLERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISK+REEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 4 Length: 201 Strand: Plus
|
|
gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber
|
|
Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN
|
|
gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636
|
|
(X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence
|
|
Target: gi|223549679|gb|EEF51167.1| Length: 446 Strand: Plus
|
|
tubulin beta chain, putative [Ricinus communis]
|
|
|
|
Score:400 bits(1029), Expect:4e-110,
|
|
Identities:191/201(95%), Positives:194/201(97%), Gaps:0.201(0%)
|
|
|
|
gi|223549 0 MREILHVQGGQCGNQIGAKFWEVVCAEHGIDSTGRYHGDTDLQLERVNVYYNEASCGRFV
|
|
0 ||||||.|.|||||||||.|||||||||||.|||||.||.||||||||||||||||||||
|
|
4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV
|
|
|
|
gi|223549 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.||||
|
|
4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV
|
|
|
|
gi|223549 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKMREEYPDRMMLTFSVFPSPKVSDTVV
|
|
120 ||||||||||||||||||||.|||||||||||||.|||||||||||||||||||||||||
|
|
4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
|
|
gi|223549 180 EPYNATLSVHQLVENADECMV 201
|
|
180 ||||||||||.|||||||||| 201
|
|
4 180 EPYNATLSVHXLVENADECMV 201
|
|
|
|
""",
|
|
)
|
|
hit = record[9]
|
|
self.assertEqual(hit.num, 10)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|224058553|ref|XP_002299541.1|")
|
|
self.assertEqual(hit.target.name, "XP_002299541")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"tubulin, beta chain [Populus trichocarpa] >gi|222846799|gb|EEE84346.1| tubulin, beta chain [Populus trichocarpa]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=447)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1029.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 400.978595899)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 3.61046429165375e-110, places=124
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 192)
|
|
self.assertEqual(hsp.annotations["positive"], 195)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 201],
|
|
[ 0, 201]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 201))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEA...CMV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "4")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636 (X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(201))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "4:50..652")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDSALQIERVNVYYNEA...CMV'}, length=447)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|224058553|ref|XP_002299541.1|")
|
|
self.assertEqual(hsp.target.name, "XP_002299541")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"tubulin, beta chain [Populus trichocarpa] >gi|222846799|gb|EEE84346.1| tubulin, beta chain [Populus trichocarpa]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MREILHIQ GQCGNQIGA FWEVVCAEHGI+STGRYQGD+ LQ+ERVNVYYNEASCGRFVPRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDS LDVVRKEAENCDCLQGFQVCHSLG GTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVVEPYNATLSVH LVENADECMV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 4 Length: 201 Strand: Plus
|
|
gi|5049839|gb|AI730987.1|AI730987 BNLGHi8354 Six-day Cotton fiber
|
|
Gossypium hirsutum cDNA 5' similar to TUBULIN BETA-1 CHAIN
|
|
gi|486734|pir|S35142 tubulin beta chain - white lupine gi|402636
|
|
(X70184) Beta tubulin 1 [Lupinus albus], mRNA sequence
|
|
Target: gi|224058553|ref|XP_002299541.1| Length: 447 Strand: Plus
|
|
tubulin, beta chain [Populus trichocarpa] >gi|222846799|gb|EEE84346.1|
|
|
tubulin, beta chain [Populus trichocarpa]
|
|
|
|
Score:400 bits(1029), Expect:4e-110,
|
|
Identities:192/201(96%), Positives:195/201(97%), Gaps:0.201(0%)
|
|
|
|
gi|224058 0 MREILHIQGGQCGNQIGAKFWEVVCAEHGIDSTGRYQGDSALQIERVNVYYNEASCGRFV
|
|
0 ||||||||.|||||||||.|||||||||||.||||||||..||.||||||||||||||||
|
|
4 0 MREILHIQAGQCGNQIGANFWEVVCAEHGINSTGRYQGDNDLQLERVNVYYNEASCGRFV
|
|
|
|
gi|224058 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSVLDVV
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||||||||||.||||
|
|
4 60 PRAVLMDLEPGTMDSVRSGPYGQIFRPDNFVFGQSGAGNNWAKGHYTEGAELIDSXLDVV
|
|
|
|
gi|224058 120 RKEAENCDCLQGFQVCHSLGGGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
120 ||||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||
|
|
4 120 RKEAENCDCLQGFQVCHSLGRGTGSGMGTLLISKIREEYPDRMMLTFSVFPSPKVSDTVV
|
|
|
|
gi|224058 180 EPYNATLSVHQLVENADECMV 201
|
|
180 ||||||||||.|||||||||| 201
|
|
4 180 EPYNATLSVHXLVENADECMV 201
|
|
|
|
""",
|
|
)
|
|
record = next(records)
|
|
self.assertEqual(record.num, 5)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "5")
|
|
self.assertEqual(
|
|
record.query.description,
|
|
"gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence",
|
|
)
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=623)")
|
|
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 8994603)
|
|
self.assertEqual(record.stat["db-len"], -1216159329)
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertEqual(record.stat["eff-space"], 147032237429)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.041)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.267)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.14)
|
|
self.assertEqual(len(record), 10)
|
|
hit = record[0]
|
|
self.assertEqual(hit.num, 1)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|110740644|dbj|BAE98425.1|")
|
|
self.assertEqual(hit.target.name, "BAE98425")
|
|
self.assertEqual(
|
|
hit.target.description, "hypothetical protein [Arabidopsis thaliana]"
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=80)")
|
|
self.assertEqual(len(hit), 2)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 231.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 93.5893343169526)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 5.57283114448317e-19, places=33
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 42)
|
|
self.assertEqual(hsp.annotations["positive"], 45)
|
|
self.assertEqual(hsp.annotations["gaps"], 1)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 53.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 25.0238098036637)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 5.57283114448317e-19, places=33
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 13)
|
|
self.assertEqual(hsp.annotations["positive"], 13)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[53, 70],
|
|
[ 0, 17]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 17))
|
|
self.assertEqual(repr(hsp.query.seq), "Seq('RKLVSRALRCAVGLNKS')")
|
|
self.assertEqual(hsp.query.id, "5")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(17))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "5:455..505")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq), "Seq({53: 'RKLVSRVLPHAVGLNPS'}, length=80)"
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|110740644|dbj|BAE98425.1|")
|
|
self.assertEqual(hsp.target.name, "BAE98425")
|
|
self.assertEqual(
|
|
hsp.target.description, "hypothetical protein [Arabidopsis thaliana]"
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(hsp.annotations["midline"], "RKLVSR L AVGLN S")
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 5 Length: 17 Strand: Plus
|
|
gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed
|
|
spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2,
|
|
complete sequence
|
|
Target: gi|110740644|dbj|BAE98425.1| Length: 80 Strand: Plus
|
|
hypothetical protein [Arabidopsis thaliana]
|
|
|
|
Score:25 bits(53), Expect:6e-19,
|
|
Identities:13/17(76%), Positives:13/17(76%), Gaps:0.17(0%)
|
|
|
|
gi|110740 53 RKLVSRVLPHAVGLNPS 70
|
|
0 ||||||.|..|||||.| 17
|
|
5 0 RKLVSRALRCAVGLNKS 17
|
|
|
|
""",
|
|
)
|
|
hit = record[1]
|
|
self.assertEqual(hit.num, 2)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|226453533|gb|EEH50844.1|")
|
|
self.assertEqual(hit.target.name, "EEH50844")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"predicted protein [Micromonas pusilla CCMP1545]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=81)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 238.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 96.2857313483741)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 1.69151855577931e-18, places=32
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 42)
|
|
self.assertEqual(hsp.annotations["positive"], 45)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 49],
|
|
[ 0, 49]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 49))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKNVAKCDTWCELQNPVNHRVFERKLRPKPLGRGHVCLGVSHRVAPNPF')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "5")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(49))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "5:283..429")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MKNVAKCDTWCELQNPVNHRVFERKLRPKPSGRGHVCLGVTNRRPPSSF'}, length=81)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|226453533|gb|EEH50844.1|")
|
|
self.assertEqual(hsp.target.name, "EEH50844")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"predicted protein [Micromonas pusilla CCMP1545]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKNVAKCDTWCELQNPVNHRVFERKLRPKP GRGHVCLGV++R P+ F",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 5 Length: 49 Strand: Plus
|
|
gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed
|
|
spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2,
|
|
complete sequence
|
|
Target: gi|226453533|gb|EEH50844.1| Length: 81 Strand: Plus
|
|
predicted protein [Micromonas pusilla CCMP1545]
|
|
|
|
Score:96 bits(238), Expect:2e-18,
|
|
Identities:42/49(86%), Positives:45/49(92%), Gaps:0.49(0%)
|
|
|
|
gi|226453 0 MKNVAKCDTWCELQNPVNHRVFERKLRPKPSGRGHVCLGVTNRRPPSSF 49
|
|
0 ||||||||||||||||||||||||||||||.|||||||||..|..|..| 49
|
|
5 0 MKNVAKCDTWCELQNPVNHRVFERKLRPKPLGRGHVCLGVSHRVAPNPF 49
|
|
|
|
""",
|
|
)
|
|
hit = record[2]
|
|
self.assertEqual(hit.num, 3)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|168069582|ref|XP_001786502.1|")
|
|
self.assertEqual(hit.target.name, "XP_001786502")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"predicted protein [Physcomitrella patens subsp. patens] >gi|162661153|gb|EDQ48685.1| predicted protein [Physcomitrella patens subsp. patens]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=88)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 183.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 75.0997546729196)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 4.03544314604194e-12, places=26
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 37)
|
|
self.assertEqual(hsp.annotations["positive"], 39)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 2, 44],
|
|
[ 0, 42]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 42))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq), "Seq('ASGATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV')"
|
|
)
|
|
self.assertEqual(hsp.query.id, "5")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(42))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "complement(5:245..370)")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({2: 'ASGATCVQKLDDSRDSAIHTTYRISLRSSSLQEPRYPLLRVV'}, length=88)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|168069582|ref|XP_001786502.1|")
|
|
self.assertEqual(hsp.target.name, "XP_001786502")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"predicted protein [Physcomitrella patens subsp. patens] >gi|162661153|gb|EDQ48685.1| predicted protein [Physcomitrella patens subsp. patens]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"], "ASGATCVQKLD SRDSAIHT YRISLRSSS++EPRYPL RVV"
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 5 Length: 42 Strand: Plus
|
|
gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed
|
|
spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2,
|
|
complete sequence
|
|
Target: gi|168069582|ref|XP_001786502.1| Length: 88 Strand: Plus
|
|
predicted protein [Physcomitrella patens subsp. patens]
|
|
>gi|162661153|gb|EDQ48685.1| predicted protein [Physcomitrella patens
|
|
subsp. patens]
|
|
|
|
Score:75 bits(183), Expect:4e-12,
|
|
Identities:37/42(88%), Positives:39/42(93%), Gaps:0.42(0%)
|
|
|
|
gi|168069 2 ASGATCVQKLDDSRDSAIHTTYRISLRSSSLQEPRYPLLRVV 44
|
|
0 |||||||||||.||||||||.|||||||||..||||||.||| 42
|
|
5 0 ASGATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV 42
|
|
|
|
""",
|
|
)
|
|
hit = record[3]
|
|
self.assertEqual(hit.num, 4)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|168068558|ref|XP_001786120.1|")
|
|
self.assertEqual(hit.target.name, "XP_001786120")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"predicted protein [Physcomitrella patens subsp. patens] >gi|162662102|gb|EDQ49068.1| predicted protein [Physcomitrella patens subsp. patens]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=130)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 178.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 73.1737567933329)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 1.53346675969648e-11, places=25
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 36)
|
|
self.assertEqual(hsp.annotations["positive"], 39)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 2, 44],
|
|
[ 0, 42]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 42))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq), "Seq('ASGATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV')"
|
|
)
|
|
self.assertEqual(hsp.query.id, "5")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(42))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "complement(5:245..370)")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({2: 'ASGATCVQKLDDSRNSAIHTTYRISLRSSSLQEPRYPLLRVV'}, length=130)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|168068558|ref|XP_001786120.1|")
|
|
self.assertEqual(hsp.target.name, "XP_001786120")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"predicted protein [Physcomitrella patens subsp. patens] >gi|162662102|gb|EDQ49068.1| predicted protein [Physcomitrella patens subsp. patens]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"], "ASGATCVQKLD SR+SAIHT YRISLRSSS++EPRYPL RVV"
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 5 Length: 42 Strand: Plus
|
|
gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed
|
|
spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2,
|
|
complete sequence
|
|
Target: gi|168068558|ref|XP_001786120.1| Length: 130 Strand: Plus
|
|
predicted protein [Physcomitrella patens subsp. patens]
|
|
>gi|162662102|gb|EDQ49068.1| predicted protein [Physcomitrella patens
|
|
subsp. patens]
|
|
|
|
Score:73 bits(178), Expect:2e-11,
|
|
Identities:36/42(86%), Positives:39/42(93%), Gaps:0.42(0%)
|
|
|
|
gi|168068 2 ASGATCVQKLDDSRNSAIHTTYRISLRSSSLQEPRYPLLRVV 44
|
|
0 |||||||||||.||.|||||.|||||||||..||||||.||| 42
|
|
5 0 ASGATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV 42
|
|
|
|
""",
|
|
)
|
|
hit = record[4]
|
|
self.assertEqual(hit.num, 5)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|168068926|ref|XP_001786259.1|")
|
|
self.assertEqual(hit.target.name, "XP_001786259")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"predicted protein [Physcomitrella patens subsp. patens] >gi|168069965|ref|XP_001786641.1| predicted protein [Physcomitrella patens subsp. patens] >gi|162660807|gb|EDQ48545.1| predicted protein [Physcomitrella patens subsp. patens] >gi|162661808|gb|EDQ48929.1| predicted protein [Physcomitrella patens subsp. patens]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=148)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 178.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 73.1737567933329)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 1.53346675969648e-11, places=25
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 36)
|
|
self.assertEqual(hsp.annotations["positive"], 39)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 2, 44],
|
|
[ 0, 42]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 42))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq), "Seq('ASGATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV')"
|
|
)
|
|
self.assertEqual(hsp.query.id, "5")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(42))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "complement(5:245..370)")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({2: 'ASGATCVQKLDDSRNSAIHTTYRISLRSSSLQEPRYPLLRVV'}, length=148)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|168068926|ref|XP_001786259.1|")
|
|
self.assertEqual(hsp.target.name, "XP_001786259")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"predicted protein [Physcomitrella patens subsp. patens] >gi|168069965|ref|XP_001786641.1| predicted protein [Physcomitrella patens subsp. patens] >gi|162660807|gb|EDQ48545.1| predicted protein [Physcomitrella patens subsp. patens] >gi|162661808|gb|EDQ48929.1| predicted protein [Physcomitrella patens subsp. patens]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"], "ASGATCVQKLD SR+SAIHT YRISLRSSS++EPRYPL RVV"
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 5 Length: 42 Strand: Plus
|
|
gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed
|
|
spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2,
|
|
complete sequence
|
|
Target: gi|168068926|ref|XP_001786259.1| Length: 148 Strand: Plus
|
|
predicted protein [Physcomitrella patens subsp. patens]
|
|
>gi|168069965|ref|XP_001786641.1| predicted protein [Physcomitrella
|
|
patens subsp. patens] >gi|162660807|gb|EDQ48545.1| predicted protein
|
|
[Physcomitrella patens subsp. patens] >gi|162661808|gb|EDQ48929.1|
|
|
predicted protein [Physcomitrella patens subsp. patens]
|
|
|
|
Score:73 bits(178), Expect:2e-11,
|
|
Identities:36/42(86%), Positives:39/42(93%), Gaps:0.42(0%)
|
|
|
|
gi|168068 2 ASGATCVQKLDDSRNSAIHTTYRISLRSSSLQEPRYPLLRVV 44
|
|
0 |||||||||||.||.|||||.|||||||||..||||||.||| 42
|
|
5 0 ASGATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV 42
|
|
|
|
""",
|
|
)
|
|
hit = record[5]
|
|
self.assertEqual(hit.num, 6)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|168070288|ref|XP_001786759.1|")
|
|
self.assertEqual(hit.target.name, "XP_001786759")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"predicted protein [Physcomitrella patens subsp. patens] >gi|162660550|gb|EDQ48427.1| predicted protein [Physcomitrella patens subsp. patens]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=148)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 178.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 73.1737567933329)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 1.53346675969648e-11, places=25
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 36)
|
|
self.assertEqual(hsp.annotations["positive"], 39)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 2, 44],
|
|
[ 0, 42]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 42))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq), "Seq('ASGATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV')"
|
|
)
|
|
self.assertEqual(hsp.query.id, "5")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(42))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "complement(5:245..370)")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({2: 'ASGATCVQKLDDSRNSAIHTTYRISLRSSSLQEPRYPLLRVV'}, length=148)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|168070288|ref|XP_001786759.1|")
|
|
self.assertEqual(hsp.target.name, "XP_001786759")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"predicted protein [Physcomitrella patens subsp. patens] >gi|162660550|gb|EDQ48427.1| predicted protein [Physcomitrella patens subsp. patens]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"], "ASGATCVQKLD SR+SAIHT YRISLRSSS++EPRYPL RVV"
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 5 Length: 42 Strand: Plus
|
|
gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed
|
|
spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2,
|
|
complete sequence
|
|
Target: gi|168070288|ref|XP_001786759.1| Length: 148 Strand: Plus
|
|
predicted protein [Physcomitrella patens subsp. patens]
|
|
>gi|162660550|gb|EDQ48427.1| predicted protein [Physcomitrella patens
|
|
subsp. patens]
|
|
|
|
Score:73 bits(178), Expect:2e-11,
|
|
Identities:36/42(86%), Positives:39/42(93%), Gaps:0.42(0%)
|
|
|
|
gi|168070 2 ASGATCVQKLDDSRNSAIHTTYRISLRSSSLQEPRYPLLRVV 44
|
|
0 |||||||||||.||.|||||.|||||||||..||||||.||| 42
|
|
5 0 ASGATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV 42
|
|
|
|
""",
|
|
)
|
|
hit = record[6]
|
|
self.assertEqual(hit.num, 7)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|168068591|ref|XP_001786133.1|")
|
|
self.assertEqual(hit.target.name, "XP_001786133")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"predicted protein [Physcomitrella patens subsp. patens] >gi|162662081|gb|EDQ49057.1| predicted protein [Physcomitrella patens subsp. patens]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=220)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 172.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 70.8625593378288)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 7.61051640442713e-11, places=25
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 42)
|
|
self.assertEqual(hsp.annotations["positive"], 50)
|
|
self.assertEqual(hsp.annotations["gaps"], 8)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[142, 169, 172, 183, 183, 220],
|
|
[ 0, 27, 27, 38, 43, 80]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 83))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('RPTAHRSARETNFRSQTVESRRKWVGGDAM*DAQADVPSA*WLRAQLAFKNSMV...IRC')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "5")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(80))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "complement(5:256..495)")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({142: 'RGLCHHADSDGQFHSTLPIKDIKRIGGCRDDALAGMPSDEPRAQLAFKNSMIHG...IRC'}, length=220)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|168068591|ref|XP_001786133.1|")
|
|
self.assertEqual(hsp.target.name, "XP_001786133")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"predicted protein [Physcomitrella patens subsp. patens] >gi|162662081|gb|EDQ49057.1| predicted protein [Physcomitrella patens subsp. patens]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"R H + + F S K +GG DA+ +D P RAQLAFKNSM+HGILQFT IAFR VLHRC+S+DIRC",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 5 Length: 80 Strand: Plus
|
|
gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed
|
|
spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2,
|
|
complete sequence
|
|
Target: gi|168068591|ref|XP_001786133.1| Length: 220 Strand: Plus
|
|
predicted protein [Physcomitrella patens subsp. patens]
|
|
>gi|162662081|gb|EDQ49057.1| predicted protein [Physcomitrella patens
|
|
subsp. patens]
|
|
|
|
Score:70 bits(172), Expect:8e-11,
|
|
Identities:42/83(51%), Positives:50/83(60%), Gaps:8.83(10%)
|
|
|
|
gi|168068 142 RGLCHHADSDGQFHSTLPIKDIKRIGGCRDDALAGMPSDEP-----RAQLAFKNSMIHGI
|
|
0 |...|.......|.|.......|..||---||......|.|-----||||||||||.|||
|
|
5 0 RPTAHRSARETNFRSQTVESRRKWVGG---DAM*DAQADVPSA*WLRAQLAFKNSMVHGI
|
|
|
|
gi|168068 197 LQFTLRIAFRCVLHRCKSQDIRC 220
|
|
60 ||||..||||.|||||.|.|||| 83
|
|
5 57 LQFTPSIAFRYVLHRCESRDIRC 80
|
|
|
|
""",
|
|
)
|
|
hit = record[7]
|
|
self.assertEqual(hit.num, 8)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|74622391|sp|Q8TGM5|ART3_YEAST")
|
|
self.assertEqual(hit.target.name, "Q8TGM5")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Uncharacterized protein ART3 (Antisense to ribosomal RNA transcript protein 3) >gi|18767126|gb|AAL79278.1| unknown [Saccharomyces cerevisiae]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=67)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 141.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 58.9213724843908)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 2.99274389212967e-07, places=21
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 29)
|
|
self.assertEqual(hsp.annotations["positive"], 32)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 7, 46],
|
|
[ 0, 39]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 39))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq), "Seq('GATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRV')"
|
|
)
|
|
self.assertEqual(hsp.query.id, "5")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(39))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "complement(5:248..364)")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({7: 'GAMCVQRFDDSRNSAIHITYRISLRSSSMREPRDPLLKV'}, length=67)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|74622391|sp|Q8TGM5|ART3_YEAST")
|
|
self.assertEqual(hsp.target.name, "Q8TGM5")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Uncharacterized protein ART3 (Antisense to ribosomal RNA transcript protein 3) >gi|18767126|gb|AAL79278.1| unknown [Saccharomyces cerevisiae]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"], "GA CVQ+ D SR+SAIH YRISLRSSSMREPR PL +V"
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 5 Length: 39 Strand: Plus
|
|
gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed
|
|
spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2,
|
|
complete sequence
|
|
Target: gi|74622391|sp|Q8TGM5|ART3_YEAST Length: 67 Strand: Plus
|
|
Uncharacterized protein ART3 (Antisense to ribosomal RNA transcript
|
|
protein 3) >gi|18767126|gb|AAL79278.1| unknown [Saccharomyces
|
|
cerevisiae]
|
|
|
|
Score:58 bits(141), Expect:3e-07,
|
|
Identities:29/39(74%), Positives:32/39(82%), Gaps:0.39(0%)
|
|
|
|
gi|746223 7 GAMCVQRFDDSRNSAIHITYRISLRSSSMREPRDPLLKV 46
|
|
0 ||.|||..|.||.||||..||||||||||||||.||..| 39
|
|
5 0 GATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRV 39
|
|
|
|
""",
|
|
)
|
|
hit = record[8]
|
|
self.assertEqual(hit.num, 9)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|168069944|ref|XP_001786634.1|")
|
|
self.assertEqual(hit.target.name, "XP_001786634")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"predicted protein [Physcomitrella patens subsp. patens] >gi|162660825|gb|EDQ48552.1| predicted protein [Physcomitrella patens subsp. patens]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=138)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 137.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 57.3805741807214)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 8.70755166175354e-07, places=21
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 28)
|
|
self.assertEqual(hsp.annotations["positive"], 31)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 34],
|
|
[ 0, 34]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 34))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq), "Seq('KLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV')"
|
|
)
|
|
self.assertEqual(hsp.query.id, "5")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(34))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "complement(5:245..346)")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'KLDDSRNSAIHTTYRISLRSSSLQEPRYPLLRVV'}, length=138)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|168069944|ref|XP_001786634.1|")
|
|
self.assertEqual(hsp.target.name, "XP_001786634")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"predicted protein [Physcomitrella patens subsp. patens] >gi|162660825|gb|EDQ48552.1| predicted protein [Physcomitrella patens subsp. patens]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"], "KLD SR+SAIHT YRISLRSSS++EPRYPL RVV"
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 5 Length: 34 Strand: Plus
|
|
gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed
|
|
spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2,
|
|
complete sequence
|
|
Target: gi|168069944|ref|XP_001786634.1| Length: 138 Strand: Plus
|
|
predicted protein [Physcomitrella patens subsp. patens]
|
|
>gi|162660825|gb|EDQ48552.1| predicted protein [Physcomitrella patens
|
|
subsp. patens]
|
|
|
|
Score:57 bits(137), Expect:9e-07,
|
|
Identities:28/34(82%), Positives:31/34(91%), Gaps:0.34(0%)
|
|
|
|
gi|168069 0 KLDDSRNSAIHTTYRISLRSSSLQEPRYPLLRVV 34
|
|
0 |||.||.|||||.|||||||||..||||||.||| 34
|
|
5 0 KLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV 34
|
|
|
|
""",
|
|
)
|
|
hit = record[9]
|
|
self.assertEqual(hit.num, 10)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|50307717|ref|XP_453851.1|")
|
|
self.assertEqual(hit.target.name, "XP_453851")
|
|
self.assertEqual(
|
|
hit.target.description, "unnamed protein product [Kluyveromyces lactis]"
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=54)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 134.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 56.2249754529693)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 1.93984013155423e-06, places=20
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 28)
|
|
self.assertEqual(hsp.annotations["positive"], 31)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 7, 47],
|
|
[ 0, 40]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 40))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq), "Seq('GATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV')"
|
|
)
|
|
self.assertEqual(hsp.query.id, "5")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2, complete sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(40))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "complement(5:245..364)")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({7: 'GAMCVQRFDDSRKSAIHNTYRNSLRSSSMREPRDPLLKVL'}, length=54)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|50307717|ref|XP_453851.1|")
|
|
self.assertEqual(hsp.target.name, "XP_453851")
|
|
self.assertEqual(
|
|
hsp.target.description, "unnamed protein product [Kluyveromyces lactis]"
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"], "GA CVQ+ D SR SAIH YR SLRSSSMREPR PL +V+"
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 5 Length: 40 Strand: Plus
|
|
gi|5052071|gb|AF067555.1|AF067555 Phlox stansburyi internal transcribed
|
|
spacer 1, 5.8S ribosomal RNA gene, and internal transcribed spacer 2,
|
|
complete sequence
|
|
Target: gi|50307717|ref|XP_453851.1| Length: 54 Strand: Plus
|
|
unnamed protein product [Kluyveromyces lactis]
|
|
|
|
Score:56 bits(134), Expect:2e-06,
|
|
Identities:28/40(70%), Positives:31/40(78%), Gaps:0.40(0%)
|
|
|
|
gi|503077 7 GAMCVQRFDDSRKSAIHNTYRNSLRSSSMREPRDPLLKVL 47
|
|
0 ||.|||..|.||.||||..||.|||||||||||.||..|. 40
|
|
5 0 GATCVQKLDGSRDSAIHTKYRISLRSSSMREPRYPLPRVV 40
|
|
|
|
""",
|
|
)
|
|
record = next(records)
|
|
self.assertEqual(record.num, 6)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "6")
|
|
self.assertEqual(
|
|
record.query.description,
|
|
"gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds",
|
|
)
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=309)")
|
|
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 8994603)
|
|
self.assertEqual(record.stat["db-len"], -1216159329)
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertEqual(record.stat["eff-space"], 75367093081)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.041)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.267)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.14)
|
|
self.assertEqual(len(record), 10)
|
|
hit = record[0]
|
|
self.assertEqual(hit.num, 1)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|3176603|gb|AAC18749.1|")
|
|
self.assertEqual(hit.target.name, "AAC18749")
|
|
self.assertEqual(hit.target.description, "phytochrome A [Lathyrus odoratus]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=103)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 543.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 213.771602003167)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 3.7262743863676e-54, places=67
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 103)
|
|
self.assertEqual(hsp.annotations["positive"], 103)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 103],
|
|
[ 0, 103]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 103))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "6")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(103))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "6:1..309")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|3176603|gb|AAC18749.1|")
|
|
self.assertEqual(hsp.target.name, "AAC18749")
|
|
self.assertEqual(hsp.target.description, "phytochrome A [Lathyrus odoratus]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 6 Length: 103 Strand: Plus
|
|
gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA)
|
|
gene, partial cds
|
|
Target: gi|3176603|gb|AAC18749.1| Length: 103 Strand: Plus
|
|
phytochrome A [Lathyrus odoratus]
|
|
|
|
Score:213 bits(543), Expect:4e-54,
|
|
Identities:103/103(100%), Positives:103/103(100%), Gaps:0.103(0%)
|
|
|
|
gi|317660 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
|
|
gi|317660 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103
|
|
60 ||||||||||||||||||||||||||||||||||||||||||| 103
|
|
6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103
|
|
|
|
""",
|
|
)
|
|
hit = record[1]
|
|
self.assertEqual(hit.num, 2)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|130188|sp|P15001.1|PHYA_PEA")
|
|
self.assertEqual(hit.target.name, "P15001")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RecName: Full=Phytochrome A >gi|169132|gb|AAA33682.1| phytochrome [Pisum sativum] >gi|295830|emb|CAA32242.1| phytochrome apoprotein [Pisum sativum] >gi|51173514|gb|AAT97643.1| phytochrome A apoprotein [Pisum sativum] >gi|226757|prf||1604466A phytochrome",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=1124)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 530.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 208.764007516241)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 1.1987013044853e-52, places=65
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 101)
|
|
self.assertEqual(hsp.annotations["positive"], 102)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[275, 378],
|
|
[ 0, 103]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 103))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "6")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(103))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "6:1..309")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({275: 'QAARFLFMKNKVRMIVDCNAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV'}, length=1124)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|130188|sp|P15001.1|PHYA_PEA")
|
|
self.assertEqual(hsp.target.name, "P15001")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"RecName: Full=Phytochrome A >gi|169132|gb|AAA33682.1| phytochrome [Pisum sativum] >gi|295830|emb|CAA32242.1| phytochrome apoprotein [Pisum sativum] >gi|51173514|gb|AAT97643.1| phytochrome A apoprotein [Pisum sativum] >gi|226757|prf||1604466A phytochrome",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"QAARFLFMKNKVRMIVDC+AKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVNDSDEDGDS DAVLPQKKKRLWGLVVCHNTTPRFV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 6 Length: 103 Strand: Plus
|
|
gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA)
|
|
gene, partial cds
|
|
Target: gi|130188|sp|P15001.1|PHYA_PEA Length: 1124 Strand: Plus
|
|
RecName: Full=Phytochrome A >gi|169132|gb|AAA33682.1| phytochrome [Pisum
|
|
sativum] >gi|295830|emb|CAA32242.1| phytochrome apoprotein [Pisum
|
|
sativum] >gi|51173514|gb|AAT97643.1| phytochrome A apoprotein [Pisum
|
|
sativum] >gi|226757|prf||1604466A phytochrome
|
|
|
|
Score:208 bits(530), Expect:1e-52,
|
|
Identities:101/103(98%), Positives:102/103(99%), Gaps:0.103(0%)
|
|
|
|
gi|130188 275 QAARFLFMKNKVRMIVDCNAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
0 ||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||||
|
|
6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
|
|
gi|130188 335 SLVMAVVVNDSDEDGDSADAVLPQKKKRLWGLVVCHNTTPRFV 378
|
|
60 |||||||||||||||||.||||||||||||||||||||||||| 103
|
|
6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103
|
|
|
|
""",
|
|
)
|
|
hit = record[2]
|
|
self.assertEqual(hit.num, 3)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|2499555|sp|P93673.1|PHYA_LATSA")
|
|
self.assertEqual(hit.target.name, "P93673")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RecName: Full=Phytochrome type A >gi|1848273|gb|AAB47994.1| phytochrome type A [Lathyrus sativus]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=1124)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 530.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 208.764007516241)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 1.1987013044853e-52, places=65
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 101)
|
|
self.assertEqual(hsp.annotations["positive"], 102)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[275, 378],
|
|
[ 0, 103]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 103))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "6")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(103))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "6:1..309")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({275: 'QAARFLFMKNKVRMIVDCNAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV'}, length=1124)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|2499555|sp|P93673.1|PHYA_LATSA")
|
|
self.assertEqual(hsp.target.name, "P93673")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"RecName: Full=Phytochrome type A >gi|1848273|gb|AAB47994.1| phytochrome type A [Lathyrus sativus]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"QAARFLFMKNKVRMIVDC+AKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVNDSDEDGDS DAVLPQKKKRLWGLVVCHNTTPRFV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 6 Length: 103 Strand: Plus
|
|
gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA)
|
|
gene, partial cds
|
|
Target: gi|2499555|sp|P93673.1|PHYA_LATSA Length: 1124 Strand: Plus
|
|
RecName: Full=Phytochrome type A >gi|1848273|gb|AAB47994.1| phytochrome
|
|
type A [Lathyrus sativus]
|
|
|
|
Score:208 bits(530), Expect:1e-52,
|
|
Identities:101/103(98%), Positives:102/103(99%), Gaps:0.103(0%)
|
|
|
|
gi|249955 275 QAARFLFMKNKVRMIVDCNAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
0 ||||||||||||||||||.|||||||||||||||||||||||||||||||||||||||||
|
|
6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
|
|
gi|249955 335 SLVMAVVVNDSDEDGDSADAVLPQKKKRLWGLVVCHNTTPRFV 378
|
|
60 |||||||||||||||||.||||||||||||||||||||||||| 103
|
|
6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103
|
|
|
|
""",
|
|
)
|
|
hit = record[3]
|
|
self.assertEqual(hit.num, 4)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|3176595|gb|AAC18745.1|")
|
|
self.assertEqual(hit.target.name, "AAC18745")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"phytochrome A [Lennea melanocarpa] >gi|3176597|gb|AAC18746.1| phytochrome A [Hebestigma cubense] >gi|3176609|gb|AAC18752.1| phytochrome A [Sesbania cochichinensis] >gi|3176611|gb|AAC18753.1| phytochrome A [Sesbania emerus]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=103)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 528.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 207.993608364407)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 2.04467473791515e-52, places=66
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 100)
|
|
self.assertEqual(hsp.annotations["positive"], 101)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 103],
|
|
[ 0, 103]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 103))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "6")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(103))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "6:1..309")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|3176595|gb|AAC18745.1|")
|
|
self.assertEqual(hsp.target.name, "AAC18745")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"phytochrome A [Lennea melanocarpa] >gi|3176597|gb|AAC18746.1| phytochrome A [Hebestigma cubense] >gi|3176609|gb|AAC18752.1| phytochrome A [Sesbania cochichinensis] >gi|3176611|gb|AAC18753.1| phytochrome A [Sesbania emerus]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVNDSDEDGDS DAV PQK+KRLWGLVVCHNTTPRFV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 6 Length: 103 Strand: Plus
|
|
gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA)
|
|
gene, partial cds
|
|
Target: gi|3176595|gb|AAC18745.1| Length: 103 Strand: Plus
|
|
phytochrome A [Lennea melanocarpa] >gi|3176597|gb|AAC18746.1|
|
|
phytochrome A [Hebestigma cubense] >gi|3176609|gb|AAC18752.1|
|
|
phytochrome A [Sesbania cochichinensis] >gi|3176611|gb|AAC18753.1|
|
|
phytochrome A [Sesbania emerus]
|
|
|
|
Score:207 bits(528), Expect:2e-52,
|
|
Identities:100/103(97%), Positives:101/103(98%), Gaps:0.103(0%)
|
|
|
|
gi|317659 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
|
|
gi|317659 60 SLVMAVVVNDSDEDGDSSDAVQPQKRKRLWGLVVCHNTTPRFV 103
|
|
60 |||||||||||||||||.|||.|||.||||||||||||||||| 103
|
|
6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103
|
|
|
|
""",
|
|
)
|
|
hit = record[4]
|
|
self.assertEqual(hit.num, 5)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|1711106|gb|AAC18675.1|")
|
|
self.assertEqual(hit.target.name, "AAC18675")
|
|
self.assertEqual(hit.target.description, "phytochrome A [Sophora affinis]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=210)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 528.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 207.993608364407)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 2.04467473791515e-52, places=66
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 100)
|
|
self.assertEqual(hsp.annotations["positive"], 101)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 40, 143],
|
|
[ 0, 103]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 103))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "6")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(103))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "6:1..309")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({40: 'QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV'}, length=210)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|1711106|gb|AAC18675.1|")
|
|
self.assertEqual(hsp.target.name, "AAC18675")
|
|
self.assertEqual(hsp.target.description, "phytochrome A [Sophora affinis]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVNDSDEDGDS DAV PQK+KRLWGLVVCHNTTPRFV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 6 Length: 103 Strand: Plus
|
|
gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA)
|
|
gene, partial cds
|
|
Target: gi|1711106|gb|AAC18675.1| Length: 210 Strand: Plus
|
|
phytochrome A [Sophora affinis]
|
|
|
|
Score:207 bits(528), Expect:2e-52,
|
|
Identities:100/103(97%), Positives:101/103(98%), Gaps:0.103(0%)
|
|
|
|
gi|171110 40 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
|
|
gi|171110 100 SLVMAVVVNDSDEDGDSSDAVQPQKRKRLWGLVVCHNTTPRFV 143
|
|
60 |||||||||||||||||.|||.|||.||||||||||||||||| 103
|
|
6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103
|
|
|
|
""",
|
|
)
|
|
hit = record[5]
|
|
self.assertEqual(hit.num, 6)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|1711090|gb|AAC18670.1|")
|
|
self.assertEqual(hit.target.name, "AAC18670")
|
|
self.assertEqual(hit.target.description, "phytochrome A [Myrospermum sousanum]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=210)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 525.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 206.838009636654)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 4.55506009801166e-52, places=66
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 99)
|
|
self.assertEqual(hsp.annotations["positive"], 101)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 40, 143],
|
|
[ 0, 103]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 103))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "6")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(103))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "6:1..309")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({40: 'QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV'}, length=210)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|1711090|gb|AAC18670.1|")
|
|
self.assertEqual(hsp.target.name, "AAC18670")
|
|
self.assertEqual(hsp.target.description, "phytochrome A [Myrospermum sousanum]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLV+AVVVNDSDEDGDS DAV PQK+KRLWGLVVCHNTTPRFV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 6 Length: 103 Strand: Plus
|
|
gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA)
|
|
gene, partial cds
|
|
Target: gi|1711090|gb|AAC18670.1| Length: 210 Strand: Plus
|
|
phytochrome A [Myrospermum sousanum]
|
|
|
|
Score:206 bits(525), Expect:5e-52,
|
|
Identities:99/103(96%), Positives:101/103(98%), Gaps:0.103(0%)
|
|
|
|
gi|171109 40 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
|
|
gi|171109 100 SLVLAVVVNDSDEDGDSSDAVQPQKRKRLWGLVVCHNTTPRFV 143
|
|
60 |||.|||||||||||||.|||.|||.||||||||||||||||| 103
|
|
6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103
|
|
|
|
""",
|
|
)
|
|
hit = record[6]
|
|
self.assertEqual(hit.num, 7)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|3176605|gb|AAC18750.1|")
|
|
self.assertEqual(hit.target.name, "AAC18750")
|
|
self.assertEqual(hit.target.description, "phytochrome A [Hybosema robustum]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=103)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 524.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 206.452810060737)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 5.94909272347008e-52, places=66
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 99)
|
|
self.assertEqual(hsp.annotations["positive"], 100)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 103],
|
|
[ 0, 103]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 103))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "6")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(103))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "6:1..309")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('QATRFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|3176605|gb|AAC18750.1|")
|
|
self.assertEqual(hsp.target.name, "AAC18750")
|
|
self.assertEqual(hsp.target.description, "phytochrome A [Hybosema robustum]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"QA RFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVNDSDEDGDS DAV PQK+KRLWGLVVCHNTTPRFV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 6 Length: 103 Strand: Plus
|
|
gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA)
|
|
gene, partial cds
|
|
Target: gi|3176605|gb|AAC18750.1| Length: 103 Strand: Plus
|
|
phytochrome A [Hybosema robustum]
|
|
|
|
Score:206 bits(524), Expect:6e-52,
|
|
Identities:99/103(96%), Positives:100/103(97%), Gaps:0.103(0%)
|
|
|
|
gi|317660 0 QATRFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
0 ||.|||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
|
|
gi|317660 60 SLVMAVVVNDSDEDGDSSDAVQPQKRKRLWGLVVCHNTTPRFV 103
|
|
60 |||||||||||||||||.|||.|||.||||||||||||||||| 103
|
|
6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103
|
|
|
|
""",
|
|
)
|
|
hit = record[7]
|
|
self.assertEqual(hit.num, 8)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|3176454|gb|AAC18668.1|")
|
|
self.assertEqual(hit.target.name, "AAC18668")
|
|
self.assertEqual(hit.target.description, "phytochrome A [Cyclolobium nutans]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=207)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 523.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 206.06761048482)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 7.76975571582328e-52, places=66
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 99)
|
|
self.assertEqual(hsp.annotations["positive"], 101)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 37, 140],
|
|
[ 0, 103]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 103))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "6")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(103))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "6:1..309")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({37: 'QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV'}, length=207)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|3176454|gb|AAC18668.1|")
|
|
self.assertEqual(hsp.target.name, "AAC18668")
|
|
self.assertEqual(hsp.target.description, "phytochrome A [Cyclolobium nutans]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVNDSDEDG+S DAV PQK+KRLWGLVVCHNTTPRFV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 6 Length: 103 Strand: Plus
|
|
gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA)
|
|
gene, partial cds
|
|
Target: gi|3176454|gb|AAC18668.1| Length: 207 Strand: Plus
|
|
phytochrome A [Cyclolobium nutans]
|
|
|
|
Score:206 bits(523), Expect:8e-52,
|
|
Identities:99/103(96%), Positives:101/103(98%), Gaps:0.103(0%)
|
|
|
|
gi|317645 37 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
|
|
gi|317645 97 SLVMAVVVNDSDEDGNSSDAVQPQKRKRLWGLVVCHNTTPRFV 140
|
|
60 |||||||||||||||.|.|||.|||.||||||||||||||||| 103
|
|
6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103
|
|
|
|
""",
|
|
)
|
|
hit = record[8]
|
|
self.assertEqual(hit.num, 9)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|3176523|gb|AAC18709.1|")
|
|
self.assertEqual(hit.target.name, "AAC18709")
|
|
self.assertEqual(
|
|
hit.target.description, "phytochrome A [Millettia richardiana]"
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=139)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 521.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 205.297211332985)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 1.3253195915005e-51, places=64
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 98)
|
|
self.assertEqual(hsp.annotations["positive"], 101)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 36, 139],
|
|
[ 0, 103]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 103))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "6")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(103))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "6:1..309")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({36: 'QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV'}, length=139)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|3176523|gb|AAC18709.1|")
|
|
self.assertEqual(hsp.target.name, "AAC18709")
|
|
self.assertEqual(
|
|
hsp.target.description, "phytochrome A [Millettia richardiana]"
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVND++EDGDS DAV PQK+KRLWGLVVCHNTTPRFV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 6 Length: 103 Strand: Plus
|
|
gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA)
|
|
gene, partial cds
|
|
Target: gi|3176523|gb|AAC18709.1| Length: 139 Strand: Plus
|
|
phytochrome A [Millettia richardiana]
|
|
|
|
Score:205 bits(521), Expect:1e-51,
|
|
Identities:98/103(95%), Positives:101/103(98%), Gaps:0.103(0%)
|
|
|
|
gi|317652 36 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
|
|
gi|317652 96 SLVMAVVVNDNEEDGDSSDAVQPQKRKRLWGLVVCHNTTPRFV 139
|
|
60 ||||||||||..|||||.|||.|||.||||||||||||||||| 103
|
|
6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103
|
|
|
|
""",
|
|
)
|
|
hit = record[9]
|
|
self.assertEqual(hit.num, 10)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|3176494|gb|AAC18693.1|")
|
|
self.assertEqual(hit.target.name, "AAC18693")
|
|
self.assertEqual(
|
|
hit.target.description, "phytochrome A [Callerya atropurpurea]"
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=177)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 520.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 204.912011757068)
|
|
self.assertAlmostEqual(
|
|
hsp.annotations["evalue"], 1.73092099081406e-51, places=65
|
|
)
|
|
self.assertEqual(hsp.annotations["identity"], 98)
|
|
self.assertEqual(hsp.annotations["positive"], 101)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 7, 110],
|
|
[ 0, 103]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 103))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "6")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA) gene, partial cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(103))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "6:1..309")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({7: 'QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMA...RFV'}, length=177)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|3176494|gb|AAC18693.1|")
|
|
self.assertEqual(hsp.target.name, "AAC18693")
|
|
self.assertEqual(
|
|
hsp.target.description, "phytochrome A [Callerya atropurpurea]"
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIASLVMAVVVNDS+EDGDS +AV PQK+KRLWGLVVCHNTTPRFV",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 6 Length: 103 Strand: Plus
|
|
gi|3176602|gb|U78617.1|LOU78617 Lathyrus odoratus phytochrome A (PHYA)
|
|
gene, partial cds
|
|
Target: gi|3176494|gb|AAC18693.1| Length: 177 Strand: Plus
|
|
phytochrome A [Callerya atropurpurea]
|
|
|
|
Score:204 bits(520), Expect:2e-51,
|
|
Identities:98/103(95%), Positives:101/103(98%), Gaps:0.103(0%)
|
|
|
|
gi|317649 7 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
6 0 QAARFLFMKNKVRMIVDCHAKHVKVLQDEKLPFDLTLCGSTLRAPHSCHLQYMANMDSIA
|
|
|
|
gi|317649 67 SLVMAVVVNDSEEDGDSSEAVQPQKRKRLWGLVVCHNTTPRFV 110
|
|
60 |||||||||||.|||||..||.|||.||||||||||||||||| 103
|
|
6 60 SLVMAVVVNDSDEDGDSRDAVLPQKKKRLWGLVVCHNTTPRFV 103
|
|
|
|
""",
|
|
)
|
|
record = next(records)
|
|
self.assertEqual(record.num, 7)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "7")
|
|
self.assertEqual(
|
|
record.query.description,
|
|
"gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product",
|
|
)
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=2551)")
|
|
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 8994603)
|
|
self.assertEqual(record.stat["db-len"], -1216159329)
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertEqual(record.stat["eff-space"], 1251086325060)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.041)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.267)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.14)
|
|
self.assertEqual(len(record), 10)
|
|
hit = record[0]
|
|
self.assertEqual(hit.num, 1)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|27805603|sp|Q9TKP6.1|MATK_WISFR")
|
|
self.assertEqual(hit.target.name, "Q9TKP6")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"RecName: Full=Maturase K; AltName: Full=Intron maturase >gi|5817759|gb|AAD52902.1|AF142731_1 maturase-like protein [Wisteria frutescens]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 2451.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 948.732392853477)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 506)
|
|
self.assertEqual(hsp.annotations["positive"], 506)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 506],
|
|
[ 0, 506]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 506))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "7")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(506))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|27805603|sp|Q9TKP6.1|MATK_WISFR")
|
|
self.assertEqual(hsp.target.name, "Q9TKP6")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"RecName: Full=Maturase K; AltName: Full=Intron maturase >gi|5817759|gb|AAD52902.1|AF142731_1 maturase-like protein [Wisteria frutescens]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRLITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIVKSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFFLYHFSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSVFFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHYFIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIKKLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSSKKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRTSSTLQRLHRNRIWYLDILFSNDLVNHE",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 7 Length: 506 Strand: Plus
|
|
gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like
|
|
protein (matK) gene, complete cds; chloroplast gene for chloroplast
|
|
product
|
|
Target: gi|27805603|sp|Q9TKP6.1|MATK_WISFR Length: 506 Strand: Plus
|
|
RecName: Full=Maturase K; AltName: Full=Intron maturase
|
|
>gi|5817759|gb|AAD52902.1|AF142731_1 maturase-like protein [Wisteria
|
|
frutescens]
|
|
|
|
Score:948 bits(2451), Expect:0,
|
|
Identities:506/506(100%), Positives:506/506(100%), Gaps:0.506(0%)
|
|
|
|
gi|278056 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL
|
|
|
|
gi|278056 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV
|
|
|
|
gi|278056 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFF
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||........
|
|
7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX
|
|
|
|
gi|278056 180 LYHFSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV
|
|
180 ....||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV
|
|
|
|
gi|278056 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
240 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
|
|
gi|278056 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
|
|
gi|278056 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
360 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
|
|
gi|278056 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRTS
|
|
420 ||||||||||||||||||||||||||||||||||||....................||||
|
|
7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS
|
|
|
|
gi|278056 480 STLQRLHRNRIWYLDILFSNDLVNHE 506
|
|
480 |||||||||||||||||||||||||| 506
|
|
7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506
|
|
|
|
""",
|
|
)
|
|
hit = record[1]
|
|
self.assertEqual(hit.num, 2)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|171909144|gb|ACB58148.1|")
|
|
self.assertEqual(hit.target.name, "ACB58148")
|
|
self.assertEqual(hit.target.description, "maturase K [Wisteria frutescens]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 2445.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 946.421195397973)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 505)
|
|
self.assertEqual(hsp.annotations["positive"], 505)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 506],
|
|
[ 0, 506]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 506))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "7")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(506))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKYSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|171909144|gb|ACB58148.1|")
|
|
self.assertEqual(hsp.target.name, "ACB58148")
|
|
self.assertEqual(hsp.target.description, "maturase K [Wisteria frutescens]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNK SLLIVKRLITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIVKSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFFLYHFSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSVFFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHYFIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIKKLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSSKKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRTSSTLQRLHRNRIWYLDILFSNDLVNHE",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 7 Length: 506 Strand: Plus
|
|
gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like
|
|
protein (matK) gene, complete cds; chloroplast gene for chloroplast
|
|
product
|
|
Target: gi|171909144|gb|ACB58148.1| Length: 506 Strand: Plus
|
|
maturase K [Wisteria frutescens]
|
|
|
|
Score:946 bits(2445), Expect:0,
|
|
Identities:505/506(100%), Positives:505/506(100%), Gaps:0.506(0%)
|
|
|
|
gi|171909 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKYSLLIVKRL
|
|
0 |||||||||||||||||||||||||||||||||||||||||||||||||||.||||||||
|
|
7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL
|
|
|
|
gi|171909 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV
|
|
|
|
gi|171909 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFF
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||........
|
|
7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX
|
|
|
|
gi|171909 180 LYHFSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV
|
|
180 ....||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV
|
|
|
|
gi|171909 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
240 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
|
|
gi|171909 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
|
|
gi|171909 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
360 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
|
|
gi|171909 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRTS
|
|
420 ||||||||||||||||||||||||||||||||||||....................||||
|
|
7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS
|
|
|
|
gi|171909 480 STLQRLHRNRIWYLDILFSNDLVNHE 506
|
|
480 |||||||||||||||||||||||||| 506
|
|
7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506
|
|
|
|
""",
|
|
)
|
|
hit = record[2]
|
|
self.assertEqual(hit.num, 3)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|171909146|gb|ACB58149.1|")
|
|
self.assertEqual(hit.target.name, "ACB58149")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"maturase K [Wisteria frutescens] >gi|171909148|gb|ACB58150.1| maturase K [Wisteria frutescens var. macrostachya]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 2443.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 945.650796246138)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 505)
|
|
self.assertEqual(hsp.annotations["positive"], 505)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 506],
|
|
[ 0, 506]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 506))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "7")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(506))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|171909146|gb|ACB58149.1|")
|
|
self.assertEqual(hsp.target.name, "ACB58149")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"maturase K [Wisteria frutescens] >gi|171909148|gb|ACB58150.1| maturase K [Wisteria frutescens var. macrostachya]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRLITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIVKSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFFLYHFSNRNSLITP KSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSVFFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHYFIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIKKLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSSKKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRTSSTLQRLHRNRIWYLDILFSNDLVNHE",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 7 Length: 506 Strand: Plus
|
|
gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like
|
|
protein (matK) gene, complete cds; chloroplast gene for chloroplast
|
|
product
|
|
Target: gi|171909146|gb|ACB58149.1| Length: 506 Strand: Plus
|
|
maturase K [Wisteria frutescens] >gi|171909148|gb|ACB58150.1| maturase K
|
|
[Wisteria frutescens var. macrostachya]
|
|
|
|
Score:945 bits(2443), Expect:0,
|
|
Identities:505/506(100%), Positives:505/506(100%), Gaps:0.506(0%)
|
|
|
|
gi|171909 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL
|
|
|
|
gi|171909 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV
|
|
|
|
gi|171909 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFF
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||........
|
|
7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX
|
|
|
|
gi|171909 180 LYHFSNRNSLITPIKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV
|
|
180 ....|||||||||.||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV
|
|
|
|
gi|171909 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
240 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
|
|
gi|171909 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
|
|
gi|171909 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
360 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
|
|
gi|171909 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRTS
|
|
420 ||||||||||||||||||||||||||||||||||||....................||||
|
|
7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS
|
|
|
|
gi|171909 480 STLQRLHRNRIWYLDILFSNDLVNHE 506
|
|
480 |||||||||||||||||||||||||| 506
|
|
7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506
|
|
|
|
""",
|
|
)
|
|
hit = record[3]
|
|
self.assertEqual(hit.num, 4)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|171909132|gb|ACB58142.1|")
|
|
self.assertEqual(hit.target.name, "ACB58142")
|
|
self.assertEqual(hit.target.description, "maturase K [Callerya megasperma]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 2439.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 944.109997942469)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 501)
|
|
self.assertEqual(hsp.annotations["positive"], 504)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 506],
|
|
[ 0, 506]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 506))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "7")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(506))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|171909132|gb|ACB58142.1|")
|
|
self.assertEqual(hsp.target.name, "ACB58142")
|
|
self.assertEqual(hsp.target.description, "maturase K [Callerya megasperma]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRLITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIVKSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFFLY++ NRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSVFFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHYFIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIKKLDIIVPIIPLIRSLAKAKFCNVLGHP+SKSVWADSSDFDIIDRFLRICRNLSHYYNGSSKKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPR SSTLQRLHRNRIWYLDILFSNDLVNHE",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 7 Length: 506 Strand: Plus
|
|
gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like
|
|
protein (matK) gene, complete cds; chloroplast gene for chloroplast
|
|
product
|
|
Target: gi|171909132|gb|ACB58142.1| Length: 506 Strand: Plus
|
|
maturase K [Callerya megasperma]
|
|
|
|
Score:944 bits(2439), Expect:0,
|
|
Identities:501/506(99%), Positives:504/506(100%), Gaps:0.506(0%)
|
|
|
|
gi|171909 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL
|
|
|
|
gi|171909 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV
|
|
|
|
gi|171909 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFF
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||........
|
|
7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX
|
|
|
|
gi|171909 180 LYNYCNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV
|
|
180 .....|||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV
|
|
|
|
gi|171909 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
240 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
|
|
gi|171909 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
|
|
gi|171909 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPISKSVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
360 |||||||||||||||||||||||||||.||||||||||||||||||||||||||||||||
|
|
7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
|
|
gi|171909 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRAS
|
|
420 ||||||||||||||||||||||||||||||||||||....................||.|
|
|
7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS
|
|
|
|
gi|171909 480 STLQRLHRNRIWYLDILFSNDLVNHE 506
|
|
480 |||||||||||||||||||||||||| 506
|
|
7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506
|
|
|
|
""",
|
|
)
|
|
hit = record[4]
|
|
self.assertEqual(hit.num, 5)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|5817760|gb|AAD52903.1|AF142732_1")
|
|
self.assertEqual(hit.target.name, "AAD52903")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"maturase-like protein [Wisteria sinensis] >gi|171909136|gb|ACB58144.1| maturase K [Wisteria brachybotrys] >gi|171909138|gb|ACB58145.1| maturase K [Wisteria floribunda] >gi|171909140|gb|ACB58146.1| maturase K [Wisteria floribunda] >gi|171909142|gb|ACB58147.1| maturase K [Wisteria floribunda] >gi|171909150|gb|ACB58151.1| maturase K [Wisteria sinensis] >gi|171909152|gb|ACB58152.1| maturase K [Wisteria sinensis] >gi|171909154|gb|ACB58153.1| maturase K [Wisteria sinensis] >gi|171909156|gb|ACB58154.1| maturase K [Wisteria villosa] >gi|171909158|gb|ACB58155.1| maturase K [Wisteria villosa] >gi|171909160|gb|ACB58156.1| maturase K [Wisteria villosa]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 2418.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 936.020806848204)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 498)
|
|
self.assertEqual(hsp.annotations["positive"], 500)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 506],
|
|
[ 0, 506]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 506))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "7")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(506))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDKKSSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|5817760|gb|AAD52903.1|AF142732_1")
|
|
self.assertEqual(hsp.target.name, "AAD52903")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"maturase-like protein [Wisteria sinensis] >gi|171909136|gb|ACB58144.1| maturase K [Wisteria brachybotrys] >gi|171909138|gb|ACB58145.1| maturase K [Wisteria floribunda] >gi|171909140|gb|ACB58146.1| maturase K [Wisteria floribunda] >gi|171909142|gb|ACB58147.1| maturase K [Wisteria floribunda] >gi|171909150|gb|ACB58151.1| maturase K [Wisteria sinensis] >gi|171909152|gb|ACB58152.1| maturase K [Wisteria sinensis] >gi|171909154|gb|ACB58153.1| maturase K [Wisteria sinensis] >gi|171909156|gb|ACB58154.1| maturase K [Wisteria villosa] >gi|171909158|gb|ACB58155.1| maturase K [Wisteria villosa] >gi|171909160|gb|ACB58156.1| maturase K [Wisteria villosa]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYD KSSLLIVKRLITRMYQQNHLIISANDSNKNPFLGYN NFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIVKSYHNLRSIHSIFPFLEDK TY NYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFFLY+F NRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSVFFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHYFIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIKKLDIIVPIIPLIRSLAKAKFCNVLGHP+SKSVWADSSDFDIIDRFLRICRNLSHYYNGSSKKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPR SSTLQRLHRNRIWYLDILFSNDLVNHE",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 7 Length: 506 Strand: Plus
|
|
gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like
|
|
protein (matK) gene, complete cds; chloroplast gene for chloroplast
|
|
product
|
|
Target: gi|5817760|gb|AAD52903.1|AF142732_1 Length: 506 Strand: Plus
|
|
maturase-like protein [Wisteria sinensis] >gi|171909136|gb|ACB58144.1|
|
|
maturase K [Wisteria brachybotrys] >gi|171909138|gb|ACB58145.1| maturase
|
|
K [Wisteria floribunda] >gi|171909140|gb|ACB58146.1| maturase K
|
|
[Wisteria floribunda] >gi|171909142|gb|ACB58147.1| maturase K [Wisteria
|
|
floribunda] >gi|171909150|gb|ACB58151.1| maturase K [Wisteria sinensis]
|
|
>gi|171909152|gb|ACB58152.1| maturase K [Wisteria sinensis]
|
|
>gi|171909154|gb|ACB58153.1| maturase K [Wisteria sinensis]
|
|
>gi|171909156|gb|ACB58154.1| maturase K [Wisteria villosa]
|
|
>gi|171909158|gb|ACB58155.1| maturase K [Wisteria villosa]
|
|
>gi|171909160|gb|ACB58156.1| maturase K [Wisteria villosa]
|
|
|
|
Score:936 bits(2418), Expect:0,
|
|
Identities:498/506(98%), Positives:500/506(99%), Gaps:0.506(0%)
|
|
|
|
gi|581776 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDKKSSLLIVKRL
|
|
0 |||||||||||||||||||||||||||||||||||||||||||||||||.||||||||||
|
|
7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL
|
|
|
|
gi|581776 60 ITRMYQQNHLIISANDSNKNPFLGYNNNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV
|
|
60 ||||||||||||||||||||||||||.|||||||||||||||||||||||||||||||||
|
|
7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV
|
|
|
|
gi|581776 120 KSYHNLRSIHSIFPFLEDKLTYFNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFF
|
|
120 |||||||||||||||||||.||.|||||||||||||||||||||||||||||........
|
|
7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX
|
|
|
|
gi|581776 180 LYNFCNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV
|
|
180 .....|||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV
|
|
|
|
gi|581776 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
240 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
|
|
gi|581776 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
|
|
gi|581776 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPISKSVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
360 |||||||||||||||||||||||||||.||||||||||||||||||||||||||||||||
|
|
7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
|
|
gi|581776 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRAS
|
|
420 ||||||||||||||||||||||||||||||||||||....................||.|
|
|
7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS
|
|
|
|
gi|581776 480 STLQRLHRNRIWYLDILFSNDLVNHE 506
|
|
480 |||||||||||||||||||||||||| 506
|
|
7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506
|
|
|
|
""",
|
|
)
|
|
hit = record[5]
|
|
self.assertEqual(hit.num, 6)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|171909134|gb|ACB58143.1|")
|
|
self.assertEqual(hit.target.name, "ACB58143")
|
|
self.assertEqual(hit.target.description, "maturase K [Wisteria brachybotrys]")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 2398.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 928.316815329857)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 496)
|
|
self.assertEqual(hsp.annotations["positive"], 498)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 506],
|
|
[ 0, 506]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 506))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "7")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(506))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDKKSSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|171909134|gb|ACB58143.1|")
|
|
self.assertEqual(hsp.target.name, "ACB58143")
|
|
self.assertEqual(hsp.target.description, "maturase K [Wisteria brachybotrys]")
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYD KSSLLIVKRLITRMYQQNHLIISANDSNKNPFLGYN FYSQIISDGFAVVVEIPFFLQLSSSLEEAEIVKSYHNLRSIHSIFPFLEDK TY NYVSDIRIPYPIHLEILVQILRY VKDASFFHLLRFFLY+F NRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSVFFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHYFIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIKKLDIIVPIIPLIRSLAKAKFCNVLGHP+SKSVWADSSDFDIIDRFLRICRNLSHYYNGSSKKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPR SSTLQRLHRNRIWYLDILFSNDLVNHE",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 7 Length: 506 Strand: Plus
|
|
gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like
|
|
protein (matK) gene, complete cds; chloroplast gene for chloroplast
|
|
product
|
|
Target: gi|171909134|gb|ACB58143.1| Length: 506 Strand: Plus
|
|
maturase K [Wisteria brachybotrys]
|
|
|
|
Score:928 bits(2398), Expect:0,
|
|
Identities:496/506(98%), Positives:498/506(98%), Gaps:0.506(0%)
|
|
|
|
gi|171909 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDKKSSLLIVKRL
|
|
0 |||||||||||||||||||||||||||||||||||||||||||||||||.||||||||||
|
|
7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL
|
|
|
|
gi|171909 60 ITRMYQQNHLIISANDSNKNPFLGYNNKFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV
|
|
60 ||||||||||||||||||||||||||..||||||||||||||||||||||||||||||||
|
|
7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV
|
|
|
|
gi|171909 120 KSYHNLRSIHSIFPFLEDKLTYFNYVSDIRIPYPIHLEILVQILRYRVKDASFFHLLRFF
|
|
120 |||||||||||||||||||.||.|||||||||||||||||||||||.|||||........
|
|
7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX
|
|
|
|
gi|171909 180 LYNFCNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV
|
|
180 .....|||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV
|
|
|
|
gi|171909 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
240 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
|
|
gi|171909 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
|
|
gi|171909 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPISKSVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
360 |||||||||||||||||||||||||||.||||||||||||||||||||||||||||||||
|
|
7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
|
|
gi|171909 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRAS
|
|
420 ||||||||||||||||||||||||||||||||||||....................||.|
|
|
7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS
|
|
|
|
gi|171909 480 STLQRLHRNRIWYLDILFSNDLVNHE 506
|
|
480 |||||||||||||||||||||||||| 506
|
|
7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506
|
|
|
|
""",
|
|
)
|
|
hit = record[6]
|
|
self.assertEqual(hit.num, 7)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|5817761|gb|AAD52904.1|AF142733_1")
|
|
self.assertEqual(hit.target.name, "AAD52904")
|
|
self.assertEqual(
|
|
hit.target.description, "maturase-like protein [Callerya reticulata]"
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 2390.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 925.235218722518)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 493)
|
|
self.assertEqual(hsp.annotations["positive"], 498)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 506],
|
|
[ 0, 506]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 506))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "7")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(506))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKEYQAYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|5817761|gb|AAD52904.1|AF142733_1")
|
|
self.assertEqual(hsp.target.name, "AAD52904")
|
|
self.assertEqual(
|
|
hsp.target.description, "maturase-like protein [Callerya reticulata]"
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKEYQ YLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRLITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQL+SSLEEAEIVKSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFFLY+F NRNSLITPKKSISTFSK NPRLFLFLYNFYV EYESIF FLRNQSSHLR KSFSVFFERIFFYAKREHL+KVFPKDFSSTLTFFKDPFIHYVRYQ KSILASKNAPLLMNKWKHYFIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIKKLDIIVPIIPLIRSLAKAKFCNVLGHP+SKSVWADSSDFDIIDRFLRICRNLSHYYNGSSKKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPR SSTL+RLHRNRIWYLDILFSNDLVNHE",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 7 Length: 506 Strand: Plus
|
|
gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like
|
|
protein (matK) gene, complete cds; chloroplast gene for chloroplast
|
|
product
|
|
Target: gi|5817761|gb|AAD52904.1|AF142733_1 Length: 506 Strand: Plus
|
|
maturase-like protein [Callerya reticulata]
|
|
|
|
Score:925 bits(2390), Expect:0,
|
|
Identities:493/506(97%), Positives:498/506(98%), Gaps:0.506(0%)
|
|
|
|
gi|581776 0 MKEYQAYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL
|
|
0 |||||.||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL
|
|
|
|
gi|581776 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLNSSLEEAEIV
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||||.|||||||||
|
|
7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV
|
|
|
|
gi|581776 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASFFHLLRFF
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||........
|
|
7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX
|
|
|
|
gi|581776 180 LYNFCNRNSLITPKKSISTFSKCNPRLFLFLYNFYVWEYESIFLFLRNQSSHLRFKSFSV
|
|
180 .....|||||||||||||||||.|||||||||||||.||||||.||||||||||.|||||
|
|
7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV
|
|
|
|
gi|581776 240 FFERIFFYAKREHLLKVFPKDFSSTLTFFKDPFIHYVRYQEKSILASKNAPLLMNKWKHY
|
|
240 ||||||||||||||.|||||||||||||||||||||||||.|||||||||||||||||||
|
|
7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
|
|
gi|581776 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
|
|
gi|581776 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPISKSVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
360 |||||||||||||||||||||||||||.||||||||||||||||||||||||||||||||
|
|
7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
|
|
gi|581776 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGSEELLEEFFTEEEEILSLIFPRAS
|
|
420 ||||||||||||||||||||||||||||||||||||....................||.|
|
|
7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS
|
|
|
|
gi|581776 480 STLKRLHRNRIWYLDILFSNDLVNHE 506
|
|
480 |||.|||||||||||||||||||||| 506
|
|
7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506
|
|
|
|
""",
|
|
)
|
|
hit = record[7]
|
|
self.assertEqual(hit.num, 8)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|5817762|gb|AAD52905.1|AF142734_1")
|
|
self.assertEqual(hit.target.name, "AAD52905")
|
|
self.assertEqual(
|
|
hit.target.description, "maturase-like protein [Callerya atropurpurea]"
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 2301.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 890.952456465874)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 472)
|
|
self.assertEqual(hsp.annotations["positive"], 488)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 506],
|
|
[ 0, 506]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 506))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "7")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(506))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYTYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|5817762|gb|AAD52905.1|AF142734_1")
|
|
self.assertEqual(hsp.target.name, "AAD52905")
|
|
self.assertEqual(
|
|
hsp.target.description, "maturase-like protein [Callerya atropurpurea]"
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKEYQVYLERDRSRQQDFLYPLIFREY YGLAYSHDFNRSIFVENVGYDNKSSLLIVKRLITRMYQQNHLIIS NDSNKNPFLGYNKNFYSQIIS+ FA+V EIPFF QLSSSLE+AEIVKSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDA FFHLLR FLY+F N N++ TPKKSISTFS+SNPR FLFLYNFYVCEYESIF FLRN+SSHLRLKSFSVFFERIFFYAKREHLV+VF KDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHYFIHLWQ FFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTF+IEI IKKLDIIVPIIPLIRSLAKAKFCNVLGHP+SK VWADSSDFDII+RFLRICRNLSHYYNGSSKKK+LYRIKYILRLSCIKTLACKHKSTVRAFLK+ GSEELLEEFFTEEEEILSLIFPR SSTLQ+LHRNRIWYLDILF+NDLVNHE",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 7 Length: 506 Strand: Plus
|
|
gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like
|
|
protein (matK) gene, complete cds; chloroplast gene for chloroplast
|
|
product
|
|
Target: gi|5817762|gb|AAD52905.1|AF142734_1 Length: 506 Strand: Plus
|
|
maturase-like protein [Callerya atropurpurea]
|
|
|
|
Score:890 bits(2301), Expect:0,
|
|
Identities:472/506(93%), Positives:488/506(96%), Gaps:0.506(0%)
|
|
|
|
gi|581776 0 MKEYQVYLERDRSRQQDFLYPLIFREYTYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL
|
|
0 |||||||||||||||||||||||||||.||||||||||||||||||||||||||||||||
|
|
7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL
|
|
|
|
gi|581776 60 ITRMYQQNHLIISPNDSNKNPFLGYNKNFYSQIISEVFAIVAEIPFFRQLSSSLEQAEIV
|
|
60 |||||||||||||.|||||||||||||||||||||..||.|.|||||.|||||||.||||
|
|
7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV
|
|
|
|
gi|581776 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDAPFFHLLRLF
|
|
120 |||||||||||||||||||||||||||||||||||||||||||||||||||.........
|
|
7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX
|
|
|
|
gi|581776 180 LYNFCNWNTVFTPKKSISTFSRSNPRFFLFLYNFYVCEYESIFLFLRNKSSHLRLKSFSV
|
|
180 .....|.|...||||||||||.||||.||||||||||||||||.||||.|||||||||||
|
|
7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV
|
|
|
|
gi|581776 240 FFERIFFYAKREHLVEVFAKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
240 |||||||||||||||.||.|||||||||||||||||||||||||||||||||||||||||
|
|
7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
|
|
gi|581776 300 FIHLWQSFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFIIEIGIK
|
|
300 ||||||.||||||||||||||||||||||||||||||||||||||||||||||.|||.||
|
|
7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
|
|
gi|581776 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPISKPVWADSSDFDIIERFLRICRNLSHYYNGSS
|
|
360 |||||||||||||||||||||||||||.||.|||||||||||.|||||||||||||||||
|
|
7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
|
|
gi|581776 420 KKKSLYRIKYILRLSCIKTLACKHKSTVRAFLKRLGSEELLEEFFTEEEEILSLIFPRAS
|
|
420 |||.|||||||||||||||||||||||||||||..|....................||.|
|
|
7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS
|
|
|
|
gi|581776 480 STLQKLHRNRIWYLDILFTNDLVNHE 506
|
|
480 ||||.|||||||||||||.||||||| 506
|
|
7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506
|
|
|
|
""",
|
|
)
|
|
hit = record[8]
|
|
self.assertEqual(hit.num, 9)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|152014012|gb|ABS20107.1|")
|
|
self.assertEqual(hit.target.name, "ABS20107")
|
|
self.assertEqual(
|
|
hit.target.description, "maturase-like protein [Astragalus uliginosus]"
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 2293.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 887.870859858535)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 470)
|
|
self.assertEqual(hsp.annotations["positive"], 487)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 506],
|
|
[ 0, 506]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 506))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "7")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(506))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKEYQVFLERDRSRQQDFLYPLIFREYVYGLAYSHDFNRSTFVENVGYDNKYSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|152014012|gb|ABS20107.1|")
|
|
self.assertEqual(hsp.target.name, "ABS20107")
|
|
self.assertEqual(
|
|
hsp.target.description, "maturase-like protein [Astragalus uliginosus]"
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKEYQV+LERDRSRQQDFLYPLIFREY+YGLAYSHDFNRS FVENVGYDNK SLLIVKRLITRMYQQNHLIISANDS KNPFLGYNKNFYSQIIS+GFA+VVEIPFFLQ SSSL+EAEIVKSY NLRSIHSIFPFLEDKF YLNYVSDIRIPYPIHLEILVQILRYWVKDA FFHLLR FLY+F NRNS +TPKKSISTFSKSNPRLFLFLYNFYVCEYESIF FLR +SSHLRLKSFSVFFERIFFYAKREHLV+VF KDFSSTLTFFKDP IHYVRYQGKSILASKNAPLLMNKWKHYFIHLW+CFFDVWSQPGTIHI QLSEHSF+ LGYFSNVRLNRSVVRSQMLQNTFLIEIV KKLDIIVPIIP+IRSLAKAKFCNVLGHP+SK+VWADSSDFDIIDRFLRICRNLSHYYNGSSKKK+LYRIKYILRLSCIKTLACKHKSTVRAFLK+SGSEELLEEFFTEEEEILSLIFPR SSTLQ+LH NRIWYLDILFSNDLVNHE",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 7 Length: 506 Strand: Plus
|
|
gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like
|
|
protein (matK) gene, complete cds; chloroplast gene for chloroplast
|
|
product
|
|
Target: gi|152014012|gb|ABS20107.1| Length: 506 Strand: Plus
|
|
maturase-like protein [Astragalus uliginosus]
|
|
|
|
Score:887 bits(2293), Expect:0,
|
|
Identities:470/506(93%), Positives:487/506(96%), Gaps:0.506(0%)
|
|
|
|
gi|152014 0 MKEYQVFLERDRSRQQDFLYPLIFREYVYGLAYSHDFNRSTFVENVGYDNKYSLLIVKRL
|
|
0 ||||||.||||||||||||||||||||.||||||||||||.||||||||||.||||||||
|
|
7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL
|
|
|
|
gi|152014 60 ITRMYQQNHLIISANDSKKNPFLGYNKNFYSQIISEGFAIVVEIPFFLQFSSSLKEAEIV
|
|
60 |||||||||||||||||.|||||||||||||||||.|||.|||||||||.||||.|||||
|
|
7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV
|
|
|
|
gi|152014 120 KSYKNLRSIHSIFPFLEDKFPYLNYVSDIRIPYPIHLEILVQILRYWVKDAPFFHLLRLF
|
|
120 |||.||||||||||||||||.||||||||||||||||||||||||||||||.........
|
|
7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX
|
|
|
|
gi|152014 180 LYNFCNRNSFLTPKKSISTFSKSNPRLFLFLYNFYVCEYESIFLFLRKKSSHLRLKSFSV
|
|
180 .....||||..||||||||||||||||||||||||||||||||.|||..|||||||||||
|
|
7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV
|
|
|
|
gi|152014 240 FFERIFFYAKREHLVEVFAKDFSSTLTFFKDPLIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
240 |||||||||||||||.||.|||||||||||||.|||||||||||||||||||||||||||
|
|
7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
|
|
gi|152014 300 FIHLWECFFDVWSQPGTIHIKQLSEHSFYLLGYFSNVRLNRSVVRSQMLQNTFLIEIVSK
|
|
300 |||||.||||||||||||||.|||||||..||||||||||||||||||||||||||||.|
|
|
7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
|
|
gi|152014 360 KLDIIVPIIPIIRSLAKAKFCNVLGHPISKAVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
360 ||||||||||.||||||||||||||||.||.|||||||||||||||||||||||||||||
|
|
7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
|
|
gi|152014 420 KKKSLYRIKYILRLSCIKTLACKHKSTVRAFLKRSGSEELLEEFFTEEEEILSLIFPRAS
|
|
420 |||.|||||||||||||||||||||||||||||.||....................||.|
|
|
7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS
|
|
|
|
gi|152014 480 STLQKLHGNRIWYLDILFSNDLVNHE 506
|
|
480 ||||.||.|||||||||||||||||| 506
|
|
7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506
|
|
|
|
""",
|
|
)
|
|
hit = record[9]
|
|
self.assertEqual(hit.num, 10)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|146197442|dbj|BAF57483.1|")
|
|
self.assertEqual(hit.target.name, "BAF57483")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"maturase [Glycyrrhiza uralensis] >gi|146197444|dbj|BAF57484.1| maturase [Glycyrrhiza glabra] >gi|152014018|gb|ABS20110.1| maturase-like protein [Glycyrrhiza pallidiflora]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=506)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 2292.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 887.485660282618)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0)
|
|
self.assertEqual(hsp.annotations["identity"], 471)
|
|
self.assertEqual(hsp.annotations["positive"], 489)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 506],
|
|
[ 0, 506]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 506))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSL...NHE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "7")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like protein (matK) gene, complete cds; chloroplast gene for chloroplast product",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(506))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "7:727..2244")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MKEYQVYLERDRSRQQDFLYPLIFREYIYGIAYSHNLNRSIFVENVGYDNKFSL...NNE')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|146197442|dbj|BAF57483.1|")
|
|
self.assertEqual(hsp.target.name, "BAF57483")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"maturase [Glycyrrhiza uralensis] >gi|146197444|dbj|BAF57484.1| maturase [Glycyrrhiza glabra] >gi|152014018|gb|ABS20110.1| maturase-like protein [Glycyrrhiza pallidiflora]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MKEYQVYLERDRSRQQDFLYPLIFREYIYG+AYSH+ NRSIFVENVGYDNK SLLIVKRLITRMYQQNHLIISANDSNKNPF GYNKN YSQ+ISDGFAVVVEIPFFLQ SSSLEEAEIVKSY+NLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDA FFHLLR FLY+F N NSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIF FLRN+SSHLRLKSFSVFFERIFFYAKREHLV VF KD+S TLT FKDPFIHYVRYQGK+ILAS+NAPLLMNKWKHYFIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIKKLDIIVPIIPLIRSLAKAKFCNVLGHP+SK VWADSSDF+II+RFLRICRNLSHYY+GSSKKK+LYRIKYILRLSCIKTLACKHKSTVRAFLK+ GSEELLEEFFTEEEEILSLIFP+ SSTLQ+LHRNRIWYLDILFSNDLVN+E",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : 7 Length: 506 Strand: Plus
|
|
gi|5817701|gb|AF142731.1|AF142731 Wisteria frutescens maturase-like
|
|
protein (matK) gene, complete cds; chloroplast gene for chloroplast
|
|
product
|
|
Target: gi|146197442|dbj|BAF57483.1| Length: 506 Strand: Plus
|
|
maturase [Glycyrrhiza uralensis] >gi|146197444|dbj|BAF57484.1| maturase
|
|
[Glycyrrhiza glabra] >gi|152014018|gb|ABS20110.1| maturase-like protein
|
|
[Glycyrrhiza pallidiflora]
|
|
|
|
Score:887 bits(2292), Expect:0,
|
|
Identities:471/506(93%), Positives:489/506(97%), Gaps:0.506(0%)
|
|
|
|
gi|146197 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGIAYSHNLNRSIFVENVGYDNKFSLLIVKRL
|
|
0 ||||||||||||||||||||||||||||||.||||..||||||||||||||.||||||||
|
|
7 0 MKEYQVYLERDRSRQQDFLYPLIFREYIYGLAYSHDFNRSIFVENVGYDNKSSLLIVKRL
|
|
|
|
gi|146197 60 ITRMYQQNHLIISANDSNKNPFSGYNKNIYSQLISDGFAVVVEIPFFLQFSSSLEEAEIV
|
|
60 ||||||||||||||||||||||.|||||.|||.||||||||||||||||.||||||||||
|
|
7 60 ITRMYQQNHLIISANDSNKNPFLGYNKNFYSQIISDGFAVVVEIPFFLQLSSSLEEAEIV
|
|
|
|
gi|146197 120 KSYNNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDAPFFHLLRLF
|
|
120 |||.|||||||||||||||||||||||||||||||||||||||||||||||.........
|
|
7 120 KSYHNLRSIHSIFPFLEDKFTYLNYVSDIRIPYPIHLEILVQILRYWVKDASXXXXXXXX
|
|
|
|
gi|146197 180 LYNFCNWNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFLFLRNKSSHLRLKSFSV
|
|
180 .....|.||||||||||||||||||||||||||||||||||||.||||.|||||||||||
|
|
7 180 XXXXSNRNSLITPKKSISTFSKSNPRLFLFLYNFYVCEYESIFRFLRNQSSHLRLKSFSV
|
|
|
|
gi|146197 240 FFERIFFYAKREHLVDVFAKDYSPTLTLFKDPFIHYVRYQGKAILASRNAPLLMNKWKHY
|
|
240 |||||||||||||||.||.||.|.|||.||||||||||||||.||||.||||||||||||
|
|
7 240 FFERIFFYAKREHLVKVFPKDFSSTLTFFKDPFIHYVRYQGKSILASKNAPLLMNKWKHY
|
|
|
|
gi|146197 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
300 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
7 300 FIHLWQCFFDVWSQPGTIHINQLSEHSFHFLGYFSNVRLNRSVVRSQMLQNTFLIEIVIK
|
|
|
|
gi|146197 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPISKPVWADSSDFEIIERFLRICRNLSHYYSGSS
|
|
360 |||||||||||||||||||||||||||.||.||||||||.||.|||||||||||||.|||
|
|
7 360 KLDIIVPIIPLIRSLAKAKFCNVLGHPLSKSVWADSSDFDIIDRFLRICRNLSHYYNGSS
|
|
|
|
gi|146197 420 KKKSLYRIKYILRLSCIKTLACKHKSTVRAFLKRLGSEELLEEFFTEEEEILSLIFPKAS
|
|
420 |||.|||||||||||||||||||||||||||||..|....................|..|
|
|
7 420 KKKNLYRIKYILRLSCIKTLACKHKSTVRAFLKKSGXXXXXXXXXXXXXXXXXXXXPRTS
|
|
|
|
gi|146197 480 STLQKLHRNRIWYLDILFSNDLVNNE 506
|
|
480 ||||.|||||||||||||||||||.| 506
|
|
7 480 STLQRLHRNRIWYLDILFSNDLVNHE 506
|
|
|
|
""",
|
|
)
|
|
|
|
def test_xml_2222_blastx_001_writer(self):
|
|
"""Writing BLASTX 2.2.22+ (xml_2222_blastx_001.xml)."""
|
|
filename = "xml_2222_blastx_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream)
|
|
self.assertEqual(n, 7)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_xml_2222_blastx_001(written_records)
|
|
|
|
def test_xml_21500_blastx_001_parser(self):
|
|
"""Parsing BLASTX 2.15.0+ (xml_21500_blastx_001.xml)."""
|
|
filename = "xml_21500_blastx_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_21500_blastx_001_records(records)
|
|
with Blast.parse(path) as records:
|
|
self.check_xml_21500_blastx_001_records(records)
|
|
with open(path, "rb") as stream:
|
|
record = Blast.read(stream)
|
|
self.check_xml_21500_blastx_001_record(record, xml2=False)
|
|
record = Blast.read(path)
|
|
self.check_xml_21500_blastx_001_record(record, xml2=False)
|
|
with Blast.parse(path) as records:
|
|
self.assertEqual(
|
|
str(records),
|
|
"""\
|
|
Program: BLASTX 2.15.0+
|
|
db: nr
|
|
|
|
Query: AI021773.1 (length=365)
|
|
MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma
|
|
mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin
|
|
mRNA, complete cds, mRNA sequence
|
|
Hits: ---- ----- ----------------------------------------------------------
|
|
# # HSP ID + description
|
|
---- ----- ----------------------------------------------------------
|
|
0 1 emb|VDM03167.1| unnamed protein product, partial [Schi...
|
|
1 1 gb|EPB74633.1| hypothetical protein ANCCEY_06263 [Ancy...
|
|
2 1 ref|XP_009175831.1| hypothetical protein T265_11027 [O...
|
|
3 1 gb|EMD49430.1| actin, putative, partial [Entamoeba his...
|
|
4 1 emb|CAX83035.1| Actin-2, partial [Schistosoma japonicum]
|
|
5 1 emb|VDP83060.1| unnamed protein product, partial [Echi...
|
|
6 1 emb|CAA50205.1| actin, partial [Entamoeba histolytica]
|
|
7 1 emb|VDN44756.1| unnamed protein product, partial [Dibo...
|
|
8 1 ref|XP_027046469.1| actin-1, partial [Pocillopora dami...
|
|
9 1 ref|XP_027046487.1| actin-1-like [Pocillopora damicornis]""",
|
|
)
|
|
|
|
def check_xml_21500_blastx_001_records(self, records, xml2=False):
|
|
self.assertEqual(records.program, "blastx")
|
|
self.assertEqual(records.version, "BLASTX 2.15.0+")
|
|
self.assertEqual(
|
|
records.reference,
|
|
'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.',
|
|
)
|
|
self.assertEqual(records.db, "nr")
|
|
if xml2 is False:
|
|
self.assertIsInstance(records.query, SeqRecord)
|
|
self.assertEqual(records.query.id, "AI021773.1")
|
|
self.assertEqual(
|
|
records.query.description,
|
|
"MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence",
|
|
)
|
|
self.assertEqual(repr(records.query.seq), "Seq(None, length=365)")
|
|
if xml2 is True:
|
|
self.assertEqual(len(records.param), 7)
|
|
else:
|
|
self.assertEqual(len(records.param), 5)
|
|
self.assertEqual(records.param["matrix"], "BLOSUM62")
|
|
self.assertAlmostEqual(records.param["expect"], 10.0)
|
|
self.assertEqual(records.param["gap-open"], 11)
|
|
self.assertEqual(records.param["gap-extend"], 1)
|
|
self.assertEqual(records.param["filter"], "F")
|
|
if xml2 is True:
|
|
self.assertEqual(records.param["cbs"], 2)
|
|
self.assertEqual(records.param["query-gencode"], 1)
|
|
record = next(records)
|
|
self.assertRaises(StopIteration, next, records)
|
|
self.check_xml_21500_blastx_001_record(record, xml2)
|
|
|
|
def check_xml_21500_blastx_001_record(self, record, xml2):
|
|
if xml2 is False:
|
|
self.assertEqual(record.num, 1)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "AI021773.1")
|
|
self.assertEqual(
|
|
record.query.description,
|
|
"MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence",
|
|
)
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=365)")
|
|
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 718367499)
|
|
self.assertEqual(record.stat["db-len"], 277248733561)
|
|
if xml2 is True:
|
|
self.assertEqual(record.stat["hsp-len"], 89)
|
|
self.assertEqual(record.stat["eff-space"], 6826048836800)
|
|
else:
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertEqual(record.stat["eff-space"], 0)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.041)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.267)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.14)
|
|
self.assertEqual(len(record), 10)
|
|
hit = record[0]
|
|
self.assertEqual(hit.num, 1)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "emb|VDM03167.1|")
|
|
self.assertEqual(hit.target.name, "VDM03167")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"unnamed protein product, partial [Schistocephalus solidus]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=132)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 408.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 161.77)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 3.13203e-48, places=53)
|
|
self.assertEqual(hsp.annotations["identity"], 81)
|
|
self.assertEqual(hsp.annotations["positive"], 83)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 108],
|
|
[ 0, 108]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 108))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "AI021773.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(108))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MADEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSY...LTE'}, length=132)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "emb|VDM03167.1|")
|
|
self.assertEqual(hsp.target.name, "VDM03167")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"unnamed protein product, partial [Schistocephalus solidus]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MADEEVQALVVDNGSGMCKAG ++ P G KDSYVGDEAQSKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE",
|
|
)
|
|
self.maxDiff = None
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : AI021773.1 Length: 108 Strand: Plus
|
|
MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma
|
|
mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA,
|
|
complete cds, mRNA sequence
|
|
Target: emb|VDM03167.1| Length: 132 Strand: Plus
|
|
unnamed protein product, partial [Schistocephalus solidus]
|
|
|
|
Score:161 bits(408), Expect:3e-48,
|
|
Identities:81/108(75%), Positives:83/108(77%), Gaps:0.108(0%)
|
|
|
|
emb|VDM03 0 MADEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ
|
|
0 |||||||||||||||||||||...........|...............|.||||||||||
|
|
AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ
|
|
|
|
emb|VDM03 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||| 108
|
|
AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
|
|
""",
|
|
)
|
|
hit = record[1]
|
|
self.assertEqual(hit.num, 2)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gb|EPB74633.1|")
|
|
self.assertEqual(hit.target.name, "EPB74633")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"hypothetical protein ANCCEY_06263 [Ancylostoma ceylanicum]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=119)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 405.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 160.614)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 5.40441e-48, places=53)
|
|
self.assertEqual(hsp.annotations["identity"], 81)
|
|
self.assertEqual(hsp.annotations["positive"], 85)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 115],
|
|
[ 0, 115]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 115))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...RKP')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "AI021773.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(115))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..364")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MCDDDVAALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSY...LKP'}, length=119)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gb|EPB74633.1|")
|
|
self.assertEqual(hsp.target.name, "EPB74633")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"hypothetical protein ANCCEY_06263 [Ancylostoma ceylanicum]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"M D++V ALVVDNGSGMCKAG ++ P G KDSYVGDEAQSKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE H I KP",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : AI021773.1 Length: 115 Strand: Plus
|
|
MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma
|
|
mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA,
|
|
complete cds, mRNA sequence
|
|
Target: gb|EPB74633.1| Length: 119 Strand: Plus
|
|
hypothetical protein ANCCEY_06263 [Ancylostoma ceylanicum]
|
|
|
|
Score:160 bits(405), Expect:5e-48,
|
|
Identities:81/115(70%), Positives:85/115(74%), Gaps:0.115(0%)
|
|
|
|
gb|EPB746 0 MCDDDVAALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ
|
|
0 |.|..|.||||||||||||||...........|...............|.||||||||||
|
|
AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ
|
|
|
|
gb|EPB746 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTEAHSILKP 115
|
|
60 ||||||||||||||||||||||||||||||||||||||||||||||||.|.|.|| 115
|
|
AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTELHCIRKP 115
|
|
|
|
""",
|
|
)
|
|
hit = record[2]
|
|
self.assertEqual(hit.num, 3)
|
|
target = hit.target
|
|
self.assertIsInstance(target, SeqRecord)
|
|
self.assertEqual(target.id, "ref|XP_009175831.1|")
|
|
self.assertEqual(target.name, "XP_009175831")
|
|
seq = target.seq
|
|
self.assertEqual(repr(seq), "Seq(None, length=246)")
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
target.description,
|
|
"hypothetical protein T265_11027 [Opisthorchis viverrini]",
|
|
)
|
|
self.assertIs(target, hit.targets[0])
|
|
self.assertEqual(len(hit.targets), 2)
|
|
target = hit.targets[1]
|
|
self.assertIsInstance(target, SeqRecord)
|
|
self.assertEqual(target.id, "gb|KER20427.1|")
|
|
self.assertEqual(target.name, "KER20427")
|
|
self.assertIs(target.seq, seq)
|
|
self.assertEqual(
|
|
target.description,
|
|
"hypothetical protein T265_11027 [Opisthorchis viverrini]",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
target.description,
|
|
"hypothetical protein T265_11027 [Opisthorchis viverrini] >gb|KER20427.1| hypothetical protein T265_11027 [Opisthorchis viverrini]",
|
|
)
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 413.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 163.696)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 1.69953e-47, places=52)
|
|
self.assertEqual(hsp.annotations["identity"], 81)
|
|
self.assertEqual(hsp.annotations["positive"], 83)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 108],
|
|
[ 0, 108]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 108))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "AI021773.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(108))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MADEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSY...LTE'}, length=246)",
|
|
)
|
|
self.assertEqual(hsp.target.id, hit.target.id)
|
|
self.assertEqual(hsp.target.name, hit.target.name)
|
|
self.assertEqual(hsp.target.description, hit.target.description)
|
|
self.assertEqual(len(hit), 1)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MADEEVQALVVDNGSGMCKAG ++ P G KDSYVGDEAQSKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : AI021773.1 Length: 108 Strand: Plus
|
|
MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma
|
|
mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA,
|
|
complete cds, mRNA sequence
|
|
Target: ref|XP_009175831.1| Length: 246 Strand: Plus
|
|
hypothetical protein T265_11027 [Opisthorchis viverrini]
|
|
|
|
Score:163 bits(413), Expect:2e-47,
|
|
Identities:81/108(75%), Positives:83/108(77%), Gaps:0.108(0%)
|
|
|
|
ref|XP_00 0 MADEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ
|
|
0 |||||||||||||||||||||...........|...............|.||||||||||
|
|
AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ
|
|
|
|
ref|XP_00 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||| 108
|
|
AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
|
|
""",
|
|
)
|
|
else:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : AI021773.1 Length: 108 Strand: Plus
|
|
MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma
|
|
mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA,
|
|
complete cds, mRNA sequence
|
|
Target: ref|XP_009175831.1| Length: 246 Strand: Plus
|
|
hypothetical protein T265_11027 [Opisthorchis viverrini] >gb|KER20427.1|
|
|
hypothetical protein T265_11027 [Opisthorchis viverrini]
|
|
|
|
Score:163 bits(413), Expect:2e-47,
|
|
Identities:81/108(75%), Positives:83/108(77%), Gaps:0.108(0%)
|
|
|
|
ref|XP_00 0 MADEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ
|
|
0 |||||||||||||||||||||...........|...............|.||||||||||
|
|
AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ
|
|
|
|
ref|XP_00 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||| 108
|
|
AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
|
|
""",
|
|
)
|
|
hit = record[3]
|
|
self.assertEqual(hit.num, 4)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gb|EMD49430.1|")
|
|
self.assertEqual(hit.target.name, "EMD49430")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"actin, putative, partial [Entamoeba histolytica KU27]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=124)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 401.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 159.073)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 3.49189e-47, places=52)
|
|
self.assertEqual(hsp.annotations["identity"], 78)
|
|
self.assertEqual(hsp.annotations["positive"], 81)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 108],
|
|
[ 0, 108]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 108))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "AI021773.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(108))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MGDEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHVSVMAGMGQKDAY...LTE'}, length=124)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gb|EMD49430.1|")
|
|
self.assertEqual(hsp.target.name, "EMD49430")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"actin, putative, partial [Entamoeba histolytica KU27]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"M DEEVQALVVDNGSGMCKAG ++ P G KD+YVGDEAQSKRGILTLKYPIEHGIV NWDDMEKIWHHTFYNELRVAPEEHPVLLTE",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : AI021773.1 Length: 108 Strand: Plus
|
|
MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma
|
|
mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA,
|
|
complete cds, mRNA sequence
|
|
Target: gb|EMD49430.1| Length: 124 Strand: Plus
|
|
actin, putative, partial [Entamoeba histolytica KU27]
|
|
|
|
Score:159 bits(401), Expect:3e-47,
|
|
Identities:78/108(72%), Positives:81/108(75%), Gaps:0.108(0%)
|
|
|
|
gb|EMD494 0 MGDEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHVSVMAGMGQKDAYVGDEAQ
|
|
0 |.|||||||||||||||||||...........|...............|.||.|||||||
|
|
AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ
|
|
|
|
gb|EMD494 60 SKRGILTLKYPIEHGIVNNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
60 |||||||||||||||||.|||||||||||||||||||||||||||||| 108
|
|
AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
|
|
""",
|
|
)
|
|
hit = record[4]
|
|
self.assertEqual(hit.num, 5)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "emb|CAX83035.1|")
|
|
self.assertEqual(hit.target.name, "CAX83035")
|
|
self.assertEqual(
|
|
hit.target.description, "Actin-2, partial [Schistosoma japonicum]"
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=252)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 411.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 162.925)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 3.86747e-47, places=52)
|
|
self.assertEqual(hsp.annotations["identity"], 81)
|
|
self.assertEqual(hsp.annotations["positive"], 83)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 108],
|
|
[ 0, 108]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 108))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "AI021773.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(108))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MADEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSY...LTE'}, length=252)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "emb|CAX83035.1|")
|
|
self.assertEqual(hsp.target.name, "CAX83035")
|
|
self.assertEqual(
|
|
hsp.target.description, "Actin-2, partial [Schistosoma japonicum]"
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MADEEVQALVVDNGSGMCKAG ++ P G KDSYVGDEAQSKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : AI021773.1 Length: 108 Strand: Plus
|
|
MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma
|
|
mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA,
|
|
complete cds, mRNA sequence
|
|
Target: emb|CAX83035.1| Length: 252 Strand: Plus
|
|
Actin-2, partial [Schistosoma japonicum]
|
|
|
|
Score:162 bits(411), Expect:4e-47,
|
|
Identities:81/108(75%), Positives:83/108(77%), Gaps:0.108(0%)
|
|
|
|
emb|CAX83 0 MADEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ
|
|
0 |||||||||||||||||||||...........|...............|.||||||||||
|
|
AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ
|
|
|
|
emb|CAX83 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||| 108
|
|
AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
|
|
""",
|
|
)
|
|
hit = record[5]
|
|
self.assertEqual(hit.num, 6)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "emb|VDP83060.1|")
|
|
self.assertEqual(hit.target.name, "VDP83060")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"unnamed protein product, partial [Echinostoma caproni]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=209)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 407.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 161.384)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 4.49182e-47, places=52)
|
|
self.assertEqual(hsp.annotations["identity"], 80)
|
|
self.assertEqual(hsp.annotations["positive"], 83)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 108],
|
|
[ 0, 108]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 108))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "AI021773.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(108))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MADDEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSY...LTE'}, length=209)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "emb|VDP83060.1|")
|
|
self.assertEqual(hsp.target.name, "VDP83060")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"unnamed protein product, partial [Echinostoma caproni]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MAD+EVQALVVDNGSGMCKAG ++ P G KDSYVGDEAQSKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : AI021773.1 Length: 108 Strand: Plus
|
|
MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma
|
|
mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA,
|
|
complete cds, mRNA sequence
|
|
Target: emb|VDP83060.1| Length: 209 Strand: Plus
|
|
unnamed protein product, partial [Echinostoma caproni]
|
|
|
|
Score:161 bits(407), Expect:4e-47,
|
|
Identities:80/108(74%), Positives:83/108(77%), Gaps:0.108(0%)
|
|
|
|
emb|VDP83 0 MADDEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ
|
|
0 |||.|||||||||||||||||...........|...............|.||||||||||
|
|
AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ
|
|
|
|
emb|VDP83 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||| 108
|
|
AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
|
|
""",
|
|
)
|
|
hit = record[6]
|
|
self.assertEqual(hit.num, 7)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "emb|CAA50205.1|")
|
|
self.assertEqual(hit.target.name, "CAA50205")
|
|
self.assertEqual(
|
|
hit.target.description, "actin, partial [Entamoeba histolytica]"
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=137)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 401.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 159.073)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 4.85734e-47, places=52)
|
|
self.assertEqual(hsp.annotations["identity"], 78)
|
|
self.assertEqual(hsp.annotations["positive"], 81)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 108],
|
|
[ 0, 108]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 108))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "AI021773.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(108))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MGDEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHVSVMAGMGQKDAY...LTE'}, length=137)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "emb|CAA50205.1|")
|
|
self.assertEqual(hsp.target.name, "CAA50205")
|
|
self.assertEqual(
|
|
hsp.target.description, "actin, partial [Entamoeba histolytica]"
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"M DEEVQALVVDNGSGMCKAG ++ P G KD+YVGDEAQSKRGILTLKYPIEHGIV NWDDMEKIWHHTFYNELRVAPEEHPVLLTE",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : AI021773.1 Length: 108 Strand: Plus
|
|
MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma
|
|
mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA,
|
|
complete cds, mRNA sequence
|
|
Target: emb|CAA50205.1| Length: 137 Strand: Plus
|
|
actin, partial [Entamoeba histolytica]
|
|
|
|
Score:159 bits(401), Expect:5e-47,
|
|
Identities:78/108(72%), Positives:81/108(75%), Gaps:0.108(0%)
|
|
|
|
emb|CAA50 0 MGDEEVQALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHVSVMAGMGQKDAYVGDEAQ
|
|
0 |.|||||||||||||||||||...........|...............|.||.|||||||
|
|
AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ
|
|
|
|
emb|CAA50 60 SKRGILTLKYPIEHGIVNNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
60 |||||||||||||||||.|||||||||||||||||||||||||||||| 108
|
|
AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
|
|
""",
|
|
)
|
|
hit = record[7]
|
|
self.assertEqual(hit.num, 8)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "emb|VDN44756.1|")
|
|
self.assertEqual(hit.target.name, "VDN44756")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"unnamed protein product, partial [Dibothriocephalus latus]",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=145)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 400.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 158.688)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 6.88203e-47, places=52)
|
|
self.assertEqual(hsp.annotations["identity"], 78)
|
|
self.assertEqual(hsp.annotations["positive"], 82)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 108],
|
|
[ 0, 108]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 108))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "AI021773.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(108))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MGDEDVQALVIDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSY...LTE'}, length=145)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "emb|VDN44756.1|")
|
|
self.assertEqual(hsp.target.name, "VDN44756")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"unnamed protein product, partial [Dibothriocephalus latus]",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"M DE+VQALV+DNGSGMCKAG ++ P G KDSYVGDEAQSKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : AI021773.1 Length: 108 Strand: Plus
|
|
MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma
|
|
mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA,
|
|
complete cds, mRNA sequence
|
|
Target: emb|VDN44756.1| Length: 145 Strand: Plus
|
|
unnamed protein product, partial [Dibothriocephalus latus]
|
|
|
|
Score:158 bits(400), Expect:7e-47,
|
|
Identities:78/108(72%), Positives:82/108(76%), Gaps:0.108(0%)
|
|
|
|
emb|VDN44 0 MGDEDVQALVIDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ
|
|
0 |.||.|||||.||||||||||...........|...............|.||||||||||
|
|
AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ
|
|
|
|
emb|VDN44 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||| 108
|
|
AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
|
|
""",
|
|
)
|
|
hit = record[8]
|
|
self.assertEqual(hit.num, 9)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|XP_027046469.1|")
|
|
self.assertEqual(hit.target.name, "XP_027046469")
|
|
self.assertEqual(
|
|
hit.target.description, "actin-1, partial [Pocillopora damicornis]"
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=122)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 398.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 157.918)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 7.4607e-47, places=52)
|
|
self.assertEqual(hsp.annotations["identity"], 78)
|
|
self.assertEqual(hsp.annotations["positive"], 82)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 108],
|
|
[ 0, 108]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 108))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "AI021773.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(108))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MADEEVAALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSY...LTE'}, length=122)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "ref|XP_027046469.1|")
|
|
self.assertEqual(hsp.target.name, "XP_027046469")
|
|
self.assertEqual(
|
|
hsp.target.description, "actin-1, partial [Pocillopora damicornis]"
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MADEEV ALVVDNGSGMCKAG ++ P G KDSYVGDEAQSKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELR+APEEHP+LLTE",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : AI021773.1 Length: 108 Strand: Plus
|
|
MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma
|
|
mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA,
|
|
complete cds, mRNA sequence
|
|
Target: ref|XP_027046469.1| Length: 122 Strand: Plus
|
|
actin-1, partial [Pocillopora damicornis]
|
|
|
|
Score:157 bits(398), Expect:7e-47,
|
|
Identities:78/108(72%), Positives:82/108(76%), Gaps:0.108(0%)
|
|
|
|
ref|XP_02 0 MADEEVAALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ
|
|
0 ||||||.||||||||||||||...........|...............|.||||||||||
|
|
AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ
|
|
|
|
ref|XP_02 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRIAPEEHPILLTE 108
|
|
60 ||||||||||||||||||||||||||||||||||||.||||||.|||| 108
|
|
AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
|
|
""",
|
|
)
|
|
hit = record[9]
|
|
self.assertEqual(hit.num, 10)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "ref|XP_027046487.1|")
|
|
self.assertEqual(hit.target.name, "XP_027046487")
|
|
self.assertEqual(
|
|
hit.target.description, "actin-1-like [Pocillopora damicornis]"
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=134)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 399.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 158.303)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 9.11071e-47, places=52)
|
|
self.assertEqual(hsp.annotations["identity"], 79)
|
|
self.assertEqual(hsp.annotations["positive"], 82)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 108],
|
|
[ 0, 108]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 108))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSY...LTE')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "AI021773.1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA, complete cds, mRNA sequence",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(108))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "AI021773.1:20..343")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({0: 'MADEDVAALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSY...LTE'}, length=134)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "ref|XP_027046487.1|")
|
|
self.assertEqual(hsp.target.name, "XP_027046487")
|
|
self.assertEqual(
|
|
hsp.target.description, "actin-1-like [Pocillopora damicornis]"
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MADE+V ALVVDNGSGMCKAG ++ P G KDSYVGDEAQSKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : AI021773.1 Length: 108 Strand: Plus
|
|
MAAD0534.RAR Schistosoma mansoni, adult worm (J.C.Parra) Schistosoma
|
|
mansoni cDNA clone MAAD0534.RAR 5' end similar to S. mansoni actin mRNA,
|
|
complete cds, mRNA sequence
|
|
Target: ref|XP_027046487.1| Length: 134 Strand: Plus
|
|
actin-1-like [Pocillopora damicornis]
|
|
|
|
Score:158 bits(399), Expect:9e-47,
|
|
Identities:79/108(73%), Positives:82/108(76%), Gaps:0.108(0%)
|
|
|
|
ref|XP_02 0 MADEDVAALVVDNGSGMCKAGFAGDDAPRAVFPSIVGRPRHQGVMVGMGQKDSYVGDEAQ
|
|
0 ||||.|.||||||||||||||...........|...............|.||||||||||
|
|
AI021773. 0 MADEEVQALVVDNGSGMCKAGIRW**CTKSSIPFHRWTTSTSRCDGWYGSKDSYVGDEAQ
|
|
|
|
ref|XP_02 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
60 |||||||||||||||||||||||||||||||||||||||||||||||| 108
|
|
AI021773. 60 SKRGILTLKYPIEHGIVTNWDDMEKIWHHTFYNELRVAPEEHPVLLTE 108
|
|
|
|
""",
|
|
)
|
|
|
|
def test_xml2_21500_blastx_001_parser(self):
|
|
"""Parsing BLASTX 2.15.0+ (xml2_21500_blastx_001.xml)."""
|
|
filename = "xml2_21500_blastx_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_21500_blastx_001_records(records, xml2=True)
|
|
with Blast.parse(path) as records:
|
|
self.check_xml_21500_blastx_001_records(records, xml2=True)
|
|
with open(path, "rb") as stream:
|
|
record = Blast.read(stream)
|
|
self.check_xml_21500_blastx_001_record(record, xml2=True)
|
|
record = Blast.read(path)
|
|
self.check_xml_21500_blastx_001_record(record, xml2=True)
|
|
|
|
def test_xml_21500_blastx_001_writer(self):
|
|
"""Writing BLASTX 2.15.0+ (xml_21500_blastx_001.xml)."""
|
|
filename = "xml_21500_blastx_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream)
|
|
self.assertEqual(n, 1)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_xml_21500_blastx_001_records(written_records, xml2=False)
|
|
|
|
def test_xml2_21500_blastx_001_writer(self):
|
|
"""Writing BLASTX 2.15.0+ XML2 (xml2_21500_blastx_001.xml)."""
|
|
filename = "xml2_21500_blastx_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream, fmt="XML2")
|
|
self.assertEqual(n, 1)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_xml_21500_blastx_001_records(written_records, xml2=True)
|
|
|
|
|
|
class TestTBlastn(unittest.TestCase):
|
|
"""Test the Blast XML parser for tblastn output."""
|
|
|
|
def test_xml_21500_tblastn_001_parser(self):
|
|
"""Parsing TBLASTN 2.15.0+ (xml_21500_tblastn_001.xml)."""
|
|
filename = "xml_21500_tblastn_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_21500_tblastn_001_records(records)
|
|
with Blast.parse(path) as records:
|
|
self.check_xml_21500_tblastn_001_records(records)
|
|
with open(path, "rb") as stream:
|
|
record = Blast.read(stream)
|
|
self.check_xml_21500_tblastn_001_record(record, xml2=False)
|
|
record = Blast.read(path)
|
|
self.check_xml_21500_tblastn_001_record(record, xml2=False)
|
|
with Blast.parse(path) as records:
|
|
self.assertEqual(
|
|
str(records),
|
|
"""\
|
|
Program: TBLASTN 2.15.0+
|
|
db: nt
|
|
|
|
Query: CAJ99216.1 (length=234)
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Hits: ---- ----- ----------------------------------------------------------
|
|
# # HSP ID + description
|
|
---- ----- ----------------------------------------------------------
|
|
0 1 gi|109713861|emb|AM260522.1| Helicobacter acinonychis ...
|
|
1 1 gi|1336928286|emb|LT900055.1| Helicobacter acinonychis...
|
|
2 1 gi|1033012332|gb|CP011486.1| Helicobacter pylori strai...
|
|
3 1 gi|2641533851|gb|CP078169.1| Helicobacter pylori strai...
|
|
4 1 gi|2641529532|gb|CP078166.1| Helicobacter pylori strai...
|
|
5 1 gi|1033009499|gb|CP011484.1| Helicobacter pylori strai...
|
|
6 1 gi|317010283|gb|CP002336.1| Helicobacter pylori SouthA...
|
|
7 1 gi|2641538240|gb|CP078172.1| Helicobacter pylori strai...
|
|
8 1 gi|2640367186|gb|CP079077.1| Helicobacter pylori strai...
|
|
9 1 gi|532105813|gb|CP006691.1| Helicobacter pylori SouthA...""",
|
|
)
|
|
|
|
def test_xml2_21500_tblastn_001_parser(self):
|
|
"""Parsing TBLASTN 2.15.0+ (xml2_21500_tblastn_001.xml)."""
|
|
filename = "xml2_21500_tblastn_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_21500_tblastn_001_records(records, xml2=True)
|
|
with Blast.parse(path) as records:
|
|
self.check_xml_21500_tblastn_001_records(records, xml2=True)
|
|
with open(path, "rb") as stream:
|
|
record = Blast.read(stream)
|
|
self.check_xml_21500_tblastn_001_record(record, xml2=True)
|
|
record = Blast.read(path)
|
|
self.check_xml_21500_tblastn_001_record(record, xml2=True)
|
|
|
|
def check_xml_21500_tblastn_001_records(self, records, xml2=False):
|
|
self.assertEqual(records.program, "tblastn")
|
|
self.assertEqual(records.version, "TBLASTN 2.15.0+")
|
|
self.assertEqual(
|
|
records.reference,
|
|
'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.',
|
|
)
|
|
self.assertEqual(records.db, "nt")
|
|
if xml2 is False:
|
|
self.assertIsInstance(records.query, SeqRecord)
|
|
self.assertEqual(records.query.id, "CAJ99216.1")
|
|
self.assertEqual(
|
|
records.query.description, "tim [Helicobacter acinonychis str. Sheeba]"
|
|
)
|
|
self.assertEqual(repr(records.query.seq), "Seq(None, length=234)")
|
|
self.assertEqual(len(records.param), 5)
|
|
else:
|
|
self.assertEqual(len(records.param), 7)
|
|
self.assertEqual(records.param["matrix"], "BLOSUM62")
|
|
self.assertAlmostEqual(records.param["expect"], 10.0)
|
|
self.assertEqual(records.param["gap-open"], 11)
|
|
self.assertEqual(records.param["gap-extend"], 1)
|
|
self.assertEqual(records.param["filter"], "L;")
|
|
if xml2 is True:
|
|
self.assertEqual(records.param["cbs"], 2)
|
|
self.assertEqual(records.param["db-gencode"], 1)
|
|
record = next(records)
|
|
self.assertRaises(StopIteration, next, records)
|
|
self.check_xml_21500_tblastn_001_record(record, xml2)
|
|
|
|
def check_xml_21500_tblastn_001_record(self, record, xml2):
|
|
if xml2 is False:
|
|
self.assertEqual(record.num, 1)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "CAJ99216.1")
|
|
self.assertEqual(
|
|
record.query.description, "tim [Helicobacter acinonychis str. Sheeba]"
|
|
)
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=234)")
|
|
if xml2 is True:
|
|
self.assertEqual(len(record.query.features), 1)
|
|
feature = record.query.features[0]
|
|
self.assertEqual(feature.type, "masking")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(101), ExactPosition(116))"
|
|
)
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 104338276)
|
|
self.assertEqual(record.stat["db-len"], 348058846)
|
|
if xml2 is True:
|
|
self.assertEqual(record.stat["hsp-len"], 168)
|
|
self.assertEqual(record.stat["eff-space"], 34567702523838)
|
|
else:
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertEqual(record.stat["eff-space"], 0)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.041)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.267)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.14)
|
|
self.assertEqual(len(record), 10)
|
|
|
|
hit = record[0]
|
|
self.assertEqual(hit.num, 1)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|109713861|emb|AM260522.1|")
|
|
self.assertEqual(hit.target.name, "AM260522")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Helicobacter acinonychis str. Sheeba complete genome, strain Sheeba",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=1553927)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1137.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 442.58)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 1.28996e-138, places=143)
|
|
self.assertEqual(hsp.annotations["identity"], 234)
|
|
self.assertEqual(hsp.annotations["positive"], 234)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 234],
|
|
[ 0, 234]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 234))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "CAJ99216.1")
|
|
self.assertEqual(
|
|
hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|109713861|emb|AM260522.1|")
|
|
self.assertEqual(hsp.target.name, "AM260522")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Helicobacter acinonychis str. Sheeba complete genome, strain Sheeba",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(1553927))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(
|
|
feature.qualifiers["coded_by"],
|
|
"gi|109713861|emb|AM260522.1|:325802..326503",
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQNAYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNFLKEKFDFFKDKKFKIVYCIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHGFLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL",
|
|
)
|
|
if xml2 is False:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|109713861|emb|AM260522.1| Length: 234 Strand: Plus
|
|
Helicobacter acinonychis str. Sheeba complete genome, strain Sheeba
|
|
|
|
Score:442 bits(1137), Expect:1e-138,
|
|
Identities:234/234(100%), Positives:234/234(100%), Gaps:0.234(0%)
|
|
|
|
gi|109713 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|109713 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNFLKEKFDFFKDKKFKIVY
|
|
60 ||||||||||||||||||||||||||||||||||||||||||...............|||
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY
|
|
|
|
gi|109713 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|109713 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
180 |||||||||||||||||||||||||||||||||||||||||||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
else: # XML2 is True
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|109713861|emb|AM260522.1| Length: 234 Strand: Plus
|
|
Helicobacter acinonychis str. Sheeba complete genome, strain Sheeba
|
|
|
|
Score:442 bits(1137), Expect:1e-138,
|
|
Identities:234/234(100%), Positives:234/234(100%), Gaps:0.234(0%)
|
|
|
|
gi|109713 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|109713 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNFLKEKFDFFKDKKFKIVY
|
|
60 ||||||||||||||||||||||||||||||||||||||||||...............|||
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY
|
|
|
|
gi|109713 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|109713 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
180 |||||||||||||||||||||||||||||||||||||||||||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
hit = record[1]
|
|
self.assertEqual(hit.num, 2)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|1336928286|emb|LT900055.1|")
|
|
self.assertEqual(hit.target.name, "LT900055")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Helicobacter acinonychis isolate 212_9 genome assembly, chromosome: I",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=1550239)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1128.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 439.113)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 2.05117e-137, places=142)
|
|
self.assertEqual(hsp.annotations["identity"], 232)
|
|
self.assertEqual(hsp.annotations["positive"], 232)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 234],
|
|
[ 0, 234]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 234))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "CAJ99216.1")
|
|
self.assertEqual(
|
|
hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|1336928286|emb|LT900055.1|")
|
|
self.assertEqual(hsp.target.name, "LT900055")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Helicobacter acinonychis isolate 212_9 genome assembly, chromosome: I",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(1550239))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(
|
|
feature.qualifiers["coded_by"],
|
|
"gi|1336928286|emb|LT900055.1|:325704..326405",
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQNAYPKDCGAFTGEITSKHLEELKINTLLIGHSERR LLKESPNFLKEKFDFFKDKKFKIVYCIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHGFLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGS SLELENFKTIISFL",
|
|
)
|
|
if xml2 is False:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|1336928286|emb|LT900055.1| Length: 234 Strand: Plus
|
|
Helicobacter acinonychis isolate 212_9 genome assembly, chromosome: I
|
|
|
|
Score:439 bits(1128), Expect:2e-137,
|
|
Identities:232/234(99%), Positives:232/234(99%), Gaps:0.234(0%)
|
|
|
|
gi|133692 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|133692 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRALLKESPNFLKEKFDFFKDKKFKIVY
|
|
60 ||||||||||||||||||||||||||||||||||.|||||||...............|||
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY
|
|
|
|
gi|133692 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|133692 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSASLELENFKTIISFL 234
|
|
180 |||||||||||||||||||||||||||||||||||||||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
else: # XML2 is True
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|1336928286|emb|LT900055.1| Length: 234 Strand: Plus
|
|
Helicobacter acinonychis isolate 212_9 genome assembly, chromosome: I
|
|
|
|
Score:439 bits(1128), Expect:2e-137,
|
|
Identities:232/234(99%), Positives:232/234(99%), Gaps:0.234(0%)
|
|
|
|
gi|133692 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
0 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|133692 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRALLKESPNFLKEKFDFFKDKKFKIVY
|
|
60 ||||||||||||||||||||||||||||||||||.|||||||...............|||
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY
|
|
|
|
gi|133692 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
120 ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|133692 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSASLELENFKTIISFL 234
|
|
180 |||||||||||||||||||||||||||||||||||||||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
hit = record[2]
|
|
self.assertEqual(hit.num, 3)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|1033012332|gb|CP011486.1|")
|
|
self.assertEqual(hit.target.name, "CP011486")
|
|
self.assertEqual(
|
|
hit.target.description, "Helicobacter pylori strain K26A1, complete genome"
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=1570310)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1076.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 419.083)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 2.02413e-130, places=135)
|
|
self.assertEqual(hsp.annotations["identity"], 221)
|
|
self.assertEqual(hsp.annotations["positive"], 224)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 234],
|
|
[ 0, 234]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 234))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "CAJ99216.1")
|
|
self.assertEqual(
|
|
hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|1033012332|gb|CP011486.1|")
|
|
self.assertEqual(hsp.target.name, "CP011486")
|
|
self.assertEqual(
|
|
hsp.target.description, "Helicobacter pylori strain K26A1, complete genome"
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(1570310))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(
|
|
feature.qualifiers["coded_by"],
|
|
"gi|1033012332|gb|CP011486.1|:196043..196744",
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLG QNAYPKDCGAFTGEITSKHLEELKINTLLIGHSERR LLKESP+FLKEKFDFFKDK FKI+YCIGEDLKTREKGL AVKEFLNEQLENIDL Y NLIVAYEPIWAIGT KSASLEDIYLTHGFLKQ LNQK PLLYGGSVNTQNAKEILGIDSVDGLL+GS SLELENFKTIISFL",
|
|
)
|
|
if xml2 is False:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|1033012332|gb|CP011486.1| Length: 234 Strand: Plus
|
|
Helicobacter pylori strain K26A1, complete genome
|
|
|
|
Score:419 bits(1076), Expect:2e-130,
|
|
Identities:221/234(94%), Positives:224/234(96%), Gaps:0.234(0%)
|
|
|
|
gi|103301 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGAQN
|
|
0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||.||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|103301 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRTLLKESPSFLKEKFDFFKDKNFKIIY
|
|
60 ||||||||||||||||||||||||||||||||||.||||||................|.|
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY
|
|
|
|
gi|103301 120 CIGEDLKTREKGLAAVKEFLNEQLENIDLSYHNLIVAYEPIWAIGTKKSASLEDIYLTHG
|
|
120 |||||||||||||.|||||||||||||||.|.||||||||||||||.|||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|103301 180 FLKQILNQKTPLLYGGSVNTQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234
|
|
180 ||||.||||.||||||||||||||||||||||||||.||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
else: # xml2 is True
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|1033012332|gb|CP011486.1| Length: 234 Strand: Plus
|
|
Helicobacter pylori strain K26A1, complete genome
|
|
|
|
Score:419 bits(1076), Expect:2e-130,
|
|
Identities:221/234(94%), Positives:224/234(96%), Gaps:0.234(0%)
|
|
|
|
gi|103301 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGAQN
|
|
0 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||.||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|103301 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRTLLKESPSFLKEKFDFFKDKNFKIIY
|
|
60 ||||||||||||||||||||||||||||||||||.||||||................|.|
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY
|
|
|
|
gi|103301 120 CIGEDLKTREKGLAAVKEFLNEQLENIDLSYHNLIVAYEPIWAIGTKKSASLEDIYLTHG
|
|
120 |||||||||||||.|||||||||||||||.|.||||||||||||||.|||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|103301 180 FLKQILNQKTPLLYGGSVNTQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234
|
|
180 ||||.||||.||||||||||||||||||||||||||.||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
hit = record[3]
|
|
self.assertEqual(hit.num, 4)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|2641533851|gb|CP078169.1|")
|
|
self.assertEqual(hit.target.name, "CP078169")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Helicobacter pylori strain HpGP-ZAF-006 chromosome, complete genome",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=1641225)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1069.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 416.387)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 1.58851e-129, places=134)
|
|
self.assertEqual(hsp.annotations["identity"], 221)
|
|
self.assertEqual(hsp.annotations["positive"], 224)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 234],
|
|
[ 0, 234]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 234))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "CAJ99216.1")
|
|
self.assertEqual(
|
|
hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|2641533851|gb|CP078169.1|")
|
|
self.assertEqual(hsp.target.name, "CP078169")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Helicobacter pylori strain HpGP-ZAF-006 chromosome, complete genome",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(1641225))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(
|
|
feature.qualifiers["coded_by"],
|
|
"gi|2641533851|gb|CP078169.1|:260420..261121",
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCD+VFVFPDFLGLLPNAFLHFTLG QNAYPKDCGAFTGEITSKHLEELKI+TLLIGHSERR LLKESP+FLKEKFDFFKDK FKIVYCIGEDLKTREKGLGAVKEFLNEQLENIDL Y NLIVAYEPIWAIGT KSASLEDIYLTHGFLKQ LNQK PLLYGGSVN QNAKEILGIDSVDGLLIGS SLELENFKTIISFL",
|
|
)
|
|
if xml2 is False:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|2641533851|gb|CP078169.1| Length: 234 Strand: Plus
|
|
Helicobacter pylori strain HpGP-ZAF-006 chromosome, complete genome
|
|
|
|
Score:416 bits(1069), Expect:2e-129,
|
|
Identities:221/234(94%), Positives:224/234(96%), Gaps:0.234(0%)
|
|
|
|
gi|264153 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN
|
|
0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|264153 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRALLKESPSFLKEKFDFFKDKNFKIVY
|
|
60 |||||||||||||||||||||||.||||||||||.||||||................|||
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY
|
|
|
|
gi|264153 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLSYHNLIVAYEPIWAIGTKKSASLEDIYLTHG
|
|
120 |||||||||||||||||||||||||||||.|.||||||||||||||.|||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|264153 180 FLKQILNQKTPLLYGGSVNAQNAKEILGIDSVDGLLIGSASLELENFKTIISFL 234
|
|
180 ||||.||||.|||||||||.|||||||||||||||||||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
else: # xml2 is True
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|2641533851|gb|CP078169.1| Length: 234 Strand: Plus
|
|
Helicobacter pylori strain HpGP-ZAF-006 chromosome, complete genome
|
|
|
|
Score:416 bits(1069), Expect:2e-129,
|
|
Identities:221/234(94%), Positives:224/234(96%), Gaps:0.234(0%)
|
|
|
|
gi|264153 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN
|
|
0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|264153 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRALLKESPSFLKEKFDFFKDKNFKIVY
|
|
60 |||||||||||||||||||||||.||||||||||.||||||................|||
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY
|
|
|
|
gi|264153 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLSYHNLIVAYEPIWAIGTKKSASLEDIYLTHG
|
|
120 |||||||||||||||||||||||||||||.|.||||||||||||||.|||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|264153 180 FLKQILNQKTPLLYGGSVNAQNAKEILGIDSVDGLLIGSASLELENFKTIISFL 234
|
|
180 ||||.||||.|||||||||.|||||||||||||||||||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
hit = record[4]
|
|
self.assertEqual(hit.num, 5)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|2641529532|gb|CP078166.1|")
|
|
self.assertEqual(hit.target.name, "CP078166")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Helicobacter pylori strain HpGP-ZAF-009 chromosome, complete genome",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=1681930)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1067.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 415.616)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 3.21842e-129, places=134)
|
|
self.assertEqual(hsp.annotations["identity"], 220)
|
|
self.assertEqual(hsp.annotations["positive"], 226)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 234],
|
|
[ 0, 234]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 234))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "CAJ99216.1")
|
|
self.assertEqual(
|
|
hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|2641529532|gb|CP078166.1|")
|
|
self.assertEqual(hsp.target.name, "CP078166")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Helicobacter pylori strain HpGP-ZAF-009 chromosome, complete genome",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(1681930))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(
|
|
feature.qualifiers["coded_by"],
|
|
"gi|2641529532|gb|CP078166.1|:198327..199028",
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCD+VFVFPDFLGLLPNAFLHFTLG QNAYPKDCGAFTGEITSKHLEELKI+TLLIGHSERR LLKESP+FLKEKFDFFKDKKFKI+YCIGEDLKTREKGLGAVKEFLNEQLENIDL Y +LIVAYEPIWAIGT KSASLEDIYLTHGFLKQ LNQK PLLYGGSVNTQNAKEILGIDSVDGLL+GS SLELENFKTIISFL",
|
|
)
|
|
if xml2 is False:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|2641529532|gb|CP078166.1| Length: 234 Strand: Plus
|
|
Helicobacter pylori strain HpGP-ZAF-009 chromosome, complete genome
|
|
|
|
Score:415 bits(1067), Expect:3e-129,
|
|
Identities:220/234(94%), Positives:226/234(97%), Gaps:0.234(0%)
|
|
|
|
gi|264152 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN
|
|
0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|264152 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRTLLKESPSFLKEKFDFFKDKKFKIIY
|
|
60 |||||||||||||||||||||||.||||||||||.||||||................|.|
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY
|
|
|
|
gi|264152 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLSYHHLIVAYEPIWAIGTKKSASLEDIYLTHG
|
|
120 |||||||||||||||||||||||||||||.|..|||||||||||||.|||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|264152 180 FLKQILNQKTPLLYGGSVNTQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234
|
|
180 ||||.||||.||||||||||||||||||||||||||.||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
else: # xml2 is True
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|2641529532|gb|CP078166.1| Length: 234 Strand: Plus
|
|
Helicobacter pylori strain HpGP-ZAF-009 chromosome, complete genome
|
|
|
|
Score:415 bits(1067), Expect:3e-129,
|
|
Identities:220/234(94%), Positives:226/234(97%), Gaps:0.234(0%)
|
|
|
|
gi|264152 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN
|
|
0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|264152 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRTLLKESPSFLKEKFDFFKDKKFKIIY
|
|
60 |||||||||||||||||||||||.||||||||||.||||||................|.|
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY
|
|
|
|
gi|264152 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLSYHHLIVAYEPIWAIGTKKSASLEDIYLTHG
|
|
120 |||||||||||||||||||||||||||||.|..|||||||||||||.|||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|264152 180 FLKQILNQKTPLLYGGSVNTQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234
|
|
180 ||||.||||.||||||||||||||||||||||||||.||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
hit = record[5]
|
|
self.assertEqual(hit.num, 6)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|1033009499|gb|CP011484.1|")
|
|
self.assertEqual(hit.target.name, "CP011484")
|
|
self.assertEqual(
|
|
hit.target.description, "Helicobacter pylori strain CC33C, complete genome"
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=1659899)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1066.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 415.231)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 4.05935e-129, places=134)
|
|
self.assertEqual(hsp.annotations["identity"], 220)
|
|
self.assertEqual(hsp.annotations["positive"], 227)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 234],
|
|
[ 0, 234]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 234))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "CAJ99216.1")
|
|
self.assertEqual(
|
|
hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MTKIAMANFKSSMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|1033009499|gb|CP011484.1|")
|
|
self.assertEqual(hsp.target.name, "CP011484")
|
|
self.assertEqual(
|
|
hsp.target.description, "Helicobacter pylori strain CC33C, complete genome"
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(1659899))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(
|
|
feature.qualifiers["coded_by"],
|
|
"gi|1033009499|gb|CP011484.1|:248047..248748",
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTKIAMANFKS+MPIFKSHAYLKELEKTLKPQHCD+VFVFPDFLGLLPNAFLHFTLG QNAYPKDCGAFTGEITSKHLEELKI+TLLIGHSERRVLLKESP+FLKEKFDFFKDKKFKIVYCIGEDLKTREKGLGAVKEFLNEQLENIDL+Y +LIVAYEPIWAIGT KSASLEDIYLTHGFLKQ LNQK PLLYGGSVN QNAKEILGIDSVDGLL+GS SLELENFKTIISFL",
|
|
)
|
|
if xml2 is False:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|1033009499|gb|CP011484.1| Length: 234 Strand: Plus
|
|
Helicobacter pylori strain CC33C, complete genome
|
|
|
|
Score:415 bits(1066), Expect:4e-129,
|
|
Identities:220/234(94%), Positives:227/234(97%), Gaps:0.234(0%)
|
|
|
|
gi|103300 0 MTKIAMANFKSSMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN
|
|
0 |||||||||||.|||||||||||||||||||||||.|||||||||||||||||||||.||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|103300 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRVLLKESPSFLKEKFDFFKDKKFKIVY
|
|
60 |||||||||||||||||||||||.|||||||||||||||||................|||
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY
|
|
|
|
gi|103300 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLNYHHLIVAYEPIWAIGTKKSASLEDIYLTHG
|
|
120 |||||||||||||||||||||||||||||.|..|||||||||||||.|||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|103300 180 FLKQILNQKTPLLYGGSVNAQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234
|
|
180 ||||.||||.|||||||||.||||||||||||||||.||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
else: # xml2 is True
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|1033009499|gb|CP011484.1| Length: 234 Strand: Plus
|
|
Helicobacter pylori strain CC33C, complete genome
|
|
|
|
Score:415 bits(1066), Expect:4e-129,
|
|
Identities:220/234(94%), Positives:227/234(97%), Gaps:0.234(0%)
|
|
|
|
gi|103300 0 MTKIAMANFKSSMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN
|
|
0 |||||||||||.|||||||||||||||||||||||.|||||||||||||||||||||.||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|103300 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRVLLKESPSFLKEKFDFFKDKKFKIVY
|
|
60 |||||||||||||||||||||||.|||||||||||||||||................|||
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY
|
|
|
|
gi|103300 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLNYHHLIVAYEPIWAIGTKKSASLEDIYLTHG
|
|
120 |||||||||||||||||||||||||||||.|..|||||||||||||.|||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|103300 180 FLKQILNQKTPLLYGGSVNAQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234
|
|
180 ||||.||||.|||||||||.||||||||||||||||.||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
hit = record[6]
|
|
self.assertEqual(hit.num, 7)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|317010283|gb|CP002336.1|")
|
|
self.assertEqual(hit.target.name, "CP002336")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Helicobacter pylori SouthAfrica7, complete genome",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=1653913)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1066.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 415.231)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 4.73879e-129, places=134)
|
|
self.assertEqual(hsp.annotations["identity"], 221)
|
|
self.assertEqual(hsp.annotations["positive"], 226)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 234],
|
|
[ 0, 234]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 234))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "CAJ99216.1")
|
|
self.assertEqual(
|
|
hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|317010283|gb|CP002336.1|")
|
|
self.assertEqual(hsp.target.name, "CP002336")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Helicobacter pylori SouthAfrica7, complete genome",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(1653913))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(
|
|
feature.qualifiers["coded_by"],
|
|
"gi|317010283|gb|CP002336.1|:194539..195240",
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCD+VFVFPDFLGLLPNAFLHFTLG QNAYPKD GAFTGEITSKHLEELKINTLLIGHSERRVLLKESP+FLKEKFDFFKDKKFKI+YCIGEDLKTREKGLGAVKEFLNEQLENIDL Y +LIVAYEPIWAIGT KSASLEDIYLTHGFLKQ LNQK PLLYGGSVNTQNAKEILGIDSVDGLL+GS SLELENFKTIISFL",
|
|
)
|
|
if xml2 is False:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|317010283|gb|CP002336.1| Length: 234 Strand: Plus
|
|
Helicobacter pylori SouthAfrica7, complete genome
|
|
|
|
Score:415 bits(1066), Expect:5e-129,
|
|
Identities:221/234(94%), Positives:226/234(97%), Gaps:0.234(0%)
|
|
|
|
gi|317010 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN
|
|
0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|317010 60 AYPKDYGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPSFLKEKFDFFKDKKFKIIY
|
|
60 |||||.|||||||||||||||||||||||||||||||||||................|.|
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY
|
|
|
|
gi|317010 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLSYHHLIVAYEPIWAIGTKKSASLEDIYLTHG
|
|
120 |||||||||||||||||||||||||||||.|..|||||||||||||.|||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|317010 180 FLKQILNQKTPLLYGGSVNTQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234
|
|
180 ||||.||||.||||||||||||||||||||||||||.||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
else: # xml2 is True
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|317010283|gb|CP002336.1| Length: 234 Strand: Plus
|
|
Helicobacter pylori SouthAfrica7, complete genome
|
|
|
|
Score:415 bits(1066), Expect:5e-129,
|
|
Identities:221/234(94%), Positives:226/234(97%), Gaps:0.234(0%)
|
|
|
|
gi|317010 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN
|
|
0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|317010 60 AYPKDYGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPSFLKEKFDFFKDKKFKIIY
|
|
60 |||||.|||||||||||||||||||||||||||||||||||................|.|
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY
|
|
|
|
gi|317010 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLSYHHLIVAYEPIWAIGTKKSASLEDIYLTHG
|
|
120 |||||||||||||||||||||||||||||.|..|||||||||||||.|||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|317010 180 FLKQILNQKTPLLYGGSVNTQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234
|
|
180 ||||.||||.||||||||||||||||||||||||||.||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
hit = record[7]
|
|
self.assertEqual(hit.num, 8)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|2641538240|gb|CP078172.1|")
|
|
self.assertEqual(hit.target.name, "CP078172")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Helicobacter pylori strain HpGP-ZAF-001 chromosome, complete genome",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=1714499)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1062.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 413.69)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 1.37323e-128, places=133)
|
|
self.assertEqual(hsp.annotations["identity"], 219)
|
|
self.assertEqual(hsp.annotations["positive"], 225)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 234],
|
|
[ 0, 234]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 234))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "CAJ99216.1")
|
|
self.assertEqual(
|
|
hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|2641538240|gb|CP078172.1|")
|
|
self.assertEqual(hsp.target.name, "CP078172")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Helicobacter pylori strain HpGP-ZAF-001 chromosome, complete genome",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(1714499))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(
|
|
feature.qualifiers["coded_by"],
|
|
"complement(gi|2641538240|gb|CP078172.1|:1021670..1022371)",
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCD+VFVFPDFLGLLPNAFLHFTLG QNAYPKDCGAFTGEITSKHLEELKI+TLLIGHSERR LLKESP+FLKEKFDFFKDKKFKI+YCIGEDLKTREKGLGAVKEFLNEQLENIDL Y +LIVAYEPIWAIGT KSASLEDIYLTHGFLKQ LNQK PLLYGGSVN QNAKEILGIDSVDGLL+GS SLELENFKTIISFL",
|
|
)
|
|
if xml2 is False:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|2641538240|gb|CP078172.1| Length: 234 Strand: Plus
|
|
Helicobacter pylori strain HpGP-ZAF-001 chromosome, complete genome
|
|
|
|
Score:413 bits(1062), Expect:1e-128,
|
|
Identities:219/234(94%), Positives:225/234(96%), Gaps:0.234(0%)
|
|
|
|
gi|264153 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN
|
|
0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|264153 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRTLLKESPSFLKEKFDFFKDKKFKIIY
|
|
60 |||||||||||||||||||||||.||||||||||.||||||................|.|
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY
|
|
|
|
gi|264153 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLSYHHLIVAYEPIWAIGTKKSASLEDIYLTHG
|
|
120 |||||||||||||||||||||||||||||.|..|||||||||||||.|||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|264153 180 FLKQILNQKTPLLYGGSVNAQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234
|
|
180 ||||.||||.|||||||||.||||||||||||||||.||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
else: # xml2 is True
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|2641538240|gb|CP078172.1| Length: 234 Strand: Plus
|
|
Helicobacter pylori strain HpGP-ZAF-001 chromosome, complete genome
|
|
|
|
Score:413 bits(1062), Expect:1e-128,
|
|
Identities:219/234(94%), Positives:225/234(96%), Gaps:0.234(0%)
|
|
|
|
gi|264153 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN
|
|
0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|264153 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRTLLKESPSFLKEKFDFFKDKKFKIIY
|
|
60 |||||||||||||||||||||||.||||||||||.||||||................|.|
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY
|
|
|
|
gi|264153 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLSYHHLIVAYEPIWAIGTKKSASLEDIYLTHG
|
|
120 |||||||||||||||||||||||||||||.|..|||||||||||||.|||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|264153 180 FLKQILNQKTPLLYGGSVNAQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234
|
|
180 ||||.||||.|||||||||.||||||||||||||||.||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
hit = record[8]
|
|
self.assertEqual(hit.num, 9)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|2640367186|gb|CP079077.1|")
|
|
self.assertEqual(hit.target.name, "CP079077")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Helicobacter pylori strain HpGP-ARG-001 chromosome, complete genome",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=1624657)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1059.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 412.535)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 3.5777e-128, places=132)
|
|
self.assertEqual(hsp.annotations["identity"], 220)
|
|
self.assertEqual(hsp.annotations["positive"], 225)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 234],
|
|
[ 0, 234]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 234))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "CAJ99216.1")
|
|
self.assertEqual(
|
|
hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHFDRVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|2640367186|gb|CP079077.1|")
|
|
self.assertEqual(hsp.target.name, "CP079077")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Helicobacter pylori strain HpGP-ARG-001 chromosome, complete genome",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(1624657))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(
|
|
feature.qualifiers["coded_by"],
|
|
"gi|2640367186|gb|CP079077.1|:197481..198182",
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQH DRVFVFPDFLGLLPNAFLHFTLGVQNAYP+DCGAFTGEITSKHLEELKI+TLLIGHSERRVLLKESP+FLKEKFDFFKDK FKIVYCIGEDLKTREKG AVKEFL+EQLENIDL+Y NLIVAYEPIWAIGT KSASLEDIYLTHGFLKQ LNQK PLLYGGSVNTQNAKEILGIDSVDGLLIGS SLELENFKTIISFL",
|
|
)
|
|
if xml2 is False:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|2640367186|gb|CP079077.1| Length: 234 Strand: Plus
|
|
Helicobacter pylori strain HpGP-ARG-001 chromosome, complete genome
|
|
|
|
Score:412 bits(1059), Expect:4e-128,
|
|
Identities:220/234(94%), Positives:225/234(96%), Gaps:0.234(0%)
|
|
|
|
gi|264036 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHFDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
0 |||||||||||||||||||||||||||||||||.||||||||||||||||||||||||||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|264036 60 AYPRDCGAFTGEITSKHLEELKIHTLLIGHSERRVLLKESPSFLKEKFDFFKDKNFKIVY
|
|
60 |||.|||||||||||||||||||.|||||||||||||||||................|||
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY
|
|
|
|
gi|264036 120 CIGEDLKTREKGFKAVKEFLSEQLENIDLNYSNLIVAYEPIWAIGTKKSASLEDIYLTHG
|
|
120 ||||||||||||..||||||.||||||||.|.||||||||||||||.|||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|264036 180 FLKQILNQKTPLLYGGSVNTQNAKEILGIDSVDGLLIGSASLELENFKTIISFL 234
|
|
180 ||||.||||.|||||||||||||||||||||||||||||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
else: # xml2 is True
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|2640367186|gb|CP079077.1| Length: 234 Strand: Plus
|
|
Helicobacter pylori strain HpGP-ARG-001 chromosome, complete genome
|
|
|
|
Score:412 bits(1059), Expect:4e-128,
|
|
Identities:220/234(94%), Positives:225/234(96%), Gaps:0.234(0%)
|
|
|
|
gi|264036 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHFDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
0 |||||||||||||||||||||||||||||||||.||||||||||||||||||||||||||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|264036 60 AYPRDCGAFTGEITSKHLEELKIHTLLIGHSERRVLLKESPSFLKEKFDFFKDKNFKIVY
|
|
60 |||.|||||||||||||||||||.|||||||||||||||||................|||
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY
|
|
|
|
gi|264036 120 CIGEDLKTREKGFKAVKEFLSEQLENIDLNYSNLIVAYEPIWAIGTKKSASLEDIYLTHG
|
|
120 ||||||||||||..||||||.||||||||.|.||||||||||||||.|||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|264036 180 FLKQILNQKTPLLYGGSVNTQNAKEILGIDSVDGLLIGSASLELENFKTIISFL 234
|
|
180 ||||.||||.|||||||||||||||||||||||||||||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
hit = record[9]
|
|
self.assertEqual(hit.num, 10)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|532105813|gb|CP006691.1|")
|
|
self.assertEqual(hit.target.name, "CP006691")
|
|
self.assertEqual(
|
|
hit.target.description, "Helicobacter pylori SouthAfrica20, complete genome"
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=1622903)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 1057.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 411.764)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 7.68167e-128, places=133)
|
|
self.assertEqual(hsp.annotations["identity"], 218)
|
|
self.assertEqual(hsp.annotations["positive"], 225)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 234],
|
|
[ 0, 234]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 234))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "CAJ99216.1")
|
|
self.assertEqual(
|
|
hsp.query.description, "tim [Helicobacter acinonychis str. Sheeba]"
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHF...SFL')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|532105813|gb|CP006691.1|")
|
|
self.assertEqual(hsp.target.name, "CP006691")
|
|
self.assertEqual(
|
|
hsp.target.description, "Helicobacter pylori SouthAfrica20, complete genome"
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(1622903))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(
|
|
feature.qualifiers["coded_by"],
|
|
"gi|532105813|gb|CP006691.1|:197836..198537",
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCD+VFVFPDFLGLLPNAFLHFTLG QNAYPKDCGAFTGEITSKHLEELKI+TLLIGHSERRVLLKESP+FLKEKFDFFKDKKFKI+YCIGEDLKTREKG AVKEFLNEQLENIDL+Y +LIVAYEPIWAIGT KSASLEDIYLTHGFLKQ LNQK PLLYGGSVN QNAKEILGIDSVDGLL+GS SLELENFKTIISFL",
|
|
)
|
|
if xml2 is False:
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|532105813|gb|CP006691.1| Length: 234 Strand: Plus
|
|
Helicobacter pylori SouthAfrica20, complete genome
|
|
|
|
Score:411 bits(1057), Expect:8e-128,
|
|
Identities:218/234(93%), Positives:225/234(96%), Gaps:0.234(0%)
|
|
|
|
gi|532105 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN
|
|
0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|532105 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRVLLKESPSFLKEKFDFFKDKKFKIIY
|
|
60 |||||||||||||||||||||||.|||||||||||||||||................|.|
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNXXXXXXXXXXXXXXXIVY
|
|
|
|
gi|532105 120 CIGEDLKTREKGFNAVKEFLNEQLENIDLNYHHLIVAYEPIWAIGTKKSASLEDIYLTHG
|
|
120 ||||||||||||..|||||||||||||||.|..|||||||||||||.|||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|532105 180 FLKQILNQKTPLLYGGSVNAQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234
|
|
180 ||||.||||.|||||||||.||||||||||||||||.||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
else: # xml2 is True
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : CAJ99216.1 Length: 234 Strand: Plus
|
|
tim [Helicobacter acinonychis str. Sheeba]
|
|
Target: gi|532105813|gb|CP006691.1| Length: 234 Strand: Plus
|
|
Helicobacter pylori SouthAfrica20, complete genome
|
|
|
|
Score:411 bits(1057), Expect:8e-128,
|
|
Identities:218/234(93%), Positives:225/234(96%), Gaps:0.234(0%)
|
|
|
|
gi|532105 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDKVFVFPDFLGLLPNAFLHFTLGAQN
|
|
0 |||||||||||||||||||||||||||||||||||.|||||||||||||||||||||.||
|
|
CAJ99216. 0 MTKIAMANFKSAMPIFKSHAYLKELEKTLKPQHCDRVFVFPDFLGLLPNAFLHFTLGVQN
|
|
|
|
gi|532105 60 AYPKDCGAFTGEITSKHLEELKIHTLLIGHSERRVLLKESPSFLKEKFDFFKDKKFKIIY
|
|
60 |||||||||||||||||||||||.|||||||||||||||||................|.|
|
|
CAJ99216. 60 AYPKDCGAFTGEITSKHLEELKINTLLIGHSERRVLLKESPNflkekfdffkdkkfkIVY
|
|
|
|
gi|532105 120 CIGEDLKTREKGFNAVKEFLNEQLENIDLNYHHLIVAYEPIWAIGTKKSASLEDIYLTHG
|
|
120 ||||||||||||..|||||||||||||||.|..|||||||||||||.|||||||||||||
|
|
CAJ99216. 120 CIGEDLKTREKGLGAVKEFLNEQLENIDLDYQNLIVAYEPIWAIGTGKSASLEDIYLTHG
|
|
|
|
gi|532105 180 FLKQILNQKTPLLYGGSVNAQNAKEILGIDSVDGLLVGSASLELENFKTIISFL 234
|
|
180 ||||.||||.|||||||||.||||||||||||||||.||.|||||||||||||| 234
|
|
CAJ99216. 180 FLKQHLNQKMPLLYGGSVNTQNAKEILGIDSVDGLLIGSTSLELENFKTIISFL 234
|
|
|
|
""",
|
|
)
|
|
|
|
def test_xml_21500_tblastn_001_writer(self):
|
|
"""Writing TBLASTN 2.15.0+ (xml_21500_tblastn_001.xml)."""
|
|
filename = "xml_21500_tblastn_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream)
|
|
self.assertEqual(n, 1)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_xml_21500_tblastn_001_records(written_records)
|
|
|
|
def test_xml2_21500_tblastn_001_writer(self):
|
|
"""Writing TBLASTN 2.15.0+ XML2 (xml2_21500_tblastn_001.xml)."""
|
|
filename = "xml2_21500_tblastn_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream, fmt="XML2")
|
|
self.assertEqual(n, 1)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_xml_21500_tblastn_001_records(written_records, xml2=True)
|
|
|
|
|
|
class TestTBlastx(unittest.TestCase):
|
|
"""Test the Blast XML parser for tblastx output."""
|
|
|
|
def test_xml_2226_tblastx_004(self):
|
|
"""Parsing TBLASTX 2.2.26+ (xml_2226_tblastx_004.xml)."""
|
|
filename = "xml_2226_tblastx_004.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_2226_tblastx_004(records)
|
|
with Blast.parse(path) as records:
|
|
self.check_xml_2226_tblastx_004(records)
|
|
with Blast.parse(path) as records:
|
|
self.assertEqual(
|
|
str(records),
|
|
"""\
|
|
Program: TBLASTX 2.2.26+
|
|
db: refseq_rna
|
|
|
|
Query: Query_1 (length=128)
|
|
random_s00
|
|
Hits: No hits found
|
|
|
|
Query: Query_2 (length=350)
|
|
gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA,
|
|
complete cds
|
|
Hits: ---- ----- ----------------------------------------------------------
|
|
# # HSP ID + description
|
|
---- ----- ----------------------------------------------------------
|
|
0 7 gi|296147483|ref|NM_001183135.1| Saccharomyces cerevis...
|
|
1 6 gi|365982352|ref|XM_003667962.1| Naumovozyma dairenens...
|
|
2 4 gi|366988334|ref|XM_003673886.1| Naumovozyma castellii...
|
|
3 2 gi|255710474|ref|XM_002551475.1| Lachancea thermotoler...
|
|
4 4 gi|254579534|ref|XM_002495708.1| Zygosaccharomyces rou...""",
|
|
)
|
|
|
|
def check_xml_2226_tblastx_004(self, records):
|
|
self.assertEqual(records.program, "tblastx")
|
|
self.assertEqual(records.version, "TBLASTX 2.2.26+")
|
|
self.assertEqual(
|
|
records.reference,
|
|
'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.',
|
|
)
|
|
self.assertEqual(records.db, "refseq_rna")
|
|
self.assertIsInstance(records.query, SeqRecord)
|
|
self.assertEqual(records.query.id, "Query_1")
|
|
self.assertEqual(records.query.description, "random_s00")
|
|
self.assertEqual(repr(records.query.seq), "Seq(None, length=128)")
|
|
self.assertEqual(len(records.param), 5)
|
|
self.assertEqual(records.param["matrix"], "BLOSUM62")
|
|
self.assertAlmostEqual(records.param["expect"], 10.0)
|
|
self.assertEqual(records.param["gap-open"], 11)
|
|
self.assertEqual(records.param["gap-extend"], 1)
|
|
self.assertEqual(records.param["filter"], "L;")
|
|
record = next(records)
|
|
self.assertEqual(record.num, 1)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "Query_1")
|
|
self.assertEqual(record.query.description, "random_s00")
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=128)")
|
|
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 2933984)
|
|
self.assertEqual(record.stat["db-len"], 4726730735)
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertEqual(record.stat["eff-space"], 0)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.0)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.0)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.0)
|
|
self.assertEqual(len(record), 0)
|
|
record = next(records)
|
|
self.assertEqual(record.num, 2)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "Query_2")
|
|
self.assertEqual(
|
|
record.query.description,
|
|
"gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds",
|
|
)
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=350)")
|
|
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 2933984)
|
|
self.assertEqual(record.stat["db-len"], 4726730735)
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertEqual(record.stat["eff-space"], 0)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.0)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.0)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.0)
|
|
self.assertEqual(len(record), 5)
|
|
hit = record[0]
|
|
self.assertEqual(hit.num, 1)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|296147483|ref|NM_001183135.1|")
|
|
self.assertEqual(hit.target.name, "NM_001183135")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds >gi|116616412|gb|EF059095.1| Synthetic construct Saccharomyces cerevisiae clone FLH203015.01X MON2, complete sequence",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=4911)")
|
|
self.assertEqual(len(hit), 7)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 626.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 289.739)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 1.05874e-76, places=81)
|
|
self.assertEqual(hsp.annotations["identity"], 116)
|
|
self.assertEqual(hsp.annotations["positive"], 116)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 602.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 278.742)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 2.16381e-73, places=78)
|
|
self.assertEqual(hsp.annotations["identity"], 116)
|
|
self.assertEqual(hsp.annotations["positive"], 116)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[2]
|
|
self.assertEqual(hsp.num, 3)
|
|
self.assertAlmostEqual(hsp.score, 593.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 274.618)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 3.77251e-72, places=77)
|
|
self.assertEqual(hsp.annotations["identity"], 116)
|
|
self.assertEqual(hsp.annotations["positive"], 116)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[3]
|
|
self.assertEqual(hsp.num, 4)
|
|
self.assertAlmostEqual(hsp.score, 583.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 270.036)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 9.03598e-71, places=76)
|
|
self.assertEqual(hsp.annotations["identity"], 116)
|
|
self.assertEqual(hsp.annotations["positive"], 116)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[4]
|
|
self.assertEqual(hsp.num, 5)
|
|
self.assertAlmostEqual(hsp.score, 495.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 229.713)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 1.24226e-58, places=63)
|
|
self.assertEqual(hsp.annotations["identity"], 116)
|
|
self.assertEqual(hsp.annotations["positive"], 116)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[5]
|
|
self.assertEqual(hsp.num, 6)
|
|
self.assertAlmostEqual(hsp.score, 425.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 197.639)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 9.12288e-54, places=59)
|
|
self.assertEqual(hsp.annotations["identity"], 85)
|
|
self.assertEqual(hsp.annotations["positive"], 85)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[6]
|
|
self.assertEqual(hsp.num, 7)
|
|
self.assertAlmostEqual(hsp.score, 73.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 36.3494)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 9.12288e-54, places=59)
|
|
self.assertEqual(hsp.annotations["identity"], 14)
|
|
self.assertEqual(hsp.annotations["positive"], 14)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 14],
|
|
[ 0, 14]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 14))
|
|
self.assertEqual(repr(hsp.query.seq), "Seq('MAMNTGGFDSMQRQ')")
|
|
self.assertEqual(hsp.query.id, "Query_2")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(14))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "Query_2:1..42")
|
|
self.assertEqual(repr(hsp.target.seq), "Seq('MAMNTGGFDSMQRQ')")
|
|
self.assertEqual(hsp.target.id, "gi|296147483|ref|NM_001183135.1|")
|
|
self.assertEqual(hsp.target.name, "NM_001183135")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds >gi|116616412|gb|EF059095.1| Synthetic construct Saccharomyces cerevisiae clone FLH203015.01X MON2, complete sequence",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(4911))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(
|
|
feature.qualifiers["coded_by"], "gi|296147483|ref|NM_001183135.1|:1..42"
|
|
)
|
|
self.assertEqual(hsp.annotations["midline"], "MAMNTGGFDSMQRQ")
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_2 Length: 14 Strand: Plus
|
|
gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA,
|
|
complete cds
|
|
Target: gi|296147483|ref|NM_001183135.1| Length: 14 Strand: Plus
|
|
Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds
|
|
>gi|116616412|gb|EF059095.1| Synthetic construct Saccharomyces
|
|
cerevisiae clone FLH203015.01X MON2, complete sequence
|
|
|
|
Score:36 bits(73), Expect:9e-54,
|
|
Identities:14/14(100%), Positives:14/14(100%), Gaps:0.14(0%)
|
|
|
|
gi|296147 0 MAMNTGGFDSMQRQ 14
|
|
0 |||||||||||||| 14
|
|
Query_2 0 MAMNTGGFDSMQRQ 14
|
|
|
|
""",
|
|
)
|
|
hit = record[1]
|
|
self.assertEqual(hit.num, 2)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|365982352|ref|XM_003667962.1|")
|
|
self.assertEqual(hit.target.name, "XM_003667962")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Naumovozyma dairenensis CBS 421 hypothetical protein (NDAI0A06120), mRNA",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=4932)")
|
|
self.assertEqual(len(hit), 6)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 327.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 152.734)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 2.38069e-37, places=42)
|
|
self.assertEqual(hsp.annotations["identity"], 62)
|
|
self.assertEqual(hsp.annotations["positive"], 73)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 51.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 26.2688)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 2.38069e-37, places=42)
|
|
self.assertEqual(hsp.annotations["identity"], 11)
|
|
self.assertEqual(hsp.annotations["positive"], 11)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[2]
|
|
self.assertEqual(hsp.num, 3)
|
|
self.assertAlmostEqual(hsp.score, 142.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 67.9658)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 4.80116e-20, places=25)
|
|
self.assertEqual(hsp.annotations["identity"], 34)
|
|
self.assertEqual(hsp.annotations["positive"], 38)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[3]
|
|
self.assertEqual(hsp.num, 4)
|
|
self.assertAlmostEqual(hsp.score, 109.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 52.8449)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 4.80116e-20, places=25)
|
|
self.assertEqual(hsp.annotations["identity"], 24)
|
|
self.assertEqual(hsp.annotations["positive"], 29)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[4]
|
|
self.assertEqual(hsp.num, 5)
|
|
self.assertAlmostEqual(hsp.score, 127.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 61.0927)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 7.14684e-08, places=13)
|
|
self.assertEqual(hsp.annotations["identity"], 36)
|
|
self.assertEqual(hsp.annotations["positive"], 52)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[5]
|
|
self.assertEqual(hsp.num, 6)
|
|
self.assertAlmostEqual(hsp.score, 87.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 42.7643)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.0235231)
|
|
self.assertEqual(hsp.annotations["identity"], 28)
|
|
self.assertEqual(hsp.annotations["positive"], 36)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 65],
|
|
[ 0, 65]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 65))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('G*QSL*ALHCQGRHFSIP*LASQHERECEIRMSF*LLKTMYSFQYLNGFITSMA...FGR')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_2")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(65))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "complement(Query_2:67..261)")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('GRQSL*ALHGN*S*FGISR*TGQDQRCNKIRVPYKFFYILNSL*NID*FVASMF...F*R')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|365982352|ref|XM_003667962.1|")
|
|
self.assertEqual(hsp.target.name, "XM_003667962")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Naumovozyma dairenensis CBS 421 hypothetical protein (NDAI0A06120), mRNA",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(4932))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(
|
|
feature.qualifiers["coded_by"],
|
|
"complement(gi|365982352|ref|XM_003667962.1|:61..255)",
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"G QSL*ALH F I Q +R +IR+ + + S ++ F+ SM NG*ISSFRF R",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_2 Length: 65 Strand: Plus
|
|
gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA,
|
|
complete cds
|
|
Target: gi|365982352|ref|XM_003667962.1| Length: 65 Strand: Plus
|
|
Naumovozyma dairenensis CBS 421 hypothetical protein (NDAI0A06120), mRNA
|
|
|
|
Score:42 bits(87), Expect:0.02,
|
|
Identities:28/65(43%), Positives:36/65(55%), Gaps:0.65(0%)
|
|
|
|
gi|365982 0 GRQSL*ALHGN*S*FGISR*TGQDQRCNKIRVPYKFFYILNSL*NID*FVASMFNG*ISS
|
|
0 |.|||||||.....|.|.....|..|...||..........|......|..||.||||||
|
|
Query_2 0 G*QSL*ALHCQGRHFSIP*LASQHERECEIRMSF*LLKTMYSFQYLNGFITSMANG*ISS
|
|
|
|
gi|365982 60 FRF*R 65
|
|
60 |||.| 65
|
|
Query_2 60 FRFGR 65
|
|
|
|
""",
|
|
)
|
|
hit = record[2]
|
|
self.assertEqual(hit.num, 3)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|366988334|ref|XM_003673886.1|")
|
|
self.assertEqual(hit.target.name, "XM_003673886")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Naumovozyma castellii CBS 4309 hypothetical protein (NCAS0A09950) mRNA, complete cds",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=4938)")
|
|
self.assertEqual(len(hit), 4)
|
|
self.assertEqual(
|
|
repr(hit),
|
|
"<Bio.Blast.Hit target.id='gi|366988334|ref|XM_003673886.1|' query.id='Query_2'; 4 HSPs>",
|
|
)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 306.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 143.112)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 1.45826e-32, places=37)
|
|
self.assertEqual(hsp.annotations["identity"], 58)
|
|
self.assertEqual(hsp.annotations["positive"], 71)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 130.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 62.4673)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 5.61057e-16, places=21)
|
|
self.assertEqual(hsp.annotations["identity"], 30)
|
|
self.assertEqual(hsp.annotations["positive"], 36)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[2]
|
|
self.assertEqual(hsp.num, 3)
|
|
self.assertAlmostEqual(hsp.score, 91.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 44.5971)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 5.61057e-16, places=21)
|
|
self.assertEqual(hsp.annotations["identity"], 20)
|
|
self.assertEqual(hsp.annotations["positive"], 24)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[3]
|
|
self.assertEqual(hsp.num, 4)
|
|
self.assertAlmostEqual(hsp.score, 112.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 54.2195)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 8.37784e-06, places=11)
|
|
self.assertEqual(hsp.annotations["identity"], 38)
|
|
self.assertEqual(hsp.annotations["positive"], 58)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 115],
|
|
[ 0, 115]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 115))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('*LNLHREMSSLNEGIQNFRQPASRNRWNG*QSL*ALHCQGRHFSIP*LASQHER...HGH')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_2")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(115))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "complement(Query_2:1..345)")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('*LYFNCKMCRFYKSIENSR*SAFRYGWCRC*AL*TLHSNRGQLSVP**T*FYQW...NSH')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|366988334|ref|XM_003673886.1|")
|
|
self.assertEqual(hsp.target.name, "XM_003673886")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Naumovozyma castellii CBS 4309 hypothetical protein (NCAS0A09950) mRNA, complete cds",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(4938))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(
|
|
feature.qualifiers["coded_by"],
|
|
"complement(gi|366988334|ref|XM_003673886.1|:1..345)",
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"*L + +M + I+N R A R W +L* LH S+P* ++ +IRM+ + + SF L+ +T M + + SFRFGR QF +L L G+K S++ H",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_2 Length: 115 Strand: Plus
|
|
gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA,
|
|
complete cds
|
|
Target: gi|366988334|ref|XM_003673886.1| Length: 115 Strand: Plus
|
|
Naumovozyma castellii CBS 4309 hypothetical protein (NCAS0A09950) mRNA,
|
|
complete cds
|
|
|
|
Score:54 bits(112), Expect:8e-06,
|
|
Identities:38/115(33%), Positives:58/115(50%), Gaps:0.115(0%)
|
|
|
|
gi|366988 0 *LYFNCKMCRFYKSIENSR*SAFRYGWCRC*AL*TLHSNRGQLSVP**T*FYQWSDKIRM
|
|
0 ||.....|......|.|.|..|.|..|.....||.||......|.||..........|||
|
|
Query_2 0 *LNLHREMSSLNEGIQNFRQPASRNRWNG*QSL*ALHCQGRHFSIP*LASQHERECEIRM
|
|
|
|
gi|366988 60 ACQIFNVLDSF*DLDRLVTCMLDRSVPSFRFGRQ*MQF*VQLFLKGLKTCSLNSH 115
|
|
60 .........||..|....|.|......||||||...||...|.|.|.|..|...| 115
|
|
Query_2 60 SF*LLKTMYSFQYLNGFITSMANG*ISSFRFGR*RTQFCFKLPLHGVKPSSVHGH 115
|
|
|
|
""",
|
|
)
|
|
hit = record[3]
|
|
self.assertEqual(hit.num, 4)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|255710474|ref|XM_002551475.1|")
|
|
self.assertEqual(hit.target.name, "XM_002551475")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Lachancea thermotolerans CBS 6340 KLTH0A01342p (KLTH0A01342g) mRNA, complete cds",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=4845)")
|
|
self.assertEqual(len(hit), 2)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 303.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 141.737)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 3.78129e-32, places=37)
|
|
self.assertEqual(hsp.annotations["identity"], 55)
|
|
self.assertEqual(hsp.annotations["positive"], 71)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 92.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 45.0554)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.00480643)
|
|
self.assertEqual(hsp.annotations["identity"], 25)
|
|
self.assertEqual(hsp.annotations["positive"], 29)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 55],
|
|
[ 0, 55]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 55))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('TFN*ISIAR*VASMKASKISDSRLRGIDGTVDSPCRHCIARVVILAFLDWQANTK')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_2")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(55))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "complement(Query_2:186..350)")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('TLSCISAARWVESMNASSTSSILSSGMQLTDDIFCRHCTETLVSLAFLEAHERTK')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|255710474|ref|XM_002551475.1|")
|
|
self.assertEqual(hsp.target.name, "XM_002551475")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Lachancea thermotolerans CBS 6340 KLTH0A01342p (KLTH0A01342g) mRNA, complete cds",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(4845))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(
|
|
feature.qualifiers["coded_by"],
|
|
"complement(gi|255710474|ref|XM_002551475.1|:183..347)",
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"T + IS AR V SM AS S G+ T D CRHC +V LAFL+ TK",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_2 Length: 55 Strand: Plus
|
|
gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA,
|
|
complete cds
|
|
Target: gi|255710474|ref|XM_002551475.1| Length: 55 Strand: Plus
|
|
Lachancea thermotolerans CBS 6340 KLTH0A01342p (KLTH0A01342g) mRNA,
|
|
complete cds
|
|
|
|
Score:45 bits(92), Expect:0.005,
|
|
Identities:25/55(45%), Positives:29/55(53%), Gaps:0.55(0%)
|
|
|
|
gi|255710 0 TLSCISAARWVESMNASSTSSILSSGMQLTDDIFCRHCTETLVSLAFLEAHERTK 55
|
|
0 |...||.||.|.||.||..|.....|...|.|..||||....|.||||.....|| 55
|
|
Query_2 0 TFN*ISIAR*VASMKASKISDSRLRGIDGTVDSPCRHCIARVVILAFLDWQANTK 55
|
|
|
|
""",
|
|
)
|
|
hit = record[4]
|
|
self.assertEqual(hit.num, 5)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|254579534|ref|XM_002495708.1|")
|
|
self.assertEqual(hit.target.name, "XM_002495708")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Zygosaccharomyces rouxii hypothetical protein (ZYRO0C02266g) mRNA, complete cds",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=4866)")
|
|
self.assertEqual(len(hit), 4)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 302.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 141.279)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 5.19486e-32, places=37)
|
|
self.assertEqual(hsp.annotations["identity"], 57)
|
|
self.assertEqual(hsp.annotations["positive"], 72)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 105.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 51.0121)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 8.66978e-12, places=17)
|
|
self.assertEqual(hsp.annotations["identity"], 27)
|
|
self.assertEqual(hsp.annotations["positive"], 33)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[2]
|
|
self.assertEqual(hsp.num, 3)
|
|
self.assertAlmostEqual(hsp.score, 85.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 41.8479)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 8.66978e-12, places=17)
|
|
self.assertEqual(hsp.annotations["identity"], 20)
|
|
self.assertEqual(hsp.annotations["positive"], 25)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
hsp = hit[3]
|
|
self.assertEqual(hsp.num, 4)
|
|
self.assertAlmostEqual(hsp.score, 92.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 45.0554)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.00480643)
|
|
self.assertEqual(hsp.annotations["identity"], 31)
|
|
self.assertEqual(hsp.annotations["positive"], 53)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 99],
|
|
[ 0, 99]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 99))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('RIAFFIFRIEKKKFNHSPC***IH*DIEKST*F*GARKTSGFRTPFRVGLPIKE...SIK')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_2")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(99))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(feature.qualifiers["coded_by"], "Query_2:51..347")
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('RVAFTFIRVEEKEYCY*EC*R*IN*NFESSSQL*GIIKTSRFYSTASDVMCIQE...KTK')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|254579534|ref|XM_002495708.1|")
|
|
self.assertEqual(hsp.target.name, "XM_002495708")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Zygosaccharomyces rouxii hypothetical protein (ZYRO0C02266g) mRNA, complete cds",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(
|
|
repr(feature.location),
|
|
"SimpleLocation(ExactPosition(0), ExactPosition(4866))",
|
|
)
|
|
self.assertEqual(feature.type, "CDS")
|
|
self.assertEqual(len(feature.qualifiers), 1)
|
|
self.assertEqual(
|
|
feature.qualifiers["coded_by"], "gi|254579534|ref|XM_002495708.1|:51..347"
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"R+AF R+E+K++ + C* *I+*+ E S+ *G KTS F + + I+EC D NAM + ++Y+ + + C++ G + +G K",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_2 Length: 99 Strand: Plus
|
|
gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA,
|
|
complete cds
|
|
Target: gi|254579534|ref|XM_002495708.1| Length: 99 Strand: Plus
|
|
Zygosaccharomyces rouxii hypothetical protein (ZYRO0C02266g) mRNA,
|
|
complete cds
|
|
|
|
Score:45 bits(92), Expect:0.005,
|
|
Identities:31/99(31%), Positives:53/99(54%), Gaps:0.99(0%)
|
|
|
|
gi|254579 0 RVAFTFIRVEEKEYCY*EC*R*IN*NFESSSQL*GIIKTSRFYSTASDVMCIQECQIDYY
|
|
0 |.||...|.|.|......||.||.|..|.|...||..|||.|.........|.||..|..
|
|
Query_2 0 RIAFFIFRIEKKKFNHSPC***IH*DIEKST*F*GARKTSGFRTPFRVGLPIKEC*NDDP
|
|
|
|
gi|254579 60 INAMFPKIGHSAMYTGR*TLRRT*CVYRGQPAGNGYKTK 99
|
|
60 .|||.........|..........|...|.....|...| 99
|
|
Query_2 60 GNAMPTGTVNRSIYSSKPAV*NFGCLH*GYSSRDGDSIK 99
|
|
|
|
""",
|
|
)
|
|
hit = record[0]
|
|
self.assertEqual(hit.num, 1)
|
|
hsps = hit[1:5:2]
|
|
self.assertEqual(
|
|
str(hsps),
|
|
"""\
|
|
Query: Query_2
|
|
gi|296147483:1-350 Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA,
|
|
complete cds
|
|
Hit: gi|296147483|ref|NM_001183135.1| (length=4911)
|
|
Saccharomyces cerevisiae S288c Mon2p (MON2) mRNA, complete cds
|
|
>gi|116616412|gb|EF059095.1| Synthetic construct Saccharomyces cerevisiae
|
|
clone FLH203015.01X MON2, complete sequence
|
|
HSPs: ---- -------- --------- ------ --------------- ---------------------
|
|
# E-value Bit score Span Query range Hit range
|
|
---- -------- --------- ------ --------------- ---------------------
|
|
0 2.2e-73 278.74 116 [0:116] [0:116]
|
|
1 9e-71 270.04 116 [0:116] [0:116]""",
|
|
)
|
|
|
|
def test_xml_2226_tblastx_004_writer(self):
|
|
"""Writing TBLASTX 2.2.26+ (xml_2226_tblastx_004.xml)."""
|
|
filename = "xml_2226_tblastx_004.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream)
|
|
self.assertEqual(n, 2)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_xml_2226_tblastx_004(written_records)
|
|
|
|
def test_xml_21500_tblastx_001_parser(self):
|
|
"""Parsing TBLASTX 2.15.0+ (xml_21500_tblastx_001.xml)."""
|
|
filename = "xml_21500_tblastx_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_21500_tblastx_001_records(records)
|
|
with Blast.parse(path) as records:
|
|
self.check_xml_21500_tblastx_001_records(records)
|
|
with open(path, "rb") as stream:
|
|
record = Blast.read(stream)
|
|
self.check_xml_21500_tblastx_001_record(record, xml2=False)
|
|
record = Blast.read(path)
|
|
self.check_xml_21500_tblastx_001_record(record, xml2=False)
|
|
with Blast.parse(path) as records:
|
|
self.assertEqual(
|
|
str(records),
|
|
"""\
|
|
Program: TBLASTX 2.15.0+
|
|
db: refseq_rna
|
|
|
|
Query: Query_949527 (length=804)
|
|
NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168
|
|
complete genome
|
|
Hits: ---- ----- ----------------------------------------------------------
|
|
# # HSP ID + description
|
|
---- ----- ----------------------------------------------------------
|
|
0 1 gi|1048027041|ref|XM_017618728.1| PREDICTED: Rhagoleti...
|
|
1 2 gi|2670929493|ref|XM_062937224.1| Kwoniella shivajii u...
|
|
2 1 gi|2432161024|ref|XM_053088494.1| Dioszegia hungarica ...
|
|
3 1 gi|799335188|ref|XM_012195534.1| Cryptococcus neoforma...
|
|
4 1 gi|1799711371|ref|XM_032001855.1| Kwoniella shandongen...
|
|
5 1 gi|1102541390|ref|XM_019193349.1| Kwoniella bestiolae ...
|
|
6 1 gi|1799711369|ref|XM_032001854.1| Kwoniella shandongen...
|
|
7 1 gi|2592096353|ref|XM_060229193.1| PREDICTED: Ylistrum ...
|
|
8 1 gi|2044197324|ref|XM_041762518.1| PREDICTED: Vulpes la...
|
|
9 1 gi|1101784196|ref|XM_019135081.1| Cryptococcus amylole...""",
|
|
)
|
|
|
|
def test_xml2_21500_tblastx_001_parser(self):
|
|
"""Parsing TBLASTX 2.15.0+ (xml2_21500_tblastx_001.xml)."""
|
|
filename = "xml2_21500_tblastx_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_21500_tblastx_001_records(records, xml2=True)
|
|
with Blast.parse(path) as records:
|
|
self.check_xml_21500_tblastx_001_records(records, xml2=True)
|
|
with open(path, "rb") as stream:
|
|
record = Blast.read(stream)
|
|
self.check_xml_21500_tblastx_001_record(record, xml2=True)
|
|
record = Blast.read(path)
|
|
self.check_xml_21500_tblastx_001_record(record, xml2=True)
|
|
|
|
def check_xml_21500_tblastx_001_records(self, records, xml2=False):
|
|
self.assertEqual(records.program, "tblastx")
|
|
self.assertEqual(records.version, "TBLASTX 2.15.0+")
|
|
self.assertEqual(
|
|
records.reference,
|
|
'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.',
|
|
)
|
|
self.assertEqual(records.db, "refseq_rna")
|
|
if xml2 is False:
|
|
self.assertIsInstance(records.query, SeqRecord)
|
|
self.assertEqual(records.query.id, "Query_949527")
|
|
self.assertEqual(
|
|
records.query.description,
|
|
"NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome",
|
|
)
|
|
self.assertEqual(repr(records.query.seq), "Seq(None, length=804)")
|
|
self.assertEqual(len(records.param), 5)
|
|
self.assertEqual(records.param["matrix"], "BLOSUM62")
|
|
self.assertAlmostEqual(records.param["expect"], 0.05)
|
|
self.assertEqual(records.param["filter"], "L;")
|
|
if xml2 is False:
|
|
self.assertEqual(records.param["gap-open"], 11)
|
|
self.assertEqual(records.param["gap-extend"], 1)
|
|
else:
|
|
self.assertEqual(records.param["query-gencode"], 1)
|
|
self.assertEqual(records.param["db-gencode"], 1)
|
|
record = next(records)
|
|
self.assertRaises(StopIteration, next, records)
|
|
self.check_xml_21500_tblastx_001_record(record, xml2)
|
|
|
|
def check_xml_21500_tblastx_001_record(self, record, xml2):
|
|
if xml2 is False:
|
|
self.assertEqual(record.num, 1)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "Query_949527")
|
|
self.assertEqual(
|
|
record.query.description,
|
|
"NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome",
|
|
)
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=804)")
|
|
if xml2 is True:
|
|
self.assertEqual(len(record.query.features), 1)
|
|
feature = record.query.features[0]
|
|
self.assertEqual(feature.type, "masking")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(259), ExactPosition(293))"
|
|
)
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 60837734)
|
|
self.assertEqual(record.stat["db-len"], 161330429008)
|
|
if xml2 is True:
|
|
self.assertEqual(record.stat["hsp-len"], 66)
|
|
self.assertEqual(record.stat["eff-space"], 10051826883450)
|
|
self.assertEqual(record.stat["kappa"], -1)
|
|
self.assertEqual(record.stat["lambda"], -1)
|
|
self.assertEqual(record.stat["entropy"], -1)
|
|
else:
|
|
self.assertEqual(record.stat["hsp-len"], 0)
|
|
self.assertEqual(record.stat["eff-space"], 0)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.133956144488482)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.317605957635731)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.401214524497119)
|
|
self.assertEqual(len(record), 10)
|
|
hit = record[0]
|
|
self.assertEqual(hit.num, 1)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|1048027041|ref|XM_017618728.1|")
|
|
self.assertEqual(hit.target.name, "XM_017618728")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"PREDICTED: Rhagoletis zephyria response regulator PleD-like (LOC108364862), partial mRNA",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=917)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 150.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 71.6314)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 5.49617e-09, places=14)
|
|
self.assertEqual(hsp.annotations["identity"], 29)
|
|
self.assertEqual(hsp.annotations["positive"], 52)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 89],
|
|
[ 0, 89]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 89))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAF...QVS')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_949527")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(89))"
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('ACGGQEALNLVVEQQPDIVLLDVMMPGIDGFMVCKEMKDNPMTTHIPVVMVTAL...SLT')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|1048027041|ref|XM_017618728.1|")
|
|
self.assertEqual(hsp.target.name, "XM_017618728")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"PREDICTED: Rhagoletis zephyria response regulator PleD-like (LOC108364862), partial mRNA",
|
|
)
|
|
if xml2 is True:
|
|
self.assertEqual(len(record.query.features), 1)
|
|
feature = record.query.features[0]
|
|
self.assertEqual(feature.type, "masking")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(259), ExactPosition(293))"
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"A GQE L+L E+ PD+++LD++MP +DG V + ++++ + V+M+TA + K ++ GA F+ KP D L I+ ++",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='gi|1048027041|ref|XM_017618728.1|' query.id='Query_949527'; 2 rows x 89 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_949527 Length: 89 Strand: Plus
|
|
NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168
|
|
complete genome
|
|
Target: gi|1048027041|ref|XM_017618728.1| Length: 89 Strand: Plus
|
|
PREDICTED: Rhagoletis zephyria response regulator PleD-like
|
|
(LOC108364862), partial mRNA
|
|
|
|
Score:71 bits(150), Expect:5e-09,
|
|
Identities:29/89(33%), Positives:52/89(58%), Gaps:0.89(0%)
|
|
|
|
gi|104802 0 ACGGQEALNLVVEQQPDIVLLDVMMPGIDGFMVCKEMKDNPMTTHIPVVMVTALHDTEDR
|
|
0 |..|||.|.|..|..||...||..||..||..|..............|.|.||.......
|
|
Query_949 0 AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVT
|
|
|
|
gi|104802 60 VKGINAGADDFLTKPIDETALSARIKSLT 89
|
|
60 .|....||..|..||.|...|...|.... 89
|
|
Query_949 60 KKAVDLGASYFILKPFDMENLVGHIRQVS 89
|
|
|
|
""",
|
|
)
|
|
hit = record[1]
|
|
self.assertEqual(hit.num, 2)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|2670929493|ref|XM_062937224.1|")
|
|
self.assertEqual(hit.target.name, "XM_062937224")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Kwoniella shivajii uncharacterized protein (IL334_005512), partial mRNA",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=9882)")
|
|
self.assertEqual(len(hit), 2)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 146.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 69.7986)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 1.95794e-08, places=13)
|
|
self.assertEqual(hsp.annotations["identity"], 32)
|
|
self.assertEqual(hsp.annotations["positive"], 51)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 94],
|
|
[ 0, 94]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 94))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('IEGQEDMEVIGVAYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDL...NLV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_949527")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(94))"
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('VDDSYDMRKVIEAHDGQEALELCSRALPDLIISDIMMPRLDGFGLLQALKSSSN...ELL')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|2670929493|ref|XM_062937224.1|")
|
|
self.assertEqual(hsp.target.name, "XM_062937224")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Kwoniella shivajii uncharacterized protein (IL334_005512), partial mRNA",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(9882))"
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"++ DM + A++GQE L L PD+++ DI+MP LDG +L+ L+ S +I+LTA G +D + GA ++ KPF L+",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='gi|2670929493|ref|XM_062937224.1|' query.id='Query_949527'; 2 rows x 94 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_949527 Length: 94 Strand: Plus
|
|
NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168
|
|
complete genome
|
|
Target: gi|2670929493|ref|XM_062937224.1| Length: 94 Strand: Plus
|
|
Kwoniella shivajii uncharacterized protein (IL334_005512), partial mRNA
|
|
|
|
Score:69 bits(146), Expect:2e-08,
|
|
Identities:32/94(34%), Positives:51/94(54%), Gaps:0.94(0%)
|
|
|
|
gi|267092 0 VDDSYDMRKVIEAHDGQEALELCSRALPDLIISDIMMPRLDGFGLLQALKSSSNLISVPI
|
|
0 .....||.....|..|||.|.|.....||....||.||.|||...|..|..|........
|
|
Query_949 0 IEGQEDMEVIGVAYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNV
|
|
|
|
gi|267092 60 ILLTARGDQDFKVGGLMSGAEDYLSKPFSTPELL 94
|
|
60 |.|||.|..|........||.....|||....|. 94
|
|
Query_949 60 IMLTAFGQEDVTKKAVDLGASYFILKPFDMENLV 94
|
|
|
|
""",
|
|
)
|
|
hsp = hit[1]
|
|
self.assertEqual(hsp.num, 2)
|
|
self.assertAlmostEqual(hsp.score, 118.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 56.9688)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.000142549)
|
|
self.assertEqual(hsp.annotations["identity"], 25)
|
|
self.assertEqual(hsp.annotations["positive"], 42)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 68],
|
|
[ 0, 68]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 68))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAF...LGA')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_949527")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(68))"
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('ARDGQEALALCAQQAPDLIISDVMMPNLDGFGLLRALKQSKKLAIIPIIMLTAR...AGA')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|2670929493|ref|XM_062937224.1|")
|
|
self.assertEqual(hsp.target.name, "XM_062937224")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Kwoniella shivajii uncharacterized protein (IL334_005512), partial mRNA",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(9882))"
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"A +GQE L+L ++ PD+++ D++MP+LDG +L L++S +IMLTA G ++ + GA",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='gi|2670929493|ref|XM_062937224.1|' query.id='Query_949527'; 2 rows x 68 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_949527 Length: 68 Strand: Plus
|
|
NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168
|
|
complete genome
|
|
Target: gi|2670929493|ref|XM_062937224.1| Length: 68 Strand: Plus
|
|
Kwoniella shivajii uncharacterized protein (IL334_005512), partial mRNA
|
|
|
|
Score:56 bits(118), Expect:0.0001,
|
|
Identities:25/68(37%), Positives:42/68(62%), Gaps:0.68(0%)
|
|
|
|
gi|267092 0 ARDGQEALALCAQQAPDLIISDVMMPNLDGFGLLRALKQSKKLAIIPIIMLTARGGDEAR
|
|
0 |..|||.|.|.....||....|..||.|||...|..|..|........|||||.|.....
|
|
Query_949 0 AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVT
|
|
|
|
gi|267092 60 VDGILAGA 68
|
|
60 ......|| 68
|
|
Query_949 60 KKAVDLGA 68
|
|
|
|
""",
|
|
)
|
|
hit = record[2]
|
|
self.assertEqual(hit.num, 3)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|2432161024|ref|XM_053088494.1|")
|
|
self.assertEqual(hit.target.name, "XM_053088494")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Dioszegia hungarica CnHHK4 (MKK02DRAFT_32386), partial mRNA",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=5601)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 146.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 69.7986)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 1.95794e-08, places=13)
|
|
self.assertEqual(hsp.annotations["identity"], 32)
|
|
self.assertEqual(hsp.annotations["positive"], 47)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 82],
|
|
[ 0, 82]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 82))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAF...NLV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_949527")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(82))"
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('ARDGQEALEMCKKSLPHVVITDVMMPNLDGFGLLAALKEDPKLSMVPVIMLTAR...ELI')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|2432161024|ref|XM_053088494.1|")
|
|
self.assertEqual(hsp.target.name, "XM_053088494")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Dioszegia hungarica CnHHK4 (MKK02DRAFT_32386), partial mRNA",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(5601))"
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"A +GQE L + K+ P V++ D++MP+LDG +L L+E VIMLTA G E+ + GA +I KPF+ L+",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='gi|2432161024|ref|XM_053088494.1|' query.id='Query_949527'; 2 rows x 82 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_949527 Length: 82 Strand: Plus
|
|
NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168
|
|
complete genome
|
|
Target: gi|2432161024|ref|XM_053088494.1| Length: 82 Strand: Plus
|
|
Dioszegia hungarica CnHHK4 (MKK02DRAFT_32386), partial mRNA
|
|
|
|
Score:69 bits(146), Expect:2e-08,
|
|
Identities:32/82(39%), Positives:47/82(57%), Gaps:0.82(0%)
|
|
|
|
gi|243216 0 ARDGQEALEMCKKSLPHVVITDVMMPNLDGFGLLAALKEDPKLSMVPVIMLTARGGEEAK
|
|
0 |..|||.|...|...|.|...|..||.|||...|..|.|........||||||.|.|...
|
|
Query_949 0 AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVT
|
|
|
|
gi|243216 60 VDGLLAGADDYIAKPFNARELI 82
|
|
60 ......||...|.|||....|. 82
|
|
Query_949 60 KKAVDLGASYFILKPFDMENLV 82
|
|
|
|
""",
|
|
)
|
|
hit = record[3]
|
|
self.assertEqual(hit.num, 4)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|799335188|ref|XM_012195534.1|")
|
|
self.assertEqual(hit.target.name, "XM_012195534")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Cryptococcus neoformans var. grubii H99 hypothetical protein (CNAG_03355), mRNA",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=5764)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 146.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 69.7986)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 1.95794e-08, places=13)
|
|
self.assertEqual(hsp.annotations["identity"], 31)
|
|
self.assertEqual(hsp.annotations["positive"], 48)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 82],
|
|
[ 0, 82]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 82))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAF...NLV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_949527")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(82))"
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('ARDGQEALELCQKSVPDLIISDVMMPHLNGFELLVALKRSKDLKMVPVIMLTAR...EIV')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|799335188|ref|XM_012195534.1|")
|
|
self.assertEqual(hsp.target.name, "XM_012195534")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Cryptococcus neoformans var. grubii H99 hypothetical protein (CNAG_03355), mRNA",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(5764))"
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"A +GQE L L ++ PD+++ D++MPHL+G +L L+ S K VIMLTA G ++ + GA ++ KPF +V",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='gi|799335188|ref|XM_012195534.1|' query.id='Query_949527'; 2 rows x 82 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_949527 Length: 82 Strand: Plus
|
|
NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168
|
|
complete genome
|
|
Target: gi|799335188|ref|XM_012195534.1| Length: 82 Strand: Plus
|
|
Cryptococcus neoformans var. grubii H99 hypothetical protein
|
|
(CNAG_03355), mRNA
|
|
|
|
Score:69 bits(146), Expect:2e-08,
|
|
Identities:31/82(38%), Positives:48/82(59%), Gaps:0.82(0%)
|
|
|
|
gi|799335 0 ARDGQEALELCQKSVPDLIISDVMMPHLNGFELLVALKRSKDLKMVPVIMLTARGADESK
|
|
0 |..|||.|.|.....||....|..||||.|...|..|..|...|...||||||.|.....
|
|
Query_949 0 AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVT
|
|
|
|
gi|799335 60 VDGIMAGAEDYLAKPFSAREIV 82
|
|
60 ......||.....|||.....| 82
|
|
Query_949 60 KKAVDLGASYFILKPFDMENLV 82
|
|
|
|
""",
|
|
)
|
|
hit = record[4]
|
|
self.assertEqual(hit.num, 5)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|1799711371|ref|XM_032001855.1|")
|
|
self.assertEqual(hit.target.name, "XM_032001855")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Kwoniella shandongensis uncharacterized protein (CI109_000662), partial mRNA",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=5538)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 145.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 69.3404)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 2.68988e-08, places=13)
|
|
self.assertEqual(hsp.annotations["identity"], 31)
|
|
self.assertEqual(hsp.annotations["positive"], 51)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 82],
|
|
[ 0, 82]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 82))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAF...NLV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_949527")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(82))"
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('ARDGVEALQLCKKQLPNLIITDVMMPNLDGFGLLAALKESRAMKVIPVIMLTAR...EIV')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|1799711371|ref|XM_032001855.1|")
|
|
self.assertEqual(hsp.target.name, "XM_032001855")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Kwoniella shandongensis uncharacterized protein (CI109_000662), partial mRNA",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(5538))"
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"A +G E L L K++ P++++ D++MP+LDG +L L+ES K VIMLTA G ++ + + GA ++ KPF+ +V",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='gi|1799711371|ref|XM_032001855.1|' query.id='Query_949527'; 2 rows x 82 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_949527 Length: 82 Strand: Plus
|
|
NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168
|
|
complete genome
|
|
Target: gi|1799711371|ref|XM_032001855.1| Length: 82 Strand: Plus
|
|
Kwoniella shandongensis uncharacterized protein (CI109_000662), partial
|
|
mRNA
|
|
|
|
Score:69 bits(145), Expect:3e-08,
|
|
Identities:31/82(38%), Positives:51/82(62%), Gaps:0.82(0%)
|
|
|
|
gi|179971 0 ARDGVEALQLCKKQLPNLIITDVMMPNLDGFGLLAALKESRAMKVIPVIMLTARGGDESK
|
|
0 |..|.|.|.|.|...|.....|..||.|||...|..|.||...|...||||||.|.....
|
|
Query_949 0 AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVT
|
|
|
|
gi|179971 60 VEGILAGADDYLAKPFNAREIV 82
|
|
60 ......||.....|||.....| 82
|
|
Query_949 60 KKAVDLGASYFILKPFDMENLV 82
|
|
|
|
""",
|
|
)
|
|
hit = record[5]
|
|
self.assertEqual(hit.num, 6)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|1102541390|ref|XM_019193349.1|")
|
|
self.assertEqual(hit.target.name, "XM_019193349")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Kwoniella bestiolae CBS 10118 hypothetical protein partial mRNA",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=5319)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 144.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 68.8822)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 3.69545e-08, places=13)
|
|
self.assertEqual(hsp.annotations["identity"], 32)
|
|
self.assertEqual(hsp.annotations["positive"], 49)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 82],
|
|
[ 0, 82]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 82))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAF...NLV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_949527")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(82))"
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('ARDGQEALEMCGKKMPDLIISDVMMPNLDGFGLLEALKASKELSIIPVIMLTAR...ELV')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|1102541390|ref|XM_019193349.1|")
|
|
self.assertEqual(hsp.target.name, "XM_019193349")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Kwoniella bestiolae CBS 10118 hypothetical protein partial mRNA",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(5319))"
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"A +GQE L + +K PD+++ D++MP+LDG +LE L+ S VIMLTA G ++ + GA ++ KPF+ LV",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='gi|1102541390|ref|XM_019193349.1|' query.id='Query_949527'; 2 rows x 82 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_949527 Length: 82 Strand: Plus
|
|
NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168
|
|
complete genome
|
|
Target: gi|1102541390|ref|XM_019193349.1| Length: 82 Strand: Plus
|
|
Kwoniella bestiolae CBS 10118 hypothetical protein partial mRNA
|
|
|
|
Score:68 bits(144), Expect:4e-08,
|
|
Identities:32/82(39%), Positives:49/82(60%), Gaps:0.82(0%)
|
|
|
|
gi|110254 0 ARDGQEALEMCGKKMPDLIISDVMMPNLDGFGLLEALKASKELSIIPVIMLTARGGDEAK
|
|
0 |..|||.|.....|.||....|..||.|||...||.|..|.......||||||.|.....
|
|
Query_949 0 AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVT
|
|
|
|
gi|110254 60 VDGLLAGADDYLAKPFNSRELV 82
|
|
60 ......||.....|||....|| 82
|
|
Query_949 60 KKAVDLGASYFILKPFDMENLV 82
|
|
|
|
""",
|
|
)
|
|
hit = record[6]
|
|
self.assertEqual(hit.num, 7)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|1799711369|ref|XM_032001854.1|")
|
|
self.assertEqual(hit.target.name, "XM_032001854")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Kwoniella shandongensis uncharacterized protein (CI109_000661), partial mRNA",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=3591)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 143.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 68.424)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 5.07694e-08, places=13)
|
|
self.assertEqual(hsp.annotations["identity"], 31)
|
|
self.assertEqual(hsp.annotations["positive"], 53)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 86],
|
|
[ 0, 86]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 86))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('VIGVAYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIM...NLV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_949527")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(86))"
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('VVVEARDGQEALDKCMQIRPDLIITDVMMPVLDGFGLLRALKQSDELKAIPVIM...ELI')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|1799711369|ref|XM_032001854.1|")
|
|
self.assertEqual(hsp.target.name, "XM_032001854")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Kwoniella shandongensis uncharacterized protein (CI109_000661), partial mRNA",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(3591))"
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"V+ A +GQE L + PD+++ D++MP LDG +L L++SD K VIM+TA ++ +A+ GA +++KPF++ L+",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='gi|1799711369|ref|XM_032001854.1|' query.id='Query_949527'; 2 rows x 86 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_949527 Length: 86 Strand: Plus
|
|
NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168
|
|
complete genome
|
|
Target: gi|1799711369|ref|XM_032001854.1| Length: 86 Strand: Plus
|
|
Kwoniella shandongensis uncharacterized protein (CI109_000661), partial
|
|
mRNA
|
|
|
|
Score:68 bits(143), Expect:5e-08,
|
|
Identities:31/86(36%), Positives:53/86(62%), Gaps:0.86(0%)
|
|
|
|
gi|179971 0 VVVEARDGQEALDKCMQIRPDLIITDVMMPVLDGFGLLRALKQSDELKAIPVIMVTAHDG
|
|
0 |...|..|||.|.......||....|..||.|||...|..|..||..|...|||.||...
|
|
Query_949 0 VIGVAYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQ
|
|
|
|
gi|179971 60 DEAKVEALLGGADDYMVKPFNVRELI 86
|
|
60 ......|...||.....|||....|. 86
|
|
Query_949 60 EDVTKKAVDLGASYFILKPFDMENLV 86
|
|
|
|
""",
|
|
)
|
|
hit = record[7]
|
|
self.assertEqual(hit.num, 8)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|2592096353|ref|XM_060229193.1|")
|
|
self.assertEqual(hit.target.name, "XM_060229193")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"PREDICTED: Ylistrum balloti signal transduction histidine-protein kinase BarA-like (LOC132564539), mRNA",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=1374)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 142.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 67.9658)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 6.97488e-08, places=13)
|
|
self.assertEqual(hsp.annotations["identity"], 31)
|
|
self.assertEqual(hsp.annotations["positive"], 51)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 84],
|
|
[ 0, 84]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 84))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('IGVAYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIML...ENL')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_949527")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(84))"
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('IHLASNGQEAIQKTKVLDPDLIFLDLHMPDMSGLEVIEILRSITAYRDTPIIIL...DKL')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|2592096353|ref|XM_060229193.1|")
|
|
self.assertEqual(hsp.target.name, "XM_060229193")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"PREDICTED: Ylistrum balloti signal transduction histidine-protein kinase BarA-like (LOC132564539), mRNA",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(1374))"
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"I +A NGQE + K DPD++ LD+ MP + GL V+E LR + +I+L+A + +KA+ +GAS ++ KP +++ L",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='gi|2592096353|ref|XM_060229193.1|' query.id='Query_949527'; 2 rows x 84 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_949527 Length: 84 Strand: Plus
|
|
NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168
|
|
complete genome
|
|
Target: gi|2592096353|ref|XM_060229193.1| Length: 84 Strand: Plus
|
|
PREDICTED: Ylistrum balloti signal transduction histidine-protein kinase
|
|
BarA-like (LOC132564539), mRNA
|
|
|
|
Score:67 bits(142), Expect:7e-08,
|
|
Identities:31/84(37%), Positives:51/84(61%), Gaps:0.84(0%)
|
|
|
|
gi|259209 0 IHLASNGQEAIQKTKVLDPDLIFLDLHMPDMSGLEVIEILRSITAYRDTPIIILSADAII
|
|
0 |..|.||||.....|..|||...||..||...||.|.|.||..........|.|.|....
|
|
Query_949 0 IGVAYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQE
|
|
|
|
gi|259209 60 EQQEKALAVGASAYLTKPIEIDKL 84
|
|
60 ....||...|||....||.....| 84
|
|
Query_949 60 DVTKKAVDLGASYFILKPFDMENL 84
|
|
|
|
""",
|
|
)
|
|
hit = record[8]
|
|
self.assertEqual(hit.num, 9)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|2044197324|ref|XM_041762518.1|")
|
|
self.assertEqual(hit.target.name, "XM_041762518")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"PREDICTED: Vulpes lagopus sensory transduction protein RegX3-like (LOC121495284), mRNA",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=681)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 142.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 67.9658)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 6.97488e-08, places=13)
|
|
self.assertEqual(hsp.annotations["identity"], 30)
|
|
self.assertEqual(hsp.annotations["positive"], 45)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 73],
|
|
[ 0, 73]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 73))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('PDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVTKKAVDLGAS...RQV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_949527")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(73))"
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('PDLILLDIMLPGSDGLSSLEELRAKKSSESIPVIMATAKGTEFDKVKGLDMGAD...KAV')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|2044197324|ref|XM_041762518.1|")
|
|
self.assertEqual(hsp.target.name, "XM_041762518")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"PREDICTED: Vulpes lagopus sensory transduction protein RegX3-like (LOC121495284), mRNA",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(681))"
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"PD+++LDI++P DGL+ LE LR + VIM TA G E K +D+GA +++KPF M ++ I+ V",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='gi|2044197324|ref|XM_041762518.1|' query.id='Query_949527'; 2 rows x 73 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_949527 Length: 73 Strand: Plus
|
|
NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168
|
|
complete genome
|
|
Target: gi|2044197324|ref|XM_041762518.1| Length: 73 Strand: Plus
|
|
PREDICTED: Vulpes lagopus sensory transduction protein RegX3-like
|
|
(LOC121495284), mRNA
|
|
|
|
Score:67 bits(142), Expect:7e-08,
|
|
Identities:30/73(41%), Positives:45/73(62%), Gaps:0.73(0%)
|
|
|
|
gi|204419 0 PDLILLDIMLPGSDGLSSLEELRAKKSSESIPVIMATAKGTEFDKVKGLDMGADDYLVKP
|
|
0 ||...|||..|..|||..||.||.........|||.||.|.|....|..|.||.....||
|
|
Query_949 0 PDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVTKKAVDLGASYFILKP
|
|
|
|
gi|204419 60 FGMMEMISRIKAV 73
|
|
60 |.|......|..| 73
|
|
Query_949 60 FDMENLVGHIRQV 73
|
|
|
|
""",
|
|
)
|
|
hit = record[9]
|
|
self.assertEqual(hit.num, 10)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gi|1101784196|ref|XM_019135081.1|")
|
|
self.assertEqual(hit.target.name, "XM_019135081")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"Cryptococcus amylolentus CBS 6039 hypothetical protein (L202_01642), partial mRNA",
|
|
)
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=5487)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 142.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 67.9658)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 6.97488e-08, places=13)
|
|
self.assertEqual(hsp.annotations["identity"], 28)
|
|
self.assertEqual(hsp.annotations["positive"], 50)
|
|
self.assertEqual(hsp.annotations["gaps"], 0)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 82],
|
|
[ 0, 82]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 82))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq('AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAF...NLV')",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_949527")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168 complete genome",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 1)
|
|
feature = hsp.query.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(82))"
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('ARDGREALELCAKQKPNLIISDVMMPHVDGFELLTTLKDSSEFRMIPVIMLTAR...EIV')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gi|1101784196|ref|XM_019135081.1|")
|
|
self.assertEqual(hsp.target.name, "XM_019135081")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"Cryptococcus amylolentus CBS 6039 hypothetical protein (L202_01642), partial mRNA",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 1)
|
|
feature = hsp.target.features[0]
|
|
self.assertEqual(feature.type, "CDS")
|
|
location = feature.location
|
|
self.assertEqual(
|
|
repr(location), "SimpleLocation(ExactPosition(0), ExactPosition(5487))"
|
|
)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"A +G+E L L ++ P++++ D++MPH+DG +L L++S + VIMLTA G ++ + GA ++ KPF+ +V",
|
|
)
|
|
self.assertEqual(
|
|
repr(hsp),
|
|
"<Bio.Blast.HSP target.id='gi|1101784196|ref|XM_019135081.1|' query.id='Query_949527'; 2 rows x 82 columns>",
|
|
)
|
|
self.assertEqual(
|
|
str(hsp),
|
|
"""\
|
|
Query : Query_949527 Length: 82 Strand: Plus
|
|
NC_000964.3:2518023-2518826 Bacillus subtilis subsp. subtilis str. 168
|
|
complete genome
|
|
Target: gi|1101784196|ref|XM_019135081.1| Length: 82 Strand: Plus
|
|
Cryptococcus amylolentus CBS 6039 hypothetical protein (L202_01642),
|
|
partial mRNA
|
|
|
|
Score:67 bits(142), Expect:7e-08,
|
|
Identities:28/82(34%), Positives:50/82(61%), Gaps:0.82(0%)
|
|
|
|
gi|110178 0 ARDGREALELCAKQKPNLIISDVMMPHVDGFELLTTLKDSSEFRMIPVIMLTARGADESK
|
|
0 |..|.|.|.|.....|.....|..|||.||...|..|..|.......||||||.|.....
|
|
Query_949 0 AYNGQECLSLFKEKDPDVLVLDIIMPHLDGLAVLERLRESDLKKQPNVIMLTAFGQEDVT
|
|
|
|
gi|110178 60 VSGIMAGAEDYLAKPFNAREIV 82
|
|
60 ......||.....|||.....| 82
|
|
Query_949 60 KKAVDLGASYFILKPFDMENLV 82
|
|
|
|
""",
|
|
)
|
|
|
|
def test_xml_21500_tblastx_001_writer(self):
|
|
"""Writing TBLASTX 2.15.0+ (xml_21500_tblastx_001.xml)."""
|
|
filename = "xml_21500_tblastx_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream)
|
|
self.assertEqual(n, 1)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_xml_21500_tblastx_001_records(written_records)
|
|
|
|
def test_xml2_21500_tblastx_001_writer(self):
|
|
"""Writing TBLASTX 2.15.0+ XML2 (xml2_21500_tblastx_001.xml)."""
|
|
filename = "xml2_21500_tblastx_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream, fmt="XML2")
|
|
self.assertEqual(n, 1)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_xml_21500_tblastx_001_records(written_records, xml2=True)
|
|
|
|
|
|
class TestRPSBlast(unittest.TestCase):
|
|
"""Test the Blast XML parser for rpsblast output."""
|
|
|
|
def test_xml_21500_rpsblast_001_parser(self):
|
|
"""Parsing RPSBLAST 2.15.0+ (xml_21500_rpsblast_001.xml)."""
|
|
filename = "xml_21500_rpsblast_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_21500_rpsblast_001_records(records)
|
|
with Blast.parse(path) as records:
|
|
self.check_xml_21500_rpsblast_001_records(records)
|
|
with open(path, "rb") as stream:
|
|
record = Blast.read(stream)
|
|
self.check_xml_21500_rpsblast_001_record(record)
|
|
record = Blast.read(path)
|
|
self.check_xml_21500_rpsblast_001_record(record)
|
|
|
|
def test_xml2_21500_rpsblast_001_parser(self):
|
|
"""Parsing RPSBLAST 2.15.0+ (xml2_21500_rpsblast_001.xml)."""
|
|
filename = "xml2_21500_rpsblast_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
self.check_xml_21500_rpsblast_001_records(records, xml2=True)
|
|
with Blast.parse(path) as records:
|
|
self.check_xml_21500_rpsblast_001_records(records, xml2=True)
|
|
with open(path, "rb") as stream:
|
|
record = Blast.read(stream)
|
|
self.check_xml_21500_rpsblast_001_record(record, xml2=True)
|
|
record = Blast.read(path)
|
|
self.check_xml_21500_rpsblast_001_record(record, xml2=True)
|
|
|
|
def test_xml_21500_rpsblast_001_writer(self):
|
|
"""Writing rpsblast 2.15.0+ (xml_21500_rpsblast_001.xml)."""
|
|
filename = "xml_21500_rpsblast_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream)
|
|
self.assertEqual(n, 1)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_xml_21500_rpsblast_001_records(written_records)
|
|
|
|
def test_xml2_21500_rpsblast_001_writer(self):
|
|
"""Writing rpsblast 2.9.0+ XML2 (xml2_21500_rpsblast_001_v2.xml)."""
|
|
filename = "xml2_21500_rpsblast_001.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with Blast.parse(path) as records:
|
|
stream = io.BytesIO()
|
|
n = Blast.write(records, stream, fmt="XML2")
|
|
self.assertEqual(n, 1)
|
|
stream.seek(0)
|
|
written_records = Blast.parse(stream)
|
|
self.check_xml_21500_rpsblast_001_records(written_records, xml2=True)
|
|
|
|
def check_xml_21500_rpsblast_001_records(self, records, xml2=False):
|
|
self.assertEqual(records.program, "rpsblast")
|
|
self.assertEqual(records.version, "RPSBLAST 2.15.0+")
|
|
self.assertEqual(
|
|
records.reference,
|
|
'Stephen F. Altschul, Thomas L. Madden, Alejandro A. Schäffer, Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), "Gapped BLAST and PSI-BLAST: a new generation of protein database search programs", Nucleic Acids Res. 25:3389-3402.',
|
|
)
|
|
self.assertEqual(records.db, "Cdd")
|
|
if xml2 is False:
|
|
self.assertIsInstance(records.query, SeqRecord)
|
|
self.assertEqual(records.query.id, "Query_1")
|
|
self.assertEqual(
|
|
records.query.description,
|
|
"pdb|3FAJ|A Chain A, Structure of the structural protein P131 of the archaeal virus Acidianus Two-tailed virus (ATV)",
|
|
)
|
|
self.assertEqual(repr(records.query.seq), "Seq(None, length=151)")
|
|
self.assertEqual(records.param["matrix"], "BLOSUM62")
|
|
self.assertAlmostEqual(records.param["expect"], 1)
|
|
self.assertEqual(records.param["gap-open"], 11)
|
|
self.assertEqual(records.param["gap-extend"], 1)
|
|
self.assertEqual(records.param["filter"], "F")
|
|
if xml2 is True:
|
|
self.assertEqual(records.param["cbs"], 1)
|
|
self.assertEqual(len(records.param), 6)
|
|
else:
|
|
self.assertEqual(len(records.param), 5)
|
|
record = next(records)
|
|
self.assertRaises(StopIteration, next, records)
|
|
self.check_xml_21500_rpsblast_001_record(record, xml2)
|
|
|
|
def check_xml_21500_rpsblast_001_record(self, record, xml2=False):
|
|
if xml2 is False:
|
|
self.assertEqual(record.num, 1)
|
|
self.assertEqual(
|
|
repr(record),
|
|
"<Bio.Blast.Record query.id='Query_1'; 2 hits>",
|
|
)
|
|
self.assertIsInstance(record.query, SeqRecord)
|
|
self.assertEqual(record.query.id, "Query_1")
|
|
self.assertEqual(
|
|
record.query.description,
|
|
"pdb|3FAJ|A Chain A, Structure of the structural protein P131 of the archaeal virus Acidianus Two-tailed virus (ATV)",
|
|
)
|
|
self.assertEqual(repr(record.query.seq), "Seq(None, length=151)")
|
|
self.assertEqual(len(record.stat), 7)
|
|
self.assertEqual(record.stat["db-num"], 59693)
|
|
self.assertEqual(record.stat["db-len"], 13521240)
|
|
self.assertEqual(record.stat["hsp-len"], 89)
|
|
self.assertEqual(record.stat["eff-space"], 508930906)
|
|
self.assertAlmostEqual(record.stat["kappa"], 0.041)
|
|
self.assertAlmostEqual(record.stat["lambda"], 0.267)
|
|
self.assertAlmostEqual(record.stat["entropy"], 0.14)
|
|
self.assertEqual(len(record), 2)
|
|
hit = record[0]
|
|
self.assertEqual(hit.num, 1)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gnl|CDD|165101")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"PHA02734, PHA02734, coat protein; Provisional.",
|
|
)
|
|
self.assertEqual(hit.target.name, "165101")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=149)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 520.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 204.685)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 9.29691e-69, places=74)
|
|
self.assertEqual(hsp.annotations["identity"], 94)
|
|
self.assertEqual(hsp.annotations["positive"], 103)
|
|
self.assertEqual(hsp.annotations["gaps"], 18)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[ 0, 77, 93, 110, 112, 149],
|
|
[ 20, 97, 97, 114, 114, 151]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 149))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq({20: 'MAKYEPKKGDYAGGAVKILDMFENGQLGYPEVTLKLAGEEANARRAGDERTKEA...MAS'}, length=151)",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"pdb|3FAJ|A Chain A, Structure of the structural protein P131 of the archaeal virus Acidianus Two-tailed virus (ATV)",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq('MAKEEPKKGDYAGGAAKILDGFEAGQLGFPEVSLKLAGEEANARKAGDANAKAA...AAM')",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gnl|CDD|165101")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"PHA02734, PHA02734, coat protein; Provisional.",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(
|
|
hsp.annotations["midline"],
|
|
"MAK EPKKGDYAGGA KILD FE GQLG+PEV+LKLAGEEANAR+AGD K AIHAI+KMI DAMKP RNKG GF+ SQ IPGE+ AQV + E YQQAKAFLA+PA R + E LSKGAK LA A A ",
|
|
)
|
|
hit = record[1]
|
|
self.assertEqual(hit.num, 2)
|
|
self.assertIsInstance(hit.target, SeqRecord)
|
|
self.assertEqual(hit.target.id, "gnl|CDD|410801")
|
|
self.assertEqual(
|
|
hit.target.description,
|
|
"cd20027, FH_FOXL1, Forkhead (FH) domain found in Forkhead box protein L1 (FOXL1) and similar proteins. FOXL1, also called Forkhead-related protein FKHL11 or Forkhead-related transcription factor 7 (FREAC-7), acts as a transcription factor required for proper proliferation and differentiation in the gastrointestinal epithelium. It may play a critical role in suppressing tumorigenesis. The FH domain is a winged helix DNA-binding domain. FOX transcription factors recognize the core sequence 5'-(A/C)AA(C/T)A-3'.",
|
|
)
|
|
self.assertEqual(hit.target.name, "410801")
|
|
self.assertEqual(repr(hit.target.seq), "Seq(None, length=98)")
|
|
self.assertEqual(len(hit), 1)
|
|
hsp = hit[0]
|
|
self.assertEqual(hsp.num, 1)
|
|
self.assertAlmostEqual(hsp.score, 65.0)
|
|
self.assertAlmostEqual(hsp.annotations["bit score"], 29.0095)
|
|
self.assertAlmostEqual(hsp.annotations["evalue"], 0.517343)
|
|
self.assertEqual(hsp.annotations["identity"], 13)
|
|
self.assertEqual(hsp.annotations["positive"], 15)
|
|
self.assertEqual(hsp.annotations["gaps"], 4)
|
|
self.assertTrue(
|
|
np.array_equal(
|
|
hsp.coordinates,
|
|
# fmt: off
|
|
np.array([[63, 66, 66, 90],
|
|
[14, 17, 21, 45]])
|
|
# fmt: on
|
|
)
|
|
)
|
|
self.assertEqual(hsp.shape, (2, 31))
|
|
self.assertEqual(
|
|
repr(hsp.query.seq),
|
|
"Seq({14: 'VPRGSHMAKYEPKKGDYAGGAVKILDMFENG'}, length=151)",
|
|
)
|
|
self.assertEqual(hsp.query.id, "Query_1")
|
|
self.assertEqual(
|
|
hsp.query.description,
|
|
"pdb|3FAJ|A Chain A, Structure of the structural protein P131 of the archaeal virus Acidianus Two-tailed virus (ATV)",
|
|
)
|
|
self.assertEqual(len(hsp.query.features), 0)
|
|
self.assertEqual(
|
|
repr(hsp.target.seq),
|
|
"Seq({63: 'VPREKGRPGKGNYWTLDPDCEEMFENG'}, length=98)",
|
|
)
|
|
self.assertEqual(hsp.target.id, "gnl|CDD|410801")
|
|
self.assertEqual(
|
|
hsp.target.description,
|
|
"cd20027, FH_FOXL1, Forkhead (FH) domain found in Forkhead box protein L1 (FOXL1) and similar proteins. FOXL1, also called Forkhead-related protein FKHL11 or Forkhead-related transcription factor 7 (FREAC-7), acts as a transcription factor required for proper proliferation and differentiation in the gastrointestinal epithelium. It may play a critical role in suppressing tumorigenesis. The FH domain is a winged helix DNA-binding domain. FOX transcription factors recognize the core sequence 5'-(A/C)AA(C/T)A-3'.",
|
|
)
|
|
self.assertEqual(len(hsp.target.features), 0)
|
|
self.assertEqual(hsp.annotations["midline"], "VPR K P KG+Y +MFENG")
|
|
|
|
|
|
class TestBlastErrors(unittest.TestCase):
|
|
"""Tests if the Blast XML parser raises the appropriate Exception."""
|
|
|
|
def test_not_xml(self):
|
|
"""Try to parse a FASTA file."""
|
|
message = "Failed to parse the XML data (syntax error: line 1, column 0). Please make sure that the input data are in XML format."
|
|
filename = "wisteria.nu"
|
|
path = os.path.join("Fasta", filename)
|
|
with open(path, "rb") as stream:
|
|
with self.assertRaises(Blast.NotXMLError) as cm:
|
|
records = Blast.parse(stream)
|
|
self.assertEqual(str(cm.exception), message)
|
|
with self.assertRaises(Blast.NotXMLError) as cm:
|
|
records = Blast.parse(path)
|
|
self.assertEqual(str(cm.exception), message)
|
|
|
|
def test_premature_end_header(self):
|
|
"""Try to parse an XML file terminating in the header."""
|
|
message = r"^premature end of XML file \(after reading [1-9]\d* bytes\)$"
|
|
filename = "broken1.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
with self.assertRaises(ValueError) as cm:
|
|
records = Blast.parse(stream)
|
|
self.assertRegex(str(cm.exception), message)
|
|
with self.assertRaises(ValueError) as cm:
|
|
records = Blast.parse(path)
|
|
self.assertRegex(str(cm.exception), message)
|
|
with open(path, "rb") as stream:
|
|
with self.assertRaises(ValueError) as cm:
|
|
records = Blast.read(stream)
|
|
self.assertRegex(str(cm.exception), message)
|
|
with self.assertRaises(ValueError) as cm:
|
|
records = Blast.read(path)
|
|
self.assertRegex(str(cm.exception), message)
|
|
|
|
def test_premature_end_first_block(self):
|
|
"""Try to parse an XML file terminating within the first block."""
|
|
message = r"^premature end of XML file \(after reading [1-9]\d* bytes\)$"
|
|
filename = "broken2.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
with self.assertRaises(ValueError) as cm:
|
|
record = next(records)
|
|
self.assertRegex(str(cm.exception), message)
|
|
with Blast.parse(path) as records:
|
|
with self.assertRaises(ValueError) as cm:
|
|
record = next(records)
|
|
self.assertRegex(str(cm.exception), message)
|
|
with open(path, "rb") as stream:
|
|
with self.assertRaises(ValueError) as cm:
|
|
record = Blast.read(stream)
|
|
self.assertRegex(str(cm.exception), message)
|
|
with self.assertRaises(ValueError) as cm:
|
|
record = Blast.read(path)
|
|
self.assertRegex(str(cm.exception), message)
|
|
|
|
def test_premature_end_second_block(self):
|
|
"""Try to parse an XML file terminating in the second block."""
|
|
message = r"^premature end of XML file \(after reading [1-9]\d* bytes\)$"
|
|
filename = "broken3.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
with self.assertRaises(ValueError) as cm:
|
|
record = next(records)
|
|
self.assertRegex(str(cm.exception), message)
|
|
with Blast.parse(path) as records:
|
|
with self.assertRaises(ValueError) as cm:
|
|
record = next(records)
|
|
self.assertRegex(str(cm.exception), message)
|
|
with open(path, "rb") as stream:
|
|
with self.assertRaises(ValueError) as cm:
|
|
record = Blast.read(stream)
|
|
self.assertRegex(str(cm.exception), message)
|
|
with self.assertRaises(ValueError) as cm:
|
|
record = Blast.read(path)
|
|
self.assertRegex(str(cm.exception), message)
|
|
|
|
def test_premature_end_after_one_record(self):
|
|
"""Try to parse an XML file terminating after the first record."""
|
|
message = r"^premature end of XML file \(after reading [1-9]\d* bytes\)$"
|
|
filename = "broken4.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
records = Blast.parse(stream)
|
|
record = next(records)
|
|
with self.assertRaises(ValueError) as cm:
|
|
record = next(records)
|
|
self.assertRegex(str(cm.exception), message)
|
|
with Blast.parse(path) as records:
|
|
record = next(records)
|
|
with self.assertRaises(ValueError) as cm:
|
|
record = next(records)
|
|
self.assertRegex(str(cm.exception), message)
|
|
with open(path, "rb") as stream:
|
|
with self.assertRaises(ValueError) as cm:
|
|
record = Blast.read(stream)
|
|
self.assertRegex(str(cm.exception), message)
|
|
with self.assertRaises(ValueError) as cm:
|
|
record = Blast.read(path)
|
|
self.assertRegex(str(cm.exception), message)
|
|
|
|
def test_corrupt_xml(self):
|
|
"""Try to parse a broken XML file."""
|
|
message = "Failed to parse the XML data (not well-formed (invalid token): line 10, column 2). Please make sure that the input data are not corrupted."
|
|
|
|
filename = "broken5.xml"
|
|
path = os.path.join("Blast", filename)
|
|
with open(path, "rb") as stream:
|
|
with self.assertRaises(Blast.CorruptedXMLError) as cm:
|
|
records = Blast.parse(stream)
|
|
self.assertEqual(str(cm.exception), message)
|
|
with self.assertRaises(Blast.CorruptedXMLError) as cm:
|
|
with Blast.parse(path):
|
|
pass
|
|
self.assertEqual(str(cm.exception), message)
|
|
with open(path, "rb") as stream:
|
|
with self.assertRaises(Blast.CorruptedXMLError) as cm:
|
|
record = Blast.read(stream)
|
|
self.assertEqual(str(cm.exception), message)
|
|
with self.assertRaises(Blast.CorruptedXMLError) as cm:
|
|
record = Blast.read(path)
|
|
self.assertEqual(str(cm.exception), message)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
runner = unittest.TextTestRunner(verbosity=2)
|
|
unittest.main(testRunner=runner)
|